GOAL = cparser
-FIRM_HOME = $(HOME)/projects/firm
+# Root of the libfirm tree. Assigned with ?= so it can be overridden from the
+# environment or the make command line instead of editing this file.
+# No trailing slash: the variables below append "/..." themselves.
+FIRM_HOME ?= $(HOME)/jambuild
FIRM_BUILD = $(FIRM_HOME)/build/i686-pc-linux-gnu/debug/
FIRM_CFLAGS = -I$(FIRM_HOME)/libfirm/include -I$(FIRM_HOME)/obstack -I$(FIRM_HOME)/libcore -I$(FIRM_HOME)/libcore/libcore -I$(FIRM_HOME)
FIRM_LIBS = -L$(FIRM_BUILD) -lfirm -llpp -lcore -lm -lz -ldl
build/%.o: %.c
@echo '===> CC $<'
- $(Q)icc $(CPPFLAGS) $(ICC_CFLAGS) -c $< -o $@
+# The extra icc pass is disabled: the recipe used to compile every source with
+# icc first and then again with $(CC); only the $(CC) compile below remains.
+# $(Q)icc $(CPPFLAGS) $(ICC_CFLAGS) -c $< -o $@
$(Q)$(CC) $(CPPFLAGS) $(CFLAGS) -c $< -o $@
clean:
print_quoted_string(string_literal->value);
}
+/**
+ * Prints a wide string literal in C source form (L"...") to the output
+ * stream `out`.
+ *
+ * Quotes, backslashes, '?' and the usual control characters are written as
+ * escape sequences, other non-printable characters below 0x80 as three-digit
+ * octal escapes, and everything from 0x80 upwards as a UTF-8 byte sequence.
+ *
+ * The lexer appends a terminating NUL to the character array and counts it
+ * in value.size. That terminator is not part of the literal's spelling, so a
+ * trailing NUL is skipped; printing it would add a spurious "\000" in front
+ * of the closing quote. NULs inside the string are still printed.
+ *
+ * @param wstr  the wide string literal expression to print
+ */
+static void print_wide_string_literal(
+	const wide_string_literal_expression_t *const wstr)
+{
+	const wchar_rep_t *const begin = wstr->value.begin;
+	const wchar_rep_t       *end   = begin + wstr->value.size;
+
+	/* drop the terminating NUL stored by the lexer, if there is one */
+	if (end != begin && end[-1] == 0) {
+		--end;
+	}
+
+	fputs("L\"", out);
+	for (const wchar_rep_t *c = begin; c != end; ++c) {
+		switch (*c) {
+			case L'\"':  fputs("\\\"", out); break;
+			case L'\\':  fputs("\\\\", out); break;
+			case L'\a':  fputs("\\a",  out); break;
+			case L'\b':  fputs("\\b",  out); break;
+			case L'\f':  fputs("\\f",  out); break;
+			case L'\n':  fputs("\\n",  out); break;
+			case L'\r':  fputs("\\r",  out); break;
+			case L'\t':  fputs("\\t",  out); break;
+			case L'\v':  fputs("\\v",  out); break;
+			case L'\?':  fputs("\\?",  out); break;
+			default: {
+				const unsigned tc = *c;
+				if (tc < 0x80U) {
+					/* plain ASCII: print directly or as octal escape */
+					if (!isprint((int)tc)) {
+						/* %o expects an unsigned int argument */
+						fprintf(out, "\\%03o", tc);
+					} else {
+						fputc((int)tc, out);
+					}
+				} else if (tc < 0x800) {
+					/* two-byte UTF-8 sequence */
+					fputc(0xC0 | (tc >> 6),   out);
+					fputc(0x80 | (tc & 0x3F), out);
+				} else if (tc < 0x10000) {
+					/* three-byte UTF-8 sequence */
+					fputc(0xE0 | ( tc >> 12),         out);
+					fputc(0x80 | ((tc >>  6) & 0x3F), out);
+					fputc(0x80 | ( tc        & 0x3F), out);
+				} else {
+					/* four-byte UTF-8 sequence */
+					fputc(0xF0 | ( tc >> 18),         out);
+					fputc(0x80 | ((tc >> 12) & 0x3F), out);
+					fputc(0x80 | ((tc >>  6) & 0x3F), out);
+					fputc(0x80 | ( tc        & 0x3F), out);
+				}
+			}
+		}
+	}
+	fputc('"', out);
+}
+
static void print_call_expression(const call_expression_t *call)
{
print_expression(call->function);
case EXPR_STRING_LITERAL:
print_string_literal(&expression->string);
break;
+ case EXPR_WIDE_STRING_LITERAL:
+ print_wide_string_literal(&expression->wide_string);
+ break;
case EXPR_CALL:
print_call_expression(&expression->call);
break;
typedef struct context_t context_t;
-typedef struct expression_base_t expression_base_t;
-typedef struct const_expression_t const_expression_t;
-typedef struct string_literal_expression_t string_literal_expression_t;
-typedef struct reference_expression_t reference_expression_t;
-typedef struct cast_expression_t cast_expression_t;
-typedef struct call_argument_t call_argument_t;
-typedef struct type_argument_t type_argument_t;
-typedef struct call_expression_t call_expression_t;
-typedef struct binary_expression_t binary_expression_t;
-typedef struct unary_expression_t unary_expression_t;
-typedef struct select_expression_t select_expression_t;
-typedef struct array_access_expression_t array_access_expression_t;
-typedef struct sizeof_expression_t sizeof_expression_t;
-typedef struct conditional_expression_t conditional_expression_t;
-typedef struct expression_list_element_t expression_list_element_t;
-typedef struct comma_expression_t comma_expression_t;
-typedef struct statement_expression_t statement_expression_t;
-typedef struct designator_t designator_t;
-typedef struct offsetof_expression_t offsetof_expression_t;
-typedef struct va_arg_expression_t va_arg_expression_t;
-typedef struct builtin_symbol_expression_t builtin_symbol_expression_t;
-typedef struct classify_type_expression_t classify_type_expression_t;
-typedef union expression_t expression_t;
+typedef struct expression_base_t expression_base_t;
+typedef struct const_expression_t const_expression_t;
+typedef struct string_literal_expression_t string_literal_expression_t;
+typedef struct wide_string_literal_expression_t wide_string_literal_expression_t;
+typedef struct reference_expression_t reference_expression_t;
+typedef struct cast_expression_t cast_expression_t;
+typedef struct call_argument_t call_argument_t;
+typedef struct type_argument_t type_argument_t;
+typedef struct call_expression_t call_expression_t;
+typedef struct binary_expression_t binary_expression_t;
+typedef struct unary_expression_t unary_expression_t;
+typedef struct select_expression_t select_expression_t;
+typedef struct array_access_expression_t array_access_expression_t;
+typedef struct sizeof_expression_t sizeof_expression_t;
+typedef struct conditional_expression_t conditional_expression_t;
+typedef struct expression_list_element_t expression_list_element_t;
+typedef struct comma_expression_t comma_expression_t;
+typedef struct statement_expression_t statement_expression_t;
+typedef struct designator_t designator_t;
+typedef struct offsetof_expression_t offsetof_expression_t;
+typedef struct va_arg_expression_t va_arg_expression_t;
+typedef struct builtin_symbol_expression_t builtin_symbol_expression_t;
+typedef struct classify_type_expression_t classify_type_expression_t;
+typedef union expression_t expression_t;
typedef struct initializer_base_t initializer_base_t;
typedef struct initializer_list_t initializer_list_t;
#define MAGIC_DEFAULT_PN_NUMBER (long) -314159265
static ir_type *ir_type_const_char;
+static ir_type *ir_type_wchar_t;
static ir_type *ir_type_void;
static ir_type *ir_type_int;
void init_ast2firm(void)
{
- type_const_char = make_atomic_type(ATOMIC_TYPE_CHAR, TYPE_QUALIFIER_CONST);
- type_void = make_atomic_type(ATOMIC_TYPE_VOID, TYPE_QUALIFIER_NONE);
- type_int = make_atomic_type(ATOMIC_TYPE_INT, TYPE_QUALIFIER_NONE);
-
- ir_type_int = get_ir_type(type_int);
- ir_type_const_char = get_ir_type(type_const_char);
- ir_type_void = get_ir_type(type_int); /* we don't have a real void
- type in firm */
-
- type_void->base.firm_type = ir_type_void;
+ /* Intentionally empty: the basic type setup now runs at the start of
+  * translation_unit_to_firm(). NOTE(review): presumably because
+  * type_wchar_t is only created by the parser's initialize_builtins()
+  * - confirm the call order. */
}
void exit_ast2firm(void)
literal->value);
}
+/**
+ * Creates a constant global array entity holding the characters of a wide
+ * string literal and returns a SymConst node referring to it.
+ *
+ * The array has literal->value.size elements of the wchar_t firm type, one
+ * tarval per stored character.
+ *
+ * @param literal  the wide string literal expression
+ * @return         node created by create_symconst() for the new entity
+ */
+static ir_node *wide_string_literal_to_firm(
+	const wide_string_literal_expression_t* const literal)
+{
+	ir_type *const global_type = get_glob_type();
+	ir_type *const elem_type   = ir_type_wchar_t;
+	ir_type *const type        = new_type_array(unique_ident("strtype"), 1,
+	                                            elem_type);
+
+	ident     *const id     = unique_ident("Lstr");
+	ir_entity *const entity = new_entity(global_type, id, type);
+	set_entity_ld_ident(entity, id);
+	set_entity_variability(entity, variability_constant);
+
+	ir_mode *const mode = get_type_mode(elem_type);
+
+	const wchar_rep_t *const string = literal->value.begin;
+	const size_t             slen   = literal->value.size;
+
+	set_array_lower_bound_int(type, 0, 0);
+	set_array_upper_bound_int(type, 0, slen);
+	/* byte size = element count * element size; the elements are wider
+	 * than one byte, so the count alone would understate the array size */
+	set_type_size_bytes(type, slen * get_type_size_bytes(elem_type));
+	set_type_state(type, layout_fixed);
+
+	/* temporary array of initializer values, one per character */
+	tarval **const tvs = xmalloc(slen * sizeof(tvs[0]));
+	for(size_t i = 0; i < slen; ++i) {
+		tvs[i] = new_tarval_from_long(string[i], mode);
+	}
+
+	set_array_entity_values(entity, tvs, slen);
+	free(tvs);
+
+	dbg_info *const dbgi = get_dbg_info(&literal->expression.source_position);
+
+	return create_symconst(dbgi, entity);
+}
+
static ir_node *deref_address(ir_type *const irtype, ir_node *const addr,
dbg_info *const dbgi)
{
return const_to_firm(&expression->conste);
case EXPR_STRING_LITERAL:
return string_literal_to_firm(&expression->string);
+ case EXPR_WIDE_STRING_LITERAL:
+ return wide_string_literal_to_firm(&expression->wide_string);
case EXPR_REFERENCE:
return reference_expression_to_firm(&expression->reference);
case EXPR_CALL:
void translation_unit_to_firm(translation_unit_t *unit)
{
+ type_const_char = make_atomic_type(ATOMIC_TYPE_CHAR, TYPE_QUALIFIER_CONST);
+ type_void = make_atomic_type(ATOMIC_TYPE_VOID, TYPE_QUALIFIER_NONE);
+ type_int = make_atomic_type(ATOMIC_TYPE_INT, TYPE_QUALIFIER_NONE);
+
+ ir_type_int = get_ir_type(type_int);
+ ir_type_const_char = get_ir_type(type_const_char);
+ ir_type_wchar_t = get_ir_type(type_wchar_t);
+ ir_type_void = get_ir_type(type_int); /* we don't have a real void
+ type in firm */
+
+ type_void->base.firm_type = ir_type_void;
+
/* just to be sure */
continue_label = NULL;
break_label = NULL;
EXPR_REFERENCE,
EXPR_CONST,
EXPR_STRING_LITERAL,
+ EXPR_WIDE_STRING_LITERAL,
EXPR_CALL,
EXPR_UNARY,
EXPR_BINARY,
const char *value;
};
+/** Expression node for a wide string literal (EXPR_WIDE_STRING_LITERAL). */
+struct wide_string_literal_expression_t {
+ expression_base_t expression; /* common expression header */
+ wide_string_t value; /* characters and count, copied from the lexer token */
+};
+
struct builtin_symbol_expression_t {
expression_base_t expression;
symbol_t *symbol;
};
+/* Storage for an expression node of any kind. The dispatch code reads the
+ * member that belongs to the node's EXPR_* kind, e.g. wide_string for
+ * EXPR_WIDE_STRING_LITERAL. */
union expression_t {
- expression_type_t type;
- expression_base_t base;
- const_expression_t conste;
- string_literal_expression_t string;
- builtin_symbol_expression_t builtin_symbol;
- reference_expression_t reference;
- call_expression_t call;
- unary_expression_t unary;
- binary_expression_t binary;
- select_expression_t select;
- array_access_expression_t array_access;
- sizeof_expression_t sizeofe;
- offsetof_expression_t offsetofe;
- va_arg_expression_t va_arge;
- conditional_expression_t conditional;
- statement_expression_t statement;
- classify_type_expression_t classify_type;
+ expression_type_t type;
+ expression_base_t base;
+ const_expression_t conste;
+ string_literal_expression_t string;
+ wide_string_literal_expression_t wide_string;
+ builtin_symbol_expression_t builtin_symbol;
+ reference_expression_t reference;
+ call_expression_t call;
+ unary_expression_t unary;
+ binary_expression_t binary;
+ select_expression_t select;
+ array_access_expression_t array_access;
+ sizeof_expression_t sizeofe;
+ offsetof_expression_t offsetofe;
+ va_arg_expression_t va_arge;
+ conditional_expression_t conditional;
+ statement_expression_t statement;
+ classify_type_expression_t classify_type;
};
typedef enum {
lexer_token.v.string = result;
}
+/**
+ * Lexes a wide string literal. Must be called with the current character
+ * being the opening quote.
+ *
+ * Each source character and each decoded escape sequence is appended to
+ * symbol_obstack as one wchar_rep_t, followed by a terminating NUL. On
+ * success lexer_token becomes a T_WIDE_STRING_LITERAL whose size counts all
+ * stored elements including the terminator.
+ *
+ * If the input ends before the closing quote, an error is printed, the
+ * token type is set to T_ERROR and the characters collected so far are
+ * discarded from the obstack.
+ */
+static void parse_wide_string_literal(void)
+{
+	const unsigned start_linenr = lexer_token.source_position.linenr;
+
+	assert(c == '"');
+	next_char();
+
+	while(1) {
+		switch(c) {
+			case '\\': {
+				wchar_rep_t tc = parse_escape_sequence();
+				obstack_grow(&symbol_obstack, &tc, sizeof(tc));
+				break;
+			}
+
+			case EOF:
+				error_prefix_at(lexer_token.source_position.input_name,
+				                start_linenr);
+				fprintf(stderr, "string has no end\n");
+				/* cancel the half-grown object; otherwise the next user
+				 * of symbol_obstack would continue this string */
+				obstack_free(&symbol_obstack,
+				             obstack_finish(&symbol_obstack));
+				lexer_token.type = T_ERROR;
+				return;
+
+			case '"':
+				next_char();
+				goto end_of_string;
+
+			default: {
+				/* source characters are taken over one by one */
+				wchar_rep_t tc = c;
+				obstack_grow(&symbol_obstack, &tc, sizeof(tc));
+				next_char();
+				break;
+			}
+		}
+	}
+
+end_of_string:;
+
+	/* TODO: concatenate multiple strings separated by whitespace... */
+
+	/* add finishing 0 to the string */
+	wchar_rep_t nul = L'\0';
+	obstack_grow(&symbol_obstack, &nul, sizeof(nul));
+	/* element count, including the terminating 0 just added */
+	const size_t             size   = (size_t)obstack_object_size(&symbol_obstack) / sizeof(wchar_rep_t);
+	const wchar_rep_t *const string = obstack_finish(&symbol_obstack);
+
+#if 0 /* TODO hash */
+	/* check if there is already a copy of the string */
+	const wchar_rep_t *const result = strset_insert(&stringset, string);
+	if(result != string) {
+		obstack_free(&symbol_obstack, string);
+	}
+#else
+	const wchar_rep_t *const result = string;
+#endif
+
+	lexer_token.type                = T_WIDE_STRING_LITERAL;
+	lexer_token.v.wide_string.begin = result;
+	lexer_token.v.wide_string.size  = size;
+}
+
static void parse_character_constant(void)
{
eat('\'');
/* might be a wide string ( L"string" ) */
if(c == '"' && (lexer_token.type == T_IDENTIFIER &&
lexer_token.v.symbol == symbol_L)) {
- parse_string_literal();
- return;
+ parse_wide_string_literal();
}
return;
type_t *type_size_t = NULL;
type_t *type_ptrdiff_t = NULL;
type_t *type_wchar_t = NULL;
-type_t *type_wchar_ptr_t = NULL;
+type_t *type_wchar_t_ptr = NULL;
static statement_t *parse_compound_statement(void);
static statement_t *parse_statement(void);
static size_t get_expression_struct_size(expression_type_t type)
{
static const size_t sizes[] = {
- [EXPR_INVALID] = sizeof(expression_base_t),
- [EXPR_REFERENCE] = sizeof(reference_expression_t),
- [EXPR_CONST] = sizeof(const_expression_t),
- [EXPR_STRING_LITERAL] = sizeof(string_literal_expression_t),
- [EXPR_CALL] = sizeof(call_expression_t),
- [EXPR_UNARY] = sizeof(unary_expression_t),
- [EXPR_BINARY] = sizeof(binary_expression_t),
- [EXPR_CONDITIONAL] = sizeof(conditional_expression_t),
- [EXPR_SELECT] = sizeof(select_expression_t),
- [EXPR_ARRAY_ACCESS] = sizeof(array_access_expression_t),
- [EXPR_SIZEOF] = sizeof(sizeof_expression_t),
- [EXPR_CLASSIFY_TYPE] = sizeof(classify_type_expression_t),
- [EXPR_FUNCTION] = sizeof(string_literal_expression_t),
- [EXPR_PRETTY_FUNCTION] = sizeof(string_literal_expression_t),
- [EXPR_BUILTIN_SYMBOL] = sizeof(builtin_symbol_expression_t),
- [EXPR_OFFSETOF] = sizeof(offsetof_expression_t),
- [EXPR_VA_ARG] = sizeof(va_arg_expression_t),
- [EXPR_STATEMENT] = sizeof(statement_expression_t)
+ [EXPR_INVALID] = sizeof(expression_base_t),
+ [EXPR_REFERENCE] = sizeof(reference_expression_t),
+ [EXPR_CONST] = sizeof(const_expression_t),
+ [EXPR_STRING_LITERAL] = sizeof(string_literal_expression_t),
+ [EXPR_WIDE_STRING_LITERAL] = sizeof(wide_string_literal_expression_t),
+ [EXPR_CALL] = sizeof(call_expression_t),
+ [EXPR_UNARY] = sizeof(unary_expression_t),
+ [EXPR_BINARY] = sizeof(binary_expression_t),
+ [EXPR_CONDITIONAL] = sizeof(conditional_expression_t),
+ [EXPR_SELECT] = sizeof(select_expression_t),
+ [EXPR_ARRAY_ACCESS] = sizeof(array_access_expression_t),
+ [EXPR_SIZEOF] = sizeof(sizeof_expression_t),
+ [EXPR_CLASSIFY_TYPE] = sizeof(classify_type_expression_t),
+ [EXPR_FUNCTION] = sizeof(string_literal_expression_t),
+ [EXPR_PRETTY_FUNCTION] = sizeof(string_literal_expression_t),
+ [EXPR_BUILTIN_SYMBOL] = sizeof(builtin_symbol_expression_t),
+ [EXPR_OFFSETOF] = sizeof(offsetof_expression_t),
+ [EXPR_VA_ARG] = sizeof(va_arg_expression_t),
+ [EXPR_STATEMENT] = sizeof(statement_expression_t)
};
assert(sizeof(sizes) / sizeof(sizes[0]) == EXPR_STATEMENT + 1);
assert(type <= EXPR_STATEMENT);
return cnst;
}
+/**
+ * Builds an EXPR_WIDE_STRING_LITERAL expression from the current token and
+ * advances to the next token.
+ *
+ * TODO: adjacent literals are not concatenated yet.
+ */
+static expression_t *parse_wide_string_const(void)
+{
+	expression_t *const literal
+		= allocate_expression_zero(EXPR_WIDE_STRING_LITERAL);
+
+	/* take over the token's payload before it is replaced by next_token() */
+	literal->wide_string.value = token.v.wide_string;
+	literal->base.datatype     = type_wchar_t_ptr;
+
+	next_token();
+
+	return literal;
+}
+
static expression_t *parse_int_const(void)
{
expression_t *cnst = allocate_expression_zero(EXPR_CONST);
return parse_int_const();
case T_FLOATINGPOINT:
return parse_float_const();
- case T_STRING_LITERAL:
+ case T_STRING_LITERAL: /* TODO merge */
return parse_string_const();
+ case T_WIDE_STRING_LITERAL:
+ return parse_wide_string_const();
case T_IDENTIFIER:
return parse_reference();
case T___FUNCTION__:
static void initialize_builtins(void)
{
type_wchar_t = make_global_typedef("__WCHAR_TYPE__", type_int);
- type_wchar_ptr_t = make_pointer_type(type_wchar_t, TYPE_QUALIFIER_NONE);
+ type_wchar_t_ptr = make_pointer_type(type_wchar_t, TYPE_QUALIFIER_NONE);
type_size_t = make_global_typedef("__SIZE_TYPE__",
make_atomic_type(ATOMIC_TYPE_ULONG, TYPE_QUALIFIER_NONE));
type_ptrdiff_t = make_global_typedef("__PTRDIFF_TYPE__",
#define TOKEN_T_H
#include <stdio.h>
+#include "string_rep.h"
#include "symbol.h"
#include "symbol_table.h"
#include "type.h"
typedef struct {
int type;
union {
- symbol_t *symbol;
- long long intvalue;
- long double floatvalue;
- const char *string;
+ symbol_t *symbol;
+ long long intvalue;
+ long double floatvalue;
+ const char *string;
+ wide_string_t wide_string;
} v;
type_t *datatype;
source_position_t source_position;
#define TS(x,str,val)
#endif
-TS(IDENTIFIER, "identifier", = 256)
-TS(INTEGER, "integer number",)
-TS(FLOATINGPOINT, "floatingpoint number",)
-TS(STRING_LITERAL, "string literal",)
+TS(IDENTIFIER, "identifier", = 256)
+TS(INTEGER, "integer number",)
+TS(FLOATINGPOINT, "floatingpoint number",)
+TS(STRING_LITERAL, "string literal",)
+TS(WIDE_STRING_LITERAL, "wide string literal",)
#define S(x) T(x,#x,)
S(auto)