From: Matthias Braun Date: Wed, 24 Feb 2010 15:06:07 +0000 (+0000) Subject: - Rework the way literals are handled, these are now kept as strings until X-Git-Url: http://nsz.repo.hu/git/?a=commitdiff_plain;h=2beaa4f65961fe297663e1cec9e5632b7f3e1cba;hp=373f63fc99fa81faf78910353564fda6ce44d7c1;p=cparser - Rework the way literals are handled, these are now kept as strings until codegeneration - Don't work with wide-strings inside the compiler but always use utf-8 encoded "normal" strings. This simplifies some places (like the printf format checker) and avoids code duplication. [r27212] --- diff --git a/ast.c b/ast.c index 5dc9b4c..fa3cd71 100644 --- a/ast.c +++ b/ast.c @@ -26,6 +26,7 @@ #include "lang_features.h" #include "entity_t.h" #include "printer.h" +#include "types.h" #include #include @@ -68,6 +69,18 @@ void print_indent(void) print_string("\t"); } +static void print_symbol(const symbol_t *symbol) +{ + print_string(symbol->string); +} + +static void print_stringrep(const string_t *string) +{ + for (size_t i = 0; i < string->size; ++i) { + print_char(string->begin[i]); + } +} + /** * Returns 1 if a given precedence level has right-to-left * associativity, else 0. 
@@ -95,88 +108,93 @@ static int right_to_left(unsigned precedence) static unsigned get_expression_precedence(expression_kind_t kind) { static const unsigned prec[] = { - [EXPR_UNKNOWN] = PREC_PRIMARY, - [EXPR_INVALID] = PREC_PRIMARY, - [EXPR_REFERENCE] = PREC_PRIMARY, - [EXPR_REFERENCE_ENUM_VALUE] = PREC_PRIMARY, - [EXPR_CHARACTER_CONSTANT] = PREC_PRIMARY, - [EXPR_WIDE_CHARACTER_CONSTANT] = PREC_PRIMARY, - [EXPR_CONST] = PREC_PRIMARY, - [EXPR_STRING_LITERAL] = PREC_PRIMARY, - [EXPR_WIDE_STRING_LITERAL] = PREC_PRIMARY, - [EXPR_COMPOUND_LITERAL] = PREC_UNARY, - [EXPR_CALL] = PREC_POSTFIX, - [EXPR_CONDITIONAL] = PREC_CONDITIONAL, - [EXPR_SELECT] = PREC_POSTFIX, - [EXPR_ARRAY_ACCESS] = PREC_POSTFIX, - [EXPR_SIZEOF] = PREC_UNARY, - [EXPR_CLASSIFY_TYPE] = PREC_UNARY, - [EXPR_ALIGNOF] = PREC_UNARY, - - [EXPR_FUNCNAME] = PREC_PRIMARY, - [EXPR_BUILTIN_CONSTANT_P] = PREC_PRIMARY, - [EXPR_BUILTIN_TYPES_COMPATIBLE_P] = PREC_PRIMARY, - [EXPR_OFFSETOF] = PREC_PRIMARY, - [EXPR_VA_START] = PREC_PRIMARY, - [EXPR_VA_ARG] = PREC_PRIMARY, - [EXPR_VA_COPY] = PREC_PRIMARY, - [EXPR_STATEMENT] = PREC_PRIMARY, - [EXPR_LABEL_ADDRESS] = PREC_PRIMARY, - - [EXPR_UNARY_NEGATE] = PREC_UNARY, - [EXPR_UNARY_PLUS] = PREC_UNARY, - [EXPR_UNARY_BITWISE_NEGATE] = PREC_UNARY, - [EXPR_UNARY_NOT] = PREC_UNARY, - [EXPR_UNARY_DEREFERENCE] = PREC_UNARY, - [EXPR_UNARY_TAKE_ADDRESS] = PREC_UNARY, - [EXPR_UNARY_POSTFIX_INCREMENT] = PREC_POSTFIX, - [EXPR_UNARY_POSTFIX_DECREMENT] = PREC_POSTFIX, - [EXPR_UNARY_PREFIX_INCREMENT] = PREC_UNARY, - [EXPR_UNARY_PREFIX_DECREMENT] = PREC_UNARY, - [EXPR_UNARY_CAST] = PREC_UNARY, - [EXPR_UNARY_CAST_IMPLICIT] = PREC_UNARY, - [EXPR_UNARY_ASSUME] = PREC_PRIMARY, - [EXPR_UNARY_DELETE] = PREC_UNARY, - [EXPR_UNARY_DELETE_ARRAY] = PREC_UNARY, - [EXPR_UNARY_THROW] = PREC_ASSIGNMENT, - - [EXPR_BINARY_ADD] = PREC_ADDITIVE, - [EXPR_BINARY_SUB] = PREC_ADDITIVE, - [EXPR_BINARY_MUL] = PREC_MULTIPLICATIVE, - [EXPR_BINARY_DIV] = PREC_MULTIPLICATIVE, - [EXPR_BINARY_MOD] = 
PREC_MULTIPLICATIVE, - [EXPR_BINARY_EQUAL] = PREC_EQUALITY, - [EXPR_BINARY_NOTEQUAL] = PREC_EQUALITY, - [EXPR_BINARY_LESS] = PREC_RELATIONAL, - [EXPR_BINARY_LESSEQUAL] = PREC_RELATIONAL, - [EXPR_BINARY_GREATER] = PREC_RELATIONAL, - [EXPR_BINARY_GREATEREQUAL] = PREC_RELATIONAL, - [EXPR_BINARY_BITWISE_AND] = PREC_AND, - [EXPR_BINARY_BITWISE_OR] = PREC_OR, - [EXPR_BINARY_BITWISE_XOR] = PREC_XOR, - [EXPR_BINARY_LOGICAL_AND] = PREC_LOGICAL_AND, - [EXPR_BINARY_LOGICAL_OR] = PREC_LOGICAL_OR, - [EXPR_BINARY_SHIFTLEFT] = PREC_SHIFT, - [EXPR_BINARY_SHIFTRIGHT] = PREC_SHIFT, - [EXPR_BINARY_ASSIGN] = PREC_ASSIGNMENT, - [EXPR_BINARY_MUL_ASSIGN] = PREC_ASSIGNMENT, - [EXPR_BINARY_DIV_ASSIGN] = PREC_ASSIGNMENT, - [EXPR_BINARY_MOD_ASSIGN] = PREC_ASSIGNMENT, - [EXPR_BINARY_ADD_ASSIGN] = PREC_ASSIGNMENT, - [EXPR_BINARY_SUB_ASSIGN] = PREC_ASSIGNMENT, - [EXPR_BINARY_SHIFTLEFT_ASSIGN] = PREC_ASSIGNMENT, - [EXPR_BINARY_SHIFTRIGHT_ASSIGN] = PREC_ASSIGNMENT, - [EXPR_BINARY_BITWISE_AND_ASSIGN] = PREC_ASSIGNMENT, - [EXPR_BINARY_BITWISE_XOR_ASSIGN] = PREC_ASSIGNMENT, - [EXPR_BINARY_BITWISE_OR_ASSIGN] = PREC_ASSIGNMENT, - [EXPR_BINARY_COMMA] = PREC_EXPRESSION, - - [EXPR_BINARY_ISGREATER] = PREC_PRIMARY, - [EXPR_BINARY_ISGREATEREQUAL] = PREC_PRIMARY, - [EXPR_BINARY_ISLESS] = PREC_PRIMARY, - [EXPR_BINARY_ISLESSEQUAL] = PREC_PRIMARY, - [EXPR_BINARY_ISLESSGREATER] = PREC_PRIMARY, - [EXPR_BINARY_ISUNORDERED] = PREC_PRIMARY + [EXPR_UNKNOWN] = PREC_PRIMARY, + [EXPR_INVALID] = PREC_PRIMARY, + [EXPR_REFERENCE] = PREC_PRIMARY, + [EXPR_REFERENCE_ENUM_VALUE] = PREC_PRIMARY, + [EXPR_LITERAL_INTEGER] = PREC_PRIMARY, + [EXPR_LITERAL_INTEGER_OCTAL] = PREC_PRIMARY, + [EXPR_LITERAL_INTEGER_HEXADECIMAL] = PREC_PRIMARY, + [EXPR_LITERAL_FLOATINGPOINT] = PREC_PRIMARY, + [EXPR_LITERAL_FLOATINGPOINT_HEXADECIMAL] = PREC_PRIMARY, + [EXPR_LITERAL_CHARACTER] = PREC_PRIMARY, + [EXPR_LITERAL_WIDE_CHARACTER] = PREC_PRIMARY, + [EXPR_LITERAL_MS_NOOP] = PREC_PRIMARY, + [EXPR_STRING_LITERAL] = PREC_PRIMARY, + 
[EXPR_WIDE_STRING_LITERAL] = PREC_PRIMARY, + [EXPR_COMPOUND_LITERAL] = PREC_UNARY, + [EXPR_CALL] = PREC_POSTFIX, + [EXPR_CONDITIONAL] = PREC_CONDITIONAL, + [EXPR_SELECT] = PREC_POSTFIX, + [EXPR_ARRAY_ACCESS] = PREC_POSTFIX, + [EXPR_SIZEOF] = PREC_UNARY, + [EXPR_CLASSIFY_TYPE] = PREC_UNARY, + [EXPR_ALIGNOF] = PREC_UNARY, + + [EXPR_FUNCNAME] = PREC_PRIMARY, + [EXPR_BUILTIN_CONSTANT_P] = PREC_PRIMARY, + [EXPR_BUILTIN_TYPES_COMPATIBLE_P] = PREC_PRIMARY, + [EXPR_OFFSETOF] = PREC_PRIMARY, + [EXPR_VA_START] = PREC_PRIMARY, + [EXPR_VA_ARG] = PREC_PRIMARY, + [EXPR_VA_COPY] = PREC_PRIMARY, + [EXPR_STATEMENT] = PREC_PRIMARY, + [EXPR_LABEL_ADDRESS] = PREC_PRIMARY, + + [EXPR_UNARY_NEGATE] = PREC_UNARY, + [EXPR_UNARY_PLUS] = PREC_UNARY, + [EXPR_UNARY_BITWISE_NEGATE] = PREC_UNARY, + [EXPR_UNARY_NOT] = PREC_UNARY, + [EXPR_UNARY_DEREFERENCE] = PREC_UNARY, + [EXPR_UNARY_TAKE_ADDRESS] = PREC_UNARY, + [EXPR_UNARY_POSTFIX_INCREMENT] = PREC_POSTFIX, + [EXPR_UNARY_POSTFIX_DECREMENT] = PREC_POSTFIX, + [EXPR_UNARY_PREFIX_INCREMENT] = PREC_UNARY, + [EXPR_UNARY_PREFIX_DECREMENT] = PREC_UNARY, + [EXPR_UNARY_CAST] = PREC_UNARY, + [EXPR_UNARY_CAST_IMPLICIT] = PREC_UNARY, + [EXPR_UNARY_ASSUME] = PREC_PRIMARY, + [EXPR_UNARY_DELETE] = PREC_UNARY, + [EXPR_UNARY_DELETE_ARRAY] = PREC_UNARY, + [EXPR_UNARY_THROW] = PREC_ASSIGNMENT, + + [EXPR_BINARY_ADD] = PREC_ADDITIVE, + [EXPR_BINARY_SUB] = PREC_ADDITIVE, + [EXPR_BINARY_MUL] = PREC_MULTIPLICATIVE, + [EXPR_BINARY_DIV] = PREC_MULTIPLICATIVE, + [EXPR_BINARY_MOD] = PREC_MULTIPLICATIVE, + [EXPR_BINARY_EQUAL] = PREC_EQUALITY, + [EXPR_BINARY_NOTEQUAL] = PREC_EQUALITY, + [EXPR_BINARY_LESS] = PREC_RELATIONAL, + [EXPR_BINARY_LESSEQUAL] = PREC_RELATIONAL, + [EXPR_BINARY_GREATER] = PREC_RELATIONAL, + [EXPR_BINARY_GREATEREQUAL] = PREC_RELATIONAL, + [EXPR_BINARY_BITWISE_AND] = PREC_AND, + [EXPR_BINARY_BITWISE_OR] = PREC_OR, + [EXPR_BINARY_BITWISE_XOR] = PREC_XOR, + [EXPR_BINARY_LOGICAL_AND] = PREC_LOGICAL_AND, + [EXPR_BINARY_LOGICAL_OR] = PREC_LOGICAL_OR, + 
[EXPR_BINARY_SHIFTLEFT] = PREC_SHIFT, + [EXPR_BINARY_SHIFTRIGHT] = PREC_SHIFT, + [EXPR_BINARY_ASSIGN] = PREC_ASSIGNMENT, + [EXPR_BINARY_MUL_ASSIGN] = PREC_ASSIGNMENT, + [EXPR_BINARY_DIV_ASSIGN] = PREC_ASSIGNMENT, + [EXPR_BINARY_MOD_ASSIGN] = PREC_ASSIGNMENT, + [EXPR_BINARY_ADD_ASSIGN] = PREC_ASSIGNMENT, + [EXPR_BINARY_SUB_ASSIGN] = PREC_ASSIGNMENT, + [EXPR_BINARY_SHIFTLEFT_ASSIGN] = PREC_ASSIGNMENT, + [EXPR_BINARY_SHIFTRIGHT_ASSIGN] = PREC_ASSIGNMENT, + [EXPR_BINARY_BITWISE_AND_ASSIGN] = PREC_ASSIGNMENT, + [EXPR_BINARY_BITWISE_XOR_ASSIGN] = PREC_ASSIGNMENT, + [EXPR_BINARY_BITWISE_OR_ASSIGN] = PREC_ASSIGNMENT, + [EXPR_BINARY_COMMA] = PREC_EXPRESSION, + + [EXPR_BINARY_ISGREATER] = PREC_PRIMARY, + [EXPR_BINARY_ISGREATEREQUAL] = PREC_PRIMARY, + [EXPR_BINARY_ISLESS] = PREC_PRIMARY, + [EXPR_BINARY_ISLESSEQUAL] = PREC_PRIMARY, + [EXPR_BINARY_ISLESSGREATER] = PREC_PRIMARY, + [EXPR_BINARY_ISUNORDERED] = PREC_PRIMARY }; assert((size_t)kind < lengthof(prec)); unsigned res = prec[kind]; @@ -185,51 +203,6 @@ static unsigned get_expression_precedence(expression_kind_t kind) return res; } -/** - * Print a constant expression. - * - * @param cnst the constant expression - */ -static void print_const(const const_expression_t *cnst) -{ - if (cnst->base.type == NULL) - return; - - const type_t *const type = skip_typeref(cnst->base.type); - - if (is_type_atomic(type, ATOMIC_TYPE_BOOL)) { - print_string(cnst->v.int_value ? 
"true" : "false"); - } else if (is_type_integer(type)) { - print_format("%lld", cnst->v.int_value); - } else if (is_type_float(type)) { - long double const val = cnst->v.float_value; -#ifdef _WIN32 - /* ARG, no way to print long double */ - print_format("%.20g", (double)val); -#else - print_format("%.20Lg", val); -#endif - if (isfinite(val) && truncl(val) == val) - print_string(".0"); - } else { - panic("unknown constant"); - } - - char const* suffix; - switch (type->atomic.akind) { - case ATOMIC_TYPE_UINT: suffix = "U"; break; - case ATOMIC_TYPE_LONG: suffix = "L"; break; - case ATOMIC_TYPE_ULONG: suffix = "UL"; break; - case ATOMIC_TYPE_LONGLONG: suffix = "LL"; break; - case ATOMIC_TYPE_ULONGLONG: suffix = "ULL"; break; - case ATOMIC_TYPE_FLOAT: suffix = "F"; break; - case ATOMIC_TYPE_LONG_DOUBLE: suffix = "L"; break; - - default: return; - } - print_string(suffix); -} - /** * Print a quoted string constant. * @@ -237,7 +210,8 @@ static void print_const(const const_expression_t *cnst) * @param border the border char * @param skip number of chars to skip at the end */ -static void print_quoted_string(const string_t *const string, char border, int skip) +static void print_quoted_string(const string_t *const string, char border, + int skip) { print_char(border); const char *end = string->begin + string->size - skip; @@ -247,15 +221,15 @@ static void print_quoted_string(const string_t *const string, char border, int s print_string("\\"); } switch (tc) { - case '\\': print_string("\\\\"); break; - case '\a': print_string("\\a"); break; - case '\b': print_string("\\b"); break; - case '\f': print_string("\\f"); break; - case '\n': print_string("\\n"); break; - case '\r': print_string("\\r"); break; - case '\t': print_string("\\t"); break; - case '\v': print_string("\\v"); break; - case '\?': print_string("\\?"); break; + case '\\': print_string("\\\\"); break; + case '\a': print_string("\\a"); break; + case '\b': print_string("\\b"); break; + case '\f': 
print_string("\\f"); break; + case '\n': print_string("\\n"); break; + case '\r': print_string("\\r"); break; + case '\t': print_string("\\t"); break; + case '\v': print_string("\\v"); break; + case '\?': print_string("\\?"); break; case 27: if (c_mode & _GNUC) { print_string("\\e"); break; @@ -273,77 +247,42 @@ static void print_quoted_string(const string_t *const string, char border, int s print_char(border); } -/** - * Prints a wide string literal expression. - * - * @param wstr the wide string literal expression - * @param border the border char - * @param skip number of chars to skip at the end - */ -static void print_quoted_wide_string(const wide_string_t *const wstr, - char border, int skip) +static void print_string_literal(const string_literal_expression_t *literal) { - print_string("L"); - print_char(border); - const wchar_rep_t *end = wstr->begin + wstr->size - skip; - for (const wchar_rep_t *c = wstr->begin; c != end; ++c) { - switch (*c) { - case L'\"': print_string("\\\""); break; - case L'\\': print_string("\\\\"); break; - case L'\a': print_string("\\a"); break; - case L'\b': print_string("\\b"); break; - case L'\f': print_string("\\f"); break; - case L'\n': print_string("\\n"); break; - case L'\r': print_string("\\r"); break; - case L'\t': print_string("\\t"); break; - case L'\v': print_string("\\v"); break; - case L'\?': print_string("\\?"); break; - case 27: - if (c_mode & _GNUC) { - print_string("\\e"); break; - } - /* FALLTHROUGH */ - default: { - const unsigned tc = *c; - if (tc < 0x80U) { - if (isprint(*c)) { - print_char(*c); - } else { - print_format("\\%03o", tc); - } - } else { - print_char(tc); - } - } - } + if (literal->base.kind == EXPR_WIDE_STRING_LITERAL) { + print_char('L'); } - print_char(border); + print_quoted_string(&literal->value, '"', 1); } -/** - * Print a constant character expression. 
- * - * @param cnst the constant character expression - */ -static void print_character_constant(const const_expression_t *cnst) -{ - print_quoted_string(&cnst->v.character, '\'', 0); -} - -static void print_wide_character_constant(const const_expression_t *cnst) +static void print_literal(const literal_expression_t *literal) { - print_quoted_wide_string(&cnst->v.wide_character, '\'', 0); -} - -/** - * Prints a string literal expression. - * - * @param string_literal the string literal expression - */ -static void print_string_literal( - const string_literal_expression_t *string_literal) -{ - print_quoted_string(&string_literal->value, '"', 1); + switch (literal->base.kind) { + case EXPR_LITERAL_MS_NOOP: + print_string("__noop"); + return; + case EXPR_LITERAL_INTEGER_HEXADECIMAL: + case EXPR_LITERAL_FLOATINGPOINT_HEXADECIMAL: + print_string("0x"); + /* FALLTHROUGH */ + case EXPR_LITERAL_BOOLEAN: + case EXPR_LITERAL_INTEGER: + case EXPR_LITERAL_INTEGER_OCTAL: + case EXPR_LITERAL_FLOATINGPOINT: + print_stringrep(&literal->value); + if (literal->suffix != NULL) + print_symbol(literal->suffix); + return; + case EXPR_LITERAL_WIDE_CHARACTER: + print_char('L'); + /* FALLTHROUGH */ + case EXPR_LITERAL_CHARACTER: + print_quoted_string(&literal->value, '\'', 0); + return; + default: + break; + } + print_string("INVALID LITERAL KIND"); } /** @@ -361,12 +300,6 @@ static void print_funcname(const funcname_expression_t *funcname) print_string(s); } -static void print_wide_string_literal( - const wide_string_literal_expression_t *const wstr) -{ - print_quoted_wide_string(&wstr->value, '"', 1); -} - static void print_compound_literal( const compound_literal_expression_t *expression) { @@ -758,24 +691,16 @@ static void print_expression_prec(const expression_t *expression, unsigned top_p case EXPR_INVALID: print_string("$invalid expression$"); break; - case EXPR_CHARACTER_CONSTANT: - print_character_constant(&expression->conste); - break; - case EXPR_WIDE_CHARACTER_CONSTANT: - 
print_wide_character_constant(&expression->conste); + case EXPR_WIDE_STRING_LITERAL: + case EXPR_STRING_LITERAL: + print_string_literal(&expression->string_literal); break; - case EXPR_CONST: - print_const(&expression->conste); + EXPR_LITERAL_CASES + print_literal(&expression->literal); break; case EXPR_FUNCNAME: print_funcname(&expression->funcname); break; - case EXPR_STRING_LITERAL: - print_string_literal(&expression->string); - break; - case EXPR_WIDE_STRING_LITERAL: - print_wide_string_literal(&expression->wide_string); - break; case EXPR_COMPOUND_LITERAL: print_compound_literal(&expression->compound_literal); break; @@ -833,10 +758,12 @@ static void print_expression_prec(const expression_t *expression, unsigned top_p print_statement_expression(&expression->statement); break; +#if 0 default: /* TODO */ print_format("some expression of type %d", (int)expression->kind); break; +#endif } if (parenthesized) print_string(")"); @@ -1328,7 +1255,7 @@ void print_initializer(const initializer_t *initializer) print_quoted_string(&initializer->string.string, '"', 1); return; case INITIALIZER_WIDE_STRING: - print_quoted_wide_string(&initializer->wide_string.string, '"', 1); + print_quoted_string(&initializer->string.string, '"', 1); return; case INITIALIZER_DESIGNATOR: print_designator(initializer->designator.designator); @@ -1851,10 +1778,7 @@ static bool is_object_with_constant_address(const expression_t *expression) bool is_constant_expression(const expression_t *expression) { switch (expression->kind) { - - case EXPR_CONST: - case EXPR_CHARACTER_CONSTANT: - case EXPR_WIDE_CHARACTER_CONSTANT: + EXPR_LITERAL_CASES case EXPR_CLASSIFY_TYPE: case EXPR_OFFSETOF: case EXPR_ALIGNOF: diff --git a/ast.h b/ast.h index 9d8c9b7..0f70f3b 100644 --- a/ast.h +++ b/ast.h @@ -25,10 +25,9 @@ #include "entity.h" typedef struct expression_base_t expression_base_t; -typedef struct const_expression_t const_expression_t; +typedef struct literal_expression_t literal_expression_t; typedef 
struct string_literal_expression_t string_literal_expression_t; typedef struct funcname_expression_t funcname_expression_t; -typedef struct wide_string_literal_expression_t wide_string_literal_expression_t; typedef struct compound_literal_expression_t compound_literal_expression_t; typedef struct reference_expression_t reference_expression_t; typedef struct cast_expression_t cast_expression_t; @@ -125,4 +124,11 @@ bool is_address_constant(const expression_t *expression); long fold_constant_to_int(const expression_t *expression); bool fold_constant_to_bool(const expression_t *expression); +/** + * the type of a literal is usually the biggest type that can hold the value. + * Since this is backend dependent the parser needs this call exposed. + * Works for EXPR_LITERAL_* expressions. + */ +void determine_literal_type(literal_expression_t *literal); + #endif diff --git a/ast2firm.c b/ast2firm.c index 5e7f363..47430b5 100644 --- a/ast2firm.c +++ b/ast2firm.c @@ -1043,6 +1043,22 @@ entity_created: return irentity; } +/** + * Creates a SymConst for a given entity. + * + * @param dbgi debug info + * @param mode the (reference) mode for the SymConst + * @param entity the entity + */ +static ir_node *create_symconst(dbg_info *dbgi, ir_mode *mode, + ir_entity *entity) +{ + assert(entity != NULL); + union symconst_symbol sym; + sym.entity_p = entity; + return new_d_SymConst(dbgi, mode, sym, symconst_addr_ent); +} + static ir_node *create_conv(dbg_info *dbgi, ir_node *value, ir_mode *dest_mode) { ir_mode *value_mode = get_irn_mode(value); @@ -1061,74 +1077,251 @@ static ir_node *create_conv(dbg_info *dbgi, ir_node *value, ir_mode *dest_mode) } /** - * Creates a Const node representing a constant. + * Creates a SymConst node representing a wide string literal. 
+ * + * @param literal the wide string literal */ -static ir_node *const_to_firm(const const_expression_t *cnst) +static ir_node *wide_string_literal_to_firm( + const string_literal_expression_t *literal) { - dbg_info *dbgi = get_dbg_info(&cnst->base.source_position); - type_t *type = skip_typeref(cnst->base.type); - ir_mode *mode = get_ir_mode_storage(type); + ir_type *const global_type = get_glob_type(); + ir_type *const elem_type = ir_type_wchar_t; + dbg_info *const dbgi = get_dbg_info(&literal->base.source_position); + ir_type *const type = new_type_array(1, elem_type); - char buf[128]; - tarval *tv; - size_t len; - if (mode_is_float(mode)) { - tv = new_tarval_from_double(cnst->v.float_value, mode); - } else { - if (mode_is_signed(mode)) { - len = snprintf(buf, sizeof(buf), "%lld", cnst->v.int_value); - } else { - len = snprintf(buf, sizeof(buf), "%llu", - (unsigned long long) cnst->v.int_value); - } - tv = new_tarval_from_str(buf, len, mode); + ident *const id = id_unique("str.%u"); + ir_entity *const entity = new_d_entity(global_type, id, type, dbgi); + set_entity_ld_ident(entity, id); + set_entity_visibility(entity, ir_visibility_private); + add_entity_linkage(entity, IR_LINKAGE_CONSTANT); + + ir_mode *const mode = get_type_mode(elem_type); + const size_t slen = wstrlen(&literal->value); + + set_array_lower_bound_int(type, 0, 0); + set_array_upper_bound_int(type, 0, slen); + set_type_size_bytes(type, slen * get_mode_size_bytes(mode)); + set_type_state(type, layout_fixed); + + ir_initializer_t *initializer = create_initializer_compound(slen); + const char *p = literal->value.begin; + for (size_t i = 0; i < slen; ++i) { + assert(p < literal->value.begin + literal->value.size); + utf32 v = read_utf8_char(&p); + tarval *tv = new_tarval_from_long(v, mode); + ir_initializer_t *val = create_initializer_tarval(tv); + set_initializer_compound_value(initializer, i, val); } + set_entity_initializer(entity, initializer); - ir_node *res = new_d_Const(dbgi, tv); - ir_mode 
*mode_arith = get_ir_mode_arithmetic(type); - return create_conv(dbgi, res, mode_arith); + return create_symconst(dbgi, mode_P_data, entity); } /** - * Creates a Const node representing a character constant. + * Creates a SymConst node representing a string constant. + * + * @param src_pos the source position of the string constant + * @param id_prefix a prefix for the name of the generated string constant + * @param value the value of the string constant */ -static ir_node *character_constant_to_firm(const const_expression_t *cnst) +static ir_node *string_to_firm(const source_position_t *const src_pos, + const char *const id_prefix, + const string_t *const value) { - dbg_info *dbgi = get_dbg_info(&cnst->base.source_position); - ir_mode *mode = get_ir_mode_arithmetic(cnst->base.type); + ir_type *const global_type = get_glob_type(); + dbg_info *const dbgi = get_dbg_info(src_pos); + ir_type *const type = new_type_array(1, ir_type_const_char); - long long int v; - size_t const size = cnst->v.character.size; - if (size == 1 && char_is_signed) { - v = (signed char)cnst->v.character.begin[0]; - } else { - v = 0; - for (size_t i = 0; i < size; ++i) { - v = (v << 8) | ((unsigned char)cnst->v.character.begin[i]); + ident *const id = id_unique(id_prefix); + ir_entity *const entity = new_d_entity(global_type, id, type, dbgi); + set_entity_ld_ident(entity, id); + set_entity_visibility(entity, ir_visibility_private); + add_entity_linkage(entity, IR_LINKAGE_CONSTANT); + + ir_type *const elem_type = ir_type_const_char; + ir_mode *const mode = get_type_mode(elem_type); + + const char* const string = value->begin; + const size_t slen = value->size; + + set_array_lower_bound_int(type, 0, 0); + set_array_upper_bound_int(type, 0, slen); + set_type_size_bytes(type, slen); + set_type_state(type, layout_fixed); + + ir_initializer_t *initializer = create_initializer_compound(slen); + for (size_t i = 0; i < slen; ++i) { + tarval *tv = new_tarval_from_long(string[i], mode); + 
ir_initializer_t *val = create_initializer_tarval(tv); + set_initializer_compound_value(initializer, i, val); + } + set_entity_initializer(entity, initializer); + + return create_symconst(dbgi, mode_P_data, entity); +} + +static bool try_create_integer(literal_expression_t *literal, + type_t *type, unsigned char base) +{ + const char *string = literal->value.begin; + size_t size = literal->value.size; + + assert(type->kind == TYPE_ATOMIC); + atomic_type_kind_t akind = type->atomic.akind; + + ir_mode *mode = atomic_modes[akind]; + tarval *tv = new_integer_tarval_from_str(string, size, 1, base, mode); + if (tv == tarval_bad) + return false; + + literal->base.type = type; + literal->target_value = tv; + return true; +} + +static void create_integer_tarval(literal_expression_t *literal) +{ + unsigned us = 0; + unsigned ls = 0; + symbol_t *suffix = literal->suffix; + /* parse suffix */ + if (suffix != NULL) { + for (const char *c = suffix->string; *c != '\0'; ++c) { + if (*c == 'u' || *c == 'U') { ++us; } + if (*c == 'l' || *c == 'L') { ++ls; } } } - char buf[128]; - size_t len = snprintf(buf, sizeof(buf), "%lld", v); - tarval *tv = new_tarval_from_str(buf, len, mode); - return new_d_Const(dbgi, tv); + unsigned char base = 10; + if (literal->base.kind == EXPR_LITERAL_INTEGER_OCTAL) { + base = 8; + } else if (literal->base.kind == EXPR_LITERAL_INTEGER_HEXADECIMAL) { + base = 16; + } else { + assert(literal->base.kind == EXPR_LITERAL_INTEGER); + } + + tarval_int_overflow_mode_t old_mode = tarval_get_integer_overflow_mode(); + + /* now try if the constant is small enough for some types */ + tarval_set_integer_overflow_mode(TV_OVERFLOW_BAD); + if (ls < 1) { + if (us == 0 && try_create_integer(literal, type_int, base)) + goto finished; + if ((us == 1 || base != 10) + && try_create_integer(literal, type_unsigned_int, base)) + goto finished; + } + if (ls < 2) { + if (us == 0 && try_create_integer(literal, type_long, base)) + goto finished; + if ((us == 1 || base != 10) + && 
try_create_integer(literal, type_unsigned_long, base)) + goto finished; + } + /* last try? then we should not report tarval_bad */ + if (us != 1 && base == 10) + tarval_set_integer_overflow_mode(TV_OVERFLOW_WRAP); + if (us == 0 && try_create_integer(literal, type_long_long, base)) + goto finished; + + /* last try */ + assert(us == 1 || base != 10); + tarval_set_integer_overflow_mode(TV_OVERFLOW_WRAP); + bool res = try_create_integer(literal, type_unsigned_long_long, base); + if (res == false) + panic("internal error when parsing number literal"); + +finished: + tarval_set_integer_overflow_mode(old_mode); +} + +void determine_literal_type(literal_expression_t *literal) +{ + switch (literal->base.kind) { + case EXPR_LITERAL_INTEGER: + case EXPR_LITERAL_INTEGER_OCTAL: + case EXPR_LITERAL_INTEGER_HEXADECIMAL: + create_integer_tarval(literal); + return; + default: + break; + } } /** - * Creates a Const node representing a wide character constant. + * Creates a Const node representing a constant. 
*/ -static ir_node *wide_character_constant_to_firm(const const_expression_t *cnst) +static ir_node *literal_to_firm(const literal_expression_t *literal) { - dbg_info *dbgi = get_dbg_info(&cnst->base.source_position); - ir_mode *mode = get_ir_mode_arithmetic(cnst->base.type); + type_t *type = skip_typeref(literal->base.type); + ir_mode *mode = get_ir_mode_storage(type); + const char *string = literal->value.begin; + size_t size = literal->value.size; + tarval *tv; - long long int v = cnst->v.wide_character.begin[0]; + switch (literal->base.kind) { + case EXPR_LITERAL_WIDE_CHARACTER: { + utf32 v = read_utf8_char(&string); + char buf[128]; + size_t len = snprintf(buf, sizeof(buf), UTF32_PRINTF_FORMAT, v); - char buf[128]; - size_t len = snprintf(buf, sizeof(buf), "%lld", v); - tarval *tv = new_tarval_from_str(buf, len, mode); + tv = new_tarval_from_str(buf, len, mode); + goto make_const; + } + case EXPR_LITERAL_CHARACTER: { + long long int v; + if (size == 1 && char_is_signed) { + v = (signed char)string[0]; + } else { + v = 0; + for (size_t i = 0; i < size; ++i) { + v = (v << 8) | ((unsigned char)string[i]); + } + } + char buf[128]; + size_t len = snprintf(buf, sizeof(buf), "%lld", v); - return new_d_Const(dbgi, tv); + tv = new_tarval_from_str(buf, len, mode); + goto make_const; + } + case EXPR_LITERAL_INTEGER: + case EXPR_LITERAL_INTEGER_OCTAL: + case EXPR_LITERAL_INTEGER_HEXADECIMAL: + assert(literal->target_value != NULL); + tv = literal->target_value; + goto make_const; + case EXPR_LITERAL_FLOATINGPOINT: + tv = new_tarval_from_str(string, size, mode); + goto make_const; + case EXPR_LITERAL_FLOATINGPOINT_HEXADECIMAL: { + char *buffer = alloca(size + 2); + memcpy(buffer, "0x", 2); + memcpy(buffer+2, string, size); + tv = new_tarval_from_str(buffer, size+2, mode); + goto make_const; + } + case EXPR_LITERAL_BOOLEAN: + if (string[0] == 't') { + tv = get_mode_one(mode); + } else { + assert(string[0] == 'f'); + tv = get_mode_null(mode); + } + goto make_const; + case 
EXPR_LITERAL_MS_NOOP: + tv = get_mode_null(mode); + goto make_const; + default: + break; + } + panic("Invalid literal kind found"); + +make_const: ; + dbg_info *dbgi = get_dbg_info(&literal->base.source_position); + ir_node *res = new_d_Const(dbgi, tv); + ir_mode *mode_arith = get_ir_mode_arithmetic(type); + return create_conv(dbgi, res, mode_arith); } /* @@ -1192,23 +1385,6 @@ static ir_node *get_trampoline_region(dbg_info *dbgi, ir_entity *entity) region); } - -/** - * Creates a SymConst for a given entity. - * - * @param dbgi debug info - * @param mode the (reference) mode for the SymConst - * @param entity the entity - */ -static ir_node *create_symconst(dbg_info *dbgi, ir_mode *mode, - ir_entity *entity) -{ - assert(entity != NULL); - union symconst_symbol sym; - sym.entity_p = entity; - return new_d_SymConst(dbgi, mode, sym, symconst_addr_ent); -} - /** * Creates a trampoline for a function represented by an entity. * @@ -1230,101 +1406,6 @@ static ir_node *create_trampoline(dbg_info *dbgi, ir_mode *mode, return new_Proj(irn, mode, pn_Builtin_1_result); } -/** - * Creates a SymConst node representing a string constant. 
- * - * @param src_pos the source position of the string constant - * @param id_prefix a prefix for the name of the generated string constant - * @param value the value of the string constant - */ -static ir_node *string_to_firm(const source_position_t *const src_pos, - const char *const id_prefix, - const string_t *const value) -{ - ir_type *const global_type = get_glob_type(); - dbg_info *const dbgi = get_dbg_info(src_pos); - ir_type *const type = new_type_array(1, ir_type_const_char); - - ident *const id = id_unique(id_prefix); - ir_entity *const entity = new_d_entity(global_type, id, type, dbgi); - set_entity_ld_ident(entity, id); - set_entity_visibility(entity, ir_visibility_private); - add_entity_linkage(entity, IR_LINKAGE_CONSTANT); - - ir_type *const elem_type = ir_type_const_char; - ir_mode *const mode = get_type_mode(elem_type); - - const char* const string = value->begin; - const size_t slen = value->size; - - set_array_lower_bound_int(type, 0, 0); - set_array_upper_bound_int(type, 0, slen); - set_type_size_bytes(type, slen); - set_type_state(type, layout_fixed); - - ir_initializer_t *initializer = create_initializer_compound(slen); - for (size_t i = 0; i < slen; ++i) { - tarval *tv = new_tarval_from_long(string[i], mode); - ir_initializer_t *val = create_initializer_tarval(tv); - set_initializer_compound_value(initializer, i, val); - } - set_entity_initializer(entity, initializer); - - return create_symconst(dbgi, mode_P_data, entity); -} - -/** - * Creates a SymConst node representing a string literal. - * - * @param literal the string literal - */ -static ir_node *string_literal_to_firm( - const string_literal_expression_t* literal) -{ - return string_to_firm(&literal->base.source_position, "str.%u", - &literal->value); -} - -/** - * Creates a SymConst node representing a wide string literal. 
- * - * @param literal the wide string literal - */ -static ir_node *wide_string_literal_to_firm( - const wide_string_literal_expression_t* const literal) -{ - ir_type *const global_type = get_glob_type(); - ir_type *const elem_type = ir_type_wchar_t; - dbg_info *const dbgi = get_dbg_info(&literal->base.source_position); - ir_type *const type = new_type_array(1, elem_type); - - ident *const id = id_unique("str.%u"); - ir_entity *const entity = new_d_entity(global_type, id, type, dbgi); - set_entity_ld_ident(entity, id); - set_entity_visibility(entity, ir_visibility_private); - add_entity_linkage(entity, IR_LINKAGE_CONSTANT); - - ir_mode *const mode = get_type_mode(elem_type); - - const wchar_rep_t *const string = literal->value.begin; - const size_t slen = literal->value.size; - - set_array_lower_bound_int(type, 0, 0); - set_array_upper_bound_int(type, 0, slen); - set_type_size_bytes(type, slen * get_mode_size_bytes(mode)); - set_type_state(type, layout_fixed); - - ir_initializer_t *initializer = create_initializer_compound(slen); - for (size_t i = 0; i < slen; ++i) { - tarval *tv = new_tarval_from_long(string[i], mode); - ir_initializer_t *val = create_initializer_tarval(tv); - set_initializer_compound_value(initializer, i, val); - } - set_entity_initializer(entity, initializer); - - return create_symconst(dbgi, mode_P_data, entity); -} - /** * Dereference an address. 
* @@ -3449,16 +3530,13 @@ static ir_node *_expression_to_firm(const expression_t *expression) #endif switch (expression->kind) { - case EXPR_CHARACTER_CONSTANT: - return character_constant_to_firm(&expression->conste); - case EXPR_WIDE_CHARACTER_CONSTANT: - return wide_character_constant_to_firm(&expression->conste); - case EXPR_CONST: - return const_to_firm(&expression->conste); + EXPR_LITERAL_CASES + return literal_to_firm(&expression->literal); case EXPR_STRING_LITERAL: - return string_literal_to_firm(&expression->string); + return string_to_firm(&expression->base.source_position, "str.%u", + &expression->literal.value); case EXPR_WIDE_STRING_LITERAL: - return wide_string_literal_to_firm(&expression->wide_string); + return wide_string_literal_to_firm(&expression->string_literal); case EXPR_REFERENCE: return reference_expression_to_firm(&expression->reference); case EXPR_REFERENCE_ENUM_VALUE: @@ -4084,19 +4162,19 @@ static ir_initializer_t *create_ir_initializer_string( static ir_initializer_t *create_ir_initializer_wide_string( const initializer_wide_string_t *initializer, type_t *type) { - size_t string_len = initializer->string.size; assert(type->kind == TYPE_ARRAY); assert(type->array.size_constant); size_t len = type->array.size; + size_t string_len = wstrlen(&initializer->string); ir_initializer_t *irinitializer = create_initializer_compound(len); - const wchar_rep_t *string = initializer->string.begin; - ir_mode *mode = get_type_mode(ir_type_wchar_t); + const char *p = initializer->string.begin; + ir_mode *mode = get_type_mode(ir_type_wchar_t); for (size_t i = 0; i < len; ++i) { - wchar_rep_t c = 0; + utf32 c = 0; if (i < string_len) { - c = string[i]; + c = read_utf8_char(&p); } tarval *tv = new_tarval_from_long(c, mode); ir_initializer_t *char_initializer = create_initializer_tarval(tv); @@ -5981,6 +6059,9 @@ static void global_asm_to_firm(statement_t *s) void translation_unit_to_firm(translation_unit_t *unit) { + /* initialize firm arithmetic */ + 
tarval_set_integer_overflow_mode(TV_OVERFLOW_WRAP); + /* just to be sure */ continue_label = NULL; break_label = NULL; diff --git a/ast_t.h b/ast_t.h index e1611fb..2e389c0 100644 --- a/ast_t.h +++ b/ast_t.h @@ -29,6 +29,7 @@ #include "type.h" #include "entity_t.h" #include "adt/obst.h" +#include "target_value.h" /** The AST obstack contains all data that must stay in the AST. */ extern struct obstack ast_obstack; @@ -66,9 +67,15 @@ typedef enum expression_kind_t { EXPR_INVALID, EXPR_REFERENCE, EXPR_REFERENCE_ENUM_VALUE, - EXPR_CONST, - EXPR_CHARACTER_CONSTANT, - EXPR_WIDE_CHARACTER_CONSTANT, + EXPR_LITERAL_BOOLEAN, + EXPR_LITERAL_INTEGER, + EXPR_LITERAL_INTEGER_OCTAL, + EXPR_LITERAL_INTEGER_HEXADECIMAL, + EXPR_LITERAL_FLOATINGPOINT, + EXPR_LITERAL_FLOATINGPOINT_HEXADECIMAL, + EXPR_LITERAL_CHARACTER, + EXPR_LITERAL_WIDE_CHARACTER, + EXPR_LITERAL_MS_NOOP, /**< MS __noop extension */ EXPR_STRING_LITERAL, EXPR_WIDE_STRING_LITERAL, EXPR_COMPOUND_LITERAL, @@ -226,6 +233,17 @@ typedef enum funcname_kind_t { EXPR_UNARY_CASES_MANDATORY \ EXPR_UNARY_CASES_OPTIONAL +#define EXPR_LITERAL_CASES \ + case EXPR_LITERAL_BOOLEAN: \ + case EXPR_LITERAL_INTEGER: \ + case EXPR_LITERAL_INTEGER_OCTAL: \ + case EXPR_LITERAL_INTEGER_HEXADECIMAL: \ + case EXPR_LITERAL_FLOATINGPOINT: \ + case EXPR_LITERAL_FLOATINGPOINT_HEXADECIMAL: \ + case EXPR_LITERAL_CHARACTER: \ + case EXPR_LITERAL_WIDE_CHARACTER: \ + case EXPR_LITERAL_MS_NOOP: + /** * The base class of every expression. */ @@ -240,18 +258,15 @@ struct expression_base_t { }; /** - * A constant. 
+ * integer/float constants, character and string literals */ -struct const_expression_t { +struct literal_expression_t { expression_base_t base; - union { - long long int_value; - long double float_value; - string_t character; - wide_string_t wide_character; - } v; - bool is_ms_noop; /**< True, if this constant is the result - of an microsoft __noop operator */ + string_t value; + symbol_t *suffix; + + /* ast2firm data */ + tarval *target_value; }; struct string_literal_expression_t { @@ -265,11 +280,6 @@ struct funcname_expression_t { string_t value; /**< the value once assigned. */ }; -struct wide_string_literal_expression_t { - expression_base_t base; - wide_string_t value; -}; - struct compound_literal_expression_t { expression_base_t base; type_t *type; @@ -395,10 +405,9 @@ struct label_address_expression_t { union expression_t { expression_kind_t kind; expression_base_t base; - const_expression_t conste; + literal_expression_t literal; + string_literal_expression_t string_literal; funcname_expression_t funcname; - string_literal_expression_t string; - wide_string_literal_expression_t wide_string; compound_literal_expression_t compound_literal; builtin_constant_expression_t builtin_constant; builtin_types_compatible_expression_t builtin_types_compatible; @@ -449,7 +458,7 @@ struct initializer_string_t { struct initializer_wide_string_t { initializer_base_t base; - wide_string_t string; + string_t string; }; struct initializer_designator_t { diff --git a/attribute.c b/attribute.c index eca3f61..e709e61 100644 --- a/attribute.c +++ b/attribute.c @@ -420,7 +420,7 @@ const char *get_deprecated_string(const attribute_t *attribute) expression_t *expression = argument->v.expression; if (expression->kind != EXPR_STRING_LITERAL) return NULL; - return expression->string.value.begin; + return expression->literal.value.begin; } return NULL; } diff --git a/diagnostic.c b/diagnostic.c index ad81055..0b92ae5 100644 --- a/diagnostic.c +++ b/diagnostic.c @@ -67,12 +67,6 @@ 
static void diagnosticvf(const char *const fmt, va_list ap) fputc(*f, stderr); break; - case 'C': { - const wint_t val = va_arg(ap, wint_t); - fprintf(stderr, "%lc", val); - break; - } - case 'c': { const unsigned char val = (unsigned char) va_arg(ap, int); fputc(val, stderr); @@ -91,6 +85,14 @@ static void diagnosticvf(const char *const fmt, va_list ap) break; } + case 'S': { + const string_t *str = va_arg(ap, const string_t*); + for (size_t i = 0; i < str->size; ++i) { + fputc(str->begin[i], stderr); + } + break; + } + case 'u': { const unsigned int val = va_arg(ap, unsigned int); fprintf(stderr, "%u", val); diff --git a/diagnostic.h b/diagnostic.h index 4c12e4c..113481c 100644 --- a/diagnostic.h +++ b/diagnostic.h @@ -51,7 +51,7 @@ * %K const token_t* * %k token_kind_t * %P const source_position_t * - * + * %S const string_t * */ void diagnosticf(const char *fmt, ...); void errorf(const source_position_t *pos, const char *fmt, ...); diff --git a/format_check.c b/format_check.c index 7c8304e..7609018 100644 --- a/format_check.c +++ b/format_check.c @@ -17,8 +17,9 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA * 02111-1307, USA. */ +#include + #include -#include #include "adt/util.h" #include "format_check.h" @@ -93,7 +94,7 @@ static const char* get_length_modifier_name(const format_length_modifier_t mod) static void warn_invalid_length_modifier(const source_position_t *pos, const format_length_modifier_t mod, - const wchar_rep_t conversion) + const utf32 conversion) { warningf(pos, "invalid length modifier '%s' for conversion specifier '%%%c'", @@ -101,113 +102,51 @@ static void warn_invalid_length_modifier(const source_position_t *pos, ); } -typedef struct vchar_t vchar_t; -struct vchar_t { - const void *string; /**< the string */ - size_t position; /**< current position */ - size_t size; /**< size of the string */ - - /** return the first character of the string and setthe position to 0. 
*/ - unsigned (*first)(vchar_t *self); - /** return the next character of the string */ - unsigned (*next)(vchar_t *self); - /** return non_zero if the given character is a digit */ - int (*is_digit)(unsigned vchar); -}; - -static unsigned string_first(vchar_t *self) -{ - self->position = 0; - const string_t *string = self->string; - return string->begin[0]; -} - -static unsigned string_next(vchar_t *self) -{ - ++self->position; - const string_t *string = self->string; - return string->begin[self->position]; -} - -static int string_isdigit(unsigned vchar) -{ - return isdigit(vchar); -} - -static unsigned wstring_first(vchar_t *self) -{ - self->position = 0; - const wide_string_t *wstring = self->string; - return wstring->begin[0]; -} - -static unsigned wstring_next(vchar_t *self) -{ - ++self->position; - const wide_string_t *wstring = self->string; - return wstring->begin[self->position]; -} - -static int wstring_isdigit(unsigned vchar) -{ - return iswdigit(vchar); -} - -static bool atend(vchar_t *self) -{ - return self->position + 1 == self->size; -} - /** * Check printf-style format. */ static int internal_check_printf_format(const expression_t *fmt_expr, - const call_argument_t *arg, const format_spec_t *spec) + const call_argument_t *arg, + const format_spec_t *spec) { - if (fmt_expr->kind == EXPR_UNARY_CAST_IMPLICIT) { + while (fmt_expr->kind == EXPR_UNARY_CAST_IMPLICIT) { fmt_expr = fmt_expr->unary.value; } - vchar_t vchar; - switch (fmt_expr->kind) { - case EXPR_STRING_LITERAL: - vchar.string = &fmt_expr->string.value; - vchar.size = fmt_expr->string.value.size; - vchar.first = string_first; - vchar.next = string_next; - vchar.is_digit = string_isdigit; - break; - - case EXPR_WIDE_STRING_LITERAL: - vchar.string = &fmt_expr->wide_string.value; - vchar.size = fmt_expr->wide_string.value.size; - vchar.first = wstring_first; - vchar.next = wstring_next; - vchar.is_digit = wstring_isdigit; - break; + /* + * gettext results in expressions like (X ? 
"format_string" : Y) + * we assume the left part is the format string + */ + if (fmt_expr->kind == EXPR_CONDITIONAL) { + conditional_expression_t const *const c = &fmt_expr->conditional; + expression_t const * t = c->true_expression; + if (t == NULL) + t = c->condition; + int const nt = internal_check_printf_format(t, arg, spec); + int const nf = internal_check_printf_format(c->false_expression, arg, spec); + return nt > nf ? nt : nf; + } - case EXPR_CONDITIONAL: { - conditional_expression_t const *const c = &fmt_expr->conditional; - expression_t const * t = c->true_expression; - if (t == NULL) - t = c->condition; - int const nt = internal_check_printf_format(t, arg, spec); - int const nf = internal_check_printf_format(c->false_expression, arg, spec); - return nt > nf ? nt : nf; - } + if (fmt_expr->kind != EXPR_STRING_LITERAL + && fmt_expr->kind != EXPR_WIDE_STRING_LITERAL) + return -1; - default: - return -1; - } + const char *string = fmt_expr->literal.value.begin; + size_t size = fmt_expr->literal.value.size; + const char *c = string; const source_position_t *pos = &fmt_expr->base.source_position; - unsigned fmt = vchar.first(&vchar); unsigned num_fmt = 0; - for (; fmt != '\0'; fmt = vchar.next(&vchar)) { + char fmt; + for (fmt = *c; fmt != '\0'; fmt = *(++c)) { if (fmt != '%') continue; - fmt = vchar.next(&vchar); + fmt = *(++c); + if (fmt == '\0') { + warningf(pos, "dangling %% in format string"); + break; + } if (fmt == '%') continue; @@ -215,15 +154,15 @@ static int internal_check_printf_format(const expression_t *fmt_expr, format_flags_t fmt_flags = FMT_FLAG_NONE; if (fmt == '0') { - fmt = vchar.next(&vchar); + fmt = *(++c); fmt_flags |= FMT_FLAG_ZERO; } /* argument selector or minimum field width */ - if (vchar.is_digit(fmt)) { + if (isdigit(fmt)) { do { - fmt = vchar.next(&vchar); - } while (vchar.is_digit(fmt)); + fmt = *(++c); + } while (isdigit(fmt)); /* digit string was ... 
*/ if (fmt == '$') { @@ -263,13 +202,13 @@ static int internal_check_printf_format(const expression_t *fmt_expr, warningf(pos, "repeated flag '%c' in conversion specification %u", (char)fmt, num_fmt); } fmt_flags |= flag; - fmt = vchar.next(&vchar); + fmt = *(++c); } break_fmt_flags: /* minimum field width */ if (fmt == '*') { - fmt = vchar.next(&vchar); + fmt = *(++c); if (arg == NULL) { warningf(pos, "missing argument for '*' field width in conversion specification %u", num_fmt); return -1; @@ -280,17 +219,17 @@ break_fmt_flags: } arg = arg->next; } else { - while (vchar.is_digit(fmt)) { - fmt = vchar.next(&vchar); + while (isdigit(fmt)) { + fmt = *(++c); } } } /* precision */ if (fmt == '.') { - fmt = vchar.next(&vchar); + fmt = *(++c); if (fmt == '*') { - fmt = vchar.next(&vchar); + fmt = *(++c); if (arg == NULL) { warningf(pos, "missing argument for '*' precision in conversion specification %u", num_fmt); return -1; @@ -302,8 +241,8 @@ break_fmt_flags: arg = arg->next; } else { /* digit string may be omitted */ - while (vchar.is_digit(fmt)) { - fmt = vchar.next(&vchar); + while (isdigit(fmt)) { + fmt = *(++c); } } } @@ -312,9 +251,9 @@ break_fmt_flags: format_length_modifier_t fmt_mod; switch (fmt) { case 'h': - fmt = vchar.next(&vchar); + fmt = *(++c); if (fmt == 'h') { - fmt = vchar.next(&vchar); + fmt = *(++c); fmt_mod = FMT_MOD_hh; } else { fmt_mod = FMT_MOD_h; @@ -322,48 +261,48 @@ break_fmt_flags: break; case 'l': - fmt = vchar.next(&vchar); + fmt = *(++c); if (fmt == 'l') { - fmt = vchar.next(&vchar); + fmt = *(++c); fmt_mod = FMT_MOD_ll; } else { fmt_mod = FMT_MOD_l; } break; - case 'L': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_L; break; - case 'j': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_j; break; - case 't': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_t; break; - case 'z': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_z; break; - case 'q': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_q; break; + case 'L': fmt = *(++c); fmt_mod = FMT_MOD_L; break; 
+ case 'j': fmt = *(++c); fmt_mod = FMT_MOD_j; break; + case 't': fmt = *(++c); fmt_mod = FMT_MOD_t; break; + case 'z': fmt = *(++c); fmt_mod = FMT_MOD_z; break; + case 'q': fmt = *(++c); fmt_mod = FMT_MOD_q; break; /* microsoft mode */ case 'w': if (c_mode & _MS) { - fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_w; + fmt = *(++c); fmt_mod = FMT_MOD_w; } else { fmt_mod = FMT_MOD_NONE; } break; case 'I': if (c_mode & _MS) { - fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_I; + fmt = *(++c); fmt_mod = FMT_MOD_I; if (fmt == '3') { - fmt = vchar.next(&vchar); + fmt = *(++c); if (fmt == '2') { - fmt = vchar.next(&vchar); + fmt = *(++c); fmt_mod = FMT_MOD_I32; } else { /* rewind */ - --vchar.position; + fmt = *(--c); } } else if (fmt == '6') { - fmt = vchar.next(&vchar); + fmt = *(++c); if (fmt == '4') { - fmt = vchar.next(&vchar); + fmt = *(++c); fmt_mod = FMT_MOD_I64; } else { /* rewind */ - --vchar.position; + fmt = *(--c); } } } else { @@ -375,10 +314,6 @@ break_fmt_flags: break; } - if (fmt == '\0') { - warningf(pos, "dangling %% in format string"); - break; - } type_t *expected_type; type_qualifiers_t expected_qual = TYPE_QUALIFIER_NONE; @@ -529,7 +464,7 @@ eval_fmt_mod_unsigned: break; default: - warningf(pos, "encountered unknown conversion specifier '%%%C' at position %u", (wint_t)fmt, num_fmt); + warningf(pos, "encountered unknown conversion specifier '%%%c' at position %u", fmt, num_fmt); if (arg == NULL) { warningf(pos, "too few arguments for format string"); return -1; @@ -577,10 +512,8 @@ eval_fmt_mod_unsigned: goto next_arg; } } - } else { - if (get_unqualified_type(arg_skip) == expected_type_skip) { - goto next_arg; - } + } else if (get_unqualified_type(arg_skip) == expected_type_skip) { + goto next_arg; } if (is_type_valid(arg_skip)) { warningf(pos, @@ -591,7 +524,8 @@ eval_fmt_mod_unsigned: next_arg: arg = arg->next; } - if (!atend(&vchar)) { + assert(fmt == '\0'); + if (c+1 < string + size) { warningf(pos, "format string contains '\\0'"); } return num_fmt; 
@@ -600,7 +534,8 @@ next_arg: /** * Check printf-style format. */ -static void check_printf_format(call_argument_t const *arg, format_spec_t const *const spec) +static void check_printf_format(call_argument_t const *arg, + format_spec_t const *const spec) { /* find format arg */ size_t idx = 0; @@ -625,16 +560,17 @@ static void check_printf_format(call_argument_t const *arg, format_spec_t const ++num_args; if (num_args > (size_t)num_fmt) { warningf(&fmt_expr->base.source_position, - "%u argument%s but only %u format specifier%s", - num_args, num_args != 1 ? "s" : "", - num_fmt, num_fmt != 1 ? "s" : ""); + "%u argument%s but only %u format specifier%s", + num_args, num_args != 1 ? "s" : "", + num_fmt, num_fmt != 1 ? "s" : ""); } } /** * Check scanf-style format. */ -static void check_scanf_format(const call_argument_t *arg, const format_spec_t *spec) +static void check_scanf_format(const call_argument_t *arg, + const format_spec_t *spec) { /* find format arg */ unsigned idx = 0; @@ -649,252 +585,240 @@ static void check_scanf_format(const call_argument_t *arg, const format_spec_t * fmt_expr = fmt_expr->unary.value; } - vchar_t vchar; - if (fmt_expr->kind == EXPR_WIDE_STRING_LITERAL) { - vchar.string = &fmt_expr->wide_string.value; - vchar.size = fmt_expr->wide_string.value.size; - vchar.first = wstring_first; - vchar.next = wstring_next; - vchar.is_digit = wstring_isdigit; - } else if (fmt_expr->kind == EXPR_STRING_LITERAL) { - vchar.string = &fmt_expr->string.value; - vchar.size = fmt_expr->string.value.size; - vchar.first = string_first; - vchar.next = string_next; - vchar.is_digit = string_isdigit; - } else { + if (fmt_expr->kind != EXPR_STRING_LITERAL + && fmt_expr->kind != EXPR_WIDE_STRING_LITERAL) return; - } + + const char *string = fmt_expr->literal.value.begin; + size_t size = fmt_expr->literal.value.size; + const char *c = string; + /* find the real args */ for (; idx < spec->arg_idx && arg != NULL; ++idx) arg = arg->next; const source_position_t *pos = 
&fmt_expr->base.source_position; - unsigned fmt = vchar.first(&vchar); unsigned num_fmt = 0; - for (; fmt != '\0'; fmt = vchar.next(&vchar)) { + char fmt; + for (fmt = *c; fmt != '\0'; fmt = *(++c)) { if (fmt != '%') continue; - fmt = vchar.next(&vchar); - + fmt = *(++c); + if (fmt == '\0') { + warningf(pos, "dangling '%%' in format string"); + break; + } if (fmt == '%') continue; ++num_fmt; - /* length modifier */ - format_length_modifier_t fmt_mod; + /* look for length modifiers */ + format_length_modifier_t fmt_mod = FMT_MOD_NONE; switch (fmt) { - case 'h': - fmt = vchar.next(&vchar); - if (fmt == 'h') { - fmt = vchar.next(&vchar); - fmt_mod = FMT_MOD_hh; - } else { - fmt_mod = FMT_MOD_h; - } - break; + case 'h': + fmt = *(++c); + if (fmt == 'h') { + fmt = *(++c); + fmt_mod = FMT_MOD_hh; + } else { + fmt_mod = FMT_MOD_h; + } + break; - case 'l': - fmt = vchar.next(&vchar); - if (fmt == 'l') { - fmt = vchar.next(&vchar); - fmt_mod = FMT_MOD_ll; - } else { - fmt_mod = FMT_MOD_l; - } - break; + case 'l': + fmt = *(++c); + if (fmt == 'l') { + fmt = *(++c); + fmt_mod = FMT_MOD_ll; + } else { + fmt_mod = FMT_MOD_l; + } + break; - case 'L': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_L; break; - case 'j': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_j; break; - case 't': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_t; break; - case 'z': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_z; break; - /* microsoft mode */ - case 'w': - if (c_mode & _MS) { - fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_w; - } else { - fmt_mod = FMT_MOD_NONE; - } - break; - case 'I': - if (c_mode & _MS) { - fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_I; - if (fmt == '3') { - fmt = vchar.next(&vchar); - if (fmt == '2') { - fmt = vchar.next(&vchar); - fmt_mod = FMT_MOD_I32; - } else { - /* rewind */ - --vchar.position; - } - } else if (fmt == '6') { - fmt = vchar.next(&vchar); - if (fmt == '4') { - fmt = vchar.next(&vchar); - fmt_mod = FMT_MOD_I64; - } else { - /* rewind */ - --vchar.position; - } + 
case 'L': fmt = *(++c); fmt_mod = FMT_MOD_L; break; + case 'j': fmt = *(++c); fmt_mod = FMT_MOD_j; break; + case 't': fmt = *(++c); fmt_mod = FMT_MOD_t; break; + case 'z': fmt = *(++c); fmt_mod = FMT_MOD_z; break; + /* microsoft mode */ + case 'w': + if (c_mode & _MS) { + fmt = *(++c); + fmt_mod = FMT_MOD_w; + } + break; + case 'I': + if (c_mode & _MS) { + fmt = *(++c); + fmt_mod = FMT_MOD_I; + if (fmt == '3') { + fmt = *(++c); + if (fmt == '2') { + fmt = *(++c); + fmt_mod = FMT_MOD_I32; + } else { + /* rewind */ + fmt = *(--c); + } + } else if (fmt == '6') { + fmt = *(++c); + if (fmt == '4') { + fmt = *(++c); + fmt_mod = FMT_MOD_I64; + } else { + /* rewind */ + fmt = *(--c); } - } else { - fmt_mod = FMT_MOD_NONE; } - break; - default: - fmt_mod = FMT_MOD_NONE; - break; + } + break; } if (fmt == '\0') { - warningf(pos, "dangling %% in format string"); + warningf(pos, "dangling % with conversion specififer in format string"); break; } - type_t *expected_type; + type_t *expected_type; switch (fmt) { - case 'd': - case 'i': - switch (fmt_mod) { - case FMT_MOD_NONE: expected_type = type_int; break; - case FMT_MOD_hh: expected_type = type_signed_char; break; - case FMT_MOD_h: expected_type = type_short; break; - case FMT_MOD_l: expected_type = type_long; break; - case FMT_MOD_ll: expected_type = type_long_long; break; - case FMT_MOD_j: expected_type = type_intmax_t; break; - case FMT_MOD_z: expected_type = type_ssize_t; break; - case FMT_MOD_t: expected_type = type_ptrdiff_t; break; - case FMT_MOD_I: expected_type = type_ptrdiff_t; break; - case FMT_MOD_I32: expected_type = type_int32; break; - case FMT_MOD_I64: expected_type = type_int64; break; + case 'd': + case 'i': + switch (fmt_mod) { + case FMT_MOD_NONE: expected_type = type_int; break; + case FMT_MOD_hh: expected_type = type_signed_char; break; + case FMT_MOD_h: expected_type = type_short; break; + case FMT_MOD_l: expected_type = type_long; break; + case FMT_MOD_ll: expected_type = type_long_long; break; + case 
FMT_MOD_j: expected_type = type_intmax_t; break; + case FMT_MOD_z: expected_type = type_ssize_t; break; + case FMT_MOD_t: expected_type = type_ptrdiff_t; break; + case FMT_MOD_I: expected_type = type_ptrdiff_t; break; + case FMT_MOD_I32: expected_type = type_int32; break; + case FMT_MOD_I64: expected_type = type_int64; break; - default: - warn_invalid_length_modifier(pos, fmt_mod, fmt); - goto next_arg; - } - break; - - case 'o': - case 'X': - case 'x': - goto eval_fmt_mod_unsigned; - - case 'u': -eval_fmt_mod_unsigned: - switch (fmt_mod) { - case FMT_MOD_NONE: expected_type = type_unsigned_int; break; - case FMT_MOD_hh: expected_type = type_unsigned_char; break; - case FMT_MOD_h: expected_type = type_unsigned_short; break; - case FMT_MOD_l: expected_type = type_unsigned_long; break; - case FMT_MOD_ll: expected_type = type_unsigned_long_long; break; - case FMT_MOD_j: expected_type = type_uintmax_t; break; - case FMT_MOD_z: expected_type = type_size_t; break; - case FMT_MOD_t: expected_type = type_uptrdiff_t; break; - case FMT_MOD_I: expected_type = type_size_t; break; - case FMT_MOD_I32: expected_type = type_unsigned_int32; break; - case FMT_MOD_I64: expected_type = type_unsigned_int64; break; + default: + warn_invalid_length_modifier(pos, fmt_mod, fmt); + goto next_arg; + } + break; - default: - warn_invalid_length_modifier(pos, fmt_mod, fmt); - goto next_arg; - } - break; + case 'o': + case 'X': + case 'x': + case 'u': + switch (fmt_mod) { + case FMT_MOD_NONE: expected_type = type_unsigned_int; break; + case FMT_MOD_hh: expected_type = type_unsigned_char; break; + case FMT_MOD_h: expected_type = type_unsigned_short; break; + case FMT_MOD_l: expected_type = type_unsigned_long; break; + case FMT_MOD_ll: expected_type = type_unsigned_long_long; break; + case FMT_MOD_j: expected_type = type_uintmax_t; break; + case FMT_MOD_z: expected_type = type_size_t; break; + case FMT_MOD_t: expected_type = type_uptrdiff_t; break; + case FMT_MOD_I: expected_type = type_size_t; 
break; + case FMT_MOD_I32: expected_type = type_unsigned_int32; break; + case FMT_MOD_I64: expected_type = type_unsigned_int64; break; - case 'A': - case 'a': - case 'E': - case 'e': - case 'F': - case 'f': - case 'G': - case 'g': - switch (fmt_mod) { - case FMT_MOD_l: /* l modifier is ignored */ - case FMT_MOD_NONE: expected_type = type_double; break; - case FMT_MOD_L: expected_type = type_long_double; break; + default: + warn_invalid_length_modifier(pos, fmt_mod, fmt); + goto next_arg; + } + break; - default: - warn_invalid_length_modifier(pos, fmt_mod, fmt); - goto next_arg; - } - break; + case 'A': + case 'a': + case 'E': + case 'e': + case 'F': + case 'f': + case 'G': + case 'g': + switch (fmt_mod) { + case FMT_MOD_l: expected_type = type_double; break; + case FMT_MOD_NONE: expected_type = type_float; break; + case FMT_MOD_L: expected_type = type_long_double; break; - case 'C': - if (fmt_mod != FMT_MOD_NONE) { - warn_invalid_length_modifier(pos, fmt_mod, fmt); - goto next_arg; - } - expected_type = type_wchar_t; - break; + default: + warn_invalid_length_modifier(pos, fmt_mod, fmt); + goto next_arg; + } + break; - case 'c': - expected_type = type_int; - switch (fmt_mod) { - case FMT_MOD_NONE: expected_type = type_int; break; /* TODO promoted char */ - case FMT_MOD_l: expected_type = type_wint_t; break; - case FMT_MOD_w: expected_type = type_wchar_t; break; + case 'C': + if (fmt_mod != FMT_MOD_NONE) { + warn_invalid_length_modifier(pos, fmt_mod, fmt); + goto next_arg; + } + expected_type = type_wchar_t; + break; - default: - warn_invalid_length_modifier(pos, fmt_mod, fmt); - goto next_arg; - } - break; + case 'c': + expected_type = type_int; + switch (fmt_mod) { + case FMT_MOD_NONE: expected_type = type_int; break; /* TODO promoted char */ + case FMT_MOD_l: expected_type = type_wint_t; break; + case FMT_MOD_w: expected_type = type_wchar_t; break; - case 'S': - if (fmt_mod != FMT_MOD_NONE) { - warn_invalid_length_modifier(pos, fmt_mod, fmt); - goto next_arg; - } 
- expected_type = type_wchar_t; - break; + default: + warn_invalid_length_modifier(pos, fmt_mod, fmt); + goto next_arg; + } + break; - case 's': - case '[': - switch (fmt_mod) { - case FMT_MOD_NONE: expected_type = type_char; break; - case FMT_MOD_l: expected_type = type_wchar_t; break; - case FMT_MOD_w: expected_type = type_wchar_t; break; + case 'S': + if (fmt_mod != FMT_MOD_NONE) { + warn_invalid_length_modifier(pos, fmt_mod, fmt); + goto next_arg; + } + expected_type = type_wchar_t; + break; - default: - warn_invalid_length_modifier(pos, fmt_mod, fmt); - goto next_arg; - } - break; + case 's': + case '[': + switch (fmt_mod) { + case FMT_MOD_NONE: expected_type = type_char; break; + case FMT_MOD_l: expected_type = type_wchar_t; break; + case FMT_MOD_w: expected_type = type_wchar_t; break; - case 'p': - if (fmt_mod != FMT_MOD_NONE) { + default: warn_invalid_length_modifier(pos, fmt_mod, fmt); goto next_arg; - } - expected_type = type_void_ptr; - break; + } + break; - case 'n': - switch (fmt_mod) { - case FMT_MOD_NONE: expected_type = type_int; break; - case FMT_MOD_hh: expected_type = type_signed_char; break; - case FMT_MOD_h: expected_type = type_short; break; - case FMT_MOD_l: expected_type = type_long; break; - case FMT_MOD_ll: expected_type = type_long_long; break; - case FMT_MOD_j: expected_type = type_intmax_t; break; - case FMT_MOD_z: expected_type = type_ssize_t; break; - case FMT_MOD_t: expected_type = type_ptrdiff_t; break; + case 'p': + if (fmt_mod != FMT_MOD_NONE) { + warn_invalid_length_modifier(pos, fmt_mod, fmt); + goto next_arg; + } + expected_type = type_void_ptr; + break; - default: - warn_invalid_length_modifier(pos, fmt_mod, fmt); - goto next_arg; - } - break; + case 'n': + switch (fmt_mod) { + case FMT_MOD_NONE: expected_type = type_int; break; + case FMT_MOD_hh: expected_type = type_signed_char; break; + case FMT_MOD_h: expected_type = type_short; break; + case FMT_MOD_l: expected_type = type_long; break; + case FMT_MOD_ll: expected_type = 
type_long_long; break; + case FMT_MOD_j: expected_type = type_intmax_t; break; + case FMT_MOD_z: expected_type = type_ssize_t; break; + case FMT_MOD_t: expected_type = type_ptrdiff_t; break; default: - warningf(pos, "encountered unknown conversion specifier '%%%C' at position %u", (wint_t)fmt, num_fmt); - if (arg == NULL) { - warningf(pos, "too few arguments for format string"); - return; - } + warn_invalid_length_modifier(pos, fmt_mod, fmt); goto next_arg; + } + break; + + default: + warningf(pos, "encountered unknown conversion specifier '%%%c' at format %u", + fmt, num_fmt); + if (arg == NULL) { + warningf(pos, "too few arguments for format string"); + return; + } + goto next_arg; } if (arg == NULL) { @@ -938,7 +862,8 @@ error_arg_type: next_arg: arg = arg->next; } - if (!atend(&vchar)) { + assert(fmt == '\0'); + if (c+1 < string + size) { warningf(pos, "format string contains '\\0'"); } if (arg != NULL) { @@ -948,8 +873,8 @@ next_arg: arg = arg->next; } warningf(pos, "%u argument%s but only %u format specifier%s", - num_args, num_args != 1 ? "s" : "", - num_fmt, num_fmt != 1 ? "s" : ""); + num_args, num_args != 1 ? "s" : "", + num_fmt, num_fmt != 1 ? "s" : ""); } } @@ -1018,8 +943,9 @@ void check_format(const call_expression_t *const call) /* the declaration has a GNU format attribute, check it */ } else { /* - * For some functions we always check the format, even if it was not specified. - * This allows to check format even in MS mode or without header included. + * For some functions we always check the format, even if it was not + * specified. This allows to check format even in MS mode or without + * header included. 
*/ const char *const name = entity->base.symbol->string; for (size_t i = 0; i < lengthof(builtin_table); ++i) { diff --git a/lexer.c b/lexer.c index 247083c..c3f86b3 100644 --- a/lexer.c +++ b/lexer.c @@ -53,8 +53,6 @@ #define strtold(s, e) strtod(s, e) #endif -typedef unsigned int utf32; - static utf32 c; token_t lexer_token; symbol_t *symbol_L; @@ -379,7 +377,7 @@ static inline void next_char(void); #define MATCH_NEWLINE(code) \ case '\r': \ next_char(); \ - if(c == '\n') { \ + if (c == '\n') { \ next_char(); \ } \ lexer_token.source_position.linenr++; \ @@ -389,13 +387,13 @@ static inline void next_char(void); lexer_token.source_position.linenr++; \ code -#define eat(c_type) do { assert(c == c_type); next_char(); } while(0) +#define eat(c_type) do { assert(c == c_type); next_char(); } while (0) static void maybe_concat_lines(void) { eat('\\'); - switch(c) { + switch (c) { MATCH_NEWLINE(return;) default: @@ -415,23 +413,23 @@ static inline void next_char(void) next_real_char(); /* filter trigraphs */ - if(UNLIKELY(c == '\\')) { + if (UNLIKELY(c == '\\')) { maybe_concat_lines(); goto end_of_next_char; } - if(LIKELY(c != '?')) + if (LIKELY(c != '?')) goto end_of_next_char; next_real_char(); - if(LIKELY(c != '?')) { + if (LIKELY(c != '?')) { put_back(c); c = '?'; goto end_of_next_char; } next_real_char(); - switch(c) { + switch (c) { case '=': c = '#'; break; case '(': c = '['; break; case '/': c = '\\'; maybe_concat_lines(); break; @@ -528,14 +526,11 @@ end_of_next_char:; */ static void parse_symbol(void) { - symbol_t *symbol; - char *string; - obstack_1grow(&symbol_obstack, (char) c); next_char(); - while(1) { - switch(c) { + while (true) { + switch (c) { DIGITS SYMBOL_CHARS obstack_1grow(&symbol_obstack, (char) c); @@ -551,221 +546,62 @@ dollar_sign: end_symbol: obstack_1grow(&symbol_obstack, '\0'); - string = obstack_finish(&symbol_obstack); - symbol = symbol_table_insert(string); + char *string = obstack_finish(&symbol_obstack); + symbol_t *symbol = 
symbol_table_insert(string); - lexer_token.type = symbol->ID; - lexer_token.v.symbol = symbol; + lexer_token.type = symbol->ID; + lexer_token.symbol = symbol; - if(symbol->string != string) { + if (symbol->string != string) { obstack_free(&symbol_obstack, string); } } -static void parse_integer_suffix(bool is_oct_hex) +/** + * parse suffixes like 'LU' or 'f' after numbers + */ +static void parse_number_suffix(void) { - bool is_unsigned = false; - bool min_long = false; - bool min_longlong = false; - bool not_traditional = false; - int pos = 0; - char suffix[4]; - - if (c == 'U' || c == 'u') { - not_traditional = true; - suffix[pos++] = toupper(c); - is_unsigned = true; - next_char(); - if (c == 'L' || c == 'l') { - suffix[pos++] = toupper(c); - min_long = true; - next_char(); - if (c == 'L' || c == 'l') { - suffix[pos++] = toupper(c); - min_longlong = true; - next_char(); - } - } - } else if (c == 'l' || c == 'L') { - suffix[pos++] = toupper(c); - min_long = true; - next_char(); - if (c == 'l' || c == 'L') { - not_traditional = true; - suffix[pos++] = toupper(c); - min_longlong = true; - next_char(); - if (c == 'u' || c == 'U') { - suffix[pos++] = toupper(c); - is_unsigned = true; - next_char(); - } - } else if (c == 'u' || c == 'U') { - not_traditional = true; - suffix[pos++] = toupper(c); - is_unsigned = true; + assert(obstack_object_size(&symbol_obstack) == 0); + while (true) { + switch (c) { + SYMBOL_CHARS + obstack_1grow(&symbol_obstack, (char) c); next_char(); - lexer_token.datatype = type_unsigned_long; + break; + default: + dollar_sign: + goto finish_suffix; } } - - if (warning.traditional && not_traditional) { - suffix[pos] = '\0'; - warningf(&lexer_token.source_position, - "traditional C rejects the '%s' suffix", suffix); +finish_suffix: + if (obstack_object_size(&symbol_obstack) == 0) { + lexer_token.symbol = NULL; + return; } - if (!is_unsigned) { - long long v = lexer_token.v.intvalue; - if (!min_long) { - if (v >= TARGET_INT_MIN && v <= 
TARGET_INT_MAX) { - lexer_token.datatype = type_int; - return; - } else if (is_oct_hex && v >= 0 && v <= TARGET_UINT_MAX) { - lexer_token.datatype = type_unsigned_int; - return; - } - } - if (!min_longlong) { - if (v >= TARGET_LONG_MIN && v <= TARGET_LONG_MAX) { - lexer_token.datatype = type_long; - return; - } else if (is_oct_hex && v >= 0 && (unsigned long long)v <= (unsigned long long)TARGET_ULONG_MAX) { - lexer_token.datatype = type_unsigned_long; - return; - } - } - unsigned long long uv = (unsigned long long) v; - if (is_oct_hex && uv > (unsigned long long) TARGET_LONGLONG_MAX) { - lexer_token.datatype = type_unsigned_long_long; - return; - } - lexer_token.datatype = type_long_long; - } else { - unsigned long long v = (unsigned long long) lexer_token.v.intvalue; - if (!min_long && v <= TARGET_UINT_MAX) { - lexer_token.datatype = type_unsigned_int; - return; - } - if (!min_longlong && v <= TARGET_ULONG_MAX) { - lexer_token.datatype = type_unsigned_long; - return; - } - lexer_token.datatype = type_unsigned_long_long; - } -} + obstack_1grow(&symbol_obstack, '\0'); + char *string = obstack_finish(&symbol_obstack); + symbol_t *symbol = symbol_table_insert(string); -static void parse_floating_suffix(void) -{ - switch(c) { - /* TODO: do something useful with the suffixes... */ - case 'f': - case 'F': - if (warning.traditional) { - warningf(&lexer_token.source_position, - "traditional C rejects the 'F' suffix"); - } - next_char(); - lexer_token.datatype = type_float; - break; - case 'l': - case 'L': - if (warning.traditional) { - warningf(&lexer_token.source_position, - "traditional C rejects the 'F' suffix"); - } - next_char(); - lexer_token.datatype = type_long_double; - break; - default: - lexer_token.datatype = type_double; - break; + if (symbol->string != string) { + obstack_free(&symbol_obstack, string); } + lexer_token.symbol = symbol; } -/** - * A replacement for strtoull. Only those parts needed for - * our parser are implemented. 
- */ -static unsigned long long parse_int_string(const char *s, const char **endptr, int base) +static string_t identify_string(char *string, size_t len) { - unsigned long long v = 0; - - switch (base) { - case 16: - for (;; ++s) { - /* check for overrun */ - if (v >= 0x1000000000000000ULL) - break; - switch (tolower(*s)) { - case '0': v <<= 4; break; - case '1': v <<= 4; v |= 0x1; break; - case '2': v <<= 4; v |= 0x2; break; - case '3': v <<= 4; v |= 0x3; break; - case '4': v <<= 4; v |= 0x4; break; - case '5': v <<= 4; v |= 0x5; break; - case '6': v <<= 4; v |= 0x6; break; - case '7': v <<= 4; v |= 0x7; break; - case '8': v <<= 4; v |= 0x8; break; - case '9': v <<= 4; v |= 0x9; break; - case 'a': v <<= 4; v |= 0xa; break; - case 'b': v <<= 4; v |= 0xb; break; - case 'c': v <<= 4; v |= 0xc; break; - case 'd': v <<= 4; v |= 0xd; break; - case 'e': v <<= 4; v |= 0xe; break; - case 'f': v <<= 4; v |= 0xf; break; - default: - goto end; - } - } - break; - case 8: - for (;; ++s) { - /* check for overrun */ - if (v >= 0x2000000000000000ULL) - break; - switch (tolower(*s)) { - case '0': v <<= 3; break; - case '1': v <<= 3; v |= 1; break; - case '2': v <<= 3; v |= 2; break; - case '3': v <<= 3; v |= 3; break; - case '4': v <<= 3; v |= 4; break; - case '5': v <<= 3; v |= 5; break; - case '6': v <<= 3; v |= 6; break; - case '7': v <<= 3; v |= 7; break; - default: - goto end; - } - } - break; - case 10: - for (;; ++s) { - /* check for overrun */ - if (v > 0x1999999999999999ULL) - break; - switch (tolower(*s)) { - case '0': v *= 10; break; - case '1': v *= 10; v += 1; break; - case '2': v *= 10; v += 2; break; - case '3': v *= 10; v += 3; break; - case '4': v *= 10; v += 4; break; - case '5': v *= 10; v += 5; break; - case '6': v *= 10; v += 6; break; - case '7': v *= 10; v += 7; break; - case '8': v *= 10; v += 8; break; - case '9': v *= 10; v += 9; break; - default: - goto end; - } - } - break; - default: - assert(0); - break; + /* TODO hash */ +#if 0 + const char *result = 
strset_insert(&stringset, concat); + if (result != concat) { + obstack_free(&symbol_obstack, concat); } -end: - *endptr = s; - return v; +#else + const char *result = string; +#endif + return (string_t) {result, len}; } /** @@ -774,29 +610,29 @@ end: */ static void parse_number_hex(void) { - bool is_float = false; - assert(c == 'x' || c == 'X'); - next_char(); + bool is_float = false; + bool has_digits = false; - obstack_1grow(&symbol_obstack, '0'); - obstack_1grow(&symbol_obstack, 'x'); - - while(isxdigit(c)) { + assert(obstack_object_size(&symbol_obstack) == 0); + while (isxdigit(c)) { + has_digits = true; obstack_1grow(&symbol_obstack, (char) c); next_char(); } if (c == '.') { + is_float = true; obstack_1grow(&symbol_obstack, (char) c); next_char(); while (isxdigit(c)) { + has_digits = true; obstack_1grow(&symbol_obstack, (char) c); next_char(); } - is_float = true; } if (c == 'p' || c == 'P') { + is_float = true; obstack_1grow(&symbol_obstack, (char) c); next_char(); @@ -809,39 +645,26 @@ static void parse_number_hex(void) obstack_1grow(&symbol_obstack, (char) c); next_char(); } - is_float = true; + } else if (is_float) { + errorf(&lexer_token.source_position, + "hexadecimal floatingpoint constant requires an exponent"); } - obstack_1grow(&symbol_obstack, '\0'); - char *string = obstack_finish(&symbol_obstack); - if(*string == '\0') { - parse_error("invalid hex number"); - lexer_token.type = T_ERROR; - obstack_free(&symbol_obstack, string); - return; - } + size_t size = obstack_object_size(&symbol_obstack); + char *string = obstack_finish(&symbol_obstack); + lexer_token.literal = identify_string(string, size); - if (is_float) { - char *endptr; - lexer_token.type = T_FLOATINGPOINT; - lexer_token.v.floatvalue = strtold(string, &endptr); - - if(*endptr != '\0') { - parse_error("invalid hex float literal"); - } + lexer_token.type = + is_float ? 
T_FLOATINGPOINT_HEXADECIMAL : T_INTEGER_HEXADECIMAL; - parse_floating_suffix(); - } else { - const char *endptr; - lexer_token.type = T_INTEGER; - lexer_token.v.intvalue = parse_int_string(string + 2, &endptr, 16); - if(*endptr != '\0') { - parse_error("hex number literal too long"); - } - parse_integer_suffix(true); + if (!has_digits) { + errorf(&lexer_token.source_position, "invalid number literal '0x%S'", + &lexer_token.literal); + lexer_token.literal.begin = "0"; + lexer_token.literal.size = 1; } - obstack_free(&symbol_obstack, string); + parse_number_suffix(); } /** @@ -849,9 +672,9 @@ static void parse_number_hex(void) * * @param char the character to check */ -static inline bool is_octal_digit(utf32 chr) +static bool is_octal_digit(utf32 chr) { - switch(chr) { + switch (chr) { case '0': case '1': case '2': @@ -867,132 +690,86 @@ static inline bool is_octal_digit(utf32 chr) } /** - * Parses a octal number and set the lexer_token. + * Parses a number and sets the lexer_token. */ -static void parse_number_oct(void) +static void parse_number(void) { - while(is_octal_digit(c)) { - obstack_1grow(&symbol_obstack, (char) c); - next_char(); - } - obstack_1grow(&symbol_obstack, '\0'); - char *string = obstack_finish(&symbol_obstack); + bool is_float = false; + bool has_digits = false; - const char *endptr; - lexer_token.type = T_INTEGER; - lexer_token.v.intvalue = parse_int_string(string, &endptr, 8); - if(*endptr != '\0') { - parse_error("octal number literal too long"); + assert(obstack_object_size(&symbol_obstack) == 0); + if (c == '0') { + next_char(); + if (c == 'x' || c == 'X') { + next_char(); + parse_number_hex(); + return; + } else { + has_digits = true; + } + obstack_1grow(&symbol_obstack, '0'); } - obstack_free(&symbol_obstack, string); - parse_integer_suffix(true); -} - -/** - * Parses a decimal including float number and set the - * lexer_token. 
- */ -static void parse_number_dec(void) -{ - bool is_float = false; while (isdigit(c)) { + has_digits = true; obstack_1grow(&symbol_obstack, (char) c); next_char(); } if (c == '.') { + is_float = true; obstack_1grow(&symbol_obstack, '.'); next_char(); while (isdigit(c)) { + has_digits = true; obstack_1grow(&symbol_obstack, (char) c); next_char(); } - is_float = true; } - if(c == 'e' || c == 'E') { - obstack_1grow(&symbol_obstack, (char) c); + if (c == 'e' || c == 'E') { + is_float = true; + obstack_1grow(&symbol_obstack, 'e'); next_char(); - if(c == '-' || c == '+') { + if (c == '-' || c == '+') { obstack_1grow(&symbol_obstack, (char) c); next_char(); } - while(isdigit(c)) { + while (isdigit(c)) { obstack_1grow(&symbol_obstack, (char) c); next_char(); } - is_float = true; } - obstack_1grow(&symbol_obstack, '\0'); - char *string = obstack_finish(&symbol_obstack); - - if(is_float) { - char *endptr; - lexer_token.type = T_FLOATINGPOINT; - lexer_token.v.floatvalue = strtold(string, &endptr); + size_t size = obstack_object_size(&symbol_obstack); + char *string = obstack_finish(&symbol_obstack); + lexer_token.literal = identify_string(string, size); - if(*endptr != '\0') { - parse_error("invalid number literal"); + /* is it an octal number? */ + if (is_float) { + lexer_token.type = T_FLOATINGPOINT; + } else if (string[0] == '0') { + lexer_token.type = T_INTEGER_OCTAL; + + /* check for invalid octal digits */ + for (size_t i= 0; i < size; ++i) { + char t = string[i]; + if (t == '8' || t == '9') + errorf(&lexer_token.source_position, + "invalid digit '%c' in octal number", t); } - - parse_floating_suffix(); } else { - const char *endptr; - lexer_token.type = T_INTEGER; - lexer_token.v.intvalue = parse_int_string(string, &endptr, 10); - - if(*endptr != '\0') { - parse_error("invalid number literal"); - } - - parse_integer_suffix(false); + lexer_token.type = T_INTEGER; } - obstack_free(&symbol_obstack, string); -} -/** - * Parses a number and sets the lexer_token. 
- */ -static void parse_number(void) -{ - if (c == '0') { - next_char(); - switch (c) { - case 'X': - case 'x': - parse_number_hex(); - break; - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - parse_number_oct(); - break; - case '8': - case '9': - next_char(); - parse_error("invalid octal number"); - lexer_token.type = T_ERROR; - return; - case '.': - case 'e': - case 'E': - default: - obstack_1grow(&symbol_obstack, '0'); - parse_number_dec(); - return; - } - } else { - parse_number_dec(); + if (!has_digits) { + errorf(&lexer_token.source_position, "invalid number literal '%S'", + &lexer_token.literal); } + + parse_number_suffix(); } /** @@ -1053,7 +830,7 @@ static utf32 parse_octal_sequence(utf32 const first_digit) static utf32 parse_hex_sequence(void) { utf32 value = 0; - while(isxdigit(c)) { + while (isxdigit(c)) { value = 16 * value + digit_value(c); next_char(); } @@ -1102,12 +879,17 @@ static utf32 parse_escape_sequence(void) case 'e': if (c_mode & _GNUC) return 27; /* hopefully 27 is ALWAYS the code for ESCAPE */ - /* FALLTHROUGH */ - default: - /* §6.4.4.4:8 footnote 64 */ - parse_error("unknown escape sequence"); + break; + case 'u': + case 'U': + parse_error("universal character parsing not implemented yet"); return EOF; + default: + break; } + /* §6.4.4.4:8 footnote 64 */ + parse_error("unknown escape sequence"); + return EOF; } /** @@ -1122,84 +904,16 @@ string_t concat_strings(const string_t *const s1, const string_t *const s2) memcpy(concat, s1->begin, len1); memcpy(concat + len1, s2->begin, len2 + 1); - if (warning.traditional) { - warningf(&lexer_token.source_position, - "traditional C rejects string constant concatenation"); - } -#if 0 /* TODO hash */ - const char *result = strset_insert(&stringset, concat); - if(result != concat) { - obstack_free(&symbol_obstack, concat); - } - - return result; -#else - return (string_t){ concat, len1 + len2 + 1 }; -#endif -} - -/** - * Concatenate a string and a 
wide string. - */ -wide_string_t concat_string_wide_string(const string_t *const s1, const wide_string_t *const s2) -{ - const size_t len1 = s1->size - 1; - const size_t len2 = s2->size - 1; - - wchar_rep_t *const concat = obstack_alloc(&symbol_obstack, (len1 + len2 + 1) * sizeof(*concat)); - const char *const src = s1->begin; - for (size_t i = 0; i != len1; ++i) { - concat[i] = src[i]; - } - memcpy(concat + len1, s2->begin, (len2 + 1) * sizeof(*concat)); - if (warning.traditional) { - warningf(&lexer_token.source_position, - "traditional C rejects string constant concatenation"); - } - - return (wide_string_t){ concat, len1 + len2 + 1 }; -} - -/** - * Concatenate two wide strings. - */ -wide_string_t concat_wide_strings(const wide_string_t *const s1, const wide_string_t *const s2) -{ - const size_t len1 = s1->size - 1; - const size_t len2 = s2->size - 1; - - wchar_rep_t *const concat = obstack_alloc(&symbol_obstack, (len1 + len2 + 1) * sizeof(*concat)); - memcpy(concat, s1->begin, len1 * sizeof(*concat)); - memcpy(concat + len1, s2->begin, (len2 + 1) * sizeof(*concat)); - if (warning.traditional) { - warningf(&lexer_token.source_position, - "traditional C rejects string constant concatenation"); - } - - return (wide_string_t){ concat, len1 + len2 + 1 }; + return identify_string(concat, len1 + len2 + 1); } -/** - * Concatenate a wide string and a string. 
- */ -wide_string_t concat_wide_string_string(const wide_string_t *const s1, const string_t *const s2) +string_t make_string(const char *string) { - const size_t len1 = s1->size - 1; - const size_t len2 = s2->size - 1; - - wchar_rep_t *const concat = obstack_alloc(&symbol_obstack, (len1 + len2 + 1) * sizeof(*concat)); - memcpy(concat, s1->begin, len1 * sizeof(*concat)); - const char *const src = s2->begin; - wchar_rep_t *const dst = concat + len1; - for (size_t i = 0; i != len2 + 1; ++i) { - dst[i] = src[i]; - } - if (warning.traditional) { - warningf(&lexer_token.source_position, - "traditional C rejects string constant concatenation"); - } + size_t len = strlen(string) + 1; + char *const space = obstack_alloc(&symbol_obstack, len); + memcpy(space, string, len); - return (wide_string_t){ concat, len1 + len2 + 1 }; + return identify_string(space, len); } static void grow_symbol(utf32 const tc) @@ -1231,8 +945,8 @@ static void parse_string_literal(void) eat('"'); - while(1) { - switch(c) { + while (true) { + switch (c) { case '\\': { utf32 const tc = parse_escape_sequence(); if (tc >= 0x100) { @@ -1269,22 +983,11 @@ end_of_string: /* add finishing 0 to the string */ obstack_1grow(&symbol_obstack, '\0'); - const size_t size = (size_t)obstack_object_size(&symbol_obstack); - const char *const string = obstack_finish(&symbol_obstack); + const size_t size = (size_t)obstack_object_size(&symbol_obstack); + char *string = obstack_finish(&symbol_obstack); -#if 0 /* TODO hash */ - /* check if there is already a copy of the string */ - result = strset_insert(&stringset, string); - if(result != string) { - obstack_free(&symbol_obstack, string); - } -#else - const char *const result = string; -#endif - - lexer_token.type = T_STRING_LITERAL; - lexer_token.v.string.begin = result; - lexer_token.v.string.size = size; + lexer_token.type = T_STRING_LITERAL; + lexer_token.literal = identify_string(string, size); } /** @@ -1296,11 +999,11 @@ static void 
parse_wide_character_constant(void) eat('\''); - while(1) { - switch(c) { + while (true) { + switch (c) { case '\\': { - wchar_rep_t tc = parse_escape_sequence(); - obstack_grow(&symbol_obstack, &tc, sizeof(tc)); + const utf32 tc = parse_escape_sequence(); + grow_symbol(tc); break; } @@ -1321,26 +1024,19 @@ static void parse_wide_character_constant(void) return; } - default: { - wchar_rep_t tc = (wchar_rep_t) c; - obstack_grow(&symbol_obstack, &tc, sizeof(tc)); + default: + grow_symbol(c); next_char(); break; } - } } end_of_wide_char_constant:; - size_t size = (size_t) obstack_object_size(&symbol_obstack); - assert(size % sizeof(wchar_rep_t) == 0); - size /= sizeof(wchar_rep_t); + size_t size = (size_t) obstack_object_size(&symbol_obstack); + char *string = obstack_finish(&symbol_obstack); - const wchar_rep_t *string = obstack_finish(&symbol_obstack); - - lexer_token.type = T_WIDE_CHARACTER_CONSTANT; - lexer_token.v.wide_string.begin = string; - lexer_token.v.wide_string.size = size; - lexer_token.datatype = type_wchar_t; + lexer_token.type = T_WIDE_CHARACTER_CONSTANT; + lexer_token.literal = identify_string(string, size); } /** @@ -1348,64 +1044,9 @@ end_of_wide_char_constant:; */ static void parse_wide_string_literal(void) { - const unsigned start_linenr = lexer_token.source_position.linenr; - - assert(c == '"'); - next_char(); - - while(1) { - switch(c) { - case '\\': { - wchar_rep_t tc = parse_escape_sequence(); - obstack_grow(&symbol_obstack, &tc, sizeof(tc)); - break; - } - - case EOF: { - source_position_t source_position; - source_position.input_name = lexer_token.source_position.input_name; - source_position.linenr = start_linenr; - errorf(&source_position, "string has no end"); - lexer_token.type = T_ERROR; - return; - } - - case '"': - next_char(); - goto end_of_string; - - default: { - wchar_rep_t tc = c; - obstack_grow(&symbol_obstack, &tc, sizeof(tc)); - next_char(); - break; - } - } - } - -end_of_string:; - - /* TODO: concatenate multiple strings 
separated by whitespace... */ - - /* add finishing 0 to the string */ - wchar_rep_t nul = L'\0'; - obstack_grow(&symbol_obstack, &nul, sizeof(nul)); - const size_t size = (size_t)obstack_object_size(&symbol_obstack) / sizeof(wchar_rep_t); - const wchar_rep_t *const string = obstack_finish(&symbol_obstack); - -#if 0 /* TODO hash */ - /* check if there is already a copy of the string */ - const wchar_rep_t *const result = strset_insert(&stringset, string); - if(result != string) { - obstack_free(&symbol_obstack, string); - } -#else - const wchar_rep_t *const result = string; -#endif - - lexer_token.type = T_WIDE_STRING_LITERAL; - lexer_token.v.wide_string.begin = result; - lexer_token.v.wide_string.size = size; + parse_string_literal(); + if (lexer_token.type == T_STRING_LITERAL) + lexer_token.type = T_WIDE_STRING_LITERAL; } /** @@ -1417,8 +1058,8 @@ static void parse_character_constant(void) eat('\''); - while(1) { - switch(c) { + while (true) { + switch (c) { case '\\': { utf32 const tc = parse_escape_sequence(); if (tc >= 0x100) { @@ -1456,13 +1097,11 @@ static void parse_character_constant(void) } end_of_char_constant:; - const size_t size = (size_t)obstack_object_size(&symbol_obstack); - const char *const string = obstack_finish(&symbol_obstack); + const size_t size = (size_t)obstack_object_size(&symbol_obstack); + char *const string = obstack_finish(&symbol_obstack); - lexer_token.type = T_CHARACTER_CONSTANT; - lexer_token.v.string.begin = string; - lexer_token.v.string.size = size; - lexer_token.datatype = c_mode & _CXX && size == 1 ? 
type_char : type_int; + lexer_token.type = T_CHARACTER_CONSTANT; + lexer_token.literal = identify_string(string, size); } /** @@ -1472,8 +1111,8 @@ static void skip_multiline_comment(void) { unsigned start_linenr = lexer_token.source_position.linenr; - while(1) { - switch(c) { + while (true) { + switch (c) { case '/': next_char(); if (c == '*') { @@ -1485,7 +1124,7 @@ static void skip_multiline_comment(void) break; case '*': next_char(); - if(c == '/') { + if (c == '/') { next_char(); return; } @@ -1513,8 +1152,8 @@ static void skip_multiline_comment(void) */ static void skip_line_comment(void) { - while(1) { - switch(c) { + while (true) { + switch (c) { case EOF: return; @@ -1555,7 +1194,7 @@ static inline void next_pp_token(void) */ static void eat_until_newline(void) { - while(pp_token.type != '\n' && pp_token.type != T_EOF) { + while (pp_token.type != '\n' && pp_token.type != T_EOF) { next_pp_token(); } } @@ -1566,7 +1205,7 @@ static void eat_until_newline(void) static void define_directive(void) { lexer_next_preprocessing_token(); - if(lexer_token.type != T_IDENTIFIER) { + if (lexer_token.type != T_IDENTIFIER) { parse_error("expected identifier after #define\n"); eat_until_newline(); } @@ -1596,14 +1235,14 @@ static void endif_directive(void) */ static void parse_line_directive(void) { - if(pp_token.type != T_INTEGER) { + if (pp_token.type != T_INTEGER) { parse_error("expected integer"); } else { - lexer_token.source_position.linenr = (unsigned int)(pp_token.v.intvalue - 1); + lexer_token.source_position.linenr = atoi(pp_token.literal.begin); next_pp_token(); } - if(pp_token.type == T_STRING_LITERAL) { - lexer_token.source_position.input_name = pp_token.v.string.begin; + if (pp_token.type == T_STRING_LITERAL) { + lexer_token.source_position.input_name = pp_token.literal.begin; next_pp_token(); } @@ -1638,13 +1277,13 @@ static void parse_pragma(void) bool unknown_pragma = true; next_pp_token(); - if (pp_token.v.symbol->pp_ID == TP_STDC) { + if 
(pp_token.symbol->pp_ID == TP_STDC) { stdc_pragma_kind_t kind = STDC_UNKNOWN; /* a STDC pragma */ if (c_mode & _C99) { next_pp_token(); - switch (pp_token.v.symbol->pp_ID) { + switch (pp_token.symbol->pp_ID) { case TP_FP_CONTRACT: kind = STDC_FP_CONTRACT; break; @@ -1660,7 +1299,7 @@ static void parse_pragma(void) if (kind != STDC_UNKNOWN) { stdc_pragma_value_kind_t value = STDC_VALUE_UNKNOWN; next_pp_token(); - switch (pp_token.v.symbol->pp_ID) { + switch (pp_token.symbol->pp_ID) { case TP_ON: value = STDC_VALUE_ON; break; @@ -1695,9 +1334,9 @@ static void parse_pragma(void) static void parse_preprocessor_identifier(void) { assert(pp_token.type == T_IDENTIFIER); - symbol_t *symbol = pp_token.v.symbol; + symbol_t *symbol = pp_token.symbol; - switch(symbol->pp_ID) { + switch (symbol->pp_ID) { case TP_include: printf("include - enable header name parsing!\n"); break; @@ -1738,7 +1377,7 @@ static void parse_preprocessor_directive(void) { next_pp_token(); - switch(pp_token.type) { + switch (pp_token.type) { case T_IDENTIFIER: parse_preprocessor_identifier(); break; @@ -1757,8 +1396,8 @@ static void parse_preprocessor_directive(void) #define MAYBE_PROLOG \ next_char(); \ - while(1) { \ - switch(c) { + while (true) { \ + switch (c) { #define MAYBE(ch, set_type) \ case ch: \ @@ -1780,7 +1419,7 @@ static void parse_preprocessor_directive(void) default: \ code \ } \ - } /* end of while(1) */ \ + } /* end of while (true) */ \ break; #define ELSE(set_type) \ @@ -1791,8 +1430,8 @@ static void parse_preprocessor_directive(void) void lexer_next_preprocessing_token(void) { - while(1) { - switch(c) { + while (true) { + switch (c) { case ' ': case '\t': next_char(); @@ -1806,7 +1445,7 @@ void lexer_next_preprocessing_token(void) SYMBOL_CHARS parse_symbol(); /* might be a wide string ( L"string" ) */ - if (lexer_token.v.symbol == symbol_L) { + if (lexer_token.symbol == symbol_L) { switch (c) { case '"': parse_wide_string_literal(); break; case '\'': parse_wide_character_constant(); 
break; @@ -1831,7 +1470,7 @@ void lexer_next_preprocessing_token(void) DIGITS put_back(c); c = '.'; - parse_number_dec(); + parse_number(); return; case '.': diff --git a/lexer.h b/lexer.h index 3f83f11..a34d591 100644 --- a/lexer.h +++ b/lexer.h @@ -39,9 +39,7 @@ void select_input_encoding(char const* encoding); void lexer_open_stream(FILE *stream, const char *input_name); void lexer_open_buffer(const char *buffer, size_t len, const char *input_name); -string_t concat_strings( const string_t *s1, const string_t *s2); -wide_string_t concat_string_wide_string(const string_t *s1, const wide_string_t *s2); -wide_string_t concat_wide_strings( const wide_string_t *s1, const wide_string_t *s2); -wide_string_t concat_wide_string_string(const wide_string_t *s1, const string_t *s2); +string_t concat_strings(const string_t *s1, const string_t *s2); +string_t make_string(const char *str); #endif diff --git a/parser.c b/parser.c index 502dcc6..101b404 100644 --- a/parser.c +++ b/parser.c @@ -305,11 +305,15 @@ static size_t get_expression_struct_size(expression_kind_t kind) [EXPR_INVALID] = sizeof(expression_base_t), [EXPR_REFERENCE] = sizeof(reference_expression_t), [EXPR_REFERENCE_ENUM_VALUE] = sizeof(reference_expression_t), - [EXPR_CONST] = sizeof(const_expression_t), - [EXPR_CHARACTER_CONSTANT] = sizeof(const_expression_t), - [EXPR_WIDE_CHARACTER_CONSTANT] = sizeof(const_expression_t), + [EXPR_LITERAL_INTEGER] = sizeof(literal_expression_t), + [EXPR_LITERAL_INTEGER_OCTAL] = sizeof(literal_expression_t), + [EXPR_LITERAL_INTEGER_HEXADECIMAL]= sizeof(literal_expression_t), + [EXPR_LITERAL_FLOATINGPOINT] = sizeof(literal_expression_t), + [EXPR_LITERAL_FLOATINGPOINT_HEXADECIMAL] = sizeof(literal_expression_t), + [EXPR_LITERAL_CHARACTER] = sizeof(literal_expression_t), + [EXPR_LITERAL_WIDE_CHARACTER] = sizeof(literal_expression_t), [EXPR_STRING_LITERAL] = sizeof(string_literal_expression_t), - [EXPR_WIDE_STRING_LITERAL] = sizeof(wide_string_literal_expression_t), + 
[EXPR_WIDE_STRING_LITERAL] = sizeof(string_literal_expression_t), [EXPR_COMPOUND_LITERAL] = sizeof(compound_literal_expression_t), [EXPR_CALL] = sizeof(call_expression_t), [EXPR_UNARY_FIRST] = sizeof(unary_expression_t), @@ -1085,15 +1089,23 @@ static expression_t *parse_assignment_expression(void) return parse_sub_expression(PREC_ASSIGNMENT); } +static void warn_string_concat(const source_position_t *pos) +{ + if (warning.traditional) { + warningf(pos, "traditional C rejects string constant concatenation"); + } +} + static string_t parse_string_literals(void) { assert(token.type == T_STRING_LITERAL); - string_t result = token.v.string; + string_t result = token.literal; next_token(); while (token.type == T_STRING_LITERAL) { - result = concat_strings(&result, &token.v.string); + warn_string_concat(&token.source_position); + result = concat_strings(&result, &token.literal); next_token(); } @@ -1161,7 +1173,7 @@ static attribute_argument_t *parse_attribute_arguments(void) /* is it an identifier */ if (token.type == T_IDENTIFIER && (look_ahead(1)->type == ',' || look_ahead(1)->type == ')')) { - symbol_t *symbol = token.v.symbol; + symbol_t *symbol = token.symbol; argument->kind = ATTRIBUTE_ARGUMENT_SYMBOL; argument->v.symbol = symbol; next_token(); @@ -1204,7 +1216,7 @@ static symbol_t *get_symbol_from_token(void) { switch(token.type) { case T_IDENTIFIER: - return token.v.symbol; + return token.symbol; case T_auto: case T_char: case T_double: @@ -1570,11 +1582,9 @@ unary: determine_lhs_ent(expr->va_starte.ap, lhs_ent); return; + EXPR_LITERAL_CASES case EXPR_UNKNOWN: case EXPR_INVALID: - case EXPR_CONST: - case EXPR_CHARACTER_CONSTANT: - case EXPR_WIDE_CHARACTER_CONSTANT: case EXPR_STRING_LITERAL: case EXPR_WIDE_STRING_LITERAL: case EXPR_COMPOUND_LITERAL: // TODO init? 
@@ -1620,7 +1630,7 @@ static designator_t *parse_designation(void) T_IDENTIFIER, NULL); return NULL; } - designator->symbol = token.v.symbol; + designator->symbol = token.symbol; next_token(); break; default: @@ -1636,7 +1646,7 @@ end_error: return NULL; } -static initializer_t *initializer_from_string(array_type_t *type, +static initializer_t *initializer_from_string(array_type_t *const type, const string_t *const string) { /* TODO: check len vs. size of array type */ @@ -1649,7 +1659,7 @@ static initializer_t *initializer_from_string(array_type_t *type, } static initializer_t *initializer_from_wide_string(array_type_t *const type, - wide_string_t *const string) + const string_t *const string) { /* TODO: check len vs. size of array type */ (void) type; @@ -1673,6 +1683,7 @@ static initializer_t *initializer_from_expression(type_t *orig_type, type_t *type = skip_typeref(orig_type); type_t *expr_type_orig = expression->base.type; type_t *expr_type = skip_typeref(expr_type_orig); + if (is_type_array(type) && expr_type->kind == TYPE_POINTER) { array_type_t *const array_type = &type->array; type_t *const element_type = skip_typeref(array_type->element_type); @@ -1680,26 +1691,26 @@ static initializer_t *initializer_from_expression(type_t *orig_type, if (element_type->kind == TYPE_ATOMIC) { atomic_type_kind_t akind = element_type->atomic.akind; switch (expression->kind) { - case EXPR_STRING_LITERAL: - if (akind == ATOMIC_TYPE_CHAR - || akind == ATOMIC_TYPE_SCHAR - || akind == ATOMIC_TYPE_UCHAR) { - return initializer_from_string(array_type, - &expression->string.value); - } - break; + case EXPR_STRING_LITERAL: + if (akind == ATOMIC_TYPE_CHAR + || akind == ATOMIC_TYPE_SCHAR + || akind == ATOMIC_TYPE_UCHAR) { + return initializer_from_string(array_type, + &expression->string_literal.value); + } + break; - case EXPR_WIDE_STRING_LITERAL: { - type_t *bare_wchar_type = skip_typeref(type_wchar_t); - if (get_unqualified_type(element_type) == bare_wchar_type) { - return 
initializer_from_wide_string(array_type, - &expression->wide_string.value); - } - break; + case EXPR_WIDE_STRING_LITERAL: { + type_t *bare_wchar_type = skip_typeref(type_wchar_t); + if (get_unqualified_type(element_type) == bare_wchar_type) { + return initializer_from_wide_string(array_type, + &expression->string_literal.value); } + break; + } - default: - break; + default: + break; } } } @@ -2113,7 +2124,7 @@ static initializer_t *parse_sub_initializer(type_path_t *path, /* GNU-style designator ("identifier: value") */ designator = allocate_ast_zero(sizeof(designator[0])); designator->source_position = token.source_position; - designator->symbol = token.v.symbol; + designator->symbol = token.symbol; eat(T_IDENTIFIER); eat(':'); @@ -2292,6 +2303,18 @@ end_error: return NULL; } +static expression_t *make_size_literal(size_t value) +{ + expression_t *literal = allocate_ast_zero(EXPR_LITERAL_INTEGER); + literal->base.type = type_size_t; + + char buf[128]; + snprintf(buf, sizeof(buf), "%u", (unsigned) value); + literal->literal.value = make_string(buf); + + return literal; +} + /** * Parses an initializer. Parsers either a compound literal * (env->declaration == NULL) or an initializer of a declaration. @@ -2357,13 +2380,9 @@ static initializer_t *parse_initializer(parse_initializer_env_t *env) internal_errorf(HERE, "invalid initializer type"); } - expression_t *cnst = allocate_expression_zero(EXPR_CONST); - cnst->base.type = type_size_t; - cnst->conste.v.int_value = size; - type_t *new_type = duplicate_type(type); - new_type->array.size_expression = cnst; + new_type->array.size_expression = make_size_literal(size); new_type->array.size_constant = true; new_type->array.has_implicit_size = true; new_type->array.size = size; @@ -2402,7 +2421,7 @@ static compound_t *parse_compound_type_specifier(bool is_struct) entity_kind_tag_t const kind = is_struct ? 
ENTITY_STRUCT : ENTITY_UNION; if (token.type == T_IDENTIFIER) { /* the compound has a name, check if we have seen it already */ - symbol = token.v.symbol; + symbol = token.symbol; next_token(); entity_t *entity = get_tag(symbol, kind); @@ -2487,7 +2506,7 @@ static void parse_enum_entries(type_t *const enum_type) entity_t *entity = allocate_entity_zero(ENTITY_ENUM_VALUE); entity->enum_value.enum_type = enum_type; - entity->base.symbol = token.v.symbol; + entity->base.symbol = token.symbol; entity->base.source_position = token.source_position; next_token(); @@ -2518,7 +2537,7 @@ static type_t *parse_enum_specifier(void) eat(T_enum); switch (token.type) { case T_IDENTIFIER: - symbol = token.v.symbol; + symbol = token.symbol; next_token(); entity = get_tag(symbol, ENTITY_ENUM); @@ -2611,7 +2630,7 @@ static type_t *parse_typeof(void) } switch (token.type) { case T_IDENTIFIER: - if (is_typedef_symbol(token.v.symbol)) { + if (is_typedef_symbol(token.symbol)) { type = parse_typename(); } else { expression = parse_expression(); @@ -2701,7 +2720,7 @@ static attribute_t *parse_attribute_ms_property(attribute_t *attribute) } bool is_put; - symbol_t *symbol = token.v.symbol; + symbol_t *symbol = token.symbol; next_token(); if (strcmp(symbol->string, "put") == 0) { is_put = true; @@ -2718,9 +2737,9 @@ static attribute_t *parse_attribute_ms_property(attribute_t *attribute) goto end_error; } if (is_put) { - property->put_symbol = token.v.symbol; + property->put_symbol = token.symbol; } else { - property->get_symbol = token.v.symbol; + property->get_symbol = token.symbol; } next_token(); } while (next_if(',')); @@ -2739,7 +2758,7 @@ static attribute_t *parse_microsoft_extended_decl_modifier_single(void) if (next_if(T_restrict)) { kind = ATTRIBUTE_MS_RESTRICT; } else if (token.type == T_IDENTIFIER) { - const char *name = token.v.symbol->string; + const char *name = token.symbol->string; next_token(); for (attribute_kind_t k = ATTRIBUTE_MS_FIRST; k <= ATTRIBUTE_MS_LAST; ++k) { @@ 
-3013,7 +3032,7 @@ wrong_thread_stoarge_class: } } - type_t *const typedef_type = get_typedef_type(token.v.symbol); + type_t *const typedef_type = get_typedef_type(token.symbol); if (typedef_type == NULL) { /* Be somewhat resilient to typos like 'vodi f()' at the beginning of a * declaration, so it doesn't generate 'implicit int' followed by more @@ -3027,7 +3046,7 @@ wrong_thread_stoarge_class: errorf(HERE, "%K does not name a type", &token); entity_t *entity = - create_error_entity(token.v.symbol, ENTITY_TYPEDEF); + create_error_entity(token.symbol, ENTITY_TYPEDEF); type = allocate_type_zero(TYPE_TYPEDEF); type->typedeft.typedefe = &entity->typedefe; @@ -3290,7 +3309,7 @@ static void parse_identifier_list(scope_t *scope) entity_t *entity = allocate_entity_zero(ENTITY_PARAMETER); entity->base.source_position = token.source_position; entity->base.namespc = NAMESPACE_NORMAL; - entity->base.symbol = token.v.symbol; + entity->base.symbol = token.symbol; /* a K&R parameter has no type, yet */ next_token(); @@ -3332,7 +3351,7 @@ static bool has_parameters(void) { /* func(void) is not a parameter */ if (token.type == T_IDENTIFIER) { - entity_t const *const entity = get_entity(token.v.symbol, NAMESPACE_NORMAL); + entity_t const *const entity = get_entity(token.symbol, NAMESPACE_NORMAL); if (entity == NULL) return true; if (entity->kind != ENTITY_TYPEDEF) @@ -3359,7 +3378,7 @@ static void parse_parameters(function_type_t *type, scope_t *scope) int saved_comma_state = save_and_reset_anchor_state(','); if (token.type == T_IDENTIFIER && - !is_typedef_symbol(token.v.symbol)) { + !is_typedef_symbol(token.symbol)) { token_type_t la1_type = (token_type_t)look_ahead(1)->type; if (la1_type == ',' || la1_type == ')') { type->kr_style_parameters = true; @@ -3617,7 +3636,7 @@ ptr_operator_end: ; if (env->must_be_abstract) { errorf(HERE, "no identifier expected in typename"); } else { - env->symbol = token.v.symbol; + env->symbol = token.symbol; env->source_position = 
token.source_position; } next_token(); @@ -4378,7 +4397,7 @@ static bool is_declaration_specifier(const token_t *token, TYPE_QUALIFIERS return true; case T_IDENTIFIER: - return is_typedef_symbol(token->v.symbol); + return is_typedef_symbol(token->symbol); case T___extension__: STORAGE_CLASSES @@ -4890,9 +4909,7 @@ static bool expression_returns(expression_t const *const expr) case EXPR_REFERENCE: case EXPR_REFERENCE_ENUM_VALUE: - case EXPR_CONST: - case EXPR_CHARACTER_CONSTANT: - case EXPR_WIDE_CHARACTER_CONSTANT: + EXPR_LITERAL_CASES case EXPR_STRING_LITERAL: case EXPR_WIDE_STRING_LITERAL: case EXPR_COMPOUND_LITERAL: // TODO descend into initialisers @@ -5958,82 +5975,181 @@ static expression_t *expected_expression_error(void) return create_invalid_expression(); } +static type_t *get_string_type(void) +{ + return warning.write_strings ? type_const_char_ptr : type_char_ptr; +} + +static type_t *get_wide_string_type(void) +{ + return warning.write_strings ? type_const_wchar_t_ptr : type_wchar_t_ptr; +} + /** * Parse a string constant. */ -static expression_t *parse_string_const(void) +static expression_t *parse_string_literal(void) { - wide_string_t wres; - if (token.type == T_STRING_LITERAL) { - string_t res = token.v.string; + source_position_t begin = token.source_position; + string_t res = token.literal; + bool is_wide = (token.type == T_WIDE_STRING_LITERAL); + + next_token(); + while (token.type == T_STRING_LITERAL + || token.type == T_WIDE_STRING_LITERAL) { + warn_string_concat(&token.source_position); + res = concat_strings(&res, &token.literal); next_token(); - while (token.type == T_STRING_LITERAL) { - res = concat_strings(&res, &token.v.string); - next_token(); - } - if (token.type != T_WIDE_STRING_LITERAL) { - expression_t *const cnst = allocate_expression_zero(EXPR_STRING_LITERAL); - /* note: that we use type_char_ptr here, which is already the - * automatic converted type. 
revert_automatic_type_conversion - * will construct the array type */ - cnst->base.type = warning.write_strings ? type_const_char_ptr : type_char_ptr; - cnst->string.value = res; - return cnst; - } + is_wide |= token.type == T_WIDE_STRING_LITERAL; + } - wres = concat_string_wide_string(&res, &token.v.wide_string); + expression_t *literal; + if (is_wide) { + literal = allocate_expression_zero(EXPR_WIDE_STRING_LITERAL); + literal->base.type = get_wide_string_type(); } else { - wres = token.v.wide_string; + literal = allocate_expression_zero(EXPR_STRING_LITERAL); + literal->base.type = get_string_type(); } + literal->base.source_position = begin; + literal->literal.value = res; + + return literal; +} + +/** + * Parse a boolean constant. + */ +static expression_t *parse_boolean_literal(bool value) +{ + expression_t *literal = allocate_expression_zero(EXPR_LITERAL_BOOLEAN); + literal->base.source_position = token.source_position; + literal->base.type = type_bool; + literal->literal.value.begin = value ? "true" : "false"; + literal->literal.value.size = value ? 4 : 5; + next_token(); + return literal; +} - for (;;) { - switch (token.type) { - case T_WIDE_STRING_LITERAL: - wres = concat_wide_strings(&wres, &token.v.wide_string); - break; +static void warn_traditional_suffix(void) +{ + if (!warning.traditional) + return; + warningf(&token.source_position, "traditional C rejects the '%Y' suffix", + token.symbol); +} - case T_STRING_LITERAL: - wres = concat_wide_string_string(&wres, &token.v.string); - break; +static void check_integer_suffix(void) +{ + symbol_t *suffix = token.symbol; + if (suffix == NULL) + return; - default: { - expression_t *const cnst = allocate_expression_zero(EXPR_WIDE_STRING_LITERAL); - cnst->base.type = warning.write_strings ? 
type_const_wchar_t_ptr : type_wchar_t_ptr; - cnst->wide_string.value = wres; - return cnst; + bool not_traditional = false; + const char *c = suffix->string; + if (*c == 'l' || *c == 'L') { + ++c; + if (*c == *(c-1)) { + not_traditional = true; + ++c; + if (*c == 'u' || *c == 'U') { + ++c; + } + } else if (*c == 'u' || *c == 'U') { + not_traditional = true; + ++c; + } + } else if (*c == 'u' || *c == 'U') { + not_traditional = true; + ++c; + if (*c == 'l' || *c == 'L') { + ++c; + if (*c == *(c-1)) { + ++c; } } - next_token(); + } + if (*c != '\0') { + errorf(&token.source_position, + "invalid suffix '%s' on integer constant", suffix->string); + } else if (not_traditional) { + warn_traditional_suffix(); } } -/** - * Parse a boolean constant. - */ -static expression_t *parse_bool_const(bool value) +static type_t *check_floatingpoint_suffix(void) { - expression_t *cnst = allocate_expression_zero(EXPR_CONST); - cnst->base.type = type_bool; - cnst->conste.v.int_value = value; + symbol_t *suffix = token.symbol; + type_t *type = type_double; + if (suffix == NULL) + return type; - next_token(); + bool not_traditional = false; + const char *c = suffix->string; + if (*c == 'f' || *c == 'F') { + ++c; + type = type_float; + } else if (*c == 'l' || *c == 'L') { + ++c; + type = type_long_double; + } + if (*c != '\0') { + errorf(&token.source_position, + "invalid suffix '%s' on floatingpoint constant", suffix->string); + } else if (not_traditional) { + warn_traditional_suffix(); + } - return cnst; + return type; } /** * Parse an integer constant. 
*/ -static expression_t *parse_int_const(void) +static expression_t *parse_number_literal(void) { - expression_t *cnst = allocate_expression_zero(EXPR_CONST); - cnst->base.type = token.datatype; - cnst->conste.v.int_value = token.v.intvalue; + expression_kind_t kind; + type_t *type; + + switch (token.type) { + case T_INTEGER: + kind = EXPR_LITERAL_INTEGER; + check_integer_suffix(); + break; + case T_INTEGER_OCTAL: + kind = EXPR_LITERAL_INTEGER_OCTAL; + check_integer_suffix(); + break; + case T_INTEGER_HEXADECIMAL: + kind = EXPR_LITERAL_INTEGER_HEXADECIMAL; + check_integer_suffix(); + break; + case T_FLOATINGPOINT: + kind = EXPR_LITERAL_FLOATINGPOINT; + type = check_floatingpoint_suffix(); + break; + case T_FLOATINGPOINT_HEXADECIMAL: + kind = EXPR_LITERAL_FLOATINGPOINT_HEXADECIMAL; + type = check_floatingpoint_suffix(); + break; + default: + panic("unexpected token type in parse_number_literal"); + } + expression_t *literal = allocate_expression_zero(kind); + literal->base.source_position = token.source_position; + literal->base.type = type; + literal->literal.value = token.literal; + literal->literal.suffix = token.symbol; next_token(); - return cnst; + /* integer type depends on the size of the number and the size + * representable by the types. The backend/codegeneration has to determine + * that + */ + determine_literal_type(&literal->literal); + return literal; } /** @@ -6041,20 +6157,23 @@ static expression_t *parse_int_const(void) */ static expression_t *parse_character_constant(void) { - expression_t *cnst = allocate_expression_zero(EXPR_CHARACTER_CONSTANT); - cnst->base.type = token.datatype; - cnst->conste.v.character = token.v.string; + expression_t *literal = allocate_expression_zero(EXPR_LITERAL_CHARACTER); + literal->base.source_position = token.source_position; + literal->base.type = c_mode & _CXX ? 
type_char : type_int; + literal->literal.value = token.literal; - if (cnst->conste.v.character.size != 1) { - if (!GNU_MODE) { + size_t len = literal->literal.value.size; + if (len != 1) { + if (!GNU_MODE && !(c_mode & _C99)) { errorf(HERE, "more than 1 character in character constant"); } else if (warning.multichar) { + literal->base.type = type_int; warningf(HERE, "multi-character character constant"); } } - next_token(); - return cnst; + next_token(); + return literal; } /** @@ -6062,34 +6181,18 @@ static expression_t *parse_character_constant(void) */ static expression_t *parse_wide_character_constant(void) { - expression_t *cnst = allocate_expression_zero(EXPR_WIDE_CHARACTER_CONSTANT); - cnst->base.type = token.datatype; - cnst->conste.v.wide_character = token.v.wide_string; + expression_t *literal = allocate_expression_zero(EXPR_LITERAL_WIDE_CHARACTER); + literal->base.source_position = token.source_position; + literal->base.type = type_int; + literal->literal.value = token.literal; - if (cnst->conste.v.wide_character.size != 1) { - if (!GNU_MODE) { - errorf(HERE, "more than 1 character in character constant"); - } else if (warning.multichar) { - warningf(HERE, "multi-character character constant"); - } + size_t len = wstrlen(&literal->literal.value); + if (len != 1) { + warningf(HERE, "multi-character character constant"); } - next_token(); - - return cnst; -} - -/** - * Parse a float constant. 
- */ -static expression_t *parse_float_const(void) -{ - expression_t *cnst = allocate_expression_zero(EXPR_CONST); - cnst->base.type = token.datatype; - cnst->conste.v.float_value = token.v.floatvalue; next_token(); - - return cnst; + return literal; } static entity_t *create_implicit_function(symbol_t *symbol, @@ -6149,57 +6252,58 @@ static type_t *automatic_type_conversion(type_t *orig_type) type_t *revert_automatic_type_conversion(const expression_t *expression) { switch (expression->kind) { - case EXPR_REFERENCE: { - entity_t *entity = expression->reference.entity; - if (is_declaration(entity)) { - return entity->declaration.type; - } else if (entity->kind == ENTITY_ENUM_VALUE) { - return entity->enum_value.enum_type; - } else { - panic("no declaration or enum in reference"); - } + case EXPR_REFERENCE: { + entity_t *entity = expression->reference.entity; + if (is_declaration(entity)) { + return entity->declaration.type; + } else if (entity->kind == ENTITY_ENUM_VALUE) { + return entity->enum_value.enum_type; + } else { + panic("no declaration or enum in reference"); } + } - case EXPR_SELECT: { - entity_t *entity = expression->select.compound_entry; - assert(is_declaration(entity)); - type_t *type = entity->declaration.type; - return get_qualified_type(type, - expression->base.type->base.qualifiers); - } + case EXPR_SELECT: { + entity_t *entity = expression->select.compound_entry; + assert(is_declaration(entity)); + type_t *type = entity->declaration.type; + return get_qualified_type(type, + expression->base.type->base.qualifiers); + } - case EXPR_UNARY_DEREFERENCE: { - const expression_t *const value = expression->unary.value; - type_t *const type = skip_typeref(value->base.type); - if (!is_type_pointer(type)) - return type_error_type; - return type->pointer.points_to; - } + case EXPR_UNARY_DEREFERENCE: { + const expression_t *const value = expression->unary.value; + type_t *const type = skip_typeref(value->base.type); + if (!is_type_pointer(type)) + return 
type_error_type; + return type->pointer.points_to; + } - case EXPR_ARRAY_ACCESS: { - const expression_t *array_ref = expression->array_access.array_ref; - type_t *type_left = skip_typeref(array_ref->base.type); - if (!is_type_pointer(type_left)) - return type_error_type; - return type_left->pointer.points_to; - } + case EXPR_ARRAY_ACCESS: { + const expression_t *array_ref = expression->array_access.array_ref; + type_t *type_left = skip_typeref(array_ref->base.type); + if (!is_type_pointer(type_left)) + return type_error_type; + return type_left->pointer.points_to; + } - case EXPR_STRING_LITERAL: { - size_t size = expression->string.value.size; - return make_array_type(type_char, size, TYPE_QUALIFIER_NONE); - } + case EXPR_STRING_LITERAL: { + size_t size = expression->string_literal.value.size; + return make_array_type(type_char, size, TYPE_QUALIFIER_NONE); + } - case EXPR_WIDE_STRING_LITERAL: { - size_t size = expression->wide_string.value.size; - return make_array_type(type_wchar_t, size, TYPE_QUALIFIER_NONE); - } + case EXPR_WIDE_STRING_LITERAL: { + size_t size = wstrlen(&expression->string_literal.value); + return make_array_type(type_wchar_t, size, TYPE_QUALIFIER_NONE); + } - case EXPR_COMPOUND_LITERAL: - return expression->compound_literal.type; + case EXPR_COMPOUND_LITERAL: + return expression->compound_literal.type; - default: - return expression->base.type; + default: + break; } + return expression->base.type; } /** @@ -6240,7 +6344,7 @@ static entity_t *parse_qualified_identifier(void) parse_error_expected("while parsing identifier", T_IDENTIFIER, NULL); return create_error_entity(sym_anonymous, ENTITY_VARIABLE); } - symbol = token.v.symbol; + symbol = token.symbol; pos = *HERE; next_token(); @@ -6497,7 +6601,7 @@ static expression_t *parse_parenthesized_expression(void) TYPE_SPECIFIERS return parse_cast(); case T_IDENTIFIER: - if (is_typedef_symbol(token.v.symbol)) { + if (is_typedef_symbol(token.symbol)) { return parse_cast(); } } @@ -6584,7 +6688,7 @@ 
static designator_t *parse_designator(void) T_IDENTIFIER, NULL); return NULL; } - result->symbol = token.v.symbol; + result->symbol = token.symbol; next_token(); designator_t *last_designator = result; @@ -6597,7 +6701,7 @@ static designator_t *parse_designator(void) } designator_t *designator = allocate_ast_zero(sizeof(result[0])); designator->source_position = *HERE; - designator->symbol = token.v.symbol; + designator->symbol = token.symbol; next_token(); last_designator->next = designator; @@ -6930,7 +7034,7 @@ static expression_t *parse_label_address(void) parse_error_expected("while parsing label address", T_IDENTIFIER, NULL); goto end_error; } - symbol_t *symbol = token.v.symbol; + symbol_t *symbol = token.symbol; next_token(); label_t *label = get_label(symbol); @@ -6954,10 +7058,11 @@ end_error: static expression_t *parse_noop_expression(void) { /* the result is a (int)0 */ - expression_t *cnst = allocate_expression_zero(EXPR_CONST); - cnst->base.type = type_int; - cnst->conste.v.int_value = 0; - cnst->conste.is_ms_noop = true; + expression_t *literal = allocate_expression_zero(EXPR_LITERAL_MS_NOOP); + literal->base.type = type_int; + literal->base.source_position = token.source_position; + literal->literal.value.begin = "__noop"; + literal->literal.value.size = 6; eat(T___noop); @@ -6976,7 +7081,7 @@ static expression_t *parse_noop_expression(void) expect(')', end_error); end_error: - return cnst; + return literal; } /** @@ -6985,54 +7090,57 @@ end_error: static expression_t *parse_primary_expression(void) { switch (token.type) { - case T_false: return parse_bool_const(false); - case T_true: return parse_bool_const(true); - case T_INTEGER: return parse_int_const(); - case T_CHARACTER_CONSTANT: return parse_character_constant(); - case T_WIDE_CHARACTER_CONSTANT: return parse_wide_character_constant(); - case T_FLOATINGPOINT: return parse_float_const(); - case T_STRING_LITERAL: - case T_WIDE_STRING_LITERAL: return parse_string_const(); - case T___FUNCTION__: 
- case T___func__: return parse_function_keyword(); - case T___PRETTY_FUNCTION__: return parse_pretty_function_keyword(); - case T___FUNCSIG__: return parse_funcsig_keyword(); - case T___FUNCDNAME__: return parse_funcdname_keyword(); - case T___builtin_offsetof: return parse_offsetof(); - case T___builtin_va_start: return parse_va_start(); - case T___builtin_va_arg: return parse_va_arg(); - case T___builtin_va_copy: return parse_va_copy(); - case T___builtin_isgreater: - case T___builtin_isgreaterequal: - case T___builtin_isless: - case T___builtin_islessequal: - case T___builtin_islessgreater: - case T___builtin_isunordered: return parse_compare_builtin(); - case T___builtin_constant_p: return parse_builtin_constant(); - case T___builtin_types_compatible_p: return parse_builtin_types_compatible(); - case T__assume: return parse_assume(); - case T_ANDAND: - if (GNU_MODE) - return parse_label_address(); - break; + case T_false: return parse_boolean_literal(false); + case T_true: return parse_boolean_literal(true); + case T_INTEGER: + case T_INTEGER_OCTAL: + case T_INTEGER_HEXADECIMAL: + case T_FLOATINGPOINT: + case T_FLOATINGPOINT_HEXADECIMAL: return parse_number_literal(); + case T_CHARACTER_CONSTANT: return parse_character_constant(); + case T_WIDE_CHARACTER_CONSTANT: return parse_wide_character_constant(); + case T_STRING_LITERAL: + case T_WIDE_STRING_LITERAL: return parse_string_literal(); + case T___FUNCTION__: + case T___func__: return parse_function_keyword(); + case T___PRETTY_FUNCTION__: return parse_pretty_function_keyword(); + case T___FUNCSIG__: return parse_funcsig_keyword(); + case T___FUNCDNAME__: return parse_funcdname_keyword(); + case T___builtin_offsetof: return parse_offsetof(); + case T___builtin_va_start: return parse_va_start(); + case T___builtin_va_arg: return parse_va_arg(); + case T___builtin_va_copy: return parse_va_copy(); + case T___builtin_isgreater: + case T___builtin_isgreaterequal: + case T___builtin_isless: + case 
T___builtin_islessequal: + case T___builtin_islessgreater: + case T___builtin_isunordered: return parse_compare_builtin(); + case T___builtin_constant_p: return parse_builtin_constant(); + case T___builtin_types_compatible_p: return parse_builtin_types_compatible(); + case T__assume: return parse_assume(); + case T_ANDAND: + if (GNU_MODE) + return parse_label_address(); + break; - case '(': return parse_parenthesized_expression(); - case T___noop: return parse_noop_expression(); + case '(': return parse_parenthesized_expression(); + case T___noop: return parse_noop_expression(); - /* Gracefully handle type names while parsing expressions. */ - case T_COLONCOLON: + /* Gracefully handle type names while parsing expressions. */ + case T_COLONCOLON: + return parse_reference(); + case T_IDENTIFIER: + if (!is_typedef_symbol(token.symbol)) { return parse_reference(); - case T_IDENTIFIER: - if (!is_typedef_symbol(token.v.symbol)) { - return parse_reference(); - } - /* FALLTHROUGH */ - TYPENAME_START { - source_position_t const pos = *HERE; - type_t const *const type = parse_typename(); - errorf(&pos, "encountered type '%T' while parsing expression", type); - return create_invalid_expression(); } + /* FALLTHROUGH */ + TYPENAME_START { + source_position_t const pos = *HERE; + type_t const *const type = parse_typename(); + errorf(&pos, "encountered type '%T' while parsing expression", type); + return create_invalid_expression(); + } } errorf(HERE, "unexpected token %K, expected an expression", &token); @@ -7177,7 +7285,7 @@ static expression_t *parse_select_expression(expression_t *addr) parse_error_expected("while parsing select", T_IDENTIFIER, NULL); return create_invalid_expression(); } - symbol_t *symbol = token.v.symbol; + symbol_t *symbol = token.symbol; next_token(); type_t *const orig_type = addr->base.type; @@ -8277,8 +8385,8 @@ static void warn_string_literal_address(expression_t const* expr) expr = expr->unary.value; } - if (expr->kind == EXPR_STRING_LITERAL || - 
expr->kind == EXPR_WIDE_STRING_LITERAL) { + if (expr->kind == EXPR_STRING_LITERAL + || expr->kind == EXPR_WIDE_STRING_LITERAL) { warningf(&expr->base.source_position, "comparison with string literal results in unspecified behaviour"); } @@ -8628,13 +8736,20 @@ static bool expression_has_effect(const expression_t *const expr) case EXPR_INVALID: return true; /* do NOT warn */ case EXPR_REFERENCE: return false; case EXPR_REFERENCE_ENUM_VALUE: return false; + case EXPR_LABEL_ADDRESS: return false; + /* suppress the warning for microsoft __noop operations */ - case EXPR_CONST: return expr->conste.is_ms_noop; - case EXPR_CHARACTER_CONSTANT: return false; - case EXPR_WIDE_CHARACTER_CONSTANT: return false; + case EXPR_LITERAL_MS_NOOP: return true; + case EXPR_LITERAL_BOOLEAN: + case EXPR_LITERAL_CHARACTER: + case EXPR_LITERAL_WIDE_CHARACTER: + case EXPR_LITERAL_INTEGER: + case EXPR_LITERAL_INTEGER_OCTAL: + case EXPR_LITERAL_INTEGER_HEXADECIMAL: + case EXPR_LITERAL_FLOATINGPOINT: + case EXPR_LITERAL_FLOATINGPOINT_HEXADECIMAL: return false; case EXPR_STRING_LITERAL: return false; case EXPR_WIDE_STRING_LITERAL: return false; - case EXPR_LABEL_ADDRESS: return false; case EXPR_CALL: { const call_expression_t *const call = &expr->call; @@ -8974,7 +9089,7 @@ static asm_argument_t *parse_asm_arguments(bool is_out) T_IDENTIFIER, NULL); return NULL; } - argument->symbol = token.v.symbol; + argument->symbol = token.symbol; expect(']', end_error); } @@ -9276,7 +9391,7 @@ end_error: static statement_t *parse_label_statement(void) { assert(token.type == T_IDENTIFIER); - symbol_t *symbol = token.v.symbol; + symbol_t *symbol = token.symbol; label_t *label = get_label(symbol); statement_t *const statement = allocate_statement_zero(STATEMENT_LABEL); @@ -9658,7 +9773,7 @@ static statement_t *parse_goto(void) statement->gotos.expression = expression; } else if (token.type == T_IDENTIFIER) { - symbol_t *symbol = token.v.symbol; + symbol_t *symbol = token.symbol; next_token(); 
statement->gotos.label = get_label(symbol); } else { @@ -9972,7 +10087,7 @@ static statement_t *parse_local_label_declaration(void) T_IDENTIFIER, NULL); goto end_error; } - symbol_t *symbol = token.v.symbol; + symbol_t *symbol = token.symbol; entity_t *entity = get_entity(symbol, NAMESPACE_LABEL); if (entity != NULL && entity->base.parent_scope == current_scope) { errorf(HERE, "multiple definitions of '__label__ %Y' (previous definition %P)", @@ -10010,7 +10125,7 @@ static void parse_namespace_definition(void) symbol_t *symbol = NULL; if (token.type == T_IDENTIFIER) { - symbol = token.v.symbol; + symbol = token.symbol; next_token(); entity = get_entity(symbol, NAMESPACE_NORMAL); @@ -10075,7 +10190,7 @@ static statement_t *intern_parse_statement(void) token_type_t la1_type = (token_type_t)look_ahead(1)->type; if (la1_type == ':') { statement = parse_label_statement(); - } else if (is_typedef_symbol(token.v.symbol)) { + } else if (is_typedef_symbol(token.symbol)) { statement = parse_declaration_statement(); } else { /* it's an identifier, the grammar says this must be an @@ -10085,7 +10200,7 @@ static statement_t *intern_parse_statement(void) switch (la1_type) { case '&': case '*': - if (get_entity(token.v.symbol, NAMESPACE_NORMAL) != NULL) + if (get_entity(token.symbol, NAMESPACE_NORMAL) != NULL) goto expression_statment; /* FALLTHROUGH */ diff --git a/preprocessor.c b/preprocessor.c index 570182c..9c4dd66 100644 --- a/preprocessor.c +++ b/preprocessor.c @@ -484,9 +484,9 @@ end_of_string: const char *const result = string; #endif - pp_token.type = TP_STRING_LITERAL; - pp_token.v.string.begin = result; - pp_token.v.string.size = size; + pp_token.type = TP_STRING_LITERAL; + pp_token.literal.begin = result; + pp_token.literal.size = size; } static void parse_wide_character_constant(void) @@ -532,67 +532,6 @@ end_of_wide_char_constant: /* TODO... 
*/ } -static void parse_wide_string_literal(void) -{ - const unsigned start_linenr = input.position.linenr; - - assert(CC == '"'); - next_char(); - - while(1) { - switch(CC) { - case '\\': { - wchar_rep_t tc = parse_escape_sequence(); - obstack_grow(&symbol_obstack, &tc, sizeof(tc)); - break; - } - - case EOF: { - source_position_t source_position; - source_position.input_name = pp_token.source_position.input_name; - source_position.linenr = start_linenr; - errorf(&source_position, "string has no end"); - pp_token.type = TP_ERROR; - return; - } - - case '"': - next_char(); - goto end_of_string; - - default: { - wchar_rep_t tc = CC; - obstack_grow(&symbol_obstack, &tc, sizeof(tc)); - next_char(); - break; - } - } - } - -end_of_string:; - /* add finishing 0 to the string */ - static const wchar_rep_t nul = L'\0'; - obstack_grow(&symbol_obstack, &nul, sizeof(nul)); - - const size_t size - = (size_t)obstack_object_size(&symbol_obstack) / sizeof(wchar_rep_t); - const wchar_rep_t *const string = obstack_finish(&symbol_obstack); - -#if 0 /* TODO hash */ - /* check if there is already a copy of the string */ - const wchar_rep_t *const result = strset_insert(&stringset, string); - if(result != string) { - obstack_free(&symbol_obstack, string); - } -#else - const wchar_rep_t *const result = string; -#endif - - pp_token.type = TP_WIDE_STRING_LITERAL; - pp_token.v.wide_string.begin = result; - pp_token.v.wide_string.size = size; -} - static void parse_character_constant(void) { const unsigned start_linenr = input.position.linenr; @@ -637,9 +576,9 @@ end_of_char_constant:; const size_t size = (size_t)obstack_object_size(&symbol_obstack); const char *const string = obstack_finish(&symbol_obstack); - pp_token.type = TP_CHARACTER_CONSTANT; - pp_token.v.string.begin = string; - pp_token.v.string.size = size; + pp_token.type = TP_CHARACTER_CONSTANT; + pp_token.literal.begin = string; + pp_token.literal.size = size; } #define SYMBOL_CHARS_WITHOUT_E_P \ @@ -747,7 +686,7 @@ restart: 
return; /* if it was an identifier then we might need to expand again */ - pp_definition_t *symbol_definition = pp_token.v.symbol->pp_definition; + pp_definition_t *symbol_definition = pp_token.symbol->pp_definition; if(symbol_definition != NULL && !symbol_definition->is_expanding) { symbol_definition->parent_expansion = definition; symbol_definition->expand_pos = 0; @@ -912,7 +851,7 @@ end_symbol: /* might be a wide string or character constant ( L"string"/L'c' ) */ if (CC == '"' && string[0] == 'L' && string[1] == '\0') { obstack_free(&symbol_obstack, string); - parse_wide_string_literal(); + /* TODO */ return; } else if (CC == '\'' && string[0] == 'L' && string[1] == '\0') { obstack_free(&symbol_obstack, string); @@ -922,8 +861,8 @@ end_symbol: symbol_t *symbol = symbol_table_insert(string); - pp_token.type = symbol->pp_ID; - pp_token.v.symbol = symbol; + pp_token.type = symbol->pp_ID; + pp_token.symbol = symbol; /* we can free the memory from symbol obstack if we already had an entry in * the symbol table */ @@ -993,13 +932,12 @@ end_number: size_t size = obstack_object_size(&symbol_obstack); char *string = obstack_finish(&symbol_obstack); - pp_token.type = TP_NUMBER; - pp_token.v.string.begin = string; - pp_token.v.string.size = size; + pp_token.type = TP_NUMBER; + pp_token.literal.begin = string; + pp_token.literal.size = size; } - #define MAYBE_PROLOG \ next_char(); \ while(1) { \ @@ -1292,14 +1230,14 @@ static void emit_pp_token(void) switch(pp_token.type) { case TP_IDENTIFIER: - fputs(pp_token.v.symbol->string, out); + fputs(pp_token.symbol->string, out); break; case TP_NUMBER: - fputs(pp_token.v.string.begin, out); + fputs(pp_token.literal.begin, out); break; case TP_STRING_LITERAL: fputc('"', out); - fputs(pp_token.v.string.begin, out); + fputs(pp_token.literal.begin, out); fputc('"', out); break; case '\n': @@ -1332,22 +1270,6 @@ static bool strings_equal(const string_t *string1, const string_t *string2) return true; } -static bool 
wide_strings_equal(const wide_string_t *string1, - const wide_string_t *string2) -{ - size_t size = string1->size; - if(size != string2->size) - return false; - - const wchar_rep_t *c1 = string1->begin; - const wchar_rep_t *c2 = string2->begin; - for(size_t i = 0; i < size; ++i, ++c1, ++c2) { - if(*c1 != *c2) - return false; - } - return true; -} - static bool pp_tokens_equal(const token_t *token1, const token_t *token2) { if(token1->type != token2->type) @@ -1358,16 +1280,12 @@ static bool pp_tokens_equal(const token_t *token1, const token_t *token2) /* TODO */ return false; case TP_IDENTIFIER: - return token1->v.symbol == token2->v.symbol; + return token1->symbol == token2->symbol; case TP_NUMBER: case TP_CHARACTER_CONSTANT: case TP_STRING_LITERAL: - return strings_equal(&token1->v.string, &token2->v.string); + return strings_equal(&token1->literal, &token2->literal); - case TP_WIDE_CHARACTER_CONSTANT: - case TP_WIDE_STRING_LITERAL: - return wide_strings_equal(&token1->v.wide_string, - &token2->v.wide_string); default: return true; } @@ -1399,7 +1317,7 @@ static void parse_define_directive(void) "expected identifier after #define, got '%t'", &pp_token); goto error_out; } - symbol_t *symbol = pp_token.v.symbol; + symbol_t *symbol = pp_token.symbol; pp_definition_t *new_definition = obstack_alloc(&pp_obstack, sizeof(new_definition[0])); @@ -1427,7 +1345,7 @@ static void parse_define_directive(void) } break; case TP_IDENTIFIER: - obstack_ptr_grow(&pp_obstack, pp_token.v.symbol); + obstack_ptr_grow(&pp_obstack, pp_token.symbol); next_preprocessing_token(); if (pp_token.type == ',') { @@ -1508,7 +1426,7 @@ static void parse_undef_directive(void) return; } - symbol_t *symbol = pp_token.v.symbol; + symbol_t *symbol = pp_token.symbol; symbol->pp_definition = NULL; next_preprocessing_token(); @@ -1696,7 +1614,7 @@ static void parse_ifdef_ifndef_directive(void) /* just take the true case in the hope to avoid further errors */ condition = true; } else { - symbol_t *symbol = 
pp_token.v.symbol; + symbol_t *symbol = pp_token.symbol; pp_definition_t *pp_definition = symbol->pp_definition; next_preprocessing_token(); diff --git a/printer.c b/printer.c index 71d48f8..7e6e8f5 100644 --- a/printer.c +++ b/printer.c @@ -26,6 +26,11 @@ static FILE* out; +static void print_char_file(const char c) +{ + fputc(c, out); +} + static void print_string_file(const char *str) { fputs(str, out); @@ -36,26 +41,6 @@ static void print_vformat_file(const char *format, va_list ap) vfprintf(out, format, ap); } -static void print_char_file(wchar_rep_t c) -{ - const unsigned tc = (unsigned) c; - if (tc < 0x80) { - fputc(tc, out); - } else if (tc < 0x800) { - fputc(0xC0 | (tc >> 6), out); - fputc(0x80 | (tc & 0x3F), out); - } else if (tc < 0x10000) { - fputc(0xE0 | ( tc >> 12), out); - fputc(0x80 | ((tc >> 6) & 0x3F), out); - fputc(0x80 | ( tc & 0x3F), out); - } else { - fputc(0xF0 | ( tc >> 18), out); - fputc(0x80 | ((tc >> 12) & 0x3F), out); - fputc(0x80 | ((tc >> 6) & 0x3F), out); - fputc(0x80 | ( tc & 0x3F), out); - } -} - void print_to_file(FILE *new_out) { out = new_out; @@ -68,6 +53,11 @@ void print_to_file(FILE *new_out) static struct obstack *obst; +static void print_char_obstack(const char c) +{ + obstack_1grow(obst, c); +} + static void print_string_obstack(const char *str) { size_t len = strlen(str); @@ -79,26 +69,6 @@ static void print_vformat_obstack(const char *format, va_list ap) obstack_vprintf(obst, format, ap); } -static void print_char_obstack(wchar_rep_t c) -{ - const unsigned tc = (unsigned) c; - if (tc < 0x80) { - obstack_1grow(obst, tc); - } else if (tc < 0x800) { - obstack_1grow(obst, 0xC0 | (tc >> 6)); - obstack_1grow(obst, 0x80 | (tc & 0x3F)); - } else if (tc < 0x10000) { - obstack_1grow(obst, 0xE0 | ( tc >> 12)); - obstack_1grow(obst, 0x80 | ((tc >> 6) & 0x3F)); - obstack_1grow(obst, 0x80 | ( tc & 0x3F)); - } else { - obstack_1grow(obst, 0xF0 | ( tc >> 18)); - obstack_1grow(obst, 0x80 | ((tc >> 12) & 0x3F)); - obstack_1grow(obst, 0x80 | 
((tc >> 6) & 0x3F)); - obstack_1grow(obst, 0x80 | ( tc & 0x3F)); - } -} - void print_to_obstack(struct obstack *new_obst) { obst = new_obst; @@ -112,7 +82,7 @@ void print_to_obstack(struct obstack *new_obst) static char *buffer_pos; static char *buffer_end; -static inline void buffer_add_char(int c) +static void print_char_buffer(const char c) { if (buffer_pos == buffer_end) return; @@ -122,7 +92,7 @@ static inline void buffer_add_char(int c) static void print_string_buffer(const char *str) { for (const char *c = str; *c != '\0'; ++c) { - buffer_add_char(*c); + print_char_buffer(*c); } } @@ -133,26 +103,6 @@ static void print_vformat_buffer(const char *format, va_list ap) buffer_pos += written < size ? written : size; } -static void print_char_buffer(wchar_rep_t c) -{ - const unsigned tc = (unsigned) c; - if (tc < 0x80) { - buffer_add_char(tc); - } else if (tc < 0x800) { - buffer_add_char(0xC0 | (tc >> 6)); - buffer_add_char(0x80 | (tc & 0x3F)); - } else if (tc < 0x10000) { - buffer_add_char(0xE0 | ( tc >> 12)); - buffer_add_char(0x80 | ((tc >> 6) & 0x3F)); - buffer_add_char(0x80 | ( tc & 0x3F)); - } else { - buffer_add_char(0xF0 | ( tc >> 18)); - buffer_add_char(0x80 | ((tc >> 12) & 0x3F)); - buffer_add_char(0x80 | ((tc >> 6) & 0x3F)); - buffer_add_char(0x80 | ( tc & 0x3F)); - } -} - void print_to_buffer(char *buffer, size_t buffer_size) { buffer_pos = buffer; @@ -173,7 +123,7 @@ void finish_print_to_buffer(void) void (*print_string)(const char *str) = print_string_file; void (*print_vformat)(const char *format, va_list ap) = print_vformat_file; -void (*print_char)(wchar_rep_t c) = print_char_file; +void (*print_char)(const char c) = print_char_file; void printer_push(void) { diff --git a/printer.h b/printer.h index e30a4b9..f57d3cf 100644 --- a/printer.h +++ b/printer.h @@ -35,8 +35,7 @@ /** print a string into current output */ extern void (*print_string)(const char *str); extern void (*print_vformat)(const char *format, va_list ap); -/** print a single unicode 
character to current output (encoded as UTF-8) */ -extern void (*print_char)(wchar_rep_t c); +extern void (*print_char)(const char c); /** print a printf style format string to current output */ static inline void __attribute__((format(printf,1,2))) print_format(const char *format, ...) diff --git a/string_rep.h b/string_rep.h index e24b40c..873563b 100644 --- a/string_rep.h +++ b/string_rep.h @@ -20,18 +20,66 @@ #ifndef STRING_REP_H #define STRING_REP_H -#include - -typedef wchar_t wchar_rep_t; +#include +#include typedef struct string_t { - const char *begin; - size_t size; + const char *begin; /**< UTF-8 encoded string, the last character is + * guaranteed to be 0 */ + size_t size; /**< size of string in bytes (not characters) */ } string_t; -typedef struct wide_string_t { - const wchar_rep_t *begin; - size_t size; -} wide_string_t; +typedef unsigned int utf32; +#define UTF32_PRINTF_FORMAT "%u" + +/** + * "parse" an utf8 character from a string. + * Warning: This function only works for valid utf-8 inputs. The behaviour + * is undefined for invalid utf-8 input. + * + * @param p A pointer to a pointer into the string. The pointer + * is incremented for each consumed char + */ +static inline utf32 read_utf8_char(const char **p) +{ + const unsigned char *c = (const unsigned char *) *p; + utf32 result; + + if ((*c & 0x80) == 0) { + /* 1 character encoding: 0b0??????? */ + result = *c++; + } else if ((*c & 0xE0) == 0xC0) { + /* 2 character encoding: 0b110?????, 0b10?????? */ + result = *c++ & 0x1F; + result = (result << 6) | (*c++ & 0x3F); + } else if ((*c & 0xF0) == 0xE0) { + /* 3 character encoding: 0b1110????, 0b10??????, 0b10?????? */ + result = *c++ & 0x0F; + result = (result << 6) | (*c++ & 0x3F); + result = (result << 6) | (*c++ & 0x3F); + } else { + /* 4 character enc.: 0b11110???, 0b10??????, 0b10??????, 0b10?????? 
*/ + assert((*c & 0xF8) == 0xF0); + result = *c++ & 0x07; + result = (result << 6) | (*c++ & 0x3F); + result = (result << 6) | (*c++ & 0x3F); + result = (result << 6) | (*c++ & 0x3F); + } + + *p = (const char*) c; + return result; +} + +static inline size_t wstrlen(const string_t *string) +{ + size_t result = 0; + const char *p = string->begin; + const char *end = p + string->size; + while (p < end) { + read_utf8_char(&p); + ++result; + } + return result; +} #endif diff --git a/token.c b/token.c index 3145d79..1fbaafd 100644 --- a/token.c +++ b/token.c @@ -124,20 +124,42 @@ symbol_t *get_token_symbol(const token_t *token) return token_symbols[token->type]; } +static void print_stringrep(const string_t *string, FILE *f) +{ + for (size_t i = 0; i < string->size; ++i) { + fputc(string->begin[i], f); + } +} + void print_token(FILE *f, const token_t *token) { switch(token->type) { case T_IDENTIFIER: - fprintf(f, "identifier '%s'", token->v.symbol->string); + fprintf(f, "identifier '%s'", token->symbol->string); break; case T_INTEGER: - fprintf(f, "integer number '%lld'", token->v.intvalue); - break; + case T_INTEGER_OCTAL: + case T_INTEGER_HEXADECIMAL: case T_FLOATINGPOINT: - fprintf(f, "floating-point number '%LF'", token->v.floatvalue); + case T_FLOATINGPOINT_HEXADECIMAL: + print_token_type(f, (token_type_t)token->type); + fputs(" '", f); + print_stringrep(&token->literal, f); + if (token->symbol != NULL) + fputs(token->symbol->string, f); + fputc('\'', f); break; + case T_WIDE_STRING_LITERAL: case T_STRING_LITERAL: - fprintf(f, "string \"%s\"", token->v.string.begin); + print_token_type(f, (token_type_t)token->type); + fprintf(f, " \"%s\"", token->literal.begin); + break; + case T_CHARACTER_CONSTANT: + case T_WIDE_CHARACTER_CONSTANT: + print_token_type(f, (token_type_t)token->type); + fputs(" \'", f); + print_stringrep(&token->literal, f); + fputs("'", f); break; default: fputc('\'', f); @@ -180,13 +202,13 @@ void print_pp_token(FILE *f, const token_t *token) { 
switch((preprocessor_token_type_t) token->type) { case TP_IDENTIFIER: - fprintf(f, "identifier '%s'", token->v.symbol->string); + fprintf(f, "identifier '%s'", token->symbol->string); break; case TP_NUMBER: - fprintf(f, "number '%s'", token->v.string.begin); + fprintf(f, "number '%s'", token->literal.begin); break; case TP_STRING_LITERAL: - fprintf(f, "string \"%s\"", token->v.string.begin); + fprintf(f, "string \"%s\"", token->literal.begin); break; default: print_pp_token_type(f, (preprocessor_token_type_t) token->type); diff --git a/token_t.h b/token_t.h index 10d2248..7bdc8d8 100644 --- a/token_t.h +++ b/token_t.h @@ -60,15 +60,9 @@ struct source_position_t { extern const source_position_t builtin_source_position; typedef struct { - int type; - union { - symbol_t *symbol; - long long intvalue; - long double floatvalue; - string_t string; - wide_string_t wide_string; - } v; - type_t *datatype; + int type; + symbol_t *symbol; /**< contains identifier. Contains number suffix for numbers */ + string_t literal; /**< string value/literal value */ source_position_t source_position; } token_t; diff --git a/tokens.inc b/tokens.inc index d801de9..39c5a6a 100644 --- a/tokens.inc +++ b/tokens.inc @@ -2,13 +2,16 @@ #define TS(x,str,val) #endif -TS(IDENTIFIER, "identifier", = 256) -TS(INTEGER, "integer number",) -TS(CHARACTER_CONSTANT, "character constant",) -TS(WIDE_CHARACTER_CONSTANT, "wide character constant",) -TS(FLOATINGPOINT, "floatingpoint number",) -TS(STRING_LITERAL, "string literal",) -TS(WIDE_STRING_LITERAL, "wide string literal",) +TS(IDENTIFIER, "identifier", = 256) +TS(INTEGER, "integer number",) +TS(INTEGER_OCTAL, "octal integer number",) +TS(INTEGER_HEXADECIMAL, "hexadecimal integer number",) +TS(FLOATINGPOINT, "floatingpoint number",) +TS(FLOATINGPOINT_HEXADECIMAL, "hexadecimal floatingpoint number",) +TS(CHARACTER_CONSTANT, "character constant",) +TS(WIDE_CHARACTER_CONSTANT, "wide character constant",) +TS(STRING_LITERAL, "string literal",) 
+TS(WIDE_STRING_LITERAL, "wide string literal",) #define ALTERNATE(name, val) T(_CXX, name, #name, val) #define PUNCTUATOR(name, string, val) T(_ALL, name, string, val) diff --git a/walk_statements.c b/walk_statements.c index 4c8a6c6..883a889 100644 --- a/walk_statements.c +++ b/walk_statements.c @@ -107,13 +107,11 @@ static void walk_expression(expression_t const *const expr, walk_expression(expr->va_copye.dst, callback, env); return; + EXPR_LITERAL_CASES case EXPR_INVALID: case EXPR_OFFSETOF: case EXPR_REFERENCE: case EXPR_REFERENCE_ENUM_VALUE: - case EXPR_CONST: - case EXPR_CHARACTER_CONSTANT: - case EXPR_WIDE_CHARACTER_CONSTANT: case EXPR_STRING_LITERAL: case EXPR_WIDE_STRING_LITERAL: case EXPR_FUNCNAME: diff --git a/wrappergen/write_fluffy.c b/wrappergen/write_fluffy.c index 236e625..05763d4 100644 --- a/wrappergen/write_fluffy.c +++ b/wrappergen/write_fluffy.c @@ -223,16 +223,9 @@ static void write_unary_expression(const unary_expression_t *expression) static void write_expression(const expression_t *expression) { - const const_expression_t *constant; - /* TODO */ switch(expression->kind) { - case EXPR_CONST: - constant = &expression->conste; - if(is_type_integer(expression->base.type)) { - fprintf(out, "%lld", constant->v.int_value); - } else { - fprintf(out, "%Lf", constant->v.float_value); - } + case EXPR_LITERAL_INTEGER: + fprintf(out, "%s", expression->literal.value.begin); break; EXPR_UNARY_CASES write_unary_expression((const unary_expression_t*) expression); diff --git a/wrappergen/write_jna.c b/wrappergen/write_jna.c index 5ab41fe..26bfd4e 100644 --- a/wrappergen/write_jna.c +++ b/wrappergen/write_jna.c @@ -298,16 +298,10 @@ static void write_binary_expression(const binary_expression_t *expression) static void write_expression(const expression_t *expression) { - const const_expression_t *constant; /* TODO */ switch(expression->kind) { - case EXPR_CONST: - constant = &expression->conste; - if(is_type_integer(expression->base.type)) { - fprintf(out, 
"%lld", constant->v.int_value); - } else { - fprintf(out, "%Lf", constant->v.float_value); - } + case EXPR_LITERAL_INTEGER: + fprintf(out, "%s", expression->literal.value.begin); break; case EXPR_REFERENCE_ENUM_VALUE: { /* UHOH... hacking */