X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=parser.c;h=8639a7a377015963aa0edd265e1e9d6cc89fd704;hb=516b4ed5ac675f2d94becf9fa0815ddc65f637ac;hp=e2e560a4e73fccc611f3cce258f80ea37fd3ff6c;hpb=2fb66fd8bd2a5956ab2cad26978ccfb7e105d45f;p=cparser diff --git a/parser.c b/parser.c index e2e560a..8639a7a 100644 --- a/parser.c +++ b/parser.c @@ -20,6 +20,7 @@ #include #include +#include #include #include @@ -138,9 +139,6 @@ static elf_visibility_tag_t default_visibility = ELF_VISIBILITY_DEFAULT; #define POP_EXTENSION() \ ((void)(in_gcc_extension = old_gcc_extension)) -/** special symbol used for anonymous entities. */ -static symbol_t *sym_anonymous = NULL; - /** The token anchor set */ static unsigned short token_anchor_set[T_LAST_TOKEN]; @@ -247,8 +245,7 @@ static void semantic_comparison(binary_expression_t *expression); case '~': \ case T_ANDAND: \ case T_CHARACTER_CONSTANT: \ - case T_FLOATINGPOINT: \ - case T_INTEGER: \ + case T_NUMBER: \ case T_MINUSMINUS: \ case T_PLUSPLUS: \ case T_STRING_LITERAL: \ @@ -481,12 +478,17 @@ static inline void next_token(void) #endif } -#define eat(token_kind) (assert(token.kind == (token_kind)), next_token()) +static inline void eat(token_kind_t const kind) +{ + assert(token.kind == kind); + (void)kind; + next_token(); +} -static inline bool next_if(token_kind_t const type) +static inline bool next_if(token_kind_t const kind) { - if (token.kind == type) { - eat(type); + if (token.kind == kind) { + eat(kind); return true; } else { return false; @@ -1052,20 +1054,20 @@ static string_t concat_string_literals(void) string_t result; if (look_ahead(1)->kind == T_STRING_LITERAL) { - append_string(&token.string.string); + append_string(&token.literal.string); eat(T_STRING_LITERAL); warningf(WARN_TRADITIONAL, HERE, "traditional C rejects string constant concatenation"); - string_encoding_t enc = token.string.string.encoding; + string_encoding_t enc = token.literal.string.encoding; do { - if (token.string.string.encoding != STRING_ENCODING_CHAR) { - enc = token.string.string.encoding; + if (token.literal.string.encoding != STRING_ENCODING_CHAR) { + enc = token.literal.string.encoding; } - append_string(&token.string.string); + append_string(&token.literal.string); eat(T_STRING_LITERAL); } while (token.kind == T_STRING_LITERAL); result = finish_string(enc); } else { - result = token.string.string; + result = token.literal.string; eat(T_STRING_LITERAL); } @@ -2460,6 +2462,7 @@ static type_t *parse_typeof(void) } typedef enum specifiers_t { + SPECIFIER_NONE = 0, SPECIFIER_SIGNED = 1 << 0, SPECIFIER_UNSIGNED = 1 << 1, SPECIFIER_LONG = 1 << 2, @@ -5703,110 +5706,204 @@ static expression_t *parse_boolean_literal(bool value) return literal; } -static void warn_traditional_suffix(void) +static void warn_traditional_suffix(char const *const suffix) { - warningf(WARN_TRADITIONAL, HERE, "traditional C rejects the '%S' suffix", - &token.number.suffix); + warningf(WARN_TRADITIONAL, HERE, "traditional C rejects the '%s' suffix", suffix); } -static void check_integer_suffix(void) +static void check_integer_suffix(expression_t *const expr, char const *const suffix) { - const string_t *suffix = &token.number.suffix; - if (suffix->size == 0) - return; - - bool not_traditional = false; - const char *c = suffix->begin; - if (*c == 'l' || *c == 'L') { - ++c; - if (*c == *(c-1)) { - not_traditional = true; - ++c; - if (*c == 'u' || *c == 'U') { + unsigned spec = SPECIFIER_NONE; + char const *c = suffix; + for (;;) { + specifiers_t add; + if (*c == 'L' || *c == 'l') { + add = SPECIFIER_LONG; + if (*c == c[1]) { + add |= SPECIFIER_LONG_LONG; ++c; } - } else if (*c == 'u' || *c == 'U') { - not_traditional = true; - ++c; + } else if (*c == 'U' || *c == 'u') { + add = SPECIFIER_UNSIGNED; + } else { + break; } - } else if (*c == 'u' || *c == 'U') { - not_traditional = true; ++c; - if (*c == 'l' || *c == 'L') { - ++c; - if (*c == *(c-1)) { - ++c; - } - } - } - if (*c != '\0') { - errorf(HERE, "invalid suffix '%S' on integer constant", suffix); - } else if (not_traditional) { - warn_traditional_suffix(); + if (spec & add) + goto error; + spec |= add; + } + + if (*c == '\0') { + type_t *type; + switch (spec) { + case SPECIFIER_NONE: type = type_int; break; + case SPECIFIER_LONG: type = type_long; break; + case SPECIFIER_LONG | SPECIFIER_LONG_LONG: type = type_long_long; break; + case SPECIFIER_UNSIGNED: type = type_unsigned_int; break; + case SPECIFIER_UNSIGNED | SPECIFIER_LONG: type = type_unsigned_long; break; + case SPECIFIER_UNSIGNED | SPECIFIER_LONG | SPECIFIER_LONG_LONG: type = type_unsigned_long_long; break; + default: panic("inconsistent suffix"); + } + if (spec != SPECIFIER_NONE && spec != SPECIFIER_LONG) { + warn_traditional_suffix(suffix); + } + expr->base.type = type; + /* Integer type depends on the size of the number and the size + * representable by the types. The backend/codegeneration has to + * determine that. */ + determine_literal_type(&expr->literal); + } else { +error: + errorf(HERE, "invalid suffix '%s' on integer constant", suffix); } } -static type_t *check_floatingpoint_suffix(void) +static void check_floatingpoint_suffix(expression_t *const expr, char const *const suffix) { - const string_t *suffix = &token.number.suffix; - type_t *type = type_double; - if (suffix->size == 0) - return type; - - bool not_traditional = false; - const char *c = suffix->begin; - if (*c == 'f' || *c == 'F') { - ++c; - type = type_float; - } else if (*c == 'l' || *c == 'L') { - ++c; - type = type_long_double; - } - if (*c != '\0') { - errorf(HERE, "invalid suffix '%S' on floatingpoint constant", suffix); - } else if (not_traditional) { - warn_traditional_suffix(); + type_t *type; + char const *c = suffix; + switch (*c) { + case 'F': + case 'f': type = type_float; ++c; break; + case 'L': + case 'l': type = type_long_double; ++c; break; + default: type = type_double; break; } - return type; + if (*c == '\0') { + expr->base.type = type; + if (suffix[0] != '\0') { + warn_traditional_suffix(suffix); + } + } else { + errorf(HERE, "invalid suffix '%s' on floatingpoint constant", suffix); + } } -/** - * Parse an integer constant. - */ static expression_t *parse_number_literal(void) { - expression_kind_t kind; - type_t *type; + string_t const *const str = &token.literal.string; + char const * i = str->begin; + unsigned digits = 0; + bool is_float = false; - switch (token.kind) { - case T_INTEGER: - kind = EXPR_LITERAL_INTEGER; - check_integer_suffix(); - type = type_int; + /* Parse base prefix. */ + unsigned base; + if (*i == '0') { + switch (*++i) { + case 'B': case 'b': base = 2; ++i; break; + case 'X': case 'x': base = 16; ++i; break; + default: base = 8; digits |= 1U << 0; break; + } + } else { + base = 10; + } + + /* Parse mantissa. */ + for (;; ++i) { + unsigned digit; + switch (*i) { + case '.': + if (is_float) { + errorf(HERE, "multiple decimal points in %K", &token); + i = 0; + goto done; + } + is_float = true; + if (base == 8) + base = 10; + continue; + + case '0': digit = 0; break; + case '1': digit = 1; break; + case '2': digit = 2; break; + case '3': digit = 3; break; + case '4': digit = 4; break; + case '5': digit = 5; break; + case '6': digit = 6; break; + case '7': digit = 7; break; + case '8': digit = 8; break; + case '9': digit = 9; break; + case 'A': case 'a': digit = 10; break; + case 'B': case 'b': digit = 11; break; + case 'C': case 'c': digit = 12; break; + case 'D': case 'd': digit = 13; break; + case 'E': case 'e': digit = 14; break; + case 'F': case 'f': digit = 15; break; + + default: goto done_mantissa; + } + + if (digit >= 10 && base != 16) + goto done_mantissa; + + digits |= 1U << digit; + } +done_mantissa: + + /* Parse exponent. */ + switch (base) { + case 2: + if (is_float) + errorf(HERE, "binary floating %K not allowed", &token); break; - case T_FLOATINGPOINT: - kind = EXPR_LITERAL_FLOATINGPOINT; - type = check_floatingpoint_suffix(); + case 8: + case 10: + if (*i == 'E' || *i == 'e') { + base = 10; + goto parse_exponent; + } + break; + + case 16: + if (*i == 'P' || *i == 'p') { +parse_exponent: + ++i; + is_float = true; + + if (*i == '-' || *i == '+') + ++i; + + if (isdigit(*i)) { + do { + ++i; + } while (isdigit(*i)); + } else { + errorf(HERE, "exponent of %K has no digits", &token); + } + } else if (is_float) { + errorf(HERE, "hexadecimal floating %K requires an exponent", &token); + i = 0; + } break; default: - panic("unexpected token type in parse_number_literal"); + panic("invalid base"); } - expression_t *literal = allocate_expression_zero(kind); - literal->base.type = type; - literal->literal.value = token.number.number; - literal->literal.suffix = token.number.suffix; - next_token(); +done:; + expression_t *const expr = allocate_expression_zero(is_float ? EXPR_LITERAL_FLOATINGPOINT : EXPR_LITERAL_INTEGER); + expr->literal.value = *str; - /* integer type depends on the size of the number and the size - * representable by the types. The backend/codegeneration has to determine - * that - */ - determine_literal_type(&literal->literal); - return literal; + if (i) { + if (digits == 0) { + errorf(HERE, "%K has no digits", &token); + } else if (digits & ~((1U << base) - 1)) { + errorf(HERE, "invalid digit in %K", &token); + } else { + expr->literal.suffix = i; + if (is_float) { + check_floatingpoint_suffix(expr, i); + } else { + check_integer_suffix(expr, i); + } + } + } + + eat(T_NUMBER); + return expr; } /** @@ -5815,10 +5912,10 @@ static expression_t *parse_number_literal(void) static expression_t *parse_character_constant(void) { expression_t *const literal = allocate_expression_zero(EXPR_LITERAL_CHARACTER); - literal->string_literal.value = token.string.string; + literal->string_literal.value = token.literal.string; - size_t const size = get_string_len(&token.string.string); - switch (token.string.string.encoding) { + size_t const size = get_string_len(&token.literal.string); + switch (token.literal.string.encoding) { case STRING_ENCODING_CHAR: literal->base.type = c_mode & _CXX ? type_char : type_int; if (size > 1) { @@ -6622,8 +6719,7 @@ static expression_t *parse_primary_expression(void) switch (token.kind) { case T_false: return parse_boolean_literal(false); case T_true: return parse_boolean_literal(true); - case T_INTEGER: - case T_FLOATINGPOINT: return parse_number_literal(); + case T_NUMBER: return parse_number_literal(); case T_CHARACTER_CONSTANT: return parse_character_constant(); case T_STRING_LITERAL: return parse_string_literal(); case T___func__: return parse_function_keyword(FUNCNAME_FUNCTION); @@ -9715,10 +9811,9 @@ static statement_t *parse_compound_statement(bool inside_expression_statement) add_anchor_token('~'); add_anchor_token(T_CHARACTER_CONSTANT); add_anchor_token(T_COLONCOLON); - add_anchor_token(T_FLOATINGPOINT); add_anchor_token(T_IDENTIFIER); - add_anchor_token(T_INTEGER); add_anchor_token(T_MINUSMINUS); + add_anchor_token(T_NUMBER); add_anchor_token(T_PLUSPLUS); add_anchor_token(T_STRING_LITERAL); add_anchor_token(T__Bool); @@ -9896,10 +9991,9 @@ static statement_t *parse_compound_statement(bool inside_expression_statement) rem_anchor_token(T__Bool); rem_anchor_token(T_STRING_LITERAL); rem_anchor_token(T_PLUSPLUS); + rem_anchor_token(T_NUMBER); rem_anchor_token(T_MINUSMINUS); - rem_anchor_token(T_INTEGER); rem_anchor_token(T_IDENTIFIER); - rem_anchor_token(T_FLOATINGPOINT); rem_anchor_token(T_COLONCOLON); rem_anchor_token(T_CHARACTER_CONSTANT); rem_anchor_token('~'); @@ -10235,8 +10329,6 @@ void parse(void) */ void init_parser(void) { - sym_anonymous = symbol_table_insert(""); - memset(token_anchor_set, 0, sizeof(token_anchor_set)); init_expression_parsers();