X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;ds=sidebyside;f=parser.c;h=a558d94478234d29b1b573a3290d1f4223b1a6d5;hb=47b73b80e0b34b1e716253384d0a7c56f2a5e268;hp=fc156f475649ca929b06dee155e75357b34bf30a;hpb=9795a8c46d189b43d47dca502a9394a8d89c786e;p=cparser diff --git a/parser.c b/parser.c index fc156f4..a558d94 100644 --- a/parser.c +++ b/parser.c @@ -252,11 +252,8 @@ static void semantic_comparison(binary_expression_t *expression); case T_MINUSMINUS: \ case T_PLUSPLUS: \ case T_STRING_LITERAL: \ - case T_WIDE_CHARACTER_CONSTANT: \ - case T_WIDE_STRING_LITERAL: \ case T___FUNCDNAME__: \ case T___FUNCSIG__: \ - case T___FUNCTION__: \ case T___PRETTY_FUNCTION__: \ case T___alignof__: \ case T___builtin_classify_type: \ @@ -328,10 +325,8 @@ static size_t get_expression_struct_size(expression_kind_t kind) [EXPR_LITERAL_BOOLEAN] = sizeof(literal_expression_t), [EXPR_LITERAL_INTEGER] = sizeof(literal_expression_t), [EXPR_LITERAL_FLOATINGPOINT] = sizeof(literal_expression_t), - [EXPR_LITERAL_CHARACTER] = sizeof(literal_expression_t), - [EXPR_LITERAL_WIDE_CHARACTER] = sizeof(literal_expression_t), + [EXPR_LITERAL_CHARACTER] = sizeof(string_literal_expression_t), [EXPR_STRING_LITERAL] = sizeof(string_literal_expression_t), - [EXPR_WIDE_STRING_LITERAL] = sizeof(string_literal_expression_t), [EXPR_COMPOUND_LITERAL] = sizeof(compound_literal_expression_t), [EXPR_CALL] = sizeof(call_expression_t), [EXPR_UNARY_FIRST] = sizeof(unary_expression_t), @@ -433,7 +428,6 @@ static size_t get_initializer_size(initializer_kind_t kind) static const size_t sizes[] = { [INITIALIZER_VALUE] = sizeof(initializer_value_t), [INITIALIZER_STRING] = sizeof(initializer_string_t), - [INITIALIZER_WIDE_STRING] = sizeof(initializer_wide_string_t), [INITIALIZER_LIST] = sizeof(initializer_list_t), [INITIALIZER_DESIGNATOR] = sizeof(initializer_designator_t) }; @@ -631,33 +625,33 @@ static void type_error_incompatible(const char *msg, msg, type1, type2); } -/** - * Expect the current token is the expected token. - * If not, generate an error and skip until the next anchor. - */ -static void expect(token_kind_t const expected) +static bool skip_till(token_kind_t const expected, char const *const context) { if (UNLIKELY(token.kind != expected)) { - parse_error_expected(NULL, expected, NULL); + parse_error_expected(context, expected, NULL); add_anchor_token(expected); eat_until_anchor(); rem_anchor_token(expected); if (token.kind != expected) - return; + return false; } - eat(expected); + return true; +} + +/** + * Expect the current token is the expected token. + * If not, generate an error and skip until the next anchor. + */ +static void expect(token_kind_t const expected) +{ + if (skip_till(expected, NULL)) + eat(expected); } static symbol_t *expect_identifier(char const *const context, source_position_t *const pos) { - if (token.kind != T_IDENTIFIER) { - parse_error_expected(context, T_IDENTIFIER, NULL); - add_anchor_token(T_IDENTIFIER); - eat_until_anchor(); - rem_anchor_token(T_IDENTIFIER); - if (token.kind != T_IDENTIFIER) - return NULL; - } + if (!skip_till(T_IDENTIFIER, context)) + return NULL; symbol_t *const sym = token.base.symbol; if (pos) *pos = *HERE; @@ -1037,27 +1031,64 @@ static expression_t *parse_assignment_expression(void) return parse_subexpression(PREC_ASSIGNMENT); } -static void warn_string_concat(const source_position_t *pos) +static void append_string(string_t const *const s) { - warningf(WARN_TRADITIONAL, pos, "traditional C rejects string constant concatenation"); + /* FIXME Using the ast_obstack is a hack. Using the symbol_obstack is not + * possible, because other tokens are grown there alongside. */ + obstack_grow(&ast_obstack, s->begin, s->size); } -static string_t parse_string_literals(void) +static string_t finish_string(void) { - assert(token.kind == T_STRING_LITERAL); - string_t result = token.string.string; + obstack_1grow(&ast_obstack, '\0'); + size_t const size = obstack_object_size(&ast_obstack) - 1; + char const *const string = obstack_finish(&ast_obstack); + return (string_t){ string, size }; +} - eat(T_STRING_LITERAL); +static string_t concat_string_literals(string_encoding_t *const out_enc) +{ + assert(token.kind == T_STRING_LITERAL); - while (token.kind == T_STRING_LITERAL) { - warn_string_concat(HERE); - result = concat_strings(&result, &token.string.string); + string_t result; + string_encoding_t enc = token.string.encoding; + if (look_ahead(1)->kind == T_STRING_LITERAL) { + append_string(&token.string.string); + eat(T_STRING_LITERAL); + warningf(WARN_TRADITIONAL, HERE, "traditional C rejects string constant concatenation"); + do { + if (token.string.encoding != STRING_ENCODING_CHAR) { + enc = token.string.encoding; + } + append_string(&token.string.string); + eat(T_STRING_LITERAL); + } while (token.kind == T_STRING_LITERAL); + result = finish_string(); + } else { + result = token.string.string; eat(T_STRING_LITERAL); } + *out_enc = enc; return result; } +static string_t parse_string_literals(char const *const context) +{ + if (!skip_till(T_STRING_LITERAL, context)) + return (string_t){ "", 0 }; + + string_encoding_t enc; + source_position_t const pos = *HERE; + string_t const res = concat_string_literals(&enc); + + if (enc != STRING_ENCODING_CHAR) { + errorf(&pos, "expected plain string literal, got wide string literal"); + } + + return res; +} + static attribute_t *allocate_attribute_zero(attribute_kind_t kind) { attribute_t *attribute = allocate_ast_zero(sizeof(*attribute)); @@ -1456,9 +1487,9 @@ unary: return; case EXPR_LITERAL_CASES: + case EXPR_LITERAL_CHARACTER: case EXPR_ERROR: case EXPR_STRING_LITERAL: - case EXPR_WIDE_STRING_LITERAL: case EXPR_COMPOUND_LITERAL: // TODO init? case EXPR_SIZEOF: case EXPR_CLASSIFY_TYPE: @@ -1512,27 +1543,14 @@ static designator_t *parse_designation(void) } } -static initializer_t *initializer_from_string(array_type_t *const type, - const string_t *const string) +static initializer_t *initializer_from_string(array_type_t *const type, string_encoding_t const enc, string_t const *const string) { /* TODO: check len vs. size of array type */ (void) type; initializer_t *initializer = allocate_initializer_zero(INITIALIZER_STRING); - initializer->string.string = *string; - - return initializer; -} - -static initializer_t *initializer_from_wide_string(array_type_t *const type, - const string_t *const string) -{ - /* TODO: check len vs. size of array type */ - (void) type; - - initializer_t *const initializer = - allocate_initializer_zero(INITIALIZER_WIDE_STRING); - initializer->wide_string.string = *string; + initializer->string.encoding = enc; + initializer->string.string = *string; return initializer; } @@ -1545,39 +1563,30 @@ static initializer_t *initializer_from_expression(type_t *orig_type, { /* TODO check that expression is a constant expression */ - /* §6.7.8.14/15 char array may be initialized by string literals */ - type_t *type = skip_typeref(orig_type); - type_t *expr_type_orig = expression->base.type; - type_t *expr_type = skip_typeref(expr_type_orig); + type_t *const type = skip_typeref(orig_type); - if (is_type_array(type) && expr_type->kind == TYPE_POINTER) { + /* §6.7.8.14/15 char array may be initialized by string literals */ + if (expression->kind == EXPR_STRING_LITERAL && is_type_array(type)) { array_type_t *const array_type = &type->array; type_t *const element_type = skip_typeref(array_type->element_type); - - if (element_type->kind == TYPE_ATOMIC) { - atomic_type_kind_t akind = element_type->atomic.akind; - switch (expression->kind) { - case EXPR_STRING_LITERAL: - if (akind == ATOMIC_TYPE_CHAR - || akind == ATOMIC_TYPE_SCHAR - || akind == ATOMIC_TYPE_UCHAR) { - return initializer_from_string(array_type, - &expression->string_literal.value); - } - break; - - case EXPR_WIDE_STRING_LITERAL: { - type_t *bare_wchar_type = skip_typeref(type_wchar_t); - if (get_unqualified_type(element_type) == bare_wchar_type) { - return initializer_from_wide_string(array_type, - &expression->string_literal.value); - } - break; + switch (expression->string_literal.encoding) { + case STRING_ENCODING_CHAR: { + if (is_type_atomic(element_type, ATOMIC_TYPE_CHAR) || + is_type_atomic(element_type, ATOMIC_TYPE_SCHAR) || + is_type_atomic(element_type, ATOMIC_TYPE_UCHAR)) { + goto make_string_init; } + break; + } - default: - break; + case STRING_ENCODING_WIDE: { + type_t *bare_wchar_type = skip_typeref(type_wchar_t); + if (get_unqualified_type(element_type) == bare_wchar_type) { +make_string_init: + return initializer_from_string(array_type, expression->string_literal.encoding, &expression->string_literal.value); } + break; + } } } @@ -2057,9 +2066,7 @@ finish_designator: } /* handle { "string" } special case */ - if ((expression->kind == EXPR_STRING_LITERAL - || expression->kind == EXPR_WIDE_STRING_LITERAL) - && outer_type != NULL) { + if (expression->kind == EXPR_STRING_LITERAL && outer_type != NULL) { sub = initializer_from_expression(outer_type, expression); if (sub != NULL) { next_if(','); @@ -2209,11 +2216,7 @@ static initializer_t *parse_initializer(parse_initializer_env_t *env) break; case INITIALIZER_STRING: - size = result->string.string.size; - break; - - case INITIALIZER_WIDE_STRING: - size = result->wide_string.string.size; + size = get_string_len(result->string.encoding, &result->string.string) + 1; break; case INITIALIZER_DESIGNATOR: @@ -4622,8 +4625,8 @@ static bool expression_returns(expression_t const *const expr) case EXPR_REFERENCE: case EXPR_ENUM_CONSTANT: case EXPR_LITERAL_CASES: + case EXPR_LITERAL_CHARACTER: case EXPR_STRING_LITERAL: - case EXPR_WIDE_STRING_LITERAL: case EXPR_COMPOUND_LITERAL: // TODO descend into initialisers case EXPR_LABEL_ADDRESS: case EXPR_CLASSIFY_TYPE: @@ -4708,7 +4711,6 @@ static bool initializer_returns(initializer_t const *const init) } case INITIALIZER_STRING: - case INITIALIZER_WIDE_STRING: case INITIALIZER_DESIGNATOR: // designators have no payload return true; } @@ -5681,14 +5683,14 @@ struct expression_parser_function_t { static expression_parser_function_t expression_parsers[T_LAST_TOKEN]; -static type_t *get_string_type(void) -{ - return is_warn_on(WARN_WRITE_STRINGS) ? type_const_char_ptr : type_char_ptr; -} - -static type_t *get_wide_string_type(void) +static type_t *get_string_type(string_encoding_t const enc) { - return is_warn_on(WARN_WRITE_STRINGS) ? type_const_wchar_t_ptr : type_wchar_t_ptr; + bool const warn = is_warn_on(WARN_WRITE_STRINGS); + switch (enc) { + case STRING_ENCODING_CHAR: return warn ? type_const_char_ptr : type_char_ptr; + case STRING_ENCODING_WIDE: return warn ? type_const_wchar_t_ptr : type_wchar_t_ptr; + } + panic("invalid string encoding"); } /** @@ -5696,31 +5698,10 @@ static type_t *get_wide_string_type(void) */ static expression_t *parse_string_literal(void) { - source_position_t begin = *HERE; - string_t res = token.string.string; - bool is_wide = (token.kind == T_WIDE_STRING_LITERAL); - - next_token(); - while (token.kind == T_STRING_LITERAL - || token.kind == T_WIDE_STRING_LITERAL) { - warn_string_concat(HERE); - res = concat_strings(&res, &token.string.string); - next_token(); - is_wide |= token.kind == T_WIDE_STRING_LITERAL; - } - - expression_t *literal; - if (is_wide) { - literal = allocate_expression_zero(EXPR_WIDE_STRING_LITERAL); - literal->base.type = get_wide_string_type(); - } else { - literal = allocate_expression_zero(EXPR_STRING_LITERAL); - literal->base.type = get_string_type(); - } - literal->base.source_position = begin; - literal->literal.value = res; - - return literal; + expression_t *const expr = allocate_expression_zero(EXPR_STRING_LITERAL); + expr->string_literal.value = concat_string_literals(&expr->string_literal.encoding); + expr->base.type = get_string_type(expr->string_literal.encoding); + return expr; } /** @@ -5848,42 +5829,36 @@ static expression_t *parse_number_literal(void) */ static expression_t *parse_character_constant(void) { - expression_t *literal = allocate_expression_zero(EXPR_LITERAL_CHARACTER); - literal->base.type = c_mode & _CXX ? type_char : type_int; - literal->literal.value = token.string.string; + expression_t *const literal = allocate_expression_zero(EXPR_LITERAL_CHARACTER); + literal->string_literal.encoding = token.string.encoding; + literal->string_literal.value = token.string.string; + + size_t const size = get_string_len(token.string.encoding, &token.string.string); + switch (token.string.encoding) { + case STRING_ENCODING_CHAR: + literal->base.type = c_mode & _CXX ? type_char : type_int; + if (size > 1) { + if (!GNU_MODE && !(c_mode & _C99)) { + errorf(HERE, "more than 1 character in character constant"); + } else { + literal->base.type = type_int; + warningf(WARN_MULTICHAR, HERE, "multi-character character constant"); + } + } + break; - size_t len = literal->literal.value.size; - if (len > 1) { - if (!GNU_MODE && !(c_mode & _C99)) { - errorf(HERE, "more than 1 character in character constant"); - } else { - literal->base.type = type_int; + case STRING_ENCODING_WIDE: + literal->base.type = type_int; + if (size > 1) { warningf(WARN_MULTICHAR, HERE, "multi-character character constant"); } + break; } eat(T_CHARACTER_CONSTANT); return literal; } -/** - * Parse a wide character constant. - */ -static expression_t *parse_wide_character_constant(void) -{ - expression_t *literal = allocate_expression_zero(EXPR_LITERAL_WIDE_CHARACTER); - literal->base.type = type_int; - literal->literal.value = token.string.string; - - size_t len = wstrlen(&literal->literal.value); - if (len > 1) { - warningf(WARN_MULTICHAR, HERE, "multi-character character constant"); - } - - eat(T_WIDE_CHARACTER_CONSTANT); - return literal; -} - static entity_t *create_implicit_function(symbol_t *symbol, source_position_t const *const pos) { type_t *ntype = allocate_type_zero(TYPE_FUNCTION); @@ -5969,13 +5944,9 @@ type_t *revert_automatic_type_conversion(const expression_t *expression) } case EXPR_STRING_LITERAL: { - size_t size = expression->string_literal.value.size; - return make_array_type(type_char, size, TYPE_QUALIFIER_NONE); - } - - case EXPR_WIDE_STRING_LITERAL: { - size_t size = wstrlen(&expression->string_literal.value); - return make_array_type(type_wchar_t, size, TYPE_QUALIFIER_NONE); + size_t const size = get_string_len(expression->string_literal.encoding, &expression->string_literal.value) + 1; + type_t *const elem = get_unqualified_type(expression->base.type->pointer.points_to); + return make_array_type(elem, size, TYPE_QUALIFIER_NONE); } case EXPR_COMPOUND_LITERAL: @@ -6670,10 +6641,7 @@ static expression_t *parse_primary_expression(void) case T_INTEGER: case T_FLOATINGPOINT: return parse_number_literal(); case T_CHARACTER_CONSTANT: return parse_character_constant(); - case T_WIDE_CHARACTER_CONSTANT: return parse_wide_character_constant(); - case T_STRING_LITERAL: - case T_WIDE_STRING_LITERAL: return parse_string_literal(); - case T___FUNCTION__: + case T_STRING_LITERAL: return parse_string_literal(); case T___func__: return parse_function_keyword(FUNCNAME_FUNCTION); case T___PRETTY_FUNCTION__: return parse_function_keyword(FUNCNAME_PRETTY_FUNCTION); case T___FUNCSIG__: return parse_function_keyword(FUNCNAME_FUNCSIG); @@ -7970,8 +7938,7 @@ static void warn_string_literal_address(expression_t const* expr) expr = expr->unary.value; } - if (expr->kind == EXPR_STRING_LITERAL - || expr->kind == EXPR_WIDE_STRING_LITERAL) { + if (expr->kind == EXPR_STRING_LITERAL) { source_position_t const *const pos = &expr->base.source_position; warningf(WARN_ADDRESS, pos, "comparison with string literal results in unspecified behaviour"); } @@ -8309,11 +8276,9 @@ static bool expression_has_effect(const expression_t *const expr) case EXPR_LITERAL_MS_NOOP: return true; case EXPR_LITERAL_BOOLEAN: case EXPR_LITERAL_CHARACTER: - case EXPR_LITERAL_WIDE_CHARACTER: case EXPR_LITERAL_INTEGER: case EXPR_LITERAL_FLOATINGPOINT: case EXPR_STRING_LITERAL: return false; - case EXPR_WIDE_STRING_LITERAL: return false; case EXPR_CALL: { const call_expression_t *const call = &expr->call; @@ -8531,10 +8496,7 @@ static void register_expression_parser(parse_expression_function parser, { expression_parser_function_t *entry = &expression_parsers[token_kind]; - if (entry->parser != NULL) { - diagnosticf("for token '%k'\n", (token_kind_t)token_kind); - panic("trying to register multiple expression parsers for a token"); - } + assert(!entry->parser); entry->parser = parser; } @@ -8550,11 +8512,7 @@ static void register_infix_parser(parse_expression_infix_function parser, { expression_parser_function_t *entry = &expression_parsers[token_kind]; - if (entry->infix_parser != NULL) { - diagnosticf("for token '%k'\n", (token_kind_t)token_kind); - panic("trying to register multiple infix expression parsers for a " - "token"); - } + assert(!entry->infix_parser); entry->infix_parser = parser; entry->infix_precedence = precedence; } @@ -8640,7 +8598,7 @@ static asm_argument_t *parse_asm_arguments(bool is_out) return NULL; } - argument->constraints = parse_string_literals(); + argument->constraints = parse_string_literals("asm argument"); add_anchor_token(')'); expect('('); expression_t *expression = parse_expression(); @@ -8727,7 +8685,7 @@ static asm_clobber_t *parse_asm_clobbers(void) while (token.kind == T_STRING_LITERAL) { asm_clobber_t *clobber = allocate_ast_zero(sizeof(clobber[0])); - clobber->clobber = parse_string_literals(); + clobber->clobber = parse_string_literals(NULL); *anchor = clobber; anchor = &clobber->next; @@ -8748,40 +8706,27 @@ static statement_t *parse_asm_statement(void) asm_statement_t *asm_statement = &statement->asms; eat(T_asm); + add_anchor_token(')'); + add_anchor_token(':'); + add_anchor_token(T_STRING_LITERAL); if (next_if(T_volatile)) asm_statement->is_volatile = true; expect('('); - add_anchor_token(')'); - if (token.kind != T_STRING_LITERAL) { - parse_error_expected("after asm(", T_STRING_LITERAL, NULL); - goto end_of_asm; - } - asm_statement->asm_text = parse_string_literals(); + rem_anchor_token(T_STRING_LITERAL); + asm_statement->asm_text = parse_string_literals("asm statement"); - add_anchor_token(':'); - if (!next_if(':')) { - rem_anchor_token(':'); - goto end_of_asm; - } + if (next_if(':')) + asm_statement->outputs = parse_asm_arguments(true); - asm_statement->outputs = parse_asm_arguments(true); - if (!next_if(':')) { - rem_anchor_token(':'); - goto end_of_asm; - } + if (next_if(':')) + asm_statement->inputs = parse_asm_arguments(false); - asm_statement->inputs = parse_asm_arguments(false); - if (!next_if(':')) { - rem_anchor_token(':'); - goto end_of_asm; - } rem_anchor_token(':'); + if (next_if(':')) + asm_statement->clobbers = parse_asm_clobbers(); - asm_statement->clobbers = parse_asm_clobbers(); - -end_of_asm: rem_anchor_token(')'); expect(')'); expect(';'); @@ -9428,22 +9373,6 @@ static bool expression_is_local_variable(const expression_t *expression) return is_local_variable(entity); } -/** - * Check if a given expression represents a local variable and - * return its declaration then, else return NULL. - */ -entity_t *expression_is_variable(const expression_t *expression) -{ - if (expression->base.kind != EXPR_REFERENCE) { - return NULL; - } - entity_t *entity = expression->reference.entity; - if (entity->kind != ENTITY_VARIABLE) - return NULL; - - return entity; -} - static void err_or_warn(source_position_t const *const pos, char const *const msg) { if (c_mode & _CXX || strict_mode) { @@ -9810,12 +9739,9 @@ static statement_t *parse_compound_statement(bool inside_expression_statement) add_anchor_token(T_MINUSMINUS); add_anchor_token(T_PLUSPLUS); add_anchor_token(T_STRING_LITERAL); - add_anchor_token(T_WIDE_CHARACTER_CONSTANT); - add_anchor_token(T_WIDE_STRING_LITERAL); add_anchor_token(T__Bool); add_anchor_token(T__Complex); add_anchor_token(T__Imaginary); - add_anchor_token(T___FUNCTION__); add_anchor_token(T___PRETTY_FUNCTION__); add_anchor_token(T___alignof__); add_anchor_token(T___attribute__); @@ -9983,12 +9909,9 @@ static statement_t *parse_compound_statement(bool inside_expression_statement) rem_anchor_token(T___attribute__); rem_anchor_token(T___alignof__); rem_anchor_token(T___PRETTY_FUNCTION__); - rem_anchor_token(T___FUNCTION__); rem_anchor_token(T__Imaginary); rem_anchor_token(T__Complex); rem_anchor_token(T__Bool); - rem_anchor_token(T_WIDE_STRING_LITERAL); - rem_anchor_token(T_WIDE_CHARACTER_CONSTANT); rem_anchor_token(T_STRING_LITERAL); rem_anchor_token(T_PLUSPLUS); rem_anchor_token(T_MINUSMINUS); @@ -10062,7 +9985,7 @@ static void parse_global_asm(void) expect('('); rem_anchor_token(T_STRING_LITERAL); - statement->asms.asm_text = parse_string_literals(); + statement->asms.asm_text = parse_string_literals("global asm"); statement->base.next = unit->global_asm; unit->global_asm = statement; @@ -10077,7 +10000,7 @@ static void parse_linkage_specification(void) eat(T_extern); source_position_t const pos = *HERE; - char const *const linkage = parse_string_literals().begin; + char const *const linkage = parse_string_literals(NULL).begin; linkage_kind_t old_linkage = current_linkage; linkage_kind_t new_linkage; @@ -10215,7 +10138,6 @@ void start_parsing(void) { environment_stack = NEW_ARR_F(stack_entry_t, 0); label_stack = NEW_ARR_F(stack_entry_t, 0); - diagnostic_count = 0; error_count = 0; warning_count = 0;