Include string encoding in string_t.
[cparser] / parser.c
index e87bbb2..e2e560a 100644 (file)
--- a/parser.c
+++ b/parser.c
@@ -254,7 +254,6 @@ static void semantic_comparison(binary_expression_t *expression);
        case T_STRING_LITERAL:            \
        case T___FUNCDNAME__:             \
        case T___FUNCSIG__:               \
-       case T___FUNCTION__:              \
        case T___PRETTY_FUNCTION__:       \
        case T___alignof__:               \
        case T___builtin_classify_type:   \
@@ -326,8 +325,7 @@ static size_t get_expression_struct_size(expression_kind_t kind)
                [EXPR_LITERAL_BOOLEAN]            = sizeof(literal_expression_t),
                [EXPR_LITERAL_INTEGER]            = sizeof(literal_expression_t),
                [EXPR_LITERAL_FLOATINGPOINT]      = sizeof(literal_expression_t),
-               [EXPR_LITERAL_CHARACTER]          = sizeof(literal_expression_t),
-               [EXPR_LITERAL_WIDE_CHARACTER]     = sizeof(literal_expression_t),
+               [EXPR_LITERAL_CHARACTER]          = sizeof(string_literal_expression_t),
                [EXPR_STRING_LITERAL]             = sizeof(string_literal_expression_t),
                [EXPR_COMPOUND_LITERAL]           = sizeof(compound_literal_expression_t),
                [EXPR_CALL]                       = sizeof(call_expression_t),
@@ -429,8 +427,7 @@ static size_t get_initializer_size(initializer_kind_t kind)
 {
        static const size_t sizes[] = {
                [INITIALIZER_VALUE]       = sizeof(initializer_value_t),
-               [INITIALIZER_STRING]      = sizeof(initializer_string_t),
-               [INITIALIZER_WIDE_STRING] = sizeof(initializer_wide_string_t),
+               [INITIALIZER_STRING]      = sizeof(initializer_value_t),
                [INITIALIZER_LIST]        = sizeof(initializer_list_t),
                [INITIALIZER_DESIGNATOR]  = sizeof(initializer_designator_t)
        };
@@ -1041,51 +1038,49 @@ static void append_string(string_t const *const s)
        obstack_grow(&ast_obstack, s->begin, s->size);
 }
 
-static string_t finish_string(void)
+static string_t finish_string(string_encoding_t const enc)
 {
        obstack_1grow(&ast_obstack, '\0');
        size_t      const size   = obstack_object_size(&ast_obstack) - 1;
        char const *const string = obstack_finish(&ast_obstack);
-       return (string_t){ string, size };
+       return (string_t){ string, size, enc }; /* size excludes the '\0' grown above; enc is the caller-supplied encoding */
 }
 
-static string_t concat_string_literals(string_encoding_t *const out_enc)
+static string_t concat_string_literals(void)
 {
        assert(token.kind == T_STRING_LITERAL);
 
-       string_t          result;
-       string_encoding_t enc = token.string.encoding;
+       string_t result;
        if (look_ahead(1)->kind == T_STRING_LITERAL) {
+               string_encoding_t enc = token.string.string.encoding; /* seed from the FIRST literal, before it is eaten, so a leading non-char literal is not lost */
                append_string(&token.string.string);
                eat(T_STRING_LITERAL);
                warningf(WARN_TRADITIONAL, HERE, "traditional C rejects string constant concatenation");
                do {
-                       if (token.string.encoding != STRING_ENCODING_CHAR) {
-                               enc = token.string.encoding;
+                       if (token.string.string.encoding != STRING_ENCODING_CHAR) {
+                               enc = token.string.string.encoding; /* any non-char literal overrides the encoding of the result */
                        }
                        append_string(&token.string.string);
                        eat(T_STRING_LITERAL);
                } while (token.kind == T_STRING_LITERAL);
-               result = finish_string();
+               result = finish_string(enc); /* concatenated string carries the merged encoding */
        } else {
                result = token.string.string;
                eat(T_STRING_LITERAL);
        }
 
-       *out_enc = enc;
        return result;
 }
 
 static string_t parse_string_literals(char const *const context)
 {
        if (!skip_till(T_STRING_LITERAL, context))
-               return (string_t){ "", 0 };
+               return (string_t){ "", 0, STRING_ENCODING_CHAR };
 
-       string_encoding_t       enc;
        source_position_t const pos = *HERE;
-       string_t          const res = concat_string_literals(&enc);
+       string_t          const res = concat_string_literals();
 
-       if (enc != STRING_ENCODING_CHAR) {
+       if (res.encoding != STRING_ENCODING_CHAR) {
                errorf(&pos, "expected plain string literal, got wide string literal");
        }
 
@@ -1490,6 +1485,7 @@ unary:
                        return;
 
                case EXPR_LITERAL_CASES:
+               case EXPR_LITERAL_CHARACTER:
                case EXPR_ERROR:
                case EXPR_STRING_LITERAL:
                case EXPR_COMPOUND_LITERAL: // TODO init?
@@ -1545,31 +1541,6 @@ static designator_t *parse_designation(void)
        }
 }
 
-static initializer_t *initializer_from_string(array_type_t *const type,
-                                              const string_t *const string)
-{
-       /* TODO: check len vs. size of array type */
-       (void) type;
-
-       initializer_t *initializer = allocate_initializer_zero(INITIALIZER_STRING);
-       initializer->string.string = *string;
-
-       return initializer;
-}
-
-static initializer_t *initializer_from_wide_string(array_type_t *const type,
-                                                   const string_t *const string)
-{
-       /* TODO: check len vs. size of array type */
-       (void) type;
-
-       initializer_t *const initializer =
-               allocate_initializer_zero(INITIALIZER_WIDE_STRING);
-       initializer->wide_string.string = *string;
-
-       return initializer;
-}
-
 /**
  * Build an initializer from a given expression.
  */
@@ -1578,37 +1549,32 @@ static initializer_t *initializer_from_expression(type_t *orig_type,
 {
        /* TODO check that expression is a constant expression */
 
-       /* §6.7.8.14/15 char array may be initialized by string literals */
-       type_t *type           = skip_typeref(orig_type);
-       type_t *expr_type_orig = expression->base.type;
-       type_t *expr_type      = skip_typeref(expr_type_orig);
+       type_t *const type = skip_typeref(orig_type);
 
-       if (is_type_array(type) && expr_type->kind == TYPE_POINTER) {
+       /* §6.7.8.14/15 char array may be initialized by string literals */
+       if (expression->kind == EXPR_STRING_LITERAL && is_type_array(type)) {
                array_type_t *const array_type   = &type->array;
                type_t       *const element_type = skip_typeref(array_type->element_type);
-
-               if (element_type->kind == TYPE_ATOMIC && expression->kind == EXPR_STRING_LITERAL) {
-                       switch (expression->string_literal.encoding) {
-                       case STRING_ENCODING_CHAR: {
-                               atomic_type_kind_t const akind = element_type->atomic.akind;
-                               if (akind == ATOMIC_TYPE_CHAR
-                                               || akind == ATOMIC_TYPE_SCHAR
-                                               || akind == ATOMIC_TYPE_UCHAR) {
-                                       return initializer_from_string(array_type,
-                                                       &expression->string_literal.value);
-                               }
-                               break;
+               switch (expression->string_literal.value.encoding) {
+               case STRING_ENCODING_CHAR: {
+                       if (is_type_atomic(element_type, ATOMIC_TYPE_CHAR)  ||
+                           is_type_atomic(element_type, ATOMIC_TYPE_SCHAR) ||
+                           is_type_atomic(element_type, ATOMIC_TYPE_UCHAR)) {
+                               goto make_string_init;
                        }
+                       break;
+               }
 
-                       case STRING_ENCODING_WIDE: {
-                               type_t *bare_wchar_type = skip_typeref(type_wchar_t);
-                               if (get_unqualified_type(element_type) == bare_wchar_type) {
-                                       return initializer_from_wide_string(array_type,
-                                                       &expression->string_literal.value);
-                               }
-                               break;
-                       }
+               case STRING_ENCODING_WIDE: {
+                       type_t *bare_wchar_type = skip_typeref(type_wchar_t);
+                       if (get_unqualified_type(element_type) == bare_wchar_type) {
+make_string_init:;
+                               initializer_t *const init = allocate_initializer_zero(INITIALIZER_STRING);
+                               init->value.value = expression;
+                               return init;
                        }
+                       break;
+               }
                }
        }
 
@@ -2237,13 +2203,10 @@ static initializer_t *parse_initializer(parse_initializer_env_t *env)
                        size = max_index + 1;
                        break;
 
-               case INITIALIZER_STRING:
-                       size = result->string.string.size + 1;
-                       break;
-
-               case INITIALIZER_WIDE_STRING:
-                       size = result->wide_string.string.size;
+               case INITIALIZER_STRING: {
+                       size = get_string_len(&get_init_string(result)->value) + 1;
                        break;
+               }
 
                case INITIALIZER_DESIGNATOR:
                case INITIALIZER_VALUE:
@@ -3035,7 +2998,8 @@ warn_about_long_long:
                        } else {
                                errorf(pos, "multiple datatypes in declaration");
                        }
-                       goto end_error;
+                       specifiers->type = type_error_type;
+                       return;
                }
                }
 
@@ -3064,10 +3028,6 @@ warn_about_long_long:
        if (specifiers->attributes != NULL)
                type = handle_type_attributes(specifiers->attributes, type);
        specifiers->type = type;
-       return;
-
-end_error:
-       specifiers->type = type_error_type;
 }
 
 static type_qualifiers_t parse_type_qualifiers(void)
@@ -4651,6 +4611,7 @@ static bool expression_returns(expression_t const *const expr)
                case EXPR_REFERENCE:
                case EXPR_ENUM_CONSTANT:
                case EXPR_LITERAL_CASES:
+               case EXPR_LITERAL_CHARACTER:
                case EXPR_STRING_LITERAL:
                case EXPR_COMPOUND_LITERAL: // TODO descend into initialisers
                case EXPR_LABEL_ADDRESS:
@@ -4736,7 +4697,6 @@ static bool initializer_returns(initializer_t const *const init)
                }
 
                case INITIALIZER_STRING:
-               case INITIALIZER_WIDE_STRING:
                case INITIALIZER_DESIGNATOR: // designators have no payload
                        return true;
        }
@@ -5501,8 +5461,7 @@ static expression_t *find_create_select(const source_position_t *pos,
                symbol_t *iter_symbol = iter->base.symbol;
                if (iter_symbol == NULL) {
                        type_t *type = iter->declaration.type;
-                       if (type->kind != TYPE_COMPOUND_STRUCT
-                                       && type->kind != TYPE_COMPOUND_UNION)
+                       if (!is_type_compound(type))
                                continue;
 
                        compound_t *sub_compound = type->compound.compound;
@@ -5725,8 +5684,8 @@ static type_t *get_string_type(string_encoding_t const enc)
 static expression_t *parse_string_literal(void)
 {
        expression_t *const expr = allocate_expression_zero(EXPR_STRING_LITERAL);
-       expr->string_literal.value = concat_string_literals(&expr->string_literal.encoding);
-       expr->base.type            = get_string_type(expr->string_literal.encoding);
+       expr->string_literal.value = concat_string_literals();
+       expr->base.type            = get_string_type(expr->string_literal.value.encoding); /* encoding is now read from the string value itself */
        return expr;
 }
 
@@ -5855,15 +5814,14 @@ static expression_t *parse_number_literal(void)
  */
 static expression_t *parse_character_constant(void)
 {
-       expression_t *literal;
-       switch (token.string.encoding) {
-       case STRING_ENCODING_CHAR: {
-               literal = allocate_expression_zero(EXPR_LITERAL_CHARACTER);
-               literal->base.type     = c_mode & _CXX ? type_char : type_int;
-               literal->literal.value = token.string.string;
+       expression_t *const literal = allocate_expression_zero(EXPR_LITERAL_CHARACTER);
+       literal->string_literal.value = token.string.string;
 
-               size_t len = literal->literal.value.size;
-               if (len > 1) {
+       size_t const size = get_string_len(&token.string.string);
+       switch (token.string.string.encoding) {
+       case STRING_ENCODING_CHAR:
+               literal->base.type = c_mode & _CXX ? type_char : type_int;
+               if (size > 1) {
                        if (!GNU_MODE && !(c_mode & _C99)) {
                                errorf(HERE, "more than 1 character in character constant");
                        } else {
@@ -5872,20 +5830,14 @@ static expression_t *parse_character_constant(void)
                        }
                }
                break;
-       }
-
-       case STRING_ENCODING_WIDE: {
-               literal = allocate_expression_zero(EXPR_LITERAL_WIDE_CHARACTER);
-               literal->base.type     = type_int;
-               literal->literal.value = token.string.string;
 
-               size_t len = wstrlen(&literal->literal.value);
-               if (len > 1) {
+       case STRING_ENCODING_WIDE:
+               literal->base.type = type_int;
+               if (size > 1) {
                        warningf(WARN_MULTICHAR, HERE, "multi-character character constant");
                }
                break;
        }
-       }
 
        eat(T_CHARACTER_CONSTANT);
        return literal;
@@ -5976,7 +5928,7 @@ type_t *revert_automatic_type_conversion(const expression_t *expression)
        }
 
        case EXPR_STRING_LITERAL: {
-               size_t  const size = expression->string_literal.value.size + 1;
+               size_t  const size = get_string_len(&expression->string_literal.value) + 1;
                type_t *const elem = get_unqualified_type(expression->base.type->pointer.points_to);
                return make_array_type(elem, size, TYPE_QUALIFIER_NONE);
        }
@@ -6674,7 +6626,6 @@ static expression_t *parse_primary_expression(void)
        case T_FLOATINGPOINT:                return parse_number_literal();
        case T_CHARACTER_CONSTANT:           return parse_character_constant();
        case T_STRING_LITERAL:               return parse_string_literal();
-       case T___FUNCTION__:
        case T___func__:                     return parse_function_keyword(FUNCNAME_FUNCTION);
        case T___PRETTY_FUNCTION__:          return parse_function_keyword(FUNCNAME_PRETTY_FUNCTION);
        case T___FUNCSIG__:                  return parse_function_keyword(FUNCNAME_FUNCSIG);
@@ -6895,9 +6846,7 @@ static expression_t *parse_select_expression(expression_t *addr)
                type_left = type;
        }
 
-       if (type_left->kind != TYPE_COMPOUND_STRUCT &&
-           type_left->kind != TYPE_COMPOUND_UNION) {
-
+       if (!is_type_compound(type_left)) {
                if (is_type_valid(type_left) && !saw_error) {
                        errorf(&pos,
                               "request for member '%Y' in something not a struct or union, but '%T'",
@@ -8309,7 +8258,6 @@ static bool expression_has_effect(const expression_t *const expr)
                case EXPR_LITERAL_MS_NOOP:            return true;
                case EXPR_LITERAL_BOOLEAN:
                case EXPR_LITERAL_CHARACTER:
-               case EXPR_LITERAL_WIDE_CHARACTER:
                case EXPR_LITERAL_INTEGER:
                case EXPR_LITERAL_FLOATINGPOINT:
                case EXPR_STRING_LITERAL:             return false;
@@ -8530,10 +8478,7 @@ static void register_expression_parser(parse_expression_function parser,
 {
        expression_parser_function_t *entry = &expression_parsers[token_kind];
 
-       if (entry->parser != NULL) {
-               diagnosticf("for token '%k'\n", (token_kind_t)token_kind);
-               panic("trying to register multiple expression parsers for a token");
-       }
+       assert(!entry->parser);
        entry->parser = parser;
 }
 
@@ -8549,11 +8494,7 @@ static void register_infix_parser(parse_expression_infix_function parser,
 {
        expression_parser_function_t *entry = &expression_parsers[token_kind];
 
-       if (entry->infix_parser != NULL) {
-               diagnosticf("for token '%k'\n", (token_kind_t)token_kind);
-               panic("trying to register multiple infix expression parsers for a "
-                     "token");
-       }
+       assert(!entry->infix_parser);
        entry->infix_parser     = parser;
        entry->infix_precedence = precedence;
 }
@@ -9783,7 +9724,6 @@ static statement_t *parse_compound_statement(bool inside_expression_statement)
        add_anchor_token(T__Bool);
        add_anchor_token(T__Complex);
        add_anchor_token(T__Imaginary);
-       add_anchor_token(T___FUNCTION__);
        add_anchor_token(T___PRETTY_FUNCTION__);
        add_anchor_token(T___alignof__);
        add_anchor_token(T___attribute__);
@@ -9951,7 +9891,6 @@ static statement_t *parse_compound_statement(bool inside_expression_statement)
        rem_anchor_token(T___attribute__);
        rem_anchor_token(T___alignof__);
        rem_anchor_token(T___PRETTY_FUNCTION__);
-       rem_anchor_token(T___FUNCTION__);
        rem_anchor_token(T__Imaginary);
        rem_anchor_token(T__Complex);
        rem_anchor_token(T__Bool);
@@ -10181,7 +10120,6 @@ void start_parsing(void)
 {
        environment_stack = NEW_ARR_F(stack_entry_t, 0);
        label_stack       = NEW_ARR_F(stack_entry_t, 0);
-       diagnostic_count  = 0;
        error_count       = 0;
        warning_count     = 0;