Augment MATCH_NEWLINE() so its usage looks like an ordinary case label.
[cparser] / parser.c
index 0a0cdf6..0d2c9a2 100644 (file)
--- a/parser.c
+++ b/parser.c
@@ -252,8 +252,6 @@ static void semantic_comparison(binary_expression_t *expression);
        case T_MINUSMINUS:                \
        case T_PLUSPLUS:                  \
        case T_STRING_LITERAL:            \
-       case T_WIDE_CHARACTER_CONSTANT:   \
-       case T_WIDE_STRING_LITERAL:       \
        case T___FUNCDNAME__:             \
        case T___FUNCSIG__:               \
        case T___FUNCTION__:              \
@@ -331,7 +329,6 @@ static size_t get_expression_struct_size(expression_kind_t kind)
                [EXPR_LITERAL_CHARACTER]          = sizeof(literal_expression_t),
                [EXPR_LITERAL_WIDE_CHARACTER]     = sizeof(literal_expression_t),
                [EXPR_STRING_LITERAL]             = sizeof(string_literal_expression_t),
-               [EXPR_WIDE_STRING_LITERAL]        = sizeof(string_literal_expression_t),
                [EXPR_COMPOUND_LITERAL]           = sizeof(compound_literal_expression_t),
                [EXPR_CALL]                       = sizeof(call_expression_t),
                [EXPR_UNARY_FIRST]                = sizeof(unary_expression_t),
@@ -1037,27 +1034,61 @@ static expression_t *parse_assignment_expression(void)
        return parse_subexpression(PREC_ASSIGNMENT);
 }
 
-static void warn_string_concat(const source_position_t *pos)
+static void append_string(string_t const *const s)
 {
-       warningf(WARN_TRADITIONAL, pos, "traditional C rejects string constant concatenation");
+       /* FIXME Using the ast_obstack is a hack.  Using the symbol_obstack is not
+        * possible, because other tokens are grown there alongside. */
+       obstack_grow(&ast_obstack, s->begin, s->size); /* add s->size bytes starting at s->begin to the object growing on ast_obstack */
 }
 
-static string_t parse_string_literals(void)
+static string_t finish_string(void) /* closes the object growing on ast_obstack and returns it as a string_t */
 {
-       assert(token.kind == T_STRING_LITERAL);
-       string_t result = token.string.string;
+       obstack_1grow(&ast_obstack, '\0'); /* terminate the grown bytes with a NUL */
+       size_t      const size   = obstack_object_size(&ast_obstack) - 1; /* reported size leaves out that NUL */
+       char const *const string = obstack_finish(&ast_obstack);
+       return (string_t){ string, size };
+}
 
-       eat(T_STRING_LITERAL);
+static string_t concat_string_literals(string_encoding_t *const out_enc) /* consumes a run of T_STRING_LITERAL tokens; *out_enc receives the resulting encoding */
+{
+       assert(token.kind == T_STRING_LITERAL);
 
-       while (token.kind == T_STRING_LITERAL) {
-               warn_string_concat(HERE);
-               result = concat_strings(&result, &token.string.string);
+       string_t          result;
+       string_encoding_t enc = token.string.encoding; /* start from the first literal's encoding */
+       if (look_ahead(1)->kind == T_STRING_LITERAL) { /* at least two adjacent literals: build a joined copy */
+               append_string(&token.string.string);
+               eat(T_STRING_LITERAL);
+               warningf(WARN_TRADITIONAL, HERE, "traditional C rejects string constant concatenation"); /* issued once per run, with HERE read after the first literal was eaten */
+               do {
+                       if (token.string.encoding != STRING_ENCODING_CHAR) { /* a later non-char piece overrides the encoding */
+                               enc = token.string.encoding;
+                       }
+                       append_string(&token.string.string);
+                       eat(T_STRING_LITERAL);
+               } while (token.kind == T_STRING_LITERAL);
+               result = finish_string();
+       } else {
+               result = token.string.string; /* lone literal: use the token's string_t directly, nothing is appended */
                eat(T_STRING_LITERAL);
        }
 
+       *out_enc = enc;
        return result;
 }
 
+static string_t parse_string_literals(void) /* wraps concat_string_literals() and reports an error unless the result encoding is STRING_ENCODING_CHAR */
+{
+       string_encoding_t       enc;
+       source_position_t const pos = *HERE; /* copied before any token is consumed; used for the diagnostic below */
+       string_t          const res = concat_string_literals(&enc);
+
+       if (enc != STRING_ENCODING_CHAR) {
+               errorf(&pos, "expected plain string literal, got wide string literal");
+       }
+
+       return res; /* the string is returned even after the error above */
+}
+
 static attribute_t *allocate_attribute_zero(attribute_kind_t kind)
 {
        attribute_t *attribute = allocate_ast_zero(sizeof(*attribute));
@@ -1458,7 +1489,6 @@ unary:
                case EXPR_LITERAL_CASES:
                case EXPR_ERROR:
                case EXPR_STRING_LITERAL:
-               case EXPR_WIDE_STRING_LITERAL:
                case EXPR_COMPOUND_LITERAL: // TODO init?
                case EXPR_SIZEOF:
                case EXPR_CLASSIFY_TYPE:
@@ -1554,10 +1584,10 @@ static initializer_t *initializer_from_expression(type_t *orig_type,
                array_type_t *const array_type   = &type->array;
                type_t       *const element_type = skip_typeref(array_type->element_type);
 
-               if (element_type->kind == TYPE_ATOMIC) {
-                       atomic_type_kind_t akind = element_type->atomic.akind;
-                       switch (expression->kind) {
-                       case EXPR_STRING_LITERAL:
+               if (element_type->kind == TYPE_ATOMIC && expression->kind == EXPR_STRING_LITERAL) {
+                       switch (expression->string_literal.encoding) {
+                       case STRING_ENCODING_CHAR: {
+                               atomic_type_kind_t const akind = element_type->atomic.akind;
                                if (akind == ATOMIC_TYPE_CHAR
                                                || akind == ATOMIC_TYPE_SCHAR
                                                || akind == ATOMIC_TYPE_UCHAR) {
@@ -1565,8 +1595,9 @@ static initializer_t *initializer_from_expression(type_t *orig_type,
                                                        &expression->string_literal.value);
                                }
                                break;
+                       }
 
-                       case EXPR_WIDE_STRING_LITERAL: {
+                       case STRING_ENCODING_WIDE: {
                                type_t *bare_wchar_type = skip_typeref(type_wchar_t);
                                if (get_unqualified_type(element_type) == bare_wchar_type) {
                                        return initializer_from_wide_string(array_type,
@@ -1574,9 +1605,6 @@ static initializer_t *initializer_from_expression(type_t *orig_type,
                                }
                                break;
                        }
-
-                       default:
-                               break;
                        }
                }
        }
@@ -1829,7 +1857,8 @@ static bool walk_designator(type_path_t *path, const designator_t *designator,
                        }
                } else {
                        expression_t *array_index = designator->array_index;
-                       assert(designator->array_index != NULL);
+                       if (is_constant_expression(array_index) != EXPR_CLASS_CONSTANT)
+                               return true;
 
                        if (!is_type_array(type)) {
                                if (is_type_valid(type)) {
@@ -2056,9 +2085,7 @@ finish_designator:
                        }
 
                        /* handle { "string" } special case */
-                       if ((expression->kind == EXPR_STRING_LITERAL
-                                       || expression->kind == EXPR_WIDE_STRING_LITERAL)
-                                       && outer_type != NULL) {
+                       if (expression->kind == EXPR_STRING_LITERAL && outer_type != NULL) {
                                sub = initializer_from_expression(outer_type, expression);
                                if (sub != NULL) {
                                        next_if(',');
@@ -2208,7 +2235,7 @@ static initializer_t *parse_initializer(parse_initializer_env_t *env)
                        break;
 
                case INITIALIZER_STRING:
-                       size = result->string.string.size;
+                       size = result->string.string.size + 1;
                        break;
 
                case INITIALIZER_WIDE_STRING:
@@ -4622,7 +4649,6 @@ static bool expression_returns(expression_t const *const expr)
                case EXPR_ENUM_CONSTANT:
                case EXPR_LITERAL_CASES:
                case EXPR_STRING_LITERAL:
-               case EXPR_WIDE_STRING_LITERAL:
                case EXPR_COMPOUND_LITERAL: // TODO descend into initialisers
                case EXPR_LABEL_ADDRESS:
                case EXPR_CLASSIFY_TYPE:
@@ -5680,14 +5706,14 @@ struct expression_parser_function_t {
 
 static expression_parser_function_t expression_parsers[T_LAST_TOKEN];
 
-static type_t *get_string_type(void)
-{
-       return is_warn_on(WARN_WRITE_STRINGS) ? type_const_char_ptr : type_char_ptr;
-}
-
-static type_t *get_wide_string_type(void)
+static type_t *get_string_type(string_encoding_t const enc) /* one entry point for both encodings; replaces the two removed getters */
 {
-       return is_warn_on(WARN_WRITE_STRINGS) ? type_const_wchar_t_ptr : type_wchar_t_ptr;
+       bool const warn = is_warn_on(WARN_WRITE_STRINGS); /* selects the type_const_* variants when this warning is on */
+       switch (enc) {
+       case STRING_ENCODING_CHAR: return warn ? type_const_char_ptr    : type_char_ptr;
+       case STRING_ENCODING_WIDE: return warn ? type_const_wchar_t_ptr : type_wchar_t_ptr;
+       }
+       panic("invalid string encoding"); /* reached only when enc matches neither case above */
 }
 
 /**
@@ -5695,31 +5721,10 @@ static type_t *get_wide_string_type(void)
  */
 static expression_t *parse_string_literal(void)
 {
-       source_position_t begin   = *HERE;
-       string_t          res     = token.string.string;
-       bool              is_wide = (token.kind == T_WIDE_STRING_LITERAL);
-
-       next_token();
-       while (token.kind == T_STRING_LITERAL
-                       || token.kind == T_WIDE_STRING_LITERAL) {
-               warn_string_concat(HERE);
-               res = concat_strings(&res, &token.string.string);
-               next_token();
-               is_wide |= token.kind == T_WIDE_STRING_LITERAL;
-       }
-
-       expression_t *literal;
-       if (is_wide) {
-               literal = allocate_expression_zero(EXPR_WIDE_STRING_LITERAL);
-               literal->base.type = get_wide_string_type();
-       } else {
-               literal = allocate_expression_zero(EXPR_STRING_LITERAL);
-               literal->base.type = get_string_type();
-       }
-       literal->base.source_position = begin;
-       literal->literal.value        = res;
-
-       return literal;
+       expression_t *const expr = allocate_expression_zero(EXPR_STRING_LITERAL); /* NOTE(review): base.source_position is no longer assigned in this function — confirm allocate_expression_zero() records it */
+       expr->string_literal.value = concat_string_literals(&expr->string_literal.encoding); /* the call also fills in the encoding field */
+       expr->base.type            = get_string_type(expr->string_literal.encoding); /* type chosen from the encoding just stored */
+       return expr;
 }
 
 /**
@@ -5847,39 +5852,39 @@ static expression_t *parse_number_literal(void)
  */
 static expression_t *parse_character_constant(void)
 {
-       expression_t *literal = allocate_expression_zero(EXPR_LITERAL_CHARACTER);
-       literal->base.type     = c_mode & _CXX ? type_char : type_int;
-       literal->literal.value = token.string.string;
-
-       size_t len = literal->literal.value.size;
-       if (len > 1) {
-               if (!GNU_MODE && !(c_mode & _C99)) {
-                       errorf(HERE, "more than 1 character in character constant");
-               } else {
-                       literal->base.type = type_int;
-                       warningf(WARN_MULTICHAR, HERE, "multi-character character constant");
+       expression_t *literal; /* NOTE(review): stays unassigned if the encoding matches neither case below (the switch has no default) */
+       switch (token.string.encoding) {
+       case STRING_ENCODING_CHAR: {
+               literal = allocate_expression_zero(EXPR_LITERAL_CHARACTER);
+               literal->base.type     = c_mode & _CXX ? type_char : type_int;
+               literal->literal.value = token.string.string;
+
+               size_t len = literal->literal.value.size;
+               if (len > 1) {
+                       if (!GNU_MODE && !(c_mode & _C99)) {
+                               errorf(HERE, "more than 1 character in character constant");
+                       } else {
+                               literal->base.type = type_int; /* multi-character constants get type_int even when c_mode has _CXX set */
+                               warningf(WARN_MULTICHAR, HERE, "multi-character character constant");
+                       }
                }
+               break;
        }
 
-       eat(T_CHARACTER_CONSTANT);
-       return literal;
-}
-
-/**
- * Parse a wide character constant.
- */
-static expression_t *parse_wide_character_constant(void)
-{
-       expression_t *literal = allocate_expression_zero(EXPR_LITERAL_WIDE_CHARACTER);
-       literal->base.type     = type_int;
-       literal->literal.value = token.string.string;
+       case STRING_ENCODING_WIDE: { /* body taken over from the removed parse_wide_character_constant() */
+               literal = allocate_expression_zero(EXPR_LITERAL_WIDE_CHARACTER);
+               literal->base.type     = type_int;
+               literal->literal.value = token.string.string;
 
-       size_t len = wstrlen(&literal->literal.value);
-       if (len > 1) {
-               warningf(WARN_MULTICHAR, HERE, "multi-character character constant");
+               size_t len = wstrlen(&literal->literal.value); /* presumably counts characters rather than bytes — confirm against wstrlen() */
+               if (len > 1) {
+                       warningf(WARN_MULTICHAR, HERE, "multi-character character constant"); /* only a warning here; the char case may raise an error instead */
+               }
+               break;
+       }
        }
 
-       eat(T_WIDE_CHARACTER_CONSTANT);
+       eat(T_CHARACTER_CONSTANT); /* both encodings arrive as this one token kind */
        return literal;
 }
 
@@ -5968,13 +5973,9 @@ type_t *revert_automatic_type_conversion(const expression_t *expression)
        }
 
        case EXPR_STRING_LITERAL: {
-               size_t size = expression->string_literal.value.size;
-               return make_array_type(type_char, size, TYPE_QUALIFIER_NONE);
-       }
-
-       case EXPR_WIDE_STRING_LITERAL: {
-               size_t size = wstrlen(&expression->string_literal.value);
-               return make_array_type(type_wchar_t, size, TYPE_QUALIFIER_NONE);
+               size_t  const size = expression->string_literal.value.size + 1;
+               type_t *const elem = get_unqualified_type(expression->base.type->pointer.points_to);
+               return make_array_type(elem, size, TYPE_QUALIFIER_NONE);
        }
 
        case EXPR_COMPOUND_LITERAL:
@@ -6669,9 +6670,7 @@ static expression_t *parse_primary_expression(void)
        case T_INTEGER:
        case T_FLOATINGPOINT:                return parse_number_literal();
        case T_CHARACTER_CONSTANT:           return parse_character_constant();
-       case T_WIDE_CHARACTER_CONSTANT:      return parse_wide_character_constant();
-       case T_STRING_LITERAL:
-       case T_WIDE_STRING_LITERAL:          return parse_string_literal();
+       case T_STRING_LITERAL:               return parse_string_literal();
        case T___FUNCTION__:
        case T___func__:                     return parse_function_keyword(FUNCNAME_FUNCTION);
        case T___PRETTY_FUNCTION__:          return parse_function_keyword(FUNCNAME_PRETTY_FUNCTION);
@@ -7969,8 +7968,7 @@ static void warn_string_literal_address(expression_t const* expr)
                expr = expr->unary.value;
        }
 
-       if (expr->kind == EXPR_STRING_LITERAL
-                       || expr->kind == EXPR_WIDE_STRING_LITERAL) {
+       if (expr->kind == EXPR_STRING_LITERAL) {
                source_position_t const *const pos = &expr->base.source_position;
                warningf(WARN_ADDRESS, pos, "comparison with string literal results in unspecified behaviour");
        }
@@ -8312,7 +8310,6 @@ static bool expression_has_effect(const expression_t *const expr)
                case EXPR_LITERAL_INTEGER:
                case EXPR_LITERAL_FLOATINGPOINT:
                case EXPR_STRING_LITERAL:             return false;
-               case EXPR_WIDE_STRING_LITERAL:        return false;
 
                case EXPR_CALL: {
                        const call_expression_t *const call = &expr->call;
@@ -9809,8 +9806,6 @@ static statement_t *parse_compound_statement(bool inside_expression_statement)
        add_anchor_token(T_MINUSMINUS);
        add_anchor_token(T_PLUSPLUS);
        add_anchor_token(T_STRING_LITERAL);
-       add_anchor_token(T_WIDE_CHARACTER_CONSTANT);
-       add_anchor_token(T_WIDE_STRING_LITERAL);
        add_anchor_token(T__Bool);
        add_anchor_token(T__Complex);
        add_anchor_token(T__Imaginary);
@@ -9986,8 +9981,6 @@ static statement_t *parse_compound_statement(bool inside_expression_statement)
        rem_anchor_token(T__Imaginary);
        rem_anchor_token(T__Complex);
        rem_anchor_token(T__Bool);
-       rem_anchor_token(T_WIDE_STRING_LITERAL);
-       rem_anchor_token(T_WIDE_CHARACTER_CONSTANT);
        rem_anchor_token(T_STRING_LITERAL);
        rem_anchor_token(T_PLUSPLUS);
        rem_anchor_token(T_MINUSMINUS);