fixed crash with unclosed string literal
[cparser] / parser.c
index b81115c..1550ee7 100644 (file)
--- a/parser.c
+++ b/parser.c
@@ -39,15 +39,19 @@ struct declaration_specifiers_t {
 
 typedef declaration_t* (*parsed_declaration_func) (declaration_t *declaration);
 
-static token_t         token;
-static token_t         lookahead_buffer[MAX_LOOKAHEAD];
-static int             lookahead_bufpos;
-static stack_entry_t  *environment_stack = NULL;
-static stack_entry_t  *label_stack       = NULL;
-static context_t      *global_context    = NULL;
-static context_t      *context           = NULL;
-static declaration_t  *last_declaration  = NULL;
-static declaration_t  *current_function  = NULL;
+static token_t             token;
+static token_t             lookahead_buffer[MAX_LOOKAHEAD];
+static int                 lookahead_bufpos;
+static stack_entry_t      *environment_stack = NULL;
+static stack_entry_t      *label_stack       = NULL;
+static context_t          *global_context    = NULL;
+static context_t          *context           = NULL;
+static declaration_t      *last_declaration  = NULL;
+static declaration_t      *current_function  = NULL;
+static switch_statement_t *current_switch    = NULL;
+static statement_t        *current_loop      = NULL;
+static goto_statement_t   *goto_first        = NULL;
+static goto_statement_t   *goto_last         = NULL;
 static struct obstack  temp_obst;
 
 /** The current source position. */
@@ -132,6 +136,11 @@ static void *allocate_ast_zero(size_t size)
        return res;
 }
 
+static declaration_t *allocate_declaration_zero(void)
+{
+       return allocate_ast_zero(sizeof(declaration_t)); /* one declaration node */
+}
+
 /**
  * Returns the size of a statement node.
  *
@@ -628,7 +637,7 @@ static int get_rank(const type_t *type)
 
        assert(type->kind == TYPE_ATOMIC);
        const atomic_type_t *atomic_type = &type->atomic;
-       atomic_type_type_t   atype       = atomic_type->atype;
+       atomic_type_kind_t   atype       = atomic_type->akind;
        return atype;
 }
 
@@ -818,8 +827,8 @@ static void semantic_assign(type_t *orig_type_left, expression_t **right,
 
 incompatible_assign_types:
        /* TODO: improve error message */
-       errorf(HERE, "incompatible types in %s", context);
-       errorf(HERE, "'%T' <- '%T'", orig_type_left, orig_type_right);
+       errorf(HERE, "incompatible types in %s: '%T' <- '%T'",
+              context, orig_type_left, orig_type_right);
 }
 
 static expression_t *parse_constant_expression(void)
@@ -844,7 +853,7 @@ static type_t *make_global_typedef(const char *name, type_t *type)
 {
        symbol_t *const symbol       = symbol_table_insert(name);
 
-       declaration_t *declaration   = allocate_ast_zero(sizeof(declaration[0]));
+       declaration_t *const declaration = allocate_declaration_zero();
        declaration->namespc         = NAMESPACE_NORMAL;
        declaration->storage_class   = STORAGE_CLASS_TYPEDEF;
        declaration->type            = type;
@@ -859,15 +868,15 @@ static type_t *make_global_typedef(const char *name, type_t *type)
        return typedef_type;
 }
 
-static const char *parse_string_literals(void)
+static string_t parse_string_literals(void)
 {
        assert(token.type == T_STRING_LITERAL);
-       const char *result = token.v.string;
+       string_t result = token.v.string;
 
        next_token();
 
-       while(token.type == T_STRING_LITERAL) {
-               result = concat_strings(result, token.v.string);
+       while (token.type == T_STRING_LITERAL) {
+               result = concat_strings(&result, &token.v.string);
                next_token();
        }
 
@@ -970,13 +979,13 @@ static designator_t *parse_designation(void)
 #endif
 
 static initializer_t *initializer_from_string(array_type_t *type,
-                                              const char *string)
+                                              const string_t *const string)
 {
        /* TODO: check len vs. size of array type */
        (void) type;
 
        initializer_t *initializer = allocate_initializer_zero(INITIALIZER_STRING);
-       initializer->string.string = string;
+       initializer->string.string = *string;
 
        return initializer;
 }
@@ -1008,9 +1017,9 @@ static initializer_t *initializer_from_expression(type_t *type,
                if (element_type->kind == TYPE_ATOMIC) {
                        switch (expression->kind) {
                                case EXPR_STRING_LITERAL:
-                                       if (element_type->atomic.atype == ATOMIC_TYPE_CHAR) {
+                                       if (element_type->atomic.akind == ATOMIC_TYPE_CHAR) {
                                                return initializer_from_string(array_type,
-                                                       expression->string.value);
+                                                       &expression->string.value);
                                        }
 
                                case EXPR_WIDE_STRING_LITERAL: {
@@ -1255,9 +1264,15 @@ static initializer_t *parse_initializer(type_t *type)
 
        if(token.type != '{') {
                expression_t  *expression  = parse_assignment_expression();
+               if (expression->base.datatype == NULL) {
+                       /* something went wrong earlier; don't produce further errors */
+                       return NULL;
+               }
                initializer_t *initializer = initializer_from_expression(type, expression);
                if(initializer == NULL) {
-                       errorf(HERE, "initializer expression '%E', type '%T' is incompatible with type '%T'", expression, expression->base.datatype, type);
+                       errorf(HERE,
+                               "initializer expression '%E' of type '%T' is incompatible with type '%T'",
+                               expression, expression->base.datatype, type);
                }
                return initializer;
        }
@@ -1321,13 +1336,9 @@ static declaration_t *parse_compound_type_specifier(bool is_struct)
        }
 
        if(declaration == NULL) {
-               declaration = allocate_ast_zero(sizeof(declaration[0]));
-
-               if(is_struct) {
-                       declaration->namespc = NAMESPACE_STRUCT;
-               } else {
-                       declaration->namespc = NAMESPACE_UNION;
-               }
+               declaration = allocate_declaration_zero();
+               declaration->namespc         =
+                       (is_struct ? NAMESPACE_STRUCT : NAMESPACE_UNION);
                declaration->source_position = token.source_position;
                declaration->symbol          = symbol;
                declaration->parent_context  = context;
@@ -1361,7 +1372,7 @@ static declaration_t *parse_compound_type_specifier(bool is_struct)
        return declaration;
 }
 
-static void parse_enum_entries(enum_type_t *const enum_type)
+static void parse_enum_entries(type_t *const enum_type)
 {
        eat('{');
 
@@ -1372,15 +1383,15 @@ static void parse_enum_entries(enum_type_t *const enum_type)
        }
 
        do {
-               declaration_t *entry = allocate_ast_zero(sizeof(entry[0]));
-
                if(token.type != T_IDENTIFIER) {
                        parse_error_expected("while parsing enum entry", T_IDENTIFIER, 0);
                        eat_block();
                        return;
                }
+
+               declaration_t *const entry = allocate_declaration_zero();
                entry->storage_class   = STORAGE_CLASS_ENUM_ENTRY;
-               entry->type            = (type_t*) enum_type;
+               entry->type            = enum_type;
                entry->symbol          = token.v.symbol;
                entry->source_position = token.source_position;
                next_token();
@@ -1424,9 +1435,8 @@ static type_t *parse_enum_specifier(void)
        }
 
        if(declaration == NULL) {
-               declaration = allocate_ast_zero(sizeof(declaration[0]));
-
-               declaration->namespc       = NAMESPACE_ENUM;
+               declaration = allocate_declaration_zero();
+               declaration->namespc         = NAMESPACE_ENUM;
                declaration->source_position = token.source_position;
                declaration->symbol          = symbol;
                declaration->parent_context  = context;
@@ -1439,11 +1449,13 @@ static type_t *parse_enum_specifier(void)
                if(declaration->init.is_defined) {
                        errorf(HERE, "multiple definitions of enum %Y", symbol);
                }
-               environment_push(declaration);
+               if (symbol != NULL) {
+                       environment_push(declaration);
+               }
                append_declaration(declaration);
                declaration->init.is_defined = 1;
 
-               parse_enum_entries(&type->enumt);
+               parse_enum_entries(type);
                parse_attributes();
        }
 
@@ -1715,7 +1727,7 @@ static void parse_declaration_specifiers(declaration_specifiers_t *specifiers)
 finish_specifiers:
 
        if(type == NULL) {
-               atomic_type_type_t atomic_type;
+               atomic_type_kind_t atomic_type;
 
                /* match valid basic types */
                switch(type_specifiers) {
@@ -1826,7 +1838,7 @@ finish_specifiers:
                }
 
                type               = allocate_type_zero(TYPE_ATOMIC);
-               type->atomic.atype = atomic_type;
+               type->atomic.akind = atomic_type;
                newtype            = 1;
        } else {
                if(type_specifiers != 0) {
@@ -1866,8 +1878,7 @@ static declaration_t *parse_identifier_list(void)
        declaration_t *declarations     = NULL;
        declaration_t *last_declaration = NULL;
        do {
-               declaration_t *declaration = allocate_ast_zero(sizeof(declaration[0]));
-
+               declaration_t *const declaration = allocate_declaration_zero();
                declaration->source_position = token.source_position;
                declaration->symbol          = token.v.symbol;
                next_token();
@@ -2264,7 +2275,7 @@ static declaration_t *parse_declarator(
                const declaration_specifiers_t *specifiers, bool may_be_abstract)
 {
        type_t        *type         = specifiers->type;
-       declaration_t *declaration  = allocate_ast_zero(sizeof(declaration[0]));
+       declaration_t *const declaration = allocate_declaration_zero();
        declaration->storage_class  = specifiers->storage_class;
        declaration->modifiers      = specifiers->decl_modifiers;
        declaration->is_inline      = specifiers->is_inline;
@@ -2321,6 +2332,11 @@ static declaration_t *internal_record_declaration(
        assert(declaration != previous_declaration);
        if (previous_declaration != NULL
                        && previous_declaration->parent_context == context) {
+               /* can happen for K&R style declarations */
+               if(previous_declaration->type == NULL) {
+                       previous_declaration->type = declaration->type;
+               }
+
                const type_t *const prev_type = skip_typeref(previous_declaration->type);
                if (!types_compatible(type, prev_type)) {
                        errorf(declaration->source_position,
@@ -2460,7 +2476,7 @@ static void parse_init_declarator_rest(declaration_t *declaration)
 
                                case INITIALIZER_STRING: {
                                        initializer_string_t *const string = &initializer->string;
-                                       cnst->conste.v.int_value = strlen(string->string) + 1;
+                                       cnst->conste.v.int_value = string->string.size;
                                        break;
                                }
 
@@ -2494,8 +2510,7 @@ static void parse_anonymous_declaration_rest(
 {
        eat(';');
 
-       declaration_t *declaration = allocate_ast_zero(sizeof(declaration[0]));
-
+       declaration_t *const declaration = allocate_declaration_zero();
        declaration->type            = specifiers->type;
        declaration->storage_class   = specifiers->storage_class;
        declaration->source_position = specifiers->source_position;
@@ -2556,11 +2571,32 @@ static void parse_declaration_rest(declaration_t *ndeclaration,
 
 static declaration_t *finished_kr_declaration(declaration_t *declaration)
 {
-       /* TODO: check that it was actually a parameter that gets a type */
+       symbol_t *symbol  = declaration->symbol;
+       if(symbol == NULL) {
+               errorf(HERE, "anonymous declaration not valid as function parameter");
+               return declaration;
+       }
+       namespace_t namespc = (namespace_t) declaration->namespc;
+       if(namespc != NAMESPACE_NORMAL) {
+               return record_declaration(declaration);
+       }
 
-       /* we should have a declaration for the parameter in the current
-        * scope */
-       return record_declaration(declaration);
+       declaration_t *previous_declaration = get_declaration(symbol, namespc);
+       if(previous_declaration == NULL ||
+                       previous_declaration->parent_context != context) {
+               errorf(HERE, "expected declaration of a function parameter, found '%Y'",
+                      symbol);
+               return declaration;
+       }
+
+       if(previous_declaration->type == NULL) {
+               previous_declaration->type           = declaration->type;
+               previous_declaration->storage_class  = declaration->storage_class;
+               previous_declaration->parent_context = context;
+               return previous_declaration;
+       } else {
+               return record_declaration(declaration);
+       }
 }
 
 static void parse_declaration(parsed_declaration_func finished_declaration)
@@ -2656,6 +2692,31 @@ static void parse_kr_declaration_list(declaration_t *declaration)
        declaration->type = type;
 }
 
+/**
+ * Reports every goto of the current function whose target label was never
+ * defined (its source position has no input name), then resets the goto list.
+ */
+static void check_for_missing_labels(void)
+{
+       bool header_printed = false;
+       const goto_statement_t *jump = goto_first;
+       for ( ; jump != NULL; jump = jump->next) {
+               const declaration_t *const target = jump->label;
+               if (target->source_position.input_name != NULL) {
+                       continue;
+               }
+               if (!header_printed) {
+                       header_printed = true;
+                       diagnosticf("%s: In function '%Y':\n",
+                               current_function->source_position.input_name,
+                               current_function->symbol);
+               }
+               errorf(jump->statement.source_position,
+                       "label '%Y' used but not defined", target->symbol);
+       }
+       goto_first = goto_last = NULL;
+}
+
 static void parse_external_declaration(void)
 {
        /* function-definitions and declarations both start with declaration
@@ -2697,7 +2758,8 @@ static void parse_external_declaration(void)
        /* note that we don't skip typerefs: the standard doesn't allow them here
         * (so we can't use is_type_function here) */
        if(type->kind != TYPE_FUNCTION) {
-               errorf(HERE, "declarator '%#T' has a body but is not a function type", type, ndeclaration->symbol);
+               errorf(HERE, "declarator '%#T' has a body but is not a function type",
+                      type, ndeclaration->symbol);
                eat_block();
                return;
        }
@@ -2728,7 +2790,11 @@ static void parse_external_declaration(void)
 
        declaration_t *parameter = declaration->context.declarations;
        for( ; parameter != NULL; parameter = parameter->next) {
-               assert(parameter->parent_context == NULL || parameter->parent_context == context);
+               if(parameter->parent_context == &ndeclaration->context) {
+                       parameter->parent_context = context;
+               }
+               assert(parameter->parent_context == NULL
+                               || parameter->parent_context == context);
                parameter->parent_context = context;
                environment_push(parameter);
        }
@@ -2744,6 +2810,7 @@ static void parse_external_declaration(void)
                current_function                    = declaration;
 
                declaration->init.statement = parse_compound_statement();
+               check_for_missing_labels();
 
                assert(current_function == declaration);
                current_function = old_current_function;
@@ -2779,15 +2846,12 @@ static void parse_struct_declarators(const declaration_specifiers_t *specifiers)
 
                        type_t *type = make_bitfield_type(base_type, size);
 
-                       declaration = allocate_ast_zero(sizeof(declaration[0]));
-
+                       declaration = allocate_declaration_zero();
                        declaration->namespc         = NAMESPACE_NORMAL;
                        declaration->storage_class   = STORAGE_CLASS_NONE;
                        declaration->source_position = token.source_position;
                        declaration->modifiers       = specifiers->decl_modifiers;
                        declaration->type            = type;
-
-                       record_declaration(declaration);
                } else {
                        declaration = parse_declarator(specifiers,/*may_be_abstract=*/true);
 
@@ -2869,10 +2933,15 @@ static expression_t *create_invalid_expression(void)
        return expression;
 }
 
+/**
+ * Prints an error message if an expression was expected but not read
+ */
 static expression_t *expected_expression_error(void)
 {
-       errorf(HERE, "expected expression, got token '%K'", &token);
-
+       /* skip the error message if the error token was read */
+       if (token.type != T_ERROR) {
+               errorf(HERE, "expected expression, got token '%K'", &token);
+       }
        next_token();
 
        return create_invalid_expression();
@@ -2942,18 +3011,24 @@ static declaration_t *create_implicit_function(symbol_t *symbol,
                free_type(ntype);
        }
 
-       declaration_t *declaration = allocate_ast_zero(sizeof(declaration[0]));
-
+       declaration_t *const declaration = allocate_declaration_zero();
        declaration->storage_class   = STORAGE_CLASS_EXTERN;
        declaration->type            = type;
        declaration->symbol          = symbol;
        declaration->source_position = source_position;
        declaration->parent_context  = global_context;
 
+       context_t *old_context = context;
+       set_context(global_context);
+
        environment_push(declaration);
+       /* prepend the declaration to the global declarations list */
        declaration->next     = context->declarations;
        context->declarations = declaration;
 
+       assert(context == global_context);
+       set_context(old_context);
+
        return declaration;
 }
 
@@ -3216,7 +3291,6 @@ static expression_t *parse_function_keyword(void)
 
        expression->expression.kind     = EXPR_FUNCTION;
        expression->expression.datatype = type_string;
-       expression->value               = current_function->symbol->string;
 
        return (expression_t*) expression;
 }
@@ -3235,7 +3309,6 @@ static expression_t *parse_pretty_function_keyword(void)
 
        expression->expression.kind     = EXPR_PRETTY_FUNCTION;
        expression->expression.datatype = type_string;
-       expression->value               = current_function->symbol->string;
 
        return (expression_t*) expression;
 }
@@ -3552,6 +3625,21 @@ static expression_t *parse_primary_expression(void)
        return create_invalid_expression();
 }
 
+/**
+ * Warns when the given subscript expression has the atomic type
+ * ATOMIC_TYPE_CHAR after skipping typerefs; the message shows the original type.
+ */
+static void check_for_char_index_type(const expression_t *expression) {
+       type_t *const orig_type = expression->base.datatype;
+       type_t *const skipped   = skip_typeref(orig_type);
+
+       if (skipped->base.kind == TYPE_ATOMIC
+                       && skipped->atomic.akind == ATOMIC_TYPE_CHAR) {
+               warningf(expression->base.source_position,
+                       "array subscript has type '%T'", orig_type);
+       }
+}
+
 static expression_t *parse_array_expression(unsigned precedence,
                                             expression_t *left)
 {
@@ -3579,12 +3667,14 @@ static expression_t *parse_array_expression(unsigned precedence,
                        return_type             = pointer->points_to;
                        array_access->array_ref = left;
                        array_access->index     = inside;
+                       check_for_char_index_type(inside);
                } else if(is_type_pointer(type_inside)) {
                        pointer_type_t *pointer = &type_inside->pointer;
                        return_type             = pointer->points_to;
                        array_access->array_ref = inside;
                        array_access->index     = left;
                        array_access->flipped   = true;
+                       check_for_char_index_type(left);
                } else {
                        errorf(HERE, "array access on object with non-pointer types '%T', '%T'", type_left, type_inside);
                }
@@ -4790,11 +4880,45 @@ static statement_t *parse_case_statement(void)
        statement->case_label.expression = parse_expression();
 
        expect(':');
+
+       if (! is_constant_expression(statement->case_label.expression)) {
+               errorf(statement->base.source_position,
+                       "case label does not reduce to an integer constant");
+       } else {
+               /* TODO: check if the case label is already known */
+               if (current_switch != NULL) {
+                       /* append to the case list and advance its tail pointer */
+                       if (current_switch->last_case == NULL) {
+                               current_switch->first_case = &statement->case_label;
+                       } else {
+                               current_switch->last_case->next = &statement->case_label;
+                       }
+                       current_switch->last_case = &statement->case_label;
+               } else {
+                       errorf(statement->base.source_position,
+                               "case label not within a switch statement");
+               }
+       }
        statement->case_label.label_statement = parse_statement();
 
        return statement;
 }
 
+/**
+ * Returns the first entry of the switch's case list that has no expression
+ * (which is how a 'default' label is told apart here), or NULL if none exists.
+ */
+static case_label_statement_t *
+find_default_label(const switch_statement_t *statement)
+{
+       case_label_statement_t *label = statement->first_case;
+       /* step over every label that carries a case expression */
+       while (label != NULL && label->expression != NULL) {
+               label = label->next;
+       }
+       return label;
+}
+
 /**
  * Parse a default statement.
  */
@@ -4807,6 +4931,25 @@ static statement_t *parse_default_statement(void)
        statement->base.source_position = token.source_position;
 
        expect(':');
+       if (current_switch != NULL) {
+               const case_label_statement_t *def_label = find_default_label(current_switch);
+               if (def_label != NULL) {
+                       errorf(HERE, "multiple default labels in one switch");
+                       errorf(def_label->statement.source_position,
+                               "this is the first default label");
+               } else {
+                       /* append to the case list and advance its tail pointer */
+                       if (current_switch->last_case == NULL) {
+                               current_switch->first_case = &statement->case_label;
+                       } else {
+                               current_switch->last_case->next = &statement->case_label;
+                       }
+                       current_switch->last_case = &statement->case_label;
+               }
+       } else {
+               errorf(statement->base.source_position,
+                       "'default' label not within a switch statement");
+       }
        statement->label.label_statement = parse_statement();
 
        return statement;
@@ -4827,7 +4970,7 @@ static declaration_t *get_label(symbol_t *symbol)
        }
 
        /* otherwise we need to create a new one */
-       declaration_t *declaration = allocate_ast_zero(sizeof(declaration[0]));
+       declaration_t *const declaration = allocate_declaration_zero();
        declaration->namespc       = NAMESPACE_LABEL;
        declaration->symbol        = symbol;
 
@@ -4912,13 +5055,28 @@ static statement_t *parse_switch(void)
        statement->statement.source_position = token.source_position;
 
        expect('(');
-       statement->expression = parse_expression();
+       expression_t *const expr = parse_expression();
+       type_t       *const type = promote_integer(skip_typeref(expr->base.datatype));
+       statement->expression = create_implicit_cast(expr, type);
        expect(')');
+
+       switch_statement_t *rem = current_switch;
+       current_switch  = statement;
        statement->body = parse_statement();
+       current_switch  = rem;
 
        return (statement_t*) statement;
 }
 
+static statement_t *parse_loop_body(statement_t *const loop)
+{
+       statement_t *const outer_loop = current_loop; /* restored below */
+       current_loop = loop;          /* 'loop' is current while its body parses */
+       statement_t *const body = parse_statement();
+       current_loop = outer_loop;
+       return body;
+}
+
 /**
  * Parse a while statement.
  */
@@ -4933,7 +5091,8 @@ static statement_t *parse_while(void)
        expect('(');
        statement->condition = parse_expression();
        expect(')');
-       statement->body = parse_statement();
+
+       statement->body = parse_loop_body((statement_t*)statement);
 
        return (statement_t*) statement;
 }
@@ -4949,7 +5108,7 @@ static statement_t *parse_do(void)
        statement->statement.kind            = STATEMENT_DO_WHILE;
        statement->statement.source_position = token.source_position;
 
-       statement->body = parse_statement();
+       statement->body = parse_loop_body((statement_t*)statement);
        expect(T_while);
        expect('(');
        statement->condition = parse_expression();
@@ -4995,7 +5154,7 @@ static statement_t *parse_for(void)
                statement->step = parse_expression();
        }
        expect(')');
-       statement->body = parse_statement();
+       statement->body = parse_loop_body((statement_t*)statement);
 
        assert(context == &statement->context);
        set_context(last_context);
@@ -5028,6 +5187,13 @@ static statement_t *parse_goto(void)
 
        statement->label = label;
 
+       /* remember the gotos in a list; goto_last must advance on every append */
+       if (goto_last == NULL) {
+               goto_first = statement;
+       } else {
+               goto_last->next = statement;
+       }
+       goto_last = statement;
        expect(';');
 
        return (statement_t*) statement;
@@ -5038,13 +5204,19 @@ static statement_t *parse_goto(void)
  */
 static statement_t *parse_continue(void)
 {
+       statement_t *statement;
+       if (current_loop == NULL) {
+               errorf(HERE, "continue statement not within loop");
+               statement = NULL;
+       } else {
+               statement = allocate_statement_zero(STATEMENT_CONTINUE);
+
+               statement->base.source_position = token.source_position;
+       }
+
        eat(T_continue);
        expect(';');
 
-       statement_t *statement          = allocate_ast_zero(sizeof(statement[0]));
-       statement->kind                 = STATEMENT_CONTINUE;
-       statement->base.source_position = token.source_position;
-
        return statement;
 }
 
@@ -5053,13 +5225,19 @@ static statement_t *parse_continue(void)
  */
 static statement_t *parse_break(void)
 {
+       statement_t *statement;
+       if (current_switch == NULL && current_loop == NULL) {
+               errorf(HERE, "break statement not within loop or switch");
+               statement = NULL;
+       } else {
+               statement = allocate_statement_zero(STATEMENT_BREAK);
+
+               statement->base.source_position = token.source_position;
+       }
+
        eat(T_break);
        expect(';');
 
-       statement_t *statement          = allocate_ast_zero(sizeof(statement[0]));
-       statement->kind                 = STATEMENT_BREAK;
-       statement->base.source_position = token.source_position;
-
        return statement;
 }