X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=preprocessor.c;h=570182c668e7794f5036104ccb6d7535c4dcad9b;hb=3803edc846afa85c077490e324f4f1a4211121fd;hp=012d90bd74a1c05bfbf01034d7c4ada9f0bc596f;hpb=ae52b69dfadbd0835e01f55167d979cb6a9a6305;p=cparser diff --git a/preprocessor.c b/preprocessor.c index 012d90b..570182c 100644 --- a/preprocessor.c +++ b/preprocessor.c @@ -18,17 +18,35 @@ #define MAX_PUTBACK 3 #define INCLUDE_LIMIT 199 /* 199 is for gcc "compatibility" */ +struct pp_argument_t { + size_t list_len; + token_t *token_list; +}; + struct pp_definition_t { symbol_t *symbol; source_position_t source_position; pp_definition_t *parent_expansion; size_t expand_pos; - bool is_variadic : 1; - bool is_expanding : 1; - size_t argument_count; - token_t *arguments; + bool is_variadic : 1; + bool is_expanding : 1; + bool has_parameters : 1; + size_t n_parameters; + symbol_t *parameters; + + /* replacement */ size_t list_len; - token_t *replacement_list; + token_t *token_list; + +}; + +typedef struct pp_conditional_t pp_conditional_t; +struct pp_conditional_t { + source_position_t source_position; + bool condition; + bool in_else; + bool skip; /**< conditional in skip mode (then+else gets skipped) */ + pp_conditional_t *parent; }; typedef struct pp_input_t pp_input_t; @@ -50,21 +68,23 @@ static pp_input_t *input_stack; static unsigned n_inputs; static struct obstack input_obstack; +static pp_conditional_t *conditional_stack; + token_t pp_token; static bool resolve_escape_sequences = false; static bool do_print_spaces = true; +static bool do_expansions; +static bool skip_mode; static FILE *out; static struct obstack pp_obstack; static unsigned counted_newlines; static unsigned counted_spaces; static const char *printed_input_name = NULL; static pp_definition_t *current_expansion = NULL; -static bool do_expansions; static inline void next_char(void); static void next_preprocessing_token(void); static void print_line_directive(const source_position_t *pos, const char *add); -static void print_spaces(void); static bool open_input(const char *filename) { @@ -163,7 +183,7 @@ static inline void next_real_char(void) if (input.bufpos >= input.bufend) { size_t s = fread(input.buf + MAX_PUTBACK, 1, sizeof(input.buf) - MAX_PUTBACK, input.file); - if(s == 0) { + if (s == 0) { CC = EOF; return; } @@ -294,7 +314,8 @@ static inline bool is_octal_digit(int chr) * Returns the value of a digit. * The only portable way to do it ... */ -static int digit_value(int digit) { +static int digit_value(int digit) +{ switch (digit) { case '0': return 0; case '1': return 1; @@ -719,7 +740,7 @@ restart: current_expansion = definition; goto restart; } - pp_token = definition->replacement_list[definition->expand_pos]; + pp_token = definition->token_list[definition->expand_pos]; ++definition->expand_pos; if(pp_token.type != TP_IDENTIFIER) @@ -737,6 +758,135 @@ restart: } } +static void skip_line_comment(void) +{ + if(do_print_spaces) + counted_spaces++; + + while(1) { + switch(CC) { + case EOF: + return; + + case '\n': + case '\r': + return; + + default: + next_char(); + break; + } + } +} + +static void skip_multiline_comment(void) +{ + if(do_print_spaces) + counted_spaces++; + + unsigned start_linenr = input.position.linenr; + while(1) { + switch(CC) { + case '/': + next_char(); + if (CC == '*') { + /* TODO: nested comment, warn here */ + } + break; + case '*': + next_char(); + if(CC == '/') { + next_char(); + return; + } + break; + + MATCH_NEWLINE( + if(do_print_spaces) { + counted_newlines++; + counted_spaces = 0; + } + break; + ) + + case EOF: { + source_position_t source_position; + source_position.input_name = pp_token.source_position.input_name; + source_position.linenr = start_linenr; + errorf(&source_position, "at end of file while looking for comment end"); + return; + } + + default: + next_char(); + break; + } + } +} + +/* skip spaces advancing at the start of the next preprocessing token */ +static void skip_spaces(bool skip_newline) +{ + while (true) { + switch (CC) { + case ' ': + case '\t': + if(do_print_spaces) + counted_spaces++; + next_char(); + continue; + case '/': + next_char(); + if (CC == '/') { + next_char(); + skip_line_comment(); + continue; + } else if (CC == '*') { + next_char(); + skip_multiline_comment(); + continue; + } else { + put_back(CC); + CC = '/'; + } + return; + + case '\r': + if (!skip_newline) + return; + + next_char(); + if(CC == '\n') { + next_char(); + } + ++input.position.linenr; + if (do_print_spaces) + ++counted_newlines; + continue; + + case '\n': + if (!skip_newline) + return; + + next_char(); + ++input.position.linenr; + if (do_print_spaces) + ++counted_newlines; + continue; + + default: + return; + } + } +} + +static void eat_pp(int type) +{ + (void) type; + assert(pp_token.type == type); + next_preprocessing_token(); +} + static void parse_symbol(void) { obstack_1grow(&symbol_obstack, (char) CC); @@ -760,11 +910,11 @@ end_symbol: char *string = obstack_finish(&symbol_obstack); /* might be a wide string or character constant ( L"string"/L'c' ) */ - if(CC == '"' && string[0] == 'L' && string[1] == '\0') { + if (CC == '"' && string[0] == 'L' && string[1] == '\0') { obstack_free(&symbol_obstack, string); parse_wide_string_literal(); return; - } else if(CC == '\'' && string[0] == 'L' && string[1] == '\0') { + } else if (CC == '\'' && string[0] == 'L' && string[1] == '\0') { obstack_free(&symbol_obstack, string); parse_wide_character_constant(); return; @@ -777,17 +927,34 @@ end_symbol: /* we can free the memory from symbol obstack if we already had an entry in * the symbol table */ - if(symbol->string != string) { + if (symbol->string != string) { obstack_free(&symbol_obstack, string); } + if (!do_expansions) + return; pp_definition_t *pp_definition = symbol->pp_definition; - if(do_expansions && pp_definition != NULL) { - pp_definition->expand_pos = 0; - pp_definition->is_expanding = true, - current_expansion = pp_definition; - expand_next(); + if (pp_definition == NULL) + return; + + if (pp_definition->has_parameters) { + skip_spaces(true); + /* no opening brace -> no expansion */ + if (CC != '(') + return; + next_preprocessing_token(); + eat_pp('('); + + /* parse arguments (TODO) */ + while (pp_token.type != TP_EOF && pp_token.type != ')') + next_preprocessing_token(); + next_preprocessing_token(); } + + pp_definition->expand_pos = 0; + pp_definition->is_expanding = true, + current_expansion = pp_definition; + expand_next(); } static void parse_number(void) @@ -831,67 +998,6 @@ end_number: pp_token.v.string.size = size; } -static void skip_multiline_comment(void) -{ - unsigned start_linenr = input.position.linenr; - - while(1) { - switch(CC) { - case '/': - next_char(); - if (CC == '*') { - /* TODO: nested comment, warn here */ - } - break; - case '*': - next_char(); - if(CC == '/') { - next_char(); - return; - } - break; - - MATCH_NEWLINE( - if(do_print_spaces) { - counted_newlines++; - counted_spaces = 0; - } - break; - ) - - case EOF: { - source_position_t source_position; - source_position.input_name = pp_token.source_position.input_name; - source_position.linenr = start_linenr; - errorf(&source_position, "at end of file while looking for comment end"); - return; - } - - default: - next_char(); - break; - } - } -} - -static void skip_line_comment(void) -{ - while(1) { - switch(CC) { - case EOF: - return; - - case '\n': - case '\r': - return; - - default: - next_char(); - break; - } - } -} - #define MAYBE_PROLOG \ @@ -1016,14 +1122,10 @@ restart: case '*': next_char(); skip_multiline_comment(); - if(do_print_spaces) - counted_spaces++; goto restart; case '/': next_char(); skip_line_comment(); - if(do_print_spaces) - counted_spaces++; goto restart; ELSE('/') case '%': @@ -1149,7 +1251,7 @@ static void print_quoted_string(const char *const string) static void print_line_directive(const source_position_t *pos, const char *add) { - fprintf(out, "# %d ", pos->linenr); + fprintf(out, "# %u ", pos->linenr); print_quoted_string(pos->input_name); if (add != NULL) { fputc(' ', out); @@ -1180,6 +1282,9 @@ static void print_spaces(void) static void emit_pp_token(void) { + if (skip_mode) + return; + if (pp_token.type != '\n') { print_spaces(); input.had_non_space = true; @@ -1205,13 +1310,6 @@ static void emit_pp_token(void) } } -static void eat_pp(preprocessor_token_type_t type) -{ - (void) type; - assert(pp_token.type == type); - next_preprocessing_token(); -} - static void eat_pp_directive(void) { while(pp_token.type != '\n' && pp_token.type != TP_EOF) { @@ -1282,8 +1380,8 @@ static bool pp_definitions_equal(const pp_definition_t *definition1, return false; size_t len = definition1->list_len; - const token_t *t1 = definition1->replacement_list; - const token_t *t2 = definition2->replacement_list; + const token_t *t1 = definition1->token_list; + const token_t *t2 = definition2->token_list; for(size_t i = 0; i < len; ++i, ++t1, ++t2) { if(!pp_tokens_equal(t1, t2)) return false; @@ -1294,12 +1392,12 @@ static bool pp_definitions_equal(const pp_definition_t *definition1, static void parse_define_directive(void) { eat_pp(TP_define); + assert(obstack_object_size(&pp_obstack) == 0); - if(pp_token.type != TP_IDENTIFIER) { + if (pp_token.type != TP_IDENTIFIER) { errorf(&pp_token.source_position, - "expected identifier after #define, got '%T'", &pp_token); - eat_pp_directive(); - return; + "expected identifier after #define, got '%t'", &pp_token); + goto error_out; } symbol_t *symbol = pp_token.v.symbol; @@ -1311,22 +1409,55 @@ static void parse_define_directive(void) /* this is probably the only place where spaces are significant in the * lexer (except for the fact that they separate tokens). #define b(x) * is something else than #define b (x) */ - //token_t *arguments = NULL; - if(CC == '(') { + if (CC == '(') { + /* eat the '(' */ + next_preprocessing_token(); + /* get next token after '(' */ next_preprocessing_token(); - while(pp_token.type != ')') { - if(pp_token.type == TP_DOTDOTDOT) { + + while (true) { + switch (pp_token.type) { + case TP_DOTDOTDOT: new_definition->is_variadic = true; next_preprocessing_token(); - if(pp_token.type != ')') { + if (pp_token.type != ')') { errorf(&input.position, "'...' not at end of macro argument list"); - continue; + goto error_out; } - } else if(pp_token.type != TP_IDENTIFIER) { + break; + case TP_IDENTIFIER: + obstack_ptr_grow(&pp_obstack, pp_token.v.symbol); + next_preprocessing_token(); + + if (pp_token.type == ',') { + next_preprocessing_token(); + break; + } + + if (pp_token.type != ')') { + errorf(&pp_token.source_position, + "expected ',' or ')' after identifier, got '%t'", + &pp_token); + goto error_out; + } + break; + case ')': next_preprocessing_token(); + goto finish_argument_list; + default: + errorf(&pp_token.source_position, + "expected identifier, '...' or ')' in #define argument list, got '%t'", + &pp_token); + goto error_out; } } + + finish_argument_list: + new_definition->has_parameters = true; + new_definition->n_parameters + = obstack_object_size(&pp_obstack) / sizeof(new_definition->parameters[0]); + new_definition->parameters = obstack_finish(&pp_obstack); } else { next_preprocessing_token(); } @@ -1334,18 +1465,18 @@ static void parse_define_directive(void) /* construct a new pp_definition on the obstack */ assert(obstack_object_size(&pp_obstack) == 0); size_t list_len = 0; - while(pp_token.type != '\n' && pp_token.type != TP_EOF) { + while (pp_token.type != '\n' && pp_token.type != TP_EOF) { obstack_grow(&pp_obstack, &pp_token, sizeof(pp_token)); ++list_len; next_preprocessing_token(); } - new_definition->list_len = list_len; - new_definition->replacement_list = obstack_finish(&pp_obstack); + new_definition->list_len = list_len; + new_definition->token_list = obstack_finish(&pp_obstack); pp_definition_t *old_definition = symbol->pp_definition; - if(old_definition != NULL) { - if(!pp_definitions_equal(old_definition, new_definition)) { + if (old_definition != NULL) { + if (!pp_definitions_equal(old_definition, new_definition)) { warningf(&input.position, "multiple definition of macro '%Y' (first defined %P)", symbol, &old_definition->source_position); } else { @@ -1356,6 +1487,14 @@ static void parse_define_directive(void) } symbol->pp_definition = new_definition; + return; + +error_out: + if (obstack_object_size(&pp_obstack) > 0) { + char *ptr = obstack_finish(&pp_obstack); + obstack_free(&pp_obstack, ptr); + } + eat_pp_directive(); } static void parse_undef_directive(void) @@ -1364,7 +1503,7 @@ static void parse_undef_directive(void) if(pp_token.type != TP_IDENTIFIER) { errorf(&input.position, - "expected identifier after #undef, got '%T'", &pp_token); + "expected identifier after #undef, got '%t'", &pp_token); eat_pp_directive(); return; } @@ -1382,16 +1521,17 @@ static void parse_undef_directive(void) static const char *parse_headername(void) { - /* behind an #include we can have the special headername lexems, check - * for them here */ + /* behind an #include we can have the special headername lexems. + * They're only allowed behind an #include so they're not recognized + * by the normal next_preprocessing_token. We handle them as a special + * exception here */ - /* skip spaces */ - while (CC == ' ' || CC == '\t') { - next_char(); - } + /* skip spaces so we reach start of next preprocessing token */ + skip_spaces(false); assert(obstack_object_size(&input_obstack) == 0); + /* check wether we have a "... or <... headername */ switch (CC) { case '<': /* for now until we have proper searchpath handling */ @@ -1455,7 +1595,7 @@ finished_headername: return headername; } -static void parse_include_directive(void) +static bool parse_include_directive(void) { /* don't eat the TP_include here! * we need an alternative parsing for the next token */ @@ -1465,7 +1605,7 @@ static void parse_include_directive(void) const char *headername = parse_headername(); if (headername == NULL) { eat_pp_directive(); - return; + return false; } if (pp_token.type != '\n' && pp_token.type != TP_EOF) { @@ -1478,9 +1618,16 @@ static void parse_include_directive(void) errorf(&pp_token.source_position, "#include nested too deeply"); /* eat \n or EOF */ next_preprocessing_token(); - return; + return false; } + /* we have to reenable space counting and macro expansion here, + * because it is still disabled in directive parsing, + * but we will trigger a preprocessing token reading of the new file + * now and need expansions/space counting */ + do_print_spaces = true; + do_expansions = true; + /* switch inputs */ push_input(); bool res = open_input(headername); @@ -1488,8 +1635,145 @@ static void parse_include_directive(void) errorf(&pp_token.source_position, "failed including '%s': %s", headername, strerror(errno)); pop_restore_input(); + return false; + } + + return true; +} + +static pp_conditional_t *push_conditional(void) +{ + pp_conditional_t *conditional + = obstack_alloc(&pp_obstack, sizeof(*conditional)); + memset(conditional, 0, sizeof(*conditional)); + + conditional->parent = conditional_stack; + conditional_stack = conditional; + + return conditional; +} + +static void pop_conditional(void) +{ + assert(conditional_stack != NULL); + conditional_stack = conditional_stack->parent; +} + +static void check_unclosed_conditionals(void) +{ + while (conditional_stack != NULL) { + pp_conditional_t *conditional = conditional_stack; + + if (conditional->in_else) { + errorf(&conditional->source_position, "unterminated #else"); + } else { + errorf(&conditional->source_position, "unterminated condition"); + } + pop_conditional(); + } +} + +static void parse_ifdef_ifndef_directive(void) +{ + bool is_ifndef = (pp_token.type == TP_ifndef); + bool condition; + next_preprocessing_token(); + + if (skip_mode) { + eat_pp_directive(); + pp_conditional_t *conditional = push_conditional(); + conditional->source_position = pp_token.source_position; + conditional->skip = true; + return; + } + + if (pp_token.type != TP_IDENTIFIER) { + errorf(&pp_token.source_position, + "expected identifier after #%s, got '%t'", + is_ifndef ? "ifndef" : "ifdef", &pp_token); + eat_pp_directive(); + + /* just take the true case in the hope to avoid further errors */ + condition = true; + } else { + symbol_t *symbol = pp_token.v.symbol; + pp_definition_t *pp_definition = symbol->pp_definition; + next_preprocessing_token(); + + if (pp_token.type != '\n') { + errorf(&pp_token.source_position, + "extra tokens at end of #%s", + is_ifndef ? "ifndef" : "ifdef"); + eat_pp_directive(); + } + + /* evaluate wether we are in true or false case */ + condition = is_ifndef ? pp_definition == NULL : pp_definition != NULL; + } + + pp_conditional_t *conditional = push_conditional(); + conditional->source_position = pp_token.source_position; + conditional->condition = condition; + + if (!condition) { + skip_mode = true; + } +} + +static void parse_else_directive(void) +{ + eat_pp(TP_else); + + if (pp_token.type != '\n') { + if (!skip_mode) { + warningf(&pp_token.source_position, "extra tokens at end of #else"); + } + eat_pp_directive(); + } + + pp_conditional_t *conditional = conditional_stack; + if (conditional == NULL) { + errorf(&pp_token.source_position, "#else without prior #if"); + return; + } + + if (conditional->in_else) { + errorf(&pp_token.source_position, + "#else after #else (condition started %P)", + conditional->source_position); + skip_mode = true; + return; + } + + conditional->in_else = true; + if (!conditional->skip) { + skip_mode = conditional->condition; + } + conditional->source_position = pp_token.source_position; +} + +static void parse_endif_directive(void) +{ + eat_pp(TP_endif); + + if (pp_token.type != '\n') { + if (!skip_mode) { + warningf(&pp_token.source_position, + "extra tokens at end of #endif"); + } + eat_pp_directive(); + } + + pp_conditional_t *conditional = conditional_stack; + if (conditional == NULL) { + errorf(&pp_token.source_position, "#endif without prior #if"); return; } + + if (!conditional->skip) { + skip_mode = false; + } + pop_conditional(); } static void parse_preprocessing_directive(void) @@ -1498,24 +1782,56 @@ static void parse_preprocessing_directive(void) do_expansions = false; eat_pp('#'); - switch(pp_token.type) { - case TP_define: - parse_define_directive(); - break; - case TP_undef: - parse_undef_directive(); - break; - case TP_include: - parse_include_directive(); - /* no need to parse ending '\n' */ - do_print_spaces = true; - do_expansions = true; - return; - default: - errorf(&pp_token.source_position, - "invalid preprocessing directive #%T", &pp_token); - eat_pp_directive(); - break; + if (skip_mode) { + switch(pp_token.type) { + case TP_ifdef: + case TP_ifndef: + parse_ifdef_ifndef_directive(); + break; + case TP_else: + parse_else_directive(); + break; + case TP_endif: + parse_endif_directive(); + break; + default: + eat_pp_directive(); + break; + } + } else { + switch(pp_token.type) { + case TP_define: + parse_define_directive(); + break; + case TP_undef: + parse_undef_directive(); + break; + case TP_ifdef: + case TP_ifndef: + parse_ifdef_ifndef_directive(); + break; + case TP_else: + parse_else_directive(); + break; + case TP_endif: + parse_endif_directive(); + break; + case TP_include: { + bool in_new_source = parse_include_directive(); + /* no need to do anything if source file switched */ + if (in_new_source) + return; + break; + } + case '\n': + /* the nop directive */ + break; + default: + errorf(&pp_token.source_position, + "invalid preprocessing directive #%t", &pp_token); + eat_pp_directive(); + break; + } } do_print_spaces = true; @@ -1571,6 +1887,7 @@ int pptest_main(int argc, char **argv) } end_of_main_loop: + check_unclosed_conditionals(); close_input(); obstack_free(&input_obstack, NULL);