X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=preprocessor.c;h=570182c668e7794f5036104ccb6d7535c4dcad9b;hb=f109471af0f1a95e840a66c9e136409d0973021c;hp=f47b28be6eac5228798ff85672d0670cb98d041c;hpb=7730baff98a8ac2975b9550291ee78f19e14cd7e;p=cparser diff --git a/preprocessor.c b/preprocessor.c index f47b28b..570182c 100644 --- a/preprocessor.c +++ b/preprocessor.c @@ -18,17 +18,26 @@ #define MAX_PUTBACK 3 #define INCLUDE_LIMIT 199 /* 199 is for gcc "compatibility" */ +struct pp_argument_t { + size_t list_len; + token_t *token_list; +}; + struct pp_definition_t { symbol_t *symbol; source_position_t source_position; pp_definition_t *parent_expansion; size_t expand_pos; - bool is_variadic : 1; - bool is_expanding : 1; - size_t argument_count; - token_t *arguments; + bool is_variadic : 1; + bool is_expanding : 1; + bool has_parameters : 1; + size_t n_parameters; + symbol_t *parameters; + + /* replacement */ size_t list_len; - token_t *replacement_list; + token_t *token_list; + }; typedef struct pp_conditional_t pp_conditional_t; @@ -76,7 +85,6 @@ static pp_definition_t *current_expansion = NULL; static inline void next_char(void); static void next_preprocessing_token(void); static void print_line_directive(const source_position_t *pos, const char *add); -static void print_spaces(void); static bool open_input(const char *filename) { @@ -175,7 +183,7 @@ static inline void next_real_char(void) if (input.bufpos >= input.bufend) { size_t s = fread(input.buf + MAX_PUTBACK, 1, sizeof(input.buf) - MAX_PUTBACK, input.file); - if(s == 0) { + if (s == 0) { CC = EOF; return; } @@ -306,7 +314,8 @@ static inline bool is_octal_digit(int chr) * Returns the value of a digit. * The only portable way to do it ... */ -static int digit_value(int digit) { +static int digit_value(int digit) +{ switch (digit) { case '0': return 0; case '1': return 1; @@ -731,7 +740,7 @@ restart: current_expansion = definition; goto restart; } - pp_token = definition->replacement_list[definition->expand_pos]; + pp_token = definition->token_list[definition->expand_pos]; ++definition->expand_pos; if(pp_token.type != TP_IDENTIFIER) @@ -749,6 +758,135 @@ restart: } } +static void skip_line_comment(void) +{ + if(do_print_spaces) + counted_spaces++; + + while(1) { + switch(CC) { + case EOF: + return; + + case '\n': + case '\r': + return; + + default: + next_char(); + break; + } + } +} + +static void skip_multiline_comment(void) +{ + if(do_print_spaces) + counted_spaces++; + + unsigned start_linenr = input.position.linenr; + while(1) { + switch(CC) { + case '/': + next_char(); + if (CC == '*') { + /* TODO: nested comment, warn here */ + } + break; + case '*': + next_char(); + if(CC == '/') { + next_char(); + return; + } + break; + + MATCH_NEWLINE( + if(do_print_spaces) { + counted_newlines++; + counted_spaces = 0; + } + break; + ) + + case EOF: { + source_position_t source_position; + source_position.input_name = pp_token.source_position.input_name; + source_position.linenr = start_linenr; + errorf(&source_position, "at end of file while looking for comment end"); + return; + } + + default: + next_char(); + break; + } + } +} + +/* skip spaces advancing at the start of the next preprocessing token */ +static void skip_spaces(bool skip_newline) +{ + while (true) { + switch (CC) { + case ' ': + case '\t': + if(do_print_spaces) + counted_spaces++; + next_char(); + continue; + case '/': + next_char(); + if (CC == '/') { + next_char(); + skip_line_comment(); + continue; + } else if (CC == '*') { + next_char(); + skip_multiline_comment(); + continue; + } else { + put_back(CC); + CC = '/'; + } + return; + + case '\r': + if (!skip_newline) + return; + + next_char(); + if(CC == '\n') { + next_char(); + } + ++input.position.linenr; + if (do_print_spaces) + ++counted_newlines; + continue; + + case '\n': + if (!skip_newline) + return; + + next_char(); + ++input.position.linenr; + if (do_print_spaces) + ++counted_newlines; + continue; + + default: + return; + } + } +} + +static void eat_pp(int type) +{ + (void) type; + assert(pp_token.type == type); + next_preprocessing_token(); +} + static void parse_symbol(void) { obstack_1grow(&symbol_obstack, (char) CC); @@ -772,11 +910,11 @@ end_symbol: char *string = obstack_finish(&symbol_obstack); /* might be a wide string or character constant ( L"string"/L'c' ) */ - if(CC == '"' && string[0] == 'L' && string[1] == '\0') { + if (CC == '"' && string[0] == 'L' && string[1] == '\0') { obstack_free(&symbol_obstack, string); parse_wide_string_literal(); return; - } else if(CC == '\'' && string[0] == 'L' && string[1] == '\0') { + } else if (CC == '\'' && string[0] == 'L' && string[1] == '\0') { obstack_free(&symbol_obstack, string); parse_wide_character_constant(); return; @@ -789,17 +927,34 @@ end_symbol: /* we can free the memory from symbol obstack if we already had an entry in * the symbol table */ - if(symbol->string != string) { + if (symbol->string != string) { obstack_free(&symbol_obstack, string); } + if (!do_expansions) + return; pp_definition_t *pp_definition = symbol->pp_definition; - if(do_expansions && pp_definition != NULL) { - pp_definition->expand_pos = 0; - pp_definition->is_expanding = true, - current_expansion = pp_definition; - expand_next(); + if (pp_definition == NULL) + return; + + if (pp_definition->has_parameters) { + skip_spaces(true); + /* no opening brace -> no expansion */ + if (CC != '(') + return; + next_preprocessing_token(); + eat_pp('('); + + /* parse arguments (TODO) */ + while (pp_token.type != TP_EOF && pp_token.type != ')') + next_preprocessing_token(); + next_preprocessing_token(); } + + pp_definition->expand_pos = 0; + pp_definition->is_expanding = true, + current_expansion = pp_definition; + expand_next(); } static void parse_number(void) @@ -843,67 +998,6 @@ end_number: pp_token.v.string.size = size; } -static void skip_multiline_comment(void) -{ - unsigned start_linenr = input.position.linenr; - - while(1) { - switch(CC) { - case '/': - next_char(); - if (CC == '*') { - /* TODO: nested comment, warn here */ - } - break; - case '*': - next_char(); - if(CC == '/') { - next_char(); - return; - } - break; - - MATCH_NEWLINE( - if(do_print_spaces) { - counted_newlines++; - counted_spaces = 0; - } - break; - ) - - case EOF: { - source_position_t source_position; - source_position.input_name = pp_token.source_position.input_name; - source_position.linenr = start_linenr; - errorf(&source_position, "at end of file while looking for comment end"); - return; - } - - default: - next_char(); - break; - } - } -} - -static void skip_line_comment(void) -{ - while(1) { - switch(CC) { - case EOF: - return; - - case '\n': - case '\r': - return; - - default: - next_char(); - break; - } - } -} - #define MAYBE_PROLOG \ @@ -1028,14 +1122,10 @@ restart: case '*': next_char(); skip_multiline_comment(); - if(do_print_spaces) - counted_spaces++; goto restart; case '/': next_char(); skip_line_comment(); - if(do_print_spaces) - counted_spaces++; goto restart; ELSE('/') case '%': @@ -1161,7 +1251,7 @@ static void print_quoted_string(const char *const string) static void print_line_directive(const source_position_t *pos, const char *add) { - fprintf(out, "# %d ", pos->linenr); + fprintf(out, "# %u ", pos->linenr); print_quoted_string(pos->input_name); if (add != NULL) { fputc(' ', out); @@ -1220,13 +1310,6 @@ static void emit_pp_token(void) } } -static void eat_pp(preprocessor_token_type_t type) -{ - (void) type; - assert(pp_token.type == type); - next_preprocessing_token(); -} - static void eat_pp_directive(void) { while(pp_token.type != '\n' && pp_token.type != TP_EOF) { @@ -1297,8 +1380,8 @@ static bool pp_definitions_equal(const pp_definition_t *definition1, return false; size_t len = definition1->list_len; - const token_t *t1 = definition1->replacement_list; - const token_t *t2 = definition2->replacement_list; + const token_t *t1 = definition1->token_list; + const token_t *t2 = definition2->token_list; for(size_t i = 0; i < len; ++i, ++t1, ++t2) { if(!pp_tokens_equal(t1, t2)) return false; @@ -1309,12 +1392,12 @@ static bool pp_definitions_equal(const pp_definition_t *definition1, static void parse_define_directive(void) { eat_pp(TP_define); + assert(obstack_object_size(&pp_obstack) == 0); - if(pp_token.type != TP_IDENTIFIER) { + if (pp_token.type != TP_IDENTIFIER) { errorf(&pp_token.source_position, - "expected identifier after #define, got '%T'", &pp_token); - eat_pp_directive(); - return; + "expected identifier after #define, got '%t'", &pp_token); + goto error_out; } symbol_t *symbol = pp_token.v.symbol; @@ -1326,22 +1409,55 @@ static void parse_define_directive(void) /* this is probably the only place where spaces are significant in the * lexer (except for the fact that they separate tokens). #define b(x) * is something else than #define b (x) */ - //token_t *arguments = NULL; - if(CC == '(') { + if (CC == '(') { + /* eat the '(' */ + next_preprocessing_token(); + /* get next token after '(' */ next_preprocessing_token(); - while(pp_token.type != ')') { - if(pp_token.type == TP_DOTDOTDOT) { + + while (true) { + switch (pp_token.type) { + case TP_DOTDOTDOT: new_definition->is_variadic = true; next_preprocessing_token(); - if(pp_token.type != ')') { + if (pp_token.type != ')') { errorf(&input.position, "'...' not at end of macro argument list"); - continue; + goto error_out; + } + break; + case TP_IDENTIFIER: + obstack_ptr_grow(&pp_obstack, pp_token.v.symbol); + next_preprocessing_token(); + + if (pp_token.type == ',') { + next_preprocessing_token(); + break; } - } else if(pp_token.type != TP_IDENTIFIER) { + + if (pp_token.type != ')') { + errorf(&pp_token.source_position, + "expected ',' or ')' after identifier, got '%t'", + &pp_token); + goto error_out; + } + break; + case ')': next_preprocessing_token(); + goto finish_argument_list; + default: + errorf(&pp_token.source_position, + "expected identifier, '...' or ')' in #define argument list, got '%t'", + &pp_token); + goto error_out; } } + + finish_argument_list: + new_definition->has_parameters = true; + new_definition->n_parameters + = obstack_object_size(&pp_obstack) / sizeof(new_definition->parameters[0]); + new_definition->parameters = obstack_finish(&pp_obstack); } else { next_preprocessing_token(); } @@ -1349,18 +1465,18 @@ static void parse_define_directive(void) /* construct a new pp_definition on the obstack */ assert(obstack_object_size(&pp_obstack) == 0); size_t list_len = 0; - while(pp_token.type != '\n' && pp_token.type != TP_EOF) { + while (pp_token.type != '\n' && pp_token.type != TP_EOF) { obstack_grow(&pp_obstack, &pp_token, sizeof(pp_token)); ++list_len; next_preprocessing_token(); } - new_definition->list_len = list_len; - new_definition->replacement_list = obstack_finish(&pp_obstack); + new_definition->list_len = list_len; + new_definition->token_list = obstack_finish(&pp_obstack); pp_definition_t *old_definition = symbol->pp_definition; - if(old_definition != NULL) { - if(!pp_definitions_equal(old_definition, new_definition)) { + if (old_definition != NULL) { + if (!pp_definitions_equal(old_definition, new_definition)) { warningf(&input.position, "multiple definition of macro '%Y' (first defined %P)", symbol, &old_definition->source_position); } else { @@ -1371,6 +1487,14 @@ static void parse_define_directive(void) } symbol->pp_definition = new_definition; + return; + +error_out: + if (obstack_object_size(&pp_obstack) > 0) { + char *ptr = obstack_finish(&pp_obstack); + obstack_free(&pp_obstack, ptr); + } + eat_pp_directive(); } static void parse_undef_directive(void) @@ -1379,7 +1503,7 @@ static void parse_undef_directive(void) if(pp_token.type != TP_IDENTIFIER) { errorf(&input.position, - "expected identifier after #undef, got '%T'", &pp_token); + "expected identifier after #undef, got '%t'", &pp_token); eat_pp_directive(); return; } @@ -1397,16 +1521,17 @@ static void parse_undef_directive(void) static const char *parse_headername(void) { - /* behind an #include we can have the special headername lexems, check - * for them here */ + /* behind an #include we can have the special headername lexems. + * They're only allowed behind an #include so they're not recognized + * by the normal next_preprocessing_token. We handle them as a special + * exception here */ - /* skip spaces */ - while (CC == ' ' || CC == '\t') { - next_char(); - } + /* skip spaces so we reach start of next preprocessing token */ + skip_spaces(false); assert(obstack_object_size(&input_obstack) == 0); + /* check wether we have a "... or <... headername */ switch (CC) { case '<': /* for now until we have proper searchpath handling */ @@ -1470,7 +1595,7 @@ finished_headername: return headername; } -static void parse_include_directive(void) +static bool parse_include_directive(void) { /* don't eat the TP_include here! * we need an alternative parsing for the next token */ @@ -1480,7 +1605,7 @@ static void parse_include_directive(void) const char *headername = parse_headername(); if (headername == NULL) { eat_pp_directive(); - return; + return false; } if (pp_token.type != '\n' && pp_token.type != TP_EOF) { @@ -1493,9 +1618,16 @@ static void parse_include_directive(void) errorf(&pp_token.source_position, "#include nested too deeply"); /* eat \n or EOF */ next_preprocessing_token(); - return; + return false; } + /* we have to reenable space counting and macro expansion here, + * because it is still disabled in directive parsing, + * but we will trigger a preprocessing token reading of the new file + * now and need expansions/space counting */ + do_print_spaces = true; + do_expansions = true; + /* switch inputs */ push_input(); bool res = open_input(headername); @@ -1503,8 +1635,10 @@ static void parse_include_directive(void) errorf(&pp_token.source_position, "failed including '%s': %s", headername, strerror(errno)); pop_restore_input(); - return; + return false; } + + return true; } static pp_conditional_t *push_conditional(void) @@ -1555,7 +1689,7 @@ static void parse_ifdef_ifndef_directive(void) if (pp_token.type != TP_IDENTIFIER) { errorf(&pp_token.source_position, - "expected identifier after #%s, got '%T'", + "expected identifier after #%s, got '%t'", is_ifndef ? "ifndef" : "ifdef", &pp_token); eat_pp_directive(); @@ -1682,15 +1816,19 @@ static void parse_preprocessing_directive(void) case TP_endif: parse_endif_directive(); break; - case TP_include: - parse_include_directive(); - /* no need to parse ending '\n' */ - do_print_spaces = true; - do_expansions = true; - return; + case TP_include: { + bool in_new_source = parse_include_directive(); + /* no need to do anything if source file switched */ + if (in_new_source) + return; + break; + } + case '\n': + /* the nop directive */ + break; default: errorf(&pp_token.source_position, - "invalid preprocessing directive #%T", &pp_token); + "invalid preprocessing directive #%t", &pp_token); eat_pp_directive(); break; }