X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=preprocessor.c;h=47275a1b91899ea99543aac8fe900629adec2511;hb=3c9d1105cdebe8a419b5e8a2b53e52ef4fa386a0;hp=3829329242d79061e8041e78d24efe954b0a1659;hpb=2fb66fd8bd2a5956ab2cad26978ccfb7e105d45f;p=cparser diff --git a/preprocessor.c b/preprocessor.c index 3829329..47275a1 100644 --- a/preprocessor.c +++ b/preprocessor.c @@ -90,7 +90,7 @@ static pp_conditional_t *conditional_stack; static token_t pp_token; static bool resolve_escape_sequences = false; -static bool ignore_unknown_chars = true; +static bool error_on_unknown_chars = true; static bool skip_mode; static FILE *out; static struct obstack pp_obstack; @@ -99,7 +99,7 @@ static const char *printed_input_name = NULL; static source_position_t expansion_pos; static pp_definition_t *current_expansion = NULL; static strset_t stringset; -static preprocessor_token_kind_t last_token; +static token_kind_t last_token; static searchpath_entry_t *searchpath; @@ -460,7 +460,9 @@ static string_t sym_make_string(string_encoding_t const enc) return (string_t){ result, len, enc }; } -static void parse_string(utf32 const delimiter, preprocessor_token_kind_t const kind, string_encoding_t const enc, char const *const context) +static void parse_string(utf32 const delimiter, token_kind_t const kind, + string_encoding_t const enc, + char const *const context) { const unsigned start_linenr = input.position.lineno; @@ -477,7 +479,7 @@ static void parse_string(utf32 const delimiter, preprocessor_token_kind_t const } obstack_1grow(&symbol_obstack, tc); } else { - obstack_grow_symbol(&symbol_obstack, tc); + obstack_grow_utf8(&symbol_obstack, tc); } } else { obstack_1grow(&symbol_obstack, (char)input.c); @@ -505,7 +507,7 @@ static void parse_string(utf32 const delimiter, preprocessor_token_kind_t const next_char(); goto end_of_string; } else { - obstack_grow_symbol(&symbol_obstack, input.c); + obstack_grow_utf8(&symbol_obstack, input.c); next_char(); break; } @@ -513,25 +515,25 @@ static void parse_string(utf32 const delimiter, preprocessor_token_kind_t const } end_of_string: - pp_token.kind = kind; - pp_token.string.string = sym_make_string(enc); + pp_token.kind = kind; + pp_token.literal.string = sym_make_string(enc); } static void parse_string_literal(string_encoding_t const enc) { - parse_string('"', TP_STRING_LITERAL, enc, "string literal"); + parse_string('"', T_STRING_LITERAL, enc, "string literal"); } static void parse_character_constant(string_encoding_t const enc) { - parse_string('\'', TP_CHARACTER_CONSTANT, enc, "character constant"); - if (pp_token.string.string.size == 0) { + parse_string('\'', T_CHARACTER_CONSTANT, enc, "character constant"); + if (pp_token.literal.string.size == 0) { parse_error("empty character constant"); } } -#define SYMBOL_CHARS_WITHOUT_E_P \ - case 'a': \ +#define SYMBOL_CASES_WITHOUT_E_P \ + 'a': \ case 'b': \ case 'c': \ case 'd': \ @@ -579,17 +581,17 @@ static void parse_character_constant(string_encoding_t const enc) case 'X': \ case 'Y': \ case 'Z': \ - case '_': + case '_' -#define SYMBOL_CHARS \ - SYMBOL_CHARS_WITHOUT_E_P \ +#define SYMBOL_CASES \ + SYMBOL_CASES_WITHOUT_E_P: \ case 'e': \ case 'p': \ case 'E': \ - case 'P': + case 'P' -#define DIGITS \ - case '0': \ +#define DIGIT_CASES \ + '0': \ case '1': \ case '2': \ case '3': \ @@ -598,7 +600,7 @@ static void parse_character_constant(string_encoding_t const enc) case '6': \ case '7': \ case '8': \ - case '9': + case '9' /** * returns next final token from a preprocessor macro expansion @@ -632,7 +634,7 @@ restart: pp_token.base.source_position = expansion_pos; ++definition->expand_pos; - if (pp_token.kind != TP_IDENTIFIER) + if (pp_token.kind != T_IDENTIFIER) return; /* if it was an identifier then we might need to expand again */ @@ -738,10 +740,17 @@ static void skip_whitespace(void) } } -static void eat_pp(preprocessor_token_kind_t const type) +static inline void eat_pp(pp_token_kind_t const kind) { - (void) type; - assert(pp_token.kind == type); + assert(pp_token.base.symbol->pp_ID == kind); + (void) kind; + next_preprocessing_token(); +} + +static inline void eat_token(token_kind_t const kind) +{ + assert(pp_token.kind == kind); + (void)kind; next_preprocessing_token(); } @@ -752,8 +761,8 @@ static void parse_symbol(void) while (true) { switch (input.c) { - DIGITS - SYMBOL_CHARS + case DIGIT_CASES: + case SYMBOL_CASES: obstack_1grow(&symbol_obstack, (char) input.c); next_char(); break; @@ -780,7 +789,7 @@ end_symbol: symbol_t *symbol = symbol_table_insert(string); - pp_token.kind = symbol->pp_ID; + pp_token.kind = symbol->ID; pp_token.base.symbol = symbol; /* we can free the memory from symbol obstack if we already had an entry in @@ -798,8 +807,8 @@ static void parse_number(void) while (true) { switch (input.c) { case '.': - DIGITS - SYMBOL_CHARS_WITHOUT_E_P + case DIGIT_CASES: + case SYMBOL_CASES_WITHOUT_E_P: obstack_1grow(&symbol_obstack, (char) input.c); next_char(); break; @@ -822,8 +831,8 @@ static void parse_number(void) } end_number: - pp_token.kind = TP_NUMBER; - pp_token.number.number = sym_make_string(STRING_ENCODING_CHAR); + pp_token.kind = T_NUMBER; + pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR); } @@ -876,11 +885,11 @@ restart: info.had_whitespace = true; goto restart; - SYMBOL_CHARS + case SYMBOL_CASES: parse_symbol(); return; - DIGITS + case DIGIT_CASES: parse_number(); return; @@ -911,7 +920,7 @@ restart: case '.': MAYBE_PROLOG - MAYBE('.', TP_DOTDOTDOT) + MAYBE('.', T_DOTDOTDOT) ELSE_CODE( put_back(input.c); input.c = '.'; @@ -920,31 +929,31 @@ restart: ELSE('.') case '&': MAYBE_PROLOG - MAYBE('&', TP_ANDAND) - MAYBE('=', TP_ANDEQUAL) + MAYBE('&', T_ANDAND) + MAYBE('=', T_ANDEQUAL) ELSE('&') case '*': MAYBE_PROLOG - MAYBE('=', TP_ASTERISKEQUAL) + MAYBE('=', T_ASTERISKEQUAL) ELSE('*') case '+': MAYBE_PROLOG - MAYBE('+', TP_PLUSPLUS) - MAYBE('=', TP_PLUSEQUAL) + MAYBE('+', T_PLUSPLUS) + MAYBE('=', T_PLUSEQUAL) ELSE('+') case '-': MAYBE_PROLOG - MAYBE('>', TP_MINUSGREATER) - MAYBE('-', TP_MINUSMINUS) - MAYBE('=', TP_MINUSEQUAL) + MAYBE('>', T_MINUSGREATER) + MAYBE('-', T_MINUSMINUS) + MAYBE('=', T_MINUSEQUAL) ELSE('-') case '!': MAYBE_PROLOG - MAYBE('=', TP_EXCLAMATIONMARKEQUAL) + MAYBE('=', T_EXCLAMATIONMARKEQUAL) ELSE('!') case '/': MAYBE_PROLOG - MAYBE('=', TP_SLASHEQUAL) + MAYBE('=', T_SLASHEQUAL) case '*': next_char(); info.had_whitespace = true; @@ -959,12 +968,12 @@ restart: case '%': MAYBE_PROLOG MAYBE('>', '}') - MAYBE('=', TP_PERCENTEQUAL) + MAYBE('=', T_PERCENTEQUAL) case ':': MAYBE_PROLOG case '%': MAYBE_PROLOG - MAYBE(':', TP_HASHHASH) + MAYBE(':', T_HASHHASH) ELSE_CODE( put_back(input.c); input.c = '%'; @@ -976,28 +985,28 @@ restart: MAYBE_PROLOG MAYBE(':', '[') MAYBE('%', '{') - MAYBE('=', TP_LESSEQUAL) + MAYBE('=', T_LESSEQUAL) case '<': MAYBE_PROLOG - MAYBE('=', TP_LESSLESSEQUAL) - ELSE(TP_LESSLESS) + MAYBE('=', T_LESSLESSEQUAL) + ELSE(T_LESSLESS) ELSE('<') case '>': MAYBE_PROLOG - MAYBE('=', TP_GREATEREQUAL) + MAYBE('=', T_GREATEREQUAL) case '>': MAYBE_PROLOG - MAYBE('=', TP_GREATERGREATEREQUAL) - ELSE(TP_GREATERGREATER) + MAYBE('=', T_GREATERGREATEREQUAL) + ELSE(T_GREATERGREATER) ELSE('>') case '^': MAYBE_PROLOG - MAYBE('=', TP_CARETEQUAL) + MAYBE('=', T_CARETEQUAL) ELSE('^') case '|': MAYBE_PROLOG - MAYBE('=', TP_PIPEEQUAL) - MAYBE('|', TP_PIPEPIPE) + MAYBE('=', T_PIPEEQUAL) + MAYBE('|', T_PIPEPIPE) ELSE('|') case ':': MAYBE_PROLOG @@ -1005,11 +1014,11 @@ restart: ELSE(':') case '=': MAYBE_PROLOG - MAYBE('=', TP_EQUALEQUAL) + MAYBE('=', T_EQUALEQUAL) ELSE('=') case '#': MAYBE_PROLOG - MAYBE('#', TP_HASHHASH) + MAYBE('#', T_HASHHASH) ELSE_CODE( pp_token.kind = '#'; ) @@ -1024,7 +1033,6 @@ restart: case '~': case ';': case ',': - case '\\': pp_token.kind = input.c; next_char(); return; @@ -1039,18 +1047,28 @@ restart: } else { pp_token.base.source_position.lineno++; info.at_line_begin = true; - pp_token.kind = TP_EOF; + pp_token.kind = T_EOF; } return; default: - next_char(); - if (!ignore_unknown_chars) { + if (error_on_unknown_chars) { errorf(&pp_token.base.source_position, - "unknown character '%c' found\n", input.c); + "unknown character '%lc' found\n", input.c); + next_char(); goto restart; } else { - pp_token.kind = input.c; + assert(obstack_object_size(&symbol_obstack) == 0); + obstack_grow_utf8(&symbol_obstack, input.c); + obstack_1grow(&symbol_obstack, '\0'); + char *const string = obstack_finish(&symbol_obstack); + symbol_t *const symbol = symbol_table_insert(string); + if (symbol->string != string) + obstack_free(&symbol_obstack, string); + + pp_token.kind = T_UNKNOWN_CHAR; + pp_token.base.symbol = symbol; + next_char(); return; } } @@ -1129,28 +1147,30 @@ static void emit_pp_token(void) fputc(' ', out); switch (pp_token.kind) { - case TP_IDENTIFIER: - fputs(pp_token.base.symbol->string, out); - break; - case TP_NUMBER: - fputs(pp_token.number.number.begin, out); + case T_NUMBER: + fputs(pp_token.literal.string.begin, out); break; - case TP_STRING_LITERAL: - fputs(get_string_encoding_prefix(pp_token.string.string.encoding), out); + case T_STRING_LITERAL: + fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out); fputc('"', out); - fputs(pp_token.string.string.begin, out); + fputs(pp_token.literal.string.begin, out); fputc('"', out); break; - case TP_CHARACTER_CONSTANT: - fputs(get_string_encoding_prefix(pp_token.string.string.encoding), out); + case T_CHARACTER_CONSTANT: + fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out); fputc('\'', out); - fputs(pp_token.string.string.begin, out); + fputs(pp_token.literal.string.begin, out); fputc('\'', out); break; + default: - print_pp_token_kind(out, pp_token.kind); + if (pp_token.base.symbol) { + fputs(pp_token.base.symbol->string, out); + } else { + print_token_kind(out, pp_token.kind); + } break; } last_token = pp_token.kind; @@ -1184,13 +1204,13 @@ static bool pp_tokens_equal(const token_t *token1, const token_t *token2) return false; switch (token1->kind) { - case TP_IDENTIFIER: + case T_IDENTIFIER: return token1->base.symbol == token2->base.symbol; - case TP_NUMBER: - case TP_CHARACTER_CONSTANT: - case TP_STRING_LITERAL: - return strings_equal(&token1->string.string, &token2->string.string); + case T_NUMBER: + case T_CHARACTER_CONSTANT: + case T_STRING_LITERAL: + return strings_equal(&token1->literal.string, &token2->literal.string); default: return true; @@ -1216,11 +1236,16 @@ static bool pp_definitions_equal(const pp_definition_t *definition1, static void parse_define_directive(void) { eat_pp(TP_define); + if (skip_mode) { + eat_pp_directive(); + return; + } + assert(obstack_object_size(&pp_obstack) == 0); - if (pp_token.kind != TP_IDENTIFIER || info.at_line_begin) { + if (pp_token.kind != T_IDENTIFIER || info.at_line_begin) { errorf(&pp_token.base.source_position, - "expected identifier after #define, got '%t'", &pp_token); + "expected identifier after #define, got %K", &pp_token); goto error_out; } symbol_t *const symbol = pp_token.base.symbol; @@ -1234,44 +1259,45 @@ static void parse_define_directive(void) * lexer (except for the fact that they separate tokens). #define b(x) * is something else than #define b (x) */ if (input.c == '(') { - /* eat the '(' */ - next_preprocessing_token(); - /* get next token after '(' */ - next_preprocessing_token(); + eat_token(T_IDENTIFIER); + eat_token('('); while (true) { switch (pp_token.kind) { - case TP_DOTDOTDOT: + case T_DOTDOTDOT: new_definition->is_variadic = true; - next_preprocessing_token(); + eat_token(T_DOTDOTDOT); if (pp_token.kind != ')') { errorf(&input.position, "'...' not at end of macro argument list"); goto error_out; } break; - case TP_IDENTIFIER: + + case T_IDENTIFIER: obstack_ptr_grow(&pp_obstack, pp_token.base.symbol); - next_preprocessing_token(); + eat_token(T_IDENTIFIER); if (pp_token.kind == ',') { - next_preprocessing_token(); + eat_token(','); break; } if (pp_token.kind != ')') { errorf(&pp_token.base.source_position, - "expected ',' or ')' after identifier, got '%t'", + "expected ',' or ')' after identifier, got %K", &pp_token); goto error_out; } break; + case ')': - next_preprocessing_token(); + eat_token(')'); goto finish_argument_list; + default: errorf(&pp_token.base.source_position, - "expected identifier, '...' or ')' in #define argument list, got '%t'", + "expected identifier, '...' or ')' in #define argument list, got %K", &pp_token); goto error_out; } @@ -1283,7 +1309,7 @@ static void parse_define_directive(void) = obstack_object_size(&pp_obstack) / sizeof(new_definition->parameters[0]); new_definition->parameters = obstack_finish(&pp_obstack); } else { - next_preprocessing_token(); + eat_token(T_IDENTIFIER); } /* construct a new pp_definition on the obstack */ @@ -1323,16 +1349,20 @@ error_out: static void parse_undef_directive(void) { eat_pp(TP_undef); + if (skip_mode) { + eat_pp_directive(); + return; + } - if (pp_token.kind != TP_IDENTIFIER) { + if (pp_token.kind != T_IDENTIFIER) { errorf(&input.position, - "expected identifier after #undef, got '%t'", &pp_token); + "expected identifier after #undef, got %K", &pp_token); eat_pp_directive(); return; } pp_token.base.symbol->pp_definition = NULL; - next_preprocessing_token(); + eat_token(T_IDENTIFIER); if (!info.at_line_begin) { warningf(WARN_OTHER, &input.position, "extra tokens at end of #undef directive"); @@ -1340,16 +1370,16 @@ static void parse_undef_directive(void) eat_pp_directive(); } +/** behind an #include we can have the special headername lexems. + * They're only allowed behind an #include so they're not recognized + * by the normal next_preprocessing_token. We handle them as a special + * exception here */ static void parse_headername(void) { const source_position_t start_position = input.position; string_t string = { NULL, 0, STRING_ENCODING_CHAR }; assert(obstack_object_size(&symbol_obstack) == 0); - /* behind an #include we can have the special headername lexems. - * They're only allowed behind an #include so they're not recognized - * by the normal next_preprocessing_token. We handle them as a special - * exception here */ if (info.at_line_begin) { parse_error("expected headername after #include"); goto finish_error; @@ -1394,8 +1424,8 @@ finished_headername: finish_error: pp_token.base.source_position = start_position; - pp_token.kind = TP_HEADERNAME; - pp_token.string.string = string; + pp_token.kind = T_HEADERNAME; + pp_token.literal.string = string; } static bool do_include(bool system_include, const char *headername) @@ -1460,17 +1490,22 @@ static void skip_till_newline(void) } } -static bool parse_include_directive(void) +static void parse_include_directive(void) { + if (skip_mode) { + eat_pp_directive(); + return; + } + /* don't eat the TP_include here! * we need an alternative parsing for the next token */ skip_whitespace(); bool system_include = input.c == '<'; parse_headername(); - string_t headername = pp_token.string.string; + string_t headername = pp_token.literal.string; if (headername.begin == NULL) { eat_pp_directive(); - return false; + return; } skip_whitespace(); @@ -1484,21 +1519,17 @@ static bool parse_include_directive(void) errorf(&pp_token.base.source_position, "#include nested too deeply"); /* eat \n or EOF */ next_preprocessing_token(); - return false; + return; } /* switch inputs */ emit_newlines(); push_input(); - bool res = do_include(system_include, pp_token.string.string.begin); + bool res = do_include(system_include, pp_token.literal.string.begin); if (!res) { - errorf(&pp_token.base.source_position, - "failed including '%S': %s", pp_token.string, strerror(errno)); + errorf(&pp_token.base.source_position, "failed including '%S': %s", &pp_token.literal, strerror(errno)); pop_restore_input(); - return false; } - - return true; } static pp_conditional_t *push_conditional(void) @@ -1533,11 +1564,10 @@ static void check_unclosed_conditionals(void) } } -static void parse_ifdef_ifndef_directive(void) +static void parse_ifdef_ifndef_directive(bool const is_ifdef) { - bool is_ifndef = (pp_token.kind == TP_ifndef); bool condition; - next_preprocessing_token(); + eat_pp(is_ifdef ? TP_ifdef : TP_ifndef); if (skip_mode) { eat_pp_directive(); @@ -1547,24 +1577,23 @@ static void parse_ifdef_ifndef_directive(void) return; } - if (pp_token.kind != TP_IDENTIFIER || info.at_line_begin) { + if (pp_token.kind != T_IDENTIFIER || info.at_line_begin) { errorf(&pp_token.base.source_position, - "expected identifier after #%s, got '%t'", - is_ifndef ? "ifndef" : "ifdef", &pp_token); + "expected identifier after #%s, got %K", + is_ifdef ? "ifdef" : "ifndef", &pp_token); eat_pp_directive(); /* just take the true case in the hope to avoid further errors */ condition = true; } else { /* evaluate wether we are in true or false case */ - condition = !pp_token.base.symbol->pp_definition == is_ifndef; - - next_preprocessing_token(); + condition = (bool)pp_token.base.symbol->pp_definition == is_ifdef; + eat_token(T_IDENTIFIER); if (!info.at_line_begin) { errorf(&pp_token.base.source_position, "extra tokens at end of #%s", - is_ifndef ? "ifndef" : "ifdef"); + is_ifdef ? "ifdef" : "ifndef"); eat_pp_directive(); } } @@ -1598,7 +1627,7 @@ static void parse_else_directive(void) if (conditional->in_else) { errorf(&pp_token.base.source_position, "#else after #else (condition started %P)", - conditional->source_position); + &conditional->source_position); skip_mode = true; return; } @@ -1635,60 +1664,30 @@ static void parse_endif_directive(void) static void parse_preprocessing_directive(void) { - eat_pp('#'); + eat_token('#'); if (info.at_line_begin) { /* empty directive */ return; } - if (skip_mode) { - switch (pp_token.kind) { - case TP_ifdef: - case TP_ifndef: - parse_ifdef_ifndef_directive(); - break; - case TP_else: - parse_else_directive(); - break; - case TP_endif: - parse_endif_directive(); - break; - default: - eat_pp_directive(); - break; + if (pp_token.base.symbol) { + switch (pp_token.base.symbol->pp_ID) { + case TP_define: parse_define_directive(); break; + case TP_else: parse_else_directive(); break; + case TP_endif: parse_endif_directive(); break; + case TP_ifdef: parse_ifdef_ifndef_directive(true); break; + case TP_ifndef: parse_ifdef_ifndef_directive(false); break; + case TP_include: parse_include_directive(); break; + case TP_undef: parse_undef_directive(); break; + default: goto skip; } } else { - switch (pp_token.kind) { - case TP_define: - parse_define_directive(); - break; - case TP_undef: - parse_undef_directive(); - break; - case TP_ifdef: - case TP_ifndef: - parse_ifdef_ifndef_directive(); - break; - case TP_else: - parse_else_directive(); - break; - case TP_endif: - parse_endif_directive(); - break; - case TP_include: - parse_include_directive(); - break; - default: - if (info.at_line_begin) { - /* the nop directive "#" */ - break; - } - errorf(&pp_token.base.source_position, - "invalid preprocessing directive #%t", &pp_token); - eat_pp_directive(); - break; +skip: + if (!skip_mode) { + errorf(&pp_token.base.source_position, "invalid preprocessing directive #%K", &pp_token); } + eat_pp_directive(); } assert(info.at_line_begin); @@ -1749,6 +1748,8 @@ int pptest_main(int argc, char **argv) obstack_init(&input_obstack); strset_init(&stringset); + error_on_unknown_chars = false; + setup_include_path(); /* simplistic commandline parser */ @@ -1803,9 +1804,9 @@ int pptest_main(int argc, char **argv) if (pp_token.kind == '#' && info.at_line_begin) { parse_preprocessing_directive(); continue; - } else if (pp_token.kind == TP_EOF) { + } else if (pp_token.kind == T_EOF) { goto end_of_main_loop; - } else if (pp_token.kind == TP_IDENTIFIER) { + } else if (pp_token.kind == T_IDENTIFIER) { symbol_t *const symbol = pp_token.base.symbol; pp_definition_t *const pp_definition = symbol->pp_definition; if (pp_definition != NULL && !pp_definition->is_expanding) { @@ -1813,20 +1814,20 @@ int pptest_main(int argc, char **argv) if (pp_definition->has_parameters) { source_position_t position = pp_token.base.source_position; add_token_info_t old_info = info; - next_preprocessing_token(); + eat_token(T_IDENTIFIER); add_token_info_t new_info = info; /* no opening brace -> no expansion */ if (pp_token.kind == '(') { - eat_pp('('); + eat_token('('); /* parse arguments (TODO) */ - while (pp_token.kind != TP_EOF && pp_token.kind != ')') + while (pp_token.kind != T_EOF && pp_token.kind != ')') next_preprocessing_token(); } else { token_t next_token = pp_token; /* restore identifier token */ - pp_token.kind = TP_IDENTIFIER; + pp_token.kind = T_IDENTIFIER; pp_token.base.symbol = symbol; pp_token.base.source_position = position; info = old_info;