X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=lexer.c;h=8a339aa110058836c6f208ad7eb15f9eaa34232f;hb=6f8db408325831c25558eaa99b1d291562f7f65a;hp=e29ec4a3ea66417dabdba175404bcf2d575cb668;hpb=5894d20a1ca98cb5eb0916b083db5838657d6730;p=cparser diff --git a/lexer.c b/lexer.c index e29ec4a..8a339aa 100644 --- a/lexer.c +++ b/lexer.c @@ -19,6 +19,7 @@ */ #include +#include "adt/strutil.h" #include "input.h" #include "diagnostic.h" #include "lexer.h" @@ -57,7 +58,6 @@ static source_position_t lexer_pos; token_t lexer_token; static symbol_t *symbol_L; static strset_t stringset; -static char *encoding; bool allow_dollar_in_symbol = true; /** @@ -324,8 +324,8 @@ finish_suffix: } obstack_1grow(&symbol_obstack, '\0'); - size_t size = obstack_object_size(&symbol_obstack); - char *string = obstack_finish(&symbol_obstack); + size_t size = obstack_object_size(&symbol_obstack) - 1; + char *string = obstack_finish(&symbol_obstack); lexer_token.number.suffix = identify_string(string, size); } @@ -632,26 +632,6 @@ string_t make_string(const char *string) return identify_string(space, len); } -static void grow_symbol(utf32 const tc) -{ - struct obstack *const o = &symbol_obstack; - if (tc < 0x80U) { - obstack_1grow(o, tc); - } else if (tc < 0x800) { - obstack_1grow(o, 0xC0 | (tc >> 6)); - obstack_1grow(o, 0x80 | (tc & 0x3F)); - } else if (tc < 0x10000) { - obstack_1grow(o, 0xE0 | ( tc >> 12)); - obstack_1grow(o, 0x80 | ((tc >> 6) & 0x3F)); - obstack_1grow(o, 0x80 | ( tc & 0x3F)); - } else { - obstack_1grow(o, 0xF0 | ( tc >> 18)); - obstack_1grow(o, 0x80 | ((tc >> 12) & 0x3F)); - obstack_1grow(o, 0x80 | ((tc >> 6) & 0x3F)); - obstack_1grow(o, 0x80 | ( tc & 0x3F)); - } -} - /** * Parse a string literal and set lexer_token. */ @@ -681,7 +661,7 @@ static void parse_string_literal(void) goto end_of_string; default: - grow_symbol(c); + obstack_grow_symbol(&symbol_obstack, c); next_char(); break; } @@ -711,7 +691,7 @@ static void parse_wide_character_constant(void) switch (c) { case '\\': { const utf32 tc = parse_escape_sequence(); - grow_symbol(tc); + obstack_grow_symbol(&symbol_obstack, tc); break; } @@ -732,7 +712,7 @@ static void parse_wide_character_constant(void) } default: - grow_symbol(c); + obstack_grow_symbol(&symbol_obstack, c); next_char(); break; } @@ -796,7 +776,7 @@ static void parse_character_constant(void) } default: - grow_symbol(c); + obstack_grow_symbol(&symbol_obstack, c); next_char(); break; @@ -904,37 +884,6 @@ static void eat_until_newline(void) } } -/** - * Handle the define directive. - */ -static void define_directive(void) -{ - lexer_next_preprocessing_token(); - if (lexer_token.kind != T_IDENTIFIER) { - parse_error("expected identifier after #define\n"); - eat_until_newline(); - } -} - -/** - * Handle the ifdef directive. - */ -static void ifdef_directive(int is_ifndef) -{ - (void) is_ifndef; - lexer_next_preprocessing_token(); - //expect_identifier(); - //extect_newline(); -} - -/** - * Handle the endif directive. - */ -static void endif_directive(void) -{ - //expect_newline(); -} - /** * Parse the line directive. */ @@ -949,7 +898,24 @@ static void parse_line_directive(void) } if (pp_token.kind == T_STRING_LITERAL) { lexer_pos.input_name = pp_token.string.string.begin; + lexer_pos.is_system_header = false; next_pp_token(); + + /* attempt to parse numeric flags as outputted by gcc preprocessor */ + while (pp_token.kind == T_INTEGER) { + /* flags: + * 1 - indicates start of a new file + * 2 - indicates return from a file + * 3 - indicates system header + * 4 - indicates implicit extern "C" in C++ mode + * + * currently we're only interested in "3" + */ + if (streq(pp_token.number.number.begin, "3")) { + lexer_pos.is_system_header = true; + } + next_pp_token(); + } } eat_until_newline(); @@ -1053,36 +1019,17 @@ static void parse_preprocessor_identifier(void) symbol_t *symbol = pp_token.identifier.symbol; switch (symbol->pp_ID) { - case TP_include: - printf("include - enable header name parsing!\n"); - break; - case TP_define: - define_directive(); - break; - case TP_ifdef: - ifdef_directive(0); - break; - case TP_ifndef: - ifdef_directive(1); - break; - case TP_endif: - endif_directive(); - break; case TP_line: next_pp_token(); parse_line_directive(); break; - case TP_if: - case TP_else: - case TP_elif: - case TP_undef: - case TP_error: - /* TODO; output the rest of the line */ - parse_error("#error directive: "); - break; case TP_pragma: parse_pragma(); break; + case TP_error: + /* TODO; output the rest of the line */ + parse_error("#error directive"); + break; } } @@ -1352,26 +1299,14 @@ static void input_error(unsigned delta_lines, unsigned delta_cols, errorf(&lexer_pos, "%s", message); } -void select_input_encoding(char const* new_encoding) +void lexer_switch_input(input_t *new_input, const char *input_name) { - if (encoding != NULL) - xfree(encoding); - encoding = xstrdup(new_encoding); -} - -void lexer_open_stream(FILE *stream, const char *input_name) -{ - if (input != NULL) { - input_free(input); - input = NULL; - } - lexer_pos.lineno = 0; lexer_pos.colno = 0; lexer_pos.input_name = input_name; set_input_error_callback(input_error); - input = input_from_stream(stream, encoding); + input = new_input; bufpos = NULL; bufend = NULL; @@ -1382,10 +1317,6 @@ void lexer_open_stream(FILE *stream, const char *input_name) void exit_lexer(void) { - if (input != NULL) { - input_free(input); - input = NULL; - } strset_destroy(&stringset); } @@ -1393,6 +1324,6 @@ static __attribute__((unused)) void dbg_pos(const source_position_t source_position) { fprintf(stdout, "%s:%u:%u\n", source_position.input_name, - source_position.lineno, source_position.colno); + source_position.lineno, (unsigned)source_position.colno); fflush(stdout); }