X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=lexer.c;h=38a8e1dc521e4fc450b6a38ec2912ac1db8c8d00;hb=b2e2a716bac8a32ba2e8e8349e00301df026d31f;hp=a26d28849fbd3f4afd22f0421809499cd6cb5d11;hpb=76607fad56fb6eb22d9b831e6cfacce916a89cbf;p=cparser diff --git a/lexer.c b/lexer.c index a26d288..38a8e1d 100644 --- a/lexer.c +++ b/lexer.c @@ -53,15 +53,16 @@ #define strtold(s, e) strtod(s, e) #endif -static utf32 c; -token_t lexer_token; -symbol_t *symbol_L; -static FILE *input; -static utf32 buf[BUF_SIZE + MAX_PUTBACK]; -static const utf32 *bufend; -static const utf32 *bufpos; -static strset_t stringset; -bool allow_dollar_in_symbol = true; +static utf32 c; +static source_position_t lexer_pos; +token_t lexer_token; +static symbol_t *symbol_L; +static FILE *input; +static utf32 buf[BUF_SIZE + MAX_PUTBACK]; +static const utf32 *bufend; +static const utf32 *bufpos; +static strset_t stringset; +bool allow_dollar_in_symbol = true; /** * Prints a parse error message at the current token. @@ -70,7 +71,7 @@ bool allow_dollar_in_symbol = true; */ static void parse_error(const char *msg) { - errorf(&lexer_token.source_position, "%s", msg); + errorf(&lexer_pos, "%s", msg); } /** @@ -80,7 +81,7 @@ static void parse_error(const char *msg) */ static NORETURN internal_error(const char *msg) { - internal_errorf(&lexer_token.source_position, "%s", msg); + internal_errorf(&lexer_pos, "%s", msg); } static size_t read_block(unsigned char *const read_buf, size_t const n) @@ -367,6 +368,7 @@ static inline void next_real_char(void) decoder(); } c = *bufpos++; + ++lexer_pos.colno; } /** @@ -378,6 +380,7 @@ static inline void put_back(utf32 const pc) { assert(bufpos > buf); *(--bufpos - buf + buf) = pc; + --lexer_pos.colno; #ifdef DEBUG_CHARS printf("putback '%lc'\n", pc); @@ -386,20 +389,18 @@ static inline void put_back(utf32 const pc) static inline void next_char(void); -#define MATCH_NEWLINE(code) \ - case '\r': \ - next_char(); \ - if (c == '\n') { \ - next_char(); \ - } \ - lexer_token.source_position.lineno++; \ - code \ - case '\n': \ - next_char(); \ - lexer_token.source_position.lineno++; \ +#define MATCH_NEWLINE(code) \ + case '\r': \ + next_char(); \ + if (c == '\n') { \ + case '\n': \ + next_char(); \ + } \ + lexer_pos.lineno++; \ + lexer_pos.colno = 1; \ code -#define eat(c_type) do { assert(c == c_type); next_char(); } while (0) +#define eat(c_type) (assert(c == c_type), next_char()) static void maybe_concat_lines(void) { @@ -687,19 +688,7 @@ static void parse_number_hex(void) */ static bool is_octal_digit(utf32 chr) { - switch (chr) { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - return true; - default: - return false; - } + return '0' <= chr && chr <= '7'; } /** @@ -955,8 +944,6 @@ static void grow_symbol(utf32 const tc) */ static void parse_string_literal(void) { - const unsigned start_linenr = lexer_token.source_position.lineno; - eat('"'); while (true) { @@ -964,18 +951,14 @@ static void parse_string_literal(void) case '\\': { utf32 const tc = parse_escape_sequence(); if (tc >= 0x100) { - warningf(&lexer_token.source_position, - "escape sequence out of range"); + warningf(WARN_OTHER, &lexer_pos, "escape sequence out of range"); } obstack_1grow(&symbol_obstack, tc); break; } case EOF: { - source_position_t source_position; - source_position.input_name = lexer_token.source_position.input_name; - source_position.lineno = start_linenr; - errorf(&source_position, "string has no end"); + errorf(&lexer_token.source_position, "string has no end"); lexer_token.type = T_ERROR; return; } @@ -1009,8 +992,6 @@ end_of_string: */ static void parse_wide_character_constant(void) { - const unsigned start_linenr = lexer_token.source_position.lineno; - eat('\''); while (true) { @@ -1031,9 +1012,7 @@ static void parse_wide_character_constant(void) goto end_of_wide_char_constant; case EOF: { - source_position_t source_position = lexer_token.source_position; - source_position.lineno = start_linenr; - errorf(&source_position, "EOF while parsing character constant"); + errorf(&lexer_token.source_position, "EOF while parsing character constant"); lexer_token.type = T_ERROR; return; } @@ -1054,7 +1033,7 @@ end_of_wide_char_constant:; lexer_token.literal = identify_string(string, size); if (size == 0) { - parse_error("empty character constant"); + errorf(&lexer_token.source_position, "empty character constant"); } } @@ -1073,8 +1052,6 @@ static void parse_wide_string_literal(void) */ static void parse_character_constant(void) { - const unsigned start_linenr = lexer_token.source_position.lineno; - eat('\''); while (true) { @@ -1082,8 +1059,7 @@ static void parse_character_constant(void) case '\\': { utf32 const tc = parse_escape_sequence(); if (tc >= 0x100) { - warningf(&lexer_token.source_position, - "escape sequence out of range"); + warningf(WARN_OTHER, &lexer_pos, "escape sequence out of range"); } obstack_1grow(&symbol_obstack, tc); break; @@ -1099,10 +1075,7 @@ static void parse_character_constant(void) goto end_of_char_constant; case EOF: { - source_position_t source_position; - source_position.input_name = lexer_token.source_position.input_name; - source_position.lineno = start_linenr; - errorf(&source_position, "EOF while parsing character constant"); + errorf(&lexer_token.source_position, "EOF while parsing character constant"); lexer_token.type = T_ERROR; return; } @@ -1124,7 +1097,7 @@ end_of_char_constant:; lexer_token.literal = identify_string(string, size); if (size == 0) { - parse_error("empty character constant"); + errorf(&lexer_token.source_position, "empty character constant"); } } @@ -1133,17 +1106,13 @@ end_of_char_constant:; */ static void skip_multiline_comment(void) { - unsigned start_linenr = lexer_token.source_position.lineno; - while (true) { switch (c) { case '/': next_char(); if (c == '*') { /* nested comment, warn here */ - if (warning.comment) { - warningf(&lexer_token.source_position, "'/*' within comment"); - } + warningf(WARN_COMMENT, &lexer_pos, "'/*' within comment"); } break; case '*': @@ -1157,10 +1126,7 @@ static void skip_multiline_comment(void) MATCH_NEWLINE(break;) case EOF: { - source_position_t source_position; - source_position.input_name = lexer_token.source_position.input_name; - source_position.lineno = start_linenr; - errorf(&source_position, "at end of file while looking for comment end"); + errorf(&lexer_token.source_position, "at end of file while looking for comment end"); return; } @@ -1188,8 +1154,7 @@ static void skip_line_comment(void) case '\\': next_char(); if (c == '\n' || c == '\r') { - if (warning.comment) - warningf(&lexer_token.source_position, "multi-line comment"); + warningf(WARN_COMMENT, &lexer_pos, "multi-line comment"); return; } break; @@ -1263,11 +1228,11 @@ static void parse_line_directive(void) parse_error("expected integer"); } else { /* use offset -1 as this is about the next line */ - lexer_token.source_position.lineno = atoi(pp_token.literal.begin) - 1; + lexer_pos.lineno = atoi(pp_token.literal.begin) - 1; next_pp_token(); } if (pp_token.type == T_STRING_LITERAL) { - lexer_token.source_position.input_name = pp_token.literal.begin; + lexer_pos.input_name = pp_token.literal.begin; next_pp_token(); } @@ -1348,8 +1313,8 @@ static void parse_pragma(void) unknown_pragma = true; } eat_until_newline(); - if (unknown_pragma && warning.unknown_pragmas) { - warningf(&pp_token.source_position, "encountered unknown #pragma"); + if (unknown_pragma) { + warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.source_position, "encountered unknown #pragma"); } } @@ -1443,19 +1408,20 @@ static void parse_preprocessor_directive(void) #define ELSE_CODE(code) \ default: \ code \ + return; \ } \ } /* end of while (true) */ \ - break; #define ELSE(set_type) \ ELSE_CODE( \ lexer_token.type = set_type; \ - return; \ ) void lexer_next_preprocessing_token(void) { while (true) { + lexer_token.source_position = lexer_pos; + switch (c) { case ' ': case '\t': @@ -1505,7 +1471,6 @@ void lexer_next_preprocessing_token(void) put_back(c); c = '.'; lexer_token.type = '.'; - return; ) ELSE('.') case '&': @@ -1559,7 +1524,6 @@ void lexer_next_preprocessing_token(void) put_back(c); c = '%'; lexer_token.type = '#'; - return; ) ELSE('#') ELSE('%') @@ -1625,7 +1589,7 @@ void lexer_next_preprocessing_token(void) default: dollar_sign: - errorf(&lexer_token.source_position, "unknown character '%c' found", c); + errorf(&lexer_pos, "unknown character '%c' found", c); next_char(); lexer_token.type = T_ERROR; return; @@ -1656,9 +1620,10 @@ void init_lexer(void) void lexer_open_stream(FILE *stream, const char *input_name) { - input = stream; - lexer_token.source_position.lineno = 0; - lexer_token.source_position.input_name = input_name; + input = stream; + lexer_pos.lineno = 0; + lexer_pos.colno = 0; + lexer_pos.input_name = input_name; bufpos = NULL; bufend = NULL; @@ -1670,9 +1635,10 @@ void lexer_open_stream(FILE *stream, const char *input_name) void lexer_open_buffer(const char *buffer, size_t len, const char *input_name) { - input = NULL; - lexer_token.source_position.lineno = 0; - lexer_token.source_position.input_name = input_name; + input = NULL; + lexer_pos.lineno = 0; + lexer_pos.colno = 0; + lexer_pos.input_name = input_name; #if 0 // TODO bufpos = buffer; @@ -1696,7 +1662,7 @@ void exit_lexer(void) static __attribute__((unused)) void dbg_pos(const source_position_t source_position) { - fprintf(stdout, "%s:%u\n", source_position.input_name, - source_position.lineno); + fprintf(stdout, "%s:%u:%u\n", source_position.input_name, + source_position.lineno, source_position.colno); fflush(stdout); }