X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=lexer.c;h=80e49163564cd8fe88051f093d5ac364a10ba19e;hb=bb8544d38aaa4c716603a5d565c3ff3840454ffc;hp=15c12a0b9102f3cd24e39b75f3c14b4e75ecb265;hpb=dec805ebf7d0ce3fb32780ef83a06d1bb2881407;p=cparser diff --git a/lexer.c b/lexer.c index 15c12a0..80e4916 100644 --- a/lexer.c +++ b/lexer.c @@ -53,15 +53,16 @@ #define strtold(s, e) strtod(s, e) #endif -static utf32 c; -token_t lexer_token; -symbol_t *symbol_L; -static FILE *input; -static utf32 buf[BUF_SIZE + MAX_PUTBACK]; -static const utf32 *bufend; -static const utf32 *bufpos; -static strset_t stringset; -bool allow_dollar_in_symbol = true; +static utf32 c; +static source_position_t lexer_pos; +token_t lexer_token; +static symbol_t *symbol_L; +static FILE *input; +static utf32 buf[BUF_SIZE + MAX_PUTBACK]; +static const utf32 *bufend; +static const utf32 *bufpos; +static strset_t stringset; +bool allow_dollar_in_symbol = true; /** * Prints a parse error message at the current token. @@ -70,7 +71,7 @@ bool allow_dollar_in_symbol = true; */ static void parse_error(const char *msg) { - errorf(&lexer_token.source_position, "%s", msg); + errorf(&lexer_pos, "%s", msg); } /** @@ -80,14 +81,16 @@ static void parse_error(const char *msg) */ static NORETURN internal_error(const char *msg) { - internal_errorf(&lexer_token.source_position, "%s", msg); + internal_errorf(&lexer_pos, "%s", msg); } static size_t read_block(unsigned char *const read_buf, size_t const n) { size_t const s = fread(read_buf, 1, n, input); if (s == 0) { - if (ferror(input)) + /* on OS/X ferror appears to return true on eof as well when running + * the application in gdb... */ + if (!feof(input) && ferror(input)) parse_error("read from input failed"); buf[MAX_PUTBACK] = EOF; bufpos = buf + MAX_PUTBACK; @@ -365,6 +368,7 @@ static inline void next_real_char(void) decoder(); } c = *bufpos++; + ++lexer_pos.colno; } /** @@ -376,6 +380,7 @@ static inline void put_back(utf32 const pc) { assert(bufpos > buf); *(--bufpos - buf + buf) = pc; + --lexer_pos.colno; #ifdef DEBUG_CHARS printf("putback '%lc'\n", pc); @@ -384,17 +389,15 @@ static inline void put_back(utf32 const pc) static inline void next_char(void); -#define MATCH_NEWLINE(code) \ - case '\r': \ - next_char(); \ - if (c == '\n') { \ - next_char(); \ - } \ - lexer_token.source_position.linenr++; \ - code \ - case '\n': \ - next_char(); \ - lexer_token.source_position.linenr++; \ +#define MATCH_NEWLINE(code) \ + case '\r': \ + next_char(); \ + if (c == '\n') { \ + case '\n': \ + next_char(); \ + } \ + lexer_pos.lineno++; \ + lexer_pos.colno = 1; \ code #define eat(c_type) do { assert(c == c_type); next_char(); } while (0) @@ -768,7 +771,7 @@ static void parse_number(void) /* check for invalid octal digits */ for (size_t i= 0; i < size; ++i) { char t = string[i]; - if (t == '8' || t == '9') + if (t >= '8') errorf(&lexer_token.source_position, "invalid digit '%c' in octal number", t); } @@ -953,8 +956,6 @@ static void grow_symbol(utf32 const tc) */ static void parse_string_literal(void) { - const unsigned start_linenr = lexer_token.source_position.linenr; - eat('"'); while (true) { @@ -962,18 +963,14 @@ static void parse_string_literal(void) case '\\': { utf32 const tc = parse_escape_sequence(); if (tc >= 0x100) { - warningf(&lexer_token.source_position, - "escape sequence out of range"); + warningf(WARN_OTHER, &lexer_pos, "escape sequence out of range"); } obstack_1grow(&symbol_obstack, tc); break; } case EOF: { - source_position_t source_position; - source_position.input_name = lexer_token.source_position.input_name; - source_position.linenr = start_linenr; - errorf(&source_position, "string has no end"); + errorf(&lexer_token.source_position, "string has no end"); lexer_token.type = T_ERROR; return; } @@ -1007,8 +1004,6 @@ end_of_string: */ static void parse_wide_character_constant(void) { - const unsigned start_linenr = lexer_token.source_position.linenr; - eat('\''); while (true) { @@ -1029,9 +1024,7 @@ static void parse_wide_character_constant(void) goto end_of_wide_char_constant; case EOF: { - source_position_t source_position = lexer_token.source_position; - source_position.linenr = start_linenr; - errorf(&source_position, "EOF while parsing character constant"); + errorf(&lexer_token.source_position, "EOF while parsing character constant"); lexer_token.type = T_ERROR; return; } @@ -1050,6 +1043,10 @@ end_of_wide_char_constant:; lexer_token.type = T_WIDE_CHARACTER_CONSTANT; lexer_token.literal = identify_string(string, size); + + if (size == 0) { + errorf(&lexer_token.source_position, "empty character constant"); + } } /** @@ -1067,8 +1064,6 @@ static void parse_wide_string_literal(void) */ static void parse_character_constant(void) { - const unsigned start_linenr = lexer_token.source_position.linenr; - eat('\''); while (true) { @@ -1076,8 +1071,7 @@ static void parse_character_constant(void) case '\\': { utf32 const tc = parse_escape_sequence(); if (tc >= 0x100) { - warningf(&lexer_token.source_position, - "escape sequence out of range"); + warningf(WARN_OTHER, &lexer_pos, "escape sequence out of range"); } obstack_1grow(&symbol_obstack, tc); break; @@ -1093,10 +1087,7 @@ static void parse_character_constant(void) goto end_of_char_constant; case EOF: { - source_position_t source_position; - source_position.input_name = lexer_token.source_position.input_name; - source_position.linenr = start_linenr; - errorf(&source_position, "EOF while parsing character constant"); + errorf(&lexer_token.source_position, "EOF while parsing character constant"); lexer_token.type = T_ERROR; return; } @@ -1116,6 +1107,10 @@ end_of_char_constant:; lexer_token.type = T_CHARACTER_CONSTANT; lexer_token.literal = identify_string(string, size); + + if (size == 0) { + errorf(&lexer_token.source_position, "empty character constant"); + } } /** @@ -1123,17 +1118,13 @@ end_of_char_constant:; */ static void skip_multiline_comment(void) { - unsigned start_linenr = lexer_token.source_position.linenr; - while (true) { switch (c) { case '/': next_char(); if (c == '*') { /* nested comment, warn here */ - if (warning.comment) { - warningf(&lexer_token.source_position, "'/*' within comment"); - } + warningf(WARN_COMMENT, &lexer_pos, "'/*' within comment"); } break; case '*': @@ -1147,10 +1138,7 @@ static void skip_multiline_comment(void) MATCH_NEWLINE(break;) case EOF: { - source_position_t source_position; - source_position.input_name = lexer_token.source_position.input_name; - source_position.linenr = start_linenr; - errorf(&source_position, "at end of file while looking for comment end"); + errorf(&lexer_token.source_position, "at end of file while looking for comment end"); return; } @@ -1178,8 +1166,7 @@ static void skip_line_comment(void) case '\\': next_char(); if (c == '\n' || c == '\r') { - if (warning.comment) - warningf(&lexer_token.source_position, "multi-line comment"); + warningf(WARN_COMMENT, &lexer_pos, "multi-line comment"); return; } break; @@ -1253,11 +1240,11 @@ static void parse_line_directive(void) parse_error("expected integer"); } else { /* use offset -1 as this is about the next line */ - lexer_token.source_position.linenr = atoi(pp_token.literal.begin) - 1; + lexer_pos.lineno = atoi(pp_token.literal.begin) - 1; next_pp_token(); } if (pp_token.type == T_STRING_LITERAL) { - lexer_token.source_position.input_name = pp_token.literal.begin; + lexer_pos.input_name = pp_token.literal.begin; next_pp_token(); } @@ -1338,8 +1325,8 @@ static void parse_pragma(void) unknown_pragma = true; } eat_until_newline(); - if (unknown_pragma && warning.unknown_pragmas) { - warningf(&pp_token.source_position, "encountered unknown #pragma"); + if (unknown_pragma) { + warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.source_position, "encountered unknown #pragma"); } } @@ -1433,19 +1420,20 @@ static void parse_preprocessor_directive(void) #define ELSE_CODE(code) \ default: \ code \ + return; \ } \ } /* end of while (true) */ \ - break; #define ELSE(set_type) \ ELSE_CODE( \ lexer_token.type = set_type; \ - return; \ ) void lexer_next_preprocessing_token(void) { while (true) { + lexer_token.source_position = lexer_pos; + switch (c) { case ' ': case '\t': @@ -1495,7 +1483,6 @@ void lexer_next_preprocessing_token(void) put_back(c); c = '.'; lexer_token.type = '.'; - return; ) ELSE('.') case '&': @@ -1549,7 +1536,6 @@ void lexer_next_preprocessing_token(void) put_back(c); c = '%'; lexer_token.type = '#'; - return; ) ELSE('#') ELSE('%') @@ -1615,7 +1601,7 @@ void lexer_next_preprocessing_token(void) default: dollar_sign: - errorf(&lexer_token.source_position, "unknown character '%c' found", c); + errorf(&lexer_pos, "unknown character '%c' found", c); next_char(); lexer_token.type = T_ERROR; return; @@ -1646,9 +1632,10 @@ void init_lexer(void) void lexer_open_stream(FILE *stream, const char *input_name) { - input = stream; - lexer_token.source_position.linenr = 0; - lexer_token.source_position.input_name = input_name; + input = stream; + lexer_pos.lineno = 0; + lexer_pos.colno = 0; + lexer_pos.input_name = input_name; bufpos = NULL; bufend = NULL; @@ -1660,9 +1647,10 @@ void lexer_open_stream(FILE *stream, const char *input_name) void lexer_open_buffer(const char *buffer, size_t len, const char *input_name) { - input = NULL; - lexer_token.source_position.linenr = 0; - lexer_token.source_position.input_name = input_name; + input = NULL; + lexer_pos.lineno = 0; + lexer_pos.colno = 0; + lexer_pos.input_name = input_name; #if 0 // TODO bufpos = buffer; @@ -1686,7 +1674,7 @@ void exit_lexer(void) static __attribute__((unused)) void dbg_pos(const source_position_t source_position) { - fprintf(stdout, "%s:%u\n", source_position.input_name, - source_position.linenr); + fprintf(stdout, "%s:%u:%u\n", source_position.input_name, + source_position.lineno, source_position.colno); fflush(stdout); }