X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=lexer.c;h=1300b3d26f23cda1c90093658115266e03a941ab;hb=22420f9404b76d3ab2536c7d3903cbcf05a792a8;hp=2a6cc2c6c164c7fd2e2bbcf799d50bcc368ac7f1;hpb=d5a5e90eca7d95d0efa66a6e8dd5ca0ed031321b;p=cparser diff --git a/lexer.c b/lexer.c index 2a6cc2c..1300b3d 100644 --- a/lexer.c +++ b/lexer.c @@ -17,13 +17,12 @@ static int c; token_t lexer_token; +symbol_t *symbol_L; static FILE *input; static char buf[1024 + MAX_PUTBACK]; static const char *bufend; static const char *bufpos; static strset_t stringset; -//static FILE **input_stack; -//static char **buf_stack; static void error_prefix_at(const char *input_name, unsigned linenr) { @@ -60,11 +59,16 @@ static inline void next_real_char(void) static inline void put_back(int pc) { - char *p = (char*) bufpos - 1; - bufpos--; - assert(p >= buf); + assert(bufpos >= buf); + assert(bufpos < buf+MAX_PUTBACK || *bufpos == pc); + + char *p = buf + (bufpos - buf); *p = pc; + /* going backwards in the buffer is legal as long as it's not more often + * than MAX_PUTBACK */ + bufpos--; + #ifdef DEBUG_CHARS printf("putback '%c'\n", pc); #endif @@ -85,9 +89,12 @@ static inline void next_char(void); lexer_token.source_position.linenr++; \ code; +#define eat(c_type) do { assert(c == c_type); next_char(); } while(0) + static void maybe_concat_lines(void) { - next_char(); + eat('\\'); + switch(c) { MATCH_NEWLINE(return;) @@ -103,6 +110,7 @@ static inline void next_char(void) { next_real_char(); +#if 0 /* filter trigraphs */ if(UNLIKELY(c == '\\')) { maybe_concat_lines(); @@ -138,6 +146,8 @@ static inline void next_char(void) } end_of_next_char: +#endif + (void) maybe_concat_lines; #ifdef DEBUG_CHARS printf("nchar '%c'\n", c); #else @@ -271,6 +281,21 @@ static void parse_integer_suffix(void) } } +static void parse_floating_suffix(void) +{ + switch(c) { + /* TODO: do something usefull with the suffixes... */ + case 'f': + case 'F': + case 'l': + case 'L': + next_char(); + break; + default: + break; + } +} + static void parse_number_hex(void) { assert(c == 'x' || c == 'X'); @@ -355,6 +380,8 @@ static void parse_floatingpoint_exponent(long double value) lexer_token.type = T_FLOATINGPOINT; lexer_token.v.floatvalue = value; + + parse_floating_suffix(); } static void parse_floatingpoint_fract(int integer_part) @@ -376,6 +403,8 @@ static void parse_floatingpoint_fract(int integer_part) lexer_token.type = T_FLOATINGPOINT; lexer_token.v.floatvalue = value; + + parse_floating_suffix(); } static void parse_number_dec(void) @@ -481,40 +510,40 @@ static int parse_hex_sequence(void) static int parse_escape_sequence(void) { - while(1) { - int ec = c; - next_char(); + eat('\\'); - switch(ec) { - case '"': return '"'; - case '\'': return'\''; - case '\\': return '\\'; - case '?': return '\?'; - case 'a': return '\a'; - case 'b': return '\b'; - case 'f': return '\f'; - case 'n': return '\n'; - case 'r': return '\r'; - case 't': return '\t'; - case 'v': return '\v'; - case 'x': - return parse_hex_sequence(); - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - return parse_octal_sequence(); - case EOF: - parse_error("reached end of file while parsing escape sequence"); - return EOF; - default: - parse_error("unknown escape sequence"); - return EOF; - } + int ec = c; + next_char(); + + switch(ec) { + case '"': return '"'; + case '\'': return'\''; + case '\\': return '\\'; + case '?': return '\?'; + case 'a': return '\a'; + case 'b': return '\b'; + case 'f': return '\f'; + case 'n': return '\n'; + case 'r': return '\r'; + case 't': return '\t'; + case 'v': return '\v'; + case 'x': + return parse_hex_sequence(); + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + return parse_octal_sequence(); + case EOF: + parse_error("reached end of file while parsing escape sequence"); + return EOF; + default: + parse_error("unknown escape sequence"); + return EOF; } } @@ -544,12 +573,12 @@ static void parse_string_literal(void) assert(c == '"'); next_char(); + int tc; while(1) { switch(c) { case '\\': - next_char(); - int ec = parse_escape_sequence(); - obstack_1grow(&symbol_obstack, ec); + tc = parse_escape_sequence(); + obstack_1grow(&symbol_obstack, tc); break; case EOF: @@ -590,14 +619,12 @@ end_of_string: static void parse_character_constant(void) { - assert(c == '\''); - next_char(); + eat('\''); int found_char = 0; while(1) { switch(c) { case '\\': - next_char(); found_char = parse_escape_sequence(); break; @@ -778,7 +805,7 @@ static void parse_preprocessor_identifier(void) } } -static void parse_preprocessor_directive() +static void parse_preprocessor_directive(void) { next_pp_token(); @@ -836,6 +863,12 @@ void lexer_next_preprocessing_token(void) SYMBOL_CHARS parse_symbol(); + /* might be a wide string ( L"string" ) */ + if(c == '"' && (lexer_token.type == T_IDENTIFIER && + lexer_token.v.symbol == symbol_L)) { + parse_string_literal(); + return; + } return; DIGITS @@ -1011,10 +1044,14 @@ void init_lexer(void) void lexer_open_stream(FILE *stream, const char *input_name) { input = stream; - lexer_token.source_position.linenr = 1; + lexer_token.source_position.linenr = 0; lexer_token.source_position.input_name = input_name; - next_char(); + symbol_L = symbol_table_insert("L"); + + /* place a virtual \n at the beginning so the lexer knows that we're + * at the beginning of a line */ + c = '\n'; } void exit_lexer(void)