X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=lexer.c;h=1300b3d26f23cda1c90093658115266e03a941ab;hb=4b6bbffc7e1d9a9ee5a75da79ced92f5bc92a913;hp=7cb82bc4fa8430ac0847d93014d88b8cd315f375;hpb=6734a093fb9d14f6a626293849de8a38b39b9457;p=cparser

What this patch does to lexer.c:
  * adds eat(c_type), which asserts the current character and advances
  * adds parse_floatingpoint_fract(), parse_floatingpoint_exponent() and
    parse_floating_suffix(); parse_number_dec() and the '0'-prefixed branch
    of parse_number() dispatch to them
  * moves parse_integer_suffix() ahead of the number parsers, each of which
    now consumes the suffix itself instead of parse_number() doing it
  * parse_escape_sequence() eats the backslash itself, so the string and
    character literal parsers no longer call next_char() before it
  * put_back() stores the character at bufpos and then steps back
  * the trigraph / line-concatenation filter in next_char() is compiled out
    with #if 0
  * an identifier L directly followed by '"' is lexed on as a string literal
  * lexer_open_stream() registers the symbol "L", starts at line 0 and
    preloads a virtual '\n' instead of reading the first character

Notes on this copy:
  * Line breaks, indentation and blank context lines were restored from a
    whitespace-collapsed copy; hunk line counts are consistent, but apply it
    with whitespace-insensitive matching.
  * Three added lines differ from the blob named in the index line below:
    parse_integer_suffix() tests 'u' as well as 'U' (the collapsed copy
    tested 'U' twice), the 0e/0E case of parse_number() skips the exponent
    marker before calling parse_floatingpoint_exponent() as the other
    callers do, and a comment typo is corrected. Removed lines are untouched.

diff --git a/lexer.c b/lexer.c
index 7cb82bc..1300b3d 100644
--- a/lexer.c
+++ b/lexer.c
@@ -17,13 +17,12 @@ static int c;
 
 
 token_t lexer_token;
+symbol_t *symbol_L;
 static FILE *input;
 static char buf[1024 + MAX_PUTBACK];
 static const char *bufend;
 static const char *bufpos;
 static strset_t stringset;
-//static FILE **input_stack;
-//static char **buf_stack;
 
 static void error_prefix_at(const char *input_name, unsigned linenr)
 {
@@ -60,11 +59,16 @@ static inline void next_real_char(void)
 
 static inline void put_back(int pc)
 {
-	char *p = (char*) bufpos - 1;
-	bufpos--;
-	assert(p >= buf);
+	assert(bufpos >= buf);
+	assert(bufpos < buf+MAX_PUTBACK || *bufpos == pc);
+
+	char *p = buf + (bufpos - buf);
 	*p = pc;
 
+	/* going backwards in the buffer is legal as long as it's not more often
+	 * than MAX_PUTBACK */
+	bufpos--;
+
 #ifdef DEBUG_CHARS
 	printf("putback '%c'\n", pc);
 #endif
@@ -85,9 +89,12 @@ static inline void next_char(void);
 	lexer_token.source_position.linenr++; \
 	code;
 
+#define eat(c_type) do { assert(c == c_type); next_char(); } while(0)
+
 static void maybe_concat_lines(void)
 {
-	next_char();
+	eat('\\');
+
 	switch(c) {
 	MATCH_NEWLINE(return;)
 
@@ -103,6 +110,7 @@ static inline void next_char(void)
 {
 	next_real_char();
 
+#if 0
 	/* filter trigraphs */
 	if(UNLIKELY(c == '\\')) {
 		maybe_concat_lines();
@@ -138,6 +146,8 @@ static inline void next_char(void)
 	}
 
 end_of_next_char:
+#endif
+	(void) maybe_concat_lines;
 #ifdef DEBUG_CHARS
 	printf("nchar '%c'\n", c);
 #else
@@ -247,6 +257,45 @@ end_symbol:
 	}
 }
 
+static void parse_integer_suffix(void)
+{
+	if(c == 'u' || c == 'U') {
+		/* TODO do something with the suffixes... */
+		next_char();
+		if(c == 'L' || c == 'l') {
+			next_char();
+			if(c == 'L' || c == 'l') {
+				next_char();
+			}
+		}
+	} else if(c == 'l' || c == 'L') {
+		next_char();
+		if(c == 'l' || c == 'L') {
+			next_char();
+			if(c == 'u' || c == 'U') {
+				next_char();
+			}
+		} else if(c == 'u' || c == 'U') {
+			next_char();
+		}
+	}
+}
+
+static void parse_floating_suffix(void)
+{
+	switch(c) {
+	/* TODO: do something useful with the suffixes... */
+	case 'f':
+	case 'F':
+	case 'l':
+	case 'L':
+		next_char();
+		break;
+	default:
+		break;
+	}
+}
+
 static void parse_number_hex(void)
 {
 	assert(c == 'x' || c == 'X');
@@ -269,71 +318,118 @@ static void parse_number_hex(void)
 		} else if ('a' <= c && c <= 'f') {
 			value = 16 * value + c - 'a' + 10;
 		} else {
-			lexer_token.type = T_INTEGER;
+			parse_integer_suffix();
+
+			lexer_token.type       = T_INTEGER;
 			lexer_token.v.intvalue = value;
 			return;
 		}
 		next_char();
 	}
+
+	if(c == '.' || c == 'p' || c == 'P') {
+		next_char();
+		panic("Hex floating point numbers not implemented yet");
+	}
 }
 
 static void parse_number_oct(void)
 {
 	int value = 0;
+	while(c >= '0' && c <= '7') {
+		value = 8 * value + c - '0';
+		next_char();
+	}
+	if (c == '8' || c == '9') {
+		parse_error("invalid octal number");
+		lexer_token.type = T_ERROR;
+		return;
+	}
+
+	lexer_token.type = T_INTEGER;
+	lexer_token.v.intvalue = value;
+
+	parse_integer_suffix();
+}
+
+static void parse_floatingpoint_exponent(long double value)
+{
+	unsigned int expo = 0;
+	long double factor = 10.;
+
+	if(c == '-') {
+		next_char();
+		factor = 0.1;
+	} else if(c == '+') {
+		next_char();
+	}
+
+	while(c >= '0' && c <= '9') {
+		expo = 10 * expo + (c - '0');
+		next_char();
+	}
+
 	while(1) {
-		if ('0' <= c && c <= '7') {
-			value = 8 * value + c - '0';
-		} else if (c == '8' || c == '9') {
-			parse_error("invalid octal number");
-			lexer_token.type = T_ERROR;
-			return;
-		} else {
-			lexer_token.type = T_INTEGER;
-			lexer_token.v.intvalue = value;
-			return;
-		}
+		if(expo & 1)
+			value *= factor;
+		expo >>= 1;
+		if(expo == 0)
+			break;
+		factor *= factor;
+	}
+
+	lexer_token.type = T_FLOATINGPOINT;
+	lexer_token.v.floatvalue = value;
+
+	parse_floating_suffix();
+}
+
+static void parse_floatingpoint_fract(int integer_part)
+{
+	long double value = integer_part;
+	long double factor = 1.;
+
+	while(c >= '0' && c <= '9') {
+		factor *= 0.1;
+		value += (c - '0') * factor;
 		next_char();
 	}
+
+	if(c == 'e' || c == 'E') {
+		next_char();
+		parse_floatingpoint_exponent(value);
+		return;
+	}
+
+	lexer_token.type = T_FLOATINGPOINT;
+	lexer_token.v.floatvalue = value;
+
+	parse_floating_suffix();
 }
 
 static void parse_number_dec(void)
 {
 	int value = 0;
 
-	for(;;) {
-		if (isdigit(c)) {
-			value = 10 * value + c - '0';
-		} else {
-			lexer_token.type = T_INTEGER;
-			lexer_token.v.intvalue = value;
-			return;
-		}
+	while(isdigit(c)) {
+		value = 10 * value + c - '0';
 		next_char();
 	}
-}
 
-static void parse_integer_suffix(void)
-{
-	if(c == 'U' || c == 'U') {
-		/* TODO do something with the suffixes... */
+	if(c == '.') {
 		next_char();
-		if(c == 'L' || c == 'l') {
-			next_char();
-			if(c == 'L' || c == 'l') {
-				next_char();
-			}
-		}
-	} else if(c == 'l' || c == 'L') {
+		parse_floatingpoint_fract(value);
+		return;
+	}
+	if(c == 'e' || c == 'E') {
 		next_char();
-		if(c == 'l' || c == 'L') {
-			next_char();
-			if(c == 'u' || c == 'U') {
-				next_char();
-			}
-		} else if(c == 'u' || c == 'U') {
-			next_char();
-		}
+		parse_floatingpoint_exponent(value);
+		return;
 	}
+	parse_integer_suffix();
+
+	lexer_token.type = T_INTEGER;
+	lexer_token.v.intvalue = value;
 }
 
 static void parse_number(void)
@@ -342,14 +438,43 @@ static void parse_number(void)
 		next_char();
 		switch (c) {
 		case 'X':
-		case 'x': parse_number_hex(); break;
-		default: parse_number_oct(); break;
+		case 'x':
+			parse_number_hex();
+			break;
+		case '0':
+		case '1':
+		case '2':
+		case '3':
+		case '4':
+		case '5':
+		case '6':
+		case '7':
+			parse_number_oct();
+			break;
+		case '.':
+			next_char();
+			parse_floatingpoint_fract(0);
+			break;
+		case 'e':
+		case 'E':
+			next_char();
+			parse_floatingpoint_exponent(0);
+			break;
+		case '8':
+		case '9':
+			next_char();
+			parse_error("invalid octal number");
+			lexer_token.type = T_ERROR;
+			return;
+		default:
+			put_back(c);
+			c = '0';
+			parse_number_dec();
+			return;
 		}
 	} else {
 		parse_number_dec();
 	}
-
-	parse_integer_suffix();
 }
 
 static int parse_octal_sequence(void)
@@ -386,40 +511,40 @@ static int parse_hex_sequence(void)
 
 static int parse_escape_sequence(void)
 {
-	while(1) {
-		int ec = c;
-		next_char();
+	eat('\\');
 
-		switch(ec) {
-		case '"': return '"';
-		case '\'': return'\'';
-		case '\\': return '\\';
-		case '?': return '\?';
-		case 'a': return '\a';
-		case 'b': return '\b';
-		case 'f': return '\f';
-		case 'n': return '\n';
-		case 'r': return '\r';
-		case 't': return '\t';
-		case 'v': return '\v';
-		case 'x':
-			return parse_hex_sequence();
-		case '0':
-		case '1':
-		case '2':
-		case '3':
-		case '4':
-		case '5':
-		case '6':
-		case '7':
-			return parse_octal_sequence();
-		case EOF:
-			parse_error("reached end of file while parsing escape sequence");
-			return EOF;
-		default:
-			parse_error("unknown escape sequence");
-			return EOF;
-		}
+	int ec = c;
+	next_char();
+
+	switch(ec) {
+	case '"': return '"';
+	case '\'': return'\'';
+	case '\\': return '\\';
+	case '?': return '\?';
+	case 'a': return '\a';
+	case 'b': return '\b';
+	case 'f': return '\f';
+	case 'n': return '\n';
+	case 'r': return '\r';
+	case 't': return '\t';
+	case 'v': return '\v';
+	case 'x':
+		return parse_hex_sequence();
+	case '0':
+	case '1':
+	case '2':
+	case '3':
+	case '4':
+	case '5':
+	case '6':
+	case '7':
+		return parse_octal_sequence();
+	case EOF:
+		parse_error("reached end of file while parsing escape sequence");
+		return EOF;
+	default:
+		parse_error("unknown escape sequence");
+		return EOF;
 	}
 }
 
@@ -449,12 +574,12 @@ static void parse_string_literal(void)
 	assert(c == '"');
 	next_char();
 
+	int tc;
 	while(1) {
 		switch(c) {
 		case '\\':
-			next_char();
-			int ec = parse_escape_sequence();
-			obstack_1grow(&symbol_obstack, ec);
+			tc = parse_escape_sequence();
+			obstack_1grow(&symbol_obstack, tc);
 			break;
 
 		case EOF:
@@ -495,14 +620,12 @@ end_of_string:
 
 static void parse_character_constant(void)
 {
-	assert(c == '\'');
-	next_char();
+	eat('\'');
 
 	int found_char = 0;
 	while(1) {
 		switch(c) {
 		case '\\':
-			next_char();
 			found_char = parse_escape_sequence();
 			break;
 
@@ -683,7 +806,7 @@ static void parse_preprocessor_identifier(void)
 	}
 }
 
-static void parse_preprocessor_directive()
+static void parse_preprocessor_directive(void)
 {
 	next_pp_token();
 
@@ -741,6 +864,12 @@ void lexer_next_preprocessing_token(void)
 
 		SYMBOL_CHARS
 			parse_symbol();
+			/* might be a wide string ( L"string" ) */
+			if(c == '"' && (lexer_token.type == T_IDENTIFIER &&
+			   lexer_token.v.symbol == symbol_L)) {
+				parse_string_literal();
+				return;
+			}
 			return;
 
 		DIGITS
@@ -916,10 +1045,14 @@ void init_lexer(void)
 void lexer_open_stream(FILE *stream, const char *input_name)
 {
 	input = stream;
-	lexer_token.source_position.linenr = 1;
+	lexer_token.source_position.linenr = 0;
 	lexer_token.source_position.input_name = input_name;
 
-	next_char();
+	symbol_L = symbol_table_insert("L");
+
+	/* place a virtual \n at the beginning so the lexer knows that we're
+	 * at the beginning of a line */
+	c = '\n';
 }
 
 void exit_lexer(void)