5 #include "symbol_table_t.h"
7 #include "adt/strset.h"
21 static char buf[1024 + MAX_PUTBACK];
22 static const char *bufend;
23 static const char *bufpos;
24 static strset_t stringset;
25 //static FILE **input_stack;
26 //static char **buf_stack;
/*
 * Writes the diagnostic prefix "<input_name>:<linenr>: Error: " to stderr.
 * No trailing newline is emitted, so the caller is expected to print the
 * actual message afterwards.
 */
28 static void error_prefix_at(const char *input_name, unsigned linenr)
30 fprintf(stderr, "%s:%u: Error: ", input_name, linenr);
/*
 * Writes the diagnostic prefix for the source position stored in the
 * current lexer_token (its input_name and linenr).
 */
33 static void error_prefix(void)
35 error_prefix_at(lexer_token.source_position.input_name,
36 lexer_token.source_position.linenr);
/*
 * Reports a lexer error: prints msg followed by a newline to stderr.
 * Callers should pass the message without a trailing newline, because one
 * is appended here.
 */
39 static void parse_error(const char *msg)
42 fprintf(stderr, "%s\n", msg);
45 static inline void next_real_char(void)
48 if(bufpos >= bufend) {
49 size_t s = fread(buf + MAX_PUTBACK, 1, sizeof(buf) - MAX_PUTBACK,
55 bufpos = buf + MAX_PUTBACK;
56 bufend = buf + MAX_PUTBACK + s;
61 static inline void put_back(int pc)
63 char *p = (char*) bufpos - 1;
69 printf("putback '%c'\n", pc);
73 static inline void next_char(void);
75 #define MATCH_NEWLINE(code) \
81 lexer_token.source_position.linenr++; \
85 lexer_token.source_position.linenr++; \
88 static void maybe_concat_lines(void)
92 MATCH_NEWLINE(return;)
102 static inline void next_char(void)
106 /* filter trigraphs */
107 if(UNLIKELY(c == '\\')) {
108 maybe_concat_lines();
109 goto end_of_next_char;
113 goto end_of_next_char;
116 if(LIKELY(c != '?')) {
119 goto end_of_next_char;
124 case '=': c = '#'; break;
125 case '(': c = '['; break;
126 case '/': c = '\\'; maybe_concat_lines(); break;
127 case ')': c = ']'; break;
128 case '\'': c = '^'; break;
129 case '<': c = '{'; break;
130 case '!': c = '|'; break;
131 case '>': c = '}'; break;
132 case '-': c = '~'; break;
142 printf("nchar '%c'\n", c);
148 #define SYMBOL_CHARS \
215 static void parse_symbol(void)
220 obstack_1grow(&symbol_obstack, c);
227 obstack_1grow(&symbol_obstack, c);
237 obstack_1grow(&symbol_obstack, '\0');
239 string = obstack_finish(&symbol_obstack);
240 symbol = symbol_table_insert(string);
242 lexer_token.type = symbol->ID;
243 lexer_token.v.symbol = symbol;
245 if(symbol->string != string) {
246 obstack_free(&symbol_obstack, string);
/*
 * Recognises an optional integer-literal suffix by testing the current
 * character c against the letters u/U (unsigned) and l/L (long) in the
 * combinations checked below. The suffix is not yet recorded in the token
 * (see the TODO).
 */
250 static void parse_integer_suffix(void)
252 if(c == 'U' || c == 'u') { /* either case of the unsigned suffix; 'U' used to be tested twice */
253 /* TODO do something with the suffixes... */
255 if(c == 'L' || c == 'l') {
257 if(c == 'L' || c == 'l') {
261 } else if(c == 'l' || c == 'L') {
263 if(c == 'l' || c == 'L') {
265 if(c == 'u' || c == 'U') {
268 } else if(c == 'u' || c == 'U') {
274 static void parse_number_hex(void)
276 assert(c == 'x' || c == 'X');
280 !('A' <= c && c <= 'F') &&
281 !('a' <= c && c <= 'f')) {
282 parse_error("premature end of hex number literal");
283 lexer_token.type = T_ERROR;
290 value = 16 * value + c - '0';
291 } else if ('A' <= c && c <= 'F') {
292 value = 16 * value + c - 'A' + 10;
293 } else if ('a' <= c && c <= 'f') {
294 value = 16 * value + c - 'a' + 10;
296 parse_integer_suffix();
298 lexer_token.type = T_INTEGER;
299 lexer_token.v.intvalue = value;
305 if(c == '.' || c == 'p' || c == 'P') {
307 panic("Hex floating point numbers not implemented yet");
311 static void parse_number_oct(void)
314 while(c >= '0' && c <= '7') {
315 value = 8 * value + c - '0';
318 if (c == '8' || c == '9') {
319 parse_error("invalid octal number");
320 lexer_token.type = T_ERROR;
324 lexer_token.type = T_INTEGER;
325 lexer_token.v.intvalue = value;
327 parse_integer_suffix();
330 static void parse_floatingpoint_exponent(long double value)
332 unsigned int expo = 0;
333 long double factor = 10.;
338 } else if(c == '+') {
342 while(c >= '0' && c <= '9') {
343 expo = 10 * expo + (c - '0');
356 lexer_token.type = T_FLOATINGPOINT;
357 lexer_token.v.floatvalue = value;
360 static void parse_floatingpoint_fract(int integer_part)
362 long double value = integer_part;
363 long double factor = 1.;
365 while(c >= '0' && c <= '9') {
367 value += (c - '0') * factor;
371 if(c == 'e' || c == 'E') {
373 parse_floatingpoint_exponent(value);
377 lexer_token.type = T_FLOATINGPOINT;
378 lexer_token.v.floatvalue = value;
381 static void parse_number_dec(void)
386 value = 10 * value + c - '0';
392 parse_floatingpoint_fract(value);
395 if(c == 'e' || c == 'E') {
397 parse_floatingpoint_exponent(value);
400 parse_integer_suffix();
402 lexer_token.type = T_INTEGER;
403 lexer_token.v.intvalue = value;
406 static void parse_number(void)
427 parse_floatingpoint_fract(0);
431 parse_floatingpoint_exponent(0);
436 parse_error("invalid octal number");
437 lexer_token.type = T_ERROR;
450 static int parse_octal_sequence(void)
454 if(c < '0' || c > '7')
456 value = 8 * value + c - '0';
463 static int parse_hex_sequence(void)
467 if (c >= '0' && c <= '9') {
468 value = 16 * value + c - '0';
469 } else if ('A' <= c && c <= 'F') {
470 value = 16 * value + c - 'A' + 10;
471 } else if ('a' <= c && c <= 'f') {
472 value = 16 * value + c - 'a' + 10;
482 static int parse_escape_sequence(void)
489 case '"': return '"';
490 case '\'': return'\'';
491 case '\\': return '\\';
492 case '?': return '\?';
493 case 'a': return '\a';
494 case 'b': return '\b';
495 case 'f': return '\f';
496 case 'n': return '\n';
497 case 'r': return '\r';
498 case 't': return '\t';
499 case 'v': return '\v';
501 return parse_hex_sequence();
510 return parse_octal_sequence();
512 parse_error("reached end of file while parsing escape sequence");
515 parse_error("unknown escape sequence");
/*
 * Builds the concatenation of the NUL-terminated strings s1 and s2 in a
 * buffer allocated on symbol_obstack and inserts it into stringset.
 * If strset_insert() hands back a different pointer than the new buffer,
 * the new buffer is released from the obstack again.
 */
521 const char *concat_strings(const char *s1, const char *s2)
523 size_t len1 = strlen(s1);
524 size_t len2 = strlen(s2);
526 char *concat = obstack_alloc(&symbol_obstack, len1 + len2 + 1); /* +1 for the terminating NUL */
527 memcpy(concat, s1, len1);
528 memcpy(concat + len1, s2, len2 + 1); /* len2 + 1 copies s2's terminator as well */
530 const char *result = strset_insert(&stringset, concat);
531 if(result != concat) {
532 obstack_free(&symbol_obstack, concat); /* the set kept another pointer (presumably an existing equal string), so drop our copy */
538 static void parse_string_literal(void)
540 unsigned start_linenr = lexer_token.source_position.linenr;
551 int ec = parse_escape_sequence();
552 obstack_1grow(&symbol_obstack, ec);
556 error_prefix_at(lexer_token.source_position.input_name,
558 fprintf(stderr, "string has no end\n");
559 lexer_token.type = T_ERROR;
567 obstack_1grow(&symbol_obstack, c);
575 /* TODO: concatenate multiple strings separated by whitespace... */
577 /* add finishing 0 to the string */
578 obstack_1grow(&symbol_obstack, '\0');
579 string = obstack_finish(&symbol_obstack);
581 /* check if there is already a copy of the string */
582 result = strset_insert(&stringset, string);
583 if(result != string) {
584 obstack_free(&symbol_obstack, string);
587 lexer_token.type = T_STRING_LITERAL;
588 lexer_token.v.string = result;
591 static void parse_character_constant(void)
601 found_char = parse_escape_sequence();
605 parse_error("newline while parsing character constant");
611 goto end_of_char_constant;
614 parse_error("EOF while parsing character constant");
615 lexer_token.type = T_ERROR;
619 if(found_char != 0) {
620 parse_error("more than 1 characters in character "
622 goto end_of_char_constant;
631 end_of_char_constant:
632 lexer_token.type = T_INTEGER;
633 lexer_token.v.intvalue = found_char;
636 static void skip_multiline_comment(void)
638 unsigned start_linenr = lexer_token.source_position.linenr;
650 MATCH_NEWLINE(break;)
653 error_prefix_at(lexer_token.source_position.input_name,
655 fprintf(stderr, "at end of file while looking for comment end\n");
665 static void skip_line_comment(void)
683 static token_t pp_token;
/*
 * Advances the lexer by one preprocessing token and stores a copy of the
 * resulting lexer_token in pp_token.
 */
685 static inline void next_pp_token(void)
687 lexer_next_preprocessing_token();
688 pp_token = lexer_token;
691 static void eat_until_newline(void)
693 while(pp_token.type != '\n' && pp_token.type != T_EOF) {
698 static void error_directive(void)
701 fprintf(stderr, "#error directive: \n");
703 /* parse pp-tokens until new-line */
/*
 * Handles a #define directive: reads the next preprocessing token and
 * reports an error if it is not an identifier.
 */
706 static void define_directive(void)
708 lexer_next_preprocessing_token();
709 if(lexer_token.type != T_IDENTIFIER) {
710 parse_error("expected identifier after #define"); /* no "\n" here: parse_error() appends the newline itself */
715 static void ifdef_directive(int is_ifndef)
718 lexer_next_preprocessing_token();
719 //expect_identifier();
723 static void endif_directive(void)
728 static void parse_line_directive(void)
730 if(pp_token.type != T_INTEGER) {
731 parse_error("expected integer");
733 lexer_token.source_position.linenr = pp_token.v.intvalue - 1;
736 if(pp_token.type == T_STRING_LITERAL) {
737 lexer_token.source_position.input_name = pp_token.v.string;
744 static void parse_preprocessor_identifier(void)
746 assert(pp_token.type == T_IDENTIFIER);
747 symbol_t *symbol = pp_token.v.symbol;
749 switch(symbol->pp_ID) {
751 printf("include - enable header name parsing!\n");
767 parse_line_directive();
/*
 * Dispatches on the type of the current pp_token: calls
 * parse_preprocessor_identifier() or parse_line_directive() for the token
 * types handled by the switch below, and otherwise reports
 * "invalid preprocessor directive".
 * Declared with an explicit (void) parameter list so that it is a real
 * prototype, like the other functions in this file.
 */
781 static void parse_preprocessor_directive(void)
785 switch(pp_token.type) {
787 parse_preprocessor_identifier();
790 parse_line_directive();
793 parse_error("invalid preprocessor directive");
799 #define MAYBE_PROLOG \
804 #define MAYBE(ch, set_type) \
807 lexer_token.type = set_type; \
810 #define ELSE_CODE(code) \
814 } /* end of while(1) */ \
817 #define ELSE(set_type) \
819 lexer_token.type = set_type; \
823 void lexer_next_preprocessing_token(void)
833 lexer_token.type = '\n';
846 parse_string_literal();
850 parse_character_constant();
857 MAYBE('.', T_DOTDOTDOT)
861 lexer_token.type = '.';
868 MAYBE('=', T_ANDEQUAL)
872 MAYBE('=', T_ASTERISKEQUAL)
876 MAYBE('+', T_PLUSPLUS)
877 MAYBE('=', T_PLUSEQUAL)
881 MAYBE('>', T_MINUSGREATER)
882 MAYBE('-', T_MINUSMINUS)
883 MAYBE('=', T_MINUSEQUAL)
887 MAYBE('=', T_EXCLAMATIONMARKEQUAL)
891 MAYBE('=', T_SLASHEQUAL)
894 skip_multiline_comment();
895 lexer_next_preprocessing_token();
900 lexer_next_preprocessing_token();
905 MAYBE('>', T_PERCENTGREATER)
906 MAYBE('=', T_PERCENTEQUAL)
911 MAYBE(':', T_PERCENTCOLONPERCENTCOLON)
915 lexer_token.type = T_PERCENTCOLON;
922 MAYBE(':', T_LESSCOLON)
923 MAYBE('%', T_LESSPERCENT)
924 MAYBE('=', T_LESSEQUAL)
927 MAYBE('=', T_LESSLESSEQUAL)
932 MAYBE('=', T_GREATEREQUAL)
935 MAYBE('=', T_GREATERGREATEREQUAL)
936 ELSE(T_GREATERGREATER)
940 MAYBE('=', T_CARETEQUAL)
944 MAYBE('=', T_PIPEEQUAL)
945 MAYBE('|', T_PIPEPIPE)
949 MAYBE('>', T_COLONGREATER)
953 MAYBE('=', T_EQUALEQUAL)
957 MAYBE('#', T_HASHHASH)
971 lexer_token.type = c;
976 lexer_token.type = T_EOF;
982 fprintf(stderr, "unknown character '%c' found\n", c);
983 lexer_token.type = T_ERROR;
989 void lexer_next_token(void)
991 lexer_next_preprocessing_token();
992 if(lexer_token.type != '\n')
997 lexer_next_preprocessing_token();
998 } while(lexer_token.type == '\n');
1000 if(lexer_token.type == '#') {
1001 parse_preprocessor_directive();
/*
 * Initialises the lexer's set of string literals (stringset).
 */
1006 void init_lexer(void)
1008 strset_init(&stringset);
/*
 * Prepares the lexer for a new input: resets the current source position
 * to line 1 of input_name. input_name is stored by pointer, not copied,
 * so it has to stay valid while tokens refer to it.
 */
1011 void lexer_open_stream(FILE *stream, const char *input_name)
1014 lexer_token.source_position.linenr = 1;
1015 lexer_token.source_position.input_name = input_name;
/*
 * Tears down the lexer's set of string literals (stringset).
 */
1020 void exit_lexer(void)
1022 strset_destroy(&stringset);
/*
 * Debug helper: prints the given source position as "<input_name>:<linenr>"
 * followed by a newline to stdout. Marked unused because nothing may call it.
 * The line number is handled as an unsigned value elsewhere in this file
 * (error_prefix_at prints it with an unsigned conversion), so the same
 * conversion is used here instead of the signed one.
 */
1025 static __attribute__((unused))
1026 void dbg_pos(const source_position_t source_position)
1028 fprintf(stdout, "%s:%u\n", source_position.input_name,
1029 source_position.linenr);