5 #include "symbol_table_t.h"
7 #include "adt/strset.h"
// Lexer input state, private to this file.
// buf is the read buffer: 1024 bytes plus MAX_PUTBACK extra bytes. The refill
// code in this file reads file data starting at buf + MAX_PUTBACK, presumably so
// that the first MAX_PUTBACK bytes stay free for pushed-back characters - TODO confirm.
20 static char buf[1024 + MAX_PUTBACK];
// bufend is set by the refill code to one past the last byte read into buf.
21 static const char *bufend;
// bufpos is the current read position inside buf.
22 static const char *bufpos;
// stringset holds the string literals seen so far; parse_string_literal uses it
// to share a single copy of equal strings.
23 static strset_t stringset;
// Disabled declarations, apparently meant for a stack of nested inputs; nothing
// in the visible code uses them.
24 //static FILE **input_stack;
25 //static char **buf_stack;
/*
 * Prints the common error-message prefix "<input_name>:<linenr>: Error: "
 * to stderr. No newline is written; the caller prints the message text
 * that follows the prefix.
 *
 * input_name: name of the input the error refers to (must not be NULL)
 * linenr:     line number the error refers to
 */
void error_prefix_at(const char *input_name, unsigned linenr)
{
	/* linenr is unsigned, so it has to be formatted with %u; passing it
	 * to %d is a format/argument type mismatch. */
	fprintf(stderr, "%s:%u: Error: ", input_name, linenr);
}
// Prints the error prefix for the position stored in lexer_token: forwards the
// input name and line number of lexer_token.source_position to error_prefix_at.
34 void error_prefix(void)
36 error_prefix_at(lexer_token.source_position.input_name,
37 lexer_token.source_position.linenr);
// Reports a lexer error: writes msg followed by a newline to stderr.
// msg should not end in a newline itself, because one is appended here.
// NOTE(review): one statement before the fprintf is missing from this listing;
// presumably it prints the error prefix for the current position - confirm.
41 void parse_error(const char *msg)
44 fprintf(stderr, "%s\n", msg);
// Fragment of the character-reading routine (its signature is not part of this
// listing). Once the read position has reached the end of the buffered data,
// more input is read with fread into buf starting at offset MAX_PUTBACK, and
// bufpos and bufend are reset to delimit the s bytes that were just read.
// NOTE(review): how a short or zero-length read is handled is not visible here.
51 if(bufpos >= bufend) {
52 size_t s = fread(buf + MAX_PUTBACK, 1, sizeof(buf) - MAX_PUTBACK,
58 bufpos = buf + MAX_PUTBACK;
59 bufend = buf + MAX_PUTBACK + s;
// Trace output of the character c (c is assigned on a line not shown here).
63 printf("nchar '%c'\n", c);
// Fragment of the putback routine (its signature is not part of this listing).
// p addresses the byte just before the current read position; the const
// qualifier of bufpos is cast away so that the slot can be written.
// Presumably the pushed-back character pc is stored there and bufpos is moved
// back by one - TODO confirm against the missing lines.
70 char *p = (char*) bufpos - 1;
// Trace output of the pushed-back character.
76 printf("putback '%c'\n", pc);
// Tries to complete a trigraph. The table below pairs the third trigraph
// character with the character the trigraph stands for (for example = maps to
// the hash sign and the opening parenthesis maps to the opening bracket).
// Callers in this file use the result as a truth value and treat nonzero as
// a trigraph having been replaced.
// NOTE(review): the body of MATCH_TRIGRAPH and the return statements are not in
// this listing; how the current character is rewritten should be confirmed there.
82 int replace_trigraph(void)
84 #define MATCH_TRIGRAPH(ch,replacement) \
90 MATCH_TRIGRAPH('=', '#')
91 MATCH_TRIGRAPH('(', '[')
92 MATCH_TRIGRAPH('/', '\\')
93 MATCH_TRIGRAPH(')', ']')
94 MATCH_TRIGRAPH('\'', '^')
95 MATCH_TRIGRAPH('<', '{')
96 MATCH_TRIGRAPH('!', '|')
97 MATCH_TRIGRAPH('>', '}')
98 MATCH_TRIGRAPH('-', '~')
// SKIP_TRIGRAPHS(custom_putback, no_trigraph_code): helper macro for lexing
// loops that meet a question mark. It calls replace_trigraph and branches on
// the result. Judging by the parameter names, custom_putback is the code used
// to push a character back and no_trigraph_code runs when no trigraph was
// found - TODO confirm, most of the macro body is not in this listing.
106 #define SKIP_TRIGRAPHS(custom_putback, no_trigraph_code) \
116 if(replace_trigraph()) { \
// EAT_NEWLINE(newline_code): consumes a line ending and increments the line
// number of the current source position. Two branches increment linenr: the
// second one handles a plain newline character; the condition of the first one
// is not in this listing (presumably a carriage return, optionally followed by
// a newline - TODO confirm). newline_code is presumably the code to run once
// the line ending has been consumed.
125 #define EAT_NEWLINE(newline_code) \
130 lexer_token.source_position.linenr++; \
132 } else if(c == '\n') { \
134 lexer_token.source_position.linenr++; \
// SYMBOL_CHARS: the macro body is not in this listing. By its name it
// presumably expands to the case labels of all characters that may appear in
// an identifier - TODO confirm.
138 #define SYMBOL_CHARS \
// Lexes an identifier or keyword. The characters are appended to
// symbol_obstack (a question mark is first checked for a trigraph), the text is
// NUL-terminated and passed to symbol_table_insert. The token type is taken
// from the ID of the returned symbol and the symbol itself is stored in
// lexer_token.v.symbol.
206 void parse_symbol(void)
211 obstack_1grow(&symbol_obstack, c);
223 obstack_1grow(&symbol_obstack, c);
235 if(replace_trigraph())
247 obstack_1grow(&symbol_obstack, '\0');
249 string = obstack_finish(&symbol_obstack);
250 symbol = symbol_table_insert(string);
252 lexer_token.type = symbol->ID;
253 lexer_token.v.symbol = symbol;
// The symbol table returned a symbol that keeps a different copy of the name,
// so the text just built on the obstack is not needed and is released again.
255 if(symbol->string != string) {
256 obstack_free(&symbol_obstack, string);
// Parses the digits of a hexadecimal integer literal. On entry c must be the
// letter x or X of the prefix (asserted). If the following character is not a
// hex digit, the error premature end of hex number literal is reported and the
// token type becomes T_ERROR. Otherwise each digit is folded into value as
// 16 * value + digit, and the token becomes T_INTEGER with that value.
// No overflow check is performed on value.
// NOTE(review): the loop head and the first range test are not in this listing.
261 void parse_number_hex(void)
263 assert(c == 'x' || c == 'X');
267 !('A' <= c && c <= 'F') &&
268 !('a' <= c && c <= 'f')) {
269 parse_error("premature end of hex number literal");
270 lexer_token.type = T_ERROR;
277 value = 16 * value + c - '0';
278 } else if ('A' <= c && c <= 'F') {
279 value = 16 * value + c - 'A' + 10;
280 } else if ('a' <= c && c <= 'f') {
281 value = 16 * value + c - 'a' + 10;
283 lexer_token.type = T_INTEGER;
284 lexer_token.v.intvalue = value;
// Parses the digits of an octal integer literal. On entry c must be the letter
// o or O (asserted), so octal literals apparently use a letter prefix here.
// Each digit in the range 0 to 7 is folded into value as 8 * value + digit, and
// the token becomes T_INTEGER with that value. No overflow check is performed.
292 void parse_number_oct(void)
294 assert(c == 'o' || c == 'O');
299 if ('0' <= c && c <= '7') {
300 value = 8 * value + c - '0';
302 lexer_token.type = T_INTEGER;
303 lexer_token.v.intvalue = value;
// Parses a decimal integer literal. first_char is the first digit, supplied by
// the caller and asserted to lie in the range 0 to 9; it seeds value. Further
// digits are folded in as 10 * value + digit, and the token becomes T_INTEGER
// with that value. No overflow check is performed.
311 void parse_number_dec(int first_char)
315 assert(first_char >= '0' && first_char <= '9');
316 value = first_char - '0';
321 value = 10 * value + c - '0';
323 lexer_token.type = T_INTEGER;
324 lexer_token.v.intvalue = value;
// Dispatches to the matching number parser: the x label selects
// parse_number_hex, the O label selects parse_number_oct, and everything else
// is parsed as a decimal number whose first digit is 0.
// NOTE(review): the switch head and further case labels are not in this
// listing; presumably this runs after a leading 0 was read and switches on the
// character that follows it - confirm.
332 void parse_number(void)
334 // TODO check for overflow
335 // TODO check for various invalid input sequences
341 case 'x': parse_number_hex(); break;
343 case 'O': parse_number_oct(); break;
344 default: parse_number_dec('0');
// Parses an escape sequence and returns the character value it denotes; the
// simple escapes a, b, f, n, r, t, v and the two quote characters map to their
// usual control or literal characters. Presumably called after the backslash
// has been consumed - TODO confirm against parse_string_literal.
// Hex and octal escapes are not implemented yet and only report an error.
// A question mark is checked for a trigraph via replace_trigraph.
// End of input and unknown escapes are reported through parse_error.
// NOTE(review): the values returned on the error paths are not in this listing.
352 int parse_escape_sequence(void)
359 case '"': return '"';
360 case '\'': return'\'';
364 case 'a': return '\a';
365 case 'b': return '\b';
366 case 'f': return '\f';
367 case 'n': return '\n';
368 case 'r': return '\r';
369 case 't': return '\t';
370 case 'v': return '\v';
371 case 'x': /* TODO parse hex number ... */
372 parse_error("hex escape sequences not implemented yet");
382 /* TODO parse octal number ... */
383 parse_error("octal escape sequences not implemented yet");
389 /* might be a trigraph */
391 if(replace_trigraph()) {
399 parse_error("reached end of file while parsing escape sequence");
402 parse_error("unknown escape sequence");
// Lexes a string literal. The characters are collected on symbol_obstack;
// escape sequences are translated by parse_escape_sequence and the resulting
// character is appended. The line number at the start of the literal is
// remembered in start_linenr, presumably so the string has no end error can
// point at the opening line - TODO confirm, the argument line is not shown.
// That error sets the token type to T_ERROR.
// On success the text is NUL-terminated and inserted into stringset; the token
// becomes T_STRING_LITERAL and v.string is the pointer returned by the set.
409 void parse_string_literal(void)
411 unsigned start_linenr = lexer_token.source_position.linenr;
421 obstack_1grow(&symbol_obstack, '?');
429 int ec = parse_escape_sequence();
430 obstack_1grow(&symbol_obstack, ec);
434 error_prefix_at(lexer_token.source_position.input_name,
436 fprintf(stderr, "string has no end\n");
437 lexer_token.type = T_ERROR;
445 obstack_1grow(&symbol_obstack, c);
453 /* TODO: concatenate multiple strings separated by whitespace... */
455 /* add finishing 0 to the string */
456 obstack_1grow(&symbol_obstack, '\0');
457 string = obstack_finish(&symbol_obstack);
459 /* check if there is already a copy of the string */
460 result = strset_insert(&stringset, string);
// An equal string was already in the set, so the copy just built on the
// obstack is released and the existing one is used.
461 if(result != string) {
462 obstack_free(&symbol_obstack, string);
465 lexer_token.type = T_STRING_LITERAL;
466 lexer_token.v.string = result;
// MATCH_NEWLINE(code): helper macro for switch-based scanning loops. Both
// visible branches increment the line number of the current source position;
// presumably they are the case labels for the two line-ending characters and
// code is the action to run afterwards - TODO confirm, the labels are not in
// this listing.
469 #define MATCH_NEWLINE(code) \
475 lexer_token.source_position.linenr++; \
479 lexer_token.source_position.linenr++; \
// Lexes a character constant. A newline inside the constant and a constant
// with more than one character are reported as errors and then finish the
// constant via the end_of_char_constant label. End of file inside the constant
// is reported and sets the token type to T_ERROR.
// At the label the token becomes T_INTEGER with found_char as its value.
// NOTE(review): found_char being nonzero is used as the marker for a character
// having been seen already, so a NUL character value would not be detected as
// a first character - confirm whether that matters.
483 void parse_character_constant(void)
503 parse_error("newline while parsing character constant");
509 goto end_of_char_constant;
512 parse_error("EOF while parsing character constant");
513 lexer_token.type = T_ERROR;
517 if(found_char != 0) {
518 parse_error("more than 1 characters in character "
520 goto end_of_char_constant;
529 end_of_char_constant:
530 lexer_token.type = T_INTEGER;
531 lexer_token.v.intvalue = found_char;
// Skips the body of a block comment. The line number at the start of the
// comment is remembered in start_linenr, presumably so that an unterminated
// comment can be reported at its opening line - TODO confirm, the argument
// line is not shown. A question mark is checked for a trigraph, and a line
// ending resets had_star (presumably the flag for a star seen just before).
// Reaching end of file prints the message at end of file while looking for
// comment end after the error prefix.
535 void skip_multiline_comment(void)
537 unsigned start_linenr = lexer_token.source_position.linenr;
568 if(replace_trigraph())
573 /* we don't put back the 2nd ? as the comment text is discarded
577 MATCH_NEWLINE(had_star = 0; break;)
580 error_prefix_at(lexer_token.source_position.input_name,
582 fprintf(stderr, "at end of file while looking for comment end\n");
// Skips the rest of a line comment. A question mark is checked for a trigraph
// via replace_trigraph, and the line number of the current source position is
// incremented (presumably when the terminating line ending is reached - TODO
// confirm, the surrounding conditions are not in this listing).
593 void skip_line_comment(void)
602 if(replace_trigraph())
605 /* we don't put back the 2nd ? as the comment text is discarded
613 lexer_token.source_position.linenr++;
// Current token of the preprocessor directive parser; next_pp_token refreshes
// it with a copy of lexer_token.
629 static token_t pp_token;
// Advances the directive parser by one token: lexes the next preprocessing
// token and copies the resulting lexer_token into pp_token.
632 void next_pp_token(void)
634 lexer_next_preprocessing_token();
635 pp_token = lexer_token;
// Skips the remaining tokens of the current line: loops while pp_token is
// neither a newline token nor T_EOF. The loop body is not in this listing;
// presumably it calls next_pp_token - TODO confirm.
639 void eat_until_newline(void)
641 while(pp_token.type != '\n' && pp_token.type != T_EOF) {
// Handles the error directive: prints a fixed header line to stderr.
// NOTE(review): the directive text itself is not printed by the visible code;
// per the comment below, the remaining tokens of the line are consumed next.
647 void error_directive(void)
650 fprintf(stderr, "#error directive: \n");
652 /* parse pp-tokens until new-line */
// Handles the define directive: reads the next preprocessing token and reports
// an error unless it is an identifier. The rest of the handling is not in this
// listing.
// NOTE(review): the message passed to parse_error ends with a newline although
// parse_error appends one itself, so this error is followed by an empty line,
// unlike every other parse_error call in this file - drop the trailing newline.
656 void define_directive(void)
658 lexer_next_preprocessing_token();
659 if(lexer_token.type != T_IDENTIFIER) {
660 parse_error("expected identifier after #define\n");
// Handles the ifdef and ifndef directives; is_ifndef presumably selects the
// negated form - TODO confirm, its use is not in this listing. The visible
// code only reads the next preprocessing token; the identifier check is
// still disabled.
666 void ifdef_directive(int is_ifndef)
669 lexer_next_preprocessing_token();
670 //expect_identifier();
// Handles the endif directive. The body is not in this listing.
675 void endif_directive(void)
// Handles a line directive using pp_token. An integer token is expected; if it
// is missing, the error expected integer is reported. The current line number
// is set to the given value minus one, presumably because the line ending of
// the directive itself increments it again - TODO confirm. If a string literal
// follows, it becomes the new input name of the current source position.
// NOTE(review): whether the error path skips the assignment is not visible in
// this listing.
681 void parse_line_directive(void)
683 if(pp_token.type != T_INTEGER) {
684 parse_error("expected integer");
686 lexer_token.source_position.linenr = pp_token.v.intvalue - 1;
689 if(pp_token.type == T_STRING_LITERAL) {
690 lexer_token.source_position.input_name = pp_token.v.string;
// Dispatches a directive that starts with an identifier. pp_token must be an
// identifier token (asserted); the switch selects the handler by the pp_ID of
// its symbol. The include branch only prints a placeholder message so far, and
// one branch forwards to parse_line_directive.
// NOTE(review): the case labels and the other branches are not in this listing.
698 void parse_preprocessor_identifier(void)
700 assert(pp_token.type == T_IDENTIFIER);
701 symbol_t *symbol = pp_token.v.symbol;
703 switch(symbol->pp_ID) {
705 printf("include - enable header name parsing!\n");
721 parse_line_directive();
// Parses one preprocessor directive by switching on the type of pp_token: one
// branch forwards to parse_preprocessor_identifier, one to
// parse_line_directive, and anything else is reported as an invalid
// preprocessor directive. The case labels are not in this listing.
// NOTE(review): the parameter list is written as empty parentheses while every
// other function in this file uses (void); in C the empty form does not
// declare a prototype and should be made consistent.
736 void parse_preprocessor_directive()
740 switch(pp_token.type) {
742 parse_preprocessor_identifier();
745 parse_line_directive();
748 parse_error("invalid preprocessor directive");
// MAYBE_PROLOG: opening part shared by the MAYBE / ELSE macro chains used in
// lexer_next_preprocessing_token. Its body is not in this listing; presumably
// it reads the next character and opens the loop and switch that the ELSE
// macros close - TODO confirm.
754 #define MAYBE_PROLOG \
// MAYBE(ch, set_type): one alternative in a punctuator chain. The visible line
// sets the token type to set_type; presumably this happens when the next input
// character equals ch - TODO confirm, the case label is not in this listing.
759 #define MAYBE(ch, set_type) \
762 lexer_token.type = set_type; \
// ELSE_CODE(code): closing part of a MAYBE chain with a custom fallback. The
// visible lines handle line endings through EAT_NEWLINE and close a while(1)
// loop; code is presumably run when none of the MAYBE alternatives matched -
// TODO confirm, most of the macro body is not in this listing.
765 #define ELSE_CODE(code) \
772 EAT_NEWLINE(break;) \
777 } /* end of while(1) */ \
// ELSE(set_type): closing part of a MAYBE chain whose fallback sets the token
// type to set_type. Presumably implemented on top of ELSE_CODE - TODO confirm,
// the line between the two shown here is missing from this listing.
780 #define ELSE(set_type) \
782 lexer_token.type = set_type; \
// Reads the next preprocessing token from the input into lexer_token.
// Behavior visible in this listing:
//  - a line ending yields a token whose type is the newline character
//  - string literals and character constants are delegated to
//    parse_string_literal and parse_character_constant
//  - multi-character punctuators are recognized through the MAYBE / ELSE macro
//    chains (ellipsis, compound assignments, increment and decrement, shifts,
//    digraphs such as T_LESSCOLON and T_PERCENTCOLON, and so on)
//  - after a block comment (skip_multiline_comment) the function calls itself
//    again to deliver the token that follows; a second branch restarts lexing
//    the same way, presumably after a line comment - TODO confirm
//  - a question mark is checked for a trigraph via replace_trigraph and is
//    otherwise returned as a plain question mark token
//  - some characters are returned as tokens of their own character value
//  - end of input yields T_EOF
//  - an unexpected backslash or an unknown character is reported and yields
//    T_ERROR
// NOTE(review): the switch labels are not in this listing, so the mapping from
// input characters to the branches above is inferred from the token names.
786 void lexer_next_preprocessing_token(void)
796 lexer_token.type = '\n';
809 parse_string_literal();
813 parse_character_constant();
820 lexer_token.source_position.linenr++;
823 parse_error("unexpected '\\' found");
824 lexer_token.type = T_ERROR;
832 MAYBE('.', T_DOTDOTDOT)
836 lexer_token.type = '.';
843 MAYBE('=', T_ANDEQUAL)
847 MAYBE('=', T_ASTERISKEQUAL)
851 MAYBE('+', T_PLUSPLUS)
852 MAYBE('=', T_PLUSEQUAL)
856 MAYBE('-', T_MINUSMINUS)
857 MAYBE('=', T_MINUSEQUAL)
861 MAYBE('=', T_EXCLAMATIONMARKEQUAL)
865 MAYBE('=', T_SLASHEQUAL)
868 skip_multiline_comment();
869 lexer_next_preprocessing_token();
874 lexer_next_preprocessing_token();
879 MAYBE('>', T_PERCENTGREATER)
880 MAYBE('=', T_PERCENTEQUAL)
885 MAYBE(':', T_PERCENTCOLONPERCENTCOLON)
889 lexer_token.type = T_PERCENTCOLON;
896 MAYBE(':', T_LESSCOLON)
897 MAYBE('%', T_LESSPERCENT)
900 MAYBE('=', T_LESSLESSEQUAL)
907 MAYBE('=', T_GREATERGREATEREQUAL)
908 ELSE(T_GREATERGREATER)
912 MAYBE('=', T_CARETEQUAL)
916 MAYBE('=', T_PIPEEQUAL)
917 MAYBE('|', T_PIPEPIPE)
921 MAYBE('>', T_COLONGREATER)
925 MAYBE('=', T_EQUALEQUAL)
929 MAYBE('#', T_HASHHASH)
934 /* just a simple ? */
936 lexer_token.type = '?';
939 /* might be a trigraph */
941 if(replace_trigraph()) {
946 lexer_token.type = '?';
958 lexer_token.type = c;
963 lexer_token.type = T_EOF;
969 fprintf(stderr, "unknown character '%c' found\n", c);
970 lexer_token.type = T_ERROR;
// Reads the next token for the parser on top of the preprocessing-token lexer.
// A token that is not a newline is handled by the statement after the first if,
// which is missing from this listing (presumably an early return - confirm).
// After a newline, further newline tokens are skipped; if the first token of
// the new line is a hash sign, the line is handed to
// parse_preprocessor_directive.
// NOTE(review): what happens after the directive has been parsed is not
// visible in this listing.
976 void lexer_next_token(void)
978 lexer_next_preprocessing_token();
979 if(lexer_token.type != '\n')
984 lexer_next_preprocessing_token();
985 } while(lexer_token.type == '\n');
987 if(lexer_token.type == '#') {
988 parse_preprocessor_directive();
// Initializes the lexer module: sets up the string literal set stringset.
// The counterpart exit_lexer destroys it again.
993 void init_lexer(void)
995 strset_init(&stringset);
// Starts lexing a new input. The line number of the current source position is
// reset to 0 and input_name is stored as its input name; the pointer is kept,
// not copied, so the name must stay valid while the input is being lexed.
// NOTE(review): the statements that use stream and that install the virtual
// newline described below are not in this listing.
998 void lexer_open_stream(FILE *stream, const char *input_name)
1001 lexer_token.source_position.linenr = 0;
1002 lexer_token.source_position.input_name = input_name;
1004 /* we place a virtual '\n' at the beginning so the lexer knows we're at the
1005 * beginning of a line */
// Shuts the lexer module down: destroys the string literal set that
// init_lexer created.
1009 void exit_lexer(void)
1011 strset_destroy(&stringset);
// Debugging helper: prints a source position to stdout in the form
// input_name:linenr followed by a newline. The unused attribute keeps the
// compiler quiet while no code calls it.
// NOTE(review): line numbers are handled as unsigned elsewhere in this file
// (parameter of error_prefix_at, start_linenr locals); if the linenr field is
// unsigned, the conversion here should be the unsigned one - confirm the field
// type in the declaration of source_position_t.
1014 static __attribute__((unused))
1015 void dbg_pos(const source_position_t source_position)
1017 fprintf(stdout, "%s:%d\n", source_position.input_name,
1018 source_position.linenr);