5 #include "symbol_table_t.h"
7 #include "adt/strset.h"
/* Read buffer. Data read with fread() is stored starting at
 * buf + MAX_PUTBACK, which leaves MAX_PUTBACK bytes of room in front of the
 * data for characters that are pushed back. */
21 static char buf[1024 + MAX_PUTBACK];
/* One past the last valid byte currently held in buf. */
22 static const char *bufend;
/* Current read position inside buf. */
23 static const char *bufpos;
/* Set of string contents; string literals and concatenated strings are
 * inserted here so that equal strings share a single copy. */
24 static strset_t stringset;
25 //static FILE **input_stack;
26 //static char **buf_stack;
/**
 * Writes the prefix "<input_name>:<linenr>: Error: " to stderr.
 * No newline is emitted; the caller prints the actual message afterwards.
 */
28 static void error_prefix_at(const char *input_name, unsigned linenr)
30 fprintf(stderr, "%s:%u: Error: ", input_name, linenr);
/**
 * Writes the error prefix for the source position stored in the current
 * lexer_token (input name and line number).
 */
33 static void error_prefix(void)
35 error_prefix_at(lexer_token.source_position.input_name,
36 lexer_token.source_position.linenr);
/**
 * Reports a lexer error on stderr.
 *
 * @param msg  message text; a newline is appended here, so msg should not
 *             end in '\n' itself
 */
39 static void parse_error(const char *msg)
/* NOTE(review): presumably error_prefix() is called before this line;
 * that statement is not visible in this view. */
42 fprintf(stderr, "%s\n", msg);
/**
 * Advances to the next raw input character, refilling buf when it has been
 * consumed completely.
 */
45 static inline void next_real_char(void)
/* buffer exhausted: read the next chunk behind the put-back reserve */
48 if(bufpos >= bufend) {
49 size_t s = fread(buf + MAX_PUTBACK, 1, sizeof(buf) - MAX_PUTBACK,
/* restart reading at the beginning of the fresh data; s bytes are valid */
55 bufpos = buf + MAX_PUTBACK;
56 bufend = buf + MAX_PUTBACK + s;
/**
 * Pushes the character pc back in front of the current read position.
 * NOTE(review): only the pointer computation and the trace output are
 * visible here; presumably pc is stored at p and bufpos is moved back.
 */
61 static inline void put_back(int pc)
/* byte directly before the read position; the cast drops the const of
 * bufpos, which is fine because buf itself is a non-const array */
63 char *p = (char*) bufpos - 1;
/* trace output (presumably only compiled in for debugging) */
69 printf("putback '%c'\n", pc);
73 static inline void next_char(void);
/* MATCH_NEWLINE(code): newline handling shared by several scanners. Both
 * visible branches increment the line number of the current token position;
 * `code` is a statement supplied by the user of the macro (callers pass
 * `return;` or `break;`).
 * NOTE(review): the case labels of the two branches are not visible here;
 * presumably they distinguish '\r' and '\n' line endings. */
75 #define MATCH_NEWLINE(code) \
81 lexer_token.source_position.linenr++; \
85 lexer_token.source_position.linenr++; \
88 static inline void eat(char c_type)
/**
 * Called by next_char() after a backslash has been read (directly or via
 * the ??/ trigraph). If a newline follows, it is consumed through
 * MATCH_NEWLINE and the function returns.
 * NOTE(review): presumably this implements backslash-newline line
 * splicing; the handling of the "no newline follows" case is not visible.
 */
94 static void maybe_concat_lines(void)
99 MATCH_NEWLINE(return;)
/**
 * Reads the next character into c, handling a backslash via
 * maybe_concat_lines() and replacing trigraphs by the character they
 * stand for.
 */
109 static inline void next_char(void)
113 /* filter trigraphs */
/* a backslash may start a line continuation */
114 if(UNLIKELY(c == '\\')) {
115 maybe_concat_lines();
116 goto end_of_next_char;
120 goto end_of_next_char;
/* fast path: anything but '?' cannot start a trigraph */
123 if(LIKELY(c != '?')) {
126 goto end_of_next_char;
/* last character of a trigraph -> replacement character; the ??/ trigraph
 * yields a backslash and therefore also gets the backslash treatment */
131 case '=': c = '#'; break;
132 case '(': c = '['; break;
133 case '/': c = '\\'; maybe_concat_lines(); break;
134 case ')': c = ']'; break;
135 case '\'': c = '^'; break;
136 case '<': c = '{'; break;
137 case '!': c = '|'; break;
138 case '>': c = '}'; break;
139 case '-': c = '~'; break;
/* trace output (presumably only compiled in for debugging) */
149 printf("nchar '%c'\n", c);
155 #define SYMBOL_CHARS \
/**
 * Lexes an identifier or keyword. The characters are collected on
 * symbol_obstack, the finished string is entered into the symbol table and
 * the resulting symbol is stored in lexer_token; the token type is the
 * symbol's ID.
 */
222 static void parse_symbol(void)
227 obstack_1grow(&symbol_obstack, c);
234 obstack_1grow(&symbol_obstack, c);
/* terminate the collected name */
244 obstack_1grow(&symbol_obstack, '\0');
246 string = obstack_finish(&symbol_obstack);
247 symbol = symbol_table_insert(string);
249 lexer_token.type = symbol->ID;
250 lexer_token.v.symbol = symbol;
/* the table already had a symbol with its own copy of this name, so the
 * copy just built on the obstack is not needed */
252 if(symbol->string != string) {
253 obstack_free(&symbol_obstack, string);
/**
 * Consumes an optional integer suffix behind an integer literal: 'u'/'U'
 * and 'l'/'L' (optionally doubled) in either order, as far as the visible
 * branches show. The suffix is currently only skipped; it does not
 * influence the token (see TODO).
 */
257 static void parse_integer_suffix(void)
/* accept both cases of the unsigned suffix, like the branches below do */
259 if(c == 'u' || c == 'U') {
260 /* TODO do something with the suffixes... */
262 if(c == 'L' || c == 'l') {
264 if(c == 'L' || c == 'l') {
268 } else if(c == 'l' || c == 'L') {
270 if(c == 'l' || c == 'L') {
272 if(c == 'u' || c == 'U') {
275 } else if(c == 'u' || c == 'U') {
/**
 * Lexes a hexadecimal integer literal. Must be entered with c on the
 * 'x'/'X' of the "0x" prefix. On success lexer_token becomes T_INTEGER
 * with the parsed value; a prefix without any hex digit yields T_ERROR.
 * Hexadecimal floating point literals are not supported and end in panic().
 */
281 static void parse_number_hex(void)
283 assert(c == 'x' || c == 'X');
/* at least one hex digit has to follow the prefix */
287 !('A' <= c && c <= 'F') &&
288 !('a' <= c && c <= 'f')) {
289 parse_error("premature end of hex number literal");
290 lexer_token.type = T_ERROR;
/* accumulate the value digit by digit */
297 value = 16 * value + c - '0';
298 } else if ('A' <= c && c <= 'F') {
299 value = 16 * value + c - 'A' + 10;
300 } else if ('a' <= c && c <= 'f') {
301 value = 16 * value + c - 'a' + 10;
303 parse_integer_suffix();
305 lexer_token.type = T_INTEGER;
306 lexer_token.v.intvalue = value;
/* '.', 'p' or 'P' would continue a hex floating point literal */
312 if(c == '.' || c == 'p' || c == 'P') {
314 panic("Hex floating point numbers not implemented yet");
/**
 * Lexes an octal integer literal. Digits 0-7 are accumulated; a following
 * '8' or '9' is reported as an error (T_ERROR). Otherwise lexer_token
 * becomes T_INTEGER with the parsed value and an optional integer suffix
 * is consumed.
 */
318 static void parse_number_oct(void)
321 while(c >= '0' && c <= '7') {
322 value = 8 * value + c - '0';
/* '8' and '9' are decimal digits but not valid in an octal literal */
325 if (c == '8' || c == '9') {
326 parse_error("invalid octal number");
327 lexer_token.type = T_ERROR;
331 lexer_token.type = T_INTEGER;
332 lexer_token.v.intvalue = value;
334 parse_integer_suffix();
/**
 * Lexes the exponent part of a floating point literal and stores the
 * result in lexer_token as T_FLOATINGPOINT.
 *
 * @param value  the mantissa parsed so far
 *
 * NOTE(review): the statements that apply the exponent to value (and the
 * handling of a '-' sign, which presumably changes factor) are not visible
 * in this view.
 */
337 static void parse_floatingpoint_exponent(long double value)
339 unsigned int expo = 0;
340 long double factor = 10.;
/* optional sign of the exponent */
345 } else if(c == '+') {
/* decimal exponent digits */
349 while(c >= '0' && c <= '9') {
350 expo = 10 * expo + (c - '0');
363 lexer_token.type = T_FLOATINGPOINT;
364 lexer_token.v.floatvalue = value;
/**
 * Lexes the fractional digits of a floating point literal.
 *
 * @param integer_part  value of the digits in front of the decimal point
 *
 * Each fractional digit is added to the value scaled by factor. If an
 * 'e'/'E' follows, parse_floatingpoint_exponent() is called with the value;
 * the visible code also stores the value in lexer_token as T_FLOATINGPOINT.
 * NOTE(review): the update of factor inside the loop is not visible here;
 * presumably it is divided by 10 for every digit.
 */
367 static void parse_floatingpoint_fract(int integer_part)
369 long double value = integer_part;
370 long double factor = 1.;
372 while(c >= '0' && c <= '9') {
374 value += (c - '0') * factor;
378 if(c == 'e' || c == 'E') {
380 parse_floatingpoint_exponent(value);
384 lexer_token.type = T_FLOATINGPOINT;
385 lexer_token.v.floatvalue = value;
/**
 * Lexes a decimal literal. The leading digits are accumulated as an
 * integer; the visible code then calls parse_floatingpoint_fract() or, on
 * 'e'/'E', parse_floatingpoint_exponent() with that value, and it also
 * consumes an integer suffix and produces T_INTEGER.
 * NOTE(review): the condition guarding the fraction call is not visible
 * here; presumably it tests for '.'.
 */
388 static void parse_number_dec(void)
393 value = 10 * value + c - '0';
399 parse_floatingpoint_fract(value);
402 if(c == 'e' || c == 'E') {
404 parse_floatingpoint_exponent(value);
407 parse_integer_suffix();
409 lexer_token.type = T_INTEGER;
410 lexer_token.v.intvalue = value;
/**
 * Entry point for numeric literals.
 * NOTE(review): most of the dispatch logic is not visible in this view.
 * The visible parts show that the fraction and exponent parsers can be
 * entered with an integer part of 0 and that a malformed octal literal is
 * reported as T_ERROR.
 */
413 static void parse_number(void)
434 parse_floatingpoint_fract(0);
438 parse_floatingpoint_exponent(0);
443 parse_error("invalid octal number");
444 lexer_token.type = T_ERROR;
/**
 * Parses the digits of an octal escape sequence and returns their value.
 * NOTE(review): the loop header is not visible here, so whether the
 * number of digits is bounded cannot be told from this view.
 */
457 static int parse_octal_sequence(void)
/* anything outside '0'..'7' is not an octal digit */
461 if(c < '0' || c > '7')
463 value = 8 * value + c - '0';
/**
 * Parses the digits of a hexadecimal escape sequence and returns their
 * value. Digits, 'A'-'F' and 'a'-'f' are accepted.
 */
470 static int parse_hex_sequence(void)
474 if (c >= '0' && c <= '9') {
475 value = 16 * value + c - '0';
476 } else if ('A' <= c && c <= 'F') {
477 value = 16 * value + c - 'A' + 10;
478 } else if ('a' <= c && c <= 'f') {
479 value = 16 * value + c - 'a' + 10;
/**
 * Parses an escape sequence inside a string literal or character constant
 * and returns the value of the character it denotes. Simple escapes are
 * mapped directly; hexadecimal and octal escapes are delegated to
 * parse_hex_sequence() / parse_octal_sequence(). Reaching the end of the
 * file and unknown escapes are reported through parse_error().
 */
489 static int parse_escape_sequence(void)
/* simple escape sequences */
497 case '"': return '"';
498 case '\'': return'\'';
499 case '\\': return '\\';
500 case '?': return '\?';
501 case 'a': return '\a';
502 case 'b': return '\b';
503 case 'f': return '\f';
504 case 'n': return '\n';
505 case 'r': return '\r';
506 case 't': return '\t';
507 case 'v': return '\v';
/* numeric escape sequences */
509 return parse_hex_sequence();
518 return parse_octal_sequence();
520 parse_error("reached end of file while parsing escape sequence");
523 parse_error("unknown escape sequence");
/**
 * Builds the concatenation of two NUL-terminated strings and enters it
 * into stringset.
 *
 * @param s1  first part
 * @param s2  second part
 *
 * NOTE(review): the return statement is not visible here; presumably the
 * pointer obtained from strset_insert() is returned.
 */
528 const char *concat_strings(const char *s1, const char *s2)
530 size_t len1 = strlen(s1);
531 size_t len2 = strlen(s2);
/* room for both strings plus one terminating NUL */
533 char *concat = obstack_alloc(&symbol_obstack, len1 + len2 + 1);
534 memcpy(concat, s1, len1);
/* len2 + 1 also copies the terminator of s2 */
535 memcpy(concat + len1, s2, len2 + 1);
537 const char *result = strset_insert(&stringset, concat);
/* the set returned a different pointer, i.e. an equal string was already
 * stored; drop the copy that was just built */
538 if(result != concat) {
539 obstack_free(&symbol_obstack, concat);
/**
 * Lexes a string literal. The characters (with escape sequences already
 * translated) are collected on symbol_obstack, the finished string is
 * deduplicated through stringset and stored in lexer_token as
 * T_STRING_LITERAL. If the input ends inside the literal, an error is
 * reported and the token becomes T_ERROR.
 */
545 static void parse_string_literal(void)
/* remember where the literal starts */
547 unsigned start_linenr = lexer_token.source_position.linenr;
558 tc = parse_escape_sequence();
559 obstack_1grow(&symbol_obstack, tc);
563 error_prefix_at(lexer_token.source_position.input_name,
565 fprintf(stderr, "string has no end\n");
566 lexer_token.type = T_ERROR;
574 obstack_1grow(&symbol_obstack, c);
582 /* TODO: concatenate multiple strings separated by whitespace... */
584 /* add finishing 0 to the string */
585 obstack_1grow(&symbol_obstack, '\0');
586 string = obstack_finish(&symbol_obstack);
588 /* check if there is already a copy of the string */
589 result = strset_insert(&stringset, string);
590 if(result != string) {
591 obstack_free(&symbol_obstack, string);
594 lexer_token.type = T_STRING_LITERAL;
595 lexer_token.v.string = result;
/**
 * Lexes a character constant. The result is delivered as a T_INTEGER token
 * whose value is the character; escape sequences are translated by
 * parse_escape_sequence(). A newline inside the constant and more than one
 * character are reported as errors; end of file yields T_ERROR.
 */
598 static void parse_character_constant(void)
606 found_char = parse_escape_sequence();
610 parse_error("newline while parsing character constant");
616 goto end_of_char_constant;
619 parse_error("EOF while parsing character constant");
620 lexer_token.type = T_ERROR;
/* NOTE(review): "a character was already seen" is detected through
 * found_char != 0, so a first character with value 0 (e.g. '\0') would not
 * count as seen - confirm this is intended. */
624 if(found_char != 0) {
625 parse_error("more than 1 characters in character "
627 goto end_of_char_constant;
636 end_of_char_constant:
637 lexer_token.type = T_INTEGER;
638 lexer_token.v.intvalue = found_char;
/**
 * Skips a multi-line comment. Newlines inside the comment go through
 * MATCH_NEWLINE, so the line counter stays correct. If the file ends
 * before the comment is closed, an error is printed.
 */
641 static void skip_multiline_comment(void)
/* remember the line where the comment starts */
643 unsigned start_linenr = lexer_token.source_position.linenr;
655 MATCH_NEWLINE(break;)
658 error_prefix_at(lexer_token.source_position.input_name,
660 fprintf(stderr, "at end of file while looking for comment end\n");
670 static void skip_line_comment(void)
/* The token the preprocessor directive code is currently looking at. */
688 static token_t pp_token;
/**
 * Reads the next preprocessing token and copies it from lexer_token into
 * pp_token.
 */
690 static inline void next_pp_token(void)
692 lexer_next_preprocessing_token();
693 pp_token = lexer_token;
/**
 * Loops until pp_token is a newline token or T_EOF.
 * NOTE(review): the loop body is not visible here; presumably it calls
 * next_pp_token() to discard the rest of the line.
 */
696 static void eat_until_newline(void)
698 while(pp_token.type != '\n' && pp_token.type != T_EOF) {
/**
 * Handles an "#error" directive by printing a notice to stderr.
 * NOTE(review): the statements around the fprintf are not visible here;
 * per the comment below, the rest of the line is then consumed.
 */
703 static void error_directive(void)
706 fprintf(stderr, "#error directive: \n");
708 /* parse pp-tokens until new-line */
/**
 * Handles a "#define" directive. The token following the directive name
 * must be an identifier; anything else is reported as an error.
 */
711 static void define_directive(void)
713 lexer_next_preprocessing_token();
714 if(lexer_token.type != T_IDENTIFIER) {
/* no trailing '\n' in the message: parse_error() appends the newline */
715 parse_error("expected identifier after #define");
/**
 * Handles "#ifdef" / "#ifndef".
 *
 * @param is_ifndef  NOTE(review): presumably non-zero for "#ifndef"; its
 *                   use is not visible in this view
 *
 * The visible code only reads the next preprocessing token; the check that
 * it is an identifier is commented out.
 */
720 static void ifdef_directive(int is_ifndef)
723 lexer_next_preprocessing_token();
724 //expect_identifier();
728 static void endif_directive(void)
/**
 * Handles a line directive: pp_token must be an integer giving the new
 * line number; an optional string literal after it replaces the current
 * input name.
 */
733 static void parse_line_directive(void)
735 if(pp_token.type != T_INTEGER) {
736 parse_error("expected integer");
/* NOTE(review): stored one lower than requested, presumably because the
 * newline ending the directive increments the counter again - confirm */
738 lexer_token.source_position.linenr = pp_token.v.intvalue - 1;
741 if(pp_token.type == T_STRING_LITERAL) {
742 lexer_token.source_position.input_name = pp_token.v.string;
/**
 * Dispatches a preprocessor directive whose name is an identifier, based
 * on the pp_ID of the identifier's symbol. pp_token must be T_IDENTIFIER.
 * NOTE(review): most case labels are not visible in this view.
 */
749 static void parse_preprocessor_identifier(void)
751 assert(pp_token.type == T_IDENTIFIER);
752 symbol_t *symbol = pp_token.v.symbol;
754 switch(symbol->pp_ID) {
/* "#include" is not implemented; only a notice is printed */
756 printf("include - enable header name parsing!\n");
772 parse_line_directive();
/**
 * Parses a preprocessor directive, dispatching on the type of pp_token:
 * the visible branches call parse_preprocessor_identifier() and
 * parse_line_directive(), and report "invalid preprocessor directive"
 * otherwise.
 * NOTE(review): the case labels are not visible in this view.
 */
786 static void parse_preprocessor_directive(void)
790 switch(pp_token.type) {
792 parse_preprocessor_identifier();
795 parse_line_directive();
798 parse_error("invalid preprocessor directive");
/* Helper macros used by lexer_next_preprocessing_token() to recognise
 * operators that consist of more than one character:
 *   MAYBE_PROLOG        - opens such a block
 *   MAYBE(ch, set_type) - sets the token type to set_type; used as
 *                         MAYBE('=', T_PLUSEQUAL) after a first character
 *   ELSE_CODE(code)     - runs `code` as fallback and closes the block
 *   ELSE(set_type)      - fallback that sets the token type to set_type
 * NOTE(review): only fragments of the macro bodies are visible here;
 * presumably MAYBE tests whether the next character equals ch. */
804 #define MAYBE_PROLOG \
809 #define MAYBE(ch, set_type) \
812 lexer_token.type = set_type; \
815 #define ELSE_CODE(code) \
819 } /* end of while(1) */ \
822 #define ELSE(set_type) \
824 lexer_token.type = set_type; \
/**
 * Reads the next preprocessing token into lexer_token.
 *
 * Newlines are delivered as tokens of type '\n'. String literals and
 * character constants are handled by their own parsers. Operators made of
 * several characters are recognised with the MAYBE/ELSE macros. After a
 * comment has been skipped the function calls itself to deliver the token
 * that follows. Single-character tokens use the character itself as token
 * type, the end of input yields T_EOF and an unknown character is reported
 * and yields T_ERROR.
 */
828 void lexer_next_preprocessing_token(void)
838 lexer_token.type = '\n';
851 parse_string_literal();
855 parse_character_constant();
862 MAYBE('.', T_DOTDOTDOT)
866 lexer_token.type = '.';
873 MAYBE('=', T_ANDEQUAL)
877 MAYBE('=', T_ASTERISKEQUAL)
881 MAYBE('+', T_PLUSPLUS)
882 MAYBE('=', T_PLUSEQUAL)
886 MAYBE('>', T_MINUSGREATER)
887 MAYBE('-', T_MINUSMINUS)
888 MAYBE('=', T_MINUSEQUAL)
892 MAYBE('=', T_EXCLAMATIONMARKEQUAL)
896 MAYBE('=', T_SLASHEQUAL)
/* comments produce no token: skip them and deliver what follows */
899 skip_multiline_comment();
900 lexer_next_preprocessing_token();
905 lexer_next_preprocessing_token();
910 MAYBE('>', T_PERCENTGREATER)
911 MAYBE('=', T_PERCENTEQUAL)
916 MAYBE(':', T_PERCENTCOLONPERCENTCOLON)
920 lexer_token.type = T_PERCENTCOLON;
927 MAYBE(':', T_LESSCOLON)
928 MAYBE('%', T_LESSPERCENT)
929 MAYBE('=', T_LESSEQUAL)
932 MAYBE('=', T_LESSLESSEQUAL)
937 MAYBE('=', T_GREATEREQUAL)
940 MAYBE('=', T_GREATERGREATEREQUAL)
941 ELSE(T_GREATERGREATER)
945 MAYBE('=', T_CARETEQUAL)
949 MAYBE('=', T_PIPEEQUAL)
950 MAYBE('|', T_PIPEPIPE)
954 MAYBE('>', T_COLONGREATER)
958 MAYBE('=', T_EQUALEQUAL)
962 MAYBE('#', T_HASHHASH)
/* single-character token: the character is its own token type */
976 lexer_token.type = c;
981 lexer_token.type = T_EOF;
987 fprintf(stderr, "unknown character '%c' found\n", c);
988 lexer_token.type = T_ERROR;
/**
 * Reads the next token for the parser. Newline tokens are not delivered:
 * consecutive newlines are skipped, and a '#' found as the first token
 * after a newline starts a preprocessor directive.
 * NOTE(review): the statement controlled by the first `if` is not visible
 * here; presumably it returns the non-newline token right away.
 */
994 void lexer_next_token(void)
996 lexer_next_preprocessing_token();
997 if(lexer_token.type != '\n')
/* skip all newline tokens */
1002 lexer_next_preprocessing_token();
1003 } while(lexer_token.type == '\n');
1005 if(lexer_token.type == '#') {
1006 parse_preprocessor_directive();
/**
 * Initializes the lexer's string set. Counterpart of exit_lexer().
 */
1011 void init_lexer(void)
1013 strset_init(&stringset);
/**
 * Starts lexing a new input stream.
 *
 * @param stream      the stream to read from
 * @param input_name  name used for source positions and error messages
 *
 * The source position is reset to line 1 of input_name.
 * NOTE(review): the statements that store the stream and reset the buffer
 * are not visible in this view.
 */
1016 void lexer_open_stream(FILE *stream, const char *input_name)
1019 lexer_token.source_position.linenr = 1;
1020 lexer_token.source_position.input_name = input_name;
/**
 * Destroys the lexer's string set. Counterpart of init_lexer().
 */
1025 void exit_lexer(void)
1027 strset_destroy(&stringset);
/**
 * Debugging helper: prints a source position as "<input_name>:<line>" to
 * stdout. Marked unused so that it may stay in the file without callers.
 *
 * @param source_position  the position to print
 */
1030 static __attribute__((unused))
1031 void dbg_pos(const source_position_t source_position)
/* the line number is handled as unsigned elsewhere in this file (see
 * error_prefix_at), so it is printed with %u */
1033 fprintf(stdout, "%s:%u\n", source_position.input_name,
1034 source_position.linenr);