5 #include "symbol_table_t.h"
7 #include "adt/strset.h"
/* Input buffer: 1024 bytes of payload plus MAX_PUTBACK extra bytes; reads are placed behind the first MAX_PUTBACK bytes. */
21 static char buf[1024 + MAX_PUTBACK];
/* One past the last valid byte in buf; set after every refill. */
22 static const char *bufend;
/* Current read position inside buf. */
23 static const char *bufpos;
/* Set of string literals; used to share identical strings between tokens. */
24 static strset_t stringset;
/* Writes the diagnostic prefix "<input_name>:<linenr>: Error: " to stderr (no newline). */
26 static void error_prefix_at(const char *input_name, unsigned linenr)
28 fprintf(stderr, "%s:%u: Error: ", input_name, linenr);
/* Writes the diagnostic prefix for the source position stored in lexer_token. */
31 static void error_prefix(void)
33 error_prefix_at(lexer_token.source_position.input_name,
34 lexer_token.source_position.linenr);
/*
 * Reports a lexer error on stderr. msg is printed followed by a newline, so
 * callers pass the message without a trailing '\n'.
 * NOTE(review): the line before the fprintf is not visible in this excerpt;
 * presumably it emits the position prefix - confirm.
 */
37 static void parse_error(const char *msg)
40 fprintf(stderr, "%s\n", msg);
/*
 * Low-level character fetch. Once the read position has reached the end of
 * the buffered data, the buffer is refilled with fread(). Data is read to
 * buf + MAX_PUTBACK, leaving the first MAX_PUTBACK bytes free
 * (presumably the room put_back() needs - confirm).
 * The remaining lines of the body are not visible in this excerpt.
 */
43 static inline void next_real_char(void)
46 if(bufpos >= bufend) {
47 size_t s = fread(buf + MAX_PUTBACK, 1, sizeof(buf) - MAX_PUTBACK,
/* continue reading at the start of the fresh data; s is the count fread() returned */
53 bufpos = buf + MAX_PUTBACK;
54 bufend = buf + MAX_PUTBACK + s;
/*
 * Puts the character pc back into the input. The visible code derives a
 * writable pointer to the slot just before the current read position; the
 * store itself and the update of bufpos are not visible in this excerpt.
 */
59 static inline void put_back(int pc)
/* the cast drops const: bufpos is declared as const char * */
61 char *p = (char*) bufpos - 1;
/* trace output; NOTE(review): presumably guarded by a debug #ifdef that is not visible here */
67 printf("putback '%c'\n", pc);
/* Forward declaration; next_char() is defined further down in this file. */
71 static inline void next_char(void);
/*
 * MATCH_NEWLINE(code): newline handling shared by several scanners. Both
 * visible lines of the expansion increment the current line number. `code`
 * is a statement supplied by the call site (`return;` and `break;` are used
 * in this file); how it is placed in the expansion is not visible here.
 */
73 #define MATCH_NEWLINE(code) \
79 lexer_token.source_position.linenr++; \
83 lexer_token.source_position.linenr++; \
/*
 * eat(c_type): only the signature is visible in this excerpt.
 * NOTE(review): presumably checks that the current character equals c_type
 * and advances to the next one - confirm against the full body.
 */
86 static inline void eat(char c_type)
/*
 * Called by next_char() after a backslash was read (directly or through the
 * ??/ trigraph). Passes `return;` as the statement for MATCH_NEWLINE.
 * NOTE(review): presumably this splices a backslash-newline pair into one
 * logical line; the other lines of the body are not visible here.
 */
92 static void maybe_concat_lines(void)
97 MATCH_NEWLINE(return;)
/*
 * Advances the lexer to the next character and leaves it in c.
 * A backslash is handed to maybe_concat_lines(); a '?' may start a trigraph,
 * which is replaced by the character it stands for. Everything else takes
 * the short path to end_of_next_char. Several lines of the body (the actual
 * reads and the switch header) are not visible in this excerpt.
 */
107 static inline void next_char(void)
111 /* filter trigraphs */
112 if(UNLIKELY(c == '\\')) {
113 maybe_concat_lines();
114 goto end_of_next_char;
118 goto end_of_next_char;
/* common case: no '?', so no trigraph can start here */
121 if(LIKELY(c != '?')) {
124 goto end_of_next_char;
/* trigraph table: presumably switched on the character that follows "??" */
129 case '=': c = '#'; break;
130 case '(': c = '['; break;
/* the backslash produced by ??/ may itself continue the line */
131 case '/': c = '\\'; maybe_concat_lines(); break;
132 case ')': c = ']'; break;
133 case '\'': c = '^'; break;
134 case '<': c = '{'; break;
135 case '!': c = '|'; break;
136 case '>': c = '}'; break;
137 case '-': c = '~'; break;
/* trace output of the resulting character; NOTE(review): its debug guard is not visible here */
147 printf("nchar '%c'\n", c);
/*
 * SYMBOL_CHARS: the macro body is not visible in this excerpt.
 * NOTE(review): the name suggests the case labels for characters that can
 * start a symbol - confirm.
 */
153 #define SYMBOL_CHARS \
/*
 * Lexes a symbol. Its characters are collected on symbol_obstack, the
 * string is NUL-terminated and passed to symbol_table_insert(). The token
 * type is the ID of the returned symbol and v.symbol points to it.
 * The loop around the two obstack_1grow(c) calls is not visible here.
 */
220 static void parse_symbol(void)
225 obstack_1grow(&symbol_obstack, c);
232 obstack_1grow(&symbol_obstack, c);
/* terminate the collected text */
242 obstack_1grow(&symbol_obstack, '\0');
244 string = obstack_finish(&symbol_obstack);
245 symbol = symbol_table_insert(string);
247 lexer_token.type = symbol->ID;
248 lexer_token.v.symbol = symbol;
/* the symbol refers to a different string, so the copy built here is not needed: release it */
250 if(symbol->string != string) {
251 obstack_free(&symbol_obstack, string);
/*
 * Recognizes the optional suffix of an integer literal: the letters u/U and
 * l/L (up to two l/L) in either order, each in upper or lower case.
 * The suffix is currently only recognized; see the TODO below.
 * Lines between the visible tests are not part of this excerpt.
 */
255 static void parse_integer_suffix(void)
/* accept both cases of the unsigned suffix (was 'U' || 'U', which missed 'u') */
257 if(c == 'u' || c == 'U') {
258 /* TODO do something with the suffixes... */
260 if(c == 'L' || c == 'l') {
262 if(c == 'L' || c == 'l') {
266 } else if(c == 'l' || c == 'L') {
268 if(c == 'l' || c == 'L') {
270 if(c == 'u' || c == 'U') {
273 } else if(c == 'u' || c == 'U') {
/*
 * Parses a hexadecimal integer literal. On entry c is 'x' or 'X' (asserted).
 * If no hex digit follows, an error is reported and the token becomes
 * T_ERROR. Otherwise the digits are accumulated base 16, the integer suffix
 * is processed, and the token becomes T_INTEGER with the value in v.intvalue.
 */
279 static void parse_number_hex(void)
281 assert(c == 'x' || c == 'X');
285 !('A' <= c && c <= 'F') &&
286 !('a' <= c && c <= 'f')) {
/* the prefix was not followed by any hex digit */
287 parse_error("premature end of hex number literal");
288 lexer_token.type = T_ERROR;
/* one hex digit per step: 0-9, A-F, a-f */
295 value = 16 * value + c - '0';
296 } else if ('A' <= c && c <= 'F') {
297 value = 16 * value + c - 'A' + 10;
298 } else if ('a' <= c && c <= 'f') {
299 value = 16 * value + c - 'a' + 10;
/* first non-digit: finish the literal */
301 parse_integer_suffix();
303 lexer_token.type = T_INTEGER;
304 lexer_token.v.intvalue = value;
/*
 * Hex floating point is not supported.
 * NOTE(review): this check comes after the code that already produced the
 * integer token; confirm against the full body that it is reachable.
 */
310 if(c == '.' || c == 'p' || c == 'P') {
312 panic("Hex floating point numbers not implemented yet");
/*
 * Parses an octal integer literal. Digits '0'..'7' are accumulated base 8.
 * A following '8' or '9' is reported as an invalid octal number and yields
 * T_ERROR. Otherwise the token becomes T_INTEGER with the value in
 * v.intvalue, and an optional integer suffix is processed afterwards.
 */
316 static void parse_number_oct(void)
319 while(c >= '0' && c <= '7') {
320 value = 8 * value + c - '0';
/* a decimal digit outside the octal range directly after the literal */
323 if (c == '8' || c == '9') {
324 parse_error("invalid octal number");
325 lexer_token.type = T_ERROR;
329 lexer_token.type = T_INTEGER;
330 lexer_token.v.intvalue = value;
332 parse_integer_suffix();
/*
 * Parses the exponent of a floating point literal and stores the result as
 * a T_FLOATINGPOINT token. value is the number parsed so far.
 * The decimal exponent digits are accumulated in expo. How expo and factor
 * are applied to value is on lines not visible in this excerpt.
 */
335 static void parse_floatingpoint_exponent(long double value)
337 unsigned int expo = 0;
338 long double factor = 10.;
/* explicit '+' sign; the preceding branch is not visible (presumably '-') */
343 } else if(c == '+') {
347 while(c >= '0' && c <= '9') {
348 expo = 10 * expo + (c - '0');
361 lexer_token.type = T_FLOATINGPOINT;
362 lexer_token.v.floatvalue = value;
/*
 * Parses the fractional digits of a floating point literal. integer_part is
 * the value parsed before the fraction (callers in this file pass the
 * decimal value or 0). Each digit is added to value scaled by factor; the
 * update of factor is not visible in this excerpt. An 'e'/'E' hands over to
 * parse_floatingpoint_exponent(), otherwise the token is stored here.
 */
365 static void parse_floatingpoint_fract(int integer_part)
367 long double value = integer_part;
368 long double factor = 1.;
370 while(c >= '0' && c <= '9') {
372 value += (c - '0') * factor;
/* an exponent follows the fraction */
376 if(c == 'e' || c == 'E') {
378 parse_floatingpoint_exponent(value);
382 lexer_token.type = T_FLOATINGPOINT;
383 lexer_token.v.floatvalue = value;
/*
 * Parses a decimal literal. Digits are accumulated base 10. Depending on
 * what follows, parsing continues with the fraction or the exponent
 * (floating point) or ends as T_INTEGER after the optional integer suffix.
 * The condition guarding the fraction branch is not visible in this excerpt.
 */
386 static void parse_number_dec(void)
391 value = 10 * value + c - '0';
397 parse_floatingpoint_fract(value);
/* exponent directly after the integer digits, without a fraction */
400 if(c == 'e' || c == 'E') {
402 parse_floatingpoint_exponent(value);
405 parse_integer_suffix();
407 lexer_token.type = T_INTEGER;
408 lexer_token.v.intvalue = value;
/*
 * Entry point for numeric literals. Most of the dispatch is not visible in
 * this excerpt. The visible branches start a fraction or an exponent with an
 * integer part of 0, and report an invalid octal number as T_ERROR.
 */
411 static void parse_number(void)
432 parse_floatingpoint_fract(0);
436 parse_floatingpoint_exponent(0);
441 parse_error("invalid octal number");
442 lexer_token.type = T_ERROR;
/*
 * Parses the octal digits of an escape sequence and accumulates them base 8.
 * The test below detects the first character outside '0'..'7'; what it
 * triggers, the loop and the return statement are not visible in this excerpt.
 */
455 static int parse_octal_sequence(void)
459 if(c < '0' || c > '7')
461 value = 8 * value + c - '0';
/*
 * Parses the hex digits of an escape sequence and accumulates them base 16.
 * Digits 0-9, A-F and a-f are accepted. The loop, its exit and the return
 * statement are not visible in this excerpt.
 */
468 static int parse_hex_sequence(void)
472 if (c >= '0' && c <= '9') {
473 value = 16 * value + c - '0';
474 } else if ('A' <= c && c <= 'F') {
475 value = 16 * value + c - 'A' + 10;
476 } else if ('a' <= c && c <= 'f') {
477 value = 16 * value + c - 'a' + 10;
/*
 * Parses one escape sequence and returns the character value it denotes.
 * Simple escapes map directly to their character; hex and octal escapes are
 * delegated to parse_hex_sequence() / parse_octal_sequence(). End of file
 * and unknown escapes are reported via parse_error(); the value returned in
 * those cases and the case labels of the delegating branches are not visible
 * in this excerpt.
 */
487 static int parse_escape_sequence(void)
/* simple escapes */
495 case '"': return '"';
496 case '\'': return'\'';
497 case '\\': return '\\';
498 case '?': return '\?';
499 case 'a': return '\a';
500 case 'b': return '\b';
501 case 'f': return '\f';
502 case 'n': return '\n';
503 case 'r': return '\r';
504 case 't': return '\t';
505 case 'v': return '\v';
/* numeric escapes */
507 return parse_hex_sequence();
516 return parse_octal_sequence();
518 parse_error("reached end of file while parsing escape sequence");
521 parse_error("unknown escape sequence");
/*
 * Builds the concatenation of the NUL-terminated strings s1 and s2.
 * The new string is allocated on symbol_obstack and entered into stringset.
 * If strset_insert() hands back a different pointer, the fresh copy is
 * released again. The return statement is not visible in this excerpt;
 * presumably `result` is returned - confirm.
 */
526 const char *concat_strings(const char *s1, const char *s2)
528 size_t len1 = strlen(s1);
529 size_t len2 = strlen(s2);
/* room for both strings plus one terminating NUL */
531 char *concat = obstack_alloc(&symbol_obstack, len1 + len2 + 1);
532 memcpy(concat, s1, len1);
/* len2 + 1 also copies the terminating NUL of s2 */
533 memcpy(concat + len1, s2, len2 + 1);
535 const char *result = strset_insert(&stringset, concat);
536 if(result != concat) {
537 obstack_free(&symbol_obstack, concat);
/*
 * Lexes a string literal. Characters, with escape sequences already
 * decoded, are collected on symbol_obstack. The finished string is entered
 * into stringset so identical literals share one copy, and the token
 * becomes T_STRING_LITERAL. If the input ends before the literal is closed,
 * an error is reported at the line where the literal started and the token
 * becomes T_ERROR.
 */
543 static void parse_string_literal(void)
/* remembered for the "string has no end" diagnostic */
545 unsigned start_linenr = lexer_token.source_position.linenr;
556 tc = parse_escape_sequence();
557 obstack_1grow(&symbol_obstack, tc);
561 error_prefix_at(lexer_token.source_position.input_name,
563 fprintf(stderr, "string has no end\n");
564 lexer_token.type = T_ERROR;
/* ordinary character: copy it as is */
572 obstack_1grow(&symbol_obstack, c);
580 /* TODO: concatenate multiple strings separated by whitespace... */
582 /* add finishing 0 to the string */
583 obstack_1grow(&symbol_obstack, '\0');
584 string = obstack_finish(&symbol_obstack);
586 /* check if there is already a copy of the string */
587 result = strset_insert(&stringset, string);
588 if(result != string) {
589 obstack_free(&symbol_obstack, string);
592 lexer_token.type = T_STRING_LITERAL;
593 lexer_token.v.string = result;
/*
 * Lexes a character constant. The result is delivered as a T_INTEGER token
 * whose intvalue is the character found. A newline inside the constant and
 * more than one character are reported as errors and still end at
 * end_of_char_constant; end of file yields T_ERROR.
 */
596 static void parse_character_constant(void)
604 found_char = parse_escape_sequence();
608 parse_error("newline while parsing character constant");
614 goto end_of_char_constant;
617 parse_error("EOF while parsing character constant");
618 lexer_token.type = T_ERROR;
/* a character was already stored: this is a second one */
622 if(found_char != 0) {
623 parse_error("more than 1 characters in character "
625 goto end_of_char_constant;
634 end_of_char_constant:
635 lexer_token.type = T_INTEGER;
636 lexer_token.v.intvalue = found_char;
/*
 * Skips a multi-line comment. Newlines inside the comment go through
 * MATCH_NEWLINE so the line count stays correct. If the file ends first, an
 * error is reported; start_linenr remembers where the comment began
 * (presumably for that diagnostic - its second argument line is not visible).
 */
639 static void skip_multiline_comment(void)
641 unsigned start_linenr = lexer_token.source_position.linenr;
653 MATCH_NEWLINE(break;)
656 error_prefix_at(lexer_token.source_position.input_name,
658 fprintf(stderr, "at end of file while looking for comment end\n");
/*
 * skip_line_comment(): only the signature is visible in this excerpt.
 * NOTE(review): presumably consumes input up to the end of the current
 * line - confirm against the full body.
 */
668 static void skip_line_comment(void)
/* Copy of the most recent token, used by the preprocessor directive code. */
686 static token_t pp_token;
/* Reads the next preprocessing token and stores a copy of it in pp_token. */
688 static inline void next_pp_token(void)
690 lexer_next_preprocessing_token();
691 pp_token = lexer_token;
/*
 * Loops until pp_token is a newline token or T_EOF. The loop body is not
 * visible in this excerpt (presumably next_pp_token() - confirm).
 */
694 static void eat_until_newline(void)
696 while(pp_token.type != '\n' && pp_token.type != T_EOF) {
/*
 * Handles "#error": prints a fixed header line to stderr. The handling of
 * the directive's tokens is on lines not visible in this excerpt.
 */
701 static void error_directive(void)
704 fprintf(stderr, "#error directive: \n");
706 /* parse pp-tokens until new-line */
/*
 * Handles "#define": the token after the directive name must be an
 * identifier, otherwise an error is reported. The rest of the body is not
 * part of this excerpt.
 */
709 static void define_directive(void)
711 lexer_next_preprocessing_token();
712 if(lexer_token.type != T_IDENTIFIER) {
/* no trailing '\n' in the message: parse_error() ends the line itself */
713 parse_error("expected identifier after #define");
/*
 * Handles "#ifdef" / "#ifndef". is_ifndef is not used on any visible line.
 * The visible code only reads the next preprocessing token; the identifier
 * check is commented out.
 */
718 static void ifdef_directive(int is_ifndef)
721 lexer_next_preprocessing_token();
722 //expect_identifier();
/* Handles "#endif"; only the signature is visible in this excerpt. */
726 static void endif_directive(void)
/*
 * Handles a line directive: an integer token gives the new line number and
 * an optional string literal gives the new input name. Both are stored in
 * the source position of lexer_token.
 */
731 static void parse_line_directive(void)
733 if(pp_token.type != T_INTEGER) {
734 parse_error("expected integer");
/* NOTE(review): presumably one less because the newline ending the directive increments linenr again - confirm */
736 lexer_token.source_position.linenr = pp_token.v.intvalue - 1;
739 if(pp_token.type == T_STRING_LITERAL) {
740 lexer_token.source_position.input_name = pp_token.v.string;
/*
 * Dispatches a directive whose name is an identifier token, based on the
 * symbol's pp_ID. Only two branches are visible in this excerpt: one that
 * prints a placeholder message for include and one that calls
 * parse_line_directive().
 */
747 static void parse_preprocessor_identifier(void)
749 assert(pp_token.type == T_IDENTIFIER);
750 symbol_t *symbol = pp_token.v.symbol;
752 switch(symbol->pp_ID) {
754 printf("include - enable header name parsing!\n");
770 parse_line_directive();
/*
 * Parses the directive that follows a '#', dispatching on the type of
 * pp_token. The visible branches delegate to parse_preprocessor_identifier()
 * and parse_line_directive(); anything else is reported as an invalid
 * directive. The case labels are not part of this excerpt.
 */
784 static void parse_preprocessor_directive(void)
788 switch(pp_token.type) {
790 parse_preprocessor_identifier();
793 parse_line_directive();
796 parse_error("invalid preprocessor directive");
/*
 * Helper macros used by lexer_next_preprocessing_token() for punctuators
 * that share a prefix:
 *   MAYBE(ch, set_type) - sets the token type to set_type; presumably only
 *                         when the next character is ch (the test is not
 *                         visible in this excerpt)
 *   ELSE(set_type)      - sets the token type to set_type; used after the
 *                         MAYBE entries, presumably as the fallback
 *   ELSE_CODE(code)     - like ELSE but with caller-supplied code; it closes
 *                         a while(1) block, presumably opened by MAYBE_PROLOG
 */
802 #define MAYBE_PROLOG \
807 #define MAYBE(ch, set_type) \
810 lexer_token.type = set_type; \
813 #define ELSE_CODE(code) \
817 } /* end of while(1) */ \
820 #define ELSE(set_type) \
822 lexer_token.type = set_type; \
/*
 * Reads the next preprocessing token into lexer_token. Newlines are
 * delivered as '\n' tokens, string and character literals go to their own
 * parsers, and punctuators sharing a prefix are resolved with the
 * MAYBE/ELSE helper macros. After a comment was skipped, the function calls
 * itself to deliver the token that follows. The case labels and several
 * other lines of the body are not visible in this excerpt.
 */
826 void lexer_next_preprocessing_token(void)
836 lexer_token.type = '\n';
849 parse_string_literal();
853 parse_character_constant();
860 MAYBE('.', T_DOTDOTDOT)
864 lexer_token.type = '.';
871 MAYBE('=', T_ANDEQUAL)
875 MAYBE('=', T_ASTERISKEQUAL)
879 MAYBE('+', T_PLUSPLUS)
880 MAYBE('=', T_PLUSEQUAL)
884 MAYBE('>', T_MINUSGREATER)
885 MAYBE('-', T_MINUSMINUS)
886 MAYBE('=', T_MINUSEQUAL)
890 MAYBE('=', T_EXCLAMATIONMARKEQUAL)
894 MAYBE('=', T_SLASHEQUAL)
/* a comment produces no token: skip it and lex again */
897 skip_multiline_comment();
898 lexer_next_preprocessing_token();
903 lexer_next_preprocessing_token();
908 MAYBE('>', T_PERCENTGREATER)
909 MAYBE('=', T_PERCENTEQUAL)
914 MAYBE(':', T_PERCENTCOLONPERCENTCOLON)
918 lexer_token.type = T_PERCENTCOLON;
925 MAYBE(':', T_LESSCOLON)
926 MAYBE('%', T_LESSPERCENT)
927 MAYBE('=', T_LESSEQUAL)
930 MAYBE('=', T_LESSLESSEQUAL)
935 MAYBE('=', T_GREATEREQUAL)
938 MAYBE('=', T_GREATERGREATEREQUAL)
939 ELSE(T_GREATERGREATER)
943 MAYBE('=', T_CARETEQUAL)
947 MAYBE('=', T_PIPEEQUAL)
948 MAYBE('|', T_PIPEPIPE)
952 MAYBE('>', T_COLONGREATER)
956 MAYBE('=', T_EQUALEQUAL)
960 MAYBE('#', T_HASHHASH)
/* single-character token: the character code itself is the token type */
974 lexer_token.type = c;
979 lexer_token.type = T_EOF;
/* anything not handled above */
985 fprintf(stderr, "unknown character '%c' found\n", c);
986 lexer_token.type = T_ERROR;
/*
 * Reads the next token into lexer_token. A token that is not a newline is
 * handled by the statement after the first test, which is not visible in
 * this excerpt. Otherwise newline tokens are skipped, and a '#' that
 * follows is handed to parse_preprocessor_directive().
 */
992 void lexer_next_token(void)
994 lexer_next_preprocessing_token();
995 if(lexer_token.type != '\n')
/* skip consecutive newline tokens */
1000 lexer_next_preprocessing_token();
1001 } while(lexer_token.type == '\n');
1003 if(lexer_token.type == '#') {
1004 parse_preprocessor_directive();
/* Initializes the lexer's string set; pairs with exit_lexer(). */
1009 void init_lexer(void)
1011 strset_init(&stringset);
/*
 * Prepares the lexer for a new input: the source position starts at line 1
 * of input_name. How stream is stored and how the buffer is reset is on
 * lines not visible in this excerpt.
 */
1014 void lexer_open_stream(FILE *stream, const char *input_name)
1017 lexer_token.source_position.linenr = 1;
1018 lexer_token.source_position.input_name = input_name;
/* Destroys the string set that init_lexer() initialized. */
1023 void exit_lexer(void)
1025 strset_destroy(&stringset);
/*
 * Debug helper: prints "<input_name>:<linenr>" for source_position to
 * stdout. Marked unused so that builds which never call it do not warn.
 */
1028 static __attribute__((unused))
1029 void dbg_pos(const source_position_t source_position)
/* %u: the line number is handled as unsigned elsewhere in this file (see error_prefix_at) */
1031 fprintf(stdout, "%s:%u\n", source_position.input_name,
1032 source_position.linenr);