5 #include "symbol_table_t.h"
17 source_position_t source_position;
19 static char buf[1027];
20 static const char *bufend;
21 static const char *bufpos;
22 static strset_t stringset;
23 //static FILE **input_stack;
24 //static char **buf_stack;
27 void error_prefix_at(const char *input_name, unsigned linenr)
29 fprintf(stderr, "%s:%d: Error: ", input_name, linenr);
35 error_prefix_at(source_position.input_name, source_position.linenr);
39 void parse_error(const char *msg)
42 fprintf(stderr, "%s\n", msg);
49 if(bufpos >= bufend) {
50 size_t s = fread(buf + MAX_PUTBACK, 1, sizeof(buf) - MAX_PUTBACK,
56 bufpos = buf + MAX_PUTBACK;
57 bufend = buf + MAX_PUTBACK + s;
61 printf("nchar '%c'\n", c);
68 char *p = (char*) bufpos - 1;
74 printf("putback '%c'\n", pc);
80 int replace_trigraph(void)
82 #define MATCH_TRIGRAPH(ch,replacement) \
88 MATCH_TRIGRAPH('=', '#')
89 MATCH_TRIGRAPH('(', '[')
90 MATCH_TRIGRAPH('/', '\\')
91 MATCH_TRIGRAPH(')', ']')
92 MATCH_TRIGRAPH('\'', '^')
93 MATCH_TRIGRAPH('<', '{')
94 MATCH_TRIGRAPH('!', '|')
95 MATCH_TRIGRAPH('>', '}')
96 MATCH_TRIGRAPH('-', '~')
104 #define SKIP_TRIGRAPHS(custom_putback, no_trigraph_code) \
114 if(replace_trigraph()) { \
123 #define EAT_NEWLINE(newline_code) \
128 source_position.linenr++; \
130 } else if(c == '\n') { \
132 source_position.linenr++; \
136 #define SYMBOL_CHARS \
192 void parse_symbol(token_t *token)
197 obstack_1grow(&symbol_obstack, c);
208 obstack_1grow(&symbol_obstack, c);
220 if(replace_trigraph())
232 obstack_1grow(&symbol_obstack, '\0');
234 string = obstack_finish(&symbol_obstack);
235 symbol = symbol_table_insert(string);
237 token->type = symbol->ID;
238 token->v.symbol = symbol;
240 if(symbol->string != string) {
241 obstack_free(&symbol_obstack, string);
246 void parse_number_hex(token_t *token)
248 assert(c == 'x' || c == 'X');
252 !('A' <= c && c <= 'F') &&
253 !('a' <= c && c <= 'f')) {
254 parse_error("premature end of hex number literal");
255 token->type = T_ERROR;
262 value = 16 * value + c - '0';
263 } else if ('A' <= c && c <= 'F') {
264 value = 16 * value + c - 'A' + 10;
265 } else if ('a' <= c && c <= 'f') {
266 value = 16 * value + c - 'a' + 10;
268 token->type = T_INTEGER;
269 token->v.intvalue = value;
277 void parse_number_oct(token_t *token)
279 assert(c == 'o' || c == 'O');
284 if ('0' <= c && c <= '7') {
285 value = 8 * value + c - '0';
287 token->type = T_INTEGER;
288 token->v.intvalue = value;
296 void parse_number_dec(token_t *token, int first_char)
300 assert(first_char >= '0' && first_char <= '9');
301 value = first_char - '0';
306 value = 10 * value + c - '0';
308 token->type = T_INTEGER;
309 token->v.intvalue = value;
317 void parse_number(token_t *token)
319 // TODO check for overflow
320 // TODO check for various invalid input sequences
326 case 'x': parse_number_hex(token); break;
328 case 'O': parse_number_oct(token); break;
329 default: parse_number_dec(token, '0');
332 parse_number_dec(token, 0);
337 int parse_escape_sequence()
344 case '"': return '"';
345 case '\'': return'\'';
349 case 'a': return '\a';
350 case 'b': return '\b';
351 case 'f': return '\f';
352 case 'n': return '\n';
353 case 'r': return '\r';
354 case 't': return '\t';
355 case 'v': return '\v';
356 case 'x': /* TODO parse hex number ... */
357 parse_error("hex escape sequences not implemented yet");
367 /* TODO parse octal number ... */
368 parse_error("octal escape sequences not implemented yet");
374 /* might be a trigraph */
376 if(replace_trigraph()) {
384 parse_error("reached end of file while parsing escape sequence");
387 parse_error("unknown escape sequence");
394 void parse_string_literal(token_t *token)
396 unsigned start_linenr = source_position.linenr;
406 obstack_1grow(&symbol_obstack, '?');
414 int ec = parse_escape_sequence();
415 obstack_1grow(&symbol_obstack, ec);
419 error_prefix_at(source_position.input_name, start_linenr);
420 fprintf(stderr, "string has no end\n");
421 token->type = T_ERROR;
429 obstack_1grow(&symbol_obstack, c);
437 /* TODO: concatenate multiple strings separated by whitespace... */
439 /* add finishing 0 to the string */
440 obstack_1grow(&symbol_obstack, '\0');
441 string = obstack_finish(&symbol_obstack);
443 /* check if there is already a copy of the string */
444 result = strset_insert(&stringset, string);
445 if(result != string) {
446 obstack_free(&symbol_obstack, string);
449 token->type = T_STRING_LITERAL;
450 token->v.string = result;
453 #define MATCH_NEWLINE(code) \
459 source_position.linenr++; \
463 source_position.linenr++; \
467 void parse_character_constant(token_t *token)
487 parse_error("newline while parsing character constant");
493 goto end_of_char_constant;
496 parse_error("EOF while parsing character constant");
497 token->type = T_ERROR;
501 if(found_char != 0) {
502 parse_error("more than 1 characters in character "
504 goto end_of_char_constant;
513 end_of_char_constant:
514 token->type = T_INTEGER;
515 token->v.intvalue = found_char;
519 void skip_multiline_comment(void)
521 unsigned start_linenr = source_position.linenr;
552 if(replace_trigraph())
557 /* we don't put back the 2nd ? as the comment text is discarded
561 MATCH_NEWLINE(had_star = 0; break;)
564 error_prefix_at(source_position.input_name, start_linenr);
565 fprintf(stderr, "at end of file while looking for comment end\n");
576 void skip_line_comment(void)
585 if(replace_trigraph())
588 /* we don't put back the 2nd ? as the comment text is discarded
596 source_position.linenr++;
613 void lexer_next_preprocessing_token(token_t *token);
615 static token_t pp_token;
618 void next_pp_token(void)
620 lexer_next_preprocessing_token(&pp_token);
624 void eat_until_newline(void)
630 void error_directive(void)
633 fprintf(stderr, "#error directive: \n");
635 /* parse pp-tokens until new-line */
639 void define_directive(void)
643 lexer_next_preprocessing_token(&temptoken);
644 if(temptoken.type != T_IDENTIFIER) {
645 parse_error("expected identifier after #define\n");
651 void ifdef_directive(int is_ifndef)
655 lexer_next_preprocessing_token(&temptoken);
656 //expect_identifier();
661 void endif_directive(void)
667 void parse_line_directive(void)
669 if(pp_token.type != T_INTEGER) {
670 parse_error("expected integer");
672 source_position.linenr = pp_token.v.intvalue - 1;
675 if(pp_token.type == T_STRING_LITERAL) {
676 source_position.input_name = pp_token.v.string;
680 while(pp_token.type != T_EOF && pp_token.type != '\n') {
686 void parse_preprocessor_identifier(void)
688 assert(pp_token.type == T_IDENTIFIER);
689 symbol_t *symbol = pp_token.v.symbol;
691 switch(symbol->pp_ID) {
693 printf("include - enable header name parsing!\n");
709 parse_line_directive();
724 void parse_preprocessor_directive(token_t *result_token)
728 switch(pp_token.type) {
730 parse_preprocessor_identifier();
733 parse_line_directive();
737 lexer_next_token(result_token);
740 #define MAYBE_PROLOG \
745 #define MAYBE(ch, set_type) \
748 token->type = set_type; \
751 #define ELSE_CODE(code) \
758 EAT_NEWLINE(break;) \
763 } /* end of while(1) */ \
766 #define ELSE(set_type) \
768 token->type = set_type; \
773 void eat_whitespace()
790 source_position.linenr++;
808 skip_multiline_comment();
841 void lexer_next_preprocessing_token(token_t *token)
854 parse_preprocessor_directive(token);
879 parse_string_literal(token);
883 parse_character_constant(token);
890 source_position.linenr++;
893 parse_error("unexpected '\\' found");
894 token->type = T_ERROR;
902 MAYBE('.', T_DOTDOTDOT)
913 MAYBE('=', T_ANDEQUAL)
917 MAYBE('=', T_ASTERISKEQUAL)
921 MAYBE('+', T_PLUSPLUS)
922 MAYBE('=', T_PLUSEQUAL)
926 MAYBE('-', T_MINUSMINUS)
927 MAYBE('=', T_MINUSEQUAL)
931 MAYBE('=', T_EXCLAMATIONMARKEQUAL)
935 MAYBE('=', T_SLASHEQUAL)
938 skip_multiline_comment();
939 lexer_next_preprocessing_token(token);
944 lexer_next_preprocessing_token(token);
949 MAYBE('>', T_PERCENTGREATER)
950 MAYBE('=', T_PERCENTEQUAL)
955 MAYBE(':', T_PERCENTCOLONPERCENTCOLON)
959 token->type = T_PERCENTCOLON;
966 MAYBE(':', T_LESSCOLON)
967 MAYBE('%', T_LESSPERCENT)
970 MAYBE('=', T_LESSLESSEQUAL)
977 MAYBE('=', T_GREATERGREATEREQUAL)
978 ELSE(T_GREATERGREATER)
982 MAYBE('=', T_CARETEQUAL)
986 MAYBE('=', T_PIPEEQUAL)
987 MAYBE('|', T_PIPEPIPE)
991 MAYBE('>', T_COLONGREATER)
995 MAYBE('=', T_EQUALEQUAL)
999 MAYBE('#', T_HASHHASH)
1004 /* just a simple ? */
1009 /* might be a trigraph */
1011 if(replace_trigraph()) {
1033 token->type = T_EOF;
1039 fprintf(stderr, "unknown character '%c' found\n", c);
1040 token->type = T_ERROR;
1046 void lexer_next_token(token_t *token)
1049 lexer_next_preprocessing_token(token);
1050 } while(token->type == '\n');
1053 void init_lexer(void)
1055 strset_init(&stringset);
1058 void lexer_open_stream(FILE *stream, const char *input_name)
1061 source_position.linenr = 0;
1062 source_position.input_name = input_name;
1064 /* we place a virtual '\n' at the beginning so the lexer knows we're at the
1065 * beginning of a line */
1069 void exit_lexer(void)
1071 strset_destroy(&stringset);
1074 static __attribute__((unused))
1075 void dbg_pos(const source_position_t source_position)
1077 fprintf(stdout, "%s:%d\n", source_position.input_name, source_position.linenr);