5 #include "symbol_table_t.h"
/* Position (input name and line number) of the text currently being lexed. */
17 source_position_t source_position;
/* Read buffer. Refills are read in behind a reserve of MAX_PUTBACK bytes at
 * its start. */
19 static char buf[1027];
/* One past the last valid byte of buf after the most recent refill. */
20 static const char *bufend;
/* Current read position inside buf; a refill happens once it reaches bufend. */
21 static const char *bufpos;
/* Set of string literals, used so that equal literals share one copy. */
22 static strset_t stringset;
/* Disabled declarations, apparently intended for a stack of nested inputs.
 * NOTE(review): confirm whether they are still wanted. */
23 //static FILE **input_stack;
24 //static char **buf_stack;
/**
 * Writes the "<input_name>:<linenr>: Error: " prefix of a diagnostic to
 * stderr. No newline is emitted; the caller prints the message text.
 *
 * linenr is unsigned, so it is formatted with %u (a %d conversion expects
 * an int argument).
 */
27 void error_prefix_at(const char *input_name, unsigned linenr)
29 fprintf(stderr, "%s:%u: Error: ", input_name, linenr);
35 error_prefix_at(source_position.input_name, source_position.linenr);
/**
 * Reports a parse error.
 *
 * The visible statement writes msg followed by a newline to stderr, so msg
 * itself should not end in a newline.
 */
39 void parse_error(const char *msg)
42 fprintf(stderr, "%s\n", msg);
49 if(bufpos >= bufend) {
50 size_t s = fread(buf + MAX_PUTBACK, 1, sizeof(buf) - MAX_PUTBACK,
56 bufpos = buf + MAX_PUTBACK;
57 bufend = buf + MAX_PUTBACK + s;
61 printf("nchar '%c'\n", c);
68 char *p = (char*) bufpos - 1;
74 printf("putback '%c'\n", pc);
/**
 * Trigraph handling.
 *
 * Each MATCH_TRIGRAPH line pairs a trigraph's final character with the
 * character the trigraph stands for (for example '=' -> '#', '(' -> '[').
 *
 * Callers test the result in an if(), so a non-zero return appears to mean
 * "a trigraph was replaced". NOTE(review): confirm against the macro body,
 * which is not visible in this excerpt.
 */
80 int replace_trigraph(void)
82 #define MATCH_TRIGRAPH(ch,replacement) \
88 MATCH_TRIGRAPH('=', '#')
89 MATCH_TRIGRAPH('(', '[')
90 MATCH_TRIGRAPH('/', '\\')
91 MATCH_TRIGRAPH(')', ']')
92 MATCH_TRIGRAPH('\'', '^')
93 MATCH_TRIGRAPH('<', '{')
94 MATCH_TRIGRAPH('!', '|')
95 MATCH_TRIGRAPH('>', '}')
96 MATCH_TRIGRAPH('-', '~')
/*
 * SKIP_TRIGRAPHS(custom_putback, no_trigraph_code): helper macro built
 * around replace_trigraph().
 * Judging by the parameter names, both arguments are caller-supplied code
 * for the case where the input is not a trigraph after all.
 * NOTE(review): confirm against the full macro body.
 */
104 #define SKIP_TRIGRAPHS(custom_putback, no_trigraph_code) \
114 if(replace_trigraph()) { \
/*
 * EAT_NEWLINE(newline_code): macro that recognises a line ending.
 * Both visible branches increment source_position.linenr; the second one
 * handles a plain line-feed character.
 * newline_code is presumably the code to run once a line ending was
 * consumed. TODO confirm against the full macro body.
 */
123 #define EAT_NEWLINE(newline_code) \
128 source_position.linenr++; \
130 } else if(c == '\n') { \
132 source_position.linenr++; \
/**
 * Lexes a symbol into *token.
 *
 * The characters are collected on symbol_obstack, NUL-terminated and passed
 * to symbol_table_insert(). The resulting symbol is stored in
 * token->v.symbol.
 */
137 void parse_symbol(token_t *token)
142 obstack_1grow(&symbol_obstack, c);
155 obstack_1grow(&symbol_obstack, c);
167 if(replace_trigraph())
/* terminate the collected text and hand it to the symbol table */
179 obstack_1grow(&symbol_obstack, '\0');
181 string = obstack_finish(&symbol_obstack);
182 symbol = symbol_table_insert(string);
/* the token type is either the symbol's own ID or plain T_IDENTIFIER
 * (the deciding condition is not visible in this excerpt) */
185 token->type = symbol->ID;
187 token->type = T_IDENTIFIER;
189 token->v.symbol = symbol;
/* the table holds a different string pointer, so release the copy that
 * was just built on the obstack */
191 if(symbol->string != string) {
192 obstack_free(&symbol_obstack, string);
/**
 * Lexes a hexadecimal integer literal into *token.
 *
 * Expects the current character c to be the 'x' or 'X' of the prefix
 * (asserted). If the following character is not a hex digit, an error is
 * reported and token->type becomes T_ERROR. Otherwise the digits are
 * accumulated base 16 and the result is stored as T_INTEGER in
 * token->v.intvalue.
 *
 * No overflow check on value is visible here.
 */
197 void parse_number_hex(token_t *token)
199 assert(c == 'x' || c == 'X');
203 !('A' <= c && c <= 'F') &&
204 !('a' <= c && c <= 'f')) {
205 parse_error("premature end of hex number literal");
206 token->type = T_ERROR;
/* fold the next digit into the value: '0'-'9', 'A'-'F' or 'a'-'f' */
213 value = 16 * value + c - '0';
214 } else if ('A' <= c && c <= 'F') {
215 value = 16 * value + c - 'A' + 10;
216 } else if ('a' <= c && c <= 'f') {
217 value = 16 * value + c - 'a' + 10;
219 token->type = T_INTEGER;
220 token->v.intvalue = value;
/**
 * Lexes an octal integer literal into *token.
 *
 * Expects the current character c to be 'o' or 'O' (asserted). Digits
 * '0'..'7' are accumulated base 8 and the result is stored as T_INTEGER in
 * token->v.intvalue.
 */
228 void parse_number_oct(token_t *token)
230 assert(c == 'o' || c == 'O');
235 if ('0' <= c && c <= '7') {
236 value = 8 * value + c - '0';
238 token->type = T_INTEGER;
239 token->v.intvalue = value;
/**
 * Lexes a decimal integer literal into *token.
 *
 * first_char, where the assert applies, is a digit character whose value
 * seeds the result. Further digits are accumulated base 10 and the result is
 * stored as T_INTEGER in token->v.intvalue.
 *
 * NOTE(review): parse_number() also passes 0 for first_char; the lines that
 * would guard the assert for that case are not visible here. Confirm.
 */
247 void parse_number_dec(token_t *token, int first_char)
251 assert(first_char >= '0' && first_char <= '9');
252 value = first_char - '0';
257 value = 10 * value + c - '0';
259 token->type = T_INTEGER;
260 token->v.intvalue = value;
/**
 * Lexes an integer literal into *token by dispatching to the hexadecimal,
 * octal or decimal parser.
 *
 * Two different calls to parse_number_dec() are visible: one passing '0' as
 * the first character and one passing 0.
 */
268 void parse_number(token_t *token)
270 // TODO check for overflow
271 // TODO check for various invalid inputs sequences
277 case 'x': parse_number_hex(token); break;
279 case 'O': parse_number_oct(token); break;
280 default: parse_number_dec(token, '0');
283 parse_number_dec(token, 0);
/**
 * Parses one escape sequence and returns the character it denotes.
 *
 * Simple escapes (quotes, a, b, f, n, r, t, v) map directly to their
 * character. Hexadecimal and octal escapes are not implemented yet and only
 * produce a diagnostic. End of file and unknown escapes are reported through
 * parse_error().
 *
 * The octal case matches the digit characters '0'..'7'. (The earlier range
 * 0 ... 8 matched control characters instead, so an octal escape was never
 * recognised.) Case ranges are a GNU C extension.
 */
288 int parse_escape_sequence()
295 case '"': return '"';
296 case '\'': return'\'';
300 case 'a': return '\a';
301 case 'b': return '\b';
302 case 'f': return '\f';
303 case 'n': return '\n';
304 case 'r': return '\r';
305 case 't': return '\t';
306 case 'v': return '\v';
307 case 'x': /* TODO parse hex number ... */
308 parse_error("hex escape sequences not implemented yet");
310 case '0' ... '7': /* TODO parse octal number ... */
311 parse_error("octal escape sequences not implemented yet");
317 /* might be a trigraph */
319 if(replace_trigraph()) {
327 parse_error("reached end of file while parsing escape sequence");
330 parse_error("unknown escape sequence");
/**
 * Lexes a string literal into *token.
 *
 * The characters (with escape sequences already translated) are collected
 * on symbol_obstack and NUL-terminated. The finished string is looked up in
 * stringset so that equal literals share one copy. On success the token is
 * a T_STRING_LITERAL whose v.string points at the shared copy; an
 * unterminated string yields T_ERROR.
 */
337 void parse_string_literal(token_t *token)
/* remembered so that the "string has no end" error names the line where
 * the literal started */
339 unsigned start_linenr = source_position.linenr;
349 obstack_1grow(&symbol_obstack, '?');
357 int ec = parse_escape_sequence();
358 obstack_1grow(&symbol_obstack, ec);
362 error_prefix_at(source_position.input_name, start_linenr);
363 fprintf(stderr, "string has no end\n");
364 token->type = T_ERROR;
372 obstack_1grow(&symbol_obstack, c);
380 /* TODO: concatenate multiple strings separated by whitespace... */
382 /* add finishing 0 to the string */
383 obstack_1grow(&symbol_obstack, '\0');
384 string = obstack_finish(&symbol_obstack);
386 /* check if there is already a copy of the string */
387 result = strset_insert(&stringset, string);
388 if(result != string) {
389 obstack_free(&symbol_obstack, string);
392 token->type = T_STRING_LITERAL;
393 token->v.string = result;
/*
 * MATCH_NEWLINE(code): macro for handling line endings; both visible
 * statements increment source_position.linenr.
 * code is presumably the code to run after a line ending was matched.
 * TODO confirm against the full macro body.
 */
396 #define MATCH_NEWLINE(code) \
402 source_position.linenr++; \
406 source_position.linenr++; \
/**
 * Lexes a character constant into *token.
 *
 * At the end_of_char_constant label the token becomes a T_INTEGER whose
 * value is found_char. A newline inside the constant and a second character
 * are diagnosed through parse_error(). End of file inside the constant is
 * diagnosed and sets token->type to T_ERROR.
 */
410 void parse_character_constant(token_t *token)
430 parse_error("newline while parsing character constant");
436 goto end_of_char_constant;
439 parse_error("EOF while parsing character constant");
440 token->type = T_ERROR;
/* a non-zero found_char means a character was already seen */
444 if(found_char != 0) {
445 parse_error("more than 1 characters in character "
447 goto end_of_char_constant;
456 end_of_char_constant:
457 token->type = T_INTEGER;
458 token->v.intvalue = found_char;
/**
 * Skips a multi-line comment.
 *
 * Trigraphs and line endings inside the comment are still processed (the
 * latter through MATCH_NEWLINE, which also resets had_star). Reaching end of
 * file before the comment ends is reported against the line where the
 * comment started.
 */
462 void skip_multiline_comment(void)
464 unsigned start_linenr = source_position.linenr;
495 if(replace_trigraph())
500 /* we don't put back the 2nd ? as the comment text is discarded
504 MATCH_NEWLINE(had_star = 0; break;)
507 error_prefix_at(source_position.input_name, start_linenr);
508 fprintf(stderr, "at end of file while looking for comment end\n");
/**
 * Skips a line comment.
 *
 * Trigraphs inside the comment are still processed, and
 * source_position.linenr is incremented in the visible part of the body.
 */
519 void skip_line_comment(void)
528 if(replace_trigraph())
531 /* we don't put back the 2nd ? as the comment text is discarded
539 source_position.linenr++;
/* Forward declaration: the directive handlers below call the preprocessing
 * tokenizer before its definition further down in this file. */
556 void lexer_next_preprocessing_token(token_t *token);
559 void eat_until_newline(void)
/**
 * Handles an #error directive.
 *
 * The visible statement only prints the fixed text "#error directive: " and
 * a newline to stderr.
 */
565 void error_directive(void)
568 fprintf(stderr, "#error directive: \n");
570 /* parse pp-tokens until new-line */
/**
 * Handles a #define directive.
 *
 * Reads the next preprocessing token, which must be an identifier; anything
 * else is reported through parse_error().
 * The message carries no trailing newline because parse_error() appends one
 * itself.
 */
574 void define_directive(void)
578 lexer_next_preprocessing_token(&temptoken);
579 if(temptoken.type != T_IDENTIFIER) {
580 parse_error("expected identifier after #define");
/**
 * Handles an #ifdef / #ifndef directive; the visible part reads the next
 * preprocessing token.
 *
 * is_ifndef is presumably non-zero for #ifndef. TODO confirm against the
 * caller.
 */
586 void ifdef_directive(int is_ifndef)
590 lexer_next_preprocessing_token(&temptoken);
591 //expect_identifier();
596 void endif_directive(void)
/**
 * Dispatches on symbol->pp_ID, the preprocessor ID of the identifier found
 * at the start of a directive.
 *
 * The only visible case prints a placeholder message for "include".
 */
602 void found_preprocessor_identifier(symbol_t *symbol)
604 switch(symbol->pp_ID) {
606 printf("include - enable header name parsing!\n");
/**
 * Parses a preprocessor directive.
 *
 * Reads the next preprocessing token and switches on its type; in the
 * visible case the token's symbol is passed to
 * found_preprocessor_identifier().
 */
634 void parse_preprocessor_directive(token_t *result_token)
639 lexer_next_preprocessing_token(&temptoken);
640 switch(temptoken.type) {
642 found_preprocessor_identifier(temptoken.v.symbol);
/*
 * Helper macros for lexing multi-character punctuators.
 * MAYBE(ch, set_type) is used in lexer_next_preprocessing_token() as a list
 * of alternatives; its visible statement sets token->type to set_type.
 * It presumably applies when the next character equals ch. TODO confirm
 * against the full macro body.
 */
647 #define MAYBE_PROLOG \
652 #define MAYBE(ch, set_type) \
655 token->type = set_type; \
/*
 * ELSE_CODE(code): closes a list of MAYBE alternatives. The visible part
 * handles line endings through EAT_NEWLINE and ends a while(1) loop.
 * code is presumably the fallback to run when no alternative matched.
 * TODO confirm against the full macro body.
 */
658 #define ELSE_CODE(code) \
665 EAT_NEWLINE(break;) \
670 } /* end of while(1) */ \
/*
 * ELSE(set_type): fallback of a MAYBE list; its visible statement sets
 * token->type to set_type.
 */
673 #define ELSE(set_type) \
675 token->type = set_type; \
/**
 * Skips whitespace.
 *
 * The visible statements keep source_position.linenr up to date and skip
 * multi-line comments via skip_multiline_comment().
 */
680 void eat_whitespace()
697 source_position.linenr++;
715 skip_multiline_comment();
/**
 * Reads the next preprocessing token into *token.
 *
 * Handles preprocessor directives, string literals, character constants and
 * punctuators. Multi-character punctuators are matched with lists of MAYBE
 * alternatives. An unexpected backslash or an unknown character is reported
 * and yields T_ERROR.
 */
748 void lexer_next_preprocessing_token(token_t *token)
761 parse_preprocessor_directive(token);
779 parse_string_literal(token);
783 parse_character_constant(token);
790 source_position.linenr++;
793 parse_error("unexpected '\\' found");
794 token->type = T_ERROR;
802 MAYBE('.', T_DOTDOTDOT)
813 MAYBE('=', T_ANDEQUAL)
817 MAYBE('=', T_ASTERISKEQUAL)
821 MAYBE('+', T_PLUSPLUS)
822 MAYBE('=', T_PLUSEQUAL)
826 MAYBE('-', T_MINUSMINUS)
827 MAYBE('=', T_MINUSEQUAL)
831 MAYBE('=', T_EXCLAMATIONMARKEQUAL)
835 MAYBE('=', T_SLASHEQUAL)
/* a comment produces no token: skip it, then lex again to deliver the
 * token that follows */
838 skip_multiline_comment();
839 lexer_next_preprocessing_token(token);
844 lexer_next_preprocessing_token(token);
849 MAYBE('>', T_PERCENTGREATER)
850 MAYBE('=', T_PERCENTEQUAL)
855 MAYBE(':', T_PERCENTCOLONPERCENTCOLON)
859 token->type = T_PERCENTCOLON;
866 MAYBE(':', T_LESSCOLON)
867 MAYBE('%', T_LESSPERCENT)
870 MAYBE('=', T_LESSLESSEQUAL)
877 MAYBE('=', T_GREATERGREATEREQUAL)
878 ELSE(T_GREATERGREATER)
882 MAYBE('=', T_CARETEQUAL)
886 MAYBE('=', T_PIPEEQUAL)
887 MAYBE('|', T_PIPEPIPE)
891 MAYBE('>', T_COLONGREATER)
895 MAYBE('=', T_EQUALEQUAL)
899 MAYBE('#', T_HASHHASH)
904 /* just a simple ? */
909 /* might be a trigraph */
911 if(replace_trigraph()) {
/* nothing above matched: report the character and return an error token */
939 fprintf(stderr, "unknown character '%c' found\n", c);
940 token->type = T_ERROR;
/**
 * Reads the next token into *token, skipping newline tokens.
 *
 * Preprocessing tokens are fetched repeatedly for as long as the token type
 * is the newline character.
 */
946 void lexer_next_token(token_t *token)
949 lexer_next_preprocessing_token(token);
950 } while(token->type == '\n');
/**
 * Initialises the lexer; the visible statement sets up the string-literal
 * set. The set is destroyed again in exit_lexer().
 */
953 void init_lexer(void)
955 strset_init(&stringset);
/**
 * Starts lexing a new input stream.
 *
 * Resets the line counter to 0 and records input_name for use in
 * diagnostics. The pointer is stored as is, so the name must stay valid
 * while this input is being lexed.
 */
958 void lexer_open_stream(FILE *stream, const char *input_name)
961 source_position.linenr = 0;
962 source_position.input_name = input_name;
964 /* we place a virtual '\n' at the beginning so the lexer knows we're at the
965 * beginning of a line */
/**
 * Releases lexer resources; the visible statement destroys the
 * string-literal set created in init_lexer().
 */
969 void exit_lexer(void)
971 strset_destroy(&stringset);
/**
 * Debug helper: prints a source position as "<input_name>:<linenr>" to
 * stdout. Marked unused so that builds which never call it do not warn.
 *
 * NOTE(review): linenr is passed to an `unsigned` parameter elsewhere in this
 * file; if the field is unsigned, %u would be the matching conversion.
 * Confirm against the declaration of source_position_t.
 */
974 static __attribute__((unused))
975 void dbg_pos(const source_position_t source_position)
977 fprintf(stdout, "%s:%d\n", source_position.input_name, source_position.linenr);