5 #include "symbol_table_t.h"
/* Position (input name and line number) the lexer is currently at. The line
 * number is advanced by the newline handling code further down. */
17 source_position_t source_position;
/* Read buffer for the input stream. fread() fills it starting MAX_PUTBACK
 * bytes behind its beginning, which leaves that many bytes free in front of
 * the data (presumably room for characters that are put back - confirm). */
19 static char buf[1027];
/* One past the last byte that the most recent fread() delivered. */
20 static const char *bufend;
/* Read cursor inside buf; the buffer is refilled once it reaches bufend. */
21 static const char *bufpos;
/* Set of string literals; identical literals share one stored copy. */
22 static strset_t stringset;
/* Disabled declarations (stacks of inputs and buffers), currently unused. */
23 //static FILE **input_stack;
24 //static char **buf_stack;
/* Print the prefix of an error message, "<input_name>:<linenr>: Error: ",
 * to stderr. No newline is written; the caller prints the message text.
 *   input_name  name of the input shown in front of the line number
 *   linenr      line number; it is unsigned and therefore printed with %u */
27 void error_prefix_at(const char *input_name, unsigned linenr)
29 fprintf(stderr, "%s:%u: Error: ", input_name, linenr);
/* Emit the error prefix for the position the lexer is currently at. (The
 * header of the enclosing function is not part of this excerpt.) */
35 error_prefix_at(source_position.input_name, source_position.linenr);
/* Report a lexer error. msg is written to stderr followed by a newline, so
 * messages are expected to come without a trailing newline of their own. */
39 void parse_error(const char *msg)
42 fprintf(stderr, "%s\n", msg);
/* Fragment of the character fetch routine (its header is not visible in this
 * excerpt). The buffer is refilled once every buffered byte was consumed. */
49 if(bufpos >= bufend) {
/* Read behind the first MAX_PUTBACK bytes of buf; s is the number of bytes
 * that fread() delivered. */
50 size_t s = fread(buf + MAX_PUTBACK, 1, sizeof(buf) - MAX_PUTBACK,
/* The fresh data occupies the range from bufpos up to (excluding) bufend. */
56 bufpos = buf + MAX_PUTBACK;
57 bufend = buf + MAX_PUTBACK + s;
/* Trace output of the current character to stdout.
 * NOTE(review): looks like debugging code - confirm it is disabled in normal
 * builds (any guard would be on lines not shown here). */
61 printf("nchar '%c'\n", c);
/* Fragment of the put-back routine (header not visible in this excerpt).
 * p addresses the byte directly in front of the read cursor; the const of
 * bufpos is cast away, presumably because the put-back character is stored
 * there - confirm against the full source. */
68 char *p = (char*) bufpos - 1;
/* Trace output of the character being put back (pc) to stdout. */
74 printf("putback '%c'\n", pc);
/* Try to replace a trigraph by the character it stands for. Callers use the
 * result as a truth value (non-zero presumably means that a replacement took
 * place - confirm, the return statements are not visible here). */
80 int replace_trigraph(void)
/* One table entry: ch is the character that selects the trigraph and
 * replacement is the character it is turned into. */
82 #define MATCH_TRIGRAPH(ch,replacement) \
88 MATCH_TRIGRAPH('=', '#')
89 MATCH_TRIGRAPH('(', '[')
90 MATCH_TRIGRAPH('/', '\\')
91 MATCH_TRIGRAPH(')', ']')
92 MATCH_TRIGRAPH('\'', '^')
93 MATCH_TRIGRAPH('<', '{')
94 MATCH_TRIGRAPH('!', '|')
95 MATCH_TRIGRAPH('>', '}')
96 MATCH_TRIGRAPH('-', '~')
/* SKIP_TRIGRAPHS(custom_putback, no_trigraph_code): statement macro built
 * around replace_trigraph(). Judging by their names the two arguments are
 * code snippets supplied by the user of the macro; most of the body is not
 * visible in this excerpt. */
104 #define SKIP_TRIGRAPHS(custom_putback, no_trigraph_code) \
114 if(replace_trigraph()) { \
/* EAT_NEWLINE(newline_code): newline handling for the scanning loops. Both
 * visible branches advance source_position.linenr; the second one is taken
 * when c is a line feed. The use of newline_code is on lines not shown. */
123 #define EAT_NEWLINE(newline_code) \
128 source_position.linenr++; \
130 } else if(c == '\n') { \
132 source_position.linenr++; \
/* Scan an identifier or keyword into token. The characters are collected on
 * symbol_obstack, terminated with a NUL byte and looked up / entered in the
 * symbol table. token->v.symbol receives the resulting symbol. */
137 void parse_symbol(token_t *token)
142 obstack_1grow(&symbol_obstack, c);
155 obstack_1grow(&symbol_obstack, c);
167 if(replace_trigraph())
/* terminate the collected text and turn it into a symbol */
179 obstack_1grow(&symbol_obstack, '\0');
181 string = obstack_finish(&symbol_obstack);
182 symbol = symbol_table_insert(string);
/* The token type is either the ID stored in the symbol or T_IDENTIFIER; the
 * conditions that select between the assignments are not visible here. */
186 token->type = symbol->ID;
188 token->type = T_IDENTIFIER;
191 token->type = T_IDENTIFIER;
192 token->v.symbol = symbol;
/* The symbol refers to a different copy of the text, so the copy that was
 * just built on the obstack is not needed and gets released. */
194 if(symbol->string != string) {
195 obstack_free(&symbol_obstack, string);
/* Parse a hexadecimal integer literal. On entry c holds the x or X of the
 * prefix (asserted). The result is T_INTEGER with the value in v.intvalue, or
 * T_ERROR (after an error message) when no hexadecimal digit follows.
 * No overflow check on value is visible. */
200 void parse_number_hex(token_t *token)
202 assert(c == 'x' || c == 'X');
206 !('A' <= c && c <= 'F') &&
207 !('a' <= c && c <= 'f')) {
208 parse_error("premature end of hex number literal");
209 token->type = T_ERROR;
/* accumulate one digit: decimal digits, then A-F, then a-f */
216 value = 16 * value + c - '0';
217 } else if ('A' <= c && c <= 'F') {
218 value = 16 * value + c - 'A' + 10;
219 } else if ('a' <= c && c <= 'f') {
220 value = 16 * value + c - 'a' + 10;
222 token->type = T_INTEGER;
223 token->v.intvalue = value;
/* Parse an octal integer literal. On entry c holds the o or O that introduces
 * it (asserted). Digits 0-7 are accumulated in base 8 and the result is
 * delivered as T_INTEGER in v.intvalue. */
231 void parse_number_oct(token_t *token)
233 assert(c == 'o' || c == 'O');
238 if ('0' <= c && c <= '7') {
239 value = 8 * value + c - '0';
241 token->type = T_INTEGER;
242 token->v.intvalue = value;
/* Parse a decimal integer literal and deliver it as T_INTEGER in v.intvalue.
 * first_char is a leading digit that was consumed already; it seeds value.
 * NOTE(review): parse_number() also calls this with first_char == 0, so the
 * assert below is presumably guarded by a condition on a line that is not
 * visible here - confirm. */
250 void parse_number_dec(token_t *token, int first_char)
254 assert(first_char >= '0' && first_char <= '9');
255 value = first_char - '0';
260 value = 10 * value + c - '0';
262 token->type = T_INTEGER;
263 token->v.intvalue = value;
/* Parse an integer literal by dispatching to the hexadecimal, octal or
 * decimal parser. The switch selects on a prefix character (apparently the
 * one after a leading 0, since the default case passes '0' as first digit);
 * the remaining path calls the decimal parser without a consumed digit. */
271 void parse_number(token_t *token)
273 // TODO check for overflow
274 // TODO check for various invalid input sequences
280 case 'x': parse_number_hex(token); break;
282 case 'O': parse_number_oct(token); break;
283 default: parse_number_dec(token, '0');
286 parse_number_dec(token, 0);
/* Translate the escape sequence that follows a backslash and return the
 * character it denotes. Simple escapes are mapped directly. Hexadecimal and
 * octal escapes are recognized but not implemented yet; they are reported
 * through parse_error(), as are end of file and unknown escape characters.
 * A question mark may start a trigraph, which replace_trigraph() handles. */
291 int parse_escape_sequence(void)
298 case '"': return '"';
299 case '\'': return'\'';
303 case 'a': return '\a';
304 case 'b': return '\b';
305 case 'f': return '\f';
306 case 'n': return '\n';
307 case 'r': return '\r';
308 case 't': return '\t';
309 case 'v': return '\v';
310 case 'x': /* TODO parse hex number ... */
311 parse_error("hex escape sequences not implemented yet");
/* An octal escape starts with one of the digit characters '0'..'7'. The
 * labels must be the character codes of those digits, not the control
 * characters with the numeric values 0..8. */
313 case '0' ... '7': /* TODO parse octal number ... */
314 parse_error("octal escape sequences not implemented yet");
320 /* might be a trigraph */
322 if(replace_trigraph()) {
330 parse_error("reached end of file while parsing escape sequence");
333 parse_error("unknown escape sequence");
/* Scan a string literal into token. Characters and translated escape
 * sequences are collected on symbol_obstack; the finished string is entered
 * into stringset so that equal literals share one copy. The result is
 * T_STRING_LITERAL with v.string, or T_ERROR if the string has no end. */
340 void parse_string_literal(token_t *token)
/* remembered so that an unterminated string is reported at its first line */
342 unsigned start_linenr = source_position.linenr;
352 obstack_1grow(&symbol_obstack, '?');
/* NOTE(review): the result of parse_escape_sequence() is appended without a
 * visible check - confirm how its error results are meant to be handled. */
360 int ec = parse_escape_sequence();
361 obstack_1grow(&symbol_obstack, ec);
365 error_prefix_at(source_position.input_name, start_linenr);
366 fprintf(stderr, "string has no end\n");
367 token->type = T_ERROR;
375 obstack_1grow(&symbol_obstack, c);
383 /* TODO: concatenate multiple strings separated by whitespace... */
385 /* add finishing 0 to the string */
386 obstack_1grow(&symbol_obstack, '\0');
387 string = obstack_finish(&symbol_obstack);
389 /* check if there is already a copy of the string */
390 result = strset_insert(&stringset, string);
/* the set returned another copy, so the one just built is released */
391 if(result != string) {
392 obstack_free(&symbol_obstack, string);
395 token->type = T_STRING_LITERAL;
396 token->v.string = result;
/* MATCH_NEWLINE(code): newline handling for use inside a switch; the visible
 * lines advance source_position.linenr in two places. The use of code is on
 * lines that are not part of this excerpt. */
399 #define MATCH_NEWLINE(code) \
405 source_position.linenr++; \
409 source_position.linenr++; \
/* Scan a character constant into token. The character found is delivered as
 * T_INTEGER with its value in v.intvalue. A newline or a second character
 * inside the constant is reported as an error; end of file yields T_ERROR. */
413 void parse_character_constant(token_t *token)
433 parse_error("newline while parsing character constant");
439 goto end_of_char_constant;
442 parse_error("EOF while parsing character constant");
443 token->type = T_ERROR;
/* found_char != 0 means that a character was stored already */
447 if(found_char != 0) {
448 parse_error("more than 1 characters in character "
450 goto end_of_char_constant;
/* common exit: deliver the character value as an integer token */
459 end_of_char_constant:
460 token->type = T_INTEGER;
461 token->v.intvalue = found_char;
/* Skip a block comment. The line where it started is remembered so that a
 * comment which is still open at end of file is reported at that line.
 * Question marks are checked for trigraphs through replace_trigraph(), and
 * newlines inside the comment go through MATCH_NEWLINE. */
465 void skip_multiline_comment(void)
467 unsigned start_linenr = source_position.linenr;
498 if(replace_trigraph())
503 /* we don't put back the 2nd ? as the comment text is discarded
507 MATCH_NEWLINE(had_star = 0; break;)
510 error_prefix_at(source_position.input_name, start_linenr);
511 fprintf(stderr, "at end of file while looking for comment end\n");
// Skip a line comment. Trigraphs are still checked through
// replace_trigraph() and the line counter is advanced; the conditions for
// both are on lines that are not part of this excerpt.
522 void skip_line_comment(void)
531 if(replace_trigraph())
534 /* we don't put back the 2nd ? as the comment text is discarded
542 source_position.linenr++;
// Forward declaration: the directive handlers that follow call the
// preprocessing token lexer before its definition appears.
559 void lexer_next_preprocessing_token(token_t *token);
// Body not visible in this excerpt; going by the name it discards the rest
// of the current line - confirm against the full source.
562 void eat_until_newline(void)
// Handle an #error directive. The visible code prints a fixed notice to
// stderr; the text that follows the directive is not printed by it.
568 void error_directive(void)
571 fprintf(stderr, "#error directive: \n");
573 /* parse pp-tokens until new-line */
/* Handle a #define directive: read the next preprocessing token and report
 * an error unless it is an identifier. */
577 void define_directive(void)
581 lexer_next_preprocessing_token(&temptoken);
582 if(temptoken.type != T_IDENTIFIER) {
/* parse_error() appends the newline itself, so the message carries none */
583 parse_error("expected identifier after #define");
/* Handle an #ifdef or #ifndef directive; going by its name is_ifndef selects
 * the negated form (its use is not visible here). The next preprocessing
 * token is read; the check that it is an identifier is still disabled. */
589 void ifdef_directive(int is_ifndef)
593 lexer_next_preprocessing_token(&temptoken);
594 //expect_identifier();
/* Handler for the #endif directive, going by its name; the body is not
 * visible in this excerpt. */
599 void endif_directive(void)
/* Dispatch on the preprocessor ID (pp_ID) of symbol. Of the cases only one
 * statement is visible here: a notice about include printed to stdout. */
605 void found_preprocessor_identifier(symbol_t *symbol)
607 switch(symbol->pp_ID) {
609 printf("include - enable header name parsing!\n");
/* Parse a preprocessor directive: read the next preprocessing token and
 * switch on its type. In the visible case the symbol of the token is handed
 * to found_preprocessor_identifier(). result_token is not used on the lines
 * shown here. */
637 void parse_preprocessor_directive(token_t *result_token)
642 lexer_next_preprocessing_token(&temptoken);
643 switch(temptoken.type) {
645 found_preprocessor_identifier(temptoken.v.symbol);
/* Helper macros for punctuators made of several characters, used in
 * lexer_next_preprocessing_token(). Only parts of the bodies are visible:
 *   MAYBE(ch, set_type)  sets token->type to set_type; judging by uses such
 *                        as MAYBE('=', T_PLUSEQUAL) this happens when the
 *                        next character is ch - confirm
 *   ELSE_CODE(code)      general fallback; its loop also handles newlines
 *                        through EAT_NEWLINE
 *   ELSE(set_type)       fallback that sets token->type to set_type
 *   MAYBE_PROLOG         opening part shared by the constructs above */
650 #define MAYBE_PROLOG \
655 #define MAYBE(ch, set_type) \
658 token->type = set_type; \
661 #define ELSE_CODE(code) \
668 EAT_NEWLINE(break;) \
673 } /* end of while(1) */ \
676 #define ELSE(set_type) \
678 token->type = set_type; \
/* Skip whitespace. The visible lines advance the line counter and skip block
 * comments through skip_multiline_comment(). */
683 void eat_whitespace()
700 source_position.linenr++;
718 skip_multiline_comment();
/* Produce the next preprocessing token in *token. The visible code handles
 * preprocessor directives, string literals, character constants and
 * punctuators; punctuators made of several characters are matched with the
 * MAYBE / ELSE helper macros. A character that fits nothing yields T_ERROR. */
751 void lexer_next_preprocessing_token(token_t *token)
764 parse_preprocessor_directive(token);
782 parse_string_literal(token);
786 parse_character_constant(token);
/* backslash: either the line counter advances or the backslash is reported
 * as unexpected (the deciding condition is not visible here) */
793 source_position.linenr++;
796 parse_error("unexpected '\\' found");
797 token->type = T_ERROR;
805 MAYBE('.', T_DOTDOTDOT)
816 MAYBE('=', T_ANDEQUAL)
820 MAYBE('=', T_ASTERISKEQUAL)
824 MAYBE('+', T_PLUSPLUS)
825 MAYBE('=', T_PLUSEQUAL)
829 MAYBE('-', T_MINUSMINUS)
830 MAYBE('=', T_MINUSEQUAL)
834 MAYBE('=', T_EXCLAMATIONMARKEQUAL)
838 MAYBE('=', T_SLASHEQUAL)
/* a comment produces no token: after skipping it the function calls itself
 * to deliver the token that follows */
841 skip_multiline_comment();
842 lexer_next_preprocessing_token(token);
847 lexer_next_preprocessing_token(token);
852 MAYBE('>', T_PERCENTGREATER)
853 MAYBE('=', T_PERCENTEQUAL)
858 MAYBE(':', T_PERCENTCOLONPERCENTCOLON)
862 token->type = T_PERCENTCOLON;
869 MAYBE(':', T_LESSCOLON)
870 MAYBE('%', T_LESSPERCENT)
873 MAYBE('=', T_LESSLESSEQUAL)
880 MAYBE('=', T_GREATERGREATEREQUAL)
881 ELSE(T_GREATERGREATER)
885 MAYBE('=', T_CARETEQUAL)
889 MAYBE('=', T_PIPEEQUAL)
890 MAYBE('|', T_PIPEPIPE)
894 MAYBE('>', T_COLONGREATER)
898 MAYBE('=', T_EQUALEQUAL)
902 MAYBE('#', T_HASHHASH)
907 /* just a simple ? */
912 /* might be a trigraph */
914 if(replace_trigraph()) {
/* nothing matched: report the character and deliver an error token */
942 fprintf(stderr, "unknown character '%c' found\n", c);
943 token->type = T_ERROR;
/* Deliver the next token in *token, skipping every preprocessing token whose
 * type is the newline character. */
949 void lexer_next_token(token_t *token)
952 lexer_next_preprocessing_token(token);
953 } while(token->type == '\n');
/* Set up the lexer state that lives across inputs: the set of string
 * literals. Its counterpart is exit_lexer(). */
956 void init_lexer(void)
958 strset_init(&stringset);
/* Start lexing a new input. input_name is stored in source_position for use
 * in diagnostics and the line counter is reset to 0. How stream is stored is
 * on lines not shown here. */
961 void lexer_open_stream(FILE *stream, const char *input_name)
964 source_position.linenr = 0;
965 source_position.input_name = input_name;
967 /* we place a virtual '\n' at the beginning so the lexer knows we're at the
968 * beginning of a line */
/* Release the set of string literals that init_lexer() set up. */
972 void exit_lexer(void)
974 strset_destroy(&stringset);
/* Debugging helper: print the input name and line number of a position to
 * stdout. The unused attribute keeps the compiler quiet when nothing calls
 * it. The parameter has the same name as the global source_position and
 * hides it inside this function.
 * NOTE(review): linenr is printed with %d, while other code in this file
 * passes it as unsigned - confirm the type of the field. */
977 static __attribute__((unused))
978 void dbg_pos(const source_position_t source_position)
980 fprintf(stdout, "%s:%d\n", source_position.input_name, source_position.linenr);