improve type printing
[cparser] / lexer.c
1 #include <config.h>
2
3 #include "lexer.h"
4 #include "token_t.h"
5 #include "symbol_table_t.h"
6 #include "adt/error.h"
7 #include "adt/strset.h"
8
9 #include <assert.h>
10 #include <errno.h>
11 #include <string.h>
12 #include <ctype.h>
13
14 //#define DEBUG_CHARS
/* number of bytes kept free in front of the read area of buf, so that
 * put_back() can still step back right after the buffer has been refilled */
#define MAX_PUTBACK 3

/* current character; set by next_char(), EOF once the input is exhausted */
static int         c;
/* token filled in by lexer_next_token()/lexer_next_preprocessing_token() */
token_t            lexer_token;
/* stream next_char() reads from; set by lexer_open_stream() */
static FILE       *input;
/* read buffer; fread() fills it starting at buf + MAX_PUTBACK */
static char        buf[1024 + MAX_PUTBACK];
/* one past the last valid byte in buf */
static const char *bufend;
/* position of the current character inside buf */
static const char *bufpos;
/* set used by parse_string_literal() to share identical string literals */
static strset_t    stringset;
//static FILE      **input_stack;
//static char      **buf_stack;
26
/**
 * Writes the "<input_name>:<linenr>: Error: " prefix to stderr.
 * No newline is printed; the caller is expected to print the message itself
 * afterwards.
 *
 * @param input_name  name of the input, printed verbatim
 * @param linenr      line number to report
 */
static
void error_prefix_at(const char *input_name, unsigned linenr)
{
        /* linenr is unsigned, so it has to be formatted with %u (not %d) */
        fprintf(stderr, "%s:%u: Error: ", input_name, linenr);
}
32
33 static
34 void error_prefix(void)
35 {
36         error_prefix_at(lexer_token.source_position.input_name,
37                         lexer_token.source_position.linenr);
38 }
39
/**
 * Reports an error at the current lexer position: prints the error prefix
 * followed by msg and a newline on stderr.
 *
 * @param msg  message text (without trailing newline)
 */
static
void parse_error(const char *msg)
{
        error_prefix();
        fputs(msg, stderr);
        fputc('\n', stderr);
}
46
47 static inline
48 void next_char(void)
49 {
50         bufpos++;
51         if(bufpos >= bufend) {
52                 size_t s = fread(buf + MAX_PUTBACK, 1, sizeof(buf) - MAX_PUTBACK,
53                                  input);
54                 if(s == 0) {
55                         c = EOF;
56                         return;
57                 }
58                 bufpos = buf + MAX_PUTBACK;
59                 bufend = buf + MAX_PUTBACK + s;
60         }
61         c = *(bufpos);
62 #ifdef DEBUG_CHARS
63         printf("nchar '%c'\n", c);
64 #endif
65 }
66
67 static inline
68 void put_back(int pc)
69 {
70         char *p = (char*) bufpos - 1;
71         bufpos--;
72         assert(p >= buf);
73         *p = pc;
74
75 #ifdef DEBUG_CHARS
76         printf("putback '%c'\n", pc);
77 #endif
78 }
79
80
81 static
82 int replace_trigraph(void)
83 {
84 #define MATCH_TRIGRAPH(ch,replacement)           \
85         case ch:                                     \
86                 c = replacement;                         \
87                 return 1;
88
89         switch(c) {
90         MATCH_TRIGRAPH('=', '#')
91         MATCH_TRIGRAPH('(', '[')
92         MATCH_TRIGRAPH('/', '\\')
93         MATCH_TRIGRAPH(')', ']')
94         MATCH_TRIGRAPH('\'', '^')
95         MATCH_TRIGRAPH('<', '{')
96         MATCH_TRIGRAPH('!', '|')
97         MATCH_TRIGRAPH('>', '}')
98         MATCH_TRIGRAPH('-', '~')
99         default:
100                 break;
101         }
102
103         return 0;
104 }
105
/**
 * Expands to a "case '?':" label for use inside a switch over c.
 *
 * If the '?' starts a trigraph, c is replaced by the character the trigraph
 * stands for and the switch is left with break, so an enclosing loop can
 * dispatch on the new c. Otherwise the characters that were read ahead are put
 * back, c is set to '?' again and no_trigraph_code is executed.
 *
 * custom_putback is executed right before the characters are put back.
 * no_trigraph_code has to leave the case itself (break/return/goto): in the
 * first if-block control would otherwise continue with the trigraph check,
 * and after the last line it would fall into the following case.
 */
#define SKIP_TRIGRAPHS(custom_putback, no_trigraph_code) \
        case '?':                                  \
                next_char();                           \
                if(c != '?') {                         \
                        custom_putback;                    \
                        put_back(c);                       \
                        c = '?';                           \
                        no_trigraph_code;                  \
                }                                      \
                next_char();                           \
                if(replace_trigraph()) {               \
                        break;                             \
                }                                      \
                custom_putback;                        \
                put_back('?');                         \
                put_back(c);                           \
                c = '?';                               \
                no_trigraph_code;
124
/**
 * If c is at a line break ("\r", "\r\n" or "\n") the line break is consumed,
 * the line number of lexer_token is incremented and newline_code is executed.
 * Does nothing if c is any other character.
 *
 * Expands to an if/else-if statement, so a "break;" passed as newline_code
 * leaves the switch or loop the macro is used in.
 */
#define EAT_NEWLINE(newline_code)              \
        if(c == '\r') {                            \
                next_char();                           \
                if(c == '\n')                          \
                        next_char();                       \
                lexer_token.source_position.linenr++;  \
                newline_code;                          \
        } else if(c == '\n') {                     \
                next_char();                           \
                lexer_token.source_position.linenr++;  \
                newline_code;                          \
        }
137
/**
 * Expands to the case labels for all characters a symbol can start with:
 * the letters a-z and A-Z and the underscore. The code following the macro
 * becomes the body of these cases.
 */
#define SYMBOL_CHARS  \
        case 'a':         \
        case 'b':         \
        case 'c':         \
        case 'd':         \
        case 'e':         \
        case 'f':         \
        case 'g':         \
        case 'h':         \
        case 'i':         \
        case 'j':         \
        case 'k':         \
        case 'l':         \
        case 'm':         \
        case 'n':         \
        case 'o':         \
        case 'p':         \
        case 'q':         \
        case 'r':         \
        case 's':         \
        case 't':         \
        case 'u':         \
        case 'v':         \
        case 'w':         \
        case 'x':         \
        case 'y':         \
        case 'z':         \
        case 'A':         \
        case 'B':         \
        case 'C':         \
        case 'D':         \
        case 'E':         \
        case 'F':         \
        case 'G':         \
        case 'H':         \
        case 'I':         \
        case 'J':         \
        case 'K':         \
        case 'L':         \
        case 'M':         \
        case 'N':         \
        case 'O':         \
        case 'P':         \
        case 'Q':         \
        case 'R':         \
        case 'S':         \
        case 'T':         \
        case 'U':         \
        case 'V':         \
        case 'W':         \
        case 'X':         \
        case 'Y':         \
        case 'Z':         \
        case '_':
192
/**
 * Expands to the case labels for the decimal digits '0' to '9'. The code
 * following the macro becomes the body of these cases.
 */
#define DIGITS        \
        case '0':         \
        case '1':         \
        case '2':         \
        case '3':         \
        case '4':         \
        case '5':         \
        case '6':         \
        case '7':         \
        case '8':         \
        case '9':
204
205 static
206 void parse_symbol(void)
207 {
208         symbol_t *symbol;
209         char     *string;
210
211         obstack_1grow(&symbol_obstack, c);
212         next_char();
213
214         while(1) {
215                 switch(c) {
216                 case '\\':
217                         next_char();
218                         EAT_NEWLINE(break;)
219                         goto end_symbol;
220
221                 DIGITS
222                 SYMBOL_CHARS
223                         obstack_1grow(&symbol_obstack, c);
224                         next_char();
225                         break;
226
227                 case '?':
228                         next_char();
229                         if(c != '?') {
230                                 put_back(c);
231                                 c = '?';
232                                 goto end_symbol;
233                         }
234                         next_char();
235                         if(replace_trigraph())
236                                 break;
237                         put_back('?');
238                         put_back(c);
239                         c = '?';
240                         goto end_symbol;
241
242                 default:
243                         goto end_symbol;
244                 }
245         }
246 end_symbol:
247         obstack_1grow(&symbol_obstack, '\0');
248
249         string = obstack_finish(&symbol_obstack);
250         symbol = symbol_table_insert(string);
251
252         lexer_token.type     = symbol->ID;
253         lexer_token.v.symbol = symbol;
254
255         if(symbol->string != string) {
256                 obstack_free(&symbol_obstack, string);
257         }
258 }
259
260 static
261 void parse_number_hex(void)
262 {
263         assert(c == 'x' || c == 'X');
264         next_char();
265
266         if (!isdigit(c) &&
267                 !('A' <= c && c <= 'F') &&
268                 !('a' <= c && c <= 'f')) {
269                 parse_error("premature end of hex number literal");
270                 lexer_token.type = T_ERROR;
271                 return;
272         }
273
274         int value = 0;
275         for(;;) {
276                 if (isdigit(c)) {
277                         value = 16 * value + c - '0';
278                 } else if ('A' <= c && c <= 'F') {
279                         value = 16 * value + c - 'A' + 10;
280                 } else if ('a' <= c && c <= 'f') {
281                         value = 16 * value + c - 'a' + 10;
282                 } else {
283                         lexer_token.type     = T_INTEGER;
284                         lexer_token.v.intvalue = value;
285                         return;
286                 }
287                 next_char();
288         }
289 }
290
291 static
292 void parse_number_oct(void)
293 {
294         assert(c == 'o' || c == 'O');
295         next_char();
296
297         int value = 0;
298         for(;;) {
299                 if ('0' <= c && c <= '7') {
300                         value = 8 * value + c - '0';
301                 } else {
302                         lexer_token.type       = T_INTEGER;
303                         lexer_token.v.intvalue = value;
304                         return;
305                 }
306                 next_char();
307         }
308 }
309
310 static
311 void parse_number_dec(int first_char)
312 {
313         int value = 0;
314         if(first_char > 0) {
315                 assert(first_char >= '0' && first_char <= '9');
316                 value = first_char - '0';
317         }
318
319         for(;;) {
320                 if (isdigit(c)) {
321                         value = 10 * value + c - '0';
322                 } else {
323                         lexer_token.type       = T_INTEGER;
324                         lexer_token.v.intvalue = value;
325                         return;
326                 }
327                 next_char();
328         }
329 }
330
331 static
332 void parse_number(void)
333 {
334         // TODO check for overflow
335         // TODO check for various invalid inputs sequences
336
337         if (c == '0') {
338                 next_char();
339                 switch (c) {
340                         case 'X':
341                         case 'x': parse_number_hex(); break;
342                         case 'o':
343                         case 'O': parse_number_oct(); break;
344                         default:  parse_number_dec('0');
345                 }
346         } else {
347                 parse_number_dec(0);
348         }
349 }
350
/**
 * Parses the part of an escape sequence that follows the backslash. On entry
 * c is the first character after the backslash.
 *
 * @return the character the escape sequence stands for, or EOF after an error
 *         has been reported with parse_error() (unknown sequence, end of
 *         file, or the hex/octal forms which are not implemented yet)
 */
static
int parse_escape_sequence(void)
{
        while(1) {
                /* ec is the escape character, c already the character after it */
                int ec = c;
                next_char();

                switch(ec) {
                case '"': return '"';
                case '\'': return'\'';
                case '\\':
                        /* if a line break follows, it is consumed and the loop
                         * continues with the character after it */
                        EAT_NEWLINE(break;)
                        return '\\';
                case 'a': return '\a';
                case 'b': return '\b';
                case 'f': return '\f';
                case 'n': return '\n';
                case 'r': return '\r';
                case 't': return '\t';
                case 'v': return '\v';
                case 'x': /* TODO parse hex number ... */
                        parse_error("hex escape sequences not implemented yet");
                        return EOF;
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                        /* TODO parse octal number ... */
                        parse_error("octal escape sequences not implemented yet");
                        return EOF;
                case '?':
                        if(c != '?') {
                                return '?';
                        }
                        /* might be a trigraph */
                        next_char();
                        if(replace_trigraph()) {
                                /* c is now the replaced character; the loop treats it as
                                 * the escape character */
                                break;
                        }
                        put_back(c);
                        c = '?';
                        return '?';

                case EOF:
                        parse_error("reached end of file while parsing escape sequence");
                        return EOF;
                default:
                        parse_error("unknown escape sequence");
                        return EOF;
                }
        }
}
407
408 static
409 void parse_string_literal(void)
410 {
411         unsigned    start_linenr = lexer_token.source_position.linenr;
412         char       *string;
413         const char *result;
414
415         assert(c == '"');
416         next_char();
417
418         while(1) {
419                 switch(c) {
420                 SKIP_TRIGRAPHS(,
421                         obstack_1grow(&symbol_obstack, '?');
422                         next_char();
423                         break;
424                 )
425
426                 case '\\':
427                         next_char();
428                         EAT_NEWLINE(break;)
429                         int ec = parse_escape_sequence();
430                         obstack_1grow(&symbol_obstack, ec);
431                         break;
432
433                 case EOF:
434                         error_prefix_at(lexer_token.source_position.input_name,
435                                         start_linenr);
436                         fprintf(stderr, "string has no end\n");
437                         lexer_token.type = T_ERROR;
438                         return;
439
440                 case '"':
441                         next_char();
442                         goto end_of_string;
443
444                 default:
445                         obstack_1grow(&symbol_obstack, c);
446                         next_char();
447                         break;
448                 }
449         }
450
451 end_of_string:
452
453         /* TODO: concatenate multiple strings separated by whitespace... */
454
455         /* add finishing 0 to the string */
456         obstack_1grow(&symbol_obstack, '\0');
457         string = obstack_finish(&symbol_obstack);
458
459         /* check if there is already a copy of the string */
460         result = strset_insert(&stringset, string);
461         if(result != string) {
462                 obstack_free(&symbol_obstack, string);
463         }
464
465         lexer_token.type     = T_STRING_LITERAL;
466         lexer_token.v.string = result;
467 }
468
/**
 * Expands to the case labels '\r' and '\n' for use inside a switch over c.
 * The line break ("\r", "\r\n" or "\n") is consumed, the line number of
 * lexer_token is incremented and code is executed.
 *
 * code has to leave the case itself (break/return/goto); otherwise the '\r'
 * case falls through into the '\n' case and the '\n' case into whatever
 * follows the macro.
 */
#define MATCH_NEWLINE(code)                   \
        case '\r':                                \
                next_char();                          \
                if(c == '\n') {                       \
                        next_char();                      \
                }                                     \
                lexer_token.source_position.linenr++; \
                code;                                 \
        case '\n':                                \
                next_char();                          \
                lexer_token.source_position.linenr++; \
                code;
481
482 static
483 void parse_character_constant(void)
484 {
485         assert(c == '\'');
486         next_char();
487
488         int found_char = 0;
489         while(1) {
490                 switch(c) {
491                 SKIP_TRIGRAPHS(,
492                         found_char = '?';
493                         break;
494                 )
495
496                 case '\\':
497                         next_char();
498                         EAT_NEWLINE(break;)
499                         found_char = '\\';
500                         break;
501
502                 MATCH_NEWLINE(
503                         parse_error("newline while parsing character constant");
504                         break;
505                 )
506
507                 case '\'':
508                         next_char();
509                         goto end_of_char_constant;
510
511                 case EOF:
512                         parse_error("EOF while parsing character constant");
513                         lexer_token.type = T_ERROR;
514                         return;
515
516                 default:
517                         if(found_char != 0) {
518                                 parse_error("more than 1 characters in character "
519                                             "constant");
520                                 goto end_of_char_constant;
521                         } else {
522                                 found_char = c;
523                                 next_char();
524                         }
525                         break;
526                 }
527         }
528
529 end_of_char_constant:
530         lexer_token.type       = T_INTEGER;
531         lexer_token.v.intvalue = found_char;
532 }
533
/**
 * Skips the rest of a multiline comment. On entry the opening of the comment
 * has already been consumed; on return c is the first character after the
 * closing of the comment.
 *
 * If the end of the input is reached first, an error mentioning the line the
 * function was entered on is printed.
 */
static
void skip_multiline_comment(void)
{
        unsigned start_linenr = lexer_token.source_position.linenr;
        /* 1 if the previously handled character was a '*' */
        int had_star = 0;

        while(1) {
                switch(c) {
                case '*':
                        next_char();
                        had_star = 1;
                        break;

                case '/':
                        next_char();
                        /* a '/' directly after a '*' ends the comment */
                        if(had_star) {
                                return;
                        }
                        had_star = 0;
                        break;

                case '\\':
                        next_char();
                        /* a backslash-newline is skipped without touching had_star, so
                         * a '*' and a '/' separated by it still end the comment */
                        EAT_NEWLINE(break;)
                        had_star = 0;
                        break;

                case '?':
                        next_char();
                        if(c != '?') {
                                had_star = 0;
                                break;
                        }
                        next_char();
                        /* a trigraph: the loop continues with the replaced character */
                        if(replace_trigraph())
                                break;
                        put_back(c);
                        c = '?';
                        had_star = 0;
                        /* we don't put back the 2nd ? as the comment text is discarded
                         * anyway */
                        break;

                MATCH_NEWLINE(had_star = 0; break;)

                case EOF:
                        error_prefix_at(lexer_token.source_position.input_name,
                                        start_linenr);
                        fprintf(stderr, "at end of file while looking for comment end\n");
                        return;
                default:
                        had_star = 0;
                        next_char();
                        break;
                }
        }
}
591
592 static
593 void skip_line_comment(void)
594 {
595         while(1) {
596                 switch(c) {
597                 case '?':
598                         next_char();
599                         if(c != '?')
600                                 break;
601                         next_char();
602                         if(replace_trigraph())
603                                 break;
604                         put_back('?');
605                         /* we don't put back the 2nd ? as the comment text is discarded
606                          * anyway */
607                         break;
608
609                 case '\\':
610                         next_char();
611                         if(c == '\n') {
612                                 next_char();
613                                 lexer_token.source_position.linenr++;
614                         }
615                         break;
616
617                 case EOF:
618                 case '\r':
619                 case '\n':
620                         return;
621
622                 default:
623                         next_char();
624                         break;
625                 }
626         }
627 }
628
/* the preprocessing token most recently read by next_pp_token() */
static token_t pp_token;

/**
 * Reads the next preprocessing token and stores a copy of it in pp_token.
 */
static inline
void next_pp_token(void)
{
        lexer_next_preprocessing_token();
        pp_token = lexer_token;
}
637
638 static
639 void eat_until_newline(void)
640 {
641         while(pp_token.type != '\n' && pp_token.type != T_EOF) {
642                 next_pp_token();
643         }
644 }
645
/**
 * Handles an #error directive (and currently also the directives that are
 * not implemented yet): prints a message on stderr and skips the rest of the
 * directive line.
 */
static
void error_directive(void)
{
        error_prefix();
        fprintf(stderr, "#error directive: \n");

        /* skip the pp-tokens until new-line, otherwise the rest of the directive
         * line would be handed to the parser as ordinary tokens */
        eat_until_newline();
}
654
655 static
656 void define_directive(void)
657 {
658         lexer_next_preprocessing_token();
659         if(lexer_token.type != T_IDENTIFIER) {
660                 parse_error("expected identifier after #define\n");
661                 eat_until_newline();
662         }
663 }
664
/**
 * Handles an #ifdef/#ifndef directive. Currently this only reads the token
 * following the directive name; the condition is not evaluated.
 *
 * @param is_ifndef  1 for #ifndef, 0 for #ifdef (unused so far)
 */
static
void ifdef_directive(int is_ifndef)
{
        (void) is_ifndef;
        lexer_next_preprocessing_token();
        //expect_identifier();
        //expect_newline();
}
673
/**
 * Handles an #endif directive. Not implemented yet: does nothing.
 */
static
void endif_directive(void)
{
        //expect_newline();
}
679
680 static
681 void parse_line_directive(void)
682 {
683         if(pp_token.type != T_INTEGER) {
684                 parse_error("expected integer");
685         } else {
686                 lexer_token.source_position.linenr = pp_token.v.intvalue - 1;
687                 next_pp_token();
688         }
689         if(pp_token.type == T_STRING_LITERAL) {
690                 lexer_token.source_position.input_name = pp_token.v.string;
691                 next_pp_token();
692         }
693
694         eat_until_newline();
695 }
696
697 static
698 void parse_preprocessor_identifier(void)
699 {
700         assert(pp_token.type == T_IDENTIFIER);
701         symbol_t *symbol = pp_token.v.symbol;
702
703         switch(symbol->pp_ID) {
704         case TP_include:
705                 printf("include - enable header name parsing!\n");
706                 break;
707         case TP_define:
708                 define_directive();
709                 break;
710         case TP_ifdef:
711                 ifdef_directive(0);
712                 break;
713         case TP_ifndef:
714                 ifdef_directive(1);
715                 break;
716         case TP_endif:
717                 endif_directive();
718                 break;
719         case TP_line:
720                 next_pp_token();
721                 parse_line_directive();
722                 break;
723         case TP_if:
724         case TP_else:
725         case TP_elif:
726         case TP_undef:
727         case TP_error:
728                 error_directive();
729                 break;
730         case TP_pragma:
731                 break;
732         }
733 }
734
735 static
736 void parse_preprocessor_directive()
737 {
738         next_pp_token();
739
740         switch(pp_token.type) {
741         case T_IDENTIFIER:
742                 parse_preprocessor_identifier();
743                 break;
744         case T_INTEGER:
745                 parse_line_directive();
746                 break;
747         default:
748                 parse_error("invalid preprocessor directive");
749                 eat_until_newline();
750                 break;
751         }
752 }
753
/**
 * Starts the handling of a token that may consist of more than one character:
 * consumes the current character and opens a "while(1) { switch(c) {" over the
 * character following it. The cases are provided by MAYBE (or hand written
 * case labels); the construct has to be closed with ELSE or ELSE_CODE.
 */
#define MAYBE_PROLOG                                       \
                        next_char();                                   \
                        while(1) {                                     \
                                switch(c) {
758
/**
 * Case for use after MAYBE_PROLOG: if the current character is ch, it is
 * consumed, the type of lexer_token is set to set_type and the enclosing
 * function returns.
 */
#define MAYBE(ch, set_type)                                \
                                case ch:                                   \
                                        next_char();                           \
                                        lexer_token.type = set_type;           \
                                        return;
764
/**
 * Closes a construct opened with MAYBE_PROLOG. code is executed when the
 * current character matches none of the preceding cases.
 *
 * Before that, trigraphs and backslash-newline sequences are resolved: after
 * a replaced trigraph or a consumed backslash-newline the switch is left with
 * break and the surrounding while(1) dispatches again on the new c. A
 * backslash that is not followed by a line break is consumed and control falls
 * through to code.
 *
 * The macro closes the switch and the while(1) and ends with a break for the
 * switch the whole construct is placed in.
 */
#define ELSE_CODE(code)                                    \
                                SKIP_TRIGRAPHS(,                           \
                                        code;                                  \
                                )                                          \
                                                                                                                   \
                                case '\\':                                 \
                                        next_char();                           \
                                        EAT_NEWLINE(break;)                    \
                                        /* fallthrough */                      \
                                default:                                   \
                                        code;                                  \
                                }                                          \
                        } /* end of while(1) */                        \
                        break;
779
/**
 * Closes a construct opened with MAYBE_PROLOG (see ELSE_CODE): when the
 * current character matches none of the preceding cases, the type of
 * lexer_token is set to set_type and the enclosing function returns.
 */
#define ELSE(set_type)                                     \
                ELSE_CODE(                                         \
                        lexer_token.type = set_type;                   \
                        return;                                        \
                )
785
786 void lexer_next_preprocessing_token(void)
787 {
788         while(1) {
789                 switch(c) {
790                 case ' ':
791                 case '\t':
792                         next_char();
793                         break;
794
795                 MATCH_NEWLINE(
796                         lexer_token.type = '\n';
797                         return;
798                 )
799
800                 SYMBOL_CHARS
801                         parse_symbol();
802                         return;
803
804                 DIGITS
805                         parse_number();
806                         return;
807
808                 case '"':
809                         parse_string_literal();
810                         return;
811
812                 case '\'':
813                         parse_character_constant();
814                         return;
815
816                 case '\\':
817                         next_char();
818                         if(c == '\n') {
819                                 next_char();
820                                 lexer_token.source_position.linenr++;
821                                 break;
822                         } else {
823                                 parse_error("unexpected '\\' found");
824                                 lexer_token.type = T_ERROR;
825                         }
826                         return;
827
828                 case '.':
829                         MAYBE_PROLOG
830                                 case '.':
831                                         MAYBE_PROLOG
832                                         MAYBE('.', T_DOTDOTDOT)
833                                         ELSE_CODE(
834                                                 put_back(c);
835                                                 c = '.';
836                                                 lexer_token.type = '.';
837                                                 return;
838                                         )
839                         ELSE('.')
840                 case '&':
841                         MAYBE_PROLOG
842                         MAYBE('&', T_ANDAND)
843                         MAYBE('=', T_ANDEQUAL)
844                         ELSE('&')
845                 case '*':
846                         MAYBE_PROLOG
847                         MAYBE('=', T_ASTERISKEQUAL)
848                         ELSE('*')
849                 case '+':
850                         MAYBE_PROLOG
851                         MAYBE('+', T_PLUSPLUS)
852                         MAYBE('=', T_PLUSEQUAL)
853                         ELSE('+')
854                 case '-':
855                         MAYBE_PROLOG
856                         MAYBE('-', T_MINUSMINUS)
857                         MAYBE('=', T_MINUSEQUAL)
858                         ELSE('-')
859                 case '!':
860                         MAYBE_PROLOG
861                         MAYBE('=', T_EXCLAMATIONMARKEQUAL)
862                         ELSE('!')
863                 case '/':
864                         MAYBE_PROLOG
865                         MAYBE('=', T_SLASHEQUAL)
866                                 case '*':
867                                         next_char();
868                                         skip_multiline_comment();
869                                         lexer_next_preprocessing_token();
870                                         return;
871                                 case '/':
872                                         next_char();
873                                         skip_line_comment();
874                                         lexer_next_preprocessing_token();
875                                         return;
876                         ELSE('/')
877                 case '%':
878                         MAYBE_PROLOG
879                         MAYBE('>', T_PERCENTGREATER)
880                         MAYBE('=', T_PERCENTEQUAL)
881                                 case ':':
882                                         MAYBE_PROLOG
883                                                 case '%':
884                                                         MAYBE_PROLOG
885                                                         MAYBE(':', T_PERCENTCOLONPERCENTCOLON)
886                                                         ELSE_CODE(
887                                                                 put_back(c);
888                                                                 c = '%';
889                                                                 lexer_token.type = T_PERCENTCOLON;
890                                                                 return;
891                                                         )
892                                         ELSE(T_PERCENTCOLON)
893                         ELSE('%')
894                 case '<':
895                         MAYBE_PROLOG
896                         MAYBE(':', T_LESSCOLON)
897                         MAYBE('%', T_LESSPERCENT)
898                                 case '<':
899                                         MAYBE_PROLOG
900                                         MAYBE('=', T_LESSLESSEQUAL)
901                                         ELSE(T_LESSLESS)
902                         ELSE('<')
903                 case '>':
904                         MAYBE_PROLOG
905                                 case '>':
906                                         MAYBE_PROLOG
907                                         MAYBE('=', T_GREATERGREATEREQUAL)
908                                         ELSE(T_GREATERGREATER)
909                         ELSE('>')
910                 case '^':
911                         MAYBE_PROLOG
912                         MAYBE('=', T_CARETEQUAL)
913                         ELSE('^')
914                 case '|':
915                         MAYBE_PROLOG
916                         MAYBE('=', T_PIPEEQUAL)
917                         MAYBE('|', T_PIPEPIPE)
918                         ELSE('|')
919                 case ':':
920                         MAYBE_PROLOG
921                         MAYBE('>', T_COLONGREATER)
922                         ELSE(':')
923                 case '=':
924                         MAYBE_PROLOG
925                         MAYBE('=', T_EQUALEQUAL)
926                         ELSE('=')
927                 case '#':
928                         MAYBE_PROLOG
929                         MAYBE('#', T_HASHHASH)
930                         ELSE('#')
931
932                 case '?':
933                         next_char();
934                         /* just a simple ? */
935                         if(c != '?') {
936                                 lexer_token.type = '?';
937                                 return;
938                         }
939                         /* might be a trigraph */
940                         next_char();
941                         if(replace_trigraph()) {
942                                 break;
943                         }
944                         put_back(c);
945                         c = '?';
946                         lexer_token.type = '?';
947                         return;
948
949                 case '[':
950                 case ']':
951                 case '(':
952                 case ')':
953                 case '{':
954                 case '}':
955                 case '~':
956                 case ';':
957                 case ',':
958                         lexer_token.type = c;
959                         next_char();
960                         return;
961
962                 case EOF:
963                         lexer_token.type = T_EOF;
964                         return;
965
966                 default:
967                         next_char();
968                         error_prefix();
969                         fprintf(stderr, "unknown character '%c' found\n", c);
970                         lexer_token.type = T_ERROR;
971                         return;
972                 }
973         }
974 }
975
976 void lexer_next_token(void)
977 {
978         lexer_next_preprocessing_token();
979         if(lexer_token.type != '\n')
980                 return;
981
982 newline_found:
983         do {
984                 lexer_next_preprocessing_token();
985         } while(lexer_token.type == '\n');
986
987         if(lexer_token.type == '#') {
988                 parse_preprocessor_directive();
989                 goto newline_found;
990         }
991 }
992
/**
 * Initializes the lexer: sets up the set used for string literals.
 */
void init_lexer(void)
{
        strset_init(&stringset);
}
997
998 void lexer_open_stream(FILE *stream, const char *input_name)
999 {
1000         input                                  = stream;
1001         lexer_token.source_position.linenr     = 0;
1002         lexer_token.source_position.input_name = input_name;
1003
1004         /* we place a virtual '\n' at the beginning so the lexer knows we're at the
1005          * beginning of a line */
1006         c = '\n';
1007 }
1008
/**
 * Shuts the lexer down: destroys the set used for string literals.
 */
void exit_lexer(void)
{
        strset_destroy(&stringset);
}
1013
1014 static __attribute__((unused))
1015 void dbg_pos(const source_position_t source_position)
1016 {
1017         fprintf(stdout, "%s:%d\n", source_position.input_name,
1018                 source_position.linenr);
1019         fflush(stdout);
1020 }