more work on parser, stdio.h is fully parsed now
[cparser] / lexer.c
1 #include <config.h>
2
3 #include "lexer_t.h"
4 #include "token_t.h"
5 #include "symbol_table_t.h"
6 #include "adt/error.h"
7
8 #include <assert.h>
9 #include <errno.h>
10 #include <string.h>
11 #include <ctype.h>
12
13 //#define DEBUG_CHARS
14 #define MAX_PUTBACK 3
15
16 static int               c;
17 source_position_t source_position;
18 static FILE             *input;
19 static char              buf[1027];
20 static const char       *bufend;
21 static const char       *bufpos;
22 static strset_t          stringset;
23 //static FILE            **input_stack;
24 //static char            **buf_stack;
25
/**
 * Writes the "<input_name>:<linenr>: Error: " prefix of an error message to
 * stderr. No newline is emitted; the caller prints the message text itself.
 *
 * @param input_name  name of the input that is reported
 * @param linenr      line number that is reported
 */
static
void error_prefix_at(const char *input_name, unsigned linenr)
{
	/* linenr is unsigned, so it has to be formatted with %u */
	fprintf(stderr, "%s:%u: Error: ", input_name, linenr);
}
31
32 static
33 void error_prefix()
34 {
35         error_prefix_at(source_position.input_name, source_position.linenr);
36 }
37
/**
 * Reports an error at the current source position: the position prefix
 * followed by msg and a newline, all on stderr.
 *
 * @param msg  the message text (without trailing newline)
 */
static
void parse_error(const char *msg)
{
	/* "<file>:<line>: Error: " */
	error_prefix();
	/* the message itself, terminated by a newline */
	fprintf(stderr, "%s\n", msg);
}
44
45 static inline
46 void next_char()
47 {
48         bufpos++;
49         if(bufpos >= bufend) {
50                 size_t s = fread(buf + MAX_PUTBACK, 1, sizeof(buf) - MAX_PUTBACK,
51                                  input);
52                 if(s == 0) {
53                         c = EOF;
54                         return;
55                 }
56                 bufpos = buf + MAX_PUTBACK;
57                 bufend = buf + MAX_PUTBACK + s;
58         }
59         c = *(bufpos);
60 #ifdef DEBUG_CHARS
61         printf("nchar '%c'\n", c);
62 #endif
63 }
64
65 static inline
66 void put_back(int pc)
67 {
68         char *p = (char*) bufpos - 1;
69         bufpos--;
70         assert(p >= buf);
71         *p = pc;
72
73 #ifdef DEBUG_CHARS
74         printf("putback '%c'\n", pc);
75 #endif
76 }
77
78
79 static
80 int replace_trigraph(void)
81 {
82 #define MATCH_TRIGRAPH(ch,replacement)           \
83         case ch:                                     \
84                 c = replacement;                         \
85                 return 1;
86
87         switch(c) {
88         MATCH_TRIGRAPH('=', '#')
89         MATCH_TRIGRAPH('(', '[')
90         MATCH_TRIGRAPH('/', '\\')
91         MATCH_TRIGRAPH(')', ']')
92         MATCH_TRIGRAPH('\'', '^')
93         MATCH_TRIGRAPH('<', '{')
94         MATCH_TRIGRAPH('!', '|')
95         MATCH_TRIGRAPH('>', '}')
96         MATCH_TRIGRAPH('-', '~')
97         default:
98                 break;
99         }
100
101         return 0;
102 }
103
/*
 * Expands to a "case '?':" for use inside a switch(c).
 *
 * If the '?' starts a trigraph, c is replaced by the character it stands for
 * and the switch is left with break, so the enclosing loop sees the new c.
 * Otherwise the characters read ahead are put back, c is set to '?' again
 * and no_trigraph_code is executed. no_trigraph_code has to leave the case
 * itself (break/return/goto): in the first branch control would otherwise
 * continue with the trigraph check below it.
 * custom_putback is executed right before the put_back() calls.
 */
#define SKIP_TRIGRAPHS(custom_putback, no_trigraph_code) \
	case '?': \
		next_char(); \
		if(c != '?') { \
			custom_putback; \
			put_back(c); \
			c = '?'; \
			no_trigraph_code; \
		} \
		next_char(); \
		if(replace_trigraph()) { \
			break; \
		} \
		custom_putback; \
		put_back('?'); \
		put_back(c); \
		c = '?'; \
		no_trigraph_code;
122
/*
 * If c is the start of a line break ("\r", "\r\n" or "\n"), consumes it,
 * increments the line number and then executes newline_code.
 * Does nothing when c is any other character.
 */
#define EAT_NEWLINE(newline_code) \
	if(c == '\r' || c == '\n') { \
		if(c == '\r') { \
			next_char(); \
			if(c == '\n') \
				next_char(); \
		} else { \
			next_char(); \
		} \
		source_position.linenr++; \
		newline_code; \
	}
135
/* case labels for every character that may start a symbol: a-z, A-Z and '_' */
#define SYMBOL_CHARS \
	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': \
	case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': \
	case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': \
	case 'v': case 'w': case 'x': case 'y': case 'z': \
	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': \
	case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': \
	case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': \
	case 'V': case 'W': case 'X': case 'Y': case 'Z': \
	case '_':
190
/* case labels for the decimal digits '0' to '9' */
#define DIGITS \
	case '0': case '1': case '2': case '3': case '4': \
	case '5': case '6': case '7': case '8': case '9':
202
203 static
204 void parse_symbol(token_t *token)
205 {
206         symbol_t *symbol;
207         char     *string;
208
209         obstack_1grow(&symbol_obstack, c);
210         next_char();
211
212         while(1) {
213                 switch(c) {
214                 case '\\':
215                         next_char();
216                         EAT_NEWLINE(break;)
217                         goto end_symbol;
218
219                 DIGITS
220                 SYMBOL_CHARS
221                         obstack_1grow(&symbol_obstack, c);
222                         next_char();
223                         break;
224
225                 case '?':
226                         next_char();
227                         if(c != '?') {
228                                 put_back(c);
229                                 c = '?';
230                                 goto end_symbol;
231                         }
232                         next_char();
233                         if(replace_trigraph())
234                                 break;
235                         put_back('?');
236                         put_back(c);
237                         c = '?';
238                         goto end_symbol;
239
240                 default:
241                         goto end_symbol;
242                 }
243         }
244 end_symbol:
245         obstack_1grow(&symbol_obstack, '\0');
246
247         string = obstack_finish(&symbol_obstack);
248         symbol = symbol_table_insert(string);
249
250         token->type     = symbol->ID;
251         token->v.symbol = symbol;
252
253         if(symbol->string != string) {
254                 obstack_free(&symbol_obstack, string);
255         }
256 }
257
258 static
259 void parse_number_hex(token_t *token)
260 {
261         assert(c == 'x' || c == 'X');
262         next_char();
263
264         if (!isdigit(c) &&
265                 !('A' <= c && c <= 'F') &&
266                 !('a' <= c && c <= 'f')) {
267                 parse_error("premature end of hex number literal");
268                 token->type = T_ERROR;
269                 return;
270         }
271
272         int value = 0;
273         for(;;) {
274                 if (isdigit(c)) {
275                         value = 16 * value + c - '0';
276                 } else if ('A' <= c && c <= 'F') {
277                         value = 16 * value + c - 'A' + 10;
278                 } else if ('a' <= c && c <= 'f') {
279                         value = 16 * value + c - 'a' + 10;
280                 } else {
281                         token->type     = T_INTEGER;
282                         token->v.intvalue = value;
283                         return;
284                 }
285                 next_char();
286         }
287 }
288
289 static
290 void parse_number_oct(token_t *token)
291 {
292         assert(c == 'o' || c == 'O');
293         next_char();
294
295         int value = 0;
296         for(;;) {
297                 if ('0' <= c && c <= '7') {
298                         value = 8 * value + c - '0';
299                 } else {
300                         token->type     = T_INTEGER;
301                         token->v.intvalue = value;
302                         return;
303                 }
304                 next_char();
305         }
306 }
307
308 static
309 void parse_number_dec(token_t *token, int first_char)
310 {
311         int value = 0;
312         if(first_char > 0) {
313                 assert(first_char >= '0' && first_char <= '9');
314                 value = first_char - '0';
315         }
316
317         for(;;) {
318                 if (isdigit(c)) {
319                         value = 10 * value + c - '0';
320                 } else {
321                         token->type     = T_INTEGER;
322                         token->v.intvalue = value;
323                         return;
324                 }
325                 next_char();
326         }
327 }
328
329 static
330 void parse_number(token_t *token)
331 {
332         // TODO check for overflow
333         // TODO check for various invalid inputs sequences
334
335         if (c == '0') {
336                 next_char();
337                 switch (c) {
338                         case 'X':
339                         case 'x': parse_number_hex(token); break;
340                         case 'o':
341                         case 'O': parse_number_oct(token); break;
342                         default:  parse_number_dec(token, '0');
343                 }
344         } else {
345                 parse_number_dec(token, 0);
346         }
347 }
348
349 static
350 int parse_escape_sequence()
351 {
352         while(1) {
353                 int ec = c;
354                 next_char();
355
356                 switch(ec) {
357                 case '"': return '"';
358                 case '\'': return'\'';
359                 case '\\':
360                         EAT_NEWLINE(break;)
361                         return '\\';
362                 case 'a': return '\a';
363                 case 'b': return '\b';
364                 case 'f': return '\f';
365                 case 'n': return '\n';
366                 case 'r': return '\r';
367                 case 't': return '\t';
368                 case 'v': return '\v';
369                 case 'x': /* TODO parse hex number ... */
370                         parse_error("hex escape sequences not implemented yet");
371                         return EOF;
372                 case '0':
373                 case '1':
374                 case '2':
375                 case '3':
376                 case '4':
377                 case '5':
378                 case '6':
379                 case '7':
380                         /* TODO parse octal number ... */
381                         parse_error("octal escape sequences not implemented yet");
382                         return EOF;
383                 case '?':
384                         if(c != '?') {
385                                 return '?';
386                         }
387                         /* might be a trigraph */
388                         next_char();
389                         if(replace_trigraph()) {
390                                 break;
391                         }
392                         put_back(c);
393                         c = '?';
394                         return '?';
395
396                 case EOF:
397                         parse_error("reached end of file while parsing escape sequence");
398                         return EOF;
399                 default:
400                         parse_error("unknown escape sequence");
401                         return EOF;
402                 }
403         }
404 }
405
406 static
407 void parse_string_literal(token_t *token)
408 {
409         unsigned    start_linenr = source_position.linenr;
410         char       *string;
411         const char *result;
412
413         assert(c == '"');
414         next_char();
415
416         while(1) {
417                 switch(c) {
418                 SKIP_TRIGRAPHS(,
419                         obstack_1grow(&symbol_obstack, '?');
420                         next_char();
421                         break;
422                 )
423
424                 case '\\':
425                         next_char();
426                         EAT_NEWLINE(break;)
427                         int ec = parse_escape_sequence();
428                         obstack_1grow(&symbol_obstack, ec);
429                         break;
430
431                 case EOF:
432                         error_prefix_at(source_position.input_name, start_linenr);
433                         fprintf(stderr, "string has no end\n");
434                         token->type = T_ERROR;
435                         return;
436
437                 case '"':
438                         next_char();
439                         goto end_of_string;
440
441                 default:
442                         obstack_1grow(&symbol_obstack, c);
443                         next_char();
444                         break;
445                 }
446         }
447
448 end_of_string:
449
450         /* TODO: concatenate multiple strings separated by whitespace... */
451
452         /* add finishing 0 to the string */
453         obstack_1grow(&symbol_obstack, '\0');
454         string = obstack_finish(&symbol_obstack);
455
456         /* check if there is already a copy of the string */
457         result = strset_insert(&stringset, string);
458         if(result != string) {
459                 obstack_free(&symbol_obstack, string);
460         }
461
462         token->type     = T_STRING_LITERAL;
463         token->v.string = result;
464 }
465
/*
 * Expands to the cases '\r' and '\n' for use inside a switch(c). Each case
 * consumes the line break ("\r\n" counts as one), increments the line number
 * and then executes code.
 * code has to leave the case itself (break/return/goto), otherwise the '\r'
 * case falls through into the '\n' case.
 */
#define MATCH_NEWLINE(code) \
	case '\r': \
		next_char(); \
		if(c == '\n') { \
			next_char(); \
		} \
		source_position.linenr++; \
		code; \
	case '\n': \
		next_char(); \
		source_position.linenr++; \
		code;
478
479 static
480 void parse_character_constant(token_t *token)
481 {
482         assert(c == '\'');
483         next_char();
484
485         int found_char = 0;
486         while(1) {
487                 switch(c) {
488                 SKIP_TRIGRAPHS(,
489                         found_char = '?';
490                         break;
491                 )
492
493                 case '\\':
494                         next_char();
495                         EAT_NEWLINE(break;)
496                         found_char = '\\';
497                         break;
498
499                 MATCH_NEWLINE(
500                         parse_error("newline while parsing character constant");
501                         break;
502                 )
503
504                 case '\'':
505                         next_char();
506                         goto end_of_char_constant;
507
508                 case EOF:
509                         parse_error("EOF while parsing character constant");
510                         token->type = T_ERROR;
511                         return;
512
513                 default:
514                         if(found_char != 0) {
515                                 parse_error("more than 1 characters in character "
516                                             "constant");
517                                 goto end_of_char_constant;
518                         } else {
519                                 found_char = c;
520                                 next_char();
521                         }
522                         break;
523                 }
524         }
525
526 end_of_char_constant:
527         token->type       = T_INTEGER;
528         token->v.intvalue = found_char;
529 }
530
531 static
532 void skip_multiline_comment(void)
533 {
534         unsigned start_linenr = source_position.linenr;
535         int had_star = 0;
536
537         while(1) {
538                 switch(c) {
539                 case '*':
540                         next_char();
541                         had_star = 1;
542                         break;
543
544                 case '/':
545                         next_char();
546                         if(had_star) {
547                                 return;
548                         }
549                         had_star = 0;
550                         break;
551
552                 case '\\':
553                         next_char();
554                         EAT_NEWLINE(break;)
555                         had_star = 0;
556                         break;
557
558                 case '?':
559                         next_char();
560                         if(c != '?') {
561                                 had_star = 0;
562                                 break;
563                         }
564                         next_char();
565                         if(replace_trigraph())
566                                 break;
567                         put_back(c);
568                         c = '?';
569                         had_star = 0;
570                         /* we don't put back the 2nd ? as the comment text is discarded
571                          * anyway */
572                         break;
573
574                 MATCH_NEWLINE(had_star = 0; break;)
575
576                 case EOF:
577                         error_prefix_at(source_position.input_name, start_linenr);
578                         fprintf(stderr, "at end of file while looking for comment end\n");
579                         return;
580                 default:
581                         had_star = 0;
582                         next_char();
583                         break;
584                 }
585         }
586 }
587
588 static
589 void skip_line_comment(void)
590 {
591         while(1) {
592                 switch(c) {
593                 case '?':
594                         next_char();
595                         if(c != '?')
596                                 break;
597                         next_char();
598                         if(replace_trigraph())
599                                 break;
600                         put_back('?');
601                         /* we don't put back the 2nd ? as the comment text is discarded
602                          * anyway */
603                         break;
604
605                 case '\\':
606                         next_char();
607                         if(c == '\n') {
608                                 next_char();
609                                 source_position.linenr++;
610                         }
611                         break;
612
613                 case EOF:
614                 case '\r':
615                 case '\n':
616                         return;
617
618                 default:
619                         next_char();
620                         break;
621                 }
622         }
623 }
624
625 static token_t pp_token;
626
627 static inline
628 void next_pp_token(void)
629 {
630         lexer_next_preprocessing_token(&pp_token);
631 }
632
633 static
634 void eat_until_newline(void)
635 {
636         while(pp_token.type != '\n' && pp_token.type != T_EOF) {
637                 next_pp_token();
638         }
639 }
640
/**
 * Handles an #error directive. For now only a fixed message is printed at
 * the current position; the tokens of the directive are not read.
 */
static
void error_directive(void)
{
	error_prefix();
	fputs("#error directive: \n", stderr);

	/* TODO: parse pp-tokens until new-line */
}
649
650 static
651 void define_directive(void)
652 {
653         token_t temptoken;
654
655         lexer_next_preprocessing_token(&temptoken);
656         if(temptoken.type != T_IDENTIFIER) {
657                 parse_error("expected identifier after #define\n");
658                 eat_until_newline();
659         }
660 }
661
662 static
663 void ifdef_directive(int is_ifndef)
664 {
665         (void) is_ifndef;
666         token_t temptoken;
667         lexer_next_preprocessing_token(&temptoken);
668         //expect_identifier();
669         //extect_newline();
670 }
671
/**
 * Handles an #endif directive. Nothing is done yet.
 */
static
void endif_directive(void)
{
	/* TODO: expect a newline */
}
677
678 static
679 void parse_line_directive(void)
680 {
681         if(pp_token.type != T_INTEGER) {
682                 parse_error("expected integer");
683         } else {
684                 source_position.linenr = pp_token.v.intvalue - 1;
685                 next_pp_token();
686         }
687         if(pp_token.type == T_STRING_LITERAL) {
688                 source_position.input_name = pp_token.v.string;
689                 next_pp_token();
690         }
691
692         eat_until_newline();
693 }
694
695 static
696 void parse_preprocessor_identifier(void)
697 {
698         assert(pp_token.type == T_IDENTIFIER);
699         symbol_t *symbol = pp_token.v.symbol;
700
701         switch(symbol->pp_ID) {
702         case TP_include:
703                 printf("include - enable header name parsing!\n");
704                 break;
705         case TP_define:
706                 define_directive();
707                 break;
708         case TP_ifdef:
709                 ifdef_directive(0);
710                 break;
711         case TP_ifndef:
712                 ifdef_directive(1);
713                 break;
714         case TP_endif:
715                 endif_directive();
716                 break;
717         case TP_line:
718                 next_pp_token();
719                 parse_line_directive();
720                 break;
721         case TP_if:
722         case TP_else:
723         case TP_elif:
724         case TP_undef:
725         case TP_error:
726                 error_directive();
727                 break;
728         case TP_pragma:
729                 break;
730         }
731 }
732
733 static
734 void parse_preprocessor_directive()
735 {
736         next_pp_token();
737
738         switch(pp_token.type) {
739         case T_IDENTIFIER:
740                 parse_preprocessor_identifier();
741                 break;
742         case T_INTEGER:
743                 parse_line_directive();
744                 break;
745         default:
746                 parse_error("invalid preprocessor directive");
747                 eat_until_newline();
748                 break;
749         }
750 }
751
/*
 * Helper macros for tokens that consist of several punctuation characters.
 * They are used inside the switch of lexer_next_preprocessing_token and
 * expect a variable named token there.
 *
 * MAYBE_PROLOG  consumes the first character and opens a loop with a
 *               switch over the character that follows.
 * MAYBE         if the following character is ch: consume it and return
 *               with token type set_type.
 * ELSE_CODE     closes the switch and the loop opened by MAYBE_PROLOG:
 *               trigraphs and line continuations are translated and looked
 *               at again, for every other character code is executed.
 * ELSE          ELSE_CODE that returns with token type set_type.
 */
#define MAYBE_PROLOG \
	next_char(); \
	while(1) { \
		switch(c) {

#define MAYBE(ch, set_type) \
		case ch: \
			next_char(); \
			token->type = set_type; \
			return;

#define ELSE_CODE(code) \
		SKIP_TRIGRAPHS(, \
			code; \
		) \
		\
		case '\\': \
			next_char(); \
			EAT_NEWLINE(break;) \
			/* fallthrough */ \
		default: \
			code; \
		} \
	} /* end of while(1) */ \
	break;

#define ELSE(set_type) \
	ELSE_CODE( \
		token->type = set_type; \
		return; \
	)
783
784 void lexer_next_preprocessing_token(token_t *token)
785 {
786         while(1) {
787                 switch(c) {
788                 case ' ':
789                 case '\t':
790                         next_char();
791                         break;
792
793                 MATCH_NEWLINE(
794                         token->type = '\n';
795                         return;
796                 )
797
798                 SYMBOL_CHARS
799                         parse_symbol(token);
800                         return;
801
802                 DIGITS
803                         parse_number(token);
804                         return;
805
806                 case '"':
807                         parse_string_literal(token);
808                         return;
809
810                 case '\'':
811                         parse_character_constant(token);
812                         return;
813
814                 case '\\':
815                         next_char();
816                         if(c == '\n') {
817                                 next_char();
818                                 source_position.linenr++;
819                                 break;
820                         } else {
821                                 parse_error("unexpected '\\' found");
822                                 token->type = T_ERROR;
823                         }
824                         return;
825
826                 case '.':
827                         MAYBE_PROLOG
828                                 case '.':
829                                         MAYBE_PROLOG
830                                         MAYBE('.', T_DOTDOTDOT)
831                                         ELSE_CODE(
832                                                 put_back(c);
833                                                 c = '.';
834                                                 token->type = '.';
835                                                 return;
836                                         )
837                         ELSE('.')
838                 case '&':
839                         MAYBE_PROLOG
840                         MAYBE('&', T_ANDAND)
841                         MAYBE('=', T_ANDEQUAL)
842                         ELSE('&')
843                 case '*':
844                         MAYBE_PROLOG
845                         MAYBE('=', T_ASTERISKEQUAL)
846                         ELSE('*')
847                 case '+':
848                         MAYBE_PROLOG
849                         MAYBE('+', T_PLUSPLUS)
850                         MAYBE('=', T_PLUSEQUAL)
851                         ELSE('+')
852                 case '-':
853                         MAYBE_PROLOG
854                         MAYBE('-', T_MINUSMINUS)
855                         MAYBE('=', T_MINUSEQUAL)
856                         ELSE('-')
857                 case '!':
858                         MAYBE_PROLOG
859                         MAYBE('=', T_EXCLAMATIONMARKEQUAL)
860                         ELSE('!')
861                 case '/':
862                         MAYBE_PROLOG
863                         MAYBE('=', T_SLASHEQUAL)
864                                 case '*':
865                                         next_char();
866                                         skip_multiline_comment();
867                                         lexer_next_preprocessing_token(token);
868                                         return;
869                                 case '/':
870                                         next_char();
871                                         skip_line_comment();
872                                         lexer_next_preprocessing_token(token);
873                                         return;
874                         ELSE('/')
875                 case '%':
876                         MAYBE_PROLOG
877                         MAYBE('>', T_PERCENTGREATER)
878                         MAYBE('=', T_PERCENTEQUAL)
879                                 case ':':
880                                         MAYBE_PROLOG
881                                                 case '%':
882                                                         MAYBE_PROLOG
883                                                         MAYBE(':', T_PERCENTCOLONPERCENTCOLON)
884                                                         ELSE_CODE(
885                                                                 put_back(c);
886                                                                 c = '%';
887                                                                 token->type = T_PERCENTCOLON;
888                                                                 return;
889                                                         )
890                                         ELSE(T_PERCENTCOLON)
891                         ELSE('%')
892                 case '<':
893                         MAYBE_PROLOG
894                         MAYBE(':', T_LESSCOLON)
895                         MAYBE('%', T_LESSPERCENT)
896                                 case '<':
897                                         MAYBE_PROLOG
898                                         MAYBE('=', T_LESSLESSEQUAL)
899                                         ELSE(T_LESSLESS)
900                         ELSE('<')
901                 case '>':
902                         MAYBE_PROLOG
903                                 case '>':
904                                         MAYBE_PROLOG
905                                         MAYBE('=', T_GREATERGREATEREQUAL)
906                                         ELSE(T_GREATERGREATER)
907                         ELSE('>')
908                 case '^':
909                         MAYBE_PROLOG
910                         MAYBE('=', T_CARETEQUAL)
911                         ELSE('^')
912                 case '|':
913                         MAYBE_PROLOG
914                         MAYBE('=', T_PIPEEQUAL)
915                         MAYBE('|', T_PIPEPIPE)
916                         ELSE('|')
917                 case ':':
918                         MAYBE_PROLOG
919                         MAYBE('>', T_COLONGREATER)
920                         ELSE(':')
921                 case '=':
922                         MAYBE_PROLOG
923                         MAYBE('=', T_EQUALEQUAL)
924                         ELSE('=')
925                 case '#':
926                         MAYBE_PROLOG
927                         MAYBE('#', T_HASHHASH)
928                         ELSE('#')
929
930                 case '?':
931                         next_char();
932                         /* just a simple ? */
933                         if(c != '?') {
934                                 token->type = '?';
935                                 return;
936                         }
937                         /* might be a trigraph */
938                         next_char();
939                         if(replace_trigraph()) {
940                                 break;
941                         }
942                         put_back(c);
943                         c = '?';
944                         token->type = '?';
945                         return;
946
947                 case '[':
948                 case ']':
949                 case '(':
950                 case ')':
951                 case '{':
952                 case '}':
953                 case '~':
954                 case ';':
955                 case ',':
956                         token->type = c;
957                         next_char();
958                         return;
959
960                 case EOF:
961                         token->type = T_EOF;
962                         return;
963
964                 default:
965                         next_char();
966                         error_prefix();
967                         fprintf(stderr, "unknown character '%c' found\n", c);
968                         token->type = T_ERROR;
969                         return;
970                 }
971         }
972 }
973
974 void lexer_next_token(token_t *token)
975 {
976         lexer_next_preprocessing_token(token);
977         if(token->type != '\n')
978                 return;
979
980 newline_found:
981         do {
982                 lexer_next_preprocessing_token(token);
983         } while(token->type == '\n');
984
985         if(token->type == '#') {
986                 parse_preprocessor_directive();
987                 goto newline_found;
988         }
989 }
990
991 void init_lexer(void)
992 {
993         strset_init(&stringset);
994 }
995
996 void lexer_open_stream(FILE *stream, const char *input_name)
997 {
998         input                      = stream;
999         source_position.linenr     = 0;
1000         source_position.input_name = input_name;
1001
1002         /* we place a virtual '\n' at the beginning so the lexer knows we're at the
1003          * beginning of a line */
1004         c = '\n';
1005 }
1006
1007 void exit_lexer(void)
1008 {
1009         strset_destroy(&stringset);
1010 }
1011
1012 static __attribute__((unused))
1013 void dbg_pos(const source_position_t source_position)
1014 {
1015         fprintf(stdout, "%s:%d\n", source_position.input_name, source_position.linenr);
1016         fflush(stdout);
1017 }