fix put_back in lexer
[cparser] / lexer.c
1 #include <config.h>
2
3 #include "lexer.h"
4 #include "token_t.h"
5 #include "symbol_table_t.h"
6 #include "adt/error.h"
7 #include "adt/strset.h"
8 #include "adt/util.h"
9
10 #include <assert.h>
11 #include <errno.h>
12 #include <string.h>
13 #include <ctype.h>
14
15 //#define DEBUG_CHARS
16 #define MAX_PUTBACK 3  /* bytes left free at the start of buf; fread() fills buf only behind them */
17
18 static int         c;  /* character the lexer currently looks at, or EOF */
19 token_t            lexer_token;  /* token (and source position) filled in by the lexer functions */
20 static FILE       *input;  /* stream next_real_char() reads from */
21 static char        buf[1024 + MAX_PUTBACK];  /* read buffer including the put-back area */
22 static const char *bufend;  /* one behind the last byte stored by fread() */
23 static const char *bufpos;  /* position of the character most recently loaded into c */
24 static strset_t    stringset;  /* set used to share identical string literals */
25
/*
 * Writes the "<input name>:<line>: Error: " prefix of a diagnostic to stderr.
 * No newline is written, the caller appends the actual message.
 */
static void error_prefix_at(const char *input_name, unsigned linenr)
{
	FILE *const out = stderr;

	fprintf(out, "%s:%u: Error: ", input_name, linenr);
}
30
31 static void error_prefix(void)
32 {
33         error_prefix_at(lexer_token.source_position.input_name,
34                         lexer_token.source_position.linenr);
35 }
36
/*
 * Reports an error at the current source position: the position prefix,
 * then msg, then a newline, all on stderr.
 */
static void parse_error(const char *msg)
{
	error_prefix();
	fprintf(stderr, "%s\n", msg);
}
42
43 static inline void next_real_char(void)
44 {
45         bufpos++;
46         if(bufpos >= bufend) {
47                 size_t s = fread(buf + MAX_PUTBACK, 1, sizeof(buf) - MAX_PUTBACK,
48                                  input);
49                 if(s == 0) {
50                         c = EOF;
51                         return;
52                 }
53                 bufpos = buf + MAX_PUTBACK;
54                 bufend = buf + MAX_PUTBACK + s;
55         }
56         c = *(bufpos);
57 }
58
59 static inline void put_back(int pc)
60 {
61         assert(bufpos >= buf);
62         assert(bufpos < buf+MAX_PUTBACK || *bufpos == pc);
63
64         char *p = buf + (bufpos - buf);
65         *p = pc;
66
67         /* going backwards in the buffer is legal as long as it's not more often
68          * than MAX_PUTBACK */
69         bufpos--;
70
71 #ifdef DEBUG_CHARS
72         printf("putback '%c'\n", pc);
73 #endif
74 }
75
76 static inline void next_char(void);
77
/*
 * Expands to the case labels for the line endings "\r", "\r\n" and "\n"
 * inside a switch over c. The line ending is consumed, the line number in
 * lexer_token is incremented and then `code` is executed. `code` has to
 * leave the case itself (break, return, ...), otherwise the '\r' case runs
 * on into the '\n' case.
 */
#define MATCH_NEWLINE(code)                        \
	case '\r':                                     \
		next_char();                               \
		if(c == '\n')                              \
			next_char();                           \
		++lexer_token.source_position.linenr;      \
		code;                                      \
	case '\n':                                     \
		next_char();                               \
		++lexer_token.source_position.linenr;      \
		code;
90
91 static inline void eat(char c_type)
92 {
93         assert(c == c_type);
94         next_char();
95 }
96
97 static void maybe_concat_lines(void)
98 {
99         eat('\\');
100
101         switch(c) {
102         MATCH_NEWLINE(return;)
103
104         default:
105                 break;
106         }
107
108         put_back(c);
109         c = '\\';
110 }
111
112 static inline void next_char(void)
113 {
114         next_real_char();
115
116         /* filter trigraphs */
117         if(UNLIKELY(c == '\\')) {
118                 maybe_concat_lines();
119                 goto end_of_next_char;
120         }
121
122         if(LIKELY(c != '?'))
123                 goto end_of_next_char;
124
125         next_real_char();
126         if(LIKELY(c != '?')) {
127                 put_back(c);
128                 c = '?';
129                 goto end_of_next_char;
130         }
131
132         next_real_char();
133         switch(c) {
134         case '=': c = '#'; break;
135         case '(': c = '['; break;
136         case '/': c = '\\'; maybe_concat_lines(); break;
137         case ')': c = ']'; break;
138         case '\'': c = '^'; break;
139         case '<': c = '{'; break;
140         case '!': c = '|'; break;
141         case '>': c = '}'; break;
142         case '-': c = '~'; break;
143         default:
144                 put_back('?');
145                 put_back(c);
146                 c = '?';
147                 break;
148         }
149
150 end_of_next_char:
151 #ifdef DEBUG_CHARS
152         printf("nchar '%c'\n", c);
153 #else
154         ;
155 #endif
156 }
157
/*
 * Case labels for the characters 'a'-'z', 'A'-'Z' and '_', to be used
 * inside a switch over c.
 */
#define SYMBOL_CHARS                                                 \
	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':      \
	case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':      \
	case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':      \
	case 's': case 't': case 'u': case 'v': case 'w': case 'x':      \
	case 'y': case 'z':                                              \
	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':      \
	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':      \
	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':      \
	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':      \
	case 'Y': case 'Z':                                              \
	case '_':
212
/* Case labels for the decimal digits, to be used inside a switch over c. */
#define DIGITS                                                       \
	case '0': case '1': case '2': case '3': case '4':                \
	case '5': case '6': case '7': case '8': case '9':
224
225 static void parse_symbol(void)
226 {
227         symbol_t *symbol;
228         char     *string;
229
230         obstack_1grow(&symbol_obstack, c);
231         next_char();
232
233         while(1) {
234                 switch(c) {
235                 DIGITS
236                 SYMBOL_CHARS
237                         obstack_1grow(&symbol_obstack, c);
238                         next_char();
239                         break;
240
241                 default:
242                         goto end_symbol;
243                 }
244         }
245
246 end_symbol:
247         obstack_1grow(&symbol_obstack, '\0');
248
249         string = obstack_finish(&symbol_obstack);
250         symbol = symbol_table_insert(string);
251
252         lexer_token.type     = symbol->ID;
253         lexer_token.v.symbol = symbol;
254
255         if(symbol->string != string) {
256                 obstack_free(&symbol_obstack, string);
257         }
258 }
259
260 static void parse_integer_suffix(void)
261 {
262         if(c == 'U' || c == 'U') {
263                 /* TODO do something with the suffixes... */
264                 next_char();
265                 if(c == 'L' || c == 'l') {
266                         next_char();
267                         if(c == 'L' || c == 'l') {
268                                 next_char();
269                         }
270                 }
271         } else if(c == 'l' || c == 'L') {
272                 next_char();
273                 if(c == 'l' || c == 'L') {
274                         next_char();
275                         if(c == 'u' || c == 'U') {
276                                 next_char();
277                         }
278                 } else if(c == 'u' || c == 'U') {
279                         next_char();
280                 }
281         }
282 }
283
284 static void parse_number_hex(void)
285 {
286         assert(c == 'x' || c == 'X');
287         next_char();
288
289         if (!isdigit(c) &&
290                 !('A' <= c && c <= 'F') &&
291                 !('a' <= c && c <= 'f')) {
292                 parse_error("premature end of hex number literal");
293                 lexer_token.type = T_ERROR;
294                 return;
295         }
296
297         int value = 0;
298         while(1) {
299                 if (isdigit(c)) {
300                         value = 16 * value + c - '0';
301                 } else if ('A' <= c && c <= 'F') {
302                         value = 16 * value + c - 'A' + 10;
303                 } else if ('a' <= c && c <= 'f') {
304                         value = 16 * value + c - 'a' + 10;
305                 } else {
306                         parse_integer_suffix();
307
308                         lexer_token.type       = T_INTEGER;
309                         lexer_token.v.intvalue = value;
310                         return;
311                 }
312                 next_char();
313         }
314
315         if(c == '.' || c == 'p' || c == 'P') {
316                 next_char();
317                 panic("Hex floating point numbers not implemented yet");
318         }
319 }
320
321 static void parse_number_oct(void)
322 {
323         int value = 0;
324         while(c >= '0' && c <= '7') {
325                 value = 8 * value + c - '0';
326                 next_char();
327         }
328         if (c == '8' || c == '9') {
329                 parse_error("invalid octal number");
330                 lexer_token.type = T_ERROR;
331                 return;
332         }
333
334         lexer_token.type       = T_INTEGER;
335         lexer_token.v.intvalue = value;
336
337         parse_integer_suffix();
338 }
339
340 static void parse_floatingpoint_exponent(long double value)
341 {
342         unsigned int expo = 0;
343         long double  factor = 10.;
344
345         if(c == '-') {
346                 next_char();
347                 factor = 0.1;
348         } else if(c == '+') {
349                 next_char();
350         }
351
352         while(c >= '0' && c <= '9') {
353                 expo = 10 * expo + (c - '0');
354                 next_char();
355         }
356
357         while(1) {
358                 if(expo & 1)
359                         value *= factor;
360                 expo >>= 1;
361                 if(expo == 0)
362                         break;
363                 factor *= factor;
364         }
365
366         lexer_token.type         = T_FLOATINGPOINT;
367         lexer_token.v.floatvalue = value;
368 }
369
370 static void parse_floatingpoint_fract(int integer_part)
371 {
372         long double value  = integer_part;
373         long double factor = 1.;
374
375         while(c >= '0' && c <= '9') {
376                 factor *= 0.1;
377                 value  += (c - '0') * factor;
378                 next_char();
379         }
380
381         if(c == 'e' || c == 'E') {
382                 next_char();
383                 parse_floatingpoint_exponent(value);
384                 return;
385         }
386
387         lexer_token.type         = T_FLOATINGPOINT;
388         lexer_token.v.floatvalue = value;
389 }
390
391 static void parse_number_dec(void)
392 {
393         int value = 0;
394
395         while(isdigit(c)) {
396                 value = 10 * value + c - '0';
397                 next_char();
398         }
399
400         if(c == '.') {
401                 next_char();
402                 parse_floatingpoint_fract(value);
403                 return;
404         }
405         if(c == 'e' || c == 'E') {
406                 next_char();
407                 parse_floatingpoint_exponent(value);
408                 return;
409         }
410         parse_integer_suffix();
411
412         lexer_token.type       = T_INTEGER;
413         lexer_token.v.intvalue = value;
414 }
415
416 static void parse_number(void)
417 {
418         if (c == '0') {
419                 next_char();
420                 switch (c) {
421                         case 'X':
422                         case 'x':
423                                 parse_number_hex();
424                                 break;
425                         case '0':
426                         case '1':
427                         case '2':
428                         case '3':
429                         case '4':
430                         case '5':
431                         case '6':
432                         case '7':
433                                 parse_number_oct();
434                                 break;
435                         case '.':
436                                 next_char();
437                                 parse_floatingpoint_fract(0);
438                                 break;
439                         case 'e':
440                         case 'E':
441                                 parse_floatingpoint_exponent(0);
442                                 break;
443                         case '8':
444                         case '9':
445                                 next_char();
446                                 parse_error("invalid octal number");
447                                 lexer_token.type = T_ERROR;
448                                 return;
449                         default:
450                                 put_back(c);
451                                 c = '0';
452                                 parse_number_dec();
453                                 return;
454                 }
455         } else {
456                 parse_number_dec();
457         }
458 }
459
460 static int parse_octal_sequence(void)
461 {
462         int value = 0;
463         while(1) {
464                 if(c < '0' || c > '7')
465                         break;
466                 value = 8 * value + c - '0';
467                 next_char();
468         }
469
470         return value;
471 }
472
473 static int parse_hex_sequence(void)
474 {
475         int value = 0;
476         while(1) {
477                 if (c >= '0' && c <= '9') {
478                         value = 16 * value + c - '0';
479                 } else if ('A' <= c && c <= 'F') {
480                         value = 16 * value + c - 'A' + 10;
481                 } else if ('a' <= c && c <= 'f') {
482                         value = 16 * value + c - 'a' + 10;
483                 } else {
484                         break;
485                 }
486                 next_char();
487         }
488
489         return value;
490 }
491
492 static int parse_escape_sequence(void)
493 {
494         eat('\\');
495
496         int ec = c;
497         next_char();
498
499         switch(ec) {
500         case '"':  return '"';
501         case '\'': return'\'';
502         case '\\': return '\\';
503         case '?': return '\?';
504         case 'a': return '\a';
505         case 'b': return '\b';
506         case 'f': return '\f';
507         case 'n': return '\n';
508         case 'r': return '\r';
509         case 't': return '\t';
510         case 'v': return '\v';
511         case 'x':
512                 return parse_hex_sequence();
513         case '0':
514         case '1':
515         case '2':
516         case '3':
517         case '4':
518         case '5':
519         case '6':
520         case '7':
521                 return parse_octal_sequence();
522         case EOF:
523                 parse_error("reached end of file while parsing escape sequence");
524                 return EOF;
525         default:
526                 parse_error("unknown escape sequence");
527                 return EOF;
528         }
529 }
530
531 const char *concat_strings(const char *s1, const char *s2)
532 {
533         size_t  len1   = strlen(s1);
534         size_t  len2   = strlen(s2);
535
536         char   *concat = obstack_alloc(&symbol_obstack, len1 + len2 + 1);
537         memcpy(concat, s1, len1);
538         memcpy(concat + len1, s2, len2 + 1);
539
540         const char *result = strset_insert(&stringset, concat);
541         if(result != concat) {
542                 obstack_free(&symbol_obstack, concat);
543         }
544
545         return result;
546 }
547
548 static void parse_string_literal(void)
549 {
550         unsigned    start_linenr = lexer_token.source_position.linenr;
551         char       *string;
552         const char *result;
553
554         assert(c == '"');
555         next_char();
556
557         int tc;
558         while(1) {
559                 switch(c) {
560                 case '\\':
561                         tc = parse_escape_sequence();
562                         obstack_1grow(&symbol_obstack, tc);
563                         break;
564
565                 case EOF:
566                         error_prefix_at(lexer_token.source_position.input_name,
567                                         start_linenr);
568                         fprintf(stderr, "string has no end\n");
569                         lexer_token.type = T_ERROR;
570                         return;
571
572                 case '"':
573                         next_char();
574                         goto end_of_string;
575
576                 default:
577                         obstack_1grow(&symbol_obstack, c);
578                         next_char();
579                         break;
580                 }
581         }
582
583 end_of_string:
584
585         /* TODO: concatenate multiple strings separated by whitespace... */
586
587         /* add finishing 0 to the string */
588         obstack_1grow(&symbol_obstack, '\0');
589         string = obstack_finish(&symbol_obstack);
590
591         /* check if there is already a copy of the string */
592         result = strset_insert(&stringset, string);
593         if(result != string) {
594                 obstack_free(&symbol_obstack, string);
595         }
596
597         lexer_token.type     = T_STRING_LITERAL;
598         lexer_token.v.string = result;
599 }
600
601 static void parse_character_constant(void)
602 {
603         eat('\'');
604
605         int found_char = 0;
606         while(1) {
607                 switch(c) {
608                 case '\\':
609                         found_char = parse_escape_sequence();
610                         break;
611
612                 MATCH_NEWLINE(
613                         parse_error("newline while parsing character constant");
614                         break;
615                 )
616
617                 case '\'':
618                         next_char();
619                         goto end_of_char_constant;
620
621                 case EOF:
622                         parse_error("EOF while parsing character constant");
623                         lexer_token.type = T_ERROR;
624                         return;
625
626                 default:
627                         if(found_char != 0) {
628                                 parse_error("more than 1 characters in character "
629                                             "constant");
630                                 goto end_of_char_constant;
631                         } else {
632                                 found_char = c;
633                                 next_char();
634                         }
635                         break;
636                 }
637         }
638
639 end_of_char_constant:
640         lexer_token.type       = T_INTEGER;
641         lexer_token.v.intvalue = found_char;
642 }
643
644 static void skip_multiline_comment(void)
645 {
646         unsigned start_linenr = lexer_token.source_position.linenr;
647
648         while(1) {
649                 switch(c) {
650                 case '*':
651                         next_char();
652                         if(c == '/') {
653                                 next_char();
654                                 return;
655                         }
656                         break;
657
658                 MATCH_NEWLINE(break;)
659
660                 case EOF:
661                         error_prefix_at(lexer_token.source_position.input_name,
662                                         start_linenr);
663                         fprintf(stderr, "at end of file while looking for comment end\n");
664                         return;
665
666                 default:
667                         next_char();
668                         break;
669                 }
670         }
671 }
672
673 static void skip_line_comment(void)
674 {
675         while(1) {
676                 switch(c) {
677                 case EOF:
678                         return;
679
680                 case '\n':
681                 case '\r':
682                         return;
683
684                 default:
685                         next_char();
686                         break;
687                 }
688         }
689 }
690
691 static token_t pp_token;
692
693 static inline void next_pp_token(void)
694 {
695         lexer_next_preprocessing_token();
696         pp_token = lexer_token;
697 }
698
699 static void eat_until_newline(void)
700 {
701         while(pp_token.type != '\n' && pp_token.type != T_EOF) {
702                 next_pp_token();
703         }
704 }
705
/*
 * Prints the heading of an #error diagnostic at the current source
 * position. The tokens of the directive are not read yet.
 */
static void error_directive(void)
{
	error_prefix();
	fputs("#error directive: \n", stderr);

	/* TODO: parse pp-tokens until new-line */
}
713
714 static void define_directive(void)
715 {
716         lexer_next_preprocessing_token();
717         if(lexer_token.type != T_IDENTIFIER) {
718                 parse_error("expected identifier after #define\n");
719                 eat_until_newline();
720         }
721 }
722
/*
 * Placeholder for #ifdef/#ifndef: reads one preprocessing token and does
 * nothing else. is_ifndef is not used yet.
 */
static void ifdef_directive(int is_ifndef)
{
	(void) is_ifndef;

	lexer_next_preprocessing_token();
	/* TODO: expect an identifier followed by a newline */
}
730
/* Placeholder for #endif: nothing is read or checked yet. */
static void endif_directive(void)
{
	/* TODO: expect a newline */
}
735
736 static void parse_line_directive(void)
737 {
738         if(pp_token.type != T_INTEGER) {
739                 parse_error("expected integer");
740         } else {
741                 lexer_token.source_position.linenr = pp_token.v.intvalue - 1;
742                 next_pp_token();
743         }
744         if(pp_token.type == T_STRING_LITERAL) {
745                 lexer_token.source_position.input_name = pp_token.v.string;
746                 next_pp_token();
747         }
748
749         eat_until_newline();
750 }
751
752 static void parse_preprocessor_identifier(void)
753 {
754         assert(pp_token.type == T_IDENTIFIER);
755         symbol_t *symbol = pp_token.v.symbol;
756
757         switch(symbol->pp_ID) {
758         case TP_include:
759                 printf("include - enable header name parsing!\n");
760                 break;
761         case TP_define:
762                 define_directive();
763                 break;
764         case TP_ifdef:
765                 ifdef_directive(0);
766                 break;
767         case TP_ifndef:
768                 ifdef_directive(1);
769                 break;
770         case TP_endif:
771                 endif_directive();
772                 break;
773         case TP_line:
774                 next_pp_token();
775                 parse_line_directive();
776                 break;
777         case TP_if:
778         case TP_else:
779         case TP_elif:
780         case TP_undef:
781         case TP_error:
782                 error_directive();
783                 break;
784         case TP_pragma:
785                 break;
786         }
787 }
788
789 static void parse_preprocessor_directive()
790 {
791         next_pp_token();
792
793         switch(pp_token.type) {
794         case T_IDENTIFIER:
795                 parse_preprocessor_identifier();
796                 break;
797         case T_INTEGER:
798                 parse_line_directive();
799                 break;
800         default:
801                 parse_error("invalid preprocessor directive");
802                 eat_until_newline();
803                 break;
804         }
805 }
806
807 #define MAYBE_PROLOG                                       \
808                         next_char();                                   \
809                         while(1) {                                     \
810                                 switch(c) {
811
812 #define MAYBE(ch, set_type)                                \
813                                 case ch:                                   \
814                                         next_char();                           \
815                                         lexer_token.type = set_type;           \
816                                         return;
817
818 #define ELSE_CODE(code)                                    \
819                                 default:                                   \
820                                         code;                                  \
821                                 }                                          \
822                         } /* end of while(1) */                        \
823                         break;
824
825 #define ELSE(set_type)                                     \
826                 ELSE_CODE(                                         \
827                         lexer_token.type = set_type;                   \
828                         return;                                        \
829                 )
830
831 void lexer_next_preprocessing_token(void)
832 {
833         while(1) {
834                 switch(c) {
835                 case ' ':
836                 case '\t':
837                         next_char();
838                         break;
839
840                 MATCH_NEWLINE(
841                         lexer_token.type = '\n';
842                         return;
843                 )
844
845                 SYMBOL_CHARS
846                         parse_symbol();
847                         return;
848
849                 DIGITS
850                         parse_number();
851                         return;
852
853                 case '"':
854                         parse_string_literal();
855                         return;
856
857                 case '\'':
858                         parse_character_constant();
859                         return;
860
861                 case '.':
862                         MAYBE_PROLOG
863                                 case '.':
864                                         MAYBE_PROLOG
865                                         MAYBE('.', T_DOTDOTDOT)
866                                         ELSE_CODE(
867                                                 put_back(c);
868                                                 c = '.';
869                                                 lexer_token.type = '.';
870                                                 return;
871                                         )
872                         ELSE('.')
873                 case '&':
874                         MAYBE_PROLOG
875                         MAYBE('&', T_ANDAND)
876                         MAYBE('=', T_ANDEQUAL)
877                         ELSE('&')
878                 case '*':
879                         MAYBE_PROLOG
880                         MAYBE('=', T_ASTERISKEQUAL)
881                         ELSE('*')
882                 case '+':
883                         MAYBE_PROLOG
884                         MAYBE('+', T_PLUSPLUS)
885                         MAYBE('=', T_PLUSEQUAL)
886                         ELSE('+')
887                 case '-':
888                         MAYBE_PROLOG
889                         MAYBE('>', T_MINUSGREATER)
890                         MAYBE('-', T_MINUSMINUS)
891                         MAYBE('=', T_MINUSEQUAL)
892                         ELSE('-')
893                 case '!':
894                         MAYBE_PROLOG
895                         MAYBE('=', T_EXCLAMATIONMARKEQUAL)
896                         ELSE('!')
897                 case '/':
898                         MAYBE_PROLOG
899                         MAYBE('=', T_SLASHEQUAL)
900                                 case '*':
901                                         next_char();
902                                         skip_multiline_comment();
903                                         lexer_next_preprocessing_token();
904                                         return;
905                                 case '/':
906                                         next_char();
907                                         skip_line_comment();
908                                         lexer_next_preprocessing_token();
909                                         return;
910                         ELSE('/')
911                 case '%':
912                         MAYBE_PROLOG
913                         MAYBE('>', T_PERCENTGREATER)
914                         MAYBE('=', T_PERCENTEQUAL)
915                                 case ':':
916                                         MAYBE_PROLOG
917                                                 case '%':
918                                                         MAYBE_PROLOG
919                                                         MAYBE(':', T_PERCENTCOLONPERCENTCOLON)
920                                                         ELSE_CODE(
921                                                                 put_back(c);
922                                                                 c = '%';
923                                                                 lexer_token.type = T_PERCENTCOLON;
924                                                                 return;
925                                                         )
926                                         ELSE(T_PERCENTCOLON)
927                         ELSE('%')
928                 case '<':
929                         MAYBE_PROLOG
930                         MAYBE(':', T_LESSCOLON)
931                         MAYBE('%', T_LESSPERCENT)
932                         MAYBE('=', T_LESSEQUAL)
933                                 case '<':
934                                         MAYBE_PROLOG
935                                         MAYBE('=', T_LESSLESSEQUAL)
936                                         ELSE(T_LESSLESS)
937                         ELSE('<')
938                 case '>':
939                         MAYBE_PROLOG
940                         MAYBE('=', T_GREATEREQUAL)
941                                 case '>':
942                                         MAYBE_PROLOG
943                                         MAYBE('=', T_GREATERGREATEREQUAL)
944                                         ELSE(T_GREATERGREATER)
945                         ELSE('>')
946                 case '^':
947                         MAYBE_PROLOG
948                         MAYBE('=', T_CARETEQUAL)
949                         ELSE('^')
950                 case '|':
951                         MAYBE_PROLOG
952                         MAYBE('=', T_PIPEEQUAL)
953                         MAYBE('|', T_PIPEPIPE)
954                         ELSE('|')
955                 case ':':
956                         MAYBE_PROLOG
957                         MAYBE('>', T_COLONGREATER)
958                         ELSE(':')
959                 case '=':
960                         MAYBE_PROLOG
961                         MAYBE('=', T_EQUALEQUAL)
962                         ELSE('=')
963                 case '#':
964                         MAYBE_PROLOG
965                         MAYBE('#', T_HASHHASH)
966                         ELSE('#')
967
968                 case '?':
969                 case '[':
970                 case ']':
971                 case '(':
972                 case ')':
973                 case '{':
974                 case '}':
975                 case '~':
976                 case ';':
977                 case ',':
978                 case '\\':
979                         lexer_token.type = c;
980                         next_char();
981                         return;
982
983                 case EOF:
984                         lexer_token.type = T_EOF;
985                         return;
986
987                 default:
988                         next_char();
989                         error_prefix();
990                         fprintf(stderr, "unknown character '%c' found\n", c);
991                         lexer_token.type = T_ERROR;
992                         return;
993                 }
994         }
995 }
996
997 void lexer_next_token(void)
998 {
999         lexer_next_preprocessing_token();
1000         if(lexer_token.type != '\n')
1001                 return;
1002
1003 newline_found:
1004         do {
1005                 lexer_next_preprocessing_token();
1006         } while(lexer_token.type == '\n');
1007
1008         if(lexer_token.type == '#') {
1009                 parse_preprocessor_directive();
1010                 goto newline_found;
1011         }
1012 }
1013
1014 void init_lexer(void)
1015 {
1016         strset_init(&stringset);
1017 }
1018
1019 void lexer_open_stream(FILE *stream, const char *input_name)
1020 {
1021         input                                  = stream;
1022         lexer_token.source_position.linenr     = 1;
1023         lexer_token.source_position.input_name = input_name;
1024
1025         next_char();
1026 }
1027
1028 void exit_lexer(void)
1029 {
1030         strset_destroy(&stringset);
1031 }
1032
1033 static __attribute__((unused))
1034 void dbg_pos(const source_position_t source_position)
1035 {
1036         fprintf(stdout, "%s:%d\n", source_position.input_name,
1037                 source_position.linenr);
1038         fflush(stdout);
1039 }