More work on cparser:
[cparser] / lexer.c
1 #include <config.h>
2
3 #include "lexer.h"
4 #include "token_t.h"
5 #include "symbol_table_t.h"
6 #include "adt/error.h"
7 #include "adt/strset.h"
8 #include "adt/util.h"
9
10 #include <assert.h>
11 #include <errno.h>
12 #include <string.h>
13 #include <ctype.h>
14
15 #define DEBUG_CHARS
16 #define MAX_PUTBACK 3
17
18 static int         c;
19 token_t            lexer_token;
20 static FILE       *input;
21 static char        buf[1024 + MAX_PUTBACK];
22 static const char *bufend;
23 static const char *bufpos;
24 static strset_t    stringset;
25 //static FILE      **input_stack;
26 //static char      **buf_stack;
27
/**
 * Writes the diagnostic prefix "<input_name>:<linenr>: Error: " to stderr.
 * No newline is emitted; the caller prints the actual message afterwards.
 *
 * @param input_name  name of the input shown in front of the line number
 * @param linenr      line number shown in the prefix
 */
static void error_prefix_at(const char *input_name, unsigned linenr)
{
        FILE *const out = stderr;

        fprintf(out, "%s:%u: Error: ", input_name, linenr);
}
32
33 static void error_prefix(void)
34 {
35         error_prefix_at(lexer_token.source_position.input_name,
36                         lexer_token.source_position.linenr);
37 }
38
/**
 * Reports an error at the current lexer position: the position prefix,
 * then msg, then a newline, all on stderr.
 *
 * @param msg  message text (printed verbatim, without trailing newline)
 */
static void parse_error(const char *msg)
{
        FILE *const out = stderr;

        error_prefix();
        fprintf(out, "%s\n", msg);
}
44
45 static inline void next_real_char(void)
46 {
47         bufpos++;
48         if(bufpos >= bufend) {
49                 size_t s = fread(buf + MAX_PUTBACK, 1, sizeof(buf) - MAX_PUTBACK,
50                                  input);
51                 if(s == 0) {
52                         c = EOF;
53                         return;
54                 }
55                 bufpos = buf + MAX_PUTBACK;
56                 bufend = buf + MAX_PUTBACK + s;
57         }
58         c = *(bufpos);
59 }
60
61 static inline void put_back(int pc)
62 {
63         char *p = (char*) bufpos - 1;
64         bufpos--;
65         assert(p >= buf);
66         *p = pc;
67
68 #ifdef DEBUG_CHARS
69         printf("putback '%c'\n", pc);
70 #endif
71 }
72
static inline void next_char(void);

/**
 * Case labels for a line terminator, for use inside a switch on c.
 *
 * Accepts "\r", "\r\n" and "\n": the terminator is consumed with
 * next_char(), the current line number is incremented, then code runs.
 * code should leave the case (return/break/goto); otherwise the '\r' case
 * falls through into the '\n' case.
 */
#define MATCH_NEWLINE(code)                               \
        case '\r':                                        \
                next_char();                              \
                if(c == '\n')                             \
                        next_char();                      \
                lexer_token.source_position.linenr++;     \
                code;                                     \
        case '\n':                                        \
                next_char();                              \
                lexer_token.source_position.linenr++;     \
                code;
87
88 static inline void eat(char c_type)
89 {
90         assert(c == c_type);
91         next_char();
92 }
93
94 static void maybe_concat_lines(void)
95 {
96         eat('\\');
97
98         switch(c) {
99         MATCH_NEWLINE(return;)
100
101         default:
102                 break;
103         }
104
105         put_back(c);
106         c = '\\';
107 }
108
109 static inline void next_char(void)
110 {
111         next_real_char();
112
113         /* filter trigraphs */
114         if(UNLIKELY(c == '\\')) {
115                 maybe_concat_lines();
116                 goto end_of_next_char;
117         }
118
119         if(LIKELY(c != '?'))
120                 goto end_of_next_char;
121
122         next_real_char();
123         if(LIKELY(c != '?')) {
124                 put_back(c);
125                 c = '?';
126                 goto end_of_next_char;
127         }
128
129         next_real_char();
130         switch(c) {
131         case '=': c = '#'; break;
132         case '(': c = '['; break;
133         case '/': c = '\\'; maybe_concat_lines(); break;
134         case ')': c = ']'; break;
135         case '\'': c = '^'; break;
136         case '<': c = '{'; break;
137         case '!': c = '|'; break;
138         case '>': c = '}'; break;
139         case '-': c = '~'; break;
140         default:
141                 put_back('?');
142                 put_back(c);
143                 c = '?';
144                 break;
145         }
146
147 end_of_next_char:
148 #ifdef DEBUG_CHARS
149         printf("nchar '%c'\n", c);
150 #else
151         ;
152 #endif
153 }
154
/**
 * Case labels for the letters a-z, A-Z and the underscore, for use inside a
 * switch statement. Together with DIGITS these are the characters
 * parse_symbol() accepts.
 */
#define SYMBOL_CHARS                                                     \
        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':      \
        case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':      \
        case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':      \
        case 's': case 't': case 'u': case 'v': case 'w': case 'x':      \
        case 'y': case 'z':                                              \
        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':      \
        case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':      \
        case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':      \
        case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':      \
        case 'Y': case 'Z':                                              \
        case '_':
209
/**
 * Case labels for the decimal digits '0' to '9', for use inside a switch
 * statement.
 */
#define DIGITS                                                    \
        case '0': case '1': case '2': case '3': case '4':         \
        case '5': case '6': case '7': case '8': case '9':
221
222 static void parse_symbol(void)
223 {
224         symbol_t *symbol;
225         char     *string;
226
227         obstack_1grow(&symbol_obstack, c);
228         next_char();
229
230         while(1) {
231                 switch(c) {
232                 DIGITS
233                 SYMBOL_CHARS
234                         obstack_1grow(&symbol_obstack, c);
235                         next_char();
236                         break;
237
238                 default:
239                         goto end_symbol;
240                 }
241         }
242
243 end_symbol:
244         obstack_1grow(&symbol_obstack, '\0');
245
246         string = obstack_finish(&symbol_obstack);
247         symbol = symbol_table_insert(string);
248
249         lexer_token.type     = symbol->ID;
250         lexer_token.v.symbol = symbol;
251
252         if(symbol->string != string) {
253                 obstack_free(&symbol_obstack, string);
254         }
255 }
256
257 static void parse_integer_suffix(void)
258 {
259         if(c == 'U' || c == 'U') {
260                 /* TODO do something with the suffixes... */
261                 next_char();
262                 if(c == 'L' || c == 'l') {
263                         next_char();
264                         if(c == 'L' || c == 'l') {
265                                 next_char();
266                         }
267                 }
268         } else if(c == 'l' || c == 'L') {
269                 next_char();
270                 if(c == 'l' || c == 'L') {
271                         next_char();
272                         if(c == 'u' || c == 'U') {
273                                 next_char();
274                         }
275                 } else if(c == 'u' || c == 'U') {
276                         next_char();
277                 }
278         }
279 }
280
281 static void parse_number_hex(void)
282 {
283         assert(c == 'x' || c == 'X');
284         next_char();
285
286         if (!isdigit(c) &&
287                 !('A' <= c && c <= 'F') &&
288                 !('a' <= c && c <= 'f')) {
289                 parse_error("premature end of hex number literal");
290                 lexer_token.type = T_ERROR;
291                 return;
292         }
293
294         int value = 0;
295         while(1) {
296                 if (isdigit(c)) {
297                         value = 16 * value + c - '0';
298                 } else if ('A' <= c && c <= 'F') {
299                         value = 16 * value + c - 'A' + 10;
300                 } else if ('a' <= c && c <= 'f') {
301                         value = 16 * value + c - 'a' + 10;
302                 } else {
303                         parse_integer_suffix();
304
305                         lexer_token.type       = T_INTEGER;
306                         lexer_token.v.intvalue = value;
307                         return;
308                 }
309                 next_char();
310         }
311
312         if(c == '.' || c == 'p' || c == 'P') {
313                 next_char();
314                 panic("Hex floating point numbers not implemented yet");
315         }
316 }
317
318 static void parse_number_oct(void)
319 {
320         int value = 0;
321         while(c >= '0' && c <= '7') {
322                 value = 8 * value + c - '0';
323                 next_char();
324         }
325         if (c == '8' || c == '9') {
326                 parse_error("invalid octal number");
327                 lexer_token.type = T_ERROR;
328                 return;
329         }
330
331         lexer_token.type       = T_INTEGER;
332         lexer_token.v.intvalue = value;
333
334         parse_integer_suffix();
335 }
336
337 static void parse_floatingpoint_exponent(long double value)
338 {
339         unsigned int expo = 0;
340         long double  factor = 10.;
341
342         if(c == '-') {
343                 next_char();
344                 factor = 0.1;
345         } else if(c == '+') {
346                 next_char();
347         }
348
349         while(c >= '0' && c <= '9') {
350                 expo = 10 * expo + (c - '0');
351                 next_char();
352         }
353
354         while(1) {
355                 if(expo & 1)
356                         value *= factor;
357                 expo >>= 1;
358                 if(expo == 0)
359                         break;
360                 factor *= factor;
361         }
362
363         lexer_token.type         = T_FLOATINGPOINT;
364         lexer_token.v.floatvalue = value;
365 }
366
367 static void parse_floatingpoint_fract(int integer_part)
368 {
369         long double value  = integer_part;
370         long double factor = 1.;
371
372         while(c >= '0' && c <= '9') {
373                 factor *= 0.1;
374                 value  += (c - '0') * factor;
375                 next_char();
376         }
377
378         if(c == 'e' || c == 'E') {
379                 next_char();
380                 parse_floatingpoint_exponent(value);
381                 return;
382         }
383
384         lexer_token.type         = T_FLOATINGPOINT;
385         lexer_token.v.floatvalue = value;
386 }
387
388 static void parse_number_dec(void)
389 {
390         int value = 0;
391
392         while(isdigit(c)) {
393                 value = 10 * value + c - '0';
394                 next_char();
395         }
396
397         if(c == '.') {
398                 next_char();
399                 parse_floatingpoint_fract(value);
400                 return;
401         }
402         if(c == 'e' || c == 'E') {
403                 next_char();
404                 parse_floatingpoint_exponent(value);
405                 return;
406         }
407         parse_integer_suffix();
408
409         lexer_token.type       = T_INTEGER;
410         lexer_token.v.intvalue = value;
411 }
412
413 static void parse_number(void)
414 {
415         if (c == '0') {
416                 next_char();
417                 switch (c) {
418                         case 'X':
419                         case 'x':
420                                 parse_number_hex();
421                                 break;
422                         case '0':
423                         case '1':
424                         case '2':
425                         case '3':
426                         case '4':
427                         case '5':
428                         case '6':
429                         case '7':
430                                 parse_number_oct();
431                                 break;
432                         case '.':
433                                 next_char();
434                                 parse_floatingpoint_fract(0);
435                                 break;
436                         case 'e':
437                         case 'E':
438                                 parse_floatingpoint_exponent(0);
439                                 break;
440                         case '8':
441                         case '9':
442                                 next_char();
443                                 parse_error("invalid octal number");
444                                 lexer_token.type = T_ERROR;
445                                 return;
446                         default:
447                                 put_back(c);
448                                 c = '0';
449                                 parse_number_dec();
450                                 return;
451                 }
452         } else {
453                 parse_number_dec();
454         }
455 }
456
457 static int parse_octal_sequence(void)
458 {
459         int value = 0;
460         while(1) {
461                 if(c < '0' || c > '7')
462                         break;
463                 value = 8 * value + c - '0';
464                 next_char();
465         }
466
467         return value;
468 }
469
470 static int parse_hex_sequence(void)
471 {
472         int value = 0;
473         while(1) {
474                 if (c >= '0' && c <= '9') {
475                         value = 16 * value + c - '0';
476                 } else if ('A' <= c && c <= 'F') {
477                         value = 16 * value + c - 'A' + 10;
478                 } else if ('a' <= c && c <= 'f') {
479                         value = 16 * value + c - 'a' + 10;
480                 } else {
481                         break;
482                 }
483                 next_char();
484         }
485
486         return value;
487 }
488
489 static int parse_escape_sequence(void)
490 {
491         eat('\\');
492
493         int ec = c;
494         next_char();
495
496         switch(ec) {
497         case '"':  return '"';
498         case '\'': return'\'';
499         case '\\': return '\\';
500         case '?': return '\?';
501         case 'a': return '\a';
502         case 'b': return '\b';
503         case 'f': return '\f';
504         case 'n': return '\n';
505         case 'r': return '\r';
506         case 't': return '\t';
507         case 'v': return '\v';
508         case 'x':
509                 return parse_hex_sequence();
510         case '0':
511         case '1':
512         case '2':
513         case '3':
514         case '4':
515         case '5':
516         case '6':
517         case '7':
518                 return parse_octal_sequence();
519         case EOF:
520                 parse_error("reached end of file while parsing escape sequence");
521                 return EOF;
522         default:
523                 parse_error("unknown escape sequence");
524                 return EOF;
525         }
526 }
527
528 const char *concat_strings(const char *s1, const char *s2)
529 {
530         size_t  len1   = strlen(s1);
531         size_t  len2   = strlen(s2);
532
533         char   *concat = obstack_alloc(&symbol_obstack, len1 + len2 + 1);
534         memcpy(concat, s1, len1);
535         memcpy(concat + len1, s2, len2 + 1);
536
537         const char *result = strset_insert(&stringset, concat);
538         if(result != concat) {
539                 obstack_free(&symbol_obstack, concat);
540         }
541
542         return result;
543 }
544
545 static void parse_string_literal(void)
546 {
547         unsigned    start_linenr = lexer_token.source_position.linenr;
548         char       *string;
549         const char *result;
550
551         assert(c == '"');
552         next_char();
553
554         int tc;
555         while(1) {
556                 switch(c) {
557                 case '\\':
558                         tc = parse_escape_sequence();
559                         obstack_1grow(&symbol_obstack, tc);
560                         break;
561
562                 case EOF:
563                         error_prefix_at(lexer_token.source_position.input_name,
564                                         start_linenr);
565                         fprintf(stderr, "string has no end\n");
566                         lexer_token.type = T_ERROR;
567                         return;
568
569                 case '"':
570                         next_char();
571                         goto end_of_string;
572
573                 default:
574                         obstack_1grow(&symbol_obstack, c);
575                         next_char();
576                         break;
577                 }
578         }
579
580 end_of_string:
581
582         /* TODO: concatenate multiple strings separated by whitespace... */
583
584         /* add finishing 0 to the string */
585         obstack_1grow(&symbol_obstack, '\0');
586         string = obstack_finish(&symbol_obstack);
587
588         /* check if there is already a copy of the string */
589         result = strset_insert(&stringset, string);
590         if(result != string) {
591                 obstack_free(&symbol_obstack, string);
592         }
593
594         lexer_token.type     = T_STRING_LITERAL;
595         lexer_token.v.string = result;
596 }
597
598 static void parse_character_constant(void)
599 {
600         eat('\'');
601
602         int found_char = 0;
603         while(1) {
604                 switch(c) {
605                 case '\\':
606                         found_char = parse_escape_sequence();
607                         break;
608
609                 MATCH_NEWLINE(
610                         parse_error("newline while parsing character constant");
611                         break;
612                 )
613
614                 case '\'':
615                         next_char();
616                         goto end_of_char_constant;
617
618                 case EOF:
619                         parse_error("EOF while parsing character constant");
620                         lexer_token.type = T_ERROR;
621                         return;
622
623                 default:
624                         if(found_char != 0) {
625                                 parse_error("more than 1 characters in character "
626                                             "constant");
627                                 goto end_of_char_constant;
628                         } else {
629                                 found_char = c;
630                                 next_char();
631                         }
632                         break;
633                 }
634         }
635
636 end_of_char_constant:
637         lexer_token.type       = T_INTEGER;
638         lexer_token.v.intvalue = found_char;
639 }
640
641 static void skip_multiline_comment(void)
642 {
643         unsigned start_linenr = lexer_token.source_position.linenr;
644
645         while(1) {
646                 switch(c) {
647                 case '*':
648                         next_char();
649                         if(c == '/') {
650                                 next_char();
651                                 return;
652                         }
653                         break;
654
655                 MATCH_NEWLINE(break;)
656
657                 case EOF:
658                         error_prefix_at(lexer_token.source_position.input_name,
659                                         start_linenr);
660                         fprintf(stderr, "at end of file while looking for comment end\n");
661                         return;
662
663                 default:
664                         next_char();
665                         break;
666                 }
667         }
668 }
669
670 static void skip_line_comment(void)
671 {
672         while(1) {
673                 switch(c) {
674                 case EOF:
675                         return;
676
677                 case '\n':
678                 case '\r':
679                         return;
680
681                 default:
682                         next_char();
683                         break;
684                 }
685         }
686 }
687
688 static token_t pp_token;
689
690 static inline void next_pp_token(void)
691 {
692         lexer_next_preprocessing_token();
693         pp_token = lexer_token;
694 }
695
696 static void eat_until_newline(void)
697 {
698         while(pp_token.type != '\n' && pp_token.type != T_EOF) {
699                 next_pp_token();
700         }
701 }
702
/**
 * Reports an "#error directive" diagnostic at the current position.
 * The tokens of the directive itself are not read yet.
 */
static void error_directive(void)
{
        error_prefix();
        fputs("#error directive: \n", stderr);

        /* parse pp-tokens until new-line */
}
710
711 static void define_directive(void)
712 {
713         lexer_next_preprocessing_token();
714         if(lexer_token.type != T_IDENTIFIER) {
715                 parse_error("expected identifier after #define\n");
716                 eat_until_newline();
717         }
718 }
719
/**
 * Placeholder for #ifdef / #ifndef handling: only reads the next
 * preprocessing token.
 *
 * @param is_ifndef  non-zero for #ifndef, zero for #ifdef (currently unused)
 */
static void ifdef_directive(int is_ifndef)
{
        (void) is_ifndef;

        lexer_next_preprocessing_token();
        //expect_identifier();
        //expect_newline();
}
727
/**
 * Placeholder for #endif handling; currently does nothing.
 */
static void endif_directive(void)
{
        //expect_newline();
}
732
733 static void parse_line_directive(void)
734 {
735         if(pp_token.type != T_INTEGER) {
736                 parse_error("expected integer");
737         } else {
738                 lexer_token.source_position.linenr = pp_token.v.intvalue - 1;
739                 next_pp_token();
740         }
741         if(pp_token.type == T_STRING_LITERAL) {
742                 lexer_token.source_position.input_name = pp_token.v.string;
743                 next_pp_token();
744         }
745
746         eat_until_newline();
747 }
748
749 static void parse_preprocessor_identifier(void)
750 {
751         assert(pp_token.type == T_IDENTIFIER);
752         symbol_t *symbol = pp_token.v.symbol;
753
754         switch(symbol->pp_ID) {
755         case TP_include:
756                 printf("include - enable header name parsing!\n");
757                 break;
758         case TP_define:
759                 define_directive();
760                 break;
761         case TP_ifdef:
762                 ifdef_directive(0);
763                 break;
764         case TP_ifndef:
765                 ifdef_directive(1);
766                 break;
767         case TP_endif:
768                 endif_directive();
769                 break;
770         case TP_line:
771                 next_pp_token();
772                 parse_line_directive();
773                 break;
774         case TP_if:
775         case TP_else:
776         case TP_elif:
777         case TP_undef:
778         case TP_error:
779                 error_directive();
780                 break;
781         case TP_pragma:
782                 break;
783         }
784 }
785
786 static void parse_preprocessor_directive()
787 {
788         next_pp_token();
789
790         switch(pp_token.type) {
791         case T_IDENTIFIER:
792                 parse_preprocessor_identifier();
793                 break;
794         case T_INTEGER:
795                 parse_line_directive();
796                 break;
797         default:
798                 parse_error("invalid preprocessor directive");
799                 eat_until_newline();
800                 break;
801         }
802 }
803
804 #define MAYBE_PROLOG                                       \
805                         next_char();                                   \
806                         while(1) {                                     \
807                                 switch(c) {
808
809 #define MAYBE(ch, set_type)                                \
810                                 case ch:                                   \
811                                         next_char();                           \
812                                         lexer_token.type = set_type;           \
813                                         return;
814
815 #define ELSE_CODE(code)                                    \
816                                 default:                                   \
817                                         code;                                  \
818                                 }                                          \
819                         } /* end of while(1) */                        \
820                         break;
821
822 #define ELSE(set_type)                                     \
823                 ELSE_CODE(                                         \
824                         lexer_token.type = set_type;                   \
825                         return;                                        \
826                 )
827
828 void lexer_next_preprocessing_token(void)
829 {
830         while(1) {
831                 switch(c) {
832                 case ' ':
833                 case '\t':
834                         next_char();
835                         break;
836
837                 MATCH_NEWLINE(
838                         lexer_token.type = '\n';
839                         return;
840                 )
841
842                 SYMBOL_CHARS
843                         parse_symbol();
844                         return;
845
846                 DIGITS
847                         parse_number();
848                         return;
849
850                 case '"':
851                         parse_string_literal();
852                         return;
853
854                 case '\'':
855                         parse_character_constant();
856                         return;
857
858                 case '.':
859                         MAYBE_PROLOG
860                                 case '.':
861                                         MAYBE_PROLOG
862                                         MAYBE('.', T_DOTDOTDOT)
863                                         ELSE_CODE(
864                                                 put_back(c);
865                                                 c = '.';
866                                                 lexer_token.type = '.';
867                                                 return;
868                                         )
869                         ELSE('.')
870                 case '&':
871                         MAYBE_PROLOG
872                         MAYBE('&', T_ANDAND)
873                         MAYBE('=', T_ANDEQUAL)
874                         ELSE('&')
875                 case '*':
876                         MAYBE_PROLOG
877                         MAYBE('=', T_ASTERISKEQUAL)
878                         ELSE('*')
879                 case '+':
880                         MAYBE_PROLOG
881                         MAYBE('+', T_PLUSPLUS)
882                         MAYBE('=', T_PLUSEQUAL)
883                         ELSE('+')
884                 case '-':
885                         MAYBE_PROLOG
886                         MAYBE('>', T_MINUSGREATER)
887                         MAYBE('-', T_MINUSMINUS)
888                         MAYBE('=', T_MINUSEQUAL)
889                         ELSE('-')
890                 case '!':
891                         MAYBE_PROLOG
892                         MAYBE('=', T_EXCLAMATIONMARKEQUAL)
893                         ELSE('!')
894                 case '/':
895                         MAYBE_PROLOG
896                         MAYBE('=', T_SLASHEQUAL)
897                                 case '*':
898                                         next_char();
899                                         skip_multiline_comment();
900                                         lexer_next_preprocessing_token();
901                                         return;
902                                 case '/':
903                                         next_char();
904                                         skip_line_comment();
905                                         lexer_next_preprocessing_token();
906                                         return;
907                         ELSE('/')
908                 case '%':
909                         MAYBE_PROLOG
910                         MAYBE('>', T_PERCENTGREATER)
911                         MAYBE('=', T_PERCENTEQUAL)
912                                 case ':':
913                                         MAYBE_PROLOG
914                                                 case '%':
915                                                         MAYBE_PROLOG
916                                                         MAYBE(':', T_PERCENTCOLONPERCENTCOLON)
917                                                         ELSE_CODE(
918                                                                 put_back(c);
919                                                                 c = '%';
920                                                                 lexer_token.type = T_PERCENTCOLON;
921                                                                 return;
922                                                         )
923                                         ELSE(T_PERCENTCOLON)
924                         ELSE('%')
925                 case '<':
926                         MAYBE_PROLOG
927                         MAYBE(':', T_LESSCOLON)
928                         MAYBE('%', T_LESSPERCENT)
929                         MAYBE('=', T_LESSEQUAL)
930                                 case '<':
931                                         MAYBE_PROLOG
932                                         MAYBE('=', T_LESSLESSEQUAL)
933                                         ELSE(T_LESSLESS)
934                         ELSE('<')
935                 case '>':
936                         MAYBE_PROLOG
937                         MAYBE('=', T_GREATEREQUAL)
938                                 case '>':
939                                         MAYBE_PROLOG
940                                         MAYBE('=', T_GREATERGREATEREQUAL)
941                                         ELSE(T_GREATERGREATER)
942                         ELSE('>')
943                 case '^':
944                         MAYBE_PROLOG
945                         MAYBE('=', T_CARETEQUAL)
946                         ELSE('^')
947                 case '|':
948                         MAYBE_PROLOG
949                         MAYBE('=', T_PIPEEQUAL)
950                         MAYBE('|', T_PIPEPIPE)
951                         ELSE('|')
952                 case ':':
953                         MAYBE_PROLOG
954                         MAYBE('>', T_COLONGREATER)
955                         ELSE(':')
956                 case '=':
957                         MAYBE_PROLOG
958                         MAYBE('=', T_EQUALEQUAL)
959                         ELSE('=')
960                 case '#':
961                         MAYBE_PROLOG
962                         MAYBE('#', T_HASHHASH)
963                         ELSE('#')
964
965                 case '?':
966                 case '[':
967                 case ']':
968                 case '(':
969                 case ')':
970                 case '{':
971                 case '}':
972                 case '~':
973                 case ';':
974                 case ',':
975                 case '\\':
976                         lexer_token.type = c;
977                         next_char();
978                         return;
979
980                 case EOF:
981                         lexer_token.type = T_EOF;
982                         return;
983
984                 default:
985                         next_char();
986                         error_prefix();
987                         fprintf(stderr, "unknown character '%c' found\n", c);
988                         lexer_token.type = T_ERROR;
989                         return;
990                 }
991         }
992 }
993
994 void lexer_next_token(void)
995 {
996         lexer_next_preprocessing_token();
997         if(lexer_token.type != '\n')
998                 return;
999
1000 newline_found:
1001         do {
1002                 lexer_next_preprocessing_token();
1003         } while(lexer_token.type == '\n');
1004
1005         if(lexer_token.type == '#') {
1006                 parse_preprocessor_directive();
1007                 goto newline_found;
1008         }
1009 }
1010
1011 void init_lexer(void)
1012 {
1013         strset_init(&stringset);
1014 }
1015
1016 void lexer_open_stream(FILE *stream, const char *input_name)
1017 {
1018         input                                  = stream;
1019         lexer_token.source_position.linenr     = 1;
1020         lexer_token.source_position.input_name = input_name;
1021
1022         next_char();
1023 }
1024
1025 void exit_lexer(void)
1026 {
1027         strset_destroy(&stringset);
1028 }
1029
1030 static __attribute__((unused))
1031 void dbg_pos(const source_position_t source_position)
1032 {
1033         fprintf(stdout, "%s:%d\n", source_position.input_name,
1034                 source_position.linenr);
1035         fflush(stdout);
1036 }