02149fdcb1a7d4540609282e0d241ddd9e365363
[cparser] / lexer.c
1 #include <config.h>
2
3 #include "lexer.h"
4 #include "token_t.h"
5 #include "symbol_table_t.h"
6 #include "adt/error.h"
7 #include "adt/strset.h"
8 #include "adt/util.h"
9
10 #include <assert.h>
11 #include <errno.h>
12 #include <string.h>
13 #include <stdbool.h>
14 #include <ctype.h>
15
/* Uncomment to print every character read and every put-back to stdout. */
//#define DEBUG_CHARS
/* Number of bytes at the start of buf that fread never fills (see
 * next_real_char()); they give put_back() room to step backwards. */
#define MAX_PUTBACK 3

/* current character; EOF once the input is exhausted */
static int         c;
/* the token produced by the most recent lexer call */
token_t            lexer_token;
/* symbol for the identifier "L", set in lexer_open_stream() and used to
 * recognise wide string literals */
symbol_t          *symbol_L;
/* stream the lexer currently reads from */
static FILE       *input;
/* read buffer; fread stores data starting at buf + MAX_PUTBACK */
static char        buf[1024 + MAX_PUTBACK];
/* one past the last valid byte in buf */
static const char *bufend;
/* position in buf of the character last read by next_real_char() */
static const char *bufpos;
/* set used to keep a single copy of each string literal */
static strset_t    stringset;
27
/**
 * Starts a diagnostic on stderr with the "<file>:<line>: Error: " prefix.
 * No newline is written; the caller prints the message text afterwards.
 *
 * @param input_name  name of the input the diagnostic refers to
 * @param linenr      line number the diagnostic refers to
 */
static void error_prefix_at(const char *input_name, unsigned linenr)
{
        fprintf(stderr, "%s:%u: Error: ", input_name, linenr);
}
32
33 static void error_prefix(void)
34 {
35         error_prefix_at(lexer_token.source_position.input_name,
36                         lexer_token.source_position.linenr);
37 }
38
/**
 * Reports an error at the current lexer position: prints the position
 * prefix followed by msg and a newline on stderr.
 *
 * @param msg  message text; the trailing newline is added here
 */
static void parse_error(const char *msg)
{
        error_prefix();
        fprintf(stderr, "%s\n", msg);
}
44
45 static inline void next_real_char(void)
46 {
47         bufpos++;
48         if(bufpos >= bufend) {
49                 size_t s = fread(buf + MAX_PUTBACK, 1, sizeof(buf) - MAX_PUTBACK,
50                                  input);
51                 if(s == 0) {
52                         c = EOF;
53                         return;
54                 }
55                 bufpos = buf + MAX_PUTBACK;
56                 bufend = buf + MAX_PUTBACK + s;
57         }
58         c = *(bufpos);
59 }
60
61 static inline void put_back(int pc)
62 {
63         assert(bufpos >= buf);
64         //assert(bufpos < buf+MAX_PUTBACK || *bufpos == pc);
65
66         char *p = buf + (bufpos - buf);
67         *p = pc;
68
69         /* going backwards in the buffer is legal as long as it's not more often
70          * than MAX_PUTBACK */
71         bufpos--;
72
73 #ifdef DEBUG_CHARS
74         printf("putback '%c'\n", pc);
75 #endif
76 }
77
/* Forward declaration: next_char() is defined further down but is already
 * needed by the macros below and by maybe_concat_lines(). */
static inline void next_char(void);

/*
 * Expands to the case labels that recognise a line ending inside a
 * switch(c).  "\r", "\r\n" and "\n" are each treated as one newline: the
 * newline is consumed, the line number in lexer_token is incremented and
 * then `code` is executed.
 *
 * `code` has to leave the case on its own (return, break, ...), otherwise
 * the '\r' arm falls through into the '\n' arm.
 */
#define MATCH_NEWLINE(code)                   \
        case '\r':                                \
                next_char();                          \
                if(c == '\n') {                       \
                        next_char();                      \
                }                                     \
                lexer_token.source_position.linenr++; \
                code;                                 \
        case '\n':                                \
                next_char();                          \
                lexer_token.source_position.linenr++; \
                code;

/* Asserts that the current character is c_type and advances past it. */
#define eat(c_type)  do { assert(c == c_type); next_char(); } while(0)
94
95 static void maybe_concat_lines(void)
96 {
97         eat('\\');
98
99         switch(c) {
100         MATCH_NEWLINE(return;)
101
102         default:
103                 break;
104         }
105
106         put_back(c);
107         c = '\\';
108 }
109
110 static inline void next_char(void)
111 {
112         next_real_char();
113
114         /* filter trigraphs */
115         if(UNLIKELY(c == '\\')) {
116                 maybe_concat_lines();
117                 goto end_of_next_char;
118         }
119
120         if(LIKELY(c != '?'))
121                 goto end_of_next_char;
122
123         next_real_char();
124         if(LIKELY(c != '?')) {
125                 put_back(c);
126                 c = '?';
127                 goto end_of_next_char;
128         }
129
130         next_real_char();
131         switch(c) {
132         case '=': c = '#'; break;
133         case '(': c = '['; break;
134         case '/': c = '\\'; maybe_concat_lines(); break;
135         case ')': c = ']'; break;
136         case '\'': c = '^'; break;
137         case '<': c = '{'; break;
138         case '!': c = '|'; break;
139         case '>': c = '}'; break;
140         case '-': c = '~'; break;
141         default:
142                 put_back('?');
143                 put_back(c);
144                 c = '?';
145                 break;
146         }
147
148 end_of_next_char:;
149 #ifdef DEBUG_CHARS
150         printf("nchar '%c'\n", c);
151 #endif
152 }
153
/*
 * Case labels for every character that may start an identifier: the lower
 * and upper case ASCII letters and the underscore.  Use inside a switch,
 * directly followed by the statements for these characters.
 */
#define SYMBOL_CHARS                                                  \
        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':   \
        case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':   \
        case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':   \
        case 's': case 't': case 'u': case 'v': case 'w': case 'x':   \
        case 'y': case 'z':                                           \
        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':   \
        case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':   \
        case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':   \
        case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':   \
        case 'Y': case 'Z':                                           \
        case '_':
208
/*
 * Case labels for the ten decimal digits.  Use inside a switch, directly
 * followed by the statements for these characters.
 */
#define DIGITS                                                        \
        case '0': case '1': case '2': case '3': case '4':             \
        case '5': case '6': case '7': case '8': case '9':
220
221 static void parse_symbol(void)
222 {
223         symbol_t *symbol;
224         char     *string;
225
226         obstack_1grow(&symbol_obstack, c);
227         next_char();
228
229         while(1) {
230                 switch(c) {
231                 DIGITS
232                 SYMBOL_CHARS
233                         obstack_1grow(&symbol_obstack, c);
234                         next_char();
235                         break;
236
237                 default:
238                         goto end_symbol;
239                 }
240         }
241
242 end_symbol:
243         obstack_1grow(&symbol_obstack, '\0');
244
245         string = obstack_finish(&symbol_obstack);
246         symbol = symbol_table_insert(string);
247
248         lexer_token.type     = symbol->ID;
249         lexer_token.v.symbol = symbol;
250
251         if(symbol->string != string) {
252                 obstack_free(&symbol_obstack, string);
253         }
254 }
255
256 static void parse_integer_suffix(void)
257 {
258         if(c == 'U' || c == 'U') {
259                 /* TODO do something with the suffixes... */
260                 next_char();
261                 if(c == 'L' || c == 'l') {
262                         next_char();
263                         if(c == 'L' || c == 'l') {
264                                 next_char();
265                         }
266                 }
267         } else if(c == 'l' || c == 'L') {
268                 next_char();
269                 if(c == 'l' || c == 'L') {
270                         next_char();
271                         if(c == 'u' || c == 'U') {
272                                 next_char();
273                         }
274                 } else if(c == 'u' || c == 'U') {
275                         next_char();
276                 }
277         }
278 }
279
280 static void parse_floating_suffix(void)
281 {
282         switch(c) {
283         /* TODO: do something usefull with the suffixes... */
284         case 'f':
285         case 'F':
286         case 'l':
287         case 'L':
288                 next_char();
289                 break;
290         default:
291                 break;
292         }
293 }
294
/**
 * Tests whether c is a hexadecimal digit.
 *
 * @param c  character to test (EOF is allowed and is not a digit)
 * @return   true for '0'-'9', 'a'-'f' and 'A'-'F', false otherwise
 */
static inline bool is_hex_digit(int c)
{
        return (c >= '0' && c <= '9')
                        || (c >= 'a' && c <= 'f')
                        || (c >= 'A' && c <= 'F');
}
301
302 static void parse_number_hex(void)
303 {
304         assert(c == 'x' || c == 'X');
305         next_char();
306
307         while(is_hex_digit(c)) {
308                 obstack_1grow(&symbol_obstack, c);
309                 next_char();
310         }
311         obstack_1grow(&symbol_obstack, '\0');
312         char *string = obstack_finish(&symbol_obstack);
313
314         if(c == '.' || c == 'p' || c == 'P') {
315                 next_char();
316                 panic("Hex floating point numbers not implemented yet");
317         }
318         if(*string == '\0') {
319                 parse_error("invalid hex number");
320                 lexer_token.type = T_ERROR;
321         }
322
323         char *endptr;
324         int value = strtol(string, &endptr, 16);
325         if(*endptr != '\0') {
326                 parse_error("hex number literal too long");
327         }
328
329         lexer_token.type       = T_INTEGER;
330         lexer_token.v.intvalue = value;
331
332         parse_integer_suffix();
333         obstack_free(&symbol_obstack, string);
334 }
335
/**
 * Tests whether chr is an octal digit.
 *
 * @param chr  character to test
 * @return     true for '0'-'7', false otherwise
 */
static inline bool is_octal_digit(int chr)
{
        if(chr < '0') {
                return false;
        }
        return chr <= '7';
}
340
341 static void parse_number_oct(void)
342 {
343         while(is_octal_digit(c)) {
344                 obstack_1grow(&symbol_obstack, c);
345                 next_char();
346         }
347         obstack_1grow(&symbol_obstack, '\0');
348         char *string = obstack_finish(&symbol_obstack);
349
350         char *endptr;
351         int value = strtol(string, &endptr, 8);
352         if(*endptr != '\0') {
353                 parse_error("octal number literal too long");
354         }
355
356         lexer_token.type       = T_INTEGER;
357         lexer_token.v.intvalue = value;
358
359         parse_integer_suffix();
360         obstack_free(&symbol_obstack, string);
361 }
362
363 static void parse_number_dec(void)
364 {
365         bool is_float = false;
366         while(isdigit(c)) {
367                 obstack_1grow(&symbol_obstack, c);
368                 next_char();
369         }
370
371         if(c == '.') {
372                 obstack_1grow(&symbol_obstack, '.');
373                 next_char();
374
375                 while(isdigit(c)) {
376                         obstack_1grow(&symbol_obstack, c);
377                         next_char();
378                 }
379                 is_float = true;
380         }
381         if(c == 'e' || c == 'E') {
382                 obstack_1grow(&symbol_obstack, 'e');
383                 next_char();
384
385                 if(c == '-' || c == '+') {
386                         obstack_1grow(&symbol_obstack, c);
387                         next_char();
388                 }
389
390                 while(isdigit(c)) {
391                         obstack_1grow(&symbol_obstack, c);
392                         next_char();
393                 }
394                 is_float = true;
395         }
396
397         obstack_1grow(&symbol_obstack, '\0');
398         char *string = obstack_finish(&symbol_obstack);
399
400         char *endptr;
401         if(is_float) {
402                 lexer_token.type         = T_FLOATINGPOINT;
403                 lexer_token.v.floatvalue = strtod(string, &endptr);
404
405                 if(*endptr != '\0') {
406                         parse_error("invalid number literal");
407                 }
408
409                 parse_floating_suffix();
410         } else {
411                 lexer_token.type       = T_INTEGER;
412                 lexer_token.v.intvalue = strtol(string, &endptr, 10);
413
414                 if(*endptr != '\0') {
415                         parse_error("invalid number literal");
416                 }
417
418                 parse_integer_suffix();
419         }
420         obstack_free(&symbol_obstack, string);
421 }
422
423 static void parse_number(void)
424 {
425         if (c == '0') {
426                 next_char();
427                 switch (c) {
428                         case 'X':
429                         case 'x':
430                                 parse_number_hex();
431                                 break;
432                         case '0':
433                         case '1':
434                         case '2':
435                         case '3':
436                         case '4':
437                         case '5':
438                         case '6':
439                         case '7':
440                                 parse_number_oct();
441                                 break;
442                         case '8':
443                         case '9':
444                                 next_char();
445                                 parse_error("invalid octal number");
446                                 lexer_token.type = T_ERROR;
447                                 return;
448                         case '.':
449                         case 'e':
450                         case 'E':
451                         default:
452                                 obstack_1grow(&symbol_obstack, '0');
453                                 parse_number_dec();
454                                 return;
455                 }
456         } else {
457                 parse_number_dec();
458         }
459 }
460
461 static int parse_octal_sequence(const int first_digit)
462 {
463         assert(is_octal_digit(first_digit));
464         int value = first_digit - '0';
465         if (!is_octal_digit(c)) return value;
466         value = 8 * value + c - '0';
467         next_char();
468         if (!is_octal_digit(c)) return value;
469         value = 8 * value + c - '0';
470         next_char();
471         return value;
472 }
473
474 static int parse_hex_sequence(void)
475 {
476         int value = 0;
477         while(1) {
478                 if (c >= '0' && c <= '9') {
479                         value = 16 * value + c - '0';
480                 } else if ('A' <= c && c <= 'F') {
481                         value = 16 * value + c - 'A' + 10;
482                 } else if ('a' <= c && c <= 'f') {
483                         value = 16 * value + c - 'a' + 10;
484                 } else {
485                         break;
486                 }
487                 next_char();
488         }
489
490         return value;
491 }
492
493 static int parse_escape_sequence(void)
494 {
495         eat('\\');
496
497         int ec = c;
498         next_char();
499
500         switch(ec) {
501         case '"':  return '"';
502         case '\'': return '\'';
503         case '\\': return '\\';
504         case '?': return '\?';
505         case 'a': return '\a';
506         case 'b': return '\b';
507         case 'f': return '\f';
508         case 'n': return '\n';
509         case 'r': return '\r';
510         case 't': return '\t';
511         case 'v': return '\v';
512         case 'x':
513                 return parse_hex_sequence();
514         case '0':
515         case '1':
516         case '2':
517         case '3':
518         case '4':
519         case '5':
520         case '6':
521         case '7':
522                 return parse_octal_sequence(ec);
523         case EOF:
524                 parse_error("reached end of file while parsing escape sequence");
525                 return EOF;
526         default:
527                 parse_error("unknown escape sequence");
528                 return EOF;
529         }
530 }
531
532 const char *concat_strings(const char *s1, const char *s2)
533 {
534         size_t  len1   = strlen(s1);
535         size_t  len2   = strlen(s2);
536
537         char   *concat = obstack_alloc(&symbol_obstack, len1 + len2 + 1);
538         memcpy(concat, s1, len1);
539         memcpy(concat + len1, s2, len2 + 1);
540
541         const char *result = strset_insert(&stringset, concat);
542         if(result != concat) {
543                 obstack_free(&symbol_obstack, concat);
544         }
545
546         return result;
547 }
548
549 static void parse_string_literal(void)
550 {
551         unsigned    start_linenr = lexer_token.source_position.linenr;
552         char       *string;
553         const char *result;
554
555         assert(c == '"');
556         next_char();
557
558         int tc;
559         while(1) {
560                 switch(c) {
561                 case '\\':
562                         tc = parse_escape_sequence();
563                         obstack_1grow(&symbol_obstack, tc);
564                         break;
565
566                 case EOF:
567                         error_prefix_at(lexer_token.source_position.input_name,
568                                         start_linenr);
569                         fprintf(stderr, "string has no end\n");
570                         lexer_token.type = T_ERROR;
571                         return;
572
573                 case '"':
574                         next_char();
575                         goto end_of_string;
576
577                 default:
578                         obstack_1grow(&symbol_obstack, c);
579                         next_char();
580                         break;
581                 }
582         }
583
584 end_of_string:
585
586         /* TODO: concatenate multiple strings separated by whitespace... */
587
588         /* add finishing 0 to the string */
589         obstack_1grow(&symbol_obstack, '\0');
590         string = obstack_finish(&symbol_obstack);
591
592         /* check if there is already a copy of the string */
593         result = strset_insert(&stringset, string);
594         if(result != string) {
595                 obstack_free(&symbol_obstack, string);
596         }
597
598         lexer_token.type     = T_STRING_LITERAL;
599         lexer_token.v.string = result;
600 }
601
602 static void parse_character_constant(void)
603 {
604         eat('\'');
605
606         int found_char = 0;
607         while(1) {
608                 switch(c) {
609                 case '\\':
610                         found_char = parse_escape_sequence();
611                         break;
612
613                 MATCH_NEWLINE(
614                         parse_error("newline while parsing character constant");
615                         break;
616                 )
617
618                 case '\'':
619                         next_char();
620                         goto end_of_char_constant;
621
622                 case EOF:
623                         parse_error("EOF while parsing character constant");
624                         lexer_token.type = T_ERROR;
625                         return;
626
627                 default:
628                         if(found_char != 0) {
629                                 parse_error("more than 1 characters in character "
630                                             "constant");
631                                 goto end_of_char_constant;
632                         } else {
633                                 found_char = c;
634                                 next_char();
635                         }
636                         break;
637                 }
638         }
639
640 end_of_char_constant:
641         lexer_token.type       = T_INTEGER;
642         lexer_token.v.intvalue = found_char;
643 }
644
645 static void skip_multiline_comment(void)
646 {
647         unsigned start_linenr = lexer_token.source_position.linenr;
648
649         while(1) {
650                 switch(c) {
651                 case '*':
652                         next_char();
653                         if(c == '/') {
654                                 next_char();
655                                 return;
656                         }
657                         break;
658
659                 MATCH_NEWLINE(break;)
660
661                 case EOF:
662                         error_prefix_at(lexer_token.source_position.input_name,
663                                         start_linenr);
664                         fprintf(stderr, "at end of file while looking for comment end\n");
665                         return;
666
667                 default:
668                         next_char();
669                         break;
670                 }
671         }
672 }
673
674 static void skip_line_comment(void)
675 {
676         while(1) {
677                 switch(c) {
678                 case EOF:
679                         return;
680
681                 case '\n':
682                 case '\r':
683                         return;
684
685                 default:
686                         next_char();
687                         break;
688                 }
689         }
690 }
691
692 static token_t pp_token;
693
694 static inline void next_pp_token(void)
695 {
696         lexer_next_preprocessing_token();
697         pp_token = lexer_token;
698 }
699
700 static void eat_until_newline(void)
701 {
702         while(pp_token.type != '\n' && pp_token.type != T_EOF) {
703                 next_pp_token();
704         }
705 }
706
/**
 * Reports an #error directive at the current position.  The text of the
 * directive is not read or printed yet.
 */
static void error_directive(void)
{
        error_prefix();
        fputs("#error directive: \n", stderr);

        /* parse pp-tokens until new-line */
}
714
715 static void define_directive(void)
716 {
717         lexer_next_preprocessing_token();
718         if(lexer_token.type != T_IDENTIFIER) {
719                 parse_error("expected identifier after #define\n");
720                 eat_until_newline();
721         }
722 }
723
/**
 * Placeholder for #ifdef / #ifndef: reads the token after the directive
 * name and does nothing else yet.
 *
 * @param is_ifndef  non-zero for #ifndef, zero for #ifdef (currently unused)
 */
static void ifdef_directive(int is_ifndef)
{
        (void)is_ifndef;

        lexer_next_preprocessing_token();
        /* TODO: expect an identifier followed by a newline */
}
731
/**
 * Placeholder for #endif: does nothing yet.
 */
static void endif_directive(void)
{
        /* TODO: expect a newline */
}
736
737 static void parse_line_directive(void)
738 {
739         if(pp_token.type != T_INTEGER) {
740                 parse_error("expected integer");
741         } else {
742                 lexer_token.source_position.linenr = pp_token.v.intvalue - 1;
743                 next_pp_token();
744         }
745         if(pp_token.type == T_STRING_LITERAL) {
746                 lexer_token.source_position.input_name = pp_token.v.string;
747                 next_pp_token();
748         }
749
750         eat_until_newline();
751 }
752
753 static void parse_preprocessor_identifier(void)
754 {
755         assert(pp_token.type == T_IDENTIFIER);
756         symbol_t *symbol = pp_token.v.symbol;
757
758         switch(symbol->pp_ID) {
759         case TP_include:
760                 printf("include - enable header name parsing!\n");
761                 break;
762         case TP_define:
763                 define_directive();
764                 break;
765         case TP_ifdef:
766                 ifdef_directive(0);
767                 break;
768         case TP_ifndef:
769                 ifdef_directive(1);
770                 break;
771         case TP_endif:
772                 endif_directive();
773                 break;
774         case TP_line:
775                 next_pp_token();
776                 parse_line_directive();
777                 break;
778         case TP_if:
779         case TP_else:
780         case TP_elif:
781         case TP_undef:
782         case TP_error:
783                 error_directive();
784                 break;
785         case TP_pragma:
786                 break;
787         }
788 }
789
790 static void parse_preprocessor_directive(void)
791 {
792         next_pp_token();
793
794         switch(pp_token.type) {
795         case T_IDENTIFIER:
796                 parse_preprocessor_identifier();
797                 break;
798         case T_INTEGER:
799                 parse_line_directive();
800                 break;
801         default:
802                 parse_error("invalid preprocessor directive");
803                 eat_until_newline();
804                 break;
805         }
806 }
807
/*
 * Helper macros for lexing punctuators that share a prefix.  They are used
 * inside the switch of lexer_next_preprocessing_token() and only work
 * together, because MAYBE_PROLOG opens a loop and a switch that
 * ELSE_CODE/ELSE close again:
 *
 *      case '+':
 *              MAYBE_PROLOG
 *              MAYBE('+', T_PLUSPLUS)
 *              MAYBE('=', T_PLUSEQUAL)
 *              ELSE('+')
 */

/* Consumes the current character and opens the nested switch on the
 * character that follows it. */
#define MAYBE_PROLOG                                       \
                        next_char();                                   \
                        while(1) {                                     \
                                switch(c) {

/* If the next character is ch: consume it, set the token type to set_type
 * and return from the enclosing function. */
#define MAYBE(ch, set_type)                                \
                                case ch:                                   \
                                        next_char();                           \
                                        lexer_token.type = set_type;           \
                                        return;

/* Runs code for every other character and closes the switch and the loop
 * opened by MAYBE_PROLOG. */
#define ELSE_CODE(code)                                    \
                                default:                                   \
                                        code;                                  \
                                }                                          \
                        } /* end of while(1) */                        \
                        break;

/* Like ELSE_CODE, for the common case: set the token type to set_type
 * without consuming the character and return. */
#define ELSE(set_type)                                     \
                ELSE_CODE(                                         \
                        lexer_token.type = set_type;                   \
                        return;                                        \
                )
831
832 void lexer_next_preprocessing_token(void)
833 {
834         while(1) {
835                 switch(c) {
836                 case ' ':
837                 case '\t':
838                         next_char();
839                         break;
840
841                 MATCH_NEWLINE(
842                         lexer_token.type = '\n';
843                         return;
844                 )
845
846                 SYMBOL_CHARS
847                         parse_symbol();
848                         /* might be a wide string ( L"string" ) */
849                         if(c == '"' && (lexer_token.type == T_IDENTIFIER &&
850                            lexer_token.v.symbol == symbol_L)) {
851                                 parse_string_literal();
852                                 return;
853                         }
854                         return;
855
856                 DIGITS
857                         parse_number();
858                         return;
859
860                 case '"':
861                         parse_string_literal();
862                         return;
863
864                 case '\'':
865                         parse_character_constant();
866                         return;
867
868                 case '.':
869                         MAYBE_PROLOG
870                                 case '.':
871                                         MAYBE_PROLOG
872                                         MAYBE('.', T_DOTDOTDOT)
873                                         ELSE_CODE(
874                                                 put_back(c);
875                                                 c = '.';
876                                                 lexer_token.type = '.';
877                                                 return;
878                                         )
879                         ELSE('.')
880                 case '&':
881                         MAYBE_PROLOG
882                         MAYBE('&', T_ANDAND)
883                         MAYBE('=', T_ANDEQUAL)
884                         ELSE('&')
885                 case '*':
886                         MAYBE_PROLOG
887                         MAYBE('=', T_ASTERISKEQUAL)
888                         ELSE('*')
889                 case '+':
890                         MAYBE_PROLOG
891                         MAYBE('+', T_PLUSPLUS)
892                         MAYBE('=', T_PLUSEQUAL)
893                         ELSE('+')
894                 case '-':
895                         MAYBE_PROLOG
896                         MAYBE('>', T_MINUSGREATER)
897                         MAYBE('-', T_MINUSMINUS)
898                         MAYBE('=', T_MINUSEQUAL)
899                         ELSE('-')
900                 case '!':
901                         MAYBE_PROLOG
902                         MAYBE('=', T_EXCLAMATIONMARKEQUAL)
903                         ELSE('!')
904                 case '/':
905                         MAYBE_PROLOG
906                         MAYBE('=', T_SLASHEQUAL)
907                                 case '*':
908                                         next_char();
909                                         skip_multiline_comment();
910                                         lexer_next_preprocessing_token();
911                                         return;
912                                 case '/':
913                                         next_char();
914                                         skip_line_comment();
915                                         lexer_next_preprocessing_token();
916                                         return;
917                         ELSE('/')
918                 case '%':
919                         MAYBE_PROLOG
920                         MAYBE('>', T_PERCENTGREATER)
921                         MAYBE('=', T_PERCENTEQUAL)
922                                 case ':':
923                                         MAYBE_PROLOG
924                                                 case '%':
925                                                         MAYBE_PROLOG
926                                                         MAYBE(':', T_PERCENTCOLONPERCENTCOLON)
927                                                         ELSE_CODE(
928                                                                 put_back(c);
929                                                                 c = '%';
930                                                                 lexer_token.type = T_PERCENTCOLON;
931                                                                 return;
932                                                         )
933                                         ELSE(T_PERCENTCOLON)
934                         ELSE('%')
935                 case '<':
936                         MAYBE_PROLOG
937                         MAYBE(':', T_LESSCOLON)
938                         MAYBE('%', T_LESSPERCENT)
939                         MAYBE('=', T_LESSEQUAL)
940                                 case '<':
941                                         MAYBE_PROLOG
942                                         MAYBE('=', T_LESSLESSEQUAL)
943                                         ELSE(T_LESSLESS)
944                         ELSE('<')
945                 case '>':
946                         MAYBE_PROLOG
947                         MAYBE('=', T_GREATEREQUAL)
948                                 case '>':
949                                         MAYBE_PROLOG
950                                         MAYBE('=', T_GREATERGREATEREQUAL)
951                                         ELSE(T_GREATERGREATER)
952                         ELSE('>')
953                 case '^':
954                         MAYBE_PROLOG
955                         MAYBE('=', T_CARETEQUAL)
956                         ELSE('^')
957                 case '|':
958                         MAYBE_PROLOG
959                         MAYBE('=', T_PIPEEQUAL)
960                         MAYBE('|', T_PIPEPIPE)
961                         ELSE('|')
962                 case ':':
963                         MAYBE_PROLOG
964                         MAYBE('>', T_COLONGREATER)
965                         ELSE(':')
966                 case '=':
967                         MAYBE_PROLOG
968                         MAYBE('=', T_EQUALEQUAL)
969                         ELSE('=')
970                 case '#':
971                         MAYBE_PROLOG
972                         MAYBE('#', T_HASHHASH)
973                         ELSE('#')
974
975                 case '?':
976                 case '[':
977                 case ']':
978                 case '(':
979                 case ')':
980                 case '{':
981                 case '}':
982                 case '~':
983                 case ';':
984                 case ',':
985                 case '\\':
986                         lexer_token.type = c;
987                         next_char();
988                         return;
989
990                 case EOF:
991                         lexer_token.type = T_EOF;
992                         return;
993
994                 default:
995                         next_char();
996                         error_prefix();
997                         fprintf(stderr, "unknown character '%c' found\n", c);
998                         lexer_token.type = T_ERROR;
999                         return;
1000                 }
1001         }
1002 }
1003
1004 void lexer_next_token(void)
1005 {
1006         lexer_next_preprocessing_token();
1007         if(lexer_token.type != '\n')
1008                 return;
1009
1010 newline_found:
1011         do {
1012                 lexer_next_preprocessing_token();
1013         } while(lexer_token.type == '\n');
1014
1015         if(lexer_token.type == '#') {
1016                 parse_preprocessor_directive();
1017                 goto newline_found;
1018         }
1019 }
1020
1021 void init_lexer(void)
1022 {
1023         strset_init(&stringset);
1024 }
1025
1026 void lexer_open_stream(FILE *stream, const char *input_name)
1027 {
1028         input                                  = stream;
1029         lexer_token.source_position.linenr     = 0;
1030         lexer_token.source_position.input_name = input_name;
1031
1032         symbol_L = symbol_table_insert("L");
1033
1034         /* place a virtual \n at the beginning so the lexer knows that we're
1035          * at the beginning of a line */
1036         c = '\n';
1037 }
1038
1039 void exit_lexer(void)
1040 {
1041         strset_destroy(&stringset);
1042 }
1043
1044 static __attribute__((unused))
1045 void dbg_pos(const source_position_t source_position)
1046 {
1047         fprintf(stdout, "%s:%d\n", source_position.input_name,
1048                 source_position.linenr);
1049         fflush(stdout);
1050 }