de2cac8cd1680217eff2e5130ccbdb749eea579f
[cparser] / lexer.c
1 #include <config.h>
2
3 #include "lexer.h"
4 #include "token_t.h"
5 #include "symbol_table_t.h"
6 #include "adt/error.h"
7 #include "adt/strset.h"
8 #include "adt/util.h"
9
10 #include <assert.h>
11 #include <errno.h>
12 #include <string.h>
13 #include <stdbool.h>
14 #include <ctype.h>
15
16 //#define DEBUG_CHARS
/* Number of bytes reserved at the start of buf; fread() always fills the
 * buffer starting behind this area (see next_real_char). */
#define MAX_PUTBACK 3

/* Current (already read, not yet consumed) input character or EOF. */
static int         c;
/* Token most recently produced by the lexer. */
token_t            lexer_token;
/* Symbol for the identifier "L"; used to recognise the L"..." prefix. */
symbol_t          *symbol_L;
/* Stream the lexer currently reads from (set by lexer_open_stream). */
static FILE       *input;
/* Read buffer: MAX_PUTBACK reserved bytes followed by 1024 payload bytes. */
static char        buf[1024 + MAX_PUTBACK];
/* One past the last valid byte in buf. */
static const char *bufend;
/* Position in buf of the character currently held in c. */
static const char *bufpos;
/* Set used to share identical string literal contents. */
static strset_t    stringset;
27
/**
 * Writes the "<file>:<line>: Error: " prefix of a diagnostic to stderr.
 *
 * @param input_name  name of the input being lexed
 * @param linenr      line number the diagnostic refers to
 */
static void error_prefix_at(const char *input_name, unsigned linenr)
{
	FILE *const out = stderr;

	fprintf(out, "%s:%u: Error: ", input_name, linenr);
}
32
33 static void error_prefix(void)
34 {
35         error_prefix_at(lexer_token.source_position.input_name,
36                         lexer_token.source_position.linenr);
37 }
38
/**
 * Reports an error at the current lexer position: prefix, message, newline.
 *
 * @param msg  message text (without trailing newline)
 */
static void parse_error(const char *msg)
{
	error_prefix();
	fputs(msg, stderr);
	fputc('\n', stderr);
}
44
45 static inline void next_real_char(void)
46 {
47         bufpos++;
48         if(bufpos >= bufend) {
49                 size_t s = fread(buf + MAX_PUTBACK, 1, sizeof(buf) - MAX_PUTBACK,
50                                  input);
51                 if(s == 0) {
52                         c = EOF;
53                         return;
54                 }
55                 bufpos = buf + MAX_PUTBACK;
56                 bufend = buf + MAX_PUTBACK + s;
57         }
58         c = *(bufpos);
59 }
60
61 static inline void put_back(int pc)
62 {
63         assert(bufpos >= buf);
64         //assert(bufpos < buf+MAX_PUTBACK || *bufpos == pc);
65
66         char *p = buf + (bufpos - buf);
67         *p = pc;
68
69         /* going backwards in the buffer is legal as long as it's not more often
70          * than MAX_PUTBACK */
71         bufpos--;
72
73 #ifdef DEBUG_CHARS
74         printf("putback '%c'\n", pc);
75 #endif
76 }
77
/* Forward declaration: next_char() and maybe_concat_lines() call each other. */
static inline void next_char(void);

/*
 * Expands to the two case labels '\r' and '\n' for use inside a switch on c.
 * Each label consumes the line ending ("\r", "\r\n" or "\n"), increments the
 * current line number and then runs `code`. `code` has to leave the switch
 * itself (break/return/goto), otherwise the '\r' label falls through into
 * the '\n' label.
 */
#define MATCH_NEWLINE(code)                   \
        case '\r':                                \
                next_char();                          \
                if(c == '\n') {                       \
                        next_char();                      \
                }                                     \
                lexer_token.source_position.linenr++; \
                code;                                 \
        case '\n':                                \
                next_char();                          \
                lexer_token.source_position.linenr++; \
                code;

/* Asserts that the current character is c_type and advances past it. */
#define eat(c_type)  do { assert(c == c_type); next_char(); } while(0)
94
95 static void maybe_concat_lines(void)
96 {
97         eat('\\');
98
99         switch(c) {
100         MATCH_NEWLINE(return;)
101
102         default:
103                 break;
104         }
105
106         put_back(c);
107         c = '\\';
108 }
109
110 static inline void next_char(void)
111 {
112         next_real_char();
113
114         /* filter trigraphs */
115         if(UNLIKELY(c == '\\')) {
116                 maybe_concat_lines();
117                 goto end_of_next_char;
118         }
119
120         if(LIKELY(c != '?'))
121                 goto end_of_next_char;
122
123         next_real_char();
124         if(LIKELY(c != '?')) {
125                 put_back(c);
126                 c = '?';
127                 goto end_of_next_char;
128         }
129
130         next_real_char();
131         switch(c) {
132         case '=': c = '#'; break;
133         case '(': c = '['; break;
134         case '/': c = '\\'; maybe_concat_lines(); break;
135         case ')': c = ']'; break;
136         case '\'': c = '^'; break;
137         case '<': c = '{'; break;
138         case '!': c = '|'; break;
139         case '>': c = '}'; break;
140         case '-': c = '~'; break;
141         default:
142                 put_back('?');
143                 put_back(c);
144                 c = '?';
145                 break;
146         }
147
148 end_of_next_char:;
149 #ifdef DEBUG_CHARS
150         printf("nchar '%c'\n", c);
151 #endif
152 }
153
/* Case labels for every character that may start an identifier:
 * the letters a-z and A-Z and the underscore. */
#define SYMBOL_CHARS                                                  \
	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':       \
	case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':       \
	case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':       \
	case 's': case 't': case 'u': case 'v': case 'w': case 'x':       \
	case 'y': case 'z':                                               \
	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':       \
	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':       \
	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':       \
	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':       \
	case 'Y': case 'Z':                                               \
	case '_':
208
/* Case labels for the decimal digits 0-9. */
#define DIGITS                                                        \
	case '0': case '1': case '2': case '3': case '4':                 \
	case '5': case '6': case '7': case '8': case '9':
220
221 static void parse_symbol(void)
222 {
223         symbol_t *symbol;
224         char     *string;
225
226         obstack_1grow(&symbol_obstack, c);
227         next_char();
228
229         while(1) {
230                 switch(c) {
231                 DIGITS
232                 SYMBOL_CHARS
233                         obstack_1grow(&symbol_obstack, c);
234                         next_char();
235                         break;
236
237                 default:
238                         goto end_symbol;
239                 }
240         }
241
242 end_symbol:
243         obstack_1grow(&symbol_obstack, '\0');
244
245         string = obstack_finish(&symbol_obstack);
246         symbol = symbol_table_insert(string);
247
248         lexer_token.type     = symbol->ID;
249         lexer_token.v.symbol = symbol;
250
251         if(symbol->string != string) {
252                 obstack_free(&symbol_obstack, string);
253         }
254 }
255
256 static void parse_integer_suffix(void)
257 {
258         if(c == 'U' || c == 'U') {
259                 /* TODO do something with the suffixes... */
260                 next_char();
261                 if(c == 'L' || c == 'l') {
262                         next_char();
263                         if(c == 'L' || c == 'l') {
264                                 next_char();
265                         }
266                 }
267         } else if(c == 'l' || c == 'L') {
268                 next_char();
269                 if(c == 'l' || c == 'L') {
270                         next_char();
271                         if(c == 'u' || c == 'U') {
272                                 next_char();
273                         }
274                 } else if(c == 'u' || c == 'U') {
275                         next_char();
276                 }
277         }
278 }
279
280 static void parse_floating_suffix(void)
281 {
282         switch(c) {
283         /* TODO: do something usefull with the suffixes... */
284         case 'f':
285         case 'F':
286         case 'l':
287         case 'L':
288                 next_char();
289                 break;
290         default:
291                 break;
292         }
293 }
294
/**
 * Tells whether c is a hexadecimal digit (0-9, a-f, A-F).
 *
 * @param c  character value to test (EOF yields false)
 * @return   true for hexadecimal digits, false otherwise
 */
static inline bool is_hex_digit(int c)
{
	/* only a-f/A-F are hex digits; the former a-z/A-Z range also swallowed
	 * suffix letters and the 'p' of hex floats */
	return (c >= '0' && c <= '9')
			|| (c >= 'a' && c <= 'f')
			|| (c >= 'A' && c <= 'F');
}
301
302 static void parse_number_hex(void)
303 {
304         assert(c == 'x' || c == 'X');
305         next_char();
306
307         while(is_hex_digit(c)) {
308                 obstack_1grow(&symbol_obstack, c);
309                 next_char();
310         }
311         obstack_1grow(&symbol_obstack, '\0');
312         char *string = obstack_finish(&symbol_obstack);
313
314         if(c == '.' || c == 'p' || c == 'P') {
315                 next_char();
316                 panic("Hex floating point numbers not implemented yet");
317         }
318         if(*string == '\0') {
319                 parse_error("invalid hex number");
320                 lexer_token.type = T_ERROR;
321         }
322
323         char *endptr;
324         lexer_token.type       = T_INTEGER;
325         lexer_token.v.intvalue = strtoll(string, &endptr, 16);
326         if(*endptr != '\0') {
327                 parse_error("hex number literal too long");
328         }
329
330         obstack_free(&symbol_obstack, string);
331 }
332
/**
 * Tells whether chr is an octal digit (0-7).
 *
 * @param chr  character value to test
 * @return     true for '0'..'7', false otherwise
 */
static inline bool is_octal_digit(int chr)
{
	if (chr < '0') {
		return false;
	}
	return chr <= '7';
}
337
338 static void parse_number_oct(void)
339 {
340         while(is_octal_digit(c)) {
341                 obstack_1grow(&symbol_obstack, c);
342                 next_char();
343         }
344         obstack_1grow(&symbol_obstack, '\0');
345         char *string = obstack_finish(&symbol_obstack);
346
347         char *endptr;
348         lexer_token.type       = T_INTEGER;
349         lexer_token.v.intvalue = strtoll(string, &endptr, 8);
350         if(*endptr != '\0') {
351                 parse_error("octal number literal too long");
352         }
353
354         obstack_free(&symbol_obstack, string);
355         parse_integer_suffix();
356 }
357
358 static void parse_number_dec(void)
359 {
360         bool is_float = false;
361         while(isdigit(c)) {
362                 obstack_1grow(&symbol_obstack, c);
363                 next_char();
364         }
365
366         if(c == '.') {
367                 obstack_1grow(&symbol_obstack, '.');
368                 next_char();
369
370                 while(isdigit(c)) {
371                         obstack_1grow(&symbol_obstack, c);
372                         next_char();
373                 }
374                 is_float = true;
375         }
376         if(c == 'e' || c == 'E') {
377                 obstack_1grow(&symbol_obstack, 'e');
378                 next_char();
379
380                 if(c == '-' || c == '+') {
381                         obstack_1grow(&symbol_obstack, c);
382                         next_char();
383                 }
384
385                 while(isdigit(c)) {
386                         obstack_1grow(&symbol_obstack, c);
387                         next_char();
388                 }
389                 is_float = true;
390         }
391
392         obstack_1grow(&symbol_obstack, '\0');
393         char *string = obstack_finish(&symbol_obstack);
394
395         char *endptr;
396         if(is_float) {
397                 lexer_token.type         = T_FLOATINGPOINT;
398                 lexer_token.v.floatvalue = strtold(string, &endptr);
399
400                 if(*endptr != '\0') {
401                         parse_error("invalid number literal");
402                 }
403
404                 parse_floating_suffix();
405         } else {
406                 lexer_token.type       = T_INTEGER;
407                 lexer_token.v.intvalue = strtoll(string, &endptr, 10);
408
409                 if(*endptr != '\0') {
410                         parse_error("invalid number literal");
411                 }
412
413                 parse_integer_suffix();
414         }
415         obstack_free(&symbol_obstack, string);
416 }
417
418 static void parse_number(void)
419 {
420         if (c == '0') {
421                 next_char();
422                 switch (c) {
423                         case 'X':
424                         case 'x':
425                                 parse_number_hex();
426                                 break;
427                         case '0':
428                         case '1':
429                         case '2':
430                         case '3':
431                         case '4':
432                         case '5':
433                         case '6':
434                         case '7':
435                                 parse_number_oct();
436                                 break;
437                         case '8':
438                         case '9':
439                                 next_char();
440                                 parse_error("invalid octal number");
441                                 lexer_token.type = T_ERROR;
442                                 return;
443                         case '.':
444                         case 'e':
445                         case 'E':
446                         default:
447                                 obstack_1grow(&symbol_obstack, '0');
448                                 parse_number_dec();
449                                 return;
450                 }
451         } else {
452                 parse_number_dec();
453         }
454 }
455
456 static int parse_octal_sequence(const int first_digit)
457 {
458         assert(is_octal_digit(first_digit));
459         int value = first_digit - '0';
460         if (!is_octal_digit(c)) return value;
461         value = 8 * value + c - '0';
462         next_char();
463         if (!is_octal_digit(c)) return value;
464         value = 8 * value + c - '0';
465         next_char();
466         return value;
467 }
468
469 static int parse_hex_sequence(void)
470 {
471         int value = 0;
472         while(1) {
473                 if (c >= '0' && c <= '9') {
474                         value = 16 * value + c - '0';
475                 } else if ('A' <= c && c <= 'F') {
476                         value = 16 * value + c - 'A' + 10;
477                 } else if ('a' <= c && c <= 'f') {
478                         value = 16 * value + c - 'a' + 10;
479                 } else {
480                         break;
481                 }
482                 next_char();
483         }
484
485         return value;
486 }
487
488 static int parse_escape_sequence(void)
489 {
490         eat('\\');
491
492         int ec = c;
493         next_char();
494
495         switch(ec) {
496         case '"':  return '"';
497         case '\'': return '\'';
498         case '\\': return '\\';
499         case '?': return '\?';
500         case 'a': return '\a';
501         case 'b': return '\b';
502         case 'f': return '\f';
503         case 'n': return '\n';
504         case 'r': return '\r';
505         case 't': return '\t';
506         case 'v': return '\v';
507         case 'x':
508                 return parse_hex_sequence();
509         case '0':
510         case '1':
511         case '2':
512         case '3':
513         case '4':
514         case '5':
515         case '6':
516         case '7':
517                 return parse_octal_sequence(ec);
518         case EOF:
519                 parse_error("reached end of file while parsing escape sequence");
520                 return EOF;
521         default:
522                 parse_error("unknown escape sequence");
523                 return EOF;
524         }
525 }
526
527 const char *concat_strings(const char *s1, const char *s2)
528 {
529         size_t  len1   = strlen(s1);
530         size_t  len2   = strlen(s2);
531
532         char   *concat = obstack_alloc(&symbol_obstack, len1 + len2 + 1);
533         memcpy(concat, s1, len1);
534         memcpy(concat + len1, s2, len2 + 1);
535
536         const char *result = strset_insert(&stringset, concat);
537         if(result != concat) {
538                 obstack_free(&symbol_obstack, concat);
539         }
540
541         return result;
542 }
543
544 static void parse_string_literal(void)
545 {
546         unsigned    start_linenr = lexer_token.source_position.linenr;
547         char       *string;
548         const char *result;
549
550         assert(c == '"');
551         next_char();
552
553         int tc;
554         while(1) {
555                 switch(c) {
556                 case '\\':
557                         tc = parse_escape_sequence();
558                         obstack_1grow(&symbol_obstack, tc);
559                         break;
560
561                 case EOF:
562                         error_prefix_at(lexer_token.source_position.input_name,
563                                         start_linenr);
564                         fprintf(stderr, "string has no end\n");
565                         lexer_token.type = T_ERROR;
566                         return;
567
568                 case '"':
569                         next_char();
570                         goto end_of_string;
571
572                 default:
573                         obstack_1grow(&symbol_obstack, c);
574                         next_char();
575                         break;
576                 }
577         }
578
579 end_of_string:
580
581         /* TODO: concatenate multiple strings separated by whitespace... */
582
583         /* add finishing 0 to the string */
584         obstack_1grow(&symbol_obstack, '\0');
585         string = obstack_finish(&symbol_obstack);
586
587         /* check if there is already a copy of the string */
588         result = strset_insert(&stringset, string);
589         if(result != string) {
590                 obstack_free(&symbol_obstack, string);
591         }
592
593         lexer_token.type     = T_STRING_LITERAL;
594         lexer_token.v.string = result;
595 }
596
597 static void parse_character_constant(void)
598 {
599         eat('\'');
600
601         int found_char = 0;
602         while(1) {
603                 switch(c) {
604                 case '\\':
605                         found_char = parse_escape_sequence();
606                         break;
607
608                 MATCH_NEWLINE(
609                         parse_error("newline while parsing character constant");
610                         break;
611                 )
612
613                 case '\'':
614                         next_char();
615                         goto end_of_char_constant;
616
617                 case EOF:
618                         parse_error("EOF while parsing character constant");
619                         lexer_token.type = T_ERROR;
620                         return;
621
622                 default:
623                         if(found_char != 0) {
624                                 parse_error("more than 1 characters in character "
625                                             "constant");
626                                 goto end_of_char_constant;
627                         } else {
628                                 found_char = c;
629                                 next_char();
630                         }
631                         break;
632                 }
633         }
634
635 end_of_char_constant:
636         lexer_token.type       = T_INTEGER;
637         lexer_token.v.intvalue = found_char;
638 }
639
640 static void skip_multiline_comment(void)
641 {
642         unsigned start_linenr = lexer_token.source_position.linenr;
643
644         while(1) {
645                 switch(c) {
646                 case '*':
647                         next_char();
648                         if(c == '/') {
649                                 next_char();
650                                 return;
651                         }
652                         break;
653
654                 MATCH_NEWLINE(break;)
655
656                 case EOF:
657                         error_prefix_at(lexer_token.source_position.input_name,
658                                         start_linenr);
659                         fprintf(stderr, "at end of file while looking for comment end\n");
660                         return;
661
662                 default:
663                         next_char();
664                         break;
665                 }
666         }
667 }
668
669 static void skip_line_comment(void)
670 {
671         while(1) {
672                 switch(c) {
673                 case EOF:
674                         return;
675
676                 case '\n':
677                 case '\r':
678                         return;
679
680                 default:
681                         next_char();
682                         break;
683                 }
684         }
685 }
686
/* Copy of the token most recently read through next_pp_token(); the
 * preprocessor directive handlers inspect this instead of lexer_token. */
static token_t pp_token;

/* Reads the next preprocessing token and mirrors it into pp_token. */
static inline void next_pp_token(void)
{
        lexer_next_preprocessing_token();
        pp_token = lexer_token;
}
694
695 static void eat_until_newline(void)
696 {
697         while(pp_token.type != '\n' && pp_token.type != T_EOF) {
698                 next_pp_token();
699         }
700 }
701
/**
 * Handles an #error directive (also used as a placeholder for directives
 * that are not implemented): reports it and discards the rest of the line.
 */
static void error_directive(void)
{
	error_prefix();
	fprintf(stderr, "#error directive: \n");

	/* Skip the pp-tokens up to the new-line; otherwise the remainder of the
	 * directive line would be handed to the parser as ordinary tokens. */
	eat_until_newline();
}
709
710 static void define_directive(void)
711 {
712         lexer_next_preprocessing_token();
713         if(lexer_token.type != T_IDENTIFIER) {
714                 parse_error("expected identifier after #define\n");
715                 eat_until_newline();
716         }
717 }
718
/*
 * Handles #ifdef (is_ifndef == 0) and #ifndef (is_ifndef != 0).
 * Stub: reads one preprocessing token after the directive name and ignores
 * is_ifndef; no conditional inclusion is performed.
 */
static void ifdef_directive(int is_ifndef)
{
        (void) is_ifndef;
        lexer_next_preprocessing_token();
        //expect_identifier();
        //expect_newline();
}
726
/* Handles #endif. Stub: currently does nothing. */
static void endif_directive(void)
{
        //expect_newline();
}
731
732 static void parse_line_directive(void)
733 {
734         if(pp_token.type != T_INTEGER) {
735                 parse_error("expected integer");
736         } else {
737                 lexer_token.source_position.linenr = pp_token.v.intvalue - 1;
738                 next_pp_token();
739         }
740         if(pp_token.type == T_STRING_LITERAL) {
741                 lexer_token.source_position.input_name = pp_token.v.string;
742                 next_pp_token();
743         }
744
745         eat_until_newline();
746 }
747
748 static void parse_preprocessor_identifier(void)
749 {
750         assert(pp_token.type == T_IDENTIFIER);
751         symbol_t *symbol = pp_token.v.symbol;
752
753         switch(symbol->pp_ID) {
754         case TP_include:
755                 printf("include - enable header name parsing!\n");
756                 break;
757         case TP_define:
758                 define_directive();
759                 break;
760         case TP_ifdef:
761                 ifdef_directive(0);
762                 break;
763         case TP_ifndef:
764                 ifdef_directive(1);
765                 break;
766         case TP_endif:
767                 endif_directive();
768                 break;
769         case TP_line:
770                 next_pp_token();
771                 parse_line_directive();
772                 break;
773         case TP_if:
774         case TP_else:
775         case TP_elif:
776         case TP_undef:
777         case TP_error:
778                 error_directive();
779                 break;
780         case TP_pragma:
781                 break;
782         }
783 }
784
785 static void parse_preprocessor_directive(void)
786 {
787         next_pp_token();
788
789         switch(pp_token.type) {
790         case T_IDENTIFIER:
791                 parse_preprocessor_identifier();
792                 break;
793         case T_INTEGER:
794                 parse_line_directive();
795                 break;
796         default:
797                 parse_error("invalid preprocessor directive");
798                 eat_until_newline();
799                 break;
800         }
801 }
802
/*
 * Helper macros for multi-character punctuators inside the switch of
 * lexer_next_preprocessing_token(). Usage after a `case` label:
 *
 *   MAYBE_PROLOG        consumes the first character and opens a nested
 *                       while(1) { switch(c) {
 *   MAYBE(ch, type)     if the next character is ch: consume it, set the
 *                       token type and return
 *   ELSE_CODE(code)     default label running `code`; also closes the nested
 *                       switch and loop
 *   ELSE(type)          ELSE_CODE that sets the token type and returns
 */
#define MAYBE_PROLOG                                       \
                        next_char();                                   \
                        while(1) {                                     \
                                switch(c) {

#define MAYBE(ch, set_type)                                \
                                case ch:                                   \
                                        next_char();                           \
                                        lexer_token.type = set_type;           \
                                        return;

#define ELSE_CODE(code)                                    \
                                default:                                   \
                                        code;                                  \
                                }                                          \
                        } /* end of while(1) */                        \
                        break;

#define ELSE(set_type)                                     \
                ELSE_CODE(                                         \
                        lexer_token.type = set_type;                   \
                        return;                                        \
                )
826
827 void lexer_next_preprocessing_token(void)
828 {
829         while(1) {
830                 switch(c) {
831                 case ' ':
832                 case '\t':
833                         next_char();
834                         break;
835
836                 MATCH_NEWLINE(
837                         lexer_token.type = '\n';
838                         return;
839                 )
840
841                 SYMBOL_CHARS
842                         parse_symbol();
843                         /* might be a wide string ( L"string" ) */
844                         if(c == '"' && (lexer_token.type == T_IDENTIFIER &&
845                            lexer_token.v.symbol == symbol_L)) {
846                                 parse_string_literal();
847                                 return;
848                         }
849                         return;
850
851                 DIGITS
852                         parse_number();
853                         return;
854
855                 case '"':
856                         parse_string_literal();
857                         return;
858
859                 case '\'':
860                         parse_character_constant();
861                         return;
862
863                 case '.':
864                         MAYBE_PROLOG
865                                 case '.':
866                                         MAYBE_PROLOG
867                                         MAYBE('.', T_DOTDOTDOT)
868                                         ELSE_CODE(
869                                                 put_back(c);
870                                                 c = '.';
871                                                 lexer_token.type = '.';
872                                                 return;
873                                         )
874                         ELSE('.')
875                 case '&':
876                         MAYBE_PROLOG
877                         MAYBE('&', T_ANDAND)
878                         MAYBE('=', T_ANDEQUAL)
879                         ELSE('&')
880                 case '*':
881                         MAYBE_PROLOG
882                         MAYBE('=', T_ASTERISKEQUAL)
883                         ELSE('*')
884                 case '+':
885                         MAYBE_PROLOG
886                         MAYBE('+', T_PLUSPLUS)
887                         MAYBE('=', T_PLUSEQUAL)
888                         ELSE('+')
889                 case '-':
890                         MAYBE_PROLOG
891                         MAYBE('>', T_MINUSGREATER)
892                         MAYBE('-', T_MINUSMINUS)
893                         MAYBE('=', T_MINUSEQUAL)
894                         ELSE('-')
895                 case '!':
896                         MAYBE_PROLOG
897                         MAYBE('=', T_EXCLAMATIONMARKEQUAL)
898                         ELSE('!')
899                 case '/':
900                         MAYBE_PROLOG
901                         MAYBE('=', T_SLASHEQUAL)
902                                 case '*':
903                                         next_char();
904                                         skip_multiline_comment();
905                                         lexer_next_preprocessing_token();
906                                         return;
907                                 case '/':
908                                         next_char();
909                                         skip_line_comment();
910                                         lexer_next_preprocessing_token();
911                                         return;
912                         ELSE('/')
913                 case '%':
914                         MAYBE_PROLOG
915                         MAYBE('>', T_PERCENTGREATER)
916                         MAYBE('=', T_PERCENTEQUAL)
917                                 case ':':
918                                         MAYBE_PROLOG
919                                                 case '%':
920                                                         MAYBE_PROLOG
921                                                         MAYBE(':', T_PERCENTCOLONPERCENTCOLON)
922                                                         ELSE_CODE(
923                                                                 put_back(c);
924                                                                 c = '%';
925                                                                 lexer_token.type = T_PERCENTCOLON;
926                                                                 return;
927                                                         )
928                                         ELSE(T_PERCENTCOLON)
929                         ELSE('%')
930                 case '<':
931                         MAYBE_PROLOG
932                         MAYBE(':', T_LESSCOLON)
933                         MAYBE('%', T_LESSPERCENT)
934                         MAYBE('=', T_LESSEQUAL)
935                                 case '<':
936                                         MAYBE_PROLOG
937                                         MAYBE('=', T_LESSLESSEQUAL)
938                                         ELSE(T_LESSLESS)
939                         ELSE('<')
940                 case '>':
941                         MAYBE_PROLOG
942                         MAYBE('=', T_GREATEREQUAL)
943                                 case '>':
944                                         MAYBE_PROLOG
945                                         MAYBE('=', T_GREATERGREATEREQUAL)
946                                         ELSE(T_GREATERGREATER)
947                         ELSE('>')
948                 case '^':
949                         MAYBE_PROLOG
950                         MAYBE('=', T_CARETEQUAL)
951                         ELSE('^')
952                 case '|':
953                         MAYBE_PROLOG
954                         MAYBE('=', T_PIPEEQUAL)
955                         MAYBE('|', T_PIPEPIPE)
956                         ELSE('|')
957                 case ':':
958                         MAYBE_PROLOG
959                         MAYBE('>', T_COLONGREATER)
960                         ELSE(':')
961                 case '=':
962                         MAYBE_PROLOG
963                         MAYBE('=', T_EQUALEQUAL)
964                         ELSE('=')
965                 case '#':
966                         MAYBE_PROLOG
967                         MAYBE('#', T_HASHHASH)
968                         ELSE('#')
969
970                 case '?':
971                 case '[':
972                 case ']':
973                 case '(':
974                 case ')':
975                 case '{':
976                 case '}':
977                 case '~':
978                 case ';':
979                 case ',':
980                 case '\\':
981                         lexer_token.type = c;
982                         next_char();
983                         return;
984
985                 case EOF:
986                         lexer_token.type = T_EOF;
987                         return;
988
989                 default:
990                         next_char();
991                         error_prefix();
992                         fprintf(stderr, "unknown character '%c' found\n", c);
993                         lexer_token.type = T_ERROR;
994                         return;
995                 }
996         }
997 }
998
999 void lexer_next_token(void)
1000 {
1001         lexer_next_preprocessing_token();
1002         if(lexer_token.type != '\n')
1003                 return;
1004
1005 newline_found:
1006         do {
1007                 lexer_next_preprocessing_token();
1008         } while(lexer_token.type == '\n');
1009
1010         if(lexer_token.type == '#') {
1011                 parse_preprocessor_directive();
1012                 goto newline_found;
1013         }
1014 }
1015
/* Initializes the lexer's string set; counterpart of exit_lexer(). */
void init_lexer(void)
{
        strset_init(&stringset);
}
1020
1021 void lexer_open_stream(FILE *stream, const char *input_name)
1022 {
1023         input                                  = stream;
1024         lexer_token.source_position.linenr     = 0;
1025         lexer_token.source_position.input_name = input_name;
1026
1027         symbol_L = symbol_table_insert("L");
1028
1029         /* place a virtual \n at the beginning so the lexer knows that we're
1030          * at the beginning of a line */
1031         c = '\n';
1032 }
1033
/* Destroys the lexer's string set; counterpart of init_lexer(). */
void exit_lexer(void)
{
        strset_destroy(&stringset);
}
1038
1039 static __attribute__((unused))
1040 void dbg_pos(const source_position_t source_position)
1041 {
1042         fprintf(stdout, "%s:%d\n", source_position.input_name,
1043                 source_position.linenr);
1044         fflush(stdout);
1045 }