Generate IR for calling function pointers.
[cparser] / lexer.c
1 #include <config.h>
2
3 #include "lexer.h"
4 #include "token_t.h"
5 #include "symbol_table_t.h"
6 #include "adt/error.h"
7 #include "adt/strset.h"
8 #include "adt/util.h"
9
10 #include <assert.h>
11 #include <errno.h>
12 #include <string.h>
13 #include <ctype.h>
14
/* Define DEBUG_CHARS to print every character that is read or put back. */
15 //#define DEBUG_CHARS
/* Number of bytes kept free at the start of buf: file data is always read to
 * buf + MAX_PUTBACK, which leaves room for put_back() to step bufpos
 * backwards. */
16 #define MAX_PUTBACK 3
17
/* Current input character, or EOF once the stream is exhausted. */
18 static int         c;
/* Token filled in by the lexer_next_*token() functions. */
19 token_t            lexer_token;
/* Symbol for the identifier "L" (set in lexer_open_stream); used to recognise
 * the L"..." wide string prefix. */
20 symbol_t          *symbol_L;
/* Stream the characters are read from. */
21 static FILE       *input;
/* Read buffer; the first MAX_PUTBACK bytes are reserved for put_back(). */
22 static char        buf[1024 + MAX_PUTBACK];
/* One past the last valid byte in buf. */
23 static const char *bufend;
/* Position in buf of the current character c. */
24 static const char *bufpos;
/* Set holding the string literals produced by the lexer, so that equal
 * literals share one copy. */
25 static strset_t    stringset;
26
/**
 * Write the "<input_name>:<linenr>: Error: " prefix of a diagnostic to
 * stderr. No newline is written; the caller appends the message text.
 */
static void error_prefix_at(const char *input_name, unsigned linenr)
{
        fprintf(stderr, "%s:", input_name);
        fprintf(stderr, "%u: Error: ", linenr);
}
31
32 static void error_prefix(void)
33 {
34         error_prefix_at(lexer_token.source_position.input_name,
35                         lexer_token.source_position.linenr);
36 }
37
/**
 * Report an error at the current lexer position: the diagnostic prefix,
 * then msg, then a newline, all on stderr.
 */
static void parse_error(const char *msg)
{
        error_prefix();
        fputs(msg, stderr);
        fputc('\n', stderr);
}
43
44 static inline void next_real_char(void)
45 {
46         bufpos++;
47         if(bufpos >= bufend) {
48                 size_t s = fread(buf + MAX_PUTBACK, 1, sizeof(buf) - MAX_PUTBACK,
49                                  input);
50                 if(s == 0) {
51                         c = EOF;
52                         return;
53                 }
54                 bufpos = buf + MAX_PUTBACK;
55                 bufend = buf + MAX_PUTBACK + s;
56         }
57         c = *(bufpos);
58 }
59
60 static inline void put_back(int pc)
61 {
62         assert(bufpos >= buf);
63         assert(bufpos < buf+MAX_PUTBACK || *bufpos == pc);
64
65         char *p = buf + (bufpos - buf);
66         *p = pc;
67
68         /* going backwards in the buffer is legal as long as it's not more often
69          * than MAX_PUTBACK */
70         bufpos--;
71
72 #ifdef DEBUG_CHARS
73         printf("putback '%c'\n", pc);
74 #endif
75 }
76
/* Forward declaration: the macros below and maybe_concat_lines() call
 * next_char() before it is defined. */
77 static inline void next_char(void);
78
/*
 * Expands to the two case labels '\r' and '\n' for use inside a switch on c.
 * Each label consumes the line ending ("\r", "\r\n" or "\n"), increments the
 * line number of lexer_token and then runs `code`.
 * There is no break between the two labels, so `code` has to leave the
 * switch itself (break, return, goto); otherwise the '\r' case falls through
 * into the '\n' case.
 */
79 #define MATCH_NEWLINE(code)                   \
80         case '\r':                                \
81                 next_char();                          \
82                 if(c == '\n') {                       \
83                         next_char();                      \
84                 }                                     \
85                 lexer_token.source_position.linenr++; \
86                 code;                                 \
87         case '\n':                                \
88                 next_char();                          \
89                 lexer_token.source_position.linenr++; \
90                 code;
91
/* Assert that the current character is c_type, then advance past it. */
92 #define eat(c_type)  do { assert(c == c_type); next_char(); } while(0)
93
94 static void maybe_concat_lines(void)
95 {
96         eat('\\');
97
98         switch(c) {
99         MATCH_NEWLINE(return;)
100
101         default:
102                 break;
103         }
104
105         put_back(c);
106         c = '\\';
107 }
108
/*
 * Advance the lexer by one character: calls next_real_char(), which stores
 * the new character (or EOF) in c.
 * The trigraph / backslash-newline filter below is compiled out by `#if 0`,
 * so at the moment this is a thin wrapper around next_real_char() plus the
 * optional DEBUG_CHARS trace.
 */
109 static inline void next_char(void)
110 {
111         next_real_char();
112
/* Disabled code: would replace trigraphs ("??=" etc.) and join lines ending
 * in a backslash. */
113 #if 0
114         /* filter trigraphs */
115         if(UNLIKELY(c == '\\')) {
116                 maybe_concat_lines();
117                 goto end_of_next_char;
118         }
119
120         if(LIKELY(c != '?'))
121                 goto end_of_next_char;
122
123         next_real_char();
124         if(LIKELY(c != '?')) {
125                 put_back(c);
126                 c = '?';
127                 goto end_of_next_char;
128         }
129
130         next_real_char();
131         switch(c) {
132         case '=': c = '#'; break;
133         case '(': c = '['; break;
134         case '/': c = '\\'; maybe_concat_lines(); break;
135         case ')': c = ']'; break;
136         case '\'': c = '^'; break;
137         case '<': c = '{'; break;
138         case '!': c = '|'; break;
139         case '>': c = '}'; break;
140         case '-': c = '~'; break;
141         default:
142                 put_back('?');
143                 put_back(c);
144                 c = '?';
145                 break;
146         }
147
148 end_of_next_char:
149 #endif
/* Reference maybe_concat_lines while the filter above is disabled --
 * presumably to avoid an "unused function" warning. */
150         (void) maybe_concat_lines;
151 #ifdef DEBUG_CHARS
152         printf("nchar '%c'\n", c);
153 #else
154         ;
155 #endif
156 }
157
/*
 * Case labels for the ASCII letters 'a'-'z', 'A'-'Z' and '_'.
 * Meant to be placed inside a switch body; the statement that follows the
 * macro becomes the handler for all of these characters.
 */
#define SYMBOL_CHARS                                                    \
        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':     \
        case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':     \
        case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':     \
        case 's': case 't': case 'u': case 'v': case 'w': case 'x':     \
        case 'y': case 'z':                                             \
        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':     \
        case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':     \
        case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':     \
        case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':     \
        case 'Y': case 'Z':                                             \
        case '_':
212
/*
 * Case labels for the decimal digits '0'-'9', for use inside a switch body
 * in the same way as SYMBOL_CHARS.
 */
#define DIGITS                                                          \
        case '0': case '1': case '2': case '3': case '4':               \
        case '5': case '6': case '7': case '8': case '9':
224
225 static void parse_symbol(void)
226 {
227         symbol_t *symbol;
228         char     *string;
229
230         obstack_1grow(&symbol_obstack, c);
231         next_char();
232
233         while(1) {
234                 switch(c) {
235                 DIGITS
236                 SYMBOL_CHARS
237                         obstack_1grow(&symbol_obstack, c);
238                         next_char();
239                         break;
240
241                 default:
242                         goto end_symbol;
243                 }
244         }
245
246 end_symbol:
247         obstack_1grow(&symbol_obstack, '\0');
248
249         string = obstack_finish(&symbol_obstack);
250         symbol = symbol_table_insert(string);
251
252         lexer_token.type     = symbol->ID;
253         lexer_token.v.symbol = symbol;
254
255         if(symbol->string != string) {
256                 obstack_free(&symbol_obstack, string);
257         }
258 }
259
260 static void parse_integer_suffix(void)
261 {
262         if(c == 'U' || c == 'U') {
263                 /* TODO do something with the suffixes... */
264                 next_char();
265                 if(c == 'L' || c == 'l') {
266                         next_char();
267                         if(c == 'L' || c == 'l') {
268                                 next_char();
269                         }
270                 }
271         } else if(c == 'l' || c == 'L') {
272                 next_char();
273                 if(c == 'l' || c == 'L') {
274                         next_char();
275                         if(c == 'u' || c == 'U') {
276                                 next_char();
277                         }
278                 } else if(c == 'u' || c == 'U') {
279                         next_char();
280                 }
281         }
282 }
283
284 static void parse_floating_suffix(void)
285 {
286         switch(c) {
287         /* TODO: do something usefull with the suffixes... */
288         case 'f':
289         case 'F':
290         case 'l':
291         case 'L':
292                 next_char();
293                 break;
294         default:
295                 break;
296         }
297 }
298
299 static void parse_number_hex(void)
300 {
301         assert(c == 'x' || c == 'X');
302         next_char();
303
304         if (!isdigit(c) &&
305                 !('A' <= c && c <= 'F') &&
306                 !('a' <= c && c <= 'f')) {
307                 parse_error("premature end of hex number literal");
308                 lexer_token.type = T_ERROR;
309                 return;
310         }
311
312         int value = 0;
313         while(1) {
314                 if (isdigit(c)) {
315                         value = 16 * value + c - '0';
316                 } else if ('A' <= c && c <= 'F') {
317                         value = 16 * value + c - 'A' + 10;
318                 } else if ('a' <= c && c <= 'f') {
319                         value = 16 * value + c - 'a' + 10;
320                 } else {
321                         parse_integer_suffix();
322
323                         lexer_token.type       = T_INTEGER;
324                         lexer_token.v.intvalue = value;
325                         return;
326                 }
327                 next_char();
328         }
329
330         if(c == '.' || c == 'p' || c == 'P') {
331                 next_char();
332                 panic("Hex floating point numbers not implemented yet");
333         }
334 }
335
336 static void parse_number_oct(void)
337 {
338         int value = 0;
339         while(c >= '0' && c <= '7') {
340                 value = 8 * value + c - '0';
341                 next_char();
342         }
343         if (c == '8' || c == '9') {
344                 parse_error("invalid octal number");
345                 lexer_token.type = T_ERROR;
346                 return;
347         }
348
349         lexer_token.type       = T_INTEGER;
350         lexer_token.v.intvalue = value;
351
352         parse_integer_suffix();
353 }
354
355 static void parse_floatingpoint_exponent(long double value)
356 {
357         unsigned int expo = 0;
358         long double  factor = 10.;
359
360         if(c == '-') {
361                 next_char();
362                 factor = 0.1;
363         } else if(c == '+') {
364                 next_char();
365         }
366
367         while(c >= '0' && c <= '9') {
368                 expo = 10 * expo + (c - '0');
369                 next_char();
370         }
371
372         while(1) {
373                 if(expo & 1)
374                         value *= factor;
375                 expo >>= 1;
376                 if(expo == 0)
377                         break;
378                 factor *= factor;
379         }
380
381         lexer_token.type         = T_FLOATINGPOINT;
382         lexer_token.v.floatvalue = value;
383
384         parse_floating_suffix();
385 }
386
387 static void parse_floatingpoint_fract(int integer_part)
388 {
389         long double value  = integer_part;
390         long double factor = 1.;
391
392         while(c >= '0' && c <= '9') {
393                 factor *= 0.1;
394                 value  += (c - '0') * factor;
395                 next_char();
396         }
397
398         if(c == 'e' || c == 'E') {
399                 next_char();
400                 parse_floatingpoint_exponent(value);
401                 return;
402         }
403
404         lexer_token.type         = T_FLOATINGPOINT;
405         lexer_token.v.floatvalue = value;
406
407         parse_floating_suffix();
408 }
409
410 static void parse_number_dec(void)
411 {
412         int value = 0;
413
414         while(isdigit(c)) {
415                 value = 10 * value + c - '0';
416                 next_char();
417         }
418
419         if(c == '.') {
420                 next_char();
421                 parse_floatingpoint_fract(value);
422                 return;
423         }
424         if(c == 'e' || c == 'E') {
425                 next_char();
426                 parse_floatingpoint_exponent(value);
427                 return;
428         }
429         parse_integer_suffix();
430
431         lexer_token.type       = T_INTEGER;
432         lexer_token.v.intvalue = value;
433 }
434
435 static void parse_number(void)
436 {
437         if (c == '0') {
438                 next_char();
439                 switch (c) {
440                         case 'X':
441                         case 'x':
442                                 parse_number_hex();
443                                 break;
444                         case '0':
445                         case '1':
446                         case '2':
447                         case '3':
448                         case '4':
449                         case '5':
450                         case '6':
451                         case '7':
452                                 parse_number_oct();
453                                 break;
454                         case '.':
455                                 next_char();
456                                 parse_floatingpoint_fract(0);
457                                 break;
458                         case 'e':
459                         case 'E':
460                                 parse_floatingpoint_exponent(0);
461                                 break;
462                         case '8':
463                         case '9':
464                                 next_char();
465                                 parse_error("invalid octal number");
466                                 lexer_token.type = T_ERROR;
467                                 return;
468                         default:
469                                 put_back(c);
470                                 c = '0';
471                                 parse_number_dec();
472                                 return;
473                 }
474         } else {
475                 parse_number_dec();
476         }
477 }
478
/**
 * Tell whether chr is one of the characters '0' to '7'.
 *
 * @return 1 if chr is an octal digit, 0 otherwise
 */
static inline int is_octal_digit(int chr)
{
        return chr >= '0' && chr <= '7';
}
483
484 static int parse_octal_sequence(const int first_digit)
485 {
486         assert(is_octal_digit(first_digit));
487         int value = first_digit - '0';
488         if (!is_octal_digit(c)) return value;
489         value = 8 * value + c - '0';
490         next_char();
491         if (!is_octal_digit(c)) return value;
492         value = 8 * value + c - '0';
493         next_char();
494         return value;
495 }
496
497 static int parse_hex_sequence(void)
498 {
499         int value = 0;
500         while(1) {
501                 if (c >= '0' && c <= '9') {
502                         value = 16 * value + c - '0';
503                 } else if ('A' <= c && c <= 'F') {
504                         value = 16 * value + c - 'A' + 10;
505                 } else if ('a' <= c && c <= 'f') {
506                         value = 16 * value + c - 'a' + 10;
507                 } else {
508                         break;
509                 }
510                 next_char();
511         }
512
513         return value;
514 }
515
516 static int parse_escape_sequence(void)
517 {
518         eat('\\');
519
520         int ec = c;
521         next_char();
522
523         switch(ec) {
524         case '"':  return '"';
525         case '\'': return '\'';
526         case '\\': return '\\';
527         case '?': return '\?';
528         case 'a': return '\a';
529         case 'b': return '\b';
530         case 'f': return '\f';
531         case 'n': return '\n';
532         case 'r': return '\r';
533         case 't': return '\t';
534         case 'v': return '\v';
535         case 'x':
536                 return parse_hex_sequence();
537         case '0':
538         case '1':
539         case '2':
540         case '3':
541         case '4':
542         case '5':
543         case '6':
544         case '7':
545                 return parse_octal_sequence(ec);
546         case EOF:
547                 parse_error("reached end of file while parsing escape sequence");
548                 return EOF;
549         default:
550                 parse_error("unknown escape sequence");
551                 return EOF;
552         }
553 }
554
555 const char *concat_strings(const char *s1, const char *s2)
556 {
557         size_t  len1   = strlen(s1);
558         size_t  len2   = strlen(s2);
559
560         char   *concat = obstack_alloc(&symbol_obstack, len1 + len2 + 1);
561         memcpy(concat, s1, len1);
562         memcpy(concat + len1, s2, len2 + 1);
563
564         const char *result = strset_insert(&stringset, concat);
565         if(result != concat) {
566                 obstack_free(&symbol_obstack, concat);
567         }
568
569         return result;
570 }
571
572 static void parse_string_literal(void)
573 {
574         unsigned    start_linenr = lexer_token.source_position.linenr;
575         char       *string;
576         const char *result;
577
578         assert(c == '"');
579         next_char();
580
581         int tc;
582         while(1) {
583                 switch(c) {
584                 case '\\':
585                         tc = parse_escape_sequence();
586                         obstack_1grow(&symbol_obstack, tc);
587                         break;
588
589                 case EOF:
590                         error_prefix_at(lexer_token.source_position.input_name,
591                                         start_linenr);
592                         fprintf(stderr, "string has no end\n");
593                         lexer_token.type = T_ERROR;
594                         return;
595
596                 case '"':
597                         next_char();
598                         goto end_of_string;
599
600                 default:
601                         obstack_1grow(&symbol_obstack, c);
602                         next_char();
603                         break;
604                 }
605         }
606
607 end_of_string:
608
609         /* TODO: concatenate multiple strings separated by whitespace... */
610
611         /* add finishing 0 to the string */
612         obstack_1grow(&symbol_obstack, '\0');
613         string = obstack_finish(&symbol_obstack);
614
615         /* check if there is already a copy of the string */
616         result = strset_insert(&stringset, string);
617         if(result != string) {
618                 obstack_free(&symbol_obstack, string);
619         }
620
621         lexer_token.type     = T_STRING_LITERAL;
622         lexer_token.v.string = result;
623 }
624
625 static void parse_character_constant(void)
626 {
627         eat('\'');
628
629         int found_char = 0;
630         while(1) {
631                 switch(c) {
632                 case '\\':
633                         found_char = parse_escape_sequence();
634                         break;
635
636                 MATCH_NEWLINE(
637                         parse_error("newline while parsing character constant");
638                         break;
639                 )
640
641                 case '\'':
642                         next_char();
643                         goto end_of_char_constant;
644
645                 case EOF:
646                         parse_error("EOF while parsing character constant");
647                         lexer_token.type = T_ERROR;
648                         return;
649
650                 default:
651                         if(found_char != 0) {
652                                 parse_error("more than 1 characters in character "
653                                             "constant");
654                                 goto end_of_char_constant;
655                         } else {
656                                 found_char = c;
657                                 next_char();
658                         }
659                         break;
660                 }
661         }
662
663 end_of_char_constant:
664         lexer_token.type       = T_INTEGER;
665         lexer_token.v.intvalue = found_char;
666 }
667
668 static void skip_multiline_comment(void)
669 {
670         unsigned start_linenr = lexer_token.source_position.linenr;
671
672         while(1) {
673                 switch(c) {
674                 case '*':
675                         next_char();
676                         if(c == '/') {
677                                 next_char();
678                                 return;
679                         }
680                         break;
681
682                 MATCH_NEWLINE(break;)
683
684                 case EOF:
685                         error_prefix_at(lexer_token.source_position.input_name,
686                                         start_linenr);
687                         fprintf(stderr, "at end of file while looking for comment end\n");
688                         return;
689
690                 default:
691                         next_char();
692                         break;
693                 }
694         }
695 }
696
697 static void skip_line_comment(void)
698 {
699         while(1) {
700                 switch(c) {
701                 case EOF:
702                         return;
703
704                 case '\n':
705                 case '\r':
706                         return;
707
708                 default:
709                         next_char();
710                         break;
711                 }
712         }
713 }
714
715 static token_t pp_token;
716
717 static inline void next_pp_token(void)
718 {
719         lexer_next_preprocessing_token();
720         pp_token = lexer_token;
721 }
722
723 static void eat_until_newline(void)
724 {
725         while(pp_token.type != '\n' && pp_token.type != T_EOF) {
726                 next_pp_token();
727         }
728 }
729
/**
 * Report an #error directive at the current position.
 * The tokens of the directive are not read or printed yet.
 */
static void error_directive(void)
{
        error_prefix();
        fputs("#error directive: \n", stderr);

        /* TODO: parse pp-tokens until new-line */
}
737
738 static void define_directive(void)
739 {
740         lexer_next_preprocessing_token();
741         if(lexer_token.type != T_IDENTIFIER) {
742                 parse_error("expected identifier after #define\n");
743                 eat_until_newline();
744         }
745 }
746
/**
 * Handle #ifdef / #ifndef. For now this only reads the token following the
 * directive name; no condition is evaluated.
 *
 * @param is_ifndef  non-zero for #ifndef, zero for #ifdef (currently unused)
 */
static void ifdef_directive(int is_ifndef)
{
        lexer_next_preprocessing_token();
        (void) is_ifndef;

        /* TODO: expect an identifier followed by a newline:
         *   expect_identifier();
         *   expect_newline();
         */
}
754
/**
 * Handle #endif. Currently a no-op.
 */
static void endif_directive(void)
{
        /* TODO: expect_newline(); */
}
759
760 static void parse_line_directive(void)
761 {
762         if(pp_token.type != T_INTEGER) {
763                 parse_error("expected integer");
764         } else {
765                 lexer_token.source_position.linenr = pp_token.v.intvalue - 1;
766                 next_pp_token();
767         }
768         if(pp_token.type == T_STRING_LITERAL) {
769                 lexer_token.source_position.input_name = pp_token.v.string;
770                 next_pp_token();
771         }
772
773         eat_until_newline();
774 }
775
776 static void parse_preprocessor_identifier(void)
777 {
778         assert(pp_token.type == T_IDENTIFIER);
779         symbol_t *symbol = pp_token.v.symbol;
780
781         switch(symbol->pp_ID) {
782         case TP_include:
783                 printf("include - enable header name parsing!\n");
784                 break;
785         case TP_define:
786                 define_directive();
787                 break;
788         case TP_ifdef:
789                 ifdef_directive(0);
790                 break;
791         case TP_ifndef:
792                 ifdef_directive(1);
793                 break;
794         case TP_endif:
795                 endif_directive();
796                 break;
797         case TP_line:
798                 next_pp_token();
799                 parse_line_directive();
800                 break;
801         case TP_if:
802         case TP_else:
803         case TP_elif:
804         case TP_undef:
805         case TP_error:
806                 error_directive();
807                 break;
808         case TP_pragma:
809                 break;
810         }
811 }
812
813 static void parse_preprocessor_directive(void)
814 {
815         next_pp_token();
816
817         switch(pp_token.type) {
818         case T_IDENTIFIER:
819                 parse_preprocessor_identifier();
820                 break;
821         case T_INTEGER:
822                 parse_line_directive();
823                 break;
824         default:
825                 parse_error("invalid preprocessor directive");
826                 eat_until_newline();
827                 break;
828         }
829 }
830
/*
 * Helper macros for lexing punctuators that share a first character.
 * They are only meaningful in the combination
 *     case X: MAYBE_PROLOG  MAYBE(...)...  ELSE(...) / ELSE_CODE(...)
 * because MAYBE_PROLOG opens a loop and a switch that ELSE_CODE closes.
 *
 * MAYBE_PROLOG: consume the first character and open a switch on the one
 * that follows.
 */
831 #define MAYBE_PROLOG                                       \
832                         next_char();                                   \
833                         while(1) {                                     \
834                                 switch(c) {
835
/* MAYBE: if the next character is ch, consume it, set the token type to
 * set_type and return from the enclosing function. */
836 #define MAYBE(ch, set_type)                                \
837                                 case ch:                                   \
838                                         next_char();                           \
839                                         lexer_token.type = set_type;           \
840                                         return;
841
/* ELSE_CODE: default branch running `code`; also closes the switch and the
 * loop opened by MAYBE_PROLOG and ends the outer case with a break. */
842 #define ELSE_CODE(code)                                    \
843                                 default:                                   \
844                                         code;                                  \
845                                 }                                          \
846                         } /* end of while(1) */                        \
847                         break;
848
/* ELSE: default branch that sets the token type to set_type and returns
 * without consuming the character that did not match. */
849 #define ELSE(set_type)                                     \
850                 ELSE_CODE(                                         \
851                         lexer_token.type = set_type;                   \
852                         return;                                        \
853                 )
854
855 void lexer_next_preprocessing_token(void)
856 {
857         while(1) {
858                 switch(c) {
859                 case ' ':
860                 case '\t':
861                         next_char();
862                         break;
863
864                 MATCH_NEWLINE(
865                         lexer_token.type = '\n';
866                         return;
867                 )
868
869                 SYMBOL_CHARS
870                         parse_symbol();
871                         /* might be a wide string ( L"string" ) */
872                         if(c == '"' && (lexer_token.type == T_IDENTIFIER &&
873                            lexer_token.v.symbol == symbol_L)) {
874                                 parse_string_literal();
875                                 return;
876                         }
877                         return;
878
879                 DIGITS
880                         parse_number();
881                         return;
882
883                 case '"':
884                         parse_string_literal();
885                         return;
886
887                 case '\'':
888                         parse_character_constant();
889                         return;
890
891                 case '.':
892                         MAYBE_PROLOG
893                                 case '.':
894                                         MAYBE_PROLOG
895                                         MAYBE('.', T_DOTDOTDOT)
896                                         ELSE_CODE(
897                                                 put_back(c);
898                                                 c = '.';
899                                                 lexer_token.type = '.';
900                                                 return;
901                                         )
902                         ELSE('.')
903                 case '&':
904                         MAYBE_PROLOG
905                         MAYBE('&', T_ANDAND)
906                         MAYBE('=', T_ANDEQUAL)
907                         ELSE('&')
908                 case '*':
909                         MAYBE_PROLOG
910                         MAYBE('=', T_ASTERISKEQUAL)
911                         ELSE('*')
912                 case '+':
913                         MAYBE_PROLOG
914                         MAYBE('+', T_PLUSPLUS)
915                         MAYBE('=', T_PLUSEQUAL)
916                         ELSE('+')
917                 case '-':
918                         MAYBE_PROLOG
919                         MAYBE('>', T_MINUSGREATER)
920                         MAYBE('-', T_MINUSMINUS)
921                         MAYBE('=', T_MINUSEQUAL)
922                         ELSE('-')
923                 case '!':
924                         MAYBE_PROLOG
925                         MAYBE('=', T_EXCLAMATIONMARKEQUAL)
926                         ELSE('!')
927                 case '/':
928                         MAYBE_PROLOG
929                         MAYBE('=', T_SLASHEQUAL)
930                                 case '*':
931                                         next_char();
932                                         skip_multiline_comment();
933                                         lexer_next_preprocessing_token();
934                                         return;
935                                 case '/':
936                                         next_char();
937                                         skip_line_comment();
938                                         lexer_next_preprocessing_token();
939                                         return;
940                         ELSE('/')
941                 case '%':
942                         MAYBE_PROLOG
943                         MAYBE('>', T_PERCENTGREATER)
944                         MAYBE('=', T_PERCENTEQUAL)
945                                 case ':':
946                                         MAYBE_PROLOG
947                                                 case '%':
948                                                         MAYBE_PROLOG
949                                                         MAYBE(':', T_PERCENTCOLONPERCENTCOLON)
950                                                         ELSE_CODE(
951                                                                 put_back(c);
952                                                                 c = '%';
953                                                                 lexer_token.type = T_PERCENTCOLON;
954                                                                 return;
955                                                         )
956                                         ELSE(T_PERCENTCOLON)
957                         ELSE('%')
958                 case '<':
959                         MAYBE_PROLOG
960                         MAYBE(':', T_LESSCOLON)
961                         MAYBE('%', T_LESSPERCENT)
962                         MAYBE('=', T_LESSEQUAL)
963                                 case '<':
964                                         MAYBE_PROLOG
965                                         MAYBE('=', T_LESSLESSEQUAL)
966                                         ELSE(T_LESSLESS)
967                         ELSE('<')
968                 case '>':
969                         MAYBE_PROLOG
970                         MAYBE('=', T_GREATEREQUAL)
971                                 case '>':
972                                         MAYBE_PROLOG
973                                         MAYBE('=', T_GREATERGREATEREQUAL)
974                                         ELSE(T_GREATERGREATER)
975                         ELSE('>')
976                 case '^':
977                         MAYBE_PROLOG
978                         MAYBE('=', T_CARETEQUAL)
979                         ELSE('^')
980                 case '|':
981                         MAYBE_PROLOG
982                         MAYBE('=', T_PIPEEQUAL)
983                         MAYBE('|', T_PIPEPIPE)
984                         ELSE('|')
985                 case ':':
986                         MAYBE_PROLOG
987                         MAYBE('>', T_COLONGREATER)
988                         ELSE(':')
989                 case '=':
990                         MAYBE_PROLOG
991                         MAYBE('=', T_EQUALEQUAL)
992                         ELSE('=')
993                 case '#':
994                         MAYBE_PROLOG
995                         MAYBE('#', T_HASHHASH)
996                         ELSE('#')
997
998                 case '?':
999                 case '[':
1000                 case ']':
1001                 case '(':
1002                 case ')':
1003                 case '{':
1004                 case '}':
1005                 case '~':
1006                 case ';':
1007                 case ',':
1008                 case '\\':
1009                         lexer_token.type = c;
1010                         next_char();
1011                         return;
1012
1013                 case EOF:
1014                         lexer_token.type = T_EOF;
1015                         return;
1016
1017                 default:
1018                         next_char();
1019                         error_prefix();
1020                         fprintf(stderr, "unknown character '%c' found\n", c);
1021                         lexer_token.type = T_ERROR;
1022                         return;
1023                 }
1024         }
1025 }
1026
1027 void lexer_next_token(void)
1028 {
1029         lexer_next_preprocessing_token();
1030         if(lexer_token.type != '\n')
1031                 return;
1032
1033 newline_found:
1034         do {
1035                 lexer_next_preprocessing_token();
1036         } while(lexer_token.type == '\n');
1037
1038         if(lexer_token.type == '#') {
1039                 parse_preprocessor_directive();
1040                 goto newline_found;
1041         }
1042 }
1043
1044 void init_lexer(void)
1045 {
1046         strset_init(&stringset);
1047 }
1048
1049 void lexer_open_stream(FILE *stream, const char *input_name)
1050 {
1051         input                                  = stream;
1052         lexer_token.source_position.linenr     = 0;
1053         lexer_token.source_position.input_name = input_name;
1054
1055         symbol_L = symbol_table_insert("L");
1056
1057         /* place a virtual \n at the beginning so the lexer knows that we're
1058          * at the beginning of a line */
1059         c = '\n';
1060 }
1061
1062 void exit_lexer(void)
1063 {
1064         strset_destroy(&stringset);
1065 }
1066
1067 static __attribute__((unused))
1068 void dbg_pos(const source_position_t source_position)
1069 {
1070         fprintf(stdout, "%s:%d\n", source_position.input_name,
1071                 source_position.linenr);
1072         fflush(stdout);
1073 }