stricter warnings
[cparser] / lexer.c
1 #include <config.h>
2
3 #include "lexer.h"
4 #include "token_t.h"
5 #include "symbol_table_t.h"
6 #include "adt/error.h"
7 #include "adt/strset.h"
8 #include "adt/util.h"
9
10 #include <assert.h>
11 #include <errno.h>
12 #include <string.h>
13 #include <ctype.h>
14
/* Define DEBUG_CHARS to trace every character read or put back on stdout. */
//#define DEBUG_CHARS
/* Number of bytes reserved in front of the read area of buf for put_back(). */
#define MAX_PUTBACK 3

/* Current input character, or EOF once fread() returns no more data. */
static int         c;
/* The token most recently produced by the lexer (also carries the current
 * source position). */
token_t            lexer_token;
/* Stream the lexer reads from; set by lexer_open_stream(). */
static FILE       *input;
/* Read buffer; fread() fills it starting at buf + MAX_PUTBACK. */
static char        buf[1024 + MAX_PUTBACK];
/* One past the last valid byte in buf. */
static const char *bufend;
/* Position in buf of the character currently held in c. */
static const char *bufpos;
/* Set used to keep a single copy of each string literal. */
static strset_t    stringset;
25
/**
 * Prints the "<file>:<line>: Error: " prefix of a diagnostic to stderr.
 * No newline is written; the caller prints the message itself.
 *
 * @param input_name  name of the input shown in the prefix
 * @param linenr      line number shown in the prefix
 */
static void error_prefix_at(const char *input_name, unsigned linenr)
{
        fprintf(stderr, "%s:%u: Error: ", input_name, linenr);
}
30
31 static void error_prefix(void)
32 {
33         error_prefix_at(lexer_token.source_position.input_name,
34                         lexer_token.source_position.linenr);
35 }
36
/**
 * Reports a lexer error at the current source position on stderr.
 *
 * @param msg  message text; a newline is appended by this function
 */
static void parse_error(const char *msg)
{
        error_prefix();
        fprintf(stderr, "%s\n", msg);
}
42
43 static inline void next_real_char(void)
44 {
45         bufpos++;
46         if(bufpos >= bufend) {
47                 size_t s = fread(buf + MAX_PUTBACK, 1, sizeof(buf) - MAX_PUTBACK,
48                                  input);
49                 if(s == 0) {
50                         c = EOF;
51                         return;
52                 }
53                 bufpos = buf + MAX_PUTBACK;
54                 bufend = buf + MAX_PUTBACK + s;
55         }
56         c = *(bufpos);
57 }
58
59 static inline void put_back(int pc)
60 {
61         assert(bufpos >= buf);
62         assert(bufpos < buf+MAX_PUTBACK || *bufpos == pc);
63
64         char *p = buf + (bufpos - buf);
65         *p = pc;
66
67         /* going backwards in the buffer is legal as long as it's not more often
68          * than MAX_PUTBACK */
69         bufpos--;
70
71 #ifdef DEBUG_CHARS
72         printf("putback '%c'\n", pc);
73 #endif
74 }
75
/* Defined further down; needed by the macros below. */
static inline void next_char(void);

/*
 * Expands to the two case labels '\r' and '\n' for use inside a switch(c).
 * Both consume the line ending ("\r\n" counts as a single one), increment
 * the current line number and then execute `code`. `code` must leave the
 * switch itself (break, return, ...), otherwise control falls through into
 * the next label.
 */
#define MATCH_NEWLINE(code)                   \
        case '\r':                                \
                next_char();                          \
                if(c == '\n') {                       \
                        next_char();                      \
                }                                     \
                lexer_token.source_position.linenr++; \
                code;                                 \
        case '\n':                                \
                next_char();                          \
                lexer_token.source_position.linenr++; \
                code;

/* Asserts that the current character is c_type and advances past it. */
#define eat(c_type)  do { assert(c == c_type); next_char(); } while(0)
92
93 static void maybe_concat_lines(void)
94 {
95         eat('\\');
96
97         switch(c) {
98         MATCH_NEWLINE(return;)
99
100         default:
101                 break;
102         }
103
104         put_back(c);
105         c = '\\';
106 }
107
/**
 * Advances to the next input character and stores it in c.
 *
 * Currently this is just next_real_char(); the trigraph and line
 * continuation filtering below is compiled out with #if 0.
 */
static inline void next_char(void)
{
        next_real_char();

#if 0
        /* filter trigraphs */
        if(UNLIKELY(c == '\\')) {
                maybe_concat_lines();
                goto end_of_next_char;
        }

        if(LIKELY(c != '?'))
                goto end_of_next_char;

        next_real_char();
        if(LIKELY(c != '?')) {
                put_back(c);
                c = '?';
                goto end_of_next_char;
        }

        next_real_char();
        switch(c) {
        case '=': c = '#'; break;
        case '(': c = '['; break;
        case '/': c = '\\'; maybe_concat_lines(); break;
        case ')': c = ']'; break;
        case '\'': c = '^'; break;
        case '<': c = '{'; break;
        case '!': c = '|'; break;
        case '>': c = '}'; break;
        case '-': c = '~'; break;
        default:
                put_back('?');
                put_back(c);
                c = '?';
                break;
        }

end_of_next_char:
#endif
        /* reference the function so it does not count as unused while the
         * block above is disabled */
        (void) maybe_concat_lines;
#ifdef DEBUG_CHARS
        printf("nchar '%c'\n", c);
#else
        ;
#endif
}
156
/*
 * Case labels for every character that may start an identifier
 * (ASCII letters and the underscore). For use inside a switch.
 */
#define SYMBOL_CHARS  \
        case 'a':         \
        case 'b':         \
        case 'c':         \
        case 'd':         \
        case 'e':         \
        case 'f':         \
        case 'g':         \
        case 'h':         \
        case 'i':         \
        case 'j':         \
        case 'k':         \
        case 'l':         \
        case 'm':         \
        case 'n':         \
        case 'o':         \
        case 'p':         \
        case 'q':         \
        case 'r':         \
        case 's':         \
        case 't':         \
        case 'u':         \
        case 'v':         \
        case 'w':         \
        case 'x':         \
        case 'y':         \
        case 'z':         \
        case 'A':         \
        case 'B':         \
        case 'C':         \
        case 'D':         \
        case 'E':         \
        case 'F':         \
        case 'G':         \
        case 'H':         \
        case 'I':         \
        case 'J':         \
        case 'K':         \
        case 'L':         \
        case 'M':         \
        case 'N':         \
        case 'O':         \
        case 'P':         \
        case 'Q':         \
        case 'R':         \
        case 'S':         \
        case 'T':         \
        case 'U':         \
        case 'V':         \
        case 'W':         \
        case 'X':         \
        case 'Y':         \
        case 'Z':         \
        case '_':

/* Case labels for the decimal digits '0'..'9'. For use inside a switch. */
#define DIGITS        \
        case '0':         \
        case '1':         \
        case '2':         \
        case '3':         \
        case '4':         \
        case '5':         \
        case '6':         \
        case '7':         \
        case '8':         \
        case '9':
223
224 static void parse_symbol(void)
225 {
226         symbol_t *symbol;
227         char     *string;
228
229         obstack_1grow(&symbol_obstack, c);
230         next_char();
231
232         while(1) {
233                 switch(c) {
234                 DIGITS
235                 SYMBOL_CHARS
236                         obstack_1grow(&symbol_obstack, c);
237                         next_char();
238                         break;
239
240                 default:
241                         goto end_symbol;
242                 }
243         }
244
245 end_symbol:
246         obstack_1grow(&symbol_obstack, '\0');
247
248         string = obstack_finish(&symbol_obstack);
249         symbol = symbol_table_insert(string);
250
251         lexer_token.type     = symbol->ID;
252         lexer_token.v.symbol = symbol;
253
254         if(symbol->string != string) {
255                 obstack_free(&symbol_obstack, string);
256         }
257 }
258
259 static void parse_integer_suffix(void)
260 {
261         if(c == 'U' || c == 'U') {
262                 /* TODO do something with the suffixes... */
263                 next_char();
264                 if(c == 'L' || c == 'l') {
265                         next_char();
266                         if(c == 'L' || c == 'l') {
267                                 next_char();
268                         }
269                 }
270         } else if(c == 'l' || c == 'L') {
271                 next_char();
272                 if(c == 'l' || c == 'L') {
273                         next_char();
274                         if(c == 'u' || c == 'U') {
275                                 next_char();
276                         }
277                 } else if(c == 'u' || c == 'U') {
278                         next_char();
279                 }
280         }
281 }
282
283 static void parse_floating_suffix(void)
284 {
285         switch(c) {
286         /* TODO: do something usefull with the suffixes... */
287         case 'f':
288         case 'F':
289         case 'l':
290         case 'L':
291                 next_char();
292                 break;
293         default:
294                 break;
295         }
296 }
297
298 static void parse_number_hex(void)
299 {
300         assert(c == 'x' || c == 'X');
301         next_char();
302
303         if (!isdigit(c) &&
304                 !('A' <= c && c <= 'F') &&
305                 !('a' <= c && c <= 'f')) {
306                 parse_error("premature end of hex number literal");
307                 lexer_token.type = T_ERROR;
308                 return;
309         }
310
311         int value = 0;
312         while(1) {
313                 if (isdigit(c)) {
314                         value = 16 * value + c - '0';
315                 } else if ('A' <= c && c <= 'F') {
316                         value = 16 * value + c - 'A' + 10;
317                 } else if ('a' <= c && c <= 'f') {
318                         value = 16 * value + c - 'a' + 10;
319                 } else {
320                         parse_integer_suffix();
321
322                         lexer_token.type       = T_INTEGER;
323                         lexer_token.v.intvalue = value;
324                         return;
325                 }
326                 next_char();
327         }
328
329         if(c == '.' || c == 'p' || c == 'P') {
330                 next_char();
331                 panic("Hex floating point numbers not implemented yet");
332         }
333 }
334
335 static void parse_number_oct(void)
336 {
337         int value = 0;
338         while(c >= '0' && c <= '7') {
339                 value = 8 * value + c - '0';
340                 next_char();
341         }
342         if (c == '8' || c == '9') {
343                 parse_error("invalid octal number");
344                 lexer_token.type = T_ERROR;
345                 return;
346         }
347
348         lexer_token.type       = T_INTEGER;
349         lexer_token.v.intvalue = value;
350
351         parse_integer_suffix();
352 }
353
354 static void parse_floatingpoint_exponent(long double value)
355 {
356         unsigned int expo = 0;
357         long double  factor = 10.;
358
359         if(c == '-') {
360                 next_char();
361                 factor = 0.1;
362         } else if(c == '+') {
363                 next_char();
364         }
365
366         while(c >= '0' && c <= '9') {
367                 expo = 10 * expo + (c - '0');
368                 next_char();
369         }
370
371         while(1) {
372                 if(expo & 1)
373                         value *= factor;
374                 expo >>= 1;
375                 if(expo == 0)
376                         break;
377                 factor *= factor;
378         }
379
380         lexer_token.type         = T_FLOATINGPOINT;
381         lexer_token.v.floatvalue = value;
382
383         parse_floating_suffix();
384 }
385
386 static void parse_floatingpoint_fract(int integer_part)
387 {
388         long double value  = integer_part;
389         long double factor = 1.;
390
391         while(c >= '0' && c <= '9') {
392                 factor *= 0.1;
393                 value  += (c - '0') * factor;
394                 next_char();
395         }
396
397         if(c == 'e' || c == 'E') {
398                 next_char();
399                 parse_floatingpoint_exponent(value);
400                 return;
401         }
402
403         lexer_token.type         = T_FLOATINGPOINT;
404         lexer_token.v.floatvalue = value;
405
406         parse_floating_suffix();
407 }
408
409 static void parse_number_dec(void)
410 {
411         int value = 0;
412
413         while(isdigit(c)) {
414                 value = 10 * value + c - '0';
415                 next_char();
416         }
417
418         if(c == '.') {
419                 next_char();
420                 parse_floatingpoint_fract(value);
421                 return;
422         }
423         if(c == 'e' || c == 'E') {
424                 next_char();
425                 parse_floatingpoint_exponent(value);
426                 return;
427         }
428         parse_integer_suffix();
429
430         lexer_token.type       = T_INTEGER;
431         lexer_token.v.intvalue = value;
432 }
433
434 static void parse_number(void)
435 {
436         if (c == '0') {
437                 next_char();
438                 switch (c) {
439                         case 'X':
440                         case 'x':
441                                 parse_number_hex();
442                                 break;
443                         case '0':
444                         case '1':
445                         case '2':
446                         case '3':
447                         case '4':
448                         case '5':
449                         case '6':
450                         case '7':
451                                 parse_number_oct();
452                                 break;
453                         case '.':
454                                 next_char();
455                                 parse_floatingpoint_fract(0);
456                                 break;
457                         case 'e':
458                         case 'E':
459                                 parse_floatingpoint_exponent(0);
460                                 break;
461                         case '8':
462                         case '9':
463                                 next_char();
464                                 parse_error("invalid octal number");
465                                 lexer_token.type = T_ERROR;
466                                 return;
467                         default:
468                                 put_back(c);
469                                 c = '0';
470                                 parse_number_dec();
471                                 return;
472                 }
473         } else {
474                 parse_number_dec();
475         }
476 }
477
478 static int parse_octal_sequence(void)
479 {
480         int value = 0;
481         while(1) {
482                 if(c < '0' || c > '7')
483                         break;
484                 value = 8 * value + c - '0';
485                 next_char();
486         }
487
488         return value;
489 }
490
491 static int parse_hex_sequence(void)
492 {
493         int value = 0;
494         while(1) {
495                 if (c >= '0' && c <= '9') {
496                         value = 16 * value + c - '0';
497                 } else if ('A' <= c && c <= 'F') {
498                         value = 16 * value + c - 'A' + 10;
499                 } else if ('a' <= c && c <= 'f') {
500                         value = 16 * value + c - 'a' + 10;
501                 } else {
502                         break;
503                 }
504                 next_char();
505         }
506
507         return value;
508 }
509
510 static int parse_escape_sequence(void)
511 {
512         eat('\\');
513
514         int ec = c;
515         next_char();
516
517         switch(ec) {
518         case '"':  return '"';
519         case '\'': return'\'';
520         case '\\': return '\\';
521         case '?': return '\?';
522         case 'a': return '\a';
523         case 'b': return '\b';
524         case 'f': return '\f';
525         case 'n': return '\n';
526         case 'r': return '\r';
527         case 't': return '\t';
528         case 'v': return '\v';
529         case 'x':
530                 return parse_hex_sequence();
531         case '0':
532         case '1':
533         case '2':
534         case '3':
535         case '4':
536         case '5':
537         case '6':
538         case '7':
539                 return parse_octal_sequence();
540         case EOF:
541                 parse_error("reached end of file while parsing escape sequence");
542                 return EOF;
543         default:
544                 parse_error("unknown escape sequence");
545                 return EOF;
546         }
547 }
548
549 const char *concat_strings(const char *s1, const char *s2)
550 {
551         size_t  len1   = strlen(s1);
552         size_t  len2   = strlen(s2);
553
554         char   *concat = obstack_alloc(&symbol_obstack, len1 + len2 + 1);
555         memcpy(concat, s1, len1);
556         memcpy(concat + len1, s2, len2 + 1);
557
558         const char *result = strset_insert(&stringset, concat);
559         if(result != concat) {
560                 obstack_free(&symbol_obstack, concat);
561         }
562
563         return result;
564 }
565
566 static void parse_string_literal(void)
567 {
568         unsigned    start_linenr = lexer_token.source_position.linenr;
569         char       *string;
570         const char *result;
571
572         assert(c == '"');
573         next_char();
574
575         int tc;
576         while(1) {
577                 switch(c) {
578                 case '\\':
579                         tc = parse_escape_sequence();
580                         obstack_1grow(&symbol_obstack, tc);
581                         break;
582
583                 case EOF:
584                         error_prefix_at(lexer_token.source_position.input_name,
585                                         start_linenr);
586                         fprintf(stderr, "string has no end\n");
587                         lexer_token.type = T_ERROR;
588                         return;
589
590                 case '"':
591                         next_char();
592                         goto end_of_string;
593
594                 default:
595                         obstack_1grow(&symbol_obstack, c);
596                         next_char();
597                         break;
598                 }
599         }
600
601 end_of_string:
602
603         /* TODO: concatenate multiple strings separated by whitespace... */
604
605         /* add finishing 0 to the string */
606         obstack_1grow(&symbol_obstack, '\0');
607         string = obstack_finish(&symbol_obstack);
608
609         /* check if there is already a copy of the string */
610         result = strset_insert(&stringset, string);
611         if(result != string) {
612                 obstack_free(&symbol_obstack, string);
613         }
614
615         lexer_token.type     = T_STRING_LITERAL;
616         lexer_token.v.string = result;
617 }
618
619 static void parse_character_constant(void)
620 {
621         eat('\'');
622
623         int found_char = 0;
624         while(1) {
625                 switch(c) {
626                 case '\\':
627                         found_char = parse_escape_sequence();
628                         break;
629
630                 MATCH_NEWLINE(
631                         parse_error("newline while parsing character constant");
632                         break;
633                 )
634
635                 case '\'':
636                         next_char();
637                         goto end_of_char_constant;
638
639                 case EOF:
640                         parse_error("EOF while parsing character constant");
641                         lexer_token.type = T_ERROR;
642                         return;
643
644                 default:
645                         if(found_char != 0) {
646                                 parse_error("more than 1 characters in character "
647                                             "constant");
648                                 goto end_of_char_constant;
649                         } else {
650                                 found_char = c;
651                                 next_char();
652                         }
653                         break;
654                 }
655         }
656
657 end_of_char_constant:
658         lexer_token.type       = T_INTEGER;
659         lexer_token.v.intvalue = found_char;
660 }
661
662 static void skip_multiline_comment(void)
663 {
664         unsigned start_linenr = lexer_token.source_position.linenr;
665
666         while(1) {
667                 switch(c) {
668                 case '*':
669                         next_char();
670                         if(c == '/') {
671                                 next_char();
672                                 return;
673                         }
674                         break;
675
676                 MATCH_NEWLINE(break;)
677
678                 case EOF:
679                         error_prefix_at(lexer_token.source_position.input_name,
680                                         start_linenr);
681                         fprintf(stderr, "at end of file while looking for comment end\n");
682                         return;
683
684                 default:
685                         next_char();
686                         break;
687                 }
688         }
689 }
690
691 static void skip_line_comment(void)
692 {
693         while(1) {
694                 switch(c) {
695                 case EOF:
696                         return;
697
698                 case '\n':
699                 case '\r':
700                         return;
701
702                 default:
703                         next_char();
704                         break;
705                 }
706         }
707 }
708
/* Copy of the most recent token read while handling a preprocessor
 * directive. */
static token_t pp_token;

/**
 * Reads the next preprocessing token and stores a copy of it in pp_token.
 */
static inline void next_pp_token(void)
{
        lexer_next_preprocessing_token();
        pp_token = lexer_token;
}
716
717 static void eat_until_newline(void)
718 {
719         while(pp_token.type != '\n' && pp_token.type != T_EOF) {
720                 next_pp_token();
721         }
722 }
723
/**
 * Reports an #error directive on stderr. The text of the directive is not
 * printed and its tokens are not consumed here yet.
 */
static void error_directive(void)
{
        error_prefix();
        fprintf(stderr, "#error directive: \n");

        /* parse pp-tokens until new-line */
}
731
732 static void define_directive(void)
733 {
734         lexer_next_preprocessing_token();
735         if(lexer_token.type != T_IDENTIFIER) {
736                 parse_error("expected identifier after #define\n");
737                 eat_until_newline();
738         }
739 }
740
/**
 * Placeholder for #ifdef / #ifndef handling: reads one preprocessing token
 * and does nothing else yet.
 *
 * @param is_ifndef  nonzero for #ifndef, zero for #ifdef (currently unused)
 */
static void ifdef_directive(int is_ifndef)
{
        (void) is_ifndef;
        lexer_next_preprocessing_token();
        //expect_identifier();
        //extect_newline();
}
748
/**
 * Placeholder for #endif handling; does nothing yet.
 */
static void endif_directive(void)
{
        //expect_newline();
}
753
754 static void parse_line_directive(void)
755 {
756         if(pp_token.type != T_INTEGER) {
757                 parse_error("expected integer");
758         } else {
759                 lexer_token.source_position.linenr = pp_token.v.intvalue - 1;
760                 next_pp_token();
761         }
762         if(pp_token.type == T_STRING_LITERAL) {
763                 lexer_token.source_position.input_name = pp_token.v.string;
764                 next_pp_token();
765         }
766
767         eat_until_newline();
768 }
769
770 static void parse_preprocessor_identifier(void)
771 {
772         assert(pp_token.type == T_IDENTIFIER);
773         symbol_t *symbol = pp_token.v.symbol;
774
775         switch(symbol->pp_ID) {
776         case TP_include:
777                 printf("include - enable header name parsing!\n");
778                 break;
779         case TP_define:
780                 define_directive();
781                 break;
782         case TP_ifdef:
783                 ifdef_directive(0);
784                 break;
785         case TP_ifndef:
786                 ifdef_directive(1);
787                 break;
788         case TP_endif:
789                 endif_directive();
790                 break;
791         case TP_line:
792                 next_pp_token();
793                 parse_line_directive();
794                 break;
795         case TP_if:
796         case TP_else:
797         case TP_elif:
798         case TP_undef:
799         case TP_error:
800                 error_directive();
801                 break;
802         case TP_pragma:
803                 break;
804         }
805 }
806
807 static void parse_preprocessor_directive(void)
808 {
809         next_pp_token();
810
811         switch(pp_token.type) {
812         case T_IDENTIFIER:
813                 parse_preprocessor_identifier();
814                 break;
815         case T_INTEGER:
816                 parse_line_directive();
817                 break;
818         default:
819                 parse_error("invalid preprocessor directive");
820                 eat_until_newline();
821                 break;
822         }
823 }
824
/*
 * Helper macros for lexing punctuators that consist of several characters.
 * They are used inside a case of switch(c) in the form
 *
 *     case '+':
 *             MAYBE_PROLOG
 *             MAYBE('+', T_PLUSPLUS)
 *             MAYBE('=', T_PLUSEQUAL)
 *             ELSE('+')
 *
 * MAYBE_PROLOG consumes the first character and opens a nested
 * while(1) { switch(c) { that is closed again by ELSE_CODE / ELSE.
 */
#define MAYBE_PROLOG                                       \
                        next_char();                                   \
                        while(1) {                                     \
                                switch(c) {

/* If the next character is ch: consume it and return a set_type token. */
#define MAYBE(ch, set_type)                                \
                                case ch:                                   \
                                        next_char();                           \
                                        lexer_token.type = set_type;           \
                                        return;

/* Default branch running `code`; closes what MAYBE_PROLOG opened. */
#define ELSE_CODE(code)                                    \
                                default:                                   \
                                        code;                                  \
                                }                                          \
                        } /* end of while(1) */                        \
                        break;

/* Default branch: return a set_type token without consuming anything more. */
#define ELSE(set_type)                                     \
                ELSE_CODE(                                         \
                        lexer_token.type = set_type;                   \
                        return;                                        \
                )
848
849 void lexer_next_preprocessing_token(void)
850 {
851         while(1) {
852                 switch(c) {
853                 case ' ':
854                 case '\t':
855                         next_char();
856                         break;
857
858                 MATCH_NEWLINE(
859                         lexer_token.type = '\n';
860                         return;
861                 )
862
863                 SYMBOL_CHARS
864                         parse_symbol();
865                         return;
866
867                 DIGITS
868                         parse_number();
869                         return;
870
871                 case '"':
872                         parse_string_literal();
873                         return;
874
875                 case '\'':
876                         parse_character_constant();
877                         return;
878
879                 case '.':
880                         MAYBE_PROLOG
881                                 case '.':
882                                         MAYBE_PROLOG
883                                         MAYBE('.', T_DOTDOTDOT)
884                                         ELSE_CODE(
885                                                 put_back(c);
886                                                 c = '.';
887                                                 lexer_token.type = '.';
888                                                 return;
889                                         )
890                         ELSE('.')
891                 case '&':
892                         MAYBE_PROLOG
893                         MAYBE('&', T_ANDAND)
894                         MAYBE('=', T_ANDEQUAL)
895                         ELSE('&')
896                 case '*':
897                         MAYBE_PROLOG
898                         MAYBE('=', T_ASTERISKEQUAL)
899                         ELSE('*')
900                 case '+':
901                         MAYBE_PROLOG
902                         MAYBE('+', T_PLUSPLUS)
903                         MAYBE('=', T_PLUSEQUAL)
904                         ELSE('+')
905                 case '-':
906                         MAYBE_PROLOG
907                         MAYBE('>', T_MINUSGREATER)
908                         MAYBE('-', T_MINUSMINUS)
909                         MAYBE('=', T_MINUSEQUAL)
910                         ELSE('-')
911                 case '!':
912                         MAYBE_PROLOG
913                         MAYBE('=', T_EXCLAMATIONMARKEQUAL)
914                         ELSE('!')
915                 case '/':
916                         MAYBE_PROLOG
917                         MAYBE('=', T_SLASHEQUAL)
918                                 case '*':
919                                         next_char();
920                                         skip_multiline_comment();
921                                         lexer_next_preprocessing_token();
922                                         return;
923                                 case '/':
924                                         next_char();
925                                         skip_line_comment();
926                                         lexer_next_preprocessing_token();
927                                         return;
928                         ELSE('/')
929                 case '%':
930                         MAYBE_PROLOG
931                         MAYBE('>', T_PERCENTGREATER)
932                         MAYBE('=', T_PERCENTEQUAL)
933                                 case ':':
934                                         MAYBE_PROLOG
935                                                 case '%':
936                                                         MAYBE_PROLOG
937                                                         MAYBE(':', T_PERCENTCOLONPERCENTCOLON)
938                                                         ELSE_CODE(
939                                                                 put_back(c);
940                                                                 c = '%';
941                                                                 lexer_token.type = T_PERCENTCOLON;
942                                                                 return;
943                                                         )
944                                         ELSE(T_PERCENTCOLON)
945                         ELSE('%')
946                 case '<':
947                         MAYBE_PROLOG
948                         MAYBE(':', T_LESSCOLON)
949                         MAYBE('%', T_LESSPERCENT)
950                         MAYBE('=', T_LESSEQUAL)
951                                 case '<':
952                                         MAYBE_PROLOG
953                                         MAYBE('=', T_LESSLESSEQUAL)
954                                         ELSE(T_LESSLESS)
955                         ELSE('<')
956                 case '>':
957                         MAYBE_PROLOG
958                         MAYBE('=', T_GREATEREQUAL)
959                                 case '>':
960                                         MAYBE_PROLOG
961                                         MAYBE('=', T_GREATERGREATEREQUAL)
962                                         ELSE(T_GREATERGREATER)
963                         ELSE('>')
964                 case '^':
965                         MAYBE_PROLOG
966                         MAYBE('=', T_CARETEQUAL)
967                         ELSE('^')
968                 case '|':
969                         MAYBE_PROLOG
970                         MAYBE('=', T_PIPEEQUAL)
971                         MAYBE('|', T_PIPEPIPE)
972                         ELSE('|')
973                 case ':':
974                         MAYBE_PROLOG
975                         MAYBE('>', T_COLONGREATER)
976                         ELSE(':')
977                 case '=':
978                         MAYBE_PROLOG
979                         MAYBE('=', T_EQUALEQUAL)
980                         ELSE('=')
981                 case '#':
982                         MAYBE_PROLOG
983                         MAYBE('#', T_HASHHASH)
984                         ELSE('#')
985
986                 case '?':
987                 case '[':
988                 case ']':
989                 case '(':
990                 case ')':
991                 case '{':
992                 case '}':
993                 case '~':
994                 case ';':
995                 case ',':
996                 case '\\':
997                         lexer_token.type = c;
998                         next_char();
999                         return;
1000
1001                 case EOF:
1002                         lexer_token.type = T_EOF;
1003                         return;
1004
1005                 default:
1006                         next_char();
1007                         error_prefix();
1008                         fprintf(stderr, "unknown character '%c' found\n", c);
1009                         lexer_token.type = T_ERROR;
1010                         return;
1011                 }
1012         }
1013 }
1014
1015 void lexer_next_token(void)
1016 {
1017         lexer_next_preprocessing_token();
1018         if(lexer_token.type != '\n')
1019                 return;
1020
1021 newline_found:
1022         do {
1023                 lexer_next_preprocessing_token();
1024         } while(lexer_token.type == '\n');
1025
1026         if(lexer_token.type == '#') {
1027                 parse_preprocessor_directive();
1028                 goto newline_found;
1029         }
1030 }
1031
/**
 * Initializes the lexer's string set. Counterpart of exit_lexer().
 */
void init_lexer(void)
{
        strset_init(&stringset);
}
1036
1037 void lexer_open_stream(FILE *stream, const char *input_name)
1038 {
1039         input                                  = stream;
1040         lexer_token.source_position.linenr     = 0;
1041         lexer_token.source_position.input_name = input_name;
1042
1043         /* place a virtual \n at the beginning so the lexer knows that we're
1044          * at the beginning of a line */
1045         c = '\n';
1046 }
1047
/**
 * Destroys the lexer's string set. Counterpart of init_lexer().
 */
void exit_lexer(void)
{
        strset_destroy(&stringset);
}
1052
1053 static __attribute__((unused))
1054 void dbg_pos(const source_position_t source_position)
1055 {
1056         fprintf(stdout, "%s:%d\n", source_position.input_name,
1057                 source_position.linenr);
1058         fflush(stdout);
1059 }