a6983280be85dceb22868846c0536e6d2cc3d7ba
[cparser] / lexer.c
1 #include <config.h>
2
3 #include "lexer.h"
4 #include "token_t.h"
5 #include "symbol_table_t.h"
6 #include "adt/error.h"
7 #include "adt/strset.h"
8 #include "adt/util.h"
9
10 #include <assert.h>
11 #include <errno.h>
12 #include <string.h>
13 #include <ctype.h>
14
/* Uncomment to print every character that is read or put back. */
15 //#define DEBUG_CHARS
/* Number of bytes kept free at the start of buf; reads land behind them. */
16 #define MAX_PUTBACK 3
17
/* Current input character; next_real_char() stores EOF here when no more
 * data can be read. */
18 static int         c;
/* Token most recently produced by the lexer; also carries the current
 * source position (input name and line number). */
19 token_t            lexer_token;
/* Symbol for the identifier "L" (set in lexer_open_stream); used to spot
 * wide string literals such as L"text". */
20 symbol_t          *symbol_L;
/* Stream the lexer reads from (set in lexer_open_stream). */
21 static FILE       *input;
/* Read buffer: fread() fills it starting at buf + MAX_PUTBACK. */
22 static char        buf[1024 + MAX_PUTBACK];
/* One past the last valid byte in buf. */
23 static const char *bufend;
/* Position in buf of the current character. */
24 static const char *bufpos;
/* Set used to share identical string literal contents (see strset_insert
 * calls below). */
25 static strset_t    stringset;
26
/**
 * Writes the "<input_name>:<linenr>: Error: " prefix of a diagnostic to
 * stderr. No newline is written; the caller appends the message.
 *
 * @param input_name  name of the input shown in the prefix
 * @param linenr      line number shown in the prefix
 */
static void error_prefix_at(const char *input_name, unsigned linenr)
{
	FILE *const out = stderr;

	fprintf(out, "%s:%u: Error: ", input_name, linenr);
}
31
32 static void error_prefix(void)
33 {
34         error_prefix_at(lexer_token.source_position.input_name,
35                         lexer_token.source_position.linenr);
36 }
37
/**
 * Reports an error at the current lexer position: prefix, then msg, then a
 * newline, all on stderr.
 *
 * @param msg  message text (without trailing newline)
 */
static void parse_error(const char *msg)
{
	FILE *const out = stderr;

	error_prefix();
	fprintf(out, "%s\n", msg);
}
43
44 static inline void next_real_char(void)
45 {
46         bufpos++;
47         if(bufpos >= bufend) {
48                 size_t s = fread(buf + MAX_PUTBACK, 1, sizeof(buf) - MAX_PUTBACK,
49                                  input);
50                 if(s == 0) {
51                         c = EOF;
52                         return;
53                 }
54                 bufpos = buf + MAX_PUTBACK;
55                 bufend = buf + MAX_PUTBACK + s;
56         }
57         c = *(bufpos);
58 }
59
60 static inline void put_back(int pc)
61 {
62         assert(bufpos >= buf);
63         assert(bufpos < buf+MAX_PUTBACK || *bufpos == pc);
64
65         char *p = buf + (bufpos - buf);
66         *p = pc;
67
68         /* going backwards in the buffer is legal as long as it's not more often
69          * than MAX_PUTBACK */
70         bufpos--;
71
72 #ifdef DEBUG_CHARS
73         printf("putback '%c'\n", pc);
74 #endif
75 }
76
/* defined further down; needed by the macros below */
static inline void next_char(void);

/**
 * Expands to the switch cases for a line break: "\r", "\r\n" or "\n".
 * Each case consumes the line break, increments the current line number and
 * then runs `code`. `code` should leave the switch (break/return), otherwise
 * the '\r' case falls through into the '\n' case.
 */
#define MATCH_NEWLINE(code)                            \
	case '\r':                                     \
		next_char();                           \
		if (c == '\n')                         \
			next_char();                   \
		lexer_token.source_position.linenr += 1; \
		code;                                  \
	case '\n':                                     \
		next_char();                           \
		lexer_token.source_position.linenr += 1; \
		code;
91
/* Consumes the current character; asserts (when assertions are enabled) that
 * it equals c_type. The argument is parenthesized so that any expression can
 * be passed safely. */
#define eat(c_type)  do { assert(c == (c_type)); next_char(); } while(0)
93
94 static void maybe_concat_lines(void)
95 {
96         eat('\\');
97
98         switch(c) {
99         MATCH_NEWLINE(return;)
100
101         default:
102                 break;
103         }
104
105         put_back(c);
106         c = '\\';
107 }
108
109 static inline void next_char(void)
110 {
111         next_real_char();
112
113 #if 0
114         /* filter trigraphs */
115         if(UNLIKELY(c == '\\')) {
116                 maybe_concat_lines();
117                 goto end_of_next_char;
118         }
119
120         if(LIKELY(c != '?'))
121                 goto end_of_next_char;
122
123         next_real_char();
124         if(LIKELY(c != '?')) {
125                 put_back(c);
126                 c = '?';
127                 goto end_of_next_char;
128         }
129
130         next_real_char();
131         switch(c) {
132         case '=': c = '#'; break;
133         case '(': c = '['; break;
134         case '/': c = '\\'; maybe_concat_lines(); break;
135         case ')': c = ']'; break;
136         case '\'': c = '^'; break;
137         case '<': c = '{'; break;
138         case '!': c = '|'; break;
139         case '>': c = '}'; break;
140         case '-': c = '~'; break;
141         default:
142                 put_back('?');
143                 put_back(c);
144                 c = '?';
145                 break;
146         }
147
148 end_of_next_char:;
149 #endif
150         (void) maybe_concat_lines;
151 #ifdef DEBUG_CHARS
152         printf("nchar '%c'\n", c);
153 #endif
154 }
155
/* Case labels for every character that may start an identifier:
 * 'a'-'z', 'A'-'Z' and '_'. Use inside a switch, followed by the code to run. */
#define SYMBOL_CHARS                                                        \
	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': \
	case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': \
	case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': \
	case 'v': case 'w': case 'x': case 'y': case 'z':                     \
	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': \
	case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': \
	case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': \
	case 'V': case 'W': case 'X': case 'Y': case 'Z':                     \
	case '_':
210
/* Case labels for the decimal digits '0'-'9'. Use inside a switch, followed
 * by the code to run. */
#define DIGITS                                                   \
	case '0': case '1': case '2': case '3': case '4':        \
	case '5': case '6': case '7': case '8': case '9':
222
223 static void parse_symbol(void)
224 {
225         symbol_t *symbol;
226         char     *string;
227
228         obstack_1grow(&symbol_obstack, c);
229         next_char();
230
231         while(1) {
232                 switch(c) {
233                 DIGITS
234                 SYMBOL_CHARS
235                         obstack_1grow(&symbol_obstack, c);
236                         next_char();
237                         break;
238
239                 default:
240                         goto end_symbol;
241                 }
242         }
243
244 end_symbol:
245         obstack_1grow(&symbol_obstack, '\0');
246
247         string = obstack_finish(&symbol_obstack);
248         symbol = symbol_table_insert(string);
249
250         lexer_token.type     = symbol->ID;
251         lexer_token.v.symbol = symbol;
252
253         if(symbol->string != string) {
254                 obstack_free(&symbol_obstack, string);
255         }
256 }
257
258 static void parse_integer_suffix(void)
259 {
260         if(c == 'U' || c == 'U') {
261                 /* TODO do something with the suffixes... */
262                 next_char();
263                 if(c == 'L' || c == 'l') {
264                         next_char();
265                         if(c == 'L' || c == 'l') {
266                                 next_char();
267                         }
268                 }
269         } else if(c == 'l' || c == 'L') {
270                 next_char();
271                 if(c == 'l' || c == 'L') {
272                         next_char();
273                         if(c == 'u' || c == 'U') {
274                                 next_char();
275                         }
276                 } else if(c == 'u' || c == 'U') {
277                         next_char();
278                 }
279         }
280 }
281
282 static void parse_floating_suffix(void)
283 {
284         switch(c) {
285         /* TODO: do something usefull with the suffixes... */
286         case 'f':
287         case 'F':
288         case 'l':
289         case 'L':
290                 next_char();
291                 break;
292         default:
293                 break;
294         }
295 }
296
297 static void parse_number_hex(void)
298 {
299         assert(c == 'x' || c == 'X');
300         next_char();
301
302         if (!isdigit(c) &&
303                 !('A' <= c && c <= 'F') &&
304                 !('a' <= c && c <= 'f')) {
305                 parse_error("premature end of hex number literal");
306                 lexer_token.type = T_ERROR;
307                 return;
308         }
309
310         int value = 0;
311         while(1) {
312                 if (isdigit(c)) {
313                         value = 16 * value + c - '0';
314                 } else if ('A' <= c && c <= 'F') {
315                         value = 16 * value + c - 'A' + 10;
316                 } else if ('a' <= c && c <= 'f') {
317                         value = 16 * value + c - 'a' + 10;
318                 } else {
319                         parse_integer_suffix();
320
321                         lexer_token.type       = T_INTEGER;
322                         lexer_token.v.intvalue = value;
323                         return;
324                 }
325                 next_char();
326         }
327
328         if(c == '.' || c == 'p' || c == 'P') {
329                 next_char();
330                 panic("Hex floating point numbers not implemented yet");
331         }
332 }
333
334 static void parse_number_oct(void)
335 {
336         int value = 0;
337         while(c >= '0' && c <= '7') {
338                 value = 8 * value + c - '0';
339                 next_char();
340         }
341         if (c == '8' || c == '9') {
342                 parse_error("invalid octal number");
343                 lexer_token.type = T_ERROR;
344                 return;
345         }
346
347         lexer_token.type       = T_INTEGER;
348         lexer_token.v.intvalue = value;
349
350         parse_integer_suffix();
351 }
352
353 static void parse_floatingpoint_exponent(long double value)
354 {
355         unsigned int expo = 0;
356         long double  factor = 10.;
357
358         if(c == '-') {
359                 next_char();
360                 factor = 0.1;
361         } else if(c == '+') {
362                 next_char();
363         }
364
365         while(c >= '0' && c <= '9') {
366                 expo = 10 * expo + (c - '0');
367                 next_char();
368         }
369
370         while(1) {
371                 if(expo & 1)
372                         value *= factor;
373                 expo >>= 1;
374                 if(expo == 0)
375                         break;
376                 factor *= factor;
377         }
378
379         lexer_token.type         = T_FLOATINGPOINT;
380         lexer_token.v.floatvalue = value;
381
382         parse_floating_suffix();
383 }
384
385 static void parse_floatingpoint_fract(int integer_part)
386 {
387         long double value  = integer_part;
388         long double factor = 1.;
389
390         while(c >= '0' && c <= '9') {
391                 factor *= 0.1;
392                 value  += (c - '0') * factor;
393                 next_char();
394         }
395
396         if(c == 'e' || c == 'E') {
397                 next_char();
398                 parse_floatingpoint_exponent(value);
399                 return;
400         }
401
402         lexer_token.type         = T_FLOATINGPOINT;
403         lexer_token.v.floatvalue = value;
404
405         parse_floating_suffix();
406 }
407
408 static void parse_number_dec(void)
409 {
410         int value = 0;
411
412         while(isdigit(c)) {
413                 value = 10 * value + c - '0';
414                 next_char();
415         }
416
417         if(c == '.') {
418                 next_char();
419                 parse_floatingpoint_fract(value);
420                 return;
421         }
422         if(c == 'e' || c == 'E') {
423                 next_char();
424                 parse_floatingpoint_exponent(value);
425                 return;
426         }
427         parse_integer_suffix();
428
429         lexer_token.type       = T_INTEGER;
430         lexer_token.v.intvalue = value;
431 }
432
433 static void parse_number(void)
434 {
435         if (c == '0') {
436                 next_char();
437                 switch (c) {
438                         case 'X':
439                         case 'x':
440                                 parse_number_hex();
441                                 break;
442                         case '0':
443                         case '1':
444                         case '2':
445                         case '3':
446                         case '4':
447                         case '5':
448                         case '6':
449                         case '7':
450                                 parse_number_oct();
451                                 break;
452                         case '.':
453                                 next_char();
454                                 parse_floatingpoint_fract(0);
455                                 break;
456                         case 'e':
457                         case 'E':
458                                 parse_floatingpoint_exponent(0);
459                                 break;
460                         case '8':
461                         case '9':
462                                 next_char();
463                                 parse_error("invalid octal number");
464                                 lexer_token.type = T_ERROR;
465                                 return;
466                         default:
467                                 put_back(c);
468                                 c = '0';
469                                 parse_number_dec();
470                                 return;
471                 }
472         } else {
473                 parse_number_dec();
474         }
475 }
476
/**
 * Tells whether chr is one of the characters '0'..'7'.
 *
 * @return 1 if chr is an octal digit, 0 otherwise
 */
static inline int is_octal_digit(int chr)
{
	if (chr < '0')
		return 0;
	if (chr > '7')
		return 0;
	return 1;
}
481
482 static int parse_octal_sequence(const int first_digit)
483 {
484         assert(is_octal_digit(first_digit));
485         int value = first_digit - '0';
486         if (!is_octal_digit(c)) return value;
487         value = 8 * value + c - '0';
488         next_char();
489         if (!is_octal_digit(c)) return value;
490         value = 8 * value + c - '0';
491         next_char();
492         return value;
493 }
494
495 static int parse_hex_sequence(void)
496 {
497         int value = 0;
498         while(1) {
499                 if (c >= '0' && c <= '9') {
500                         value = 16 * value + c - '0';
501                 } else if ('A' <= c && c <= 'F') {
502                         value = 16 * value + c - 'A' + 10;
503                 } else if ('a' <= c && c <= 'f') {
504                         value = 16 * value + c - 'a' + 10;
505                 } else {
506                         break;
507                 }
508                 next_char();
509         }
510
511         return value;
512 }
513
514 static int parse_escape_sequence(void)
515 {
516         eat('\\');
517
518         int ec = c;
519         next_char();
520
521         switch(ec) {
522         case '"':  return '"';
523         case '\'': return '\'';
524         case '\\': return '\\';
525         case '?': return '\?';
526         case 'a': return '\a';
527         case 'b': return '\b';
528         case 'f': return '\f';
529         case 'n': return '\n';
530         case 'r': return '\r';
531         case 't': return '\t';
532         case 'v': return '\v';
533         case 'x':
534                 return parse_hex_sequence();
535         case '0':
536         case '1':
537         case '2':
538         case '3':
539         case '4':
540         case '5':
541         case '6':
542         case '7':
543                 return parse_octal_sequence(ec);
544         case EOF:
545                 parse_error("reached end of file while parsing escape sequence");
546                 return EOF;
547         default:
548                 parse_error("unknown escape sequence");
549                 return EOF;
550         }
551 }
552
553 const char *concat_strings(const char *s1, const char *s2)
554 {
555         size_t  len1   = strlen(s1);
556         size_t  len2   = strlen(s2);
557
558         char   *concat = obstack_alloc(&symbol_obstack, len1 + len2 + 1);
559         memcpy(concat, s1, len1);
560         memcpy(concat + len1, s2, len2 + 1);
561
562         const char *result = strset_insert(&stringset, concat);
563         if(result != concat) {
564                 obstack_free(&symbol_obstack, concat);
565         }
566
567         return result;
568 }
569
570 static void parse_string_literal(void)
571 {
572         unsigned    start_linenr = lexer_token.source_position.linenr;
573         char       *string;
574         const char *result;
575
576         assert(c == '"');
577         next_char();
578
579         int tc;
580         while(1) {
581                 switch(c) {
582                 case '\\':
583                         tc = parse_escape_sequence();
584                         obstack_1grow(&symbol_obstack, tc);
585                         break;
586
587                 case EOF:
588                         error_prefix_at(lexer_token.source_position.input_name,
589                                         start_linenr);
590                         fprintf(stderr, "string has no end\n");
591                         lexer_token.type = T_ERROR;
592                         return;
593
594                 case '"':
595                         next_char();
596                         goto end_of_string;
597
598                 default:
599                         obstack_1grow(&symbol_obstack, c);
600                         next_char();
601                         break;
602                 }
603         }
604
605 end_of_string:
606
607         /* TODO: concatenate multiple strings separated by whitespace... */
608
609         /* add finishing 0 to the string */
610         obstack_1grow(&symbol_obstack, '\0');
611         string = obstack_finish(&symbol_obstack);
612
613         /* check if there is already a copy of the string */
614         result = strset_insert(&stringset, string);
615         if(result != string) {
616                 obstack_free(&symbol_obstack, string);
617         }
618
619         lexer_token.type     = T_STRING_LITERAL;
620         lexer_token.v.string = result;
621 }
622
623 static void parse_character_constant(void)
624 {
625         eat('\'');
626
627         int found_char = 0;
628         while(1) {
629                 switch(c) {
630                 case '\\':
631                         found_char = parse_escape_sequence();
632                         break;
633
634                 MATCH_NEWLINE(
635                         parse_error("newline while parsing character constant");
636                         break;
637                 )
638
639                 case '\'':
640                         next_char();
641                         goto end_of_char_constant;
642
643                 case EOF:
644                         parse_error("EOF while parsing character constant");
645                         lexer_token.type = T_ERROR;
646                         return;
647
648                 default:
649                         if(found_char != 0) {
650                                 parse_error("more than 1 characters in character "
651                                             "constant");
652                                 goto end_of_char_constant;
653                         } else {
654                                 found_char = c;
655                                 next_char();
656                         }
657                         break;
658                 }
659         }
660
661 end_of_char_constant:
662         lexer_token.type       = T_INTEGER;
663         lexer_token.v.intvalue = found_char;
664 }
665
666 static void skip_multiline_comment(void)
667 {
668         unsigned start_linenr = lexer_token.source_position.linenr;
669
670         while(1) {
671                 switch(c) {
672                 case '*':
673                         next_char();
674                         if(c == '/') {
675                                 next_char();
676                                 return;
677                         }
678                         break;
679
680                 MATCH_NEWLINE(break;)
681
682                 case EOF:
683                         error_prefix_at(lexer_token.source_position.input_name,
684                                         start_linenr);
685                         fprintf(stderr, "at end of file while looking for comment end\n");
686                         return;
687
688                 default:
689                         next_char();
690                         break;
691                 }
692         }
693 }
694
695 static void skip_line_comment(void)
696 {
697         while(1) {
698                 switch(c) {
699                 case EOF:
700                         return;
701
702                 case '\n':
703                 case '\r':
704                         return;
705
706                 default:
707                         next_char();
708                         break;
709                 }
710         }
711 }
712
713 static token_t pp_token;
714
715 static inline void next_pp_token(void)
716 {
717         lexer_next_preprocessing_token();
718         pp_token = lexer_token;
719 }
720
721 static void eat_until_newline(void)
722 {
723         while(pp_token.type != '\n' && pp_token.type != T_EOF) {
724                 next_pp_token();
725         }
726 }
727
/**
 * Reports an #error directive at the current position.
 * TODO: parse the pp-tokens up to the new-line and include them in the
 * message.
 */
static void error_directive(void)
{
	FILE *const out = stderr;

	error_prefix();
	fprintf(out, "#error directive: \n");
}
735
736 static void define_directive(void)
737 {
738         lexer_next_preprocessing_token();
739         if(lexer_token.type != T_IDENTIFIER) {
740                 parse_error("expected identifier after #define\n");
741                 eat_until_newline();
742         }
743 }
744
/**
 * Handles "#ifdef" / "#ifndef". Currently only reads the token following the
 * directive name.
 *
 * @param is_ifndef  non-zero for #ifndef; not used yet
 */
static void ifdef_directive(int is_ifndef)
{
	lexer_next_preprocessing_token();
	/* TODO: expect an identifier followed by a newline */

	(void) is_ifndef;
}
752
/* Handles "#endif". Currently an empty stub: nothing is read or checked. */
753 static void endif_directive(void)
754 {
755         //expect_newline();
756 }
757
758 static void parse_line_directive(void)
759 {
760         if(pp_token.type != T_INTEGER) {
761                 parse_error("expected integer");
762         } else {
763                 lexer_token.source_position.linenr = pp_token.v.intvalue - 1;
764                 next_pp_token();
765         }
766         if(pp_token.type == T_STRING_LITERAL) {
767                 lexer_token.source_position.input_name = pp_token.v.string;
768                 next_pp_token();
769         }
770
771         eat_until_newline();
772 }
773
774 static void parse_preprocessor_identifier(void)
775 {
776         assert(pp_token.type == T_IDENTIFIER);
777         symbol_t *symbol = pp_token.v.symbol;
778
779         switch(symbol->pp_ID) {
780         case TP_include:
781                 printf("include - enable header name parsing!\n");
782                 break;
783         case TP_define:
784                 define_directive();
785                 break;
786         case TP_ifdef:
787                 ifdef_directive(0);
788                 break;
789         case TP_ifndef:
790                 ifdef_directive(1);
791                 break;
792         case TP_endif:
793                 endif_directive();
794                 break;
795         case TP_line:
796                 next_pp_token();
797                 parse_line_directive();
798                 break;
799         case TP_if:
800         case TP_else:
801         case TP_elif:
802         case TP_undef:
803         case TP_error:
804                 error_directive();
805                 break;
806         case TP_pragma:
807                 break;
808         }
809 }
810
811 static void parse_preprocessor_directive(void)
812 {
813         next_pp_token();
814
815         switch(pp_token.type) {
816         case T_IDENTIFIER:
817                 parse_preprocessor_identifier();
818                 break;
819         case T_INTEGER:
820                 parse_line_directive();
821                 break;
822         default:
823                 parse_error("invalid preprocessor directive");
824                 eat_until_newline();
825                 break;
826         }
827 }
828
/* Helper macros for multi-character punctuators. They are meant to be used
 * together inside a case of the lexer's switch: MAYBE_PROLOG opens a nested
 * loop and switch, which ELSE or ELSE_CODE must close again. */

/* Consumes the current character and opens `while(1) { switch(c) {` on the
 * character that follows. */
829 #define MAYBE_PROLOG                                       \
830                         next_char();                                   \
831                         while(1) {                                     \
832                                 switch(c) {
833
/* Case for follow-up character `ch`: consumes it, sets the token type to
 * `set_type` and returns from the enclosing function. */
834 #define MAYBE(ch, set_type)                                \
835                                 case ch:                                   \
836                                         next_char();                           \
837                                         lexer_token.type = set_type;           \
838                                         return;
839
/* Default case running `code`, then closes the switch and loop opened by
 * MAYBE_PROLOG and ends with a break for the enclosing case. */
840 #define ELSE_CODE(code)                                    \
841                                 default:                                   \
842                                         code;                                  \
843                                 }                                          \
844                         } /* end of while(1) */                        \
845                         break;
846
/* Default case that sets the token type to `set_type` without consuming the
 * current character, and returns. */
847 #define ELSE(set_type)                                     \
848                 ELSE_CODE(                                         \
849                         lexer_token.type = set_type;                   \
850                         return;                                        \
851                 )
852
853 void lexer_next_preprocessing_token(void)
854 {
855         while(1) {
856                 switch(c) {
857                 case ' ':
858                 case '\t':
859                         next_char();
860                         break;
861
862                 MATCH_NEWLINE(
863                         lexer_token.type = '\n';
864                         return;
865                 )
866
867                 SYMBOL_CHARS
868                         parse_symbol();
869                         /* might be a wide string ( L"string" ) */
870                         if(c == '"' && (lexer_token.type == T_IDENTIFIER &&
871                            lexer_token.v.symbol == symbol_L)) {
872                                 parse_string_literal();
873                                 return;
874                         }
875                         return;
876
877                 DIGITS
878                         parse_number();
879                         return;
880
881                 case '"':
882                         parse_string_literal();
883                         return;
884
885                 case '\'':
886                         parse_character_constant();
887                         return;
888
889                 case '.':
890                         MAYBE_PROLOG
891                                 case '.':
892                                         MAYBE_PROLOG
893                                         MAYBE('.', T_DOTDOTDOT)
894                                         ELSE_CODE(
895                                                 put_back(c);
896                                                 c = '.';
897                                                 lexer_token.type = '.';
898                                                 return;
899                                         )
900                         ELSE('.')
901                 case '&':
902                         MAYBE_PROLOG
903                         MAYBE('&', T_ANDAND)
904                         MAYBE('=', T_ANDEQUAL)
905                         ELSE('&')
906                 case '*':
907                         MAYBE_PROLOG
908                         MAYBE('=', T_ASTERISKEQUAL)
909                         ELSE('*')
910                 case '+':
911                         MAYBE_PROLOG
912                         MAYBE('+', T_PLUSPLUS)
913                         MAYBE('=', T_PLUSEQUAL)
914                         ELSE('+')
915                 case '-':
916                         MAYBE_PROLOG
917                         MAYBE('>', T_MINUSGREATER)
918                         MAYBE('-', T_MINUSMINUS)
919                         MAYBE('=', T_MINUSEQUAL)
920                         ELSE('-')
921                 case '!':
922                         MAYBE_PROLOG
923                         MAYBE('=', T_EXCLAMATIONMARKEQUAL)
924                         ELSE('!')
925                 case '/':
926                         MAYBE_PROLOG
927                         MAYBE('=', T_SLASHEQUAL)
928                                 case '*':
929                                         next_char();
930                                         skip_multiline_comment();
931                                         lexer_next_preprocessing_token();
932                                         return;
933                                 case '/':
934                                         next_char();
935                                         skip_line_comment();
936                                         lexer_next_preprocessing_token();
937                                         return;
938                         ELSE('/')
939                 case '%':
940                         MAYBE_PROLOG
941                         MAYBE('>', T_PERCENTGREATER)
942                         MAYBE('=', T_PERCENTEQUAL)
943                                 case ':':
944                                         MAYBE_PROLOG
945                                                 case '%':
946                                                         MAYBE_PROLOG
947                                                         MAYBE(':', T_PERCENTCOLONPERCENTCOLON)
948                                                         ELSE_CODE(
949                                                                 put_back(c);
950                                                                 c = '%';
951                                                                 lexer_token.type = T_PERCENTCOLON;
952                                                                 return;
953                                                         )
954                                         ELSE(T_PERCENTCOLON)
955                         ELSE('%')
956                 case '<':
957                         MAYBE_PROLOG
958                         MAYBE(':', T_LESSCOLON)
959                         MAYBE('%', T_LESSPERCENT)
960                         MAYBE('=', T_LESSEQUAL)
961                                 case '<':
962                                         MAYBE_PROLOG
963                                         MAYBE('=', T_LESSLESSEQUAL)
964                                         ELSE(T_LESSLESS)
965                         ELSE('<')
966                 case '>':
967                         MAYBE_PROLOG
968                         MAYBE('=', T_GREATEREQUAL)
969                                 case '>':
970                                         MAYBE_PROLOG
971                                         MAYBE('=', T_GREATERGREATEREQUAL)
972                                         ELSE(T_GREATERGREATER)
973                         ELSE('>')
974                 case '^':
975                         MAYBE_PROLOG
976                         MAYBE('=', T_CARETEQUAL)
977                         ELSE('^')
978                 case '|':
979                         MAYBE_PROLOG
980                         MAYBE('=', T_PIPEEQUAL)
981                         MAYBE('|', T_PIPEPIPE)
982                         ELSE('|')
983                 case ':':
984                         MAYBE_PROLOG
985                         MAYBE('>', T_COLONGREATER)
986                         ELSE(':')
987                 case '=':
988                         MAYBE_PROLOG
989                         MAYBE('=', T_EQUALEQUAL)
990                         ELSE('=')
991                 case '#':
992                         MAYBE_PROLOG
993                         MAYBE('#', T_HASHHASH)
994                         ELSE('#')
995
996                 case '?':
997                 case '[':
998                 case ']':
999                 case '(':
1000                 case ')':
1001                 case '{':
1002                 case '}':
1003                 case '~':
1004                 case ';':
1005                 case ',':
1006                 case '\\':
1007                         lexer_token.type = c;
1008                         next_char();
1009                         return;
1010
1011                 case EOF:
1012                         lexer_token.type = T_EOF;
1013                         return;
1014
1015                 default:
1016                         next_char();
1017                         error_prefix();
1018                         fprintf(stderr, "unknown character '%c' found\n", c);
1019                         lexer_token.type = T_ERROR;
1020                         return;
1021                 }
1022         }
1023 }
1024
1025 void lexer_next_token(void)
1026 {
1027         lexer_next_preprocessing_token();
1028         if(lexer_token.type != '\n')
1029                 return;
1030
1031 newline_found:
1032         do {
1033                 lexer_next_preprocessing_token();
1034         } while(lexer_token.type == '\n');
1035
1036         if(lexer_token.type == '#') {
1037                 parse_preprocessor_directive();
1038                 goto newline_found;
1039         }
1040 }
1041
/* Initializes the lexer's string set. Counterpart of exit_lexer(). */
1042 void init_lexer(void)
1043 {
1044         strset_init(&stringset);
1045 }
1046
1047 void lexer_open_stream(FILE *stream, const char *input_name)
1048 {
1049         input                                  = stream;
1050         lexer_token.source_position.linenr     = 0;
1051         lexer_token.source_position.input_name = input_name;
1052
1053         symbol_L = symbol_table_insert("L");
1054
1055         /* place a virtual \n at the beginning so the lexer knows that we're
1056          * at the beginning of a line */
1057         c = '\n';
1058 }
1059
/* Destroys the string set set up by init_lexer(). */
1060 void exit_lexer(void)
1061 {
1062         strset_destroy(&stringset);
1063 }
1064
1065 static __attribute__((unused))
1066 void dbg_pos(const source_position_t source_position)
1067 {
1068         fprintf(stdout, "%s:%d\n", source_position.input_name,
1069                 source_position.linenr);
1070         fflush(stdout);
1071 }