Parse an integer suffix after a hex number.
[cparser] / lexer.c
1 #include <config.h>
2
3 #include "lexer.h"
4 #include "token_t.h"
5 #include "symbol_table_t.h"
6 #include "adt/error.h"
7 #include "adt/strset.h"
8 #include "adt/util.h"
9 #include "type_t.h"
10
11 #include <assert.h>
12 #include <errno.h>
13 #include <string.h>
14 #include <stdbool.h>
15 #include <ctype.h>
16
17 //#define DEBUG_CHARS
18 #define MAX_PUTBACK 3
19
20 static int         c;
21 token_t            lexer_token;
22 symbol_t          *symbol_L;
23 static FILE       *input;
24 static char        buf[1024 + MAX_PUTBACK];
25 static const char *bufend;
26 static const char *bufpos;
27 static strset_t    stringset;
28
29 static type_t     *type_int        = NULL;
30 static type_t     *type_uint       = NULL;
31 static type_t     *type_long       = NULL;
32 static type_t     *type_ulong      = NULL;
33 static type_t     *type_longlong   = NULL;
34 static type_t     *type_ulonglong  = NULL;
35 static type_t     *type_float      = NULL;
36 static type_t     *type_double     = NULL;
37 static type_t     *type_longdouble = NULL;
38
/**
 * Print the "<file>:<line>: Error: " prefix of a diagnostic to stderr.
 *
 * @param input_name  name of the input shown before the colon
 * @param linenr      line number shown after the colon
 */
static void error_prefix_at(const char *input_name, unsigned linenr)
{
    FILE *const out = stderr;

    fprintf(out, "%s:%u: Error: ", input_name, linenr);
}
43
44 static void error_prefix(void)
45 {
46         error_prefix_at(lexer_token.source_position.input_name,
47                         lexer_token.source_position.linenr);
48 }
49
50 static void parse_error(const char *msg)
51 {
52         error_prefix();
53         fprintf(stderr, "%s\n", msg);
54 }
55
56 static inline void next_real_char(void)
57 {
58         bufpos++;
59         if(bufpos >= bufend) {
60                 size_t s = fread(buf + MAX_PUTBACK, 1, sizeof(buf) - MAX_PUTBACK,
61                                  input);
62                 if(s == 0) {
63                         c = EOF;
64                         return;
65                 }
66                 bufpos = buf + MAX_PUTBACK;
67                 bufend = buf + MAX_PUTBACK + s;
68         }
69         c = *(bufpos);
70 }
71
72 static inline void put_back(int pc)
73 {
74         assert(bufpos >= buf);
75         //assert(bufpos < buf+MAX_PUTBACK || *bufpos == pc);
76
77         char *p = buf + (bufpos - buf);
78         *p = pc;
79
80         /* going backwards in the buffer is legal as long as it's not more often
81          * than MAX_PUTBACK */
82         bufpos--;
83
84 #ifdef DEBUG_CHARS
85         printf("putback '%c'\n", pc);
86 #endif
87 }
88
89 static inline void next_char(void);
90
/*
 * Case labels for a switch on c that recognise a line ending: "\r",
 * "\r\n" or "\n". The line ending is consumed, the line counter of
 * lexer_token is incremented and then `code` is executed.
 */
#define MATCH_NEWLINE(code)                       \
    case '\r':                                    \
        next_char();                              \
        if(c == '\n') {                           \
            next_char();                          \
        }                                         \
        lexer_token.source_position.linenr++;     \
        code;                                     \
    case '\n':                                    \
        next_char();                              \
        lexer_token.source_position.linenr++;     \
        code;
103
/* Assert that the current character is c_type, then advance past it. */
#define eat(c_type)              \
    do {                         \
        assert(c == (c_type));   \
        next_char();             \
    } while(0)
105
106 static void maybe_concat_lines(void)
107 {
108         eat('\\');
109
110         switch(c) {
111         MATCH_NEWLINE(return;)
112
113         default:
114                 break;
115         }
116
117         put_back(c);
118         c = '\\';
119 }
120
121 static inline void next_char(void)
122 {
123         next_real_char();
124
125         /* filter trigraphs */
126         if(UNLIKELY(c == '\\')) {
127                 maybe_concat_lines();
128                 goto end_of_next_char;
129         }
130
131         if(LIKELY(c != '?'))
132                 goto end_of_next_char;
133
134         next_real_char();
135         if(LIKELY(c != '?')) {
136                 put_back(c);
137                 c = '?';
138                 goto end_of_next_char;
139         }
140
141         next_real_char();
142         switch(c) {
143         case '=': c = '#'; break;
144         case '(': c = '['; break;
145         case '/': c = '\\'; maybe_concat_lines(); break;
146         case ')': c = ']'; break;
147         case '\'': c = '^'; break;
148         case '<': c = '{'; break;
149         case '!': c = '|'; break;
150         case '>': c = '}'; break;
151         case '-': c = '~'; break;
152         default:
153                 put_back('?');
154                 put_back(c);
155                 c = '?';
156                 break;
157         }
158
159 end_of_next_char:;
160 #ifdef DEBUG_CHARS
161         printf("nchar '%c'\n", c);
162 #endif
163 }
164
/* Case labels for the characters that may start an identifier:
 * a-z, A-Z and '_'. */
#define SYMBOL_CHARS                                                        \
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':   \
    case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':   \
    case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':   \
    case 'v': case 'w': case 'x': case 'y': case 'z':                       \
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':   \
    case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':   \
    case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':   \
    case 'V': case 'W': case 'X': case 'Y': case 'Z':                       \
    case '_':
219
/* Case labels for the decimal digits '0' to '9'. */
#define DIGITS                                              \
    case '0': case '1': case '2': case '3': case '4':       \
    case '5': case '6': case '7': case '8': case '9':
231
232 static void parse_symbol(void)
233 {
234         symbol_t *symbol;
235         char     *string;
236
237         obstack_1grow(&symbol_obstack, c);
238         next_char();
239
240         while(1) {
241                 switch(c) {
242                 DIGITS
243                 SYMBOL_CHARS
244                         obstack_1grow(&symbol_obstack, c);
245                         next_char();
246                         break;
247
248                 default:
249                         goto end_symbol;
250                 }
251         }
252
253 end_symbol:
254         obstack_1grow(&symbol_obstack, '\0');
255
256         string = obstack_finish(&symbol_obstack);
257         symbol = symbol_table_insert(string);
258
259         lexer_token.type     = symbol->ID;
260         lexer_token.v.symbol = symbol;
261
262         if(symbol->string != string) {
263                 obstack_free(&symbol_obstack, string);
264         }
265 }
266
267 static void parse_integer_suffix(void)
268 {
269         if(c == 'U' || c == 'U') {
270                 next_char();
271                 if(c == 'L' || c == 'l') {
272                         next_char();
273                         if(c == 'L' || c == 'l') {
274                                 next_char();
275                                 lexer_token.datatype = type_ulonglong;
276                         } else {
277                                 lexer_token.datatype = type_ulong;
278                         }
279                 } else {
280                         lexer_token.datatype = type_uint;
281                 }
282         } else if(c == 'l' || c == 'L') {
283                 next_char();
284                 if(c == 'l' || c == 'L') {
285                         next_char();
286                         if(c == 'u' || c == 'U') {
287                                 next_char();
288                                 lexer_token.datatype = type_ulonglong;
289                         } else {
290                                 lexer_token.datatype = type_longlong;
291                         }
292                 } else if(c == 'u' || c == 'U') {
293                         next_char();
294                         lexer_token.datatype = type_ulong;
295                 } else {
296                         lexer_token.datatype = type_int;
297                 }
298         } else {
299                 lexer_token.datatype = type_int;
300         }
301 }
302
303 static void parse_floating_suffix(void)
304 {
305         switch(c) {
306         /* TODO: do something usefull with the suffixes... */
307         case 'f':
308         case 'F':
309                 next_char();
310                 lexer_token.datatype = type_float;
311                 break;
312         case 'l':
313         case 'L':
314                 next_char();
315                 lexer_token.datatype = type_longdouble;
316                 break;
317         default:
318                 lexer_token.datatype = type_double;
319                 break;
320         }
321 }
322
/**
 * Tell whether c is a hexadecimal digit (0-9, a-f, A-F).
 */
static inline bool is_hex_digit(int c)
{
    if(c >= '0' && c <= '9')
        return true;
    if(c >= 'a' && c <= 'f')
        return true;
    return c >= 'A' && c <= 'F';
}
329
330 static void parse_number_hex(void)
331 {
332         assert(c == 'x' || c == 'X');
333         next_char();
334
335         while(is_hex_digit(c)) {
336                 obstack_1grow(&symbol_obstack, c);
337                 next_char();
338         }
339         obstack_1grow(&symbol_obstack, '\0');
340         char *string = obstack_finish(&symbol_obstack);
341
342         if(c == '.' || c == 'p' || c == 'P') {
343                 next_char();
344                 panic("Hex floating point numbers not implemented yet");
345         }
346         if(*string == '\0') {
347                 parse_error("invalid hex number");
348                 lexer_token.type = T_ERROR;
349         }
350
351         char *endptr;
352         lexer_token.type       = T_INTEGER;
353         lexer_token.v.intvalue = strtoull(string, &endptr, 16);
354         if(*endptr != '\0') {
355                 parse_error("hex number literal too long");
356         }
357
358         obstack_free(&symbol_obstack, string);
359         parse_integer_suffix();
360 }
361
/**
 * Tell whether chr is an octal digit ('0' to '7').
 */
static inline bool is_octal_digit(int chr)
{
    return chr >= '0' && chr <= '7';
}
366
367 static void parse_number_oct(void)
368 {
369         while(is_octal_digit(c)) {
370                 obstack_1grow(&symbol_obstack, c);
371                 next_char();
372         }
373         obstack_1grow(&symbol_obstack, '\0');
374         char *string = obstack_finish(&symbol_obstack);
375
376         char *endptr;
377         lexer_token.type       = T_INTEGER;
378         lexer_token.v.intvalue = strtoull(string, &endptr, 8);
379         if(*endptr != '\0') {
380                 parse_error("octal number literal too long");
381         }
382
383         obstack_free(&symbol_obstack, string);
384         parse_integer_suffix();
385 }
386
387 static void parse_number_dec(void)
388 {
389         bool is_float = false;
390         while(isdigit(c)) {
391                 obstack_1grow(&symbol_obstack, c);
392                 next_char();
393         }
394
395         if(c == '.') {
396                 obstack_1grow(&symbol_obstack, '.');
397                 next_char();
398
399                 while(isdigit(c)) {
400                         obstack_1grow(&symbol_obstack, c);
401                         next_char();
402                 }
403                 is_float = true;
404         }
405         if(c == 'e' || c == 'E') {
406                 obstack_1grow(&symbol_obstack, 'e');
407                 next_char();
408
409                 if(c == '-' || c == '+') {
410                         obstack_1grow(&symbol_obstack, c);
411                         next_char();
412                 }
413
414                 while(isdigit(c)) {
415                         obstack_1grow(&symbol_obstack, c);
416                         next_char();
417                 }
418                 is_float = true;
419         }
420
421         obstack_1grow(&symbol_obstack, '\0');
422         char *string = obstack_finish(&symbol_obstack);
423
424         char *endptr;
425         if(is_float) {
426                 lexer_token.type         = T_FLOATINGPOINT;
427                 lexer_token.v.floatvalue = strtold(string, &endptr);
428
429                 if(*endptr != '\0') {
430                         parse_error("invalid number literal");
431                 }
432
433                 parse_floating_suffix();
434         } else {
435                 lexer_token.type       = T_INTEGER;
436                 lexer_token.v.intvalue = strtoull(string, &endptr, 10);
437
438                 if(*endptr != '\0') {
439                         parse_error("invalid number literal");
440                 }
441
442                 parse_integer_suffix();
443         }
444         obstack_free(&symbol_obstack, string);
445 }
446
447 static void parse_number(void)
448 {
449         if (c == '0') {
450                 next_char();
451                 switch (c) {
452                         case 'X':
453                         case 'x':
454                                 parse_number_hex();
455                                 break;
456                         case '0':
457                         case '1':
458                         case '2':
459                         case '3':
460                         case '4':
461                         case '5':
462                         case '6':
463                         case '7':
464                                 parse_number_oct();
465                                 break;
466                         case '8':
467                         case '9':
468                                 next_char();
469                                 parse_error("invalid octal number");
470                                 lexer_token.type = T_ERROR;
471                                 return;
472                         case '.':
473                         case 'e':
474                         case 'E':
475                         default:
476                                 obstack_1grow(&symbol_obstack, '0');
477                                 parse_number_dec();
478                                 return;
479                 }
480         } else {
481                 parse_number_dec();
482         }
483 }
484
485 static int parse_octal_sequence(const int first_digit)
486 {
487         assert(is_octal_digit(first_digit));
488         int value = first_digit - '0';
489         if (!is_octal_digit(c)) return value;
490         value = 8 * value + c - '0';
491         next_char();
492         if (!is_octal_digit(c)) return value;
493         value = 8 * value + c - '0';
494         next_char();
495         return value;
496 }
497
498 static int parse_hex_sequence(void)
499 {
500         int value = 0;
501         while(1) {
502                 if (c >= '0' && c <= '9') {
503                         value = 16 * value + c - '0';
504                 } else if ('A' <= c && c <= 'F') {
505                         value = 16 * value + c - 'A' + 10;
506                 } else if ('a' <= c && c <= 'f') {
507                         value = 16 * value + c - 'a' + 10;
508                 } else {
509                         break;
510                 }
511                 next_char();
512         }
513
514         return value;
515 }
516
517 static int parse_escape_sequence(void)
518 {
519         eat('\\');
520
521         int ec = c;
522         next_char();
523
524         switch(ec) {
525         case '"':  return '"';
526         case '\'': return '\'';
527         case '\\': return '\\';
528         case '?': return '\?';
529         case 'a': return '\a';
530         case 'b': return '\b';
531         case 'f': return '\f';
532         case 'n': return '\n';
533         case 'r': return '\r';
534         case 't': return '\t';
535         case 'v': return '\v';
536         case 'x':
537                 return parse_hex_sequence();
538         case '0':
539         case '1':
540         case '2':
541         case '3':
542         case '4':
543         case '5':
544         case '6':
545         case '7':
546                 return parse_octal_sequence(ec);
547         case EOF:
548                 parse_error("reached end of file while parsing escape sequence");
549                 return EOF;
550         default:
551                 parse_error("unknown escape sequence");
552                 return EOF;
553         }
554 }
555
556 const char *concat_strings(const char *s1, const char *s2)
557 {
558         size_t  len1   = strlen(s1);
559         size_t  len2   = strlen(s2);
560
561         char   *concat = obstack_alloc(&symbol_obstack, len1 + len2 + 1);
562         memcpy(concat, s1, len1);
563         memcpy(concat + len1, s2, len2 + 1);
564
565         const char *result = strset_insert(&stringset, concat);
566         if(result != concat) {
567                 obstack_free(&symbol_obstack, concat);
568         }
569
570         return result;
571 }
572
573 static void parse_string_literal(void)
574 {
575         unsigned    start_linenr = lexer_token.source_position.linenr;
576         char       *string;
577         const char *result;
578
579         assert(c == '"');
580         next_char();
581
582         int tc;
583         while(1) {
584                 switch(c) {
585                 case '\\':
586                         tc = parse_escape_sequence();
587                         obstack_1grow(&symbol_obstack, tc);
588                         break;
589
590                 case EOF:
591                         error_prefix_at(lexer_token.source_position.input_name,
592                                         start_linenr);
593                         fprintf(stderr, "string has no end\n");
594                         lexer_token.type = T_ERROR;
595                         return;
596
597                 case '"':
598                         next_char();
599                         goto end_of_string;
600
601                 default:
602                         obstack_1grow(&symbol_obstack, c);
603                         next_char();
604                         break;
605                 }
606         }
607
608 end_of_string:
609
610         /* TODO: concatenate multiple strings separated by whitespace... */
611
612         /* add finishing 0 to the string */
613         obstack_1grow(&symbol_obstack, '\0');
614         string = obstack_finish(&symbol_obstack);
615
616         /* check if there is already a copy of the string */
617         result = strset_insert(&stringset, string);
618         if(result != string) {
619                 obstack_free(&symbol_obstack, string);
620         }
621
622         lexer_token.type     = T_STRING_LITERAL;
623         lexer_token.v.string = result;
624 }
625
626 static void parse_character_constant(void)
627 {
628         eat('\'');
629
630         int found_char = 0;
631         while(1) {
632                 switch(c) {
633                 case '\\':
634                         found_char = parse_escape_sequence();
635                         break;
636
637                 MATCH_NEWLINE(
638                         parse_error("newline while parsing character constant");
639                         break;
640                 )
641
642                 case '\'':
643                         next_char();
644                         goto end_of_char_constant;
645
646                 case EOF:
647                         parse_error("EOF while parsing character constant");
648                         lexer_token.type = T_ERROR;
649                         return;
650
651                 default:
652                         if(found_char != 0) {
653                                 parse_error("more than 1 characters in character "
654                                             "constant");
655                                 goto end_of_char_constant;
656                         } else {
657                                 found_char = c;
658                                 next_char();
659                         }
660                         break;
661                 }
662         }
663
664 end_of_char_constant:
665         lexer_token.type       = T_INTEGER;
666         lexer_token.v.intvalue = found_char;
667 }
668
669 static void skip_multiline_comment(void)
670 {
671         unsigned start_linenr = lexer_token.source_position.linenr;
672
673         while(1) {
674                 switch(c) {
675                 case '*':
676                         next_char();
677                         if(c == '/') {
678                                 next_char();
679                                 return;
680                         }
681                         break;
682
683                 MATCH_NEWLINE(break;)
684
685                 case EOF:
686                         error_prefix_at(lexer_token.source_position.input_name,
687                                         start_linenr);
688                         fprintf(stderr, "at end of file while looking for comment end\n");
689                         return;
690
691                 default:
692                         next_char();
693                         break;
694                 }
695         }
696 }
697
698 static void skip_line_comment(void)
699 {
700         while(1) {
701                 switch(c) {
702                 case EOF:
703                         return;
704
705                 case '\n':
706                 case '\r':
707                         return;
708
709                 default:
710                         next_char();
711                         break;
712                 }
713         }
714 }
715
716 static token_t pp_token;
717
718 static inline void next_pp_token(void)
719 {
720         lexer_next_preprocessing_token();
721         pp_token = lexer_token;
722 }
723
724 static void eat_until_newline(void)
725 {
726         while(pp_token.type != '\n' && pp_token.type != T_EOF) {
727                 next_pp_token();
728         }
729 }
730
/**
 * Report an #error directive. Only the diagnostic header is printed;
 * the directive's tokens are not consumed here yet.
 */
static void error_directive(void)
{
    error_prefix();
    fputs("#error directive: \n", stderr);

    /* TODO: parse pp-tokens until new-line */
}
738
739 static void define_directive(void)
740 {
741         lexer_next_preprocessing_token();
742         if(lexer_token.type != T_IDENTIFIER) {
743                 parse_error("expected identifier after #define\n");
744                 eat_until_newline();
745         }
746 }
747
/**
 * Handle #ifdef / #ifndef. Currently only fetches the next preprocessing
 * token.
 *
 * @param is_ifndef  non-zero for #ifndef; not used yet
 */
static void ifdef_directive(int is_ifndef)
{
    /* TODO: expect an identifier followed by a newline */
    (void) is_ifndef;

    lexer_next_preprocessing_token();
}
755
/**
 * Handle #endif. Nothing is done yet.
 */
static void endif_directive(void)
{
    /* TODO: expect a newline */
}
760
761 static void parse_line_directive(void)
762 {
763         if(pp_token.type != T_INTEGER) {
764                 parse_error("expected integer");
765         } else {
766                 lexer_token.source_position.linenr = pp_token.v.intvalue - 1;
767                 next_pp_token();
768         }
769         if(pp_token.type == T_STRING_LITERAL) {
770                 lexer_token.source_position.input_name = pp_token.v.string;
771                 next_pp_token();
772         }
773
774         eat_until_newline();
775 }
776
777 static void parse_preprocessor_identifier(void)
778 {
779         assert(pp_token.type == T_IDENTIFIER);
780         symbol_t *symbol = pp_token.v.symbol;
781
782         switch(symbol->pp_ID) {
783         case TP_include:
784                 printf("include - enable header name parsing!\n");
785                 break;
786         case TP_define:
787                 define_directive();
788                 break;
789         case TP_ifdef:
790                 ifdef_directive(0);
791                 break;
792         case TP_ifndef:
793                 ifdef_directive(1);
794                 break;
795         case TP_endif:
796                 endif_directive();
797                 break;
798         case TP_line:
799                 next_pp_token();
800                 parse_line_directive();
801                 break;
802         case TP_if:
803         case TP_else:
804         case TP_elif:
805         case TP_undef:
806         case TP_error:
807                 error_directive();
808                 break;
809         case TP_pragma:
810                 break;
811         }
812 }
813
814 static void parse_preprocessor_directive(void)
815 {
816         next_pp_token();
817
818         switch(pp_token.type) {
819         case T_IDENTIFIER:
820                 parse_preprocessor_identifier();
821                 break;
822         case T_INTEGER:
823                 parse_line_directive();
824                 break;
825         default:
826                 parse_error("invalid preprocessor directive");
827                 eat_until_newline();
828                 break;
829         }
830 }
831
/*
 * Helpers for lexing multi-character punctuators inside a switch case.
 * MAYBE_PROLOG consumes the current character and opens a loop with a
 * switch on the next one; it must be closed by ELSE or ELSE_CODE.
 */
#define MAYBE_PROLOG          \
    next_char();              \
    while(1) {                \
        switch(c) {

/* If the next character is ch: consume it and return with token type
 * set_type. */
#define MAYBE(ch, set_type)                \
    case ch:                               \
        next_char();                       \
        lexer_token.type = set_type;       \
        return;

/* Close a MAYBE_PROLOG: run `code` for any other character. */
#define ELSE_CODE(code)                    \
    default:                               \
        code;                              \
    }                                      \
    } /* end of while(1) */                \
    break;

/* Close a MAYBE_PROLOG: return with token type set_type for any other
 * character, which is left unconsumed. */
#define ELSE(set_type)                     \
    ELSE_CODE(                             \
        lexer_token.type = set_type;       \
        return;                            \
    )
855
856 void lexer_next_preprocessing_token(void)
857 {
858         while(1) {
859                 switch(c) {
860                 case ' ':
861                 case '\t':
862                         next_char();
863                         break;
864
865                 MATCH_NEWLINE(
866                         lexer_token.type = '\n';
867                         return;
868                 )
869
870                 SYMBOL_CHARS
871                         parse_symbol();
872                         /* might be a wide string ( L"string" ) */
873                         if(c == '"' && (lexer_token.type == T_IDENTIFIER &&
874                            lexer_token.v.symbol == symbol_L)) {
875                                 parse_string_literal();
876                                 return;
877                         }
878                         return;
879
880                 DIGITS
881                         parse_number();
882                         return;
883
884                 case '"':
885                         parse_string_literal();
886                         return;
887
888                 case '\'':
889                         parse_character_constant();
890                         return;
891
892                 case '.':
893                         MAYBE_PROLOG
894                                 case '.':
895                                         MAYBE_PROLOG
896                                         MAYBE('.', T_DOTDOTDOT)
897                                         ELSE_CODE(
898                                                 put_back(c);
899                                                 c = '.';
900                                                 lexer_token.type = '.';
901                                                 return;
902                                         )
903                         ELSE('.')
904                 case '&':
905                         MAYBE_PROLOG
906                         MAYBE('&', T_ANDAND)
907                         MAYBE('=', T_ANDEQUAL)
908                         ELSE('&')
909                 case '*':
910                         MAYBE_PROLOG
911                         MAYBE('=', T_ASTERISKEQUAL)
912                         ELSE('*')
913                 case '+':
914                         MAYBE_PROLOG
915                         MAYBE('+', T_PLUSPLUS)
916                         MAYBE('=', T_PLUSEQUAL)
917                         ELSE('+')
918                 case '-':
919                         MAYBE_PROLOG
920                         MAYBE('>', T_MINUSGREATER)
921                         MAYBE('-', T_MINUSMINUS)
922                         MAYBE('=', T_MINUSEQUAL)
923                         ELSE('-')
924                 case '!':
925                         MAYBE_PROLOG
926                         MAYBE('=', T_EXCLAMATIONMARKEQUAL)
927                         ELSE('!')
928                 case '/':
929                         MAYBE_PROLOG
930                         MAYBE('=', T_SLASHEQUAL)
931                                 case '*':
932                                         next_char();
933                                         skip_multiline_comment();
934                                         lexer_next_preprocessing_token();
935                                         return;
936                                 case '/':
937                                         next_char();
938                                         skip_line_comment();
939                                         lexer_next_preprocessing_token();
940                                         return;
941                         ELSE('/')
942                 case '%':
943                         MAYBE_PROLOG
944                         MAYBE('>', T_PERCENTGREATER)
945                         MAYBE('=', T_PERCENTEQUAL)
946                                 case ':':
947                                         MAYBE_PROLOG
948                                                 case '%':
949                                                         MAYBE_PROLOG
950                                                         MAYBE(':', T_PERCENTCOLONPERCENTCOLON)
951                                                         ELSE_CODE(
952                                                                 put_back(c);
953                                                                 c = '%';
954                                                                 lexer_token.type = T_PERCENTCOLON;
955                                                                 return;
956                                                         )
957                                         ELSE(T_PERCENTCOLON)
958                         ELSE('%')
959                 case '<':
960                         MAYBE_PROLOG
961                         MAYBE(':', T_LESSCOLON)
962                         MAYBE('%', T_LESSPERCENT)
963                         MAYBE('=', T_LESSEQUAL)
964                                 case '<':
965                                         MAYBE_PROLOG
966                                         MAYBE('=', T_LESSLESSEQUAL)
967                                         ELSE(T_LESSLESS)
968                         ELSE('<')
969                 case '>':
970                         MAYBE_PROLOG
971                         MAYBE('=', T_GREATEREQUAL)
972                                 case '>':
973                                         MAYBE_PROLOG
974                                         MAYBE('=', T_GREATERGREATEREQUAL)
975                                         ELSE(T_GREATERGREATER)
976                         ELSE('>')
977                 case '^':
978                         MAYBE_PROLOG
979                         MAYBE('=', T_CARETEQUAL)
980                         ELSE('^')
981                 case '|':
982                         MAYBE_PROLOG
983                         MAYBE('=', T_PIPEEQUAL)
984                         MAYBE('|', T_PIPEPIPE)
985                         ELSE('|')
986                 case ':':
987                         MAYBE_PROLOG
988                         MAYBE('>', T_COLONGREATER)
989                         ELSE(':')
990                 case '=':
991                         MAYBE_PROLOG
992                         MAYBE('=', T_EQUALEQUAL)
993                         ELSE('=')
994                 case '#':
995                         MAYBE_PROLOG
996                         MAYBE('#', T_HASHHASH)
997                         ELSE('#')
998
999                 case '?':
1000                 case '[':
1001                 case ']':
1002                 case '(':
1003                 case ')':
1004                 case '{':
1005                 case '}':
1006                 case '~':
1007                 case ';':
1008                 case ',':
1009                 case '\\':
1010                         lexer_token.type = c;
1011                         next_char();
1012                         return;
1013
1014                 case EOF:
1015                         lexer_token.type = T_EOF;
1016                         return;
1017
1018                 default:
1019                         next_char();
1020                         error_prefix();
1021                         fprintf(stderr, "unknown character '%c' found\n", c);
1022                         lexer_token.type = T_ERROR;
1023                         return;
1024                 }
1025         }
1026 }
1027
1028 void lexer_next_token(void)
1029 {
1030         lexer_next_preprocessing_token();
1031         if(lexer_token.type != '\n')
1032                 return;
1033
1034 newline_found:
1035         do {
1036                 lexer_next_preprocessing_token();
1037         } while(lexer_token.type == '\n');
1038
1039         if(lexer_token.type == '#') {
1040                 parse_preprocessor_directive();
1041                 goto newline_found;
1042         }
1043 }
1044
1045 void init_lexer(void)
1046 {
1047         strset_init(&stringset);
1048
1049         type_int       = make_atomic_type(ATOMIC_TYPE_INT, TYPE_QUALIFIER_CONST);
1050         type_uint      = make_atomic_type(ATOMIC_TYPE_UINT, TYPE_QUALIFIER_CONST);
1051         type_long      = make_atomic_type(ATOMIC_TYPE_LONG, TYPE_QUALIFIER_CONST);
1052         type_ulong     = make_atomic_type(ATOMIC_TYPE_ULONG, TYPE_QUALIFIER_CONST);
1053         type_longlong  = make_atomic_type(ATOMIC_TYPE_LONGLONG,
1054                                           TYPE_QUALIFIER_CONST);
1055         type_ulonglong = make_atomic_type(ATOMIC_TYPE_ULONGLONG,
1056                                           TYPE_QUALIFIER_CONST);
1057
1058         type_float      = make_atomic_type(ATOMIC_TYPE_FLOAT, TYPE_QUALIFIER_CONST);
1059         type_double     = make_atomic_type(ATOMIC_TYPE_DOUBLE,
1060                                            TYPE_QUALIFIER_CONST);
1061         type_longdouble = make_atomic_type(ATOMIC_TYPE_LONG_DOUBLE,
1062                                            TYPE_QUALIFIER_CONST);
1063 }
1064
1065 void lexer_open_stream(FILE *stream, const char *input_name)
1066 {
1067         input                                  = stream;
1068         lexer_token.source_position.linenr     = 0;
1069         lexer_token.source_position.input_name = input_name;
1070
1071         symbol_L = symbol_table_insert("L");
1072
1073         /* place a virtual \n at the beginning so the lexer knows that we're
1074          * at the beginning of a line */
1075         c = '\n';
1076 }
1077
1078 void exit_lexer(void)
1079 {
1080         strset_destroy(&stringset);
1081 }
1082
1083 static __attribute__((unused))
1084 void dbg_pos(const source_position_t source_position)
1085 {
1086         fprintf(stdout, "%s:%d\n", source_position.input_name,
1087                 source_position.linenr);
1088         fflush(stdout);
1089 }