Hexadecimal is base 16, not 36. (;
[cparser] / lexer.c
1 #include <config.h>
2
3 #include "lexer.h"
4 #include "token_t.h"
5 #include "symbol_table_t.h"
6 #include "adt/error.h"
7 #include "adt/strset.h"
8 #include "adt/util.h"
9 #include "type_t.h"
10
11 #include <assert.h>
12 #include <errno.h>
13 #include <string.h>
14 #include <stdbool.h>
15 #include <ctype.h>
16
17 //#define DEBUG_CHARS
18 #define MAX_PUTBACK 3
19
20 static int         c;
21 token_t            lexer_token;
22 symbol_t          *symbol_L;
23 static FILE       *input;
24 static char        buf[1024 + MAX_PUTBACK];
25 static const char *bufend;
26 static const char *bufpos;
27 static strset_t    stringset;
28
29 static type_t     *type_int        = NULL;
30 static type_t     *type_uint       = NULL;
31 static type_t     *type_long       = NULL;
32 static type_t     *type_ulong      = NULL;
33 static type_t     *type_longlong   = NULL;
34 static type_t     *type_ulonglong  = NULL;
35 static type_t     *type_float      = NULL;
36 static type_t     *type_double     = NULL;
37 static type_t     *type_longdouble = NULL;
38
/**
 * Writes the "<input_name>:<linenr>: Error: " lead-in of a diagnostic to
 * stderr. No newline is emitted; the caller prints the message itself.
 */
static void error_prefix_at(const char *input_name, unsigned linenr)
{
	static const char prefix_format[] = "%s:%u: Error: ";

	fprintf(stderr, prefix_format, input_name, linenr);
}
43
44 static void error_prefix(void)
45 {
46         error_prefix_at(lexer_token.source_position.input_name,
47                         lexer_token.source_position.linenr);
48 }
49
50 static void parse_error(const char *msg)
51 {
52         error_prefix();
53         fprintf(stderr, "%s\n", msg);
54 }
55
56 static inline void next_real_char(void)
57 {
58         bufpos++;
59         if(bufpos >= bufend) {
60                 size_t s = fread(buf + MAX_PUTBACK, 1, sizeof(buf) - MAX_PUTBACK,
61                                  input);
62                 if(s == 0) {
63                         c = EOF;
64                         return;
65                 }
66                 bufpos = buf + MAX_PUTBACK;
67                 bufend = buf + MAX_PUTBACK + s;
68         }
69         c = *(bufpos);
70 }
71
72 static inline void put_back(int pc)
73 {
74         assert(bufpos >= buf);
75         //assert(bufpos < buf+MAX_PUTBACK || *bufpos == pc);
76
77         char *p = buf + (bufpos - buf);
78         *p = pc;
79
80         /* going backwards in the buffer is legal as long as it's not more often
81          * than MAX_PUTBACK */
82         bufpos--;
83
84 #ifdef DEBUG_CHARS
85         printf("putback '%c'\n", pc);
86 #endif
87 }
88
static inline void next_char(void);

/*
 * Expands to the two switch cases that recognise a line ending ("\r", "\r\n"
 * or "\n"). Each case consumes the line ending, increments the line number
 * of lexer_token and then runs `code`.
 */
#define MATCH_NEWLINE(code)                           \
	case '\r':                                        \
		next_char();                                  \
		if (c == '\n')                                \
			next_char();                              \
		lexer_token.source_position.linenr++;         \
		code;                                         \
	case '\n':                                        \
		next_char();                                  \
		lexer_token.source_position.linenr++;         \
		code;

/* Asserts that the current character is c_type and skips over it. */
#define eat(c_type)          \
	do {                     \
		assert(c == c_type); \
		next_char();         \
	} while (0)
105
106 static void maybe_concat_lines(void)
107 {
108         eat('\\');
109
110         switch(c) {
111         MATCH_NEWLINE(return;)
112
113         default:
114                 break;
115         }
116
117         put_back(c);
118         c = '\\';
119 }
120
121 static inline void next_char(void)
122 {
123         next_real_char();
124
125         /* filter trigraphs */
126         if(UNLIKELY(c == '\\')) {
127                 maybe_concat_lines();
128                 goto end_of_next_char;
129         }
130
131         if(LIKELY(c != '?'))
132                 goto end_of_next_char;
133
134         next_real_char();
135         if(LIKELY(c != '?')) {
136                 put_back(c);
137                 c = '?';
138                 goto end_of_next_char;
139         }
140
141         next_real_char();
142         switch(c) {
143         case '=': c = '#'; break;
144         case '(': c = '['; break;
145         case '/': c = '\\'; maybe_concat_lines(); break;
146         case ')': c = ']'; break;
147         case '\'': c = '^'; break;
148         case '<': c = '{'; break;
149         case '!': c = '|'; break;
150         case '>': c = '}'; break;
151         case '-': c = '~'; break;
152         default:
153                 put_back('?');
154                 put_back(c);
155                 c = '?';
156                 break;
157         }
158
159 end_of_next_char:;
160 #ifdef DEBUG_CHARS
161         printf("nchar '%c'\n", c);
162 #endif
163 }
164
/* Case labels for every character that may start an identifier:
 * a-z, A-Z and '_'. */
#define SYMBOL_CHARS                                                      \
	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': \
	case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': \
	case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': \
	case 'v': case 'w': case 'x': case 'y': case 'z':                     \
	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': \
	case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': \
	case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': \
	case 'V': case 'W': case 'X': case 'Y': case 'Z':                     \
	case '_':
219
/* Case labels for the decimal digits '0' through '9'. */
#define DIGITS                                            \
	case '0': case '1': case '2': case '3': case '4':     \
	case '5': case '6': case '7': case '8': case '9':
231
232 static void parse_symbol(void)
233 {
234         symbol_t *symbol;
235         char     *string;
236
237         obstack_1grow(&symbol_obstack, c);
238         next_char();
239
240         while(1) {
241                 switch(c) {
242                 DIGITS
243                 SYMBOL_CHARS
244                         obstack_1grow(&symbol_obstack, c);
245                         next_char();
246                         break;
247
248                 default:
249                         goto end_symbol;
250                 }
251         }
252
253 end_symbol:
254         obstack_1grow(&symbol_obstack, '\0');
255
256         string = obstack_finish(&symbol_obstack);
257         symbol = symbol_table_insert(string);
258
259         lexer_token.type     = symbol->ID;
260         lexer_token.v.symbol = symbol;
261
262         if(symbol->string != string) {
263                 obstack_free(&symbol_obstack, string);
264         }
265 }
266
267 static void parse_integer_suffix(void)
268 {
269         if(c == 'U' || c == 'U') {
270                 next_char();
271                 if(c == 'L' || c == 'l') {
272                         next_char();
273                         if(c == 'L' || c == 'l') {
274                                 next_char();
275                                 lexer_token.datatype = type_ulonglong;
276                         } else {
277                                 lexer_token.datatype = type_ulong;
278                         }
279                 } else {
280                         lexer_token.datatype = type_uint;
281                 }
282         } else if(c == 'l' || c == 'L') {
283                 next_char();
284                 if(c == 'l' || c == 'L') {
285                         next_char();
286                         if(c == 'u' || c == 'U') {
287                                 next_char();
288                                 lexer_token.datatype = type_ulonglong;
289                         } else {
290                                 lexer_token.datatype = type_longlong;
291                         }
292                 } else if(c == 'u' || c == 'U') {
293                         next_char();
294                         lexer_token.datatype = type_ulong;
295                 } else {
296                         lexer_token.datatype = type_int;
297                 }
298         } else {
299                 lexer_token.datatype = type_int;
300         }
301 }
302
303 static void parse_floating_suffix(void)
304 {
305         switch(c) {
306         /* TODO: do something usefull with the suffixes... */
307         case 'f':
308         case 'F':
309                 next_char();
310                 lexer_token.datatype = type_float;
311                 break;
312         case 'l':
313         case 'L':
314                 next_char();
315                 lexer_token.datatype = type_longdouble;
316                 break;
317         default:
318                 lexer_token.datatype = type_double;
319                 break;
320         }
321 }
322
/**
 * Returns true if @p c is one of 0-9, a-f or A-F. Any other value,
 * including EOF and negative values, yields false.
 */
static inline bool is_hex_digit(int c)
{
	if ('0' <= c && c <= '9')
		return true;
	if ('a' <= c && c <= 'f')
		return true;
	return 'A' <= c && c <= 'F';
}
329
330 static void parse_number_hex(void)
331 {
332         assert(c == 'x' || c == 'X');
333         next_char();
334
335         while(is_hex_digit(c)) {
336                 obstack_1grow(&symbol_obstack, c);
337                 next_char();
338         }
339         obstack_1grow(&symbol_obstack, '\0');
340         char *string = obstack_finish(&symbol_obstack);
341
342         if(c == '.' || c == 'p' || c == 'P') {
343                 next_char();
344                 panic("Hex floating point numbers not implemented yet");
345         }
346         if(*string == '\0') {
347                 parse_error("invalid hex number");
348                 lexer_token.type = T_ERROR;
349         }
350
351         char *endptr;
352         lexer_token.type       = T_INTEGER;
353         lexer_token.v.intvalue = strtoull(string, &endptr, 16);
354         if(*endptr != '\0') {
355                 parse_error("hex number literal too long");
356         }
357
358         obstack_free(&symbol_obstack, string);
359 }
360
/** Returns true if @p chr is one of the characters '0' through '7'. */
static inline bool is_octal_digit(int chr)
{
	if (chr < '0')
		return false;
	return chr <= '7';
}
365
366 static void parse_number_oct(void)
367 {
368         while(is_octal_digit(c)) {
369                 obstack_1grow(&symbol_obstack, c);
370                 next_char();
371         }
372         obstack_1grow(&symbol_obstack, '\0');
373         char *string = obstack_finish(&symbol_obstack);
374
375         char *endptr;
376         lexer_token.type       = T_INTEGER;
377         lexer_token.v.intvalue = strtoull(string, &endptr, 8);
378         if(*endptr != '\0') {
379                 parse_error("octal number literal too long");
380         }
381
382         obstack_free(&symbol_obstack, string);
383         parse_integer_suffix();
384 }
385
386 static void parse_number_dec(void)
387 {
388         bool is_float = false;
389         while(isdigit(c)) {
390                 obstack_1grow(&symbol_obstack, c);
391                 next_char();
392         }
393
394         if(c == '.') {
395                 obstack_1grow(&symbol_obstack, '.');
396                 next_char();
397
398                 while(isdigit(c)) {
399                         obstack_1grow(&symbol_obstack, c);
400                         next_char();
401                 }
402                 is_float = true;
403         }
404         if(c == 'e' || c == 'E') {
405                 obstack_1grow(&symbol_obstack, 'e');
406                 next_char();
407
408                 if(c == '-' || c == '+') {
409                         obstack_1grow(&symbol_obstack, c);
410                         next_char();
411                 }
412
413                 while(isdigit(c)) {
414                         obstack_1grow(&symbol_obstack, c);
415                         next_char();
416                 }
417                 is_float = true;
418         }
419
420         obstack_1grow(&symbol_obstack, '\0');
421         char *string = obstack_finish(&symbol_obstack);
422
423         char *endptr;
424         if(is_float) {
425                 lexer_token.type         = T_FLOATINGPOINT;
426                 lexer_token.v.floatvalue = strtold(string, &endptr);
427
428                 if(*endptr != '\0') {
429                         parse_error("invalid number literal");
430                 }
431
432                 parse_floating_suffix();
433         } else {
434                 lexer_token.type       = T_INTEGER;
435                 lexer_token.v.intvalue = strtoull(string, &endptr, 10);
436
437                 if(*endptr != '\0') {
438                         parse_error("invalid number literal");
439                 }
440
441                 parse_integer_suffix();
442         }
443         obstack_free(&symbol_obstack, string);
444 }
445
446 static void parse_number(void)
447 {
448         if (c == '0') {
449                 next_char();
450                 switch (c) {
451                         case 'X':
452                         case 'x':
453                                 parse_number_hex();
454                                 break;
455                         case '0':
456                         case '1':
457                         case '2':
458                         case '3':
459                         case '4':
460                         case '5':
461                         case '6':
462                         case '7':
463                                 parse_number_oct();
464                                 break;
465                         case '8':
466                         case '9':
467                                 next_char();
468                                 parse_error("invalid octal number");
469                                 lexer_token.type = T_ERROR;
470                                 return;
471                         case '.':
472                         case 'e':
473                         case 'E':
474                         default:
475                                 obstack_1grow(&symbol_obstack, '0');
476                                 parse_number_dec();
477                                 return;
478                 }
479         } else {
480                 parse_number_dec();
481         }
482 }
483
484 static int parse_octal_sequence(const int first_digit)
485 {
486         assert(is_octal_digit(first_digit));
487         int value = first_digit - '0';
488         if (!is_octal_digit(c)) return value;
489         value = 8 * value + c - '0';
490         next_char();
491         if (!is_octal_digit(c)) return value;
492         value = 8 * value + c - '0';
493         next_char();
494         return value;
495 }
496
497 static int parse_hex_sequence(void)
498 {
499         int value = 0;
500         while(1) {
501                 if (c >= '0' && c <= '9') {
502                         value = 16 * value + c - '0';
503                 } else if ('A' <= c && c <= 'F') {
504                         value = 16 * value + c - 'A' + 10;
505                 } else if ('a' <= c && c <= 'f') {
506                         value = 16 * value + c - 'a' + 10;
507                 } else {
508                         break;
509                 }
510                 next_char();
511         }
512
513         return value;
514 }
515
516 static int parse_escape_sequence(void)
517 {
518         eat('\\');
519
520         int ec = c;
521         next_char();
522
523         switch(ec) {
524         case '"':  return '"';
525         case '\'': return '\'';
526         case '\\': return '\\';
527         case '?': return '\?';
528         case 'a': return '\a';
529         case 'b': return '\b';
530         case 'f': return '\f';
531         case 'n': return '\n';
532         case 'r': return '\r';
533         case 't': return '\t';
534         case 'v': return '\v';
535         case 'x':
536                 return parse_hex_sequence();
537         case '0':
538         case '1':
539         case '2':
540         case '3':
541         case '4':
542         case '5':
543         case '6':
544         case '7':
545                 return parse_octal_sequence(ec);
546         case EOF:
547                 parse_error("reached end of file while parsing escape sequence");
548                 return EOF;
549         default:
550                 parse_error("unknown escape sequence");
551                 return EOF;
552         }
553 }
554
555 const char *concat_strings(const char *s1, const char *s2)
556 {
557         size_t  len1   = strlen(s1);
558         size_t  len2   = strlen(s2);
559
560         char   *concat = obstack_alloc(&symbol_obstack, len1 + len2 + 1);
561         memcpy(concat, s1, len1);
562         memcpy(concat + len1, s2, len2 + 1);
563
564         const char *result = strset_insert(&stringset, concat);
565         if(result != concat) {
566                 obstack_free(&symbol_obstack, concat);
567         }
568
569         return result;
570 }
571
572 static void parse_string_literal(void)
573 {
574         unsigned    start_linenr = lexer_token.source_position.linenr;
575         char       *string;
576         const char *result;
577
578         assert(c == '"');
579         next_char();
580
581         int tc;
582         while(1) {
583                 switch(c) {
584                 case '\\':
585                         tc = parse_escape_sequence();
586                         obstack_1grow(&symbol_obstack, tc);
587                         break;
588
589                 case EOF:
590                         error_prefix_at(lexer_token.source_position.input_name,
591                                         start_linenr);
592                         fprintf(stderr, "string has no end\n");
593                         lexer_token.type = T_ERROR;
594                         return;
595
596                 case '"':
597                         next_char();
598                         goto end_of_string;
599
600                 default:
601                         obstack_1grow(&symbol_obstack, c);
602                         next_char();
603                         break;
604                 }
605         }
606
607 end_of_string:
608
609         /* TODO: concatenate multiple strings separated by whitespace... */
610
611         /* add finishing 0 to the string */
612         obstack_1grow(&symbol_obstack, '\0');
613         string = obstack_finish(&symbol_obstack);
614
615         /* check if there is already a copy of the string */
616         result = strset_insert(&stringset, string);
617         if(result != string) {
618                 obstack_free(&symbol_obstack, string);
619         }
620
621         lexer_token.type     = T_STRING_LITERAL;
622         lexer_token.v.string = result;
623 }
624
625 static void parse_character_constant(void)
626 {
627         eat('\'');
628
629         int found_char = 0;
630         while(1) {
631                 switch(c) {
632                 case '\\':
633                         found_char = parse_escape_sequence();
634                         break;
635
636                 MATCH_NEWLINE(
637                         parse_error("newline while parsing character constant");
638                         break;
639                 )
640
641                 case '\'':
642                         next_char();
643                         goto end_of_char_constant;
644
645                 case EOF:
646                         parse_error("EOF while parsing character constant");
647                         lexer_token.type = T_ERROR;
648                         return;
649
650                 default:
651                         if(found_char != 0) {
652                                 parse_error("more than 1 characters in character "
653                                             "constant");
654                                 goto end_of_char_constant;
655                         } else {
656                                 found_char = c;
657                                 next_char();
658                         }
659                         break;
660                 }
661         }
662
663 end_of_char_constant:
664         lexer_token.type       = T_INTEGER;
665         lexer_token.v.intvalue = found_char;
666 }
667
668 static void skip_multiline_comment(void)
669 {
670         unsigned start_linenr = lexer_token.source_position.linenr;
671
672         while(1) {
673                 switch(c) {
674                 case '*':
675                         next_char();
676                         if(c == '/') {
677                                 next_char();
678                                 return;
679                         }
680                         break;
681
682                 MATCH_NEWLINE(break;)
683
684                 case EOF:
685                         error_prefix_at(lexer_token.source_position.input_name,
686                                         start_linenr);
687                         fprintf(stderr, "at end of file while looking for comment end\n");
688                         return;
689
690                 default:
691                         next_char();
692                         break;
693                 }
694         }
695 }
696
697 static void skip_line_comment(void)
698 {
699         while(1) {
700                 switch(c) {
701                 case EOF:
702                         return;
703
704                 case '\n':
705                 case '\r':
706                         return;
707
708                 default:
709                         next_char();
710                         break;
711                 }
712         }
713 }
714
715 static token_t pp_token;
716
717 static inline void next_pp_token(void)
718 {
719         lexer_next_preprocessing_token();
720         pp_token = lexer_token;
721 }
722
723 static void eat_until_newline(void)
724 {
725         while(pp_token.type != '\n' && pp_token.type != T_EOF) {
726                 next_pp_token();
727         }
728 }
729
/**
 * Handles an #error directive. Currently only the heading of the diagnostic
 * is printed; the directive's tokens are not read.
 */
static void error_directive(void)
{
	error_prefix();
	fputs("#error directive: \n", stderr);

	/* parse pp-tokens until new-line */
}
737
738 static void define_directive(void)
739 {
740         lexer_next_preprocessing_token();
741         if(lexer_token.type != T_IDENTIFIER) {
742                 parse_error("expected identifier after #define\n");
743                 eat_until_newline();
744         }
745 }
746
/**
 * Placeholder for #ifdef / #ifndef handling: reads one preprocessing token
 * and does nothing else yet. @p is_ifndef is currently unused.
 */
static void ifdef_directive(int is_ifndef)
{
	(void) is_ifndef;

	lexer_next_preprocessing_token();
	/* TODO: expect an identifier, then a newline */
	//expect_identifier();
	//expect_newline();
}
754
/**
 * Placeholder for #endif handling; nothing is read or checked yet.
 */
static void endif_directive(void)
{
	/* TODO: expect a newline */
	//expect_newline();
}
759
760 static void parse_line_directive(void)
761 {
762         if(pp_token.type != T_INTEGER) {
763                 parse_error("expected integer");
764         } else {
765                 lexer_token.source_position.linenr = pp_token.v.intvalue - 1;
766                 next_pp_token();
767         }
768         if(pp_token.type == T_STRING_LITERAL) {
769                 lexer_token.source_position.input_name = pp_token.v.string;
770                 next_pp_token();
771         }
772
773         eat_until_newline();
774 }
775
776 static void parse_preprocessor_identifier(void)
777 {
778         assert(pp_token.type == T_IDENTIFIER);
779         symbol_t *symbol = pp_token.v.symbol;
780
781         switch(symbol->pp_ID) {
782         case TP_include:
783                 printf("include - enable header name parsing!\n");
784                 break;
785         case TP_define:
786                 define_directive();
787                 break;
788         case TP_ifdef:
789                 ifdef_directive(0);
790                 break;
791         case TP_ifndef:
792                 ifdef_directive(1);
793                 break;
794         case TP_endif:
795                 endif_directive();
796                 break;
797         case TP_line:
798                 next_pp_token();
799                 parse_line_directive();
800                 break;
801         case TP_if:
802         case TP_else:
803         case TP_elif:
804         case TP_undef:
805         case TP_error:
806                 error_directive();
807                 break;
808         case TP_pragma:
809                 break;
810         }
811 }
812
813 static void parse_preprocessor_directive(void)
814 {
815         next_pp_token();
816
817         switch(pp_token.type) {
818         case T_IDENTIFIER:
819                 parse_preprocessor_identifier();
820                 break;
821         case T_INTEGER:
822                 parse_line_directive();
823                 break;
824         default:
825                 parse_error("invalid preprocessor directive");
826                 eat_until_newline();
827                 break;
828         }
829 }
830
/*
 * Building blocks for lexing multi-character punctuators inside a switch
 * case of a function returning void:
 *
 *   MAYBE_PROLOG          consumes the current character and opens a
 *                         while/switch on the character that follows
 *   MAYBE(ch, set_type)   if that character is ch: consumes it, sets the
 *                         token type to set_type and returns
 *   ELSE_CODE(code)       default branch running code; closes the switch and
 *                         the loop opened by MAYBE_PROLOG, then breaks
 *   ELSE(set_type)        ELSE_CODE that sets the token type and returns
 */
#define MAYBE_PROLOG    \
	next_char();        \
	while(1) {          \
		switch(c) {

#define MAYBE(ch, set_type)              \
		case ch:                         \
			next_char();                 \
			lexer_token.type = set_type; \
			return;

#define ELSE_CODE(code)                  \
		default:                         \
			code;                        \
		}                                \
	} /* end of while(1) */              \
	break;

#define ELSE(set_type)                   \
	ELSE_CODE(                           \
		lexer_token.type = set_type;     \
		return;                          \
	)
854
855 void lexer_next_preprocessing_token(void)
856 {
857         while(1) {
858                 switch(c) {
859                 case ' ':
860                 case '\t':
861                         next_char();
862                         break;
863
864                 MATCH_NEWLINE(
865                         lexer_token.type = '\n';
866                         return;
867                 )
868
869                 SYMBOL_CHARS
870                         parse_symbol();
871                         /* might be a wide string ( L"string" ) */
872                         if(c == '"' && (lexer_token.type == T_IDENTIFIER &&
873                            lexer_token.v.symbol == symbol_L)) {
874                                 parse_string_literal();
875                                 return;
876                         }
877                         return;
878
879                 DIGITS
880                         parse_number();
881                         return;
882
883                 case '"':
884                         parse_string_literal();
885                         return;
886
887                 case '\'':
888                         parse_character_constant();
889                         return;
890
891                 case '.':
892                         MAYBE_PROLOG
893                                 case '.':
894                                         MAYBE_PROLOG
895                                         MAYBE('.', T_DOTDOTDOT)
896                                         ELSE_CODE(
897                                                 put_back(c);
898                                                 c = '.';
899                                                 lexer_token.type = '.';
900                                                 return;
901                                         )
902                         ELSE('.')
903                 case '&':
904                         MAYBE_PROLOG
905                         MAYBE('&', T_ANDAND)
906                         MAYBE('=', T_ANDEQUAL)
907                         ELSE('&')
908                 case '*':
909                         MAYBE_PROLOG
910                         MAYBE('=', T_ASTERISKEQUAL)
911                         ELSE('*')
912                 case '+':
913                         MAYBE_PROLOG
914                         MAYBE('+', T_PLUSPLUS)
915                         MAYBE('=', T_PLUSEQUAL)
916                         ELSE('+')
917                 case '-':
918                         MAYBE_PROLOG
919                         MAYBE('>', T_MINUSGREATER)
920                         MAYBE('-', T_MINUSMINUS)
921                         MAYBE('=', T_MINUSEQUAL)
922                         ELSE('-')
923                 case '!':
924                         MAYBE_PROLOG
925                         MAYBE('=', T_EXCLAMATIONMARKEQUAL)
926                         ELSE('!')
927                 case '/':
928                         MAYBE_PROLOG
929                         MAYBE('=', T_SLASHEQUAL)
930                                 case '*':
931                                         next_char();
932                                         skip_multiline_comment();
933                                         lexer_next_preprocessing_token();
934                                         return;
935                                 case '/':
936                                         next_char();
937                                         skip_line_comment();
938                                         lexer_next_preprocessing_token();
939                                         return;
940                         ELSE('/')
941                 case '%':
942                         MAYBE_PROLOG
943                         MAYBE('>', T_PERCENTGREATER)
944                         MAYBE('=', T_PERCENTEQUAL)
945                                 case ':':
946                                         MAYBE_PROLOG
947                                                 case '%':
948                                                         MAYBE_PROLOG
949                                                         MAYBE(':', T_PERCENTCOLONPERCENTCOLON)
950                                                         ELSE_CODE(
951                                                                 put_back(c);
952                                                                 c = '%';
953                                                                 lexer_token.type = T_PERCENTCOLON;
954                                                                 return;
955                                                         )
956                                         ELSE(T_PERCENTCOLON)
957                         ELSE('%')
958                 case '<':
959                         MAYBE_PROLOG
960                         MAYBE(':', T_LESSCOLON)
961                         MAYBE('%', T_LESSPERCENT)
962                         MAYBE('=', T_LESSEQUAL)
963                                 case '<':
964                                         MAYBE_PROLOG
965                                         MAYBE('=', T_LESSLESSEQUAL)
966                                         ELSE(T_LESSLESS)
967                         ELSE('<')
968                 case '>':
969                         MAYBE_PROLOG
970                         MAYBE('=', T_GREATEREQUAL)
971                                 case '>':
972                                         MAYBE_PROLOG
973                                         MAYBE('=', T_GREATERGREATEREQUAL)
974                                         ELSE(T_GREATERGREATER)
975                         ELSE('>')
976                 case '^':
977                         MAYBE_PROLOG
978                         MAYBE('=', T_CARETEQUAL)
979                         ELSE('^')
980                 case '|':
981                         MAYBE_PROLOG
982                         MAYBE('=', T_PIPEEQUAL)
983                         MAYBE('|', T_PIPEPIPE)
984                         ELSE('|')
985                 case ':':
986                         MAYBE_PROLOG
987                         MAYBE('>', T_COLONGREATER)
988                         ELSE(':')
989                 case '=':
990                         MAYBE_PROLOG
991                         MAYBE('=', T_EQUALEQUAL)
992                         ELSE('=')
993                 case '#':
994                         MAYBE_PROLOG
995                         MAYBE('#', T_HASHHASH)
996                         ELSE('#')
997
998                 case '?':
999                 case '[':
1000                 case ']':
1001                 case '(':
1002                 case ')':
1003                 case '{':
1004                 case '}':
1005                 case '~':
1006                 case ';':
1007                 case ',':
1008                 case '\\':
1009                         lexer_token.type = c;
1010                         next_char();
1011                         return;
1012
1013                 case EOF:
1014                         lexer_token.type = T_EOF;
1015                         return;
1016
1017                 default:
1018                         next_char();
1019                         error_prefix();
1020                         fprintf(stderr, "unknown character '%c' found\n", c);
1021                         lexer_token.type = T_ERROR;
1022                         return;
1023                 }
1024         }
1025 }
1026
1027 void lexer_next_token(void)
1028 {
1029         lexer_next_preprocessing_token();
1030         if(lexer_token.type != '\n')
1031                 return;
1032
1033 newline_found:
1034         do {
1035                 lexer_next_preprocessing_token();
1036         } while(lexer_token.type == '\n');
1037
1038         if(lexer_token.type == '#') {
1039                 parse_preprocessor_directive();
1040                 goto newline_found;
1041         }
1042 }
1043
1044 void init_lexer(void)
1045 {
1046         strset_init(&stringset);
1047
1048         type_int       = make_atomic_type(ATOMIC_TYPE_INT, TYPE_QUALIFIER_CONST);
1049         type_uint      = make_atomic_type(ATOMIC_TYPE_UINT, TYPE_QUALIFIER_CONST);
1050         type_long      = make_atomic_type(ATOMIC_TYPE_LONG, TYPE_QUALIFIER_CONST);
1051         type_ulong     = make_atomic_type(ATOMIC_TYPE_ULONG, TYPE_QUALIFIER_CONST);
1052         type_longlong  = make_atomic_type(ATOMIC_TYPE_LONGLONG,
1053                                           TYPE_QUALIFIER_CONST);
1054         type_ulonglong = make_atomic_type(ATOMIC_TYPE_ULONGLONG,
1055                                           TYPE_QUALIFIER_CONST);
1056
1057         type_float      = make_atomic_type(ATOMIC_TYPE_FLOAT, TYPE_QUALIFIER_CONST);
1058         type_double     = make_atomic_type(ATOMIC_TYPE_DOUBLE,
1059                                            TYPE_QUALIFIER_CONST);
1060         type_longdouble = make_atomic_type(ATOMIC_TYPE_LONG_DOUBLE,
1061                                            TYPE_QUALIFIER_CONST);
1062 }
1063
1064 void lexer_open_stream(FILE *stream, const char *input_name)
1065 {
1066         input                                  = stream;
1067         lexer_token.source_position.linenr     = 0;
1068         lexer_token.source_position.input_name = input_name;
1069
1070         symbol_L = symbol_table_insert("L");
1071
1072         /* place a virtual \n at the beginning so the lexer knows that we're
1073          * at the beginning of a line */
1074         c = '\n';
1075 }
1076
1077 void exit_lexer(void)
1078 {
1079         strset_destroy(&stringset);
1080 }
1081
1082 static __attribute__((unused))
1083 void dbg_pos(const source_position_t source_position)
1084 {
1085         fprintf(stdout, "%s:%d\n", source_position.input_name,
1086                 source_position.linenr);
1087         fflush(stdout);
1088 }