- implemented decimal floating point numbers in lexer
[cparser] / lexer.c
1 #include <config.h>
2
3 #include "lexer.h"
4 #include "token_t.h"
5 #include "symbol_table_t.h"
6 #include "adt/error.h"
7 #include "adt/strset.h"
8 #include "adt/util.h"
9
10 #include <assert.h>
11 #include <errno.h>
12 #include <string.h>
13 #include <ctype.h>
14
15 //#define DEBUG_CHARS
16 #define MAX_PUTBACK 3
17
18 static int         c;
19 token_t            lexer_token;
20 static FILE       *input;
21 static char        buf[1024 + MAX_PUTBACK];
22 static const char *bufend;
23 static const char *bufpos;
24 static strset_t    stringset;
25 //static FILE      **input_stack;
26 //static char      **buf_stack;
27
/**
 * Writes the leading part of a diagnostic, "<input_name>:<linenr>: Error: ",
 * to stderr. No newline is written; the caller appends the message text.
 *
 * @param input_name  name of the input the diagnostic refers to
 * @param linenr      line number the diagnostic refers to
 */
static void error_prefix_at(const char *input_name, unsigned linenr)
{
	fprintf(stderr,
	        "%s:%u: Error: ",
	        input_name,
	        linenr);
}
32
33 static void error_prefix(void)
34 {
35         error_prefix_at(lexer_token.source_position.input_name,
36                         lexer_token.source_position.linenr);
37 }
38
/**
 * Reports an error at the current lexer position: the position prefix,
 * followed by msg and a newline, all on stderr.
 *
 * @param msg  message text (without trailing newline)
 */
static void parse_error(const char *msg)
{
	error_prefix();
	fputs(msg, stderr);
	fputc('\n', stderr);
}
44
45 static inline void next_real_char(void)
46 {
47         bufpos++;
48         if(bufpos >= bufend) {
49                 size_t s = fread(buf + MAX_PUTBACK, 1, sizeof(buf) - MAX_PUTBACK,
50                                  input);
51                 if(s == 0) {
52                         c = EOF;
53                         return;
54                 }
55                 bufpos = buf + MAX_PUTBACK;
56                 bufend = buf + MAX_PUTBACK + s;
57         }
58         c = *(bufpos);
59 }
60
61 static inline void put_back(int pc)
62 {
63         char *p = (char*) bufpos - 1;
64         bufpos--;
65         assert(p >= buf);
66         *p = pc;
67
68 #ifdef DEBUG_CHARS
69         printf("putback '%c'\n", pc);
70 #endif
71 }
72
/* next_char() is defined further down but is already needed here. */
static inline void next_char(void);

/*
 * Expands to the `case` labels for both newline spellings inside a
 * switch(c): "\r" (optionally followed by "\n") and a lone "\n". Each
 * label consumes the newline, increments the current line number and then
 * runs `code`. `code` should end in a jump (return/break/...); otherwise
 * the '\r' case falls through into the '\n' case.
 */
#define MATCH_NEWLINE(code) \
	case '\r': \
		next_char(); \
		if(c == '\n') \
			next_char(); \
		lexer_token.source_position.linenr++; \
		code; \
	case '\n': \
		next_char(); \
		lexer_token.source_position.linenr++; \
		code;
87
88 static void maybe_concat_lines(void)
89 {
90         next_char();
91         switch(c) {
92         MATCH_NEWLINE(return;)
93
94         default:
95                 break;
96         }
97
98         put_back(c);
99         c = '\\';
100 }
101
102 static inline void next_char(void)
103 {
104         next_real_char();
105
106         /* filter trigraphs */
107         if(UNLIKELY(c == '\\')) {
108                 maybe_concat_lines();
109                 goto end_of_next_char;
110         }
111
112         if(LIKELY(c != '?'))
113                 goto end_of_next_char;
114
115         next_real_char();
116         if(LIKELY(c != '?')) {
117                 put_back(c);
118                 c = '?';
119                 goto end_of_next_char;
120         }
121
122         next_real_char();
123         switch(c) {
124         case '=': c = '#'; break;
125         case '(': c = '['; break;
126         case '/': c = '\\'; maybe_concat_lines(); break;
127         case ')': c = ']'; break;
128         case '\'': c = '^'; break;
129         case '<': c = '{'; break;
130         case '!': c = '|'; break;
131         case '>': c = '}'; break;
132         case '-': c = '~'; break;
133         default:
134                 put_back('?');
135                 put_back(c);
136                 c = '?';
137                 break;
138         }
139
140 end_of_next_char:
141 #ifdef DEBUG_CHARS
142         printf("nchar '%c'\n", c);
143 #else
144         ;
145 #endif
146 }
147
/*
 * Case labels for the ASCII letters (both cases) and the underscore.
 * Meant to be placed inside a switch; the statements following the macro
 * run for any of these characters.
 */
#define SYMBOL_CHARS \
	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': \
	case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': \
	case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': \
	case 'v': case 'w': case 'x': case 'y': case 'z': \
	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': \
	case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': \
	case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': \
	case 'V': case 'W': case 'X': case 'Y': case 'Z': \
	case '_':
202
/*
 * Case labels for the decimal digits '0' to '9'. Meant to be placed inside
 * a switch; the statements following the macro run for any digit.
 */
#define DIGITS \
	case '0': case '1': case '2': case '3': case '4': \
	case '5': case '6': case '7': case '8': case '9':
214
215 static void parse_symbol(void)
216 {
217         symbol_t *symbol;
218         char     *string;
219
220         obstack_1grow(&symbol_obstack, c);
221         next_char();
222
223         while(1) {
224                 switch(c) {
225                 DIGITS
226                 SYMBOL_CHARS
227                         obstack_1grow(&symbol_obstack, c);
228                         next_char();
229                         break;
230
231                 default:
232                         goto end_symbol;
233                 }
234         }
235
236 end_symbol:
237         obstack_1grow(&symbol_obstack, '\0');
238
239         string = obstack_finish(&symbol_obstack);
240         symbol = symbol_table_insert(string);
241
242         lexer_token.type     = symbol->ID;
243         lexer_token.v.symbol = symbol;
244
245         if(symbol->string != string) {
246                 obstack_free(&symbol_obstack, string);
247         }
248 }
249
250 static void parse_integer_suffix(void)
251 {
252         if(c == 'U' || c == 'U') {
253                 /* TODO do something with the suffixes... */
254                 next_char();
255                 if(c == 'L' || c == 'l') {
256                         next_char();
257                         if(c == 'L' || c == 'l') {
258                                 next_char();
259                         }
260                 }
261         } else if(c == 'l' || c == 'L') {
262                 next_char();
263                 if(c == 'l' || c == 'L') {
264                         next_char();
265                         if(c == 'u' || c == 'U') {
266                                 next_char();
267                         }
268                 } else if(c == 'u' || c == 'U') {
269                         next_char();
270                 }
271         }
272 }
273
274 static void parse_number_hex(void)
275 {
276         assert(c == 'x' || c == 'X');
277         next_char();
278
279         if (!isdigit(c) &&
280                 !('A' <= c && c <= 'F') &&
281                 !('a' <= c && c <= 'f')) {
282                 parse_error("premature end of hex number literal");
283                 lexer_token.type = T_ERROR;
284                 return;
285         }
286
287         int value = 0;
288         while(1) {
289                 if (isdigit(c)) {
290                         value = 16 * value + c - '0';
291                 } else if ('A' <= c && c <= 'F') {
292                         value = 16 * value + c - 'A' + 10;
293                 } else if ('a' <= c && c <= 'f') {
294                         value = 16 * value + c - 'a' + 10;
295                 } else {
296                         parse_integer_suffix();
297
298                         lexer_token.type       = T_INTEGER;
299                         lexer_token.v.intvalue = value;
300                         return;
301                 }
302                 next_char();
303         }
304
305         if(c == '.' || c == 'p' || c == 'P') {
306                 next_char();
307                 panic("Hex floating point numbers not implemented yet");
308         }
309 }
310
311 static void parse_number_oct(void)
312 {
313         int value = 0;
314         while(c >= '0' && c <= '7') {
315                 value = 8 * value + c - '0';
316                 next_char();
317         }
318         if (c == '8' || c == '9') {
319                 parse_error("invalid octal number");
320                 lexer_token.type = T_ERROR;
321                 return;
322         }
323
324         lexer_token.type       = T_INTEGER;
325         lexer_token.v.intvalue = value;
326
327         parse_integer_suffix();
328 }
329
330 static void parse_floatingpoint_exponent(long double value)
331 {
332         unsigned int expo = 0;
333         long double  factor = 10.;
334
335         if(c == '-') {
336                 next_char();
337                 factor = 0.1;
338         } else if(c == '+') {
339                 next_char();
340         }
341
342         while(c >= '0' && c <= '9') {
343                 expo = 10 * expo + (c - '0');
344                 next_char();
345         }
346
347         while(1) {
348                 if(expo & 1)
349                         value *= factor;
350                 expo >>= 1;
351                 if(expo == 0)
352                         break;
353                 factor *= factor;
354         }
355
356         lexer_token.type         = T_FLOATINGPOINT;
357         lexer_token.v.floatvalue = value;
358 }
359
360 static void parse_floatingpoint_fract(int integer_part)
361 {
362         long double value  = integer_part;
363         long double factor = 1.;
364
365         while(c >= '0' && c <= '9') {
366                 factor *= 0.1;
367                 value  += (c - '0') * factor;
368                 next_char();
369         }
370
371         if(c == 'e' || c == 'E') {
372                 next_char();
373                 parse_floatingpoint_exponent(value);
374                 return;
375         }
376
377         lexer_token.type         = T_FLOATINGPOINT;
378         lexer_token.v.floatvalue = value;
379 }
380
381 static void parse_number_dec(void)
382 {
383         int value = 0;
384
385         while(isdigit(c)) {
386                 value = 10 * value + c - '0';
387                 next_char();
388         }
389
390         if(c == '.') {
391                 next_char();
392                 parse_floatingpoint_fract(value);
393                 return;
394         }
395         if(c == 'e' || c == 'E') {
396                 next_char();
397                 parse_floatingpoint_exponent(value);
398                 return;
399         }
400         parse_integer_suffix();
401
402         lexer_token.type       = T_INTEGER;
403         lexer_token.v.intvalue = value;
404 }
405
406 static void parse_number(void)
407 {
408         if (c == '0') {
409                 next_char();
410                 switch (c) {
411                         case 'X':
412                         case 'x':
413                                 parse_number_hex();
414                                 break;
415                         case '0':
416                         case '1':
417                         case '2':
418                         case '3':
419                         case '4':
420                         case '5':
421                         case '6':
422                         case '7':
423                                 parse_number_oct();
424                                 break;
425                         case '.':
426                                 next_char();
427                                 parse_floatingpoint_fract(0);
428                                 break;
429                         case 'e':
430                         case 'E':
431                                 parse_floatingpoint_exponent(0);
432                                 break;
433                         case '8':
434                         case '9':
435                                 next_char();
436                                 parse_error("invalid octal number");
437                                 lexer_token.type = T_ERROR;
438                                 return;
439                         default:
440                                 put_back(c);
441                                 c = '0';
442                                 parse_number_dec();
443                                 return;
444                 }
445         } else {
446                 parse_number_dec();
447         }
448 }
449
450 static int parse_octal_sequence(void)
451 {
452         int value = 0;
453         while(1) {
454                 if(c < '0' || c > '7')
455                         break;
456                 value = 8 * value + c - '0';
457                 next_char();
458         }
459
460         return value;
461 }
462
463 static int parse_hex_sequence(void)
464 {
465         int value = 0;
466         while(1) {
467                 if (c >= '0' && c <= '9') {
468                         value = 16 * value + c - '0';
469                 } else if ('A' <= c && c <= 'F') {
470                         value = 16 * value + c - 'A' + 10;
471                 } else if ('a' <= c && c <= 'f') {
472                         value = 16 * value + c - 'a' + 10;
473                 } else {
474                         break;
475                 }
476                 next_char();
477         }
478
479         return value;
480 }
481
482 static int parse_escape_sequence(void)
483 {
484         while(1) {
485                 int ec = c;
486                 next_char();
487
488                 switch(ec) {
489                 case '"':  return '"';
490                 case '\'': return'\'';
491                 case '\\': return '\\';
492                 case '?': return '\?';
493                 case 'a': return '\a';
494                 case 'b': return '\b';
495                 case 'f': return '\f';
496                 case 'n': return '\n';
497                 case 'r': return '\r';
498                 case 't': return '\t';
499                 case 'v': return '\v';
500                 case 'x':
501                         return parse_hex_sequence();
502                 case '0':
503                 case '1':
504                 case '2':
505                 case '3':
506                 case '4':
507                 case '5':
508                 case '6':
509                 case '7':
510                         return parse_octal_sequence();
511                 case EOF:
512                         parse_error("reached end of file while parsing escape sequence");
513                         return EOF;
514                 default:
515                         parse_error("unknown escape sequence");
516                         return EOF;
517                 }
518         }
519 }
520
521 const char *concat_strings(const char *s1, const char *s2)
522 {
523         size_t  len1   = strlen(s1);
524         size_t  len2   = strlen(s2);
525
526         char   *concat = obstack_alloc(&symbol_obstack, len1 + len2 + 1);
527         memcpy(concat, s1, len1);
528         memcpy(concat + len1, s2, len2 + 1);
529
530         const char *result = strset_insert(&stringset, concat);
531         if(result != concat) {
532                 obstack_free(&symbol_obstack, concat);
533         }
534
535         return result;
536 }
537
538 static void parse_string_literal(void)
539 {
540         unsigned    start_linenr = lexer_token.source_position.linenr;
541         char       *string;
542         const char *result;
543
544         assert(c == '"');
545         next_char();
546
547         while(1) {
548                 switch(c) {
549                 case '\\':
550                         next_char();
551                         int ec = parse_escape_sequence();
552                         obstack_1grow(&symbol_obstack, ec);
553                         break;
554
555                 case EOF:
556                         error_prefix_at(lexer_token.source_position.input_name,
557                                         start_linenr);
558                         fprintf(stderr, "string has no end\n");
559                         lexer_token.type = T_ERROR;
560                         return;
561
562                 case '"':
563                         next_char();
564                         goto end_of_string;
565
566                 default:
567                         obstack_1grow(&symbol_obstack, c);
568                         next_char();
569                         break;
570                 }
571         }
572
573 end_of_string:
574
575         /* TODO: concatenate multiple strings separated by whitespace... */
576
577         /* add finishing 0 to the string */
578         obstack_1grow(&symbol_obstack, '\0');
579         string = obstack_finish(&symbol_obstack);
580
581         /* check if there is already a copy of the string */
582         result = strset_insert(&stringset, string);
583         if(result != string) {
584                 obstack_free(&symbol_obstack, string);
585         }
586
587         lexer_token.type     = T_STRING_LITERAL;
588         lexer_token.v.string = result;
589 }
590
591 static void parse_character_constant(void)
592 {
593         assert(c == '\'');
594         next_char();
595
596         int found_char = 0;
597         while(1) {
598                 switch(c) {
599                 case '\\':
600                         next_char();
601                         found_char = parse_escape_sequence();
602                         break;
603
604                 MATCH_NEWLINE(
605                         parse_error("newline while parsing character constant");
606                         break;
607                 )
608
609                 case '\'':
610                         next_char();
611                         goto end_of_char_constant;
612
613                 case EOF:
614                         parse_error("EOF while parsing character constant");
615                         lexer_token.type = T_ERROR;
616                         return;
617
618                 default:
619                         if(found_char != 0) {
620                                 parse_error("more than 1 characters in character "
621                                             "constant");
622                                 goto end_of_char_constant;
623                         } else {
624                                 found_char = c;
625                                 next_char();
626                         }
627                         break;
628                 }
629         }
630
631 end_of_char_constant:
632         lexer_token.type       = T_INTEGER;
633         lexer_token.v.intvalue = found_char;
634 }
635
636 static void skip_multiline_comment(void)
637 {
638         unsigned start_linenr = lexer_token.source_position.linenr;
639
640         while(1) {
641                 switch(c) {
642                 case '*':
643                         next_char();
644                         if(c == '/') {
645                                 next_char();
646                                 return;
647                         }
648                         break;
649
650                 MATCH_NEWLINE(break;)
651
652                 case EOF:
653                         error_prefix_at(lexer_token.source_position.input_name,
654                                         start_linenr);
655                         fprintf(stderr, "at end of file while looking for comment end\n");
656                         return;
657
658                 default:
659                         next_char();
660                         break;
661                 }
662         }
663 }
664
665 static void skip_line_comment(void)
666 {
667         while(1) {
668                 switch(c) {
669                 case EOF:
670                         return;
671
672                 case '\n':
673                 case '\r':
674                         return;
675
676                 default:
677                         next_char();
678                         break;
679                 }
680         }
681 }
682
683 static token_t pp_token;
684
685 static inline void next_pp_token(void)
686 {
687         lexer_next_preprocessing_token();
688         pp_token = lexer_token;
689 }
690
691 static void eat_until_newline(void)
692 {
693         while(pp_token.type != '\n' && pp_token.type != T_EOF) {
694                 next_pp_token();
695         }
696 }
697
/**
 * Reports an #error directive at the current position. The text of the
 * directive is not read yet.
 */
static void error_directive(void)
{
	error_prefix();
	fputs("#error directive: \n", stderr);

	/* TODO: parse pp-tokens until new-line */
}
705
706 static void define_directive(void)
707 {
708         lexer_next_preprocessing_token();
709         if(lexer_token.type != T_IDENTIFIER) {
710                 parse_error("expected identifier after #define\n");
711                 eat_until_newline();
712         }
713 }
714
/**
 * Placeholder for "#ifdef" / "#ifndef": reads one preprocessing token and
 * does nothing else yet.
 *
 * @param is_ifndef  non-zero for #ifndef, zero for #ifdef (currently unused)
 */
static void ifdef_directive(int is_ifndef)
{
	lexer_next_preprocessing_token();

	/* TODO: expect an identifier followed by a newline */
	(void) is_ifndef;
}
722
/**
 * Placeholder for "#endif"; nothing is done yet.
 */
static void endif_directive(void)
{
	/* TODO: expect a newline */
}
727
728 static void parse_line_directive(void)
729 {
730         if(pp_token.type != T_INTEGER) {
731                 parse_error("expected integer");
732         } else {
733                 lexer_token.source_position.linenr = pp_token.v.intvalue - 1;
734                 next_pp_token();
735         }
736         if(pp_token.type == T_STRING_LITERAL) {
737                 lexer_token.source_position.input_name = pp_token.v.string;
738                 next_pp_token();
739         }
740
741         eat_until_newline();
742 }
743
744 static void parse_preprocessor_identifier(void)
745 {
746         assert(pp_token.type == T_IDENTIFIER);
747         symbol_t *symbol = pp_token.v.symbol;
748
749         switch(symbol->pp_ID) {
750         case TP_include:
751                 printf("include - enable header name parsing!\n");
752                 break;
753         case TP_define:
754                 define_directive();
755                 break;
756         case TP_ifdef:
757                 ifdef_directive(0);
758                 break;
759         case TP_ifndef:
760                 ifdef_directive(1);
761                 break;
762         case TP_endif:
763                 endif_directive();
764                 break;
765         case TP_line:
766                 next_pp_token();
767                 parse_line_directive();
768                 break;
769         case TP_if:
770         case TP_else:
771         case TP_elif:
772         case TP_undef:
773         case TP_error:
774                 error_directive();
775                 break;
776         case TP_pragma:
777                 break;
778         }
779 }
780
781 static void parse_preprocessor_directive()
782 {
783         next_pp_token();
784
785         switch(pp_token.type) {
786         case T_IDENTIFIER:
787                 parse_preprocessor_identifier();
788                 break;
789         case T_INTEGER:
790                 parse_line_directive();
791                 break;
792         default:
793                 parse_error("invalid preprocessor directive");
794                 eat_until_newline();
795                 break;
796         }
797 }
798
/*
 * Helper macros for lexing punctuators that consist of several characters.
 * They are used inside a `case` of a switch(c) within a function returning
 * void:
 *
 *   MAYBE_PROLOG          consumes the first character and opens a nested
 *                         loop/switch on the character that follows
 *   MAYBE(ch, set_type)   if that character is ch: consume it, set the
 *                         token type to set_type and return
 *   ELSE_CODE(code)       for any other character: run code; also closes
 *                         the nested switch and loop
 *   ELSE(set_type)        like ELSE_CODE, setting the token type to
 *                         set_type and returning
 */
#define MAYBE_PROLOG \
	next_char(); \
	while(1) { \
		switch(c) {

#define MAYBE(ch, set_type) \
		case ch: \
			next_char(); \
			lexer_token.type = set_type; \
			return;

#define ELSE_CODE(code) \
		default: \
			code; \
		} \
	} /* end of while(1) */ \
	break;

#define ELSE(set_type) \
	ELSE_CODE( \
		lexer_token.type = set_type; \
		return; \
	)
822
823 void lexer_next_preprocessing_token(void)
824 {
825         while(1) {
826                 switch(c) {
827                 case ' ':
828                 case '\t':
829                         next_char();
830                         break;
831
832                 MATCH_NEWLINE(
833                         lexer_token.type = '\n';
834                         return;
835                 )
836
837                 SYMBOL_CHARS
838                         parse_symbol();
839                         return;
840
841                 DIGITS
842                         parse_number();
843                         return;
844
845                 case '"':
846                         parse_string_literal();
847                         return;
848
849                 case '\'':
850                         parse_character_constant();
851                         return;
852
853                 case '.':
854                         MAYBE_PROLOG
855                                 case '.':
856                                         MAYBE_PROLOG
857                                         MAYBE('.', T_DOTDOTDOT)
858                                         ELSE_CODE(
859                                                 put_back(c);
860                                                 c = '.';
861                                                 lexer_token.type = '.';
862                                                 return;
863                                         )
864                         ELSE('.')
865                 case '&':
866                         MAYBE_PROLOG
867                         MAYBE('&', T_ANDAND)
868                         MAYBE('=', T_ANDEQUAL)
869                         ELSE('&')
870                 case '*':
871                         MAYBE_PROLOG
872                         MAYBE('=', T_ASTERISKEQUAL)
873                         ELSE('*')
874                 case '+':
875                         MAYBE_PROLOG
876                         MAYBE('+', T_PLUSPLUS)
877                         MAYBE('=', T_PLUSEQUAL)
878                         ELSE('+')
879                 case '-':
880                         MAYBE_PROLOG
881                         MAYBE('>', T_MINUSGREATER)
882                         MAYBE('-', T_MINUSMINUS)
883                         MAYBE('=', T_MINUSEQUAL)
884                         ELSE('-')
885                 case '!':
886                         MAYBE_PROLOG
887                         MAYBE('=', T_EXCLAMATIONMARKEQUAL)
888                         ELSE('!')
889                 case '/':
890                         MAYBE_PROLOG
891                         MAYBE('=', T_SLASHEQUAL)
892                                 case '*':
893                                         next_char();
894                                         skip_multiline_comment();
895                                         lexer_next_preprocessing_token();
896                                         return;
897                                 case '/':
898                                         next_char();
899                                         skip_line_comment();
900                                         lexer_next_preprocessing_token();
901                                         return;
902                         ELSE('/')
903                 case '%':
904                         MAYBE_PROLOG
905                         MAYBE('>', T_PERCENTGREATER)
906                         MAYBE('=', T_PERCENTEQUAL)
907                                 case ':':
908                                         MAYBE_PROLOG
909                                                 case '%':
910                                                         MAYBE_PROLOG
911                                                         MAYBE(':', T_PERCENTCOLONPERCENTCOLON)
912                                                         ELSE_CODE(
913                                                                 put_back(c);
914                                                                 c = '%';
915                                                                 lexer_token.type = T_PERCENTCOLON;
916                                                                 return;
917                                                         )
918                                         ELSE(T_PERCENTCOLON)
919                         ELSE('%')
920                 case '<':
921                         MAYBE_PROLOG
922                         MAYBE(':', T_LESSCOLON)
923                         MAYBE('%', T_LESSPERCENT)
924                         MAYBE('=', T_LESSEQUAL)
925                                 case '<':
926                                         MAYBE_PROLOG
927                                         MAYBE('=', T_LESSLESSEQUAL)
928                                         ELSE(T_LESSLESS)
929                         ELSE('<')
930                 case '>':
931                         MAYBE_PROLOG
932                         MAYBE('=', T_GREATEREQUAL)
933                                 case '>':
934                                         MAYBE_PROLOG
935                                         MAYBE('=', T_GREATERGREATEREQUAL)
936                                         ELSE(T_GREATERGREATER)
937                         ELSE('>')
938                 case '^':
939                         MAYBE_PROLOG
940                         MAYBE('=', T_CARETEQUAL)
941                         ELSE('^')
942                 case '|':
943                         MAYBE_PROLOG
944                         MAYBE('=', T_PIPEEQUAL)
945                         MAYBE('|', T_PIPEPIPE)
946                         ELSE('|')
947                 case ':':
948                         MAYBE_PROLOG
949                         MAYBE('>', T_COLONGREATER)
950                         ELSE(':')
951                 case '=':
952                         MAYBE_PROLOG
953                         MAYBE('=', T_EQUALEQUAL)
954                         ELSE('=')
955                 case '#':
956                         MAYBE_PROLOG
957                         MAYBE('#', T_HASHHASH)
958                         ELSE('#')
959
960                 case '?':
961                 case '[':
962                 case ']':
963                 case '(':
964                 case ')':
965                 case '{':
966                 case '}':
967                 case '~':
968                 case ';':
969                 case ',':
970                 case '\\':
971                         lexer_token.type = c;
972                         next_char();
973                         return;
974
975                 case EOF:
976                         lexer_token.type = T_EOF;
977                         return;
978
979                 default:
980                         next_char();
981                         error_prefix();
982                         fprintf(stderr, "unknown character '%c' found\n", c);
983                         lexer_token.type = T_ERROR;
984                         return;
985                 }
986         }
987 }
988
989 void lexer_next_token(void)
990 {
991         lexer_next_preprocessing_token();
992         if(lexer_token.type != '\n')
993                 return;
994
995 newline_found:
996         do {
997                 lexer_next_preprocessing_token();
998         } while(lexer_token.type == '\n');
999
1000         if(lexer_token.type == '#') {
1001                 parse_preprocessor_directive();
1002                 goto newline_found;
1003         }
1004 }
1005
1006 void init_lexer(void)
1007 {
1008         strset_init(&stringset);
1009 }
1010
1011 void lexer_open_stream(FILE *stream, const char *input_name)
1012 {
1013         input                                  = stream;
1014         lexer_token.source_position.linenr     = 1;
1015         lexer_token.source_position.input_name = input_name;
1016
1017         next_char();
1018 }
1019
1020 void exit_lexer(void)
1021 {
1022         strset_destroy(&stringset);
1023 }
1024
1025 static __attribute__((unused))
1026 void dbg_pos(const source_position_t source_position)
1027 {
1028         fprintf(stdout, "%s:%d\n", source_position.input_name,
1029                 source_position.linenr);
1030         fflush(stdout);
1031 }