Test multiline commits
[cparser] / lexer.c
1 #include <config.h>
2
3 #include "lexer.h"
4 #include "token_t.h"
5 #include "symbol_table_t.h"
6 #include "adt/error.h"
7 #include "adt/strset.h"
8 #include "adt/util.h"
9
10 #include <assert.h>
11 #include <errno.h>
12 #include <string.h>
13 #include <ctype.h>
14
/* When defined, next_char() and put_back() trace every character they handle
 * to stdout. */
#define DEBUG_CHARS
/* Number of bytes kept free at the start of buf; fread() always fills the
 * buffer starting at buf + MAX_PUTBACK so put_back() has room to step back. */
#define MAX_PUTBACK 3

/* current character (or EOF) as seen by the token parsing functions */
static int         c;
/* token most recently produced by the lexer; also carries the current source
 * position */
token_t            lexer_token;
/* stream the characters are read from, set by lexer_open_stream() */
static FILE       *input;
/* read buffer, including the put-back area in front */
static char        buf[1024 + MAX_PUTBACK];
/* one past the last valid byte in buf */
static const char *bufend;
/* position of the current character inside buf */
static const char *bufpos;
/* set used to keep a single copy of each distinct string literal */
static strset_t    stringset;
25
/**
 * Writes the "<file>:<line>: Error: " prefix of a diagnostic to stderr.
 * No newline is emitted; the caller prints the actual message afterwards.
 *
 * @param input_name  name of the input the diagnostic refers to
 * @param linenr      line number the diagnostic refers to
 */
static void error_prefix_at(const char *input_name, unsigned linenr)
{
        fprintf(stderr,
                "%s:%u: Error: ",
                input_name,
                linenr);
}
30
31 static void error_prefix(void)
32 {
33         error_prefix_at(lexer_token.source_position.input_name,
34                         lexer_token.source_position.linenr);
35 }
36
/**
 * Reports a lexer error at the current source position: prints the position
 * prefix followed by msg and a newline on stderr.
 *
 * @param msg  message text (printed verbatim, without trailing newline)
 */
static void parse_error(const char *msg)
{
        FILE *const out = stderr;

        error_prefix();
        fprintf(out, "%s\n", msg);
}
42
43 static inline void next_real_char(void)
44 {
45         bufpos++;
46         if(bufpos >= bufend) {
47                 size_t s = fread(buf + MAX_PUTBACK, 1, sizeof(buf) - MAX_PUTBACK,
48                                  input);
49                 if(s == 0) {
50                         c = EOF;
51                         return;
52                 }
53                 bufpos = buf + MAX_PUTBACK;
54                 bufend = buf + MAX_PUTBACK + s;
55         }
56         c = *(bufpos);
57 }
58
59 static inline void put_back(int pc)
60 {
61         char *p = (char*) bufpos - 1;
62         bufpos--;
63         assert(p >= buf);
64         *p = pc;
65
66 #ifdef DEBUG_CHARS
67         printf("putback '%c'\n", pc);
68 #endif
69 }
70
/* forward declaration: next_char() and maybe_concat_lines() call each other */
static inline void next_char(void);

/*
 * Expands to the two case labels '\r' and '\n' for use inside a switch over c.
 * Each label consumes one line ending ("\r", "\r\n" or "\n"), increments the
 * current line number and then executes `code`.
 * `code` has to leave the case on its own (break, return, goto); otherwise the
 * '\r' case falls through into the '\n' case.
 */
#define MATCH_NEWLINE(code)                   \
        case '\r':                                \
                next_char();                          \
                if(c == '\n') {                       \
                        next_char();                      \
                }                                     \
                lexer_token.source_position.linenr++; \
                code;                                 \
        case '\n':                                \
                next_char();                          \
                lexer_token.source_position.linenr++; \
                code;
85
86 static inline void eat(char c_type)
87 {
88         assert(c == c_type);
89         next_char();
90 }
91
92 static void maybe_concat_lines(void)
93 {
94         eat('\\');
95
96         switch(c) {
97         MATCH_NEWLINE(return;)
98
99         default:
100                 break;
101         }
102
103         put_back(c);
104         c = '\\';
105 }
106
107 static inline void next_char(void)
108 {
109         next_real_char();
110
111         /* filter trigraphs */
112         if(UNLIKELY(c == '\\')) {
113                 maybe_concat_lines();
114                 goto end_of_next_char;
115         }
116
117         if(LIKELY(c != '?'))
118                 goto end_of_next_char;
119
120         next_real_char();
121         if(LIKELY(c != '?')) {
122                 put_back(c);
123                 c = '?';
124                 goto end_of_next_char;
125         }
126
127         next_real_char();
128         switch(c) {
129         case '=': c = '#'; break;
130         case '(': c = '['; break;
131         case '/': c = '\\'; maybe_concat_lines(); break;
132         case ')': c = ']'; break;
133         case '\'': c = '^'; break;
134         case '<': c = '{'; break;
135         case '!': c = '|'; break;
136         case '>': c = '}'; break;
137         case '-': c = '~'; break;
138         default:
139                 put_back('?');
140                 put_back(c);
141                 c = '?';
142                 break;
143         }
144
145 end_of_next_char:
146 #ifdef DEBUG_CHARS
147         printf("nchar '%c'\n", c);
148 #else
149         ;
150 #endif
151 }
152
/*
 * Expands to the case labels for every character that may start an
 * identifier: the ASCII letters a-z, A-Z and the underscore.
 * Meant to be placed inside a switch over c, directly before the statements
 * that handle such a character.
 */
#define SYMBOL_CHARS  \
        case 'a':         \
        case 'b':         \
        case 'c':         \
        case 'd':         \
        case 'e':         \
        case 'f':         \
        case 'g':         \
        case 'h':         \
        case 'i':         \
        case 'j':         \
        case 'k':         \
        case 'l':         \
        case 'm':         \
        case 'n':         \
        case 'o':         \
        case 'p':         \
        case 'q':         \
        case 'r':         \
        case 's':         \
        case 't':         \
        case 'u':         \
        case 'v':         \
        case 'w':         \
        case 'x':         \
        case 'y':         \
        case 'z':         \
        case 'A':         \
        case 'B':         \
        case 'C':         \
        case 'D':         \
        case 'E':         \
        case 'F':         \
        case 'G':         \
        case 'H':         \
        case 'I':         \
        case 'J':         \
        case 'K':         \
        case 'L':         \
        case 'M':         \
        case 'N':         \
        case 'O':         \
        case 'P':         \
        case 'Q':         \
        case 'R':         \
        case 'S':         \
        case 'T':         \
        case 'U':         \
        case 'V':         \
        case 'W':         \
        case 'X':         \
        case 'Y':         \
        case 'Z':         \
        case '_':
207
/*
 * Expands to the case labels for the decimal digits '0' to '9'.
 * Meant to be placed inside a switch over c, directly before the statements
 * that handle a digit.
 */
#define DIGITS        \
        case '0':         \
        case '1':         \
        case '2':         \
        case '3':         \
        case '4':         \
        case '5':         \
        case '6':         \
        case '7':         \
        case '8':         \
        case '9':
219
220 static void parse_symbol(void)
221 {
222         symbol_t *symbol;
223         char     *string;
224
225         obstack_1grow(&symbol_obstack, c);
226         next_char();
227
228         while(1) {
229                 switch(c) {
230                 DIGITS
231                 SYMBOL_CHARS
232                         obstack_1grow(&symbol_obstack, c);
233                         next_char();
234                         break;
235
236                 default:
237                         goto end_symbol;
238                 }
239         }
240
241 end_symbol:
242         obstack_1grow(&symbol_obstack, '\0');
243
244         string = obstack_finish(&symbol_obstack);
245         symbol = symbol_table_insert(string);
246
247         lexer_token.type     = symbol->ID;
248         lexer_token.v.symbol = symbol;
249
250         if(symbol->string != string) {
251                 obstack_free(&symbol_obstack, string);
252         }
253 }
254
255 static void parse_integer_suffix(void)
256 {
257         if(c == 'U' || c == 'U') {
258                 /* TODO do something with the suffixes... */
259                 next_char();
260                 if(c == 'L' || c == 'l') {
261                         next_char();
262                         if(c == 'L' || c == 'l') {
263                                 next_char();
264                         }
265                 }
266         } else if(c == 'l' || c == 'L') {
267                 next_char();
268                 if(c == 'l' || c == 'L') {
269                         next_char();
270                         if(c == 'u' || c == 'U') {
271                                 next_char();
272                         }
273                 } else if(c == 'u' || c == 'U') {
274                         next_char();
275                 }
276         }
277 }
278
279 static void parse_number_hex(void)
280 {
281         assert(c == 'x' || c == 'X');
282         next_char();
283
284         if (!isdigit(c) &&
285                 !('A' <= c && c <= 'F') &&
286                 !('a' <= c && c <= 'f')) {
287                 parse_error("premature end of hex number literal");
288                 lexer_token.type = T_ERROR;
289                 return;
290         }
291
292         int value = 0;
293         while(1) {
294                 if (isdigit(c)) {
295                         value = 16 * value + c - '0';
296                 } else if ('A' <= c && c <= 'F') {
297                         value = 16 * value + c - 'A' + 10;
298                 } else if ('a' <= c && c <= 'f') {
299                         value = 16 * value + c - 'a' + 10;
300                 } else {
301                         parse_integer_suffix();
302
303                         lexer_token.type       = T_INTEGER;
304                         lexer_token.v.intvalue = value;
305                         return;
306                 }
307                 next_char();
308         }
309
310         if(c == '.' || c == 'p' || c == 'P') {
311                 next_char();
312                 panic("Hex floating point numbers not implemented yet");
313         }
314 }
315
316 static void parse_number_oct(void)
317 {
318         int value = 0;
319         while(c >= '0' && c <= '7') {
320                 value = 8 * value + c - '0';
321                 next_char();
322         }
323         if (c == '8' || c == '9') {
324                 parse_error("invalid octal number");
325                 lexer_token.type = T_ERROR;
326                 return;
327         }
328
329         lexer_token.type       = T_INTEGER;
330         lexer_token.v.intvalue = value;
331
332         parse_integer_suffix();
333 }
334
335 static void parse_floatingpoint_exponent(long double value)
336 {
337         unsigned int expo = 0;
338         long double  factor = 10.;
339
340         if(c == '-') {
341                 next_char();
342                 factor = 0.1;
343         } else if(c == '+') {
344                 next_char();
345         }
346
347         while(c >= '0' && c <= '9') {
348                 expo = 10 * expo + (c - '0');
349                 next_char();
350         }
351
352         while(1) {
353                 if(expo & 1)
354                         value *= factor;
355                 expo >>= 1;
356                 if(expo == 0)
357                         break;
358                 factor *= factor;
359         }
360
361         lexer_token.type         = T_FLOATINGPOINT;
362         lexer_token.v.floatvalue = value;
363 }
364
365 static void parse_floatingpoint_fract(int integer_part)
366 {
367         long double value  = integer_part;
368         long double factor = 1.;
369
370         while(c >= '0' && c <= '9') {
371                 factor *= 0.1;
372                 value  += (c - '0') * factor;
373                 next_char();
374         }
375
376         if(c == 'e' || c == 'E') {
377                 next_char();
378                 parse_floatingpoint_exponent(value);
379                 return;
380         }
381
382         lexer_token.type         = T_FLOATINGPOINT;
383         lexer_token.v.floatvalue = value;
384 }
385
386 static void parse_number_dec(void)
387 {
388         int value = 0;
389
390         while(isdigit(c)) {
391                 value = 10 * value + c - '0';
392                 next_char();
393         }
394
395         if(c == '.') {
396                 next_char();
397                 parse_floatingpoint_fract(value);
398                 return;
399         }
400         if(c == 'e' || c == 'E') {
401                 next_char();
402                 parse_floatingpoint_exponent(value);
403                 return;
404         }
405         parse_integer_suffix();
406
407         lexer_token.type       = T_INTEGER;
408         lexer_token.v.intvalue = value;
409 }
410
411 static void parse_number(void)
412 {
413         if (c == '0') {
414                 next_char();
415                 switch (c) {
416                         case 'X':
417                         case 'x':
418                                 parse_number_hex();
419                                 break;
420                         case '0':
421                         case '1':
422                         case '2':
423                         case '3':
424                         case '4':
425                         case '5':
426                         case '6':
427                         case '7':
428                                 parse_number_oct();
429                                 break;
430                         case '.':
431                                 next_char();
432                                 parse_floatingpoint_fract(0);
433                                 break;
434                         case 'e':
435                         case 'E':
436                                 parse_floatingpoint_exponent(0);
437                                 break;
438                         case '8':
439                         case '9':
440                                 next_char();
441                                 parse_error("invalid octal number");
442                                 lexer_token.type = T_ERROR;
443                                 return;
444                         default:
445                                 put_back(c);
446                                 c = '0';
447                                 parse_number_dec();
448                                 return;
449                 }
450         } else {
451                 parse_number_dec();
452         }
453 }
454
455 static int parse_octal_sequence(void)
456 {
457         int value = 0;
458         while(1) {
459                 if(c < '0' || c > '7')
460                         break;
461                 value = 8 * value + c - '0';
462                 next_char();
463         }
464
465         return value;
466 }
467
468 static int parse_hex_sequence(void)
469 {
470         int value = 0;
471         while(1) {
472                 if (c >= '0' && c <= '9') {
473                         value = 16 * value + c - '0';
474                 } else if ('A' <= c && c <= 'F') {
475                         value = 16 * value + c - 'A' + 10;
476                 } else if ('a' <= c && c <= 'f') {
477                         value = 16 * value + c - 'a' + 10;
478                 } else {
479                         break;
480                 }
481                 next_char();
482         }
483
484         return value;
485 }
486
487 static int parse_escape_sequence(void)
488 {
489         eat('\\');
490
491         int ec = c;
492         next_char();
493
494         switch(ec) {
495         case '"':  return '"';
496         case '\'': return'\'';
497         case '\\': return '\\';
498         case '?': return '\?';
499         case 'a': return '\a';
500         case 'b': return '\b';
501         case 'f': return '\f';
502         case 'n': return '\n';
503         case 'r': return '\r';
504         case 't': return '\t';
505         case 'v': return '\v';
506         case 'x':
507                 return parse_hex_sequence();
508         case '0':
509         case '1':
510         case '2':
511         case '3':
512         case '4':
513         case '5':
514         case '6':
515         case '7':
516                 return parse_octal_sequence();
517         case EOF:
518                 parse_error("reached end of file while parsing escape sequence");
519                 return EOF;
520         default:
521                 parse_error("unknown escape sequence");
522                 return EOF;
523         }
524 }
525
526 const char *concat_strings(const char *s1, const char *s2)
527 {
528         size_t  len1   = strlen(s1);
529         size_t  len2   = strlen(s2);
530
531         char   *concat = obstack_alloc(&symbol_obstack, len1 + len2 + 1);
532         memcpy(concat, s1, len1);
533         memcpy(concat + len1, s2, len2 + 1);
534
535         const char *result = strset_insert(&stringset, concat);
536         if(result != concat) {
537                 obstack_free(&symbol_obstack, concat);
538         }
539
540         return result;
541 }
542
543 static void parse_string_literal(void)
544 {
545         unsigned    start_linenr = lexer_token.source_position.linenr;
546         char       *string;
547         const char *result;
548
549         assert(c == '"');
550         next_char();
551
552         int tc;
553         while(1) {
554                 switch(c) {
555                 case '\\':
556                         tc = parse_escape_sequence();
557                         obstack_1grow(&symbol_obstack, tc);
558                         break;
559
560                 case EOF:
561                         error_prefix_at(lexer_token.source_position.input_name,
562                                         start_linenr);
563                         fprintf(stderr, "string has no end\n");
564                         lexer_token.type = T_ERROR;
565                         return;
566
567                 case '"':
568                         next_char();
569                         goto end_of_string;
570
571                 default:
572                         obstack_1grow(&symbol_obstack, c);
573                         next_char();
574                         break;
575                 }
576         }
577
578 end_of_string:
579
580         /* TODO: concatenate multiple strings separated by whitespace... */
581
582         /* add finishing 0 to the string */
583         obstack_1grow(&symbol_obstack, '\0');
584         string = obstack_finish(&symbol_obstack);
585
586         /* check if there is already a copy of the string */
587         result = strset_insert(&stringset, string);
588         if(result != string) {
589                 obstack_free(&symbol_obstack, string);
590         }
591
592         lexer_token.type     = T_STRING_LITERAL;
593         lexer_token.v.string = result;
594 }
595
596 static void parse_character_constant(void)
597 {
598         eat('\'');
599
600         int found_char = 0;
601         while(1) {
602                 switch(c) {
603                 case '\\':
604                         found_char = parse_escape_sequence();
605                         break;
606
607                 MATCH_NEWLINE(
608                         parse_error("newline while parsing character constant");
609                         break;
610                 )
611
612                 case '\'':
613                         next_char();
614                         goto end_of_char_constant;
615
616                 case EOF:
617                         parse_error("EOF while parsing character constant");
618                         lexer_token.type = T_ERROR;
619                         return;
620
621                 default:
622                         if(found_char != 0) {
623                                 parse_error("more than 1 characters in character "
624                                             "constant");
625                                 goto end_of_char_constant;
626                         } else {
627                                 found_char = c;
628                                 next_char();
629                         }
630                         break;
631                 }
632         }
633
634 end_of_char_constant:
635         lexer_token.type       = T_INTEGER;
636         lexer_token.v.intvalue = found_char;
637 }
638
639 static void skip_multiline_comment(void)
640 {
641         unsigned start_linenr = lexer_token.source_position.linenr;
642
643         while(1) {
644                 switch(c) {
645                 case '*':
646                         next_char();
647                         if(c == '/') {
648                                 next_char();
649                                 return;
650                         }
651                         break;
652
653                 MATCH_NEWLINE(break;)
654
655                 case EOF:
656                         error_prefix_at(lexer_token.source_position.input_name,
657                                         start_linenr);
658                         fprintf(stderr, "at end of file while looking for comment end\n");
659                         return;
660
661                 default:
662                         next_char();
663                         break;
664                 }
665         }
666 }
667
668 static void skip_line_comment(void)
669 {
670         while(1) {
671                 switch(c) {
672                 case EOF:
673                         return;
674
675                 case '\n':
676                 case '\r':
677                         return;
678
679                 default:
680                         next_char();
681                         break;
682                 }
683         }
684 }
685
686 static token_t pp_token;
687
688 static inline void next_pp_token(void)
689 {
690         lexer_next_preprocessing_token();
691         pp_token = lexer_token;
692 }
693
694 static void eat_until_newline(void)
695 {
696         while(pp_token.type != '\n' && pp_token.type != T_EOF) {
697                 next_pp_token();
698         }
699 }
700
/**
 * Reports an #error directive at the current position on stderr.
 * The tokens of the directive itself are not read or printed yet.
 */
static void error_directive(void)
{
        FILE *const out = stderr;

        error_prefix();
        fprintf(out, "#error directive: \n");

        /* TODO: parse pp-tokens until new-line */
}
708
709 static void define_directive(void)
710 {
711         lexer_next_preprocessing_token();
712         if(lexer_token.type != T_IDENTIFIER) {
713                 parse_error("expected identifier after #define\n");
714                 eat_until_newline();
715         }
716 }
717
/**
 * Placeholder for #ifdef / #ifndef handling: only reads the next
 * preprocessing token, no condition is evaluated yet.
 *
 * @param is_ifndef  non-zero for #ifndef, zero for #ifdef (currently unused)
 */
static void ifdef_directive(int is_ifndef)
{
        (void) is_ifndef;
        lexer_next_preprocessing_token();
        /* still to do: check that the token is an identifier ... */
        //expect_identifier();
        /* ... and that it is followed by a newline */
        //expect_newline();
}
725
/**
 * Placeholder for #endif handling; does nothing yet.
 */
static void endif_directive(void)
{
        /* still to do: check that the directive is followed by a newline */
        //expect_newline();
}
730
731 static void parse_line_directive(void)
732 {
733         if(pp_token.type != T_INTEGER) {
734                 parse_error("expected integer");
735         } else {
736                 lexer_token.source_position.linenr = pp_token.v.intvalue - 1;
737                 next_pp_token();
738         }
739         if(pp_token.type == T_STRING_LITERAL) {
740                 lexer_token.source_position.input_name = pp_token.v.string;
741                 next_pp_token();
742         }
743
744         eat_until_newline();
745 }
746
747 static void parse_preprocessor_identifier(void)
748 {
749         assert(pp_token.type == T_IDENTIFIER);
750         symbol_t *symbol = pp_token.v.symbol;
751
752         switch(symbol->pp_ID) {
753         case TP_include:
754                 printf("include - enable header name parsing!\n");
755                 break;
756         case TP_define:
757                 define_directive();
758                 break;
759         case TP_ifdef:
760                 ifdef_directive(0);
761                 break;
762         case TP_ifndef:
763                 ifdef_directive(1);
764                 break;
765         case TP_endif:
766                 endif_directive();
767                 break;
768         case TP_line:
769                 next_pp_token();
770                 parse_line_directive();
771                 break;
772         case TP_if:
773         case TP_else:
774         case TP_elif:
775         case TP_undef:
776         case TP_error:
777                 error_directive();
778                 break;
779         case TP_pragma:
780                 break;
781         }
782 }
783
784 static void parse_preprocessor_directive()
785 {
786         next_pp_token();
787
788         switch(pp_token.type) {
789         case T_IDENTIFIER:
790                 parse_preprocessor_identifier();
791                 break;
792         case T_INTEGER:
793                 parse_line_directive();
794                 break;
795         default:
796                 parse_error("invalid preprocessor directive");
797                 eat_until_newline();
798                 break;
799         }
800 }
801
/*
 * Helper macros for multi-character punctuators. They are used inside a case
 * of the switch over c in lexer_next_preprocessing_token():
 *
 *     case '+':
 *             MAYBE_PROLOG
 *             MAYBE('+', T_PLUSPLUS)
 *             MAYBE('=', T_PLUSEQUAL)
 *             ELSE('+')
 *
 * MAYBE_PROLOG consumes the first character and opens a nested
 * "while(1) { switch(c) {" that has to be closed by ELSE or ELSE_CODE.
 */
#define MAYBE_PROLOG                                       \
                        next_char();                                   \
                        while(1) {                                     \
                                switch(c) {

/*
 * If the next character is ch: consume it, set the token type to set_type and
 * return from the enclosing function.
 */
#define MAYBE(ch, set_type)                                \
                                case ch:                                   \
                                        next_char();                           \
                                        lexer_token.type = set_type;           \
                                        return;

/*
 * Default branch executing `code`; closes the switch and the loop opened by
 * MAYBE_PROLOG and ends the surrounding case with a break.
 */
#define ELSE_CODE(code)                                    \
                                default:                                   \
                                        code;                                  \
                                }                                          \
                        } /* end of while(1) */                        \
                        break;

/*
 * Default branch for the common situation: the character after the first one
 * does not belong to the token, so set the token type to set_type and return
 * without consuming it.
 */
#define ELSE(set_type)                                     \
                ELSE_CODE(                                         \
                        lexer_token.type = set_type;                   \
                        return;                                        \
                )
825
826 void lexer_next_preprocessing_token(void)
827 {
828         while(1) {
829                 switch(c) {
830                 case ' ':
831                 case '\t':
832                         next_char();
833                         break;
834
835                 MATCH_NEWLINE(
836                         lexer_token.type = '\n';
837                         return;
838                 )
839
840                 SYMBOL_CHARS
841                         parse_symbol();
842                         return;
843
844                 DIGITS
845                         parse_number();
846                         return;
847
848                 case '"':
849                         parse_string_literal();
850                         return;
851
852                 case '\'':
853                         parse_character_constant();
854                         return;
855
856                 case '.':
857                         MAYBE_PROLOG
858                                 case '.':
859                                         MAYBE_PROLOG
860                                         MAYBE('.', T_DOTDOTDOT)
861                                         ELSE_CODE(
862                                                 put_back(c);
863                                                 c = '.';
864                                                 lexer_token.type = '.';
865                                                 return;
866                                         )
867                         ELSE('.')
868                 case '&':
869                         MAYBE_PROLOG
870                         MAYBE('&', T_ANDAND)
871                         MAYBE('=', T_ANDEQUAL)
872                         ELSE('&')
873                 case '*':
874                         MAYBE_PROLOG
875                         MAYBE('=', T_ASTERISKEQUAL)
876                         ELSE('*')
877                 case '+':
878                         MAYBE_PROLOG
879                         MAYBE('+', T_PLUSPLUS)
880                         MAYBE('=', T_PLUSEQUAL)
881                         ELSE('+')
882                 case '-':
883                         MAYBE_PROLOG
884                         MAYBE('>', T_MINUSGREATER)
885                         MAYBE('-', T_MINUSMINUS)
886                         MAYBE('=', T_MINUSEQUAL)
887                         ELSE('-')
888                 case '!':
889                         MAYBE_PROLOG
890                         MAYBE('=', T_EXCLAMATIONMARKEQUAL)
891                         ELSE('!')
892                 case '/':
893                         MAYBE_PROLOG
894                         MAYBE('=', T_SLASHEQUAL)
895                                 case '*':
896                                         next_char();
897                                         skip_multiline_comment();
898                                         lexer_next_preprocessing_token();
899                                         return;
900                                 case '/':
901                                         next_char();
902                                         skip_line_comment();
903                                         lexer_next_preprocessing_token();
904                                         return;
905                         ELSE('/')
906                 case '%':
907                         MAYBE_PROLOG
908                         MAYBE('>', T_PERCENTGREATER)
909                         MAYBE('=', T_PERCENTEQUAL)
910                                 case ':':
911                                         MAYBE_PROLOG
912                                                 case '%':
913                                                         MAYBE_PROLOG
914                                                         MAYBE(':', T_PERCENTCOLONPERCENTCOLON)
915                                                         ELSE_CODE(
916                                                                 put_back(c);
917                                                                 c = '%';
918                                                                 lexer_token.type = T_PERCENTCOLON;
919                                                                 return;
920                                                         )
921                                         ELSE(T_PERCENTCOLON)
922                         ELSE('%')
923                 case '<':
924                         MAYBE_PROLOG
925                         MAYBE(':', T_LESSCOLON)
926                         MAYBE('%', T_LESSPERCENT)
927                         MAYBE('=', T_LESSEQUAL)
928                                 case '<':
929                                         MAYBE_PROLOG
930                                         MAYBE('=', T_LESSLESSEQUAL)
931                                         ELSE(T_LESSLESS)
932                         ELSE('<')
933                 case '>':
934                         MAYBE_PROLOG
935                         MAYBE('=', T_GREATEREQUAL)
936                                 case '>':
937                                         MAYBE_PROLOG
938                                         MAYBE('=', T_GREATERGREATEREQUAL)
939                                         ELSE(T_GREATERGREATER)
940                         ELSE('>')
941                 case '^':
942                         MAYBE_PROLOG
943                         MAYBE('=', T_CARETEQUAL)
944                         ELSE('^')
945                 case '|':
946                         MAYBE_PROLOG
947                         MAYBE('=', T_PIPEEQUAL)
948                         MAYBE('|', T_PIPEPIPE)
949                         ELSE('|')
950                 case ':':
951                         MAYBE_PROLOG
952                         MAYBE('>', T_COLONGREATER)
953                         ELSE(':')
954                 case '=':
955                         MAYBE_PROLOG
956                         MAYBE('=', T_EQUALEQUAL)
957                         ELSE('=')
958                 case '#':
959                         MAYBE_PROLOG
960                         MAYBE('#', T_HASHHASH)
961                         ELSE('#')
962
963                 case '?':
964                 case '[':
965                 case ']':
966                 case '(':
967                 case ')':
968                 case '{':
969                 case '}':
970                 case '~':
971                 case ';':
972                 case ',':
973                 case '\\':
974                         lexer_token.type = c;
975                         next_char();
976                         return;
977
978                 case EOF:
979                         lexer_token.type = T_EOF;
980                         return;
981
982                 default:
983                         next_char();
984                         error_prefix();
985                         fprintf(stderr, "unknown character '%c' found\n", c);
986                         lexer_token.type = T_ERROR;
987                         return;
988                 }
989         }
990 }
991
992 void lexer_next_token(void)
993 {
994         lexer_next_preprocessing_token();
995         if(lexer_token.type != '\n')
996                 return;
997
998 newline_found:
999         do {
1000                 lexer_next_preprocessing_token();
1001         } while(lexer_token.type == '\n');
1002
1003         if(lexer_token.type == '#') {
1004                 parse_preprocessor_directive();
1005                 goto newline_found;
1006         }
1007 }
1008
/**
 * Initializes the lexer's string set. Counterpart of exit_lexer().
 */
void init_lexer(void)
{
        strset_init(&stringset);
}
1013
1014 void lexer_open_stream(FILE *stream, const char *input_name)
1015 {
1016         input                                  = stream;
1017         lexer_token.source_position.linenr     = 1;
1018         lexer_token.source_position.input_name = input_name;
1019
1020         next_char();
1021 }
1022
/**
 * Destroys the lexer's string set. Counterpart of init_lexer().
 */
void exit_lexer(void)
{
        strset_destroy(&stringset);
}
1027
1028 static __attribute__((unused))
1029 void dbg_pos(const source_position_t source_position)
1030 {
1031         fprintf(stdout, "%s:%d\n", source_position.input_name,
1032                 source_position.linenr);
1033         fflush(stdout);
1034 }