make it compile
[cparser] / lexer.c
1 #include <config.h>
2
3 #include "lexer_t.h"
4 #include "token_t.h"
5 #include "symbol_table_t.h"
6 #include "adt/error.h"
7
8 #include <assert.h>
9 #include <errno.h>
10 #include <string.h>
11 #include <ctype.h>
12
/* Uncomment to make next_char() and put_back() print every character they
 * handle to stdout. */
13 //#define DEBUG_CHARS
/* Number of bytes kept free at the start of buf: next_char() refills the
 * buffer behind them, so put_back() can still step back after a refill. */
14 #define MAX_PUTBACK 3
15
/* The character the lexer is currently looking at; next_char() sets it to EOF
 * when fread() delivers no more data. */
16 static int               c;
/* Position used for diagnostics; linenr is incremented whenever the lexer
 * consumes a line ending. */
17 source_position_t source_position;
/* Stream that next_char() reads from. */
18 static FILE             *input;
/* Read buffer: MAX_PUTBACK put-back bytes followed by the data read from
 * input. */
19 static char              buf[1027];
/* One past the last valid byte in buf (set on every refill). */
20 static const char       *bufend;
/* Position in buf of the character held in c. */
21 static const char       *bufpos;
/* Set of string literals; parse_string_literal() inserts into it so that
 * equal strings share one copy. */
22 static strset_t          stringset;
23 //static FILE            **input_stack;
24 //static char            **buf_stack;
25
/**
 * Prints the "<input>:<line>: Error: " prefix of a diagnostic to stderr.
 * No newline is written; the caller prints the message itself afterwards.
 *
 * @param input_name  name of the input the diagnostic refers to
 * @param linenr      line number the diagnostic refers to
 */
static
void error_prefix_at(const char *input_name, unsigned linenr)
{
	/* linenr is unsigned, so it must be formatted with %u: with %d large
	 * line numbers would be printed as negative values */
	fprintf(stderr, "%s:%u: Error: ", input_name, linenr);
}
31
32 static
33 void error_prefix()
34 {
35         error_prefix_at(source_position.input_name, source_position.linenr);
36 }
37
/**
 * Reports an error at the current source position: the position prefix,
 * the message and a terminating newline are written to stderr.
 *
 * @param msg  message text, without trailing newline
 */
static
void parse_error(const char *msg)
{
	error_prefix();
	fputs(msg, stderr);
	fputc('\n', stderr);
}
44
45 static inline
46 void next_char()
47 {
48         bufpos++;
49         if(bufpos >= bufend) {
50                 size_t s = fread(buf + MAX_PUTBACK, 1, sizeof(buf) - MAX_PUTBACK,
51                                  input);
52                 if(s == 0) {
53                         c = EOF;
54                         return;
55                 }
56                 bufpos = buf + MAX_PUTBACK;
57                 bufend = buf + MAX_PUTBACK + s;
58         }
59         c = *(bufpos);
60 #ifdef DEBUG_CHARS
61         printf("nchar '%c'\n", c);
62 #endif
63 }
64
65 static inline
66 void put_back(int pc)
67 {
68         char *p = (char*) bufpos - 1;
69         bufpos--;
70         assert(p >= buf);
71         *p = pc;
72
73 #ifdef DEBUG_CHARS
74         printf("putback '%c'\n", pc);
75 #endif
76 }
77
78
79 static
80 int replace_trigraph(void)
81 {
82 #define MATCH_TRIGRAPH(ch,replacement)           \
83         case ch:                                     \
84                 c = replacement;                         \
85                 return 1;
86
87         switch(c) {
88         MATCH_TRIGRAPH('=', '#')
89         MATCH_TRIGRAPH('(', '[')
90         MATCH_TRIGRAPH('/', '\\')
91         MATCH_TRIGRAPH(')', ']')
92         MATCH_TRIGRAPH('\'', '^')
93         MATCH_TRIGRAPH('<', '{')
94         MATCH_TRIGRAPH('!', '|')
95         MATCH_TRIGRAPH('>', '}')
96         MATCH_TRIGRAPH('-', '~')
97         default:
98                 break;
99         }
100
101         return 0;
102 }
103
/*
 * Expands to a `case '?':` for a switch over c.
 *
 * If the '?' starts a trigraph ("??" followed by a character accepted by
 * replace_trigraph()), c is set to the replacement character and the case is
 * left with `break`, so the enclosing loop sees the replaced character next.
 * Otherwise the characters read ahead are put back, c is reset to '?' and
 * no_trigraph_code is executed.
 *
 * custom_putback runs before the macro's own put_back() calls; callers use it
 * to step back over characters they consumed before the '?'.
 * no_trigraph_code has to leave the case itself (break/return/goto): in the
 * first branch the macro would otherwise continue with the next_char() that
 * follows it.
 */
104 #define SKIP_TRIGRAPHS(custom_putback, no_trigraph_code) \
105         case '?':                                  \
106                 next_char();                           \
107                 if(c != '?') {                         \
108                         custom_putback;                    \
109                         put_back(c);                       \
110                         c = '?';                           \
111                         no_trigraph_code;                  \
112                 }                                      \
113                 next_char();                           \
114                 if(replace_trigraph()) {               \
115                         break;                             \
116                 }                                      \
117                 custom_putback;                        \
118                 put_back('?');                         \
119                 put_back(c);                           \
120                 c = '?';                               \
121                 no_trigraph_code;
122
/*
 * If c is a line ending ("\r", "\r\n" or "\n"), consumes it, increments
 * source_position.linenr and then executes newline_code. Does nothing when c
 * is any other character. Expands to a plain if / else-if statement, so code
 * placed after the macro runs only when newline_code did not leave the
 * enclosing construct.
 */
123 #define EAT_NEWLINE(newline_code)              \
124         if(c == '\r') {                            \
125                 next_char();                           \
126                 if(c == '\n')                          \
127                         next_char();                       \
128                 source_position.linenr++;              \
129                 newline_code;                          \
130         } else if(c == '\n') {                     \
131                 next_char();                           \
132                 source_position.linenr++;              \
133                 newline_code;                          \
134         }
135
/* Case labels for the characters an identifier may start with: the letters
 * a-z and A-Z and the underscore. */
#define SYMBOL_CHARS                                            \
	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': \
	case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': \
	case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': \
	case 's': case 't': case 'u': case 'v': case 'w': case 'x': \
	case 'y': case 'z':                                         \
	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': \
	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': \
	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': \
	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': \
	case 'Y': case 'Z':                                         \
	case '_':
190
191 static
192 void parse_symbol(token_t *token)
193 {
194         symbol_t *symbol;
195         char     *string;
196
197         obstack_1grow(&symbol_obstack, c);
198         next_char();
199
200         while(1) {
201                 switch(c) {
202                 case '\\':
203                         next_char();
204                         EAT_NEWLINE(break;)
205                         goto end_symbol;
206
207                 SYMBOL_CHARS
208                         obstack_1grow(&symbol_obstack, c);
209                         next_char();
210                         break;
211
212                 case '?':
213                         next_char();
214                         if(c != '?') {
215                                 put_back(c);
216                                 c = '?';
217                                 goto end_symbol;
218                         }
219                         next_char();
220                         if(replace_trigraph())
221                                 break;
222                         put_back('?');
223                         put_back(c);
224                         c = '?';
225                         goto end_symbol;
226
227                 default:
228                         goto end_symbol;
229                 }
230         }
231 end_symbol:
232         obstack_1grow(&symbol_obstack, '\0');
233
234         string = obstack_finish(&symbol_obstack);
235         symbol = symbol_table_insert(string);
236
237         token->type     = symbol->ID;
238         token->v.symbol = symbol;
239
240         if(symbol->string != string) {
241                 obstack_free(&symbol_obstack, string);
242         }
243 }
244
245 static
246 void parse_number_hex(token_t *token)
247 {
248         assert(c == 'x' || c == 'X');
249         next_char();
250
251         if (!isdigit(c) &&
252                 !('A' <= c && c <= 'F') &&
253                 !('a' <= c && c <= 'f')) {
254                 parse_error("premature end of hex number literal");
255                 token->type = T_ERROR;
256                 return;
257         }
258
259         int value = 0;
260         for(;;) {
261                 if (isdigit(c)) {
262                         value = 16 * value + c - '0';
263                 } else if ('A' <= c && c <= 'F') {
264                         value = 16 * value + c - 'A' + 10;
265                 } else if ('a' <= c && c <= 'f') {
266                         value = 16 * value + c - 'a' + 10;
267                 } else {
268                         token->type     = T_INTEGER;
269                         token->v.intvalue = value;
270                         return;
271                 }
272                 next_char();
273         }
274 }
275
276 static
277 void parse_number_oct(token_t *token)
278 {
279         assert(c == 'o' || c == 'O');
280         next_char();
281
282         int value = 0;
283         for(;;) {
284                 if ('0' <= c && c <= '7') {
285                         value = 8 * value + c - '0';
286                 } else {
287                         token->type     = T_INTEGER;
288                         token->v.intvalue = value;
289                         return;
290                 }
291                 next_char();
292         }
293 }
294
295 static
296 void parse_number_dec(token_t *token, int first_char)
297 {
298         int value = 0;
299         if(first_char > 0) {
300                 assert(first_char >= '0' && first_char <= '9');
301                 value = first_char - '0';
302         }
303
304         for(;;) {
305                 if (isdigit(c)) {
306                         value = 10 * value + c - '0';
307                 } else {
308                         token->type     = T_INTEGER;
309                         token->v.intvalue = value;
310                         return;
311                 }
312                 next_char();
313         }
314 }
315
316 static
317 void parse_number(token_t *token)
318 {
319         // TODO check for overflow
320         // TODO check for various invalid inputs sequences
321
322         if (c == '0') {
323                 next_char();
324                 switch (c) {
325                         case 'X':
326                         case 'x': parse_number_hex(token); break;
327                         case 'o':
328                         case 'O': parse_number_oct(token); break;
329                         default:  parse_number_dec(token, '0');
330                 }
331         } else {
332                 parse_number_dec(token, 0);
333         }
334 }
335
336 static
337 int parse_escape_sequence()
338 {
339         while(1) {
340                 int ec = c;
341                 next_char();
342
343                 switch(ec) {
344                 case '"': return '"';
345                 case '\'': return'\'';
346                 case '\\':
347                         EAT_NEWLINE(break;)
348                         return '\\';
349                 case 'a': return '\a';
350                 case 'b': return '\b';
351                 case 'f': return '\f';
352                 case 'n': return '\n';
353                 case 'r': return '\r';
354                 case 't': return '\t';
355                 case 'v': return '\v';
356                 case 'x': /* TODO parse hex number ... */
357                         parse_error("hex escape sequences not implemented yet");
358                         return EOF;
359                 case 0:
360                 case 1:
361                 case 2:
362                 case 3:
363                 case 4:
364                 case 5:
365                 case 6:
366                 case 7:
367                         /* TODO parse octal number ... */
368                         parse_error("octal escape sequences not implemented yet");
369                         return EOF;
370                 case '?':
371                         if(c != '?') {
372                                 return '?';
373                         }
374                         /* might be a trigraph */
375                         next_char();
376                         if(replace_trigraph()) {
377                                 break;
378                         }
379                         put_back(c);
380                         c = '?';
381                         return '?';
382
383                 case EOF:
384                         parse_error("reached end of file while parsing escape sequence");
385                         return EOF;
386                 default:
387                         parse_error("unknown escape sequence");
388                         return EOF;
389                 }
390         }
391 }
392
393 static
394 void parse_string_literal(token_t *token)
395 {
396         unsigned    start_linenr = source_position.linenr;
397         char       *string;
398         const char *result;
399
400         assert(c == '"');
401         next_char();
402
403         while(1) {
404                 switch(c) {
405                 SKIP_TRIGRAPHS(,
406                         obstack_1grow(&symbol_obstack, '?');
407                         next_char();
408                         break;
409                 )
410
411                 case '\\':
412                         next_char();
413                         EAT_NEWLINE(break;)
414                         int ec = parse_escape_sequence();
415                         obstack_1grow(&symbol_obstack, ec);
416                         break;
417
418                 case EOF:
419                         error_prefix_at(source_position.input_name, start_linenr);
420                         fprintf(stderr, "string has no end\n");
421                         token->type = T_ERROR;
422                         return;
423
424                 case '"':
425                         next_char();
426                         goto end_of_string;
427
428                 default:
429                         obstack_1grow(&symbol_obstack, c);
430                         next_char();
431                         break;
432                 }
433         }
434
435 end_of_string:
436
437         /* TODO: concatenate multiple strings separated by whitespace... */
438
439         /* add finishing 0 to the string */
440         obstack_1grow(&symbol_obstack, '\0');
441         string = obstack_finish(&symbol_obstack);
442
443         /* check if there is already a copy of the string */
444         result = strset_insert(&stringset, string);
445         if(result != string) {
446                 obstack_free(&symbol_obstack, string);
447         }
448
449         token->type     = T_STRING_LITERAL;
450         token->v.string = result;
451 }
452
/*
 * Expands to the `case '\r':` and `case '\n':` labels of a switch over c.
 * Each case consumes the line ending ("\r", "\r\n" or "\n"), increments
 * source_position.linenr and then executes code. code has to leave the case
 * itself (break/return): there is no break between the two cases, so the
 * '\r' case would otherwise fall through into the '\n' case.
 */
453 #define MATCH_NEWLINE(code)                 \
454         case '\r':                              \
455                 next_char();                        \
456                 if(c == '\n') {                     \
457                         next_char();                    \
458                 }                                   \
459                 source_position.linenr++;           \
460                 code;                               \
461         case '\n':                              \
462                 next_char();                        \
463                 source_position.linenr++;           \
464                 code;
465
466 static
467 void parse_character_constant(token_t *token)
468 {
469         assert(c == '\'');
470         next_char();
471
472         int found_char = 0;
473         while(1) {
474                 switch(c) {
475                 SKIP_TRIGRAPHS(,
476                         found_char = '?';
477                         break;
478                 )
479
480                 case '\\':
481                         next_char();
482                         EAT_NEWLINE(break;)
483                         found_char = '\\';
484                         break;
485
486                 MATCH_NEWLINE(
487                         parse_error("newline while parsing character constant");
488                         break;
489                 )
490
491                 case '\'':
492                         next_char();
493                         goto end_of_char_constant;
494
495                 case EOF:
496                         parse_error("EOF while parsing character constant");
497                         token->type = T_ERROR;
498                         return;
499
500                 default:
501                         if(found_char != 0) {
502                                 parse_error("more than 1 characters in character "
503                                             "constant");
504                                 goto end_of_char_constant;
505                         } else {
506                                 found_char = c;
507                                 next_char();
508                         }
509                         break;
510                 }
511         }
512
513 end_of_char_constant:
514         token->type       = T_INTEGER;
515         token->v.intvalue = found_char;
516 }
517
518 static
519 void skip_multiline_comment(void)
520 {
521         unsigned start_linenr = source_position.linenr;
522         int had_star = 0;
523
524         while(1) {
525                 switch(c) {
526                 case '*':
527                         next_char();
528                         had_star = 1;
529                         break;
530
531                 case '/':
532                         next_char();
533                         if(had_star) {
534                                 return;
535                         }
536                         had_star = 0;
537                         break;
538
539                 case '\\':
540                         next_char();
541                         EAT_NEWLINE(break;)
542                         had_star = 0;
543                         break;
544
545                 case '?':
546                         next_char();
547                         if(c != '?') {
548                                 had_star = 0;
549                                 break;
550                         }
551                         next_char();
552                         if(replace_trigraph())
553                                 break;
554                         put_back(c);
555                         c = '?';
556                         had_star = 0;
557                         /* we don't put back the 2nd ? as the comment text is discarded
558                          * anyway */
559                         break;
560
561                 MATCH_NEWLINE(had_star = 0; break;)
562
563                 case EOF:
564                         error_prefix_at(source_position.input_name, start_linenr);
565                         fprintf(stderr, "at end of file while looking for comment end\n");
566                         return;
567                 default:
568                         had_star = 0;
569                         next_char();
570                         break;
571                 }
572         }
573 }
574
575 static
576 void skip_line_comment(void)
577 {
578         while(1) {
579                 switch(c) {
580                 case '?':
581                         next_char();
582                         if(c != '?')
583                                 break;
584                         next_char();
585                         if(replace_trigraph())
586                                 break;
587                         put_back('?');
588                         /* we don't put back the 2nd ? as the comment text is discarded
589                          * anyway */
590                         break;
591
592                 case '\\':
593                         next_char();
594                         if(c == '\n') {
595                                 next_char();
596                                 source_position.linenr++;
597                         }
598                         break;
599
600                 case EOF:
601                 case '\r':
602                 case '\n':
603                         return;
604
605                 default:
606                         next_char();
607                         break;
608                 }
609         }
610 }
611
/* Forward declaration: the directive parsers below request tokens from the
 * lexer function that is defined after them. */
612 static
613 void lexer_next_preprocessing_token(token_t *token);
614
/* Token the preprocessor directive parsers are currently looking at. */
615 static token_t pp_token;
616
/* Reads the next preprocessing token into pp_token. */
617 static inline
618 void next_pp_token(void)
619 {
620         lexer_next_preprocessing_token(&pp_token);
621 }
622
/* Placeholder for error recovery: define_directive() calls this after a
 * malformed directive. Not implemented yet - the function does nothing. */
623 static
624 void eat_until_newline(void)
625 {
626         /* TODO */
627 }
628
/**
 * Handles an #error directive: prints the error prefix for the current
 * position followed by a fixed "#error directive: " line. The text of the
 * directive is not read yet.
 */
static
void error_directive(void)
{
	error_prefix();
	fputs("#error directive: \n", stderr);

	/* TODO: parse pp-tokens until new-line */
}
637
638 static
639 void define_directive(void)
640 {
641         token_t temptoken;
642
643         lexer_next_preprocessing_token(&temptoken);
644         if(temptoken.type != T_IDENTIFIER) {
645                 parse_error("expected identifier after #define\n");
646                 eat_until_newline();
647         }
648 }
649
650 static
651 void ifdef_directive(int is_ifndef)
652 {
653         (void) is_ifndef;
654         token_t temptoken;
655         lexer_next_preprocessing_token(&temptoken);
656         //expect_identifier();
657         //extect_newline();
658 }
659
/* Handles #endif. Not implemented yet - the function does nothing. */
660 static
661 void endif_directive(void)
662 {
663         //expect_newline();
664 }
665
666 static
667 void parse_line_directive(void)
668 {
669         if(pp_token.type != T_INTEGER) {
670                 parse_error("expected integer");
671         } else {
672                 source_position.linenr = pp_token.v.intvalue - 1;
673                 next_pp_token();
674         }
675         if(pp_token.type == T_STRING_LITERAL) {
676                 source_position.input_name = pp_token.v.string;
677                 next_pp_token();
678         }
679
680         while(pp_token.type != T_EOF && pp_token.type != '\n') {
681                 next_pp_token();
682         }
683 }
684
685 static
686 void parse_preprocessor_identifier(void)
687 {
688         assert(pp_token.type == T_IDENTIFIER);
689         symbol_t *symbol = pp_token.v.symbol;
690
691         switch(symbol->pp_ID) {
692         case TP_include:
693                 printf("include - enable header name parsing!\n");
694                 break;
695         case TP_define:
696                 define_directive();
697                 break;
698         case TP_ifdef:
699                 ifdef_directive(0);
700                 break;
701         case TP_ifndef:
702                 ifdef_directive(1);
703                 break;
704         case TP_endif:
705                 endif_directive();
706                 break;
707         case TP_line:
708                 next_pp_token();
709                 parse_line_directive();
710                 break;
711         case TP_if:
712         case TP_else:
713         case TP_elif:
714         case TP_undef:
715         case TP_error:
716                 error_directive();
717                 break;
718         case TP_pragma:
719                 break;
720         }
721 }
722
723 static
724 void parse_preprocessor_directive(token_t *result_token)
725 {
726         next_pp_token();
727
728         switch(pp_token.type) {
729         case T_IDENTIFIER:
730                 parse_preprocessor_identifier();
731                 break;
732         case T_INTEGER:
733                 parse_line_directive();
734                 break;
735         }
736
737         lexer_next_token(result_token);
738 }
739
/*
 * Building blocks for lexing operators that consist of several characters.
 * They are used inside a function that has a `token` pointer in scope.
 *
 * MAYBE_PROLOG consumes the current character and opens a
 * `while(1) { switch(c) {` over the following one. The switch and the loop
 * are closed again by ELSE_CODE or ELSE.
 */
740 #define MAYBE_PROLOG                                       \
741                         next_char();                                   \
742                         while(1) {                                     \
743                                 switch(c) {
744
/*
 * MAYBE(ch, set_type): if the next character is ch, consume it, set the
 * token type to set_type and return from the enclosing function.
 */
745 #define MAYBE(ch, set_type)                                \
746                                 case ch:                                   \
747                                         next_char();                           \
748                                         token->type = set_type;                \
749                                         return;
750
/*
 * ELSE_CODE(code) closes the switch and loop opened by MAYBE_PROLOG. code is
 * executed when the character matches none of the preceding cases, also for
 * a '?' that does not start a trigraph and for a backslash that is not
 * followed by a line ending. After a trigraph or a line splice the loop
 * examines the next character instead.
 */
751 #define ELSE_CODE(code)                                    \
752                                 SKIP_TRIGRAPHS(,                           \
753                                         code;                                  \
754                                 )                                          \
755                                                                                                                    \
756                                 case '\\':                                 \
757                                         next_char();                           \
758                                         EAT_NEWLINE(break;)                    \
759                                         /* fallthrough */                      \
760                                 default:                                   \
761                                         code;                                  \
762                                 }                                          \
763                         } /* end of while(1) */                        \
764                         break;
765
/*
 * ELSE(set_type): ELSE_CODE variant that sets the token type to set_type and
 * returns from the enclosing function.
 */
766 #define ELSE(set_type)                                     \
767                 ELSE_CODE(                                         \
768                         token->type = set_type;                        \
769                         return;                                        \
770                 )
771
772 static
773 void eat_whitespace()
774 {
775         while(1) {
776                 switch(c) {
777                 case ' ':
778                 case '\t':
779                         next_char();
780                         break;
781
782                 case '\r':
783                 case '\n':
784                         return;
785
786                 case '\\':
787                         next_char();
788                         if(c == '\n') {
789                                 next_char();
790                                 source_position.linenr++;
791                                 break;
792                         }
793
794                         put_back(c);
795                         c = '\\';
796                         return;
797
798                 SKIP_TRIGRAPHS(,
799                         return;
800                 )
801
802                 case '/':
803                         next_char();
804                         while(1) {
805                                 switch(c) {
806                                 case '*':
807                                         next_char();
808                                         skip_multiline_comment();
809                                         eat_whitespace();
810                                         return;
811                                 case '/':
812                                         next_char();
813                                         skip_line_comment();
814                                         eat_whitespace();
815                                         return;
816
817                                 SKIP_TRIGRAPHS(
818                                                 put_back('?');
819                                         ,
820                                                 c = '/';
821                                                 return;
822                                 )
823
824                                 case '\\':
825                                         next_char();
826                                         EAT_NEWLINE(break;)
827                                         /* fallthrough */
828                                 default:
829                                         return;
830                                 }
831                         }
832                         break;
833
834                 default:
835                         return;
836                 }
837         }
838 }
839
840 static
841 void lexer_next_preprocessing_token(token_t *token)
842 {
843         while(1) {
844                 switch(c) {
845                 case ' ':
846                 case '\t':
847                         next_char();
848                         break;
849
850                 MATCH_NEWLINE(
851                         eat_whitespace();
852                         if(c == '#') {
853                                 next_char();
854                                 parse_preprocessor_directive(token);
855                                 return;
856                         }
857                         token->type = '\n';
858                         return;
859                 )
860
861                 SYMBOL_CHARS
862                         parse_symbol(token);
863                         return;
864
865                 case '0':
866                 case '1':
867                 case '2':
868                 case '3':
869                 case '4':
870                 case '5':
871                 case '6':
872                 case '7':
873                 case '8':
874                 case '9':
875                         parse_number(token);
876                         return;
877
878                 case '"':
879                         parse_string_literal(token);
880                         return;
881
882                 case '\'':
883                         parse_character_constant(token);
884                         return;
885
886                 case '\\':
887                         next_char();
888                         if(c == '\n') {
889                                 next_char();
890                                 source_position.linenr++;
891                                 break;
892                         } else {
893                                 parse_error("unexpected '\\' found");
894                                 token->type = T_ERROR;
895                         }
896                         return;
897
898                 case '.':
899                         MAYBE_PROLOG
900                                 case '.':
901                                         MAYBE_PROLOG
902                                         MAYBE('.', T_DOTDOTDOT)
903                                         ELSE_CODE(
904                                                 put_back(c);
905                                                 c = '.';
906                                                 token->type = '.';
907                                                 return;
908                                         )
909                         ELSE('.')
910                 case '&':
911                         MAYBE_PROLOG
912                         MAYBE('&', T_ANDAND)
913                         MAYBE('=', T_ANDEQUAL)
914                         ELSE('&')
915                 case '*':
916                         MAYBE_PROLOG
917                         MAYBE('=', T_ASTERISKEQUAL)
918                         ELSE('*')
919                 case '+':
920                         MAYBE_PROLOG
921                         MAYBE('+', T_PLUSPLUS)
922                         MAYBE('=', T_PLUSEQUAL)
923                         ELSE('+')
924                 case '-':
925                         MAYBE_PROLOG
926                         MAYBE('-', T_MINUSMINUS)
927                         MAYBE('=', T_MINUSEQUAL)
928                         ELSE('-')
929                 case '!':
930                         MAYBE_PROLOG
931                         MAYBE('=', T_EXCLAMATIONMARKEQUAL)
932                         ELSE('!')
933                 case '/':
934                         MAYBE_PROLOG
935                         MAYBE('=', T_SLASHEQUAL)
936                                 case '*':
937                                         next_char();
938                                         skip_multiline_comment();
939                                         lexer_next_preprocessing_token(token);
940                                         return;
941                                 case '/':
942                                         next_char();
943                                         skip_line_comment();
944                                         lexer_next_preprocessing_token(token);
945                                         return;
946                         ELSE('/')
947                 case '%':
948                         MAYBE_PROLOG
949                         MAYBE('>', T_PERCENTGREATER)
950                         MAYBE('=', T_PERCENTEQUAL)
951                                 case ':':
952                                         MAYBE_PROLOG
953                                                 case '%':
954                                                         MAYBE_PROLOG
955                                                         MAYBE(':', T_PERCENTCOLONPERCENTCOLON)
956                                                         ELSE_CODE(
957                                                                 put_back(c);
958                                                                 c = '%';
959                                                                 token->type = T_PERCENTCOLON;
960                                                                 return;
961                                                         )
962                                         ELSE(T_PERCENTCOLON)
963                         ELSE('%')
964                 case '<':
965                         MAYBE_PROLOG
966                         MAYBE(':', T_LESSCOLON)
967                         MAYBE('%', T_LESSPERCENT)
968                                 case '<':
969                                         MAYBE_PROLOG
970                                         MAYBE('=', T_LESSLESSEQUAL)
971                                         ELSE(T_LESSLESS)
972                         ELSE('<')
973                 case '>':
974                         MAYBE_PROLOG
975                                 case '>':
976                                         MAYBE_PROLOG
977                                         MAYBE('=', T_GREATERGREATEREQUAL)
978                                         ELSE(T_GREATERGREATER)
979                         ELSE('>')
980                 case '^':
981                         MAYBE_PROLOG
982                         MAYBE('=', T_CARETEQUAL)
983                         ELSE('^')
984                 case '|':
985                         MAYBE_PROLOG
986                         MAYBE('=', T_PIPEEQUAL)
987                         MAYBE('|', T_PIPEPIPE)
988                         ELSE('|')
989                 case ':':
990                         MAYBE_PROLOG
991                         MAYBE('>', T_COLONGREATER)
992                         ELSE(':')
993                 case '=':
994                         MAYBE_PROLOG
995                         MAYBE('=', T_EQUALEQUAL)
996                         ELSE('=')
997                 case '#':
998                         MAYBE_PROLOG
999                         MAYBE('#', T_HASHHASH)
1000                         ELSE('#')
1001
1002                 case '?':
1003                         next_char();
1004                         /* just a simple ? */
1005                         if(c != '?') {
1006                                 token->type = '?';
1007                                 return;
1008                         }
1009                         /* might be a trigraph */
1010                         next_char();
1011                         if(replace_trigraph()) {
1012                                 break;
1013                         }
1014                         put_back(c);
1015                         c = '?';
1016                         token->type = '?';
1017                         return;
1018
1019                 case '[':
1020                 case ']':
1021                 case '(':
1022                 case ')':
1023                 case '{':
1024                 case '}':
1025                 case '~':
1026                 case ';':
1027                 case ',':
1028                         token->type = c;
1029                         next_char();
1030                         return;
1031
1032                 case EOF:
1033                         token->type = T_EOF;
1034                         return;
1035
1036                 default:
1037                         next_char();
1038                         error_prefix();
1039                         fprintf(stderr, "unknown character '%c' found\n", c);
1040                         token->type = T_ERROR;
1041                         return;
1042                 }
1043         }
1044 }
1045
1046 void lexer_next_token(token_t *token)
1047 {
1048         do {
1049                 lexer_next_preprocessing_token(token);
1050         } while(token->type == '\n');
1051 }
1052
1053 void init_lexer(void)
1054 {
1055         strset_init(&stringset);
1056 }
1057
1058 void lexer_open_stream(FILE *stream, const char *input_name)
1059 {
1060         input                      = stream;
1061         source_position.linenr     = 0;
1062         source_position.input_name = input_name;
1063
1064         /* we place a virtual '\n' at the beginning so the lexer knows we're at the
1065          * beginning of a line */
1066         c = '\n';
1067 }
1068
1069 void exit_lexer(void)
1070 {
1071         strset_destroy(&stringset);
1072 }
1073
1074 static __attribute__((unused))
1075 void dbg_pos(const source_position_t source_position)
1076 {
1077         fprintf(stdout, "%s:%d\n", source_position.input_name, source_position.linenr);
1078         fflush(stdout);
1079 }