Redo preprocessor output concerning spaces and newlines
[cparser] / preprocessor.c
1 #include <config.h>
2
3 #include <assert.h>
4 #include <errno.h>
5 #include <string.h>
6 #include <stdbool.h>
7 #include <ctype.h>
8
9 #include "token_t.h"
10 #include "symbol_t.h"
11 #include "adt/util.h"
12 #include "adt/error.h"
13 #include "lang_features.h"
14 #include "diagnostic.h"
15 #include "string_rep.h"
16 #include "input.h"
17
/* Number of slots kept free in front of the decoded characters in
 * pp_input_t.buf; put_back() steps backwards into this area. */
#define MAX_PUTBACK 3
/* NOTE(review): presumably the maximum #include nesting depth; it is not
 * used in the part of the file visible here -- confirm. */
#define INCLUDE_LIMIT 199  /* 199 is for gcc "compatibility" */
20
21 struct pp_argument_t {
22         size_t   list_len;
23         token_t *token_list;
24 };
25
26 struct pp_definition_t {
27         symbol_t          *symbol;
28         source_position_t  source_position;
29         pp_definition_t   *parent_expansion;
30         size_t             expand_pos;
31         bool               is_variadic    : 1;
32         bool               is_expanding   : 1;
33         bool               has_parameters : 1;
34         size_t             n_parameters;
35         symbol_t          *parameters;
36
37         /* replacement */
38         size_t             list_len;
39         token_t           *token_list;
40
41 };
42
43 typedef struct pp_conditional_t pp_conditional_t;
44 struct pp_conditional_t {
45         source_position_t  source_position;
46         bool               condition;
47         bool               in_else;
48         bool               skip; /**< conditional in skip mode (then+else gets skipped) */
49         pp_conditional_t  *parent;
50 };
51
52 typedef struct pp_input_t pp_input_t;
53 struct pp_input_t {
54         FILE              *file;
55         input_t           *input;
56         utf32              c;
57         utf32              buf[1024+MAX_PUTBACK];
58         const utf32       *bufend;
59         const utf32       *bufpos;
60         source_position_t  position;
61         pp_input_t        *parent;
62         unsigned           output_line;
63 };
64
/** Extra layout information recorded for the current token. */
typedef struct add_token_info_t {
        /** amount of whitespace between the start of the line and the token */
        unsigned whitespace;
        /** true if any whitespace preceded the token */
        bool had_whitespace;
        /** true if the token is the first one on its line */
        bool at_line_begin;
} add_token_info_t;
74
75 static pp_input_t input;
76
77 static pp_input_t     *input_stack;
78 static unsigned        n_inputs;
79 static struct obstack  input_obstack;
80
81 static pp_conditional_t *conditional_stack;
82
83 static token_t           pp_token;
84 static bool              resolve_escape_sequences = false;
85 static bool              ignore_unknown_chars     = true;
86 static bool              in_pp_directive;
87 static bool              skip_mode;
88 static FILE             *out;
89 static struct obstack    pp_obstack;
90 static const char       *printed_input_name = NULL;
91 static source_position_t expansion_pos;
92 static pp_definition_t  *current_expansion  = NULL;
93 static preprocessor_token_type_t last_token = TP_ERROR;
94
95 static add_token_info_t  info;
96
97 static inline void next_char(void);
98 static void next_preprocessing_token(void);
99 static void print_line_directive(const source_position_t *pos, const char *add);
100
101 static bool open_input(const char *filename)
102 {
103         FILE *file = fopen(filename, "r");
104         if (file == NULL)
105                 return false;
106
107         input.file                = file;
108         input.input               = input_from_stream(file, NULL);
109         input.bufend              = NULL;
110         input.bufpos              = NULL;
111         input.output_line         = 0;
112         input.position.input_name = filename;
113         input.position.lineno     = 1;
114
115         /* indicate that we're at a new input */
116         print_line_directive(&input.position, input_stack != NULL ? "1" : NULL);
117
118         /* place a virtual '\n' so we realize we're at line begin */
119         input.position.lineno     = 0;
120         input.c = '\n';
121         next_preprocessing_token();
122
123         return true;
124 }
125
126 static void close_input(void)
127 {
128         input_free(input.input);
129         assert(input.file != NULL);
130
131         fclose(input.file);
132         input.input  = NULL;
133         input.file   = NULL;
134         input.bufend = NULL;
135         input.bufpos = NULL;
136         input.c      = EOF;
137 }
138
139 static void push_input(void)
140 {
141         pp_input_t *saved_input
142                 = obstack_alloc(&input_obstack, sizeof(*saved_input));
143
144         memcpy(saved_input, &input, sizeof(*saved_input));
145
146         /* adjust buffer positions */
147         if (input.bufpos != NULL)
148                 saved_input->bufpos = saved_input->buf + (input.bufpos - input.buf);
149         if (input.bufend != NULL)
150                 saved_input->bufend = saved_input->buf + (input.bufend - input.buf);
151
152         saved_input->parent = input_stack;
153         input_stack         = saved_input;
154         ++n_inputs;
155 }
156
157 static void pop_restore_input(void)
158 {
159         assert(n_inputs > 0);
160         assert(input_stack != NULL);
161
162         pp_input_t *saved_input = input_stack;
163
164         memcpy(&input, saved_input, sizeof(input));
165         input.parent = NULL;
166
167         /* adjust buffer positions */
168         if (saved_input->bufpos != NULL)
169                 input.bufpos = input.buf + (saved_input->bufpos - saved_input->buf);
170         if (saved_input->bufend != NULL)
171                 input.bufend = input.buf + (saved_input->bufend - saved_input->buf);
172
173         input_stack = saved_input->parent;
174         obstack_free(&input_obstack, saved_input);
175         --n_inputs;
176 }
177
178 /**
179  * Prints a parse error message at the current token.
180  *
181  * @param msg   the error message
182  */
183 static void parse_error(const char *msg)
184 {
185         errorf(&pp_token.source_position,  "%s", msg);
186 }
187
188 static inline void next_real_char(void)
189 {
190         assert(input.bufpos <= input.bufend);
191         if (input.bufpos >= input.bufend) {
192                 size_t n = decode(input.input, input.buf + MAX_PUTBACK,
193                                   sizeof(input.buf)/sizeof(input.buf[0]) - MAX_PUTBACK);
194                 if (n == 0) {
195                         input.c = EOF;
196                         return;
197                 }
198                 input.bufpos = input.buf + MAX_PUTBACK;
199                 input.bufend = input.bufpos + n;
200         }
201         input.c = *input.bufpos++;
202         ++input.position.colno;
203 }
204
205 /**
206  * Put a character back into the buffer.
207  *
208  * @param pc  the character to put back
209  */
210 static inline void put_back(utf32 const pc)
211 {
212         assert(input.bufpos > input.buf);
213         *(--input.bufpos - input.buf + input.buf) = (char) pc;
214         --input.position.colno;
215 }
216
/**
 * Expands to the switch cases for '\r' and '\n'.
 *
 * A single newline is consumed, spelled "\r", "\n" or "\r\n": the
 * "case '\n':" label sits inside the if-body, so a lone '\n' enters right at
 * the second next_char() while "\r\n" runs through both calls. Afterwards
 * info.whitespace is reset, the line number is advanced, the column is set
 * to 1 and @p code is executed. @p code has to leave the case itself
 * (break, return, goto, ...).
 */
#define MATCH_NEWLINE(code)                   \
        case '\r':                                \
                next_char();                          \
                if (input.c == '\n') {                \
        case '\n':                                \
                        next_char();                      \
                }                                     \
                info.whitespace = 0;                  \
                ++input.position.lineno;              \
                input.position.colno = 1;             \
                code
228
/** Asserts that the current character is @p c_type and moves to the next
 * one. The argument is parenthesized so that any expression can be passed. */
#define eat(c_type) (assert(input.c == (c_type)), next_char())
230
231 static void maybe_concat_lines(void)
232 {
233         eat('\\');
234
235         switch (input.c) {
236         MATCH_NEWLINE(
237                 return;
238         )
239
240         default:
241                 break;
242         }
243
244         put_back(input.c);
245         input.c = '\\';
246 }
247
248 /**
249  * Set c to the next input character, ie.
250  * after expanding trigraphs.
251  */
252 static inline void next_char(void)
253 {
254         next_real_char();
255
256         /* filter trigraphs and concatenated lines */
257         if (UNLIKELY(input.c == '\\')) {
258                 maybe_concat_lines();
259                 goto end_of_next_char;
260         }
261
262         if (LIKELY(input.c != '?'))
263                 goto end_of_next_char;
264
265         next_real_char();
266         if (LIKELY(input.c != '?')) {
267                 put_back(input.c);
268                 input.c = '?';
269                 goto end_of_next_char;
270         }
271
272         next_real_char();
273         switch (input.c) {
274         case '=': input.c = '#'; break;
275         case '(': input.c = '['; break;
276         case '/': input.c = '\\'; maybe_concat_lines(); break;
277         case ')': input.c = ']'; break;
278         case '\'': input.c = '^'; break;
279         case '<': input.c = '{'; break;
280         case '!': input.c = '|'; break;
281         case '>': input.c = '}'; break;
282         case '-': input.c = '~'; break;
283         default:
284                 put_back(input.c);
285                 put_back('?');
286                 input.c = '?';
287                 break;
288         }
289
290 end_of_next_char:;
291 #ifdef DEBUG_CHARS
292         printf("nchar '%c'\n", input.c);
293 #endif
294 }
295
296
297
/**
 * Checks whether a character is one of the octal digits '0' to '7'.
 *
 * @param chr  the character to check
 * @return true for an octal digit, false for everything else
 */
static inline bool is_octal_digit(int chr)
{
        /* the decimal digits are guaranteed to have consecutive codes */
        return chr >= '0' && chr <= '7';
}
319
/**
 * Returns the numeric value of a hexadecimal digit character.
 * Panics if the character is not a hexadecimal digit.
 *
 * @param digit  one of '0'-'9', 'a'-'f', 'A'-'F'
 */
static int digit_value(int digit)
{
        /* only the decimal digits are guaranteed to be consecutive ... */
        if (digit >= '0' && digit <= '9')
                return digit - '0';

        /* ... so the letters have to be listed one by one */
        switch (digit) {
        case 'a': case 'A': return 10;
        case 'b': case 'B': return 11;
        case 'c': case 'C': return 12;
        case 'd': case 'D': return 13;
        case 'e': case 'E': return 14;
        case 'f': case 'F': return 15;
        default:
                panic("wrong character given");
        }
}
353
354 /**
355  * Parses an octal character sequence.
356  *
357  * @param first_digit  the already read first digit
358  */
359 static int parse_octal_sequence(const int first_digit)
360 {
361         assert(is_octal_digit(first_digit));
362         int value = digit_value(first_digit);
363         if (!is_octal_digit(input.c)) return value;
364         value = 8 * value + digit_value(input.c);
365         next_char();
366         if (!is_octal_digit(input.c)) return value;
367         value = 8 * value + digit_value(input.c);
368         next_char();
369
370         if (char_is_signed) {
371                 return (signed char) value;
372         } else {
373                 return (unsigned char) value;
374         }
375 }
376
377 /**
378  * Parses a hex character sequence.
379  */
380 static int parse_hex_sequence(void)
381 {
382         int value = 0;
383         while (isxdigit(input.c)) {
384                 value = 16 * value + digit_value(input.c);
385                 next_char();
386         }
387
388         if (char_is_signed) {
389                 return (signed char) value;
390         } else {
391                 return (unsigned char) value;
392         }
393 }
394
395 /**
396  * Parse an escape sequence.
397  */
398 static int parse_escape_sequence(void)
399 {
400         eat('\\');
401
402         int ec = input.c;
403         next_char();
404
405         switch (ec) {
406         case '"':  return '"';
407         case '\'': return '\'';
408         case '\\': return '\\';
409         case '?': return '\?';
410         case 'a': return '\a';
411         case 'b': return '\b';
412         case 'f': return '\f';
413         case 'n': return '\n';
414         case 'r': return '\r';
415         case 't': return '\t';
416         case 'v': return '\v';
417         case 'x':
418                 return parse_hex_sequence();
419         case '0':
420         case '1':
421         case '2':
422         case '3':
423         case '4':
424         case '5':
425         case '6':
426         case '7':
427                 return parse_octal_sequence(ec);
428         case EOF:
429                 parse_error("reached end of file while parsing escape sequence");
430                 return EOF;
431         default:
432                 parse_error("unknown escape sequence");
433                 return EOF;
434         }
435 }
436
437 static void grow_symbol(utf32 const tc)
438 {
439         struct obstack *const o  = &symbol_obstack;
440         if (tc < 0x80U) {
441                 obstack_1grow(o, tc);
442         } else if (tc < 0x800) {
443                 obstack_1grow(o, 0xC0 | (tc >> 6));
444                 obstack_1grow(o, 0x80 | (tc & 0x3F));
445         } else if (tc < 0x10000) {
446                 obstack_1grow(o, 0xE0 | ( tc >> 12));
447                 obstack_1grow(o, 0x80 | ((tc >>  6) & 0x3F));
448                 obstack_1grow(o, 0x80 | ( tc        & 0x3F));
449         } else {
450                 obstack_1grow(o, 0xF0 | ( tc >> 18));
451                 obstack_1grow(o, 0x80 | ((tc >> 12) & 0x3F));
452                 obstack_1grow(o, 0x80 | ((tc >>  6) & 0x3F));
453                 obstack_1grow(o, 0x80 | ( tc        & 0x3F));
454         }
455 }
456
457 static string_t identify_string(char *string, size_t len)
458 {
459         /* TODO hash */
460 #if 0
461         const char *result = strset_insert(&stringset, concat);
462         if (result != concat) {
463                 obstack_free(&symbol_obstack, concat);
464         }
465 #else
466         const char *result = string;
467 #endif
468         return (string_t) {result, len};
469 }
470
471 static void parse_string_literal(void)
472 {
473         const unsigned start_linenr = input.position.lineno;
474
475         eat('"');
476
477         while (true) {
478                 switch (input.c) {
479                 case '\\': {
480                         utf32 tc;
481                         if (resolve_escape_sequences) {
482                                 tc = parse_escape_sequence();
483                                 obstack_1grow(&symbol_obstack, (char) tc);
484                         } else {
485                                 obstack_1grow(&symbol_obstack, (char) input.c);
486                                 next_char();
487                                 obstack_1grow(&symbol_obstack, (char) input.c);
488                                 next_char();
489                         }
490                         break;
491                 }
492
493                 case EOF: {
494                         source_position_t source_position;
495                         source_position.input_name = pp_token.source_position.input_name;
496                         source_position.lineno     = start_linenr;
497                         errorf(&source_position, "string has no end");
498                         pp_token.type = TP_ERROR;
499                         return;
500                 }
501
502                 case '"':
503                         next_char();
504                         goto end_of_string;
505
506                 default:
507                         grow_symbol(input.c);
508                         next_char();
509                         break;
510                 }
511         }
512
513 end_of_string:
514         /* add finishing 0 to the string */
515         obstack_1grow(&symbol_obstack, '\0');
516         const size_t size   = (size_t)obstack_object_size(&symbol_obstack);
517         char *const  string = obstack_finish(&symbol_obstack);
518
519         pp_token.type    = TP_STRING_LITERAL;
520         pp_token.literal = identify_string(string, size);
521 }
522
523 /**
524  * Parse a wide string literal and set lexer_token.
525  */
526 static void parse_wide_string_literal(void)
527 {
528         parse_string_literal();
529         if (pp_token.type == TP_STRING_LITERAL)
530                 pp_token.type = TP_WIDE_STRING_LITERAL;
531 }
532
533 static void parse_wide_character_constant(void)
534 {
535         eat('\'');
536
537         while (true) {
538                 switch (input.c) {
539                 case '\\': {
540                         const utf32 tc = parse_escape_sequence();
541                         grow_symbol(tc);
542                         break;
543                 }
544
545                 MATCH_NEWLINE(
546                         parse_error("newline while parsing character constant");
547                         break;
548                 )
549
550                 case '\'':
551                         next_char();
552                         goto end_of_wide_char_constant;
553
554                 case EOF:
555                         parse_error("EOF while parsing character constant");
556                         pp_token.type = TP_ERROR;
557                         return;
558
559                 default:
560                         grow_symbol(input.c);
561                         next_char();
562                         break;
563                 }
564         }
565
566 end_of_wide_char_constant:
567         obstack_1grow(&symbol_obstack, '\0');
568         size_t  size = (size_t) obstack_object_size(&symbol_obstack)-1;
569         char   *string = obstack_finish(&symbol_obstack);
570         pp_token.type       = TP_WIDE_CHARACTER_CONSTANT;
571         pp_token.literal = identify_string(string, size);
572
573         if (size == 0) {
574                 parse_error("empty character constant");
575         }
576 }
577
578 static void parse_character_constant(void)
579 {
580         const unsigned start_linenr = input.position.lineno;
581
582         eat('\'');
583
584         int tc;
585         while (true) {
586                 switch (input.c) {
587                 case '\\':
588                         tc = parse_escape_sequence();
589                         obstack_1grow(&symbol_obstack, (char) tc);
590                         break;
591
592                 MATCH_NEWLINE(
593                         parse_error("newline while parsing character constant");
594                         break;
595                 )
596
597                 case EOF: {
598                         source_position_t source_position;
599                         source_position.input_name = pp_token.source_position.input_name;
600                         source_position.lineno     = start_linenr;
601                         errorf(&source_position, "EOF while parsing character constant");
602                         pp_token.type = TP_ERROR;
603                         return;
604                 }
605
606                 case '\'':
607                         next_char();
608                         goto end_of_char_constant;
609
610                 default:
611                         obstack_1grow(&symbol_obstack, (char) input.c);
612                         next_char();
613                         break;
614
615                 }
616         }
617
618 end_of_char_constant:;
619         const size_t      size   = (size_t)obstack_object_size(&symbol_obstack);
620         const char *const string = obstack_finish(&symbol_obstack);
621
622         pp_token.type          = TP_CHARACTER_CONSTANT;
623         pp_token.literal.begin = string;
624         pp_token.literal.size  = size;
625 }
626
/** case labels for all letters except e, p, E and P, plus the underscore */
#define SYMBOL_CHARS_WITHOUT_E_P \
        case 'a': case 'b': case 'c': case 'd': case 'f': case 'g': \
        case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': \
        case 'n': case 'o': case 'q': case 'r': case 's': case 't': \
        case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': \
        case 'A': case 'B': case 'C': case 'D': case 'F': case 'G': \
        case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': \
        case 'N': case 'O': case 'Q': case 'R': case 'S': case 'T': \
        case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': \
        case '_':

/** case labels for all letters and the underscore */
#define SYMBOL_CHARS \
        SYMBOL_CHARS_WITHOUT_E_P \
        case 'e': case 'p': case 'E': case 'P':

/** case labels for the decimal digits */
#define DIGITS \
        case '0': case '1': case '2': case '3': case '4': \
        case '5': case '6': case '7': case '8': case '9':
696
697 /**
698  * returns next final token from a preprocessor macro expansion
699  */
700 static void expand_next(void)
701 {
702         assert(current_expansion != NULL);
703
704         pp_definition_t *definition = current_expansion;
705
706 restart:
707         if (definition->list_len == 0
708                         || definition->expand_pos >= definition->list_len) {
709                 /* we're finished with the current macro, move up 1 level in the
710                  * expansion stack */
711                 pp_definition_t *parent = definition->parent_expansion;
712                 definition->parent_expansion = NULL;
713                 definition->is_expanding     = false;
714
715                 /* it was the outermost expansion, parse normal pptoken */
716                 if (parent == NULL) {
717                         current_expansion = NULL;
718                         next_preprocessing_token();
719                         return;
720                 }
721                 definition        = parent;
722                 current_expansion = definition;
723                 goto restart;
724         }
725         pp_token = definition->token_list[definition->expand_pos];
726         pp_token.source_position = expansion_pos;
727         ++definition->expand_pos;
728
729         if (pp_token.type != TP_IDENTIFIER)
730                 return;
731
732         /* if it was an identifier then we might need to expand again */
733         pp_definition_t *symbol_definition = pp_token.symbol->pp_definition;
734         if (symbol_definition != NULL && !symbol_definition->is_expanding) {
735                 symbol_definition->parent_expansion = definition;
736                 symbol_definition->expand_pos       = 0;
737                 symbol_definition->is_expanding     = true;
738                 definition                          = symbol_definition;
739                 current_expansion                   = definition;
740                 goto restart;
741         }
742 }
743
744 static void skip_line_comment(void)
745 {
746         while (true) {
747                 switch (input.c) {
748                 case EOF:
749                         return;
750
751                 case '\r':
752                 case '\n':
753                         return;
754
755                 default:
756                         next_char();
757                         break;
758                 }
759         }
760 }
761
762 static void skip_multiline_comment(void)
763 {
764         unsigned start_linenr = input.position.lineno;
765         while (true) {
766                 switch (input.c) {
767                 case '/':
768                         next_char();
769                         if (input.c == '*') {
770                                 /* TODO: nested comment, warn here */
771                         }
772                         break;
773                 case '*':
774                         next_char();
775                         if (input.c == '/') {
776                                 next_char();
777                                 info.whitespace += input.position.colno-1;
778                                 return;
779                         }
780                         break;
781
782                 MATCH_NEWLINE(
783                         info.at_line_begin |= !in_pp_directive;
784                         break;
785                 )
786
787                 case EOF: {
788                         source_position_t source_position;
789                         source_position.input_name = pp_token.source_position.input_name;
790                         source_position.lineno     = start_linenr;
791                         errorf(&source_position, "at end of file while looking for comment end");
792                         return;
793                 }
794
795                 default:
796                         next_char();
797                         break;
798                 }
799         }
800 }
801
802 static void skip_whitespace(void)
803 {
804         while (true) {
805                 switch (input.c) {
806                 case ' ':
807                 case '\t':
808                         next_char();
809                         continue;
810
811                 MATCH_NEWLINE(
812                         info.at_line_begin = true;
813                         return;
814                 )
815
816                 case '/':
817                         next_char();
818                         if (input.c == '/') {
819                                 next_char();
820                                 skip_line_comment();
821                                 continue;
822                         } else if (input.c == '*') {
823                                 next_char();
824                                 skip_multiline_comment();
825                                 continue;
826                         } else {
827                                 put_back(input.c);
828                                 input.c = '/';
829                         }
830                         return;
831                 default:
832                         return;
833                 }
834         }
835 }
836
837 static void eat_pp(int type)
838 {
839         (void) type;
840         assert(pp_token.type == type);
841         next_preprocessing_token();
842 }
843
844 static void parse_symbol(void)
845 {
846         obstack_1grow(&symbol_obstack, (char) input.c);
847         next_char();
848
849         while (true) {
850                 switch (input.c) {
851                 DIGITS
852                 SYMBOL_CHARS
853                         obstack_1grow(&symbol_obstack, (char) input.c);
854                         next_char();
855                         break;
856
857                 default:
858                         goto end_symbol;
859                 }
860         }
861
862 end_symbol:
863         obstack_1grow(&symbol_obstack, '\0');
864         char *string = obstack_finish(&symbol_obstack);
865
866         /* might be a wide string or character constant ( L"string"/L'c' ) */
867         if (input.c == '"' && string[0] == 'L' && string[1] == '\0') {
868                 obstack_free(&symbol_obstack, string);
869                 parse_wide_string_literal();
870                 return;
871         } else if (input.c == '\'' && string[0] == 'L' && string[1] == '\0') {
872                 obstack_free(&symbol_obstack, string);
873                 parse_wide_character_constant();
874                 return;
875         }
876
877         symbol_t *symbol = symbol_table_insert(string);
878
879         pp_token.type   = symbol->pp_ID;
880         pp_token.symbol = symbol;
881
882         /* we can free the memory from symbol obstack if we already had an entry in
883          * the symbol table */
884         if (symbol->string != string) {
885                 obstack_free(&symbol_obstack, string);
886         }
887 }
888
889 static void parse_number(void)
890 {
891         obstack_1grow(&symbol_obstack, (char) input.c);
892         next_char();
893
894         while (true) {
895                 switch (input.c) {
896                 case '.':
897                 DIGITS
898                 SYMBOL_CHARS_WITHOUT_E_P
899                         obstack_1grow(&symbol_obstack, (char) input.c);
900                         next_char();
901                         break;
902
903                 case 'e':
904                 case 'p':
905                 case 'E':
906                 case 'P':
907                         obstack_1grow(&symbol_obstack, (char) input.c);
908                         next_char();
909                         if (input.c == '+' || input.c == '-') {
910                                 obstack_1grow(&symbol_obstack, (char) input.c);
911                                 next_char();
912                         }
913                         break;
914
915                 default:
916                         goto end_number;
917                 }
918         }
919
920 end_number:
921         obstack_1grow(&symbol_obstack, '\0');
922         size_t  size   = obstack_object_size(&symbol_obstack);
923         char   *string = obstack_finish(&symbol_obstack);
924
925         pp_token.type          = TP_NUMBER;
926         pp_token.literal.begin = string;
927         pp_token.literal.size  = size;
928 }
929
930
/*
 * Helper macros for punctuators that consist of more than one character.
 * They are used as a group: MAYBE_PROLOG opens a "while (true) { switch"
 * construct that is only closed again by ELSE_CODE or ELSE, so the macros
 * are not balanced on their own.
 */

/** Consumes the first character of the punctuator and opens the switch over
 * the character that follows it. */
#define MAYBE_PROLOG                                       \
                        next_char();                                   \
                        while (true) {                                 \
                                switch (input.c) {

/** If the next character is @p ch: consumes it, sets the token type to
 * @p set_type and returns from the enclosing function. */
#define MAYBE(ch, set_type)                                \
                                case ch:                                   \
                                        next_char();                           \
                                        pp_token.type = set_type;              \
                                        return;

/** Default case: runs @p code, returns from the enclosing function and
 * closes the construct opened by MAYBE_PROLOG. */
#define ELSE_CODE(code)                                    \
                                default:                                   \
                                        code                                   \
                                        return;                                \
                                }                                          \
                        }

/** Default case that only sets the token type to @p set_type. */
#define ELSE(set_type)                                     \
                ELSE_CODE(                                         \
                        pp_token.type = set_type;                      \
                )
953
954 static void next_preprocessing_token(void)
955 {
956         if (current_expansion != NULL) {
957                 expand_next();
958                 return;
959         }
960
961         info.at_line_begin  = false;
962         info.had_whitespace = false;
963 restart:
964         pp_token.source_position = input.position;
965         switch (input.c) {
966         case ' ':
967         case '\t':
968                 ++info.whitespace;
969                 info.had_whitespace = true;
970                 next_char();
971                 goto restart;
972
973         MATCH_NEWLINE(
974                 info.at_line_begin = true;
975                 info.had_whitespace = true;
976                 goto restart;
977         )
978
979         SYMBOL_CHARS
980                 parse_symbol();
981                 return;
982
983         DIGITS
984                 parse_number();
985                 return;
986
987         case '"':
988                 parse_string_literal();
989                 return;
990
991         case '\'':
992                 parse_character_constant();
993                 return;
994
995         case '.':
996                 MAYBE_PROLOG
997                         case '0':
998                         case '1':
999                         case '2':
1000                         case '3':
1001                         case '4':
1002                         case '5':
1003                         case '6':
1004                         case '7':
1005                         case '8':
1006                         case '9':
1007                                 put_back(input.c);
1008                                 input.c = '.';
1009                                 parse_number();
1010                                 return;
1011
1012                         case '.':
1013                                 MAYBE_PROLOG
1014                                 MAYBE('.', TP_DOTDOTDOT)
1015                                 ELSE_CODE(
1016                                         put_back(input.c);
1017                                         input.c = '.';
1018                                         pp_token.type = '.';
1019                                 )
1020                 ELSE('.')
1021         case '&':
1022                 MAYBE_PROLOG
1023                 MAYBE('&', TP_ANDAND)
1024                 MAYBE('=', TP_ANDEQUAL)
1025                 ELSE('&')
1026         case '*':
1027                 MAYBE_PROLOG
1028                 MAYBE('=', TP_ASTERISKEQUAL)
1029                 ELSE('*')
1030         case '+':
1031                 MAYBE_PROLOG
1032                 MAYBE('+', TP_PLUSPLUS)
1033                 MAYBE('=', TP_PLUSEQUAL)
1034                 ELSE('+')
1035         case '-':
1036                 MAYBE_PROLOG
1037                 MAYBE('>', TP_MINUSGREATER)
1038                 MAYBE('-', TP_MINUSMINUS)
1039                 MAYBE('=', TP_MINUSEQUAL)
1040                 ELSE('-')
1041         case '!':
1042                 MAYBE_PROLOG
1043                 MAYBE('=', TP_EXCLAMATIONMARKEQUAL)
1044                 ELSE('!')
1045         case '/':
1046                 MAYBE_PROLOG
1047                 MAYBE('=', TP_SLASHEQUAL)
1048                         case '*':
1049                                 next_char();
1050                                 info.had_whitespace = true;
1051                                 skip_multiline_comment();
1052                                 goto restart;
1053                         case '/':
1054                                 next_char();
1055                                 info.had_whitespace = true;
1056                                 skip_line_comment();
1057                                 goto restart;
1058                 ELSE('/')
1059         case '%':
1060                 MAYBE_PROLOG
1061                 MAYBE('>', '}')
1062                 MAYBE('=', TP_PERCENTEQUAL)
1063                         case ':':
1064                                 MAYBE_PROLOG
1065                                         case '%':
1066                                                 MAYBE_PROLOG
1067                                                 MAYBE(':', TP_HASHHASH)
1068                                                 ELSE_CODE(
1069                                                         put_back(input.c);
1070                                                         input.c = '%';
1071                                                         pp_token.type = '#';
1072                                                 )
1073                                 ELSE('#')
1074                 ELSE('%')
1075         case '<':
1076                 MAYBE_PROLOG
1077                 MAYBE(':', '[')
1078                 MAYBE('%', '{')
1079                 MAYBE('=', TP_LESSEQUAL)
1080                         case '<':
1081                                 MAYBE_PROLOG
1082                                 MAYBE('=', TP_LESSLESSEQUAL)
1083                                 ELSE(TP_LESSLESS)
1084                 ELSE('<')
1085         case '>':
1086                 MAYBE_PROLOG
1087                 MAYBE('=', TP_GREATEREQUAL)
1088                         case '>':
1089                                 MAYBE_PROLOG
1090                                 MAYBE('=', TP_GREATERGREATEREQUAL)
1091                                 ELSE(TP_GREATERGREATER)
1092                 ELSE('>')
1093         case '^':
1094                 MAYBE_PROLOG
1095                 MAYBE('=', TP_CARETEQUAL)
1096                 ELSE('^')
1097         case '|':
1098                 MAYBE_PROLOG
1099                 MAYBE('=', TP_PIPEEQUAL)
1100                 MAYBE('|', TP_PIPEPIPE)
1101                 ELSE('|')
1102         case ':':
1103                 MAYBE_PROLOG
1104                 MAYBE('>', ']')
1105                 ELSE(':')
1106         case '=':
1107                 MAYBE_PROLOG
1108                 MAYBE('=', TP_EQUALEQUAL)
1109                 ELSE('=')
1110         case '#':
1111                 MAYBE_PROLOG
1112                 MAYBE('#', TP_HASHHASH)
1113                 ELSE_CODE(
1114                         pp_token.type = '#';
1115                 )
1116
1117         case '?':
1118         case '[':
1119         case ']':
1120         case '(':
1121         case ')':
1122         case '{':
1123         case '}':
1124         case '~':
1125         case ';':
1126         case ',':
1127         case '\\':
1128                 pp_token.type = input.c;
1129                 next_char();
1130                 return;
1131
1132         case EOF:
1133                 if (input_stack != NULL) {
1134                         close_input();
1135                         pop_restore_input();
1136                         fputc('\n', out);
1137                         print_line_directive(&input.position, "2");
1138                         goto restart;
1139                 } else {
1140                         pp_token.source_position.lineno++;
1141                         info.at_line_begin = true;
1142                         pp_token.type = TP_EOF;
1143                 }
1144                 return;
1145
1146         default:
1147                 next_char();
1148                 if (!ignore_unknown_chars) {
1149                         errorf(&pp_token.source_position, "unknown character '%c' found\n",
1150                                input.c);
1151                         pp_token.type = TP_ERROR;
1152                 } else {
1153                         pp_token.type = input.c;
1154                 }
1155                 return;
1156         }
1157 }
1158
1159 static void print_quoted_string(const char *const string)
1160 {
1161         fputc('"', out);
1162         for (const char *c = string; *c != 0; ++c) {
1163                 switch (*c) {
1164                 case '"': fputs("\\\"", out); break;
1165                 case '\\':  fputs("\\\\", out); break;
1166                 case '\a':  fputs("\\a", out); break;
1167                 case '\b':  fputs("\\b", out); break;
1168                 case '\f':  fputs("\\f", out); break;
1169                 case '\n':  fputs("\\n", out); break;
1170                 case '\r':  fputs("\\r", out); break;
1171                 case '\t':  fputs("\\t", out); break;
1172                 case '\v':  fputs("\\v", out); break;
1173                 case '\?':  fputs("\\?", out); break;
1174                 default:
1175                         if (!isprint(*c)) {
1176                                 fprintf(out, "\\%03o", (unsigned)*c);
1177                                 break;
1178                         }
1179                         fputc(*c, out);
1180                         break;
1181                 }
1182         }
1183         fputc('"', out);
1184 }
1185
1186 static void print_line_directive(const source_position_t *pos, const char *add)
1187 {
1188         fprintf(out, "# %u ", pos->lineno);
1189         print_quoted_string(pos->input_name);
1190         if (add != NULL) {
1191                 fputc(' ', out);
1192                 fputs(add, out);
1193         }
1194
1195         printed_input_name = pos->input_name;
1196         input.output_line  = pos->lineno-1;
1197 }
1198
1199 static void emit_newlines(void)
1200 {
1201         unsigned delta = pp_token.source_position.lineno - input.output_line;
1202
1203         if (delta >= 9) {
1204                 fputc('\n', out);
1205                 print_line_directive(&pp_token.source_position, NULL);
1206                 fputc('\n', out);
1207         } else {
1208                 for (unsigned i = 0; i < delta; ++i) {
1209                         fputc('\n', out);
1210                 }
1211         }
1212         input.output_line = pp_token.source_position.lineno;
1213 }
1214
1215 static void emit_pp_token(void)
1216 {
1217         if (skip_mode)
1218                 return;
1219
1220         if (info.at_line_begin) {
1221                 emit_newlines();
1222
1223                 for (unsigned i = 0; i < info.whitespace; ++i)
1224                         fputc(' ', out);
1225
1226         } else if (info.had_whitespace ||
1227                            tokens_would_paste(last_token, pp_token.type)) {
1228                 fputc(' ', out);
1229         }
1230
1231         switch (pp_token.type) {
1232         case TP_IDENTIFIER:
1233                 fputs(pp_token.symbol->string, out);
1234                 break;
1235         case TP_NUMBER:
1236                 fputs(pp_token.literal.begin, out);
1237                 break;
1238         case TP_WIDE_STRING_LITERAL:
1239                 fputc('L', out);
1240         case TP_STRING_LITERAL:
1241                 fputc('"', out);
1242                 fputs(pp_token.literal.begin, out);
1243                 fputc('"', out);
1244                 break;
1245         case TP_WIDE_CHARACTER_CONSTANT:
1246                 fputc('L', out);
1247         case TP_CHARACTER_CONSTANT:
1248                 fputc('\'', out);
1249                 fputs(pp_token.literal.begin, out);
1250                 fputc('\'', out);
1251                 break;
1252         default:
1253                 print_pp_token_type(out, pp_token.type);
1254                 break;
1255         }
1256         last_token = pp_token.type;
1257 }
1258
1259 static void eat_pp_directive(void)
1260 {
1261         while (!info.at_line_begin) {
1262                 next_preprocessing_token();
1263         }
1264 }
1265
1266 static bool strings_equal(const string_t *string1, const string_t *string2)
1267 {
1268         size_t size = string1->size;
1269         if (size != string2->size)
1270                 return false;
1271
1272         const char *c1 = string1->begin;
1273         const char *c2 = string2->begin;
1274         for (size_t i = 0; i < size; ++i, ++c1, ++c2) {
1275                 if (*c1 != *c2)
1276                         return false;
1277         }
1278         return true;
1279 }
1280
1281 static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
1282 {
1283         if (token1->type != token2->type)
1284                 return false;
1285
1286         switch (token1->type) {
1287         case TP_HEADERNAME:
1288                 /* TODO */
1289                 return false;
1290         case TP_IDENTIFIER:
1291                 return token1->symbol == token2->symbol;
1292         case TP_NUMBER:
1293         case TP_CHARACTER_CONSTANT:
1294         case TP_STRING_LITERAL:
1295                 return strings_equal(&token1->literal, &token2->literal);
1296
1297         default:
1298                 return true;
1299         }
1300 }
1301
1302 static bool pp_definitions_equal(const pp_definition_t *definition1,
1303                                  const pp_definition_t *definition2)
1304 {
1305         if (definition1->list_len != definition2->list_len)
1306                 return false;
1307
1308         size_t         len = definition1->list_len;
1309         const token_t *t1  = definition1->token_list;
1310         const token_t *t2  = definition2->token_list;
1311         for (size_t i = 0; i < len; ++i, ++t1, ++t2) {
1312                 if (!pp_tokens_equal(t1, t2))
1313                         return false;
1314         }
1315         return true;
1316 }
1317
1318 static void parse_define_directive(void)
1319 {
1320         eat_pp(TP_define);
1321         assert(obstack_object_size(&pp_obstack) == 0);
1322
1323         if (pp_token.type != TP_IDENTIFIER || info.at_line_begin) {
1324                 errorf(&pp_token.source_position,
1325                        "expected identifier after #define, got '%t'", &pp_token);
1326                 goto error_out;
1327         }
1328         symbol_t *symbol = pp_token.symbol;
1329
1330         pp_definition_t *new_definition
1331                 = obstack_alloc(&pp_obstack, sizeof(new_definition[0]));
1332         memset(new_definition, 0, sizeof(new_definition[0]));
1333         new_definition->source_position = input.position;
1334
1335         /* this is probably the only place where spaces are significant in the
1336          * lexer (except for the fact that they separate tokens). #define b(x)
1337          * is something else than #define b (x) */
1338         if (input.c == '(') {
1339                 /* eat the '(' */
1340                 next_preprocessing_token();
1341                 /* get next token after '(' */
1342                 next_preprocessing_token();
1343
1344                 while (true) {
1345                         switch (pp_token.type) {
1346                         case TP_DOTDOTDOT:
1347                                 new_definition->is_variadic = true;
1348                                 next_preprocessing_token();
1349                                 if (pp_token.type != ')') {
1350                                         errorf(&input.position,
1351                                                         "'...' not at end of macro argument list");
1352                                         goto error_out;
1353                                 }
1354                                 break;
1355                         case TP_IDENTIFIER:
1356                                 obstack_ptr_grow(&pp_obstack, pp_token.symbol);
1357                                 next_preprocessing_token();
1358
1359                                 if (pp_token.type == ',') {
1360                                         next_preprocessing_token();
1361                                         break;
1362                                 }
1363
1364                                 if (pp_token.type != ')') {
1365                                         errorf(&pp_token.source_position,
1366                                                "expected ',' or ')' after identifier, got '%t'",
1367                                                &pp_token);
1368                                         goto error_out;
1369                                 }
1370                                 break;
1371                         case ')':
1372                                 next_preprocessing_token();
1373                                 goto finish_argument_list;
1374                         default:
1375                                 errorf(&pp_token.source_position,
1376                                        "expected identifier, '...' or ')' in #define argument list, got '%t'",
1377                                        &pp_token);
1378                                 goto error_out;
1379                         }
1380                 }
1381
1382         finish_argument_list:
1383                 new_definition->has_parameters = true;
1384                 new_definition->n_parameters
1385                         = obstack_object_size(&pp_obstack) / sizeof(new_definition->parameters[0]);
1386                 new_definition->parameters = obstack_finish(&pp_obstack);
1387         } else {
1388                 next_preprocessing_token();
1389         }
1390
1391         /* construct a new pp_definition on the obstack */
1392         assert(obstack_object_size(&pp_obstack) == 0);
1393         size_t list_len = 0;
1394         while (!info.at_line_begin) {
1395                 obstack_grow(&pp_obstack, &pp_token, sizeof(pp_token));
1396                 ++list_len;
1397                 next_preprocessing_token();
1398         }
1399
1400         new_definition->list_len   = list_len;
1401         new_definition->token_list = obstack_finish(&pp_obstack);
1402
1403         pp_definition_t *old_definition = symbol->pp_definition;
1404         if (old_definition != NULL) {
1405                 if (!pp_definitions_equal(old_definition, new_definition)) {
1406                         warningf(WARN_OTHER, &input.position, "multiple definition of macro '%Y' (first defined %P)", symbol, &old_definition->source_position);
1407                 } else {
1408                         /* reuse the old definition */
1409                         obstack_free(&pp_obstack, new_definition);
1410                         new_definition = old_definition;
1411                 }
1412         }
1413
1414         symbol->pp_definition = new_definition;
1415         return;
1416
1417 error_out:
1418         if (obstack_object_size(&pp_obstack) > 0) {
1419                 char *ptr = obstack_finish(&pp_obstack);
1420                 obstack_free(&pp_obstack, ptr);
1421         }
1422         eat_pp_directive();
1423 }
1424
1425 static void parse_undef_directive(void)
1426 {
1427         eat_pp(TP_undef);
1428
1429         if (pp_token.type != TP_IDENTIFIER) {
1430                 errorf(&input.position,
1431                        "expected identifier after #undef, got '%t'", &pp_token);
1432                 eat_pp_directive();
1433                 return;
1434         }
1435
1436         symbol_t *symbol = pp_token.symbol;
1437         symbol->pp_definition = NULL;
1438         next_preprocessing_token();
1439
1440         if (!info.at_line_begin) {
1441                 warningf(WARN_OTHER, &input.position, "extra tokens at end of #undef directive");
1442         }
1443         eat_pp_directive();
1444 }
1445
1446 static const char *parse_headername(void)
1447 {
1448         /* behind an #include we can have the special headername lexems.
1449          * They're only allowed behind an #include so they're not recognized
1450          * by the normal next_preprocessing_token. We handle them as a special
1451          * exception here */
1452         skip_whitespace();
1453
1454         if (info.at_line_begin) {
1455                 parse_error("expected headername after #include");
1456                 return NULL;
1457         }
1458
1459         assert(obstack_object_size(&input_obstack) == 0);
1460
1461         /* check wether we have a "... or <... headername */
1462         switch (input.c) {
1463         case '<':
1464                 next_char();
1465                 while (true) {
1466                         switch (input.c) {
1467                         case EOF:
1468                                 /* fallthrough */
1469                         MATCH_NEWLINE(
1470                                 parse_error("header name without closing '>'");
1471                                 return NULL;
1472                         )
1473                         case '>':
1474                                 next_char();
1475                                 goto finished_headername;
1476                         }
1477                         obstack_1grow(&input_obstack, (char) input.c);
1478                         next_char();
1479                 }
1480                 /* we should never be here */
1481
1482         case '"':
1483                 next_char();
1484                 while (true) {
1485                         switch (input.c) {
1486                         case EOF:
1487                                 /* fallthrough */
1488                         MATCH_NEWLINE(
1489                                 parse_error("header name without closing '>'");
1490                                 return NULL;
1491                         )
1492                         case '"':
1493                                 next_char();
1494                                 goto finished_headername;
1495                         }
1496                         obstack_1grow(&input_obstack, (char) input.c);
1497                         next_char();
1498                 }
1499                 /* we should never be here */
1500
1501         default:
1502                 /* TODO: do normal pp_token parsing and concatenate results */
1503                 panic("pp_token concat include not implemented yet");
1504         }
1505
1506 finished_headername:
1507         obstack_1grow(&input_obstack, '\0');
1508         char *headername = obstack_finish(&input_obstack);
1509
1510         /* TODO: iterate search-path to find the file */
1511
1512         skip_whitespace();
1513
1514         return headername;
1515 }
1516
1517 static bool parse_include_directive(void)
1518 {
1519         /* don't eat the TP_include here!
1520          * we need an alternative parsing for the next token */
1521
1522         const char *headername = parse_headername();
1523         if (headername == NULL) {
1524                 eat_pp_directive();
1525                 return false;
1526         }
1527
1528         if (!info.at_line_begin) {
1529                 warningf(WARN_OTHER, &pp_token.source_position, "extra tokens at end of #include directive");
1530                 eat_pp_directive();
1531         }
1532
1533         if (n_inputs > INCLUDE_LIMIT) {
1534                 errorf(&pp_token.source_position, "#include nested too deeply");
1535                 /* eat \n or EOF */
1536                 next_preprocessing_token();
1537                 return false;
1538         }
1539
1540         /* we have to reenable space counting and macro expansion here,
1541          * because it is still disabled in directive parsing,
1542          * but we will trigger a preprocessing token reading of the new file
1543          * now and need expansions/space counting */
1544         in_pp_directive = false;
1545
1546         /* switch inputs */
1547         emit_newlines();
1548         push_input();
1549         bool res = open_input(headername);
1550         if (!res) {
1551                 errorf(&pp_token.source_position,
1552                        "failed including '%s': %s", headername, strerror(errno));
1553                 pop_restore_input();
1554                 return false;
1555         }
1556
1557         return true;
1558 }
1559
1560 static pp_conditional_t *push_conditional(void)
1561 {
1562         pp_conditional_t *conditional
1563                 = obstack_alloc(&pp_obstack, sizeof(*conditional));
1564         memset(conditional, 0, sizeof(*conditional));
1565
1566         conditional->parent = conditional_stack;
1567         conditional_stack   = conditional;
1568
1569         return conditional;
1570 }
1571
1572 static void pop_conditional(void)
1573 {
1574         assert(conditional_stack != NULL);
1575         conditional_stack = conditional_stack->parent;
1576 }
1577
1578 static void check_unclosed_conditionals(void)
1579 {
1580         while (conditional_stack != NULL) {
1581                 pp_conditional_t *conditional = conditional_stack;
1582
1583                 if (conditional->in_else) {
1584                         errorf(&conditional->source_position, "unterminated #else");
1585                 } else {
1586                         errorf(&conditional->source_position, "unterminated condition");
1587                 }
1588                 pop_conditional();
1589         }
1590 }
1591
1592 static void parse_ifdef_ifndef_directive(void)
1593 {
1594         bool is_ifndef = (pp_token.type == TP_ifndef);
1595         bool condition;
1596         next_preprocessing_token();
1597
1598         if (skip_mode) {
1599                 eat_pp_directive();
1600                 pp_conditional_t *conditional = push_conditional();
1601                 conditional->source_position  = pp_token.source_position;
1602                 conditional->skip             = true;
1603                 return;
1604         }
1605
1606         if (pp_token.type != TP_IDENTIFIER || info.at_line_begin) {
1607                 errorf(&pp_token.source_position,
1608                        "expected identifier after #%s, got '%t'",
1609                        is_ifndef ? "ifndef" : "ifdef", &pp_token);
1610                 eat_pp_directive();
1611
1612                 /* just take the true case in the hope to avoid further errors */
1613                 condition = true;
1614         } else {
1615                 symbol_t        *symbol        = pp_token.symbol;
1616                 pp_definition_t *pp_definition = symbol->pp_definition;
1617                 next_preprocessing_token();
1618
1619                 if (!info.at_line_begin) {
1620                         errorf(&pp_token.source_position,
1621                                "extra tokens at end of #%s",
1622                                is_ifndef ? "ifndef" : "ifdef");
1623                         eat_pp_directive();
1624                 }
1625
1626                 /* evaluate wether we are in true or false case */
1627                 condition = is_ifndef ? pp_definition == NULL : pp_definition != NULL;
1628         }
1629
1630         pp_conditional_t *conditional = push_conditional();
1631         conditional->source_position  = pp_token.source_position;
1632         conditional->condition        = condition;
1633
1634         if (!condition) {
1635                 skip_mode = true;
1636         }
1637 }
1638
1639 static void parse_else_directive(void)
1640 {
1641         eat_pp(TP_else);
1642
1643         if (!info.at_line_begin) {
1644                 if (!skip_mode) {
1645                         warningf(WARN_OTHER, &pp_token.source_position, "extra tokens at end of #else");
1646                 }
1647                 eat_pp_directive();
1648         }
1649
1650         pp_conditional_t *conditional = conditional_stack;
1651         if (conditional == NULL) {
1652                 errorf(&pp_token.source_position, "#else without prior #if");
1653                 return;
1654         }
1655
1656         if (conditional->in_else) {
1657                 errorf(&pp_token.source_position,
1658                        "#else after #else (condition started %P)",
1659                        conditional->source_position);
1660                 skip_mode = true;
1661                 return;
1662         }
1663
1664         conditional->in_else = true;
1665         if (!conditional->skip) {
1666                 skip_mode = conditional->condition;
1667         }
1668         conditional->source_position = pp_token.source_position;
1669 }
1670
1671 static void parse_endif_directive(void)
1672 {
1673         eat_pp(TP_endif);
1674
1675         if (!info.at_line_begin) {
1676                 if (!skip_mode) {
1677                         warningf(WARN_OTHER, &pp_token.source_position, "extra tokens at end of #endif");
1678                 }
1679                 eat_pp_directive();
1680         }
1681
1682         pp_conditional_t *conditional = conditional_stack;
1683         if (conditional == NULL) {
1684                 errorf(&pp_token.source_position, "#endif without prior #if");
1685                 return;
1686         }
1687
1688         if (!conditional->skip) {
1689                 skip_mode = false;
1690         }
1691         pop_conditional();
1692 }
1693
1694 static void parse_preprocessing_directive(void)
1695 {
1696         in_pp_directive = true;
1697         eat_pp('#');
1698
1699         if (skip_mode) {
1700                 switch (pp_token.type) {
1701                 case TP_ifdef:
1702                 case TP_ifndef:
1703                         parse_ifdef_ifndef_directive();
1704                         break;
1705                 case TP_else:
1706                         parse_else_directive();
1707                         break;
1708                 case TP_endif:
1709                         parse_endif_directive();
1710                         break;
1711                 default:
1712                         eat_pp_directive();
1713                         break;
1714                 }
1715         } else {
1716                 switch (pp_token.type) {
1717                 case TP_define:
1718                         parse_define_directive();
1719                         break;
1720                 case TP_undef:
1721                         parse_undef_directive();
1722                         break;
1723                 case TP_ifdef:
1724                 case TP_ifndef:
1725                         parse_ifdef_ifndef_directive();
1726                         break;
1727                 case TP_else:
1728                         parse_else_directive();
1729                         break;
1730                 case TP_endif:
1731                         parse_endif_directive();
1732                         break;
1733                 case TP_include:
1734                         parse_include_directive();
1735                         break;
1736                 default:
1737                         if (info.at_line_begin) {
1738                                 /* the nop directive "#" */
1739                                 break;
1740                         }
1741                         errorf(&pp_token.source_position,
1742                                    "invalid preprocessing directive #%t", &pp_token);
1743                         eat_pp_directive();
1744                         break;
1745                 }
1746         }
1747
1748         in_pp_directive = false;
1749         assert(info.at_line_begin);
1750 }
1751
1752 int pptest_main(int argc, char **argv);
1753 int pptest_main(int argc, char **argv)
1754 {
1755         init_symbol_table();
1756         init_tokens();
1757
1758         obstack_init(&pp_obstack);
1759         obstack_init(&input_obstack);
1760
1761         const char *filename = "t.c";
1762         if (argc > 1)
1763                 filename = argv[1];
1764
1765         out = stdout;
1766
1767         /* just here for gcc compatibility */
1768         fprintf(out, "# 1 \"%s\"\n", filename);
1769         fprintf(out, "# 1 \"<built-in>\"\n");
1770         fprintf(out, "# 1 \"<command-line>\"\n");
1771
1772         bool ok = open_input(filename);
1773         assert(ok);
1774
1775         while (true) {
1776                 if (pp_token.type == '#' && info.at_line_begin) {
1777                         parse_preprocessing_directive();
1778                         continue;
1779                 } else if (pp_token.type == TP_EOF) {
1780                         goto end_of_main_loop;
1781                 } else if (pp_token.type == TP_IDENTIFIER && !in_pp_directive) {
1782                         symbol_t *symbol = pp_token.symbol;
1783                         pp_definition_t *pp_definition = symbol->pp_definition;
1784                         if (pp_definition != NULL && !pp_definition->is_expanding) {
1785                                 expansion_pos = pp_token.source_position;
1786                                 if (pp_definition->has_parameters) {
1787                                         source_position_t position = pp_token.source_position;
1788                                         add_token_info_t old_info = info;
1789                                         next_preprocessing_token();
1790                                         add_token_info_t new_info = info;
1791
1792                                         /* no opening brace -> no expansion */
1793                                         if (pp_token.type == '(') {
1794                                                 eat_pp('(');
1795
1796                                                 /* parse arguments (TODO) */
1797                                                 while (pp_token.type != TP_EOF && pp_token.type != ')')
1798                                                         next_preprocessing_token();
1799                                         } else {
1800                                                 token_t next_token = pp_token;
1801                                                 /* restore identifier token */
1802                                                 pp_token.type            = TP_IDENTIFIER;
1803                                                 pp_token.symbol          = symbol;
1804                                                 pp_token.source_position = position;
1805                                                 info = old_info;
1806                                                 emit_pp_token();
1807
1808                                                 info = new_info;
1809                                                 pp_token = next_token;
1810                                                 continue;
1811                                         }
1812                                         info = old_info;
1813                                 }
1814                                 pp_definition->expand_pos   = 0;
1815                                 pp_definition->is_expanding = true;
1816                                 current_expansion           = pp_definition;
1817                                 expand_next();
1818                                 continue;
1819                         }
1820                 }
1821
1822                 emit_pp_token();
1823                 next_preprocessing_token();
1824         }
1825 end_of_main_loop:
1826
1827         fputc('\n', out);
1828         check_unclosed_conditionals();
1829         close_input();
1830
1831         obstack_free(&input_obstack, NULL);
1832         obstack_free(&pp_obstack, NULL);
1833
1834         exit_tokens();
1835         exit_symbol_table();
1836
1837         return 0;
1838 }