do not add searchpath stuff into parse_headername
[cparser] / preprocessor.c
1 #include <config.h>
2
3 #include <assert.h>
4 #include <errno.h>
5 #include <string.h>
6 #include <stdbool.h>
7 #include <ctype.h>
8
9 #include "token_t.h"
10 #include "symbol_t.h"
11 #include "adt/util.h"
12 #include "adt/error.h"
13 #include "lang_features.h"
14 #include "diagnostic.h"
15 #include "string_rep.h"
16 #include "input.h"
17
/* Slots kept free at the front of pp_input_t.buf so that put_back() always
 * has room in front of freshly decoded characters. */
#define MAX_PUTBACK   3
/* 199 is for gcc "compatibility"; presumably the maximum include nesting
 * depth -- the limit is not used in the code visible here. */
#define INCLUDE_LIMIT 199
20
21 struct pp_argument_t {
22         size_t   list_len;
23         token_t *token_list;
24 };
25
26 struct pp_definition_t {
27         symbol_t          *symbol;
28         source_position_t  source_position;
29         pp_definition_t   *parent_expansion;
30         size_t             expand_pos;
31         bool               is_variadic    : 1;
32         bool               is_expanding   : 1;
33         bool               has_parameters : 1;
34         size_t             n_parameters;
35         symbol_t          *parameters;
36
37         /* replacement */
38         size_t             list_len;
39         token_t           *token_list;
40
41 };
42
43 typedef struct pp_conditional_t pp_conditional_t;
44 struct pp_conditional_t {
45         source_position_t  source_position;
46         bool               condition;
47         bool               in_else;
48         bool               skip; /**< conditional in skip mode (then+else gets skipped) */
49         pp_conditional_t  *parent;
50 };
51
52 typedef struct pp_input_t pp_input_t;
53 struct pp_input_t {
54         FILE              *file;
55         input_t           *input;
56         utf32              c;
57         utf32              buf[1024+MAX_PUTBACK];
58         const utf32       *bufend;
59         const utf32       *bufpos;
60         source_position_t  position;
61         bool               had_non_space;
62         pp_input_t        *parent;
63 };
64
65 static pp_input_t input;
66
67 static pp_input_t     *input_stack;
68 static unsigned        n_inputs;
69 static struct obstack  input_obstack;
70
71 static pp_conditional_t *conditional_stack;
72
73 static token_t            pp_token;
74 static bool               resolve_escape_sequences = false;
75 static bool               do_print_spaces          = true;
76 static bool               do_expansions;
77 static bool               skip_mode;
78 static FILE              *out;
79 static struct obstack     pp_obstack;
80 static unsigned           counted_newlines;
81 static unsigned           counted_spaces;
82 static const char        *printed_input_name = NULL;
83 static pp_definition_t   *current_expansion  = NULL;
84
85 static inline void next_char(void);
86 static void next_preprocessing_token(void);
87 static void print_line_directive(const source_position_t *pos, const char *add);
88
89 static bool open_input(const char *filename)
90 {
91         FILE *file = fopen(filename, "r");
92         if (file == NULL)
93                 return false;
94
95         input.file                = file;
96         input.input               = input_from_stream(file, NULL);
97         input.bufend              = NULL;
98         input.bufpos              = NULL;
99         input.had_non_space       = false;
100         input.position.input_name = filename;
101         input.position.lineno     = 1;
102
103         /* indicate that we're at a new input */
104         print_line_directive(&input.position, input_stack != NULL ? "1" : NULL);
105
106         counted_newlines = 0;
107         counted_spaces   = 0;
108
109         /* read first char and first token */
110         next_char();
111         next_preprocessing_token();
112
113         return true;
114 }
115
116 static void close_input(void)
117 {
118         /* ensure we have a newline at EOF */
119         if (input.had_non_space) {
120                 fputc('\n', out);
121         }
122
123         input_free(input.input);
124         assert(input.file != NULL);
125
126         fclose(input.file);
127         input.input  = NULL;
128         input.file   = NULL;
129         input.bufend = NULL;
130         input.bufpos = NULL;
131         input.c      = EOF;
132 }
133
134 static void push_input(void)
135 {
136         pp_input_t *saved_input
137                 = obstack_alloc(&input_obstack, sizeof(*saved_input));
138
139         memcpy(saved_input, &input, sizeof(*saved_input));
140
141         /* adjust buffer positions */
142         if (input.bufpos != NULL)
143                 saved_input->bufpos = saved_input->buf + (input.bufpos - input.buf);
144         if (input.bufend != NULL)
145                 saved_input->bufend = saved_input->buf + (input.bufend - input.buf);
146
147         saved_input->parent = input_stack;
148         input_stack         = saved_input;
149         ++n_inputs;
150 }
151
152 static void pop_restore_input(void)
153 {
154         assert(n_inputs > 0);
155         assert(input_stack != NULL);
156
157         pp_input_t *saved_input = input_stack;
158
159         memcpy(&input, saved_input, sizeof(input));
160         input.parent = NULL;
161
162         /* adjust buffer positions */
163         if (saved_input->bufpos != NULL)
164                 input.bufpos = input.buf + (saved_input->bufpos - saved_input->buf);
165         if (saved_input->bufend != NULL)
166                 input.bufend = input.buf + (saved_input->bufend - saved_input->buf);
167
168         input_stack = saved_input->parent;
169         obstack_free(&input_obstack, saved_input);
170         --n_inputs;
171 }
172
173 /**
174  * Prints a parse error message at the current token.
175  *
176  * @param msg   the error message
177  */
178 static void parse_error(const char *msg)
179 {
180         errorf(&pp_token.source_position,  "%s", msg);
181 }
182
183 static inline void next_real_char(void)
184 {
185         assert(input.bufpos <= input.bufend);
186         if (input.bufpos >= input.bufend) {
187                 size_t n = decode(input.input, input.buf + MAX_PUTBACK,
188                                   sizeof(input.buf)/sizeof(input.buf[0]) - MAX_PUTBACK);
189                 if (n == 0) {
190                         input.c = EOF;
191                         return;
192                 }
193                 input.bufpos = input.buf + MAX_PUTBACK;
194                 input.bufend = input.bufpos + n;
195         }
196         input.c = *input.bufpos++;
197         ++input.position.colno;
198 }
199
200 /**
201  * Put a character back into the buffer.
202  *
203  * @param pc  the character to put back
204  */
205 static inline void put_back(utf32 const pc)
206 {
207         assert(input.bufpos > input.buf);
208         *(--input.bufpos - input.buf + input.buf) = (char) pc;
209         --input.position.colno;
210 }
211
/**
 * Expands to the case labels recognising a line break (CR, CR LF or LF)
 * inside a switch over input.c.  The line break is consumed and the line
 * counter advanced before @p code runs.  @p code must leave the switch
 * itself (break, return, goto ...); otherwise the CR arm falls through into
 * the LF arm.
 */
#define MATCH_NEWLINE(code)              \
        case '\r':                       \
                next_char();             \
                if (input.c == '\n') {   \
                        next_char();     \
                }                        \
                ++input.position.lineno; \
                code                     \
        case '\n':                       \
                next_char();             \
                ++input.position.lineno; \
                code

/** Consumes the current character, which must be @p c_type (asserted). */
#define eat(c_type) (assert(input.c == c_type), next_char())
226
227 static void maybe_concat_lines(void)
228 {
229         eat('\\');
230
231         switch (input.c) {
232         MATCH_NEWLINE(return;)
233
234         default:
235                 break;
236         }
237
238         put_back(input.c);
239         input.c = '\\';
240 }
241
242 /**
243  * Set c to the next input character, ie.
244  * after expanding trigraphs.
245  */
246 static inline void next_char(void)
247 {
248         next_real_char();
249
250         /* filter trigraphs and concatenated lines */
251         if (UNLIKELY(input.c == '\\')) {
252                 maybe_concat_lines();
253                 goto end_of_next_char;
254         }
255
256         if (LIKELY(input.c != '?'))
257                 goto end_of_next_char;
258
259         next_real_char();
260         if (LIKELY(input.c != '?')) {
261                 put_back(input.c);
262                 input.c = '?';
263                 goto end_of_next_char;
264         }
265
266         next_real_char();
267         switch (input.c) {
268         case '=': input.c = '#'; break;
269         case '(': input.c = '['; break;
270         case '/': input.c = '\\'; maybe_concat_lines(); break;
271         case ')': input.c = ']'; break;
272         case '\'': input.c = '^'; break;
273         case '<': input.c = '{'; break;
274         case '!': input.c = '|'; break;
275         case '>': input.c = '}'; break;
276         case '-': input.c = '~'; break;
277         default:
278                 put_back(input.c);
279                 put_back('?');
280                 input.c = '?';
281                 break;
282         }
283
284 end_of_next_char:;
285 #ifdef DEBUG_CHARS
286         printf("nchar '%c'\n", input.c);
287 #endif
288 }
289
290
291
/**
 * Tells whether a character is an octal digit ('0' to '7').
 *
 * @param chr  the character to check
 * @return true for '0'..'7', false for everything else
 */
static inline bool is_octal_digit(int chr)
{
        /* the decimal digits are contiguous in every C character set */
        return '0' <= chr && chr <= '7';
}
313
/**
 * Returns the numeric value (0..15) of a decimal or hexadecimal digit
 * character, accepting both upper and lower case letters.  Panics for any
 * other character.
 *
 * A table lookup is used because the letters are not guaranteed to have
 * consecutive codes.
 */
static int digit_value(int digit)
{
        static const char lower_digits[] = "0123456789abcdef";
        static const char upper_digits[] = "0123456789ABCDEF";

        for (int value = 0; value < 16; ++value) {
                if (digit == lower_digits[value] || digit == upper_digits[value])
                        return value;
        }
        panic("wrong character given");
}
347
348 /**
349  * Parses an octal character sequence.
350  *
351  * @param first_digit  the already read first digit
352  */
353 static int parse_octal_sequence(const int first_digit)
354 {
355         assert(is_octal_digit(first_digit));
356         int value = digit_value(first_digit);
357         if (!is_octal_digit(input.c)) return value;
358         value = 8 * value + digit_value(input.c);
359         next_char();
360         if (!is_octal_digit(input.c)) return value;
361         value = 8 * value + digit_value(input.c);
362         next_char();
363
364         if (char_is_signed) {
365                 return (signed char) value;
366         } else {
367                 return (unsigned char) value;
368         }
369 }
370
371 /**
372  * Parses a hex character sequence.
373  */
374 static int parse_hex_sequence(void)
375 {
376         int value = 0;
377         while (isxdigit(input.c)) {
378                 value = 16 * value + digit_value(input.c);
379                 next_char();
380         }
381
382         if (char_is_signed) {
383                 return (signed char) value;
384         } else {
385                 return (unsigned char) value;
386         }
387 }
388
389 /**
390  * Parse an escape sequence.
391  */
392 static int parse_escape_sequence(void)
393 {
394         eat('\\');
395
396         int ec = input.c;
397         next_char();
398
399         switch (ec) {
400         case '"':  return '"';
401         case '\'': return '\'';
402         case '\\': return '\\';
403         case '?': return '\?';
404         case 'a': return '\a';
405         case 'b': return '\b';
406         case 'f': return '\f';
407         case 'n': return '\n';
408         case 'r': return '\r';
409         case 't': return '\t';
410         case 'v': return '\v';
411         case 'x':
412                 return parse_hex_sequence();
413         case '0':
414         case '1':
415         case '2':
416         case '3':
417         case '4':
418         case '5':
419         case '6':
420         case '7':
421                 return parse_octal_sequence(ec);
422         case EOF:
423                 parse_error("reached end of file while parsing escape sequence");
424                 return EOF;
425         default:
426                 parse_error("unknown escape sequence");
427                 return EOF;
428         }
429 }
430
431 static void parse_string_literal(void)
432 {
433         const unsigned start_linenr = input.position.lineno;
434
435         eat('"');
436
437         int tc;
438         while (true) {
439                 switch (input.c) {
440                 case '\\':
441                         if (resolve_escape_sequences) {
442                                 tc = parse_escape_sequence();
443                                 obstack_1grow(&symbol_obstack, (char) tc);
444                         } else {
445                                 obstack_1grow(&symbol_obstack, (char) input.c);
446                                 next_char();
447                                 obstack_1grow(&symbol_obstack, (char) input.c);
448                                 next_char();
449                         }
450                         break;
451
452                 case EOF: {
453                         source_position_t source_position;
454                         source_position.input_name = pp_token.source_position.input_name;
455                         source_position.lineno     = start_linenr;
456                         errorf(&source_position, "string has no end");
457                         pp_token.type = TP_ERROR;
458                         return;
459                 }
460
461                 case '"':
462                         next_char();
463                         goto end_of_string;
464
465                 default:
466                         obstack_1grow(&symbol_obstack, (char) input.c);
467                         next_char();
468                         break;
469                 }
470         }
471
472 end_of_string:
473         /* add finishing 0 to the string */
474         obstack_1grow(&symbol_obstack, '\0');
475         const size_t      size   = (size_t)obstack_object_size(&symbol_obstack);
476         const char *const string = obstack_finish(&symbol_obstack);
477
478 #if 0 /* TODO hash */
479         /* check if there is already a copy of the string */
480         result = strset_insert(&stringset, string);
481         if (result != string) {
482                 obstack_free(&symbol_obstack, string);
483         }
484 #else
485         const char *const result = string;
486 #endif
487
488         pp_token.type          = TP_STRING_LITERAL;
489         pp_token.literal.begin = result;
490         pp_token.literal.size  = size;
491 }
492
493 static void parse_wide_character_constant(void)
494 {
495         eat('\'');
496
497         int found_char = 0;
498         while (true) {
499                 switch (input.c) {
500                 case '\\':
501                         found_char = parse_escape_sequence();
502                         break;
503
504                 MATCH_NEWLINE(
505                         parse_error("newline while parsing character constant");
506                         break;
507                 )
508
509                 case '\'':
510                         next_char();
511                         goto end_of_wide_char_constant;
512
513                 case EOF:
514                         parse_error("EOF while parsing character constant");
515                         pp_token.type = TP_ERROR;
516                         return;
517
518                 default:
519                         if (found_char != 0) {
520                                 parse_error("more than 1 characters in character "
521                                             "constant");
522                                 goto end_of_wide_char_constant;
523                         } else {
524                                 found_char = input.c;
525                                 next_char();
526                         }
527                         break;
528                 }
529         }
530
531 end_of_wide_char_constant:
532         pp_token.type       = TP_WIDE_CHARACTER_CONSTANT;
533         /* TODO... */
534 }
535
536 static void parse_character_constant(void)
537 {
538         const unsigned start_linenr = input.position.lineno;
539
540         eat('\'');
541
542         int tc;
543         while (true) {
544                 switch (input.c) {
545                 case '\\':
546                         tc = parse_escape_sequence();
547                         obstack_1grow(&symbol_obstack, (char) tc);
548                         break;
549
550                 MATCH_NEWLINE(
551                         parse_error("newline while parsing character constant");
552                         break;
553                 )
554
555                 case EOF: {
556                         source_position_t source_position;
557                         source_position.input_name = pp_token.source_position.input_name;
558                         source_position.lineno     = start_linenr;
559                         errorf(&source_position, "EOF while parsing character constant");
560                         pp_token.type = TP_ERROR;
561                         return;
562                 }
563
564                 case '\'':
565                         next_char();
566                         goto end_of_char_constant;
567
568                 default:
569                         obstack_1grow(&symbol_obstack, (char) input.c);
570                         next_char();
571                         break;
572
573                 }
574         }
575
576 end_of_char_constant:;
577         const size_t      size   = (size_t)obstack_object_size(&symbol_obstack);
578         const char *const string = obstack_finish(&symbol_obstack);
579
580         pp_token.type          = TP_CHARACTER_CONSTANT;
581         pp_token.literal.begin = string;
582         pp_token.literal.size  = size;
583 }
584
/* case labels for every identifier character except digits and the letters
 * e, p, E and P, which parse_number() has to treat specially */
#define SYMBOL_CHARS_WITHOUT_E_P \
        case 'a': case 'b': case 'c': case 'd': case 'f': case 'g': \
        case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': \
        case 'n': case 'o': case 'q': case 'r': case 's': case 't': \
        case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': \
        case 'A': case 'B': case 'C': case 'D': case 'F': case 'G': \
        case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': \
        case 'N': case 'O': case 'Q': case 'R': case 'S': case 'T': \
        case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': \
        case '_':
635
/* case labels for all letters and the underscore */
#define SYMBOL_CHARS \
        SYMBOL_CHARS_WITHOUT_E_P \
        case 'e': case 'p': case 'E': case 'P':
642
/* case labels for the decimal digits */
#define DIGITS \
        case '0': case '1': case '2': case '3': case '4': \
        case '5': case '6': case '7': case '8': case '9':
654
655 /**
656  * returns next final token from a preprocessor macro expansion
657  */
658 static void expand_next(void)
659 {
660         assert(current_expansion != NULL);
661
662         pp_definition_t *definition = current_expansion;
663
664 restart:
665         if (definition->list_len == 0
666                         || definition->expand_pos >= definition->list_len) {
667                 /* we're finished with the current macro, move up 1 level in the
668                  * expansion stack */
669                 pp_definition_t *parent = definition->parent_expansion;
670                 definition->parent_expansion = NULL;
671                 definition->is_expanding     = false;
672
673                 /* it was the outermost expansion, parse normal pptoken */
674                 if (parent == NULL) {
675                         current_expansion = NULL;
676                         next_preprocessing_token();
677                         return;
678                 }
679                 definition        = parent;
680                 current_expansion = definition;
681                 goto restart;
682         }
683         pp_token = definition->token_list[definition->expand_pos];
684         ++definition->expand_pos;
685
686         if (pp_token.type != TP_IDENTIFIER)
687                 return;
688
689         /* if it was an identifier then we might need to expand again */
690         pp_definition_t *symbol_definition = pp_token.symbol->pp_definition;
691         if (symbol_definition != NULL && !symbol_definition->is_expanding) {
692                 symbol_definition->parent_expansion = definition;
693                 symbol_definition->expand_pos       = 0;
694                 symbol_definition->is_expanding     = true;
695                 definition                          = symbol_definition;
696                 current_expansion                   = definition;
697                 goto restart;
698         }
699 }
700
701 static void skip_line_comment(void)
702 {
703         if (do_print_spaces)
704                 counted_spaces++;
705
706         while (true) {
707                 switch (input.c) {
708                 case EOF:
709                         return;
710
711                 case '\n':
712                 case '\r':
713                         return;
714
715                 default:
716                         next_char();
717                         break;
718                 }
719         }
720 }
721
722 static void skip_multiline_comment(void)
723 {
724         if (do_print_spaces)
725                 counted_spaces++;
726
727         unsigned start_linenr = input.position.lineno;
728         while (true) {
729                 switch (input.c) {
730                 case '/':
731                         next_char();
732                         if (input.c == '*') {
733                                 /* TODO: nested comment, warn here */
734                         }
735                         break;
736                 case '*':
737                         next_char();
738                         if (input.c == '/') {
739                                 next_char();
740                                 return;
741                         }
742                         break;
743
744                 MATCH_NEWLINE(
745                         if (do_print_spaces) {
746                                 counted_newlines++;
747                                 counted_spaces = 0;
748                         }
749                         break;
750                 )
751
752                 case EOF: {
753                         source_position_t source_position;
754                         source_position.input_name = pp_token.source_position.input_name;
755                         source_position.lineno     = start_linenr;
756                         errorf(&source_position, "at end of file while looking for comment end");
757                         return;
758                 }
759
760                 default:
761                         next_char();
762                         break;
763                 }
764         }
765 }
766
767 /* skip spaces advancing at the start of the next preprocessing token */
768 static void skip_spaces(bool skip_newline)
769 {
770         while (true) {
771                 switch (input.c) {
772                 case ' ':
773                 case '\t':
774                         if (do_print_spaces)
775                                 counted_spaces++;
776                         next_char();
777                         continue;
778                 case '/':
779                         next_char();
780                         if (input.c == '/') {
781                                 next_char();
782                                 skip_line_comment();
783                                 continue;
784                         } else if (input.c == '*') {
785                                 next_char();
786                                 skip_multiline_comment();
787                                 continue;
788                         } else {
789                                 put_back(input.c);
790                                 input.c = '/';
791                         }
792                         return;
793
794                 case '\r':
795                         if (!skip_newline)
796                                 return;
797
798                         next_char();
799                         if (input.c == '\n') {
800                                 next_char();
801                         }
802                         ++input.position.lineno;
803                         if (do_print_spaces)
804                                 ++counted_newlines;
805                         continue;
806
807                 case '\n':
808                         if (!skip_newline)
809                                 return;
810
811                         next_char();
812                         ++input.position.lineno;
813                         if (do_print_spaces)
814                                 ++counted_newlines;
815                         continue;
816
817                 default:
818                         return;
819                 }
820         }
821 }
822
823 static void eat_pp(int type)
824 {
825         (void) type;
826         assert(pp_token.type == type);
827         next_preprocessing_token();
828 }
829
830 static void parse_symbol(void)
831 {
832         obstack_1grow(&symbol_obstack, (char) input.c);
833         next_char();
834
835         while (true) {
836                 switch (input.c) {
837                 DIGITS
838                 SYMBOL_CHARS
839                         obstack_1grow(&symbol_obstack, (char) input.c);
840                         next_char();
841                         break;
842
843                 default:
844                         goto end_symbol;
845                 }
846         }
847
848 end_symbol:
849         obstack_1grow(&symbol_obstack, '\0');
850         char *string = obstack_finish(&symbol_obstack);
851
852         /* might be a wide string or character constant ( L"string"/L'c' ) */
853         if (input.c == '"' && string[0] == 'L' && string[1] == '\0') {
854                 obstack_free(&symbol_obstack, string);
855                 /* TODO */
856                 return;
857         } else if (input.c == '\'' && string[0] == 'L' && string[1] == '\0') {
858                 obstack_free(&symbol_obstack, string);
859                 parse_wide_character_constant();
860                 return;
861         }
862
863         symbol_t *symbol = symbol_table_insert(string);
864
865         pp_token.type   = symbol->pp_ID;
866         pp_token.symbol = symbol;
867
868         /* we can free the memory from symbol obstack if we already had an entry in
869          * the symbol table */
870         if (symbol->string != string) {
871                 obstack_free(&symbol_obstack, string);
872         }
873         if (!do_expansions)
874                 return;
875
876         pp_definition_t *pp_definition = symbol->pp_definition;
877         if (pp_definition == NULL)
878                 return;
879
880         if (pp_definition->has_parameters) {
881                 skip_spaces(true);
882                 /* no opening brace -> no expansion */
883                 if (input.c != '(')
884                         return;
885                 next_preprocessing_token();
886                 eat_pp('(');
887
888                 /* parse arguments (TODO) */
889                 while (pp_token.type != TP_EOF && pp_token.type != ')')
890                         next_preprocessing_token();
891                 next_preprocessing_token();
892         }
893
894         pp_definition->expand_pos   = 0;
895         pp_definition->is_expanding = true,
896         current_expansion           = pp_definition;
897         expand_next();
898 }
899
900 static void parse_number(void)
901 {
902         obstack_1grow(&symbol_obstack, (char) input.c);
903         next_char();
904
905         while (true) {
906                 switch (input.c) {
907                 case '.':
908                 DIGITS
909                 SYMBOL_CHARS_WITHOUT_E_P
910                         obstack_1grow(&symbol_obstack, (char) input.c);
911                         next_char();
912                         break;
913
914                 case 'e':
915                 case 'p':
916                 case 'E':
917                 case 'P':
918                         obstack_1grow(&symbol_obstack, (char) input.c);
919                         next_char();
920                         if (input.c == '+' || input.c == '-') {
921                                 obstack_1grow(&symbol_obstack, (char) input.c);
922                                 next_char();
923                         }
924                         break;
925
926                 default:
927                         goto end_number;
928                 }
929         }
930
931 end_number:
932         obstack_1grow(&symbol_obstack, '\0');
933         size_t  size   = obstack_object_size(&symbol_obstack);
934         char   *string = obstack_finish(&symbol_obstack);
935
936         pp_token.type          = TP_NUMBER;
937         pp_token.literal.begin = string;
938         pp_token.literal.size  = size;
939 }
940
941
/*
 * Building blocks for tokens whose type depends on the following character.
 * A sequence is written as MAYBE_PROLOG, any number of MAYBE() arms (or
 * hand-written case arms) and a closing ELSE()/ELSE_CODE().
 */

/* consumes the current character and opens a switch over the next one */
#define MAYBE_PROLOG             \
        next_char();             \
        while (true) {           \
                switch (input.c) {

/* if the next character is ch: consume it, set the token type and return */
#define MAYBE(ch, set_type)                   \
                case ch:                      \
                        next_char();          \
                        pp_token.type = set_type; \
                        return;

/* any other character: run code, return, and close what MAYBE_PROLOG opened */
#define ELSE_CODE(code)          \
                default:         \
                        code     \
                        return;  \
                }                \
        }

/* any other character: just set the token type */
#define ELSE(set_type)                    \
        ELSE_CODE(                        \
                pp_token.type = set_type; \
        )
964
965 static void next_preprocessing_token(void)
966 {
967         if (current_expansion != NULL) {
968                 expand_next();
969                 return;
970         }
971
972         pp_token.source_position = input.position;
973
974 restart:
975         switch (input.c) {
976         case ' ':
977         case '\t':
978                 if (do_print_spaces)
979                         counted_spaces++;
980                 next_char();
981                 goto restart;
982
983         MATCH_NEWLINE(
984                 counted_newlines++;
985                 counted_spaces = 0;
986                 pp_token.type = '\n';
987                 return;
988         )
989
990         SYMBOL_CHARS
991                 parse_symbol();
992                 return;
993
994         DIGITS
995                 parse_number();
996                 return;
997
998         case '"':
999                 parse_string_literal();
1000                 return;
1001
1002         case '\'':
1003                 parse_character_constant();
1004                 return;
1005
1006         case '.':
1007                 MAYBE_PROLOG
1008                         case '0':
1009                         case '1':
1010                         case '2':
1011                         case '3':
1012                         case '4':
1013                         case '5':
1014                         case '6':
1015                         case '7':
1016                         case '8':
1017                         case '9':
1018                                 put_back(input.c);
1019                                 input.c = '.';
1020                                 parse_number();
1021                                 return;
1022
1023                         case '.':
1024                                 MAYBE_PROLOG
1025                                 MAYBE('.', TP_DOTDOTDOT)
1026                                 ELSE_CODE(
1027                                         put_back(input.c);
1028                                         input.c = '.';
1029                                         pp_token.type = '.';
1030                                 )
1031                 ELSE('.')
1032         case '&':
1033                 MAYBE_PROLOG
1034                 MAYBE('&', TP_ANDAND)
1035                 MAYBE('=', TP_ANDEQUAL)
1036                 ELSE('&')
1037         case '*':
1038                 MAYBE_PROLOG
1039                 MAYBE('=', TP_ASTERISKEQUAL)
1040                 ELSE('*')
1041         case '+':
1042                 MAYBE_PROLOG
1043                 MAYBE('+', TP_PLUSPLUS)
1044                 MAYBE('=', TP_PLUSEQUAL)
1045                 ELSE('+')
1046         case '-':
1047                 MAYBE_PROLOG
1048                 MAYBE('>', TP_MINUSGREATER)
1049                 MAYBE('-', TP_MINUSMINUS)
1050                 MAYBE('=', TP_MINUSEQUAL)
1051                 ELSE('-')
1052         case '!':
1053                 MAYBE_PROLOG
1054                 MAYBE('=', TP_EXCLAMATIONMARKEQUAL)
1055                 ELSE('!')
1056         case '/':
1057                 MAYBE_PROLOG
1058                 MAYBE('=', TP_SLASHEQUAL)
1059                         case '*':
1060                                 next_char();
1061                                 skip_multiline_comment();
1062                                 goto restart;
1063                         case '/':
1064                                 next_char();
1065                                 skip_line_comment();
1066                                 goto restart;
1067                 ELSE('/')
1068         case '%':
1069                 MAYBE_PROLOG
1070                 MAYBE('>', '}')
1071                 MAYBE('=', TP_PERCENTEQUAL)
1072                         case ':':
1073                                 MAYBE_PROLOG
1074                                         case '%':
1075                                                 MAYBE_PROLOG
1076                                                 MAYBE(':', TP_HASHHASH)
1077                                                 ELSE_CODE(
1078                                                         put_back(input.c);
1079                                                         input.c = '%';
1080                                                         pp_token.type = '#';
1081                                                 )
1082                                 ELSE('#')
1083                 ELSE('%')
1084         case '<':
1085                 MAYBE_PROLOG
1086                 MAYBE(':', '[')
1087                 MAYBE('%', '{')
1088                 MAYBE('=', TP_LESSEQUAL)
1089                         case '<':
1090                                 MAYBE_PROLOG
1091                                 MAYBE('=', TP_LESSLESSEQUAL)
1092                                 ELSE(TP_LESSLESS)
1093                 ELSE('<')
1094         case '>':
1095                 MAYBE_PROLOG
1096                 MAYBE('=', TP_GREATEREQUAL)
1097                         case '>':
1098                                 MAYBE_PROLOG
1099                                 MAYBE('=', TP_GREATERGREATEREQUAL)
1100                                 ELSE(TP_GREATERGREATER)
1101                 ELSE('>')
1102         case '^':
1103                 MAYBE_PROLOG
1104                 MAYBE('=', TP_CARETEQUAL)
1105                 ELSE('^')
1106         case '|':
1107                 MAYBE_PROLOG
1108                 MAYBE('=', TP_PIPEEQUAL)
1109                 MAYBE('|', TP_PIPEPIPE)
1110                 ELSE('|')
1111         case ':':
1112                 MAYBE_PROLOG
1113                 MAYBE('>', ']')
1114                 ELSE(':')
1115         case '=':
1116                 MAYBE_PROLOG
1117                 MAYBE('=', TP_EQUALEQUAL)
1118                 ELSE('=')
1119         case '#':
1120                 MAYBE_PROLOG
1121                 MAYBE('#', TP_HASHHASH)
1122                 ELSE('#')
1123
1124         case '?':
1125         case '[':
1126         case ']':
1127         case '(':
1128         case ')':
1129         case '{':
1130         case '}':
1131         case '~':
1132         case ';':
1133         case ',':
1134         case '\\':
1135                 pp_token.type = input.c;
1136                 next_char();
1137                 return;
1138
1139         case EOF:
1140                 if (input_stack != NULL) {
1141                         close_input();
1142                         pop_restore_input();
1143                         counted_newlines = 0;
1144                         counted_spaces   = 0;
1145                         /* hack to output correct line number */
1146                         print_line_directive(&input.position, "2");
1147                         next_preprocessing_token();
1148                 } else {
1149                         pp_token.type = TP_EOF;
1150                 }
1151                 return;
1152
1153         default:
1154                 next_char();
1155                 errorf(&pp_token.source_position, "unknown character '%c' found\n",
1156                        input.c);
1157                 pp_token.type = TP_ERROR;
1158                 return;
1159         }
1160 }
1161
1162 static void print_quoted_string(const char *const string)
1163 {
1164         fputc('"', out);
1165         for (const char *c = string; *c != 0; ++c) {
1166                 switch (*c) {
1167                 case '"': fputs("\\\"", out); break;
1168                 case '\\':  fputs("\\\\", out); break;
1169                 case '\a':  fputs("\\a", out); break;
1170                 case '\b':  fputs("\\b", out); break;
1171                 case '\f':  fputs("\\f", out); break;
1172                 case '\n':  fputs("\\n", out); break;
1173                 case '\r':  fputs("\\r", out); break;
1174                 case '\t':  fputs("\\t", out); break;
1175                 case '\v':  fputs("\\v", out); break;
1176                 case '\?':  fputs("\\?", out); break;
1177                 default:
1178                         if (!isprint(*c)) {
1179                                 fprintf(out, "\\%03o", (unsigned)*c);
1180                                 break;
1181                         }
1182                         fputc(*c, out);
1183                         break;
1184                 }
1185         }
1186         fputc('"', out);
1187 }
1188
1189 static void print_line_directive(const source_position_t *pos, const char *add)
1190 {
1191         fprintf(out, "# %u ", pos->lineno);
1192         print_quoted_string(pos->input_name);
1193         if (add != NULL) {
1194                 fputc(' ', out);
1195                 fputs(add, out);
1196         }
1197         fputc('\n', out);
1198
1199         printed_input_name = pos->input_name;
1200 }
1201
1202 static void print_spaces(void)
1203 {
1204         if (counted_newlines >= 9) {
1205                 if (input.had_non_space) {
1206                         fputc('\n', out);
1207                 }
1208                 print_line_directive(&pp_token.source_position, NULL);
1209                 counted_newlines = 0;
1210         } else {
1211                 for (unsigned i = 0; i < counted_newlines; ++i)
1212                         fputc('\n', out);
1213                 counted_newlines = 0;
1214         }
1215         for (unsigned i = 0; i < counted_spaces; ++i)
1216                 fputc(' ', out);
1217         counted_spaces = 0;
1218 }
1219
1220 static void emit_pp_token(void)
1221 {
1222         if (skip_mode)
1223                 return;
1224
1225         if (pp_token.type != '\n') {
1226                 print_spaces();
1227                 input.had_non_space = true;
1228         }
1229
1230         switch (pp_token.type) {
1231         case TP_IDENTIFIER:
1232                 fputs(pp_token.symbol->string, out);
1233                 break;
1234         case TP_NUMBER:
1235                 fputs(pp_token.literal.begin, out);
1236                 break;
1237         case TP_STRING_LITERAL:
1238                 fputc('"', out);
1239                 fputs(pp_token.literal.begin, out);
1240                 fputc('"', out);
1241                 break;
1242         case '\n':
1243                 break;
1244         default:
1245                 print_pp_token_type(out, pp_token.type);
1246                 break;
1247         }
1248 }
1249
1250 static void eat_pp_directive(void)
1251 {
1252         while (pp_token.type != '\n' && pp_token.type != TP_EOF) {
1253                 next_preprocessing_token();
1254         }
1255 }
1256
1257 static bool strings_equal(const string_t *string1, const string_t *string2)
1258 {
1259         size_t size = string1->size;
1260         if (size != string2->size)
1261                 return false;
1262
1263         const char *c1 = string1->begin;
1264         const char *c2 = string2->begin;
1265         for (size_t i = 0; i < size; ++i, ++c1, ++c2) {
1266                 if (*c1 != *c2)
1267                         return false;
1268         }
1269         return true;
1270 }
1271
1272 static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
1273 {
1274         if (token1->type != token2->type)
1275                 return false;
1276
1277         switch (token1->type) {
1278         case TP_HEADERNAME:
1279                 /* TODO */
1280                 return false;
1281         case TP_IDENTIFIER:
1282                 return token1->symbol == token2->symbol;
1283         case TP_NUMBER:
1284         case TP_CHARACTER_CONSTANT:
1285         case TP_STRING_LITERAL:
1286                 return strings_equal(&token1->literal, &token2->literal);
1287
1288         default:
1289                 return true;
1290         }
1291 }
1292
1293 static bool pp_definitions_equal(const pp_definition_t *definition1,
1294                                  const pp_definition_t *definition2)
1295 {
1296         if (definition1->list_len != definition2->list_len)
1297                 return false;
1298
1299         size_t         len = definition1->list_len;
1300         const token_t *t1  = definition1->token_list;
1301         const token_t *t2  = definition2->token_list;
1302         for (size_t i = 0; i < len; ++i, ++t1, ++t2) {
1303                 if (!pp_tokens_equal(t1, t2))
1304                         return false;
1305         }
1306         return true;
1307 }
1308
1309 static void parse_define_directive(void)
1310 {
1311         eat_pp(TP_define);
1312         assert(obstack_object_size(&pp_obstack) == 0);
1313
1314         if (pp_token.type != TP_IDENTIFIER) {
1315                 errorf(&pp_token.source_position,
1316                        "expected identifier after #define, got '%t'", &pp_token);
1317                 goto error_out;
1318         }
1319         symbol_t *symbol = pp_token.symbol;
1320
1321         pp_definition_t *new_definition
1322                 = obstack_alloc(&pp_obstack, sizeof(new_definition[0]));
1323         memset(new_definition, 0, sizeof(new_definition[0]));
1324         new_definition->source_position = input.position;
1325
1326         /* this is probably the only place where spaces are significant in the
1327          * lexer (except for the fact that they separate tokens). #define b(x)
1328          * is something else than #define b (x) */
1329         if (input.c == '(') {
1330                 /* eat the '(' */
1331                 next_preprocessing_token();
1332                 /* get next token after '(' */
1333                 next_preprocessing_token();
1334
1335                 while (true) {
1336                         switch (pp_token.type) {
1337                         case TP_DOTDOTDOT:
1338                                 new_definition->is_variadic = true;
1339                                 next_preprocessing_token();
1340                                 if (pp_token.type != ')') {
1341                                         errorf(&input.position,
1342                                                         "'...' not at end of macro argument list");
1343                                         goto error_out;
1344                                 }
1345                                 break;
1346                         case TP_IDENTIFIER:
1347                                 obstack_ptr_grow(&pp_obstack, pp_token.symbol);
1348                                 next_preprocessing_token();
1349
1350                                 if (pp_token.type == ',') {
1351                                         next_preprocessing_token();
1352                                         break;
1353                                 }
1354
1355                                 if (pp_token.type != ')') {
1356                                         errorf(&pp_token.source_position,
1357                                                "expected ',' or ')' after identifier, got '%t'",
1358                                                &pp_token);
1359                                         goto error_out;
1360                                 }
1361                                 break;
1362                         case ')':
1363                                 next_preprocessing_token();
1364                                 goto finish_argument_list;
1365                         default:
1366                                 errorf(&pp_token.source_position,
1367                                        "expected identifier, '...' or ')' in #define argument list, got '%t'",
1368                                        &pp_token);
1369                                 goto error_out;
1370                         }
1371                 }
1372
1373         finish_argument_list:
1374                 new_definition->has_parameters = true;
1375                 new_definition->n_parameters
1376                         = obstack_object_size(&pp_obstack) / sizeof(new_definition->parameters[0]);
1377                 new_definition->parameters = obstack_finish(&pp_obstack);
1378         } else {
1379                 next_preprocessing_token();
1380         }
1381
1382         /* construct a new pp_definition on the obstack */
1383         assert(obstack_object_size(&pp_obstack) == 0);
1384         size_t list_len = 0;
1385         while (pp_token.type != '\n' && pp_token.type != TP_EOF) {
1386                 obstack_grow(&pp_obstack, &pp_token, sizeof(pp_token));
1387                 ++list_len;
1388                 next_preprocessing_token();
1389         }
1390
1391         new_definition->list_len   = list_len;
1392         new_definition->token_list = obstack_finish(&pp_obstack);
1393
1394         pp_definition_t *old_definition = symbol->pp_definition;
1395         if (old_definition != NULL) {
1396                 if (!pp_definitions_equal(old_definition, new_definition)) {
1397                         warningf(WARN_OTHER, &input.position, "multiple definition of macro '%Y' (first defined %P)", symbol, &old_definition->source_position);
1398                 } else {
1399                         /* reuse the old definition */
1400                         obstack_free(&pp_obstack, new_definition);
1401                         new_definition = old_definition;
1402                 }
1403         }
1404
1405         symbol->pp_definition = new_definition;
1406         return;
1407
1408 error_out:
1409         if (obstack_object_size(&pp_obstack) > 0) {
1410                 char *ptr = obstack_finish(&pp_obstack);
1411                 obstack_free(&pp_obstack, ptr);
1412         }
1413         eat_pp_directive();
1414 }
1415
1416 static void parse_undef_directive(void)
1417 {
1418         eat_pp(TP_undef);
1419
1420         if (pp_token.type != TP_IDENTIFIER) {
1421                 errorf(&input.position,
1422                        "expected identifier after #undef, got '%t'", &pp_token);
1423                 eat_pp_directive();
1424                 return;
1425         }
1426
1427         symbol_t *symbol = pp_token.symbol;
1428         symbol->pp_definition = NULL;
1429         next_preprocessing_token();
1430
1431         if (pp_token.type != '\n') {
1432                 warningf(WARN_OTHER, &input.position, "extra tokens at end of #undef directive");
1433         }
1434         /* eat until '\n' */
1435         eat_pp_directive();
1436 }
1437
1438 static const char *parse_headername(void)
1439 {
1440         /* behind an #include we can have the special headername lexems.
1441          * They're only allowed behind an #include so they're not recognized
1442          * by the normal next_preprocessing_token. We handle them as a special
1443          * exception here */
1444
1445         /* skip spaces so we reach start of next preprocessing token */
1446         skip_spaces(false);
1447
1448         assert(obstack_object_size(&input_obstack) == 0);
1449
1450         /* check wether we have a "... or <... headername */
1451         switch (input.c) {
1452         case '<':
1453                 next_char();
1454                 while (true) {
1455                         switch (input.c) {
1456                         case EOF:
1457                                 /* fallthrough */
1458                         MATCH_NEWLINE(
1459                                 parse_error("header name without closing '>'");
1460                                 return NULL;
1461                         )
1462                         case '>':
1463                                 next_char();
1464                                 goto finished_headername;
1465                         }
1466                         obstack_1grow(&input_obstack, (char) input.c);
1467                         next_char();
1468                 }
1469                 /* we should never be here */
1470
1471         case '"':
1472                 next_char();
1473                 while (true) {
1474                         switch (input.c) {
1475                         case EOF:
1476                                 /* fallthrough */
1477                         MATCH_NEWLINE(
1478                                 parse_error("header name without closing '>'");
1479                                 return NULL;
1480                         )
1481                         case '"':
1482                                 next_char();
1483                                 goto finished_headername;
1484                         }
1485                         obstack_1grow(&input_obstack, (char) input.c);
1486                         next_char();
1487                 }
1488                 /* we should never be here */
1489
1490         default:
1491                 /* TODO: do normale pp_token parsing and concatenate results */
1492                 panic("pp_token concat include not implemented yet");
1493         }
1494
1495 finished_headername:
1496         obstack_1grow(&input_obstack, '\0');
1497         char *headername = obstack_finish(&input_obstack);
1498
1499         /* TODO: iterate search-path to find the file */
1500
1501         next_preprocessing_token();
1502
1503         return headername;
1504 }
1505
1506 static bool parse_include_directive(void)
1507 {
1508         /* don't eat the TP_include here!
1509          * we need an alternative parsing for the next token */
1510
1511         print_spaces();
1512
1513         const char *headername = parse_headername();
1514         if (headername == NULL) {
1515                 eat_pp_directive();
1516                 return false;
1517         }
1518
1519         if (pp_token.type != '\n' && pp_token.type != TP_EOF) {
1520                 warningf(WARN_OTHER, &pp_token.source_position, "extra tokens at end of #include directive");
1521                 eat_pp_directive();
1522         }
1523
1524         if (n_inputs > INCLUDE_LIMIT) {
1525                 errorf(&pp_token.source_position, "#include nested too deeply");
1526                 /* eat \n or EOF */
1527                 next_preprocessing_token();
1528                 return false;
1529         }
1530
1531         /* we have to reenable space counting and macro expansion here,
1532          * because it is still disabled in directive parsing,
1533          * but we will trigger a preprocessing token reading of the new file
1534          * now and need expansions/space counting */
1535         do_print_spaces = true;
1536         do_expansions   = true;
1537
1538         /* switch inputs */
1539         push_input();
1540         bool res = open_input(headername);
1541         if (!res) {
1542                 errorf(&pp_token.source_position,
1543                        "failed including '%s': %s", headername, strerror(errno));
1544                 pop_restore_input();
1545                 return false;
1546         }
1547
1548         return true;
1549 }
1550
1551 static pp_conditional_t *push_conditional(void)
1552 {
1553         pp_conditional_t *conditional
1554                 = obstack_alloc(&pp_obstack, sizeof(*conditional));
1555         memset(conditional, 0, sizeof(*conditional));
1556
1557         conditional->parent = conditional_stack;
1558         conditional_stack   = conditional;
1559
1560         return conditional;
1561 }
1562
1563 static void pop_conditional(void)
1564 {
1565         assert(conditional_stack != NULL);
1566         conditional_stack = conditional_stack->parent;
1567 }
1568
1569 static void check_unclosed_conditionals(void)
1570 {
1571         while (conditional_stack != NULL) {
1572                 pp_conditional_t *conditional = conditional_stack;
1573
1574                 if (conditional->in_else) {
1575                         errorf(&conditional->source_position, "unterminated #else");
1576                 } else {
1577                         errorf(&conditional->source_position, "unterminated condition");
1578                 }
1579                 pop_conditional();
1580         }
1581 }
1582
1583 static void parse_ifdef_ifndef_directive(void)
1584 {
1585         bool is_ifndef = (pp_token.type == TP_ifndef);
1586         bool condition;
1587         next_preprocessing_token();
1588
1589         if (skip_mode) {
1590                 eat_pp_directive();
1591                 pp_conditional_t *conditional = push_conditional();
1592                 conditional->source_position  = pp_token.source_position;
1593                 conditional->skip             = true;
1594                 return;
1595         }
1596
1597         if (pp_token.type != TP_IDENTIFIER) {
1598                 errorf(&pp_token.source_position,
1599                        "expected identifier after #%s, got '%t'",
1600                        is_ifndef ? "ifndef" : "ifdef", &pp_token);
1601                 eat_pp_directive();
1602
1603                 /* just take the true case in the hope to avoid further errors */
1604                 condition = true;
1605         } else {
1606                 symbol_t        *symbol        = pp_token.symbol;
1607                 pp_definition_t *pp_definition = symbol->pp_definition;
1608                 next_preprocessing_token();
1609
1610                 if (pp_token.type != '\n') {
1611                         errorf(&pp_token.source_position,
1612                                "extra tokens at end of #%s",
1613                                is_ifndef ? "ifndef" : "ifdef");
1614                         eat_pp_directive();
1615                 }
1616
1617                 /* evaluate wether we are in true or false case */
1618                 condition = is_ifndef ? pp_definition == NULL : pp_definition != NULL;
1619         }
1620
1621         pp_conditional_t *conditional = push_conditional();
1622         conditional->source_position  = pp_token.source_position;
1623         conditional->condition        = condition;
1624
1625         if (!condition) {
1626                 skip_mode = true;
1627         }
1628 }
1629
1630 static void parse_else_directive(void)
1631 {
1632         eat_pp(TP_else);
1633
1634         if (pp_token.type != '\n') {
1635                 if (!skip_mode) {
1636                         warningf(WARN_OTHER, &pp_token.source_position, "extra tokens at end of #else");
1637                 }
1638                 eat_pp_directive();
1639         }
1640
1641         pp_conditional_t *conditional = conditional_stack;
1642         if (conditional == NULL) {
1643                 errorf(&pp_token.source_position, "#else without prior #if");
1644                 return;
1645         }
1646
1647         if (conditional->in_else) {
1648                 errorf(&pp_token.source_position,
1649                        "#else after #else (condition started %P)",
1650                        conditional->source_position);
1651                 skip_mode = true;
1652                 return;
1653         }
1654
1655         conditional->in_else = true;
1656         if (!conditional->skip) {
1657                 skip_mode = conditional->condition;
1658         }
1659         conditional->source_position = pp_token.source_position;
1660 }
1661
1662 static void parse_endif_directive(void)
1663 {
1664         eat_pp(TP_endif);
1665
1666         if (pp_token.type != '\n') {
1667                 if (!skip_mode) {
1668                         warningf(WARN_OTHER, &pp_token.source_position, "extra tokens at end of #endif");
1669                 }
1670                 eat_pp_directive();
1671         }
1672
1673         pp_conditional_t *conditional = conditional_stack;
1674         if (conditional == NULL) {
1675                 errorf(&pp_token.source_position, "#endif without prior #if");
1676                 return;
1677         }
1678
1679         if (!conditional->skip) {
1680                 skip_mode = false;
1681         }
1682         pop_conditional();
1683 }
1684
1685 static void parse_preprocessing_directive(void)
1686 {
1687         do_print_spaces = false;
1688         do_expansions   = false;
1689         eat_pp('#');
1690
1691         if (skip_mode) {
1692                 switch (pp_token.type) {
1693                 case TP_ifdef:
1694                 case TP_ifndef:
1695                         parse_ifdef_ifndef_directive();
1696                         break;
1697                 case TP_else:
1698                         parse_else_directive();
1699                         break;
1700                 case TP_endif:
1701                         parse_endif_directive();
1702                         break;
1703                 default:
1704                         eat_pp_directive();
1705                         break;
1706                 }
1707         } else {
1708                 switch (pp_token.type) {
1709                 case TP_define:
1710                         parse_define_directive();
1711                         break;
1712                 case TP_undef:
1713                         parse_undef_directive();
1714                         break;
1715                 case TP_ifdef:
1716                 case TP_ifndef:
1717                         parse_ifdef_ifndef_directive();
1718                         break;
1719                 case TP_else:
1720                         parse_else_directive();
1721                         break;
1722                 case TP_endif:
1723                         parse_endif_directive();
1724                         break;
1725                 case TP_include: {
1726                         bool in_new_source = parse_include_directive();
1727                         /* no need to do anything if source file switched */
1728                         if (in_new_source)
1729                                 return;
1730                         break;
1731                 }
1732                 case '\n':
1733                         /* the nop directive */
1734                         break;
1735                 default:
1736                         errorf(&pp_token.source_position,
1737                                    "invalid preprocessing directive #%t", &pp_token);
1738                         eat_pp_directive();
1739                         break;
1740                 }
1741         }
1742
1743         do_print_spaces = true;
1744         do_expansions   = true;
1745
1746         /* eat '\n' */
1747         assert(pp_token.type == '\n' || pp_token.type == TP_EOF);
1748         next_preprocessing_token();
1749 }
1750
1751 #define GCC_COMPAT_MODE
1752
1753 int pptest_main(int argc, char **argv);
1754 int pptest_main(int argc, char **argv)
1755 {
1756         init_symbol_table();
1757         init_tokens();
1758
1759         obstack_init(&pp_obstack);
1760         obstack_init(&input_obstack);
1761
1762         const char *filename = "t.c";
1763         if (argc > 1)
1764                 filename = argv[1];
1765
1766         out = stdout;
1767
1768 #ifdef GCC_COMPAT_MODE
1769         /* this is here so we can directly compare "gcc -E" output and our output */
1770         fprintf(out, "# 1 \"%s\"\n", filename);
1771         fputs("# 1 \"<built-in>\"\n", out);
1772         fputs("# 1 \"<command-line>\"\n", out);
1773 #endif
1774
1775         bool ok = open_input(filename);
1776         assert(ok);
1777
1778         while (true) {
1779                 /* we're at a line begin */
1780                 if (pp_token.type == '#') {
1781                         parse_preprocessing_directive();
1782                 } else {
1783                         /* parse+emit a line */
1784                         while (pp_token.type != '\n') {
1785                                 if (pp_token.type == TP_EOF)
1786                                         goto end_of_main_loop;
1787                                 emit_pp_token();
1788                                 next_preprocessing_token();
1789                         }
1790                         emit_pp_token();
1791                         next_preprocessing_token();
1792                 }
1793         }
1794 end_of_main_loop:
1795
1796         check_unclosed_conditionals();
1797         close_input();
1798
1799         obstack_free(&input_obstack, NULL);
1800         obstack_free(&pp_obstack, NULL);
1801
1802         exit_tokens();
1803         exit_symbol_table();
1804
1805         return 0;
1806 }