preprocessor: output '3' flag for system headers
[cparser] / preprocessor.c
1 #include <config.h>
2
3 #include <assert.h>
4 #include <errno.h>
5 #include <string.h>
6 #include <stdbool.h>
7 #include <ctype.h>
8
9 #include "preprocessor.h"
10 #include "token_t.h"
11 #include "symbol_t.h"
12 #include "adt/util.h"
13 #include "adt/error.h"
14 #include "adt/strutil.h"
15 #include "adt/strset.h"
16 #include "lang_features.h"
17 #include "diagnostic.h"
18 #include "string_rep.h"
19 #include "input.h"
20
/** Number of slots kept free at the start of an input buffer so that
 *  characters can be pushed back in front of freshly decoded data. */
#define MAX_PUTBACK 3
/** 199 is for gcc "compatibility".
 *  NOTE(review): presumably the maximum include nesting depth - the use is
 *  not visible in this part of the file. */
#define INCLUDE_LIMIT 199
23
24 typedef struct saved_token_t {
25         token_t token;
26         bool    had_whitespace;
27 } saved_token_t;
28
/** Whitespace/line-start bookkeeping for the current token. */
typedef struct whitespace_info_t {
	bool     had_whitespace;           /**< current token had whitespace in
	                                    *   front of it */
	bool     at_line_begin;            /**< current token is at the beginning
	                                    *   of a line, i.e. a "#" here starts
	                                    *   a preprocessing directive */
	unsigned whitespace_at_line_begin; /**< number of spaces before the first
	                                    *   token in a line */
} whitespace_info_t;
38
39 struct pp_definition_t {
40         symbol_t          *symbol;
41         source_position_t  source_position;
42         pp_definition_t   *parent_expansion;
43         size_t             expand_pos;
44         whitespace_info_t  expand_info;
45         bool               is_variadic    : 1;
46         bool               is_expanding   : 1;
47         bool               has_parameters : 1;
48         bool               is_parameter   : 1;
49         pp_definition_t   *function_definition;
50         size_t             n_parameters;
51         pp_definition_t   *parameters;
52
53         /* replacement */
54         size_t             list_len;
55         saved_token_t     *token_list;
56 };
57
58 typedef struct pp_conditional_t pp_conditional_t;
59 struct pp_conditional_t {
60         source_position_t  source_position;
61         bool               condition;
62         bool               in_else;
63         /** conditional in skip mode (then+else gets skipped) */
64         bool               skip;
65         pp_conditional_t  *parent;
66 };
67
68 typedef struct pp_input_t pp_input_t;
69 struct pp_input_t {
70         FILE               *file;
71         input_t            *input;
72         utf32               c;
73         utf32               buf[1024+MAX_PUTBACK];
74         const utf32        *bufend;
75         const utf32        *bufpos;
76         source_position_t   position;
77         pp_input_t         *parent;
78         unsigned            output_line;
79         searchpath_entry_t *path;
80 };
81
82 struct searchpath_entry_t {
83         const char         *path;
84         searchpath_entry_t *next;
85 };
86
87 static pp_input_t      input;
88
89 static pp_input_t     *input_stack;
90 static unsigned        n_inputs;
91 static struct obstack  input_obstack;
92
93 static pp_conditional_t *conditional_stack;
94
95 token_t                  pp_token;
96 bool                     allow_dollar_in_symbol   = true;
97 static bool              resolve_escape_sequences = true;
98 static bool              error_on_unknown_chars   = true;
99 static bool              skip_mode;
100 static FILE             *out;
101 static struct obstack    pp_obstack;
102 static struct obstack    config_obstack;
103 static const char       *printed_input_name = NULL;
104 static source_position_t expansion_pos;
105 static pp_definition_t  *current_expansion  = NULL;
106 static pp_definition_t  *current_call       = NULL;
107 static pp_definition_t  *current_argument   = NULL;
108 static pp_definition_t  *argument_expanding = NULL;
109 static unsigned          argument_brace_count;
110 static strset_t          stringset;
111 static token_kind_t      last_token;
112
113 static searchpath_entry_t *searchpath;
114
115 static whitespace_info_t next_info; /* valid if had_whitespace is true */
116 static whitespace_info_t info;
117
118 static inline void next_char(void);
119 static void next_input_token(void);
120 static void print_line_directive(const source_position_t *pos, const char *add);
121
122 static symbol_t *symbol_colongreater;
123 static symbol_t *symbol_lesscolon;
124 static symbol_t *symbol_lesspercent;
125 static symbol_t *symbol_percentcolon;
126 static symbol_t *symbol_percentcolonpercentcolon;
127 static symbol_t *symbol_percentgreater;
128
129 static void init_symbols(void)
130 {
131         symbol_colongreater             = symbol_table_insert(":>");
132         symbol_lesscolon                = symbol_table_insert("<:");
133         symbol_lesspercent              = symbol_table_insert("<%");
134         symbol_percentcolon             = symbol_table_insert("%:");
135         symbol_percentcolonpercentcolon = symbol_table_insert("%:%:");
136         symbol_percentgreater           = symbol_table_insert("%>");
137 }
138
139 void switch_pp_input(FILE *const file, char const *const filename, searchpath_entry_t *const path)
140 {
141         input.file                = file;
142         input.input               = input_from_stream(file, NULL);
143         input.bufend              = NULL;
144         input.bufpos              = NULL;
145         input.output_line         = 0;
146         input.position.input_name = filename;
147         input.position.lineno     = 1;
148         input.path                = path;
149
150         /* indicate that we're at a new input */
151         print_line_directive(&input.position, input_stack != NULL ? "1" : NULL);
152
153         /* place a virtual '\n' so we realize we're at line begin */
154         input.position.lineno = 0;
155         input.c               = '\n';
156 }
157
158 FILE *close_pp_input(void)
159 {
160         input_free(input.input);
161
162         FILE* const file = input.file;
163         assert(file);
164
165         input.input  = NULL;
166         input.file   = NULL;
167         input.bufend = NULL;
168         input.bufpos = NULL;
169         input.c      = EOF;
170
171         return file;
172 }
173
174 static void push_input(void)
175 {
176         pp_input_t *const saved_input = obstack_copy(&input_obstack, &input, sizeof(input));
177
178         /* adjust buffer positions */
179         if (input.bufpos != NULL)
180                 saved_input->bufpos = saved_input->buf + (input.bufpos - input.buf);
181         if (input.bufend != NULL)
182                 saved_input->bufend = saved_input->buf + (input.bufend - input.buf);
183
184         saved_input->parent = input_stack;
185         input_stack         = saved_input;
186         ++n_inputs;
187 }
188
189 static void pop_restore_input(void)
190 {
191         assert(n_inputs > 0);
192         assert(input_stack != NULL);
193
194         pp_input_t *saved_input = input_stack;
195
196         memcpy(&input, saved_input, sizeof(input));
197         input.parent = NULL;
198
199         /* adjust buffer positions */
200         if (saved_input->bufpos != NULL)
201                 input.bufpos = input.buf + (saved_input->bufpos - saved_input->buf);
202         if (saved_input->bufend != NULL)
203                 input.bufend = input.buf + (saved_input->bufend - saved_input->buf);
204
205         input_stack = saved_input->parent;
206         obstack_free(&input_obstack, saved_input);
207         --n_inputs;
208 }
209
210 /**
211  * Prints a parse error message at the current token.
212  *
213  * @param msg   the error message
214  */
215 static void parse_error(const char *msg)
216 {
217         errorf(&pp_token.base.source_position,  "%s", msg);
218 }
219
220 static inline void next_real_char(void)
221 {
222         assert(input.bufpos <= input.bufend);
223         if (input.bufpos >= input.bufend) {
224                 size_t const n = decode(input.input, input.buf + MAX_PUTBACK, lengthof(input.buf) - MAX_PUTBACK);
225                 if (n == 0) {
226                         input.c = EOF;
227                         return;
228                 }
229                 input.bufpos = input.buf + MAX_PUTBACK;
230                 input.bufend = input.bufpos + n;
231         }
232         input.c = *input.bufpos++;
233         ++input.position.colno;
234 }
235
236 /**
237  * Put a character back into the buffer.
238  *
239  * @param pc  the character to put back
240  */
241 static inline void put_back(utf32 const pc)
242 {
243         assert(input.bufpos > input.buf);
244         *(--input.bufpos - input.buf + input.buf) = (char) pc;
245         --input.position.colno;
246 }
247
/**
 * Use as `case NEWLINE:` inside a switch over input.c.
 *
 * Matches "\r", "\r\n" and "\n", consumes the line ending, advances the line
 * number and resets the column.  Control then continues at the statements
 * following the label, so it looks like an ordinary case label.  The macro
 * defines the label `newline`, hence it can be used only once per function.
 */
#define NEWLINE \
	'\r': \
		next_char(); \
		if (input.c == '\n') { \
	case '\n': \
			next_char(); \
		} \
		++input.position.lineno; \
		input.position.colno = 1; \
		goto newline; \
		newline

/** Asserts that the current character is @p c_type and skips it. */
#define eat(c_type) (assert(input.c == c_type), next_char())
261
262 static void maybe_concat_lines(void)
263 {
264         eat('\\');
265
266         switch (input.c) {
267         case NEWLINE:
268                 info.whitespace_at_line_begin = 0;
269                 return;
270
271         default:
272                 break;
273         }
274
275         put_back(input.c);
276         input.c = '\\';
277 }
278
279 /**
280  * Set c to the next input character, ie.
281  * after expanding trigraphs.
282  */
283 static inline void next_char(void)
284 {
285         next_real_char();
286
287         /* filter trigraphs and concatenated lines */
288         if (UNLIKELY(input.c == '\\')) {
289                 maybe_concat_lines();
290                 goto end_of_next_char;
291         }
292
293         if (LIKELY(input.c != '?'))
294                 goto end_of_next_char;
295
296         next_real_char();
297         if (LIKELY(input.c != '?')) {
298                 put_back(input.c);
299                 input.c = '?';
300                 goto end_of_next_char;
301         }
302
303         next_real_char();
304         switch (input.c) {
305         case '=': input.c = '#'; break;
306         case '(': input.c = '['; break;
307         case '/': input.c = '\\'; maybe_concat_lines(); break;
308         case ')': input.c = ']'; break;
309         case '\'': input.c = '^'; break;
310         case '<': input.c = '{'; break;
311         case '!': input.c = '|'; break;
312         case '>': input.c = '}'; break;
313         case '-': input.c = '~'; break;
314         default:
315                 put_back(input.c);
316                 put_back('?');
317                 input.c = '?';
318                 break;
319         }
320
321 end_of_next_char:;
322 #ifdef DEBUG_CHARS
323         printf("nchar '%c'\n", input.c);
324 #endif
325 }
326
327
328
/**
 * Returns true if the given char is an octal digit ('0' to '7').
 *
 * @param chr  the character to check
 */
static inline bool is_octal_digit(int chr)
{
	/* the decimal digits are guaranteed to be contiguous and ascending in
	 * every C execution character set */
	return '0' <= chr && chr <= '7';
}
350
/**
 * Returns the numeric value (0-15) of a decimal or hexadecimal digit
 * character; panics for any other character.
 *
 * Only the decimal digits are guaranteed to be contiguous, so the letters
 * are mapped one by one.
 */
static int digit_value(int digit)
{
	if ('0' <= digit && digit <= '9')
		return digit - '0';

	switch (digit) {
	case 'a': case 'A': return 10;
	case 'b': case 'B': return 11;
	case 'c': case 'C': return 12;
	case 'd': case 'D': return 13;
	case 'e': case 'E': return 14;
	case 'f': case 'F': return 15;
	default:
		panic("wrong character given");
	}
}
384
385 /**
386  * Parses an octal character sequence.
387  *
388  * @param first_digit  the already read first digit
389  */
390 static utf32 parse_octal_sequence(const utf32 first_digit)
391 {
392         assert(is_octal_digit(first_digit));
393         utf32 value = digit_value(first_digit);
394         if (!is_octal_digit(input.c)) return value;
395         value = 8 * value + digit_value(input.c);
396         next_char();
397         if (!is_octal_digit(input.c)) return value;
398         value = 8 * value + digit_value(input.c);
399         next_char();
400         return value;
401
402 }
403
404 /**
405  * Parses a hex character sequence.
406  */
407 static utf32 parse_hex_sequence(void)
408 {
409         utf32 value = 0;
410         while (isxdigit(input.c)) {
411                 value = 16 * value + digit_value(input.c);
412                 next_char();
413         }
414         return value;
415 }
416
417 static bool is_universal_char_valid(utf32 const v)
418 {
419         /* C11 Â§6.4.3:2 */
420         if (v < 0xA0U && v != 0x24 && v != 0x40 && v != 0x60)
421                 return false;
422         if (0xD800 <= v && v <= 0xDFFF)
423                 return false;
424         return true;
425 }
426
427 static utf32 parse_universal_char(unsigned const n_digits)
428 {
429         utf32 v = 0;
430         for (unsigned k = n_digits; k != 0; --k) {
431                 if (isxdigit(input.c)) {
432                         v = 16 * v + digit_value(input.c);
433                         if (!resolve_escape_sequences)
434                                 obstack_1grow(&symbol_obstack, input.c);
435                         next_char();
436                 } else {
437                         errorf(&input.position,
438                                "short universal character name, expected %u more digits",
439                                    k);
440                         break;
441                 }
442         }
443         if (!is_universal_char_valid(v)) {
444                 errorf(&input.position,
445                        "\\%c%0*X is not a valid universal character name",
446                        n_digits == 4 ? 'u' : 'U', (int)n_digits, v);
447         }
448         return v;
449 }
450
451 static bool is_universal_char_valid_identifier(utf32 const v)
452 {
453         /* C11 Annex D.1 */
454         if (                v == 0x000A8) return true;
455         if (                v == 0x000AA) return true;
456         if (                v == 0x000AD) return true;
457         if (                v == 0x000AF) return true;
458         if (0x000B2 <= v && v <= 0x000B5) return true;
459         if (0x000B7 <= v && v <= 0x000BA) return true;
460         if (0x000BC <= v && v <= 0x000BE) return true;
461         if (0x000C0 <= v && v <= 0x000D6) return true;
462         if (0x000D8 <= v && v <= 0x000F6) return true;
463         if (0x000F8 <= v && v <= 0x000FF) return true;
464         if (0x00100 <= v && v <= 0x0167F) return true;
465         if (0x01681 <= v && v <= 0x0180D) return true;
466         if (0x0180F <= v && v <= 0x01FFF) return true;
467         if (0x0200B <= v && v <= 0x0200D) return true;
468         if (0x0202A <= v && v <= 0x0202E) return true;
469         if (0x0203F <= v && v <= 0x02040) return true;
470         if (                v == 0x02054) return true;
471         if (0x02060 <= v && v <= 0x0206F) return true;
472         if (0x02070 <= v && v <= 0x0218F) return true;
473         if (0x02460 <= v && v <= 0x024FF) return true;
474         if (0x02776 <= v && v <= 0x02793) return true;
475         if (0x02C00 <= v && v <= 0x02DFF) return true;
476         if (0x02E80 <= v && v <= 0x02FFF) return true;
477         if (0x03004 <= v && v <= 0x03007) return true;
478         if (0x03021 <= v && v <= 0x0302F) return true;
479         if (0x03031 <= v && v <= 0x0303F) return true;
480         if (0x03040 <= v && v <= 0x0D7FF) return true;
481         if (0x0F900 <= v && v <= 0x0FD3D) return true;
482         if (0x0FD40 <= v && v <= 0x0FDCF) return true;
483         if (0x0FDF0 <= v && v <= 0x0FE44) return true;
484         if (0x0FE47 <= v && v <= 0x0FFFD) return true;
485         if (0x10000 <= v && v <= 0x1FFFD) return true;
486         if (0x20000 <= v && v <= 0x2FFFD) return true;
487         if (0x30000 <= v && v <= 0x3FFFD) return true;
488         if (0x40000 <= v && v <= 0x4FFFD) return true;
489         if (0x50000 <= v && v <= 0x5FFFD) return true;
490         if (0x60000 <= v && v <= 0x6FFFD) return true;
491         if (0x70000 <= v && v <= 0x7FFFD) return true;
492         if (0x80000 <= v && v <= 0x8FFFD) return true;
493         if (0x90000 <= v && v <= 0x9FFFD) return true;
494         if (0xA0000 <= v && v <= 0xAFFFD) return true;
495         if (0xB0000 <= v && v <= 0xBFFFD) return true;
496         if (0xC0000 <= v && v <= 0xCFFFD) return true;
497         if (0xD0000 <= v && v <= 0xDFFFD) return true;
498         if (0xE0000 <= v && v <= 0xEFFFD) return true;
499         return false;
500 }
501
502 static bool is_universal_char_valid_identifier_start(utf32 const v)
503 {
504         /* C11 Annex D.2 */
505         if (0x0300 <= v && v <= 0x036F) return false;
506         if (0x1DC0 <= v && v <= 0x1DFF) return false;
507         if (0x20D0 <= v && v <= 0x20FF) return false;
508         if (0xFE20 <= v && v <= 0xFE2F) return false;
509         return true;
510 }
511
512 /**
513  * Parse an escape sequence.
514  */
515 static utf32 parse_escape_sequence(void)
516 {
517         eat('\\');
518
519         utf32 const ec = input.c;
520         next_char();
521
522         switch (ec) {
523         case '"':  return '"';
524         case '\'': return '\'';
525         case '\\': return '\\';
526         case '?': return '\?';
527         case 'a': return '\a';
528         case 'b': return '\b';
529         case 'f': return '\f';
530         case 'n': return '\n';
531         case 'r': return '\r';
532         case 't': return '\t';
533         case 'v': return '\v';
534         case 'x':
535                 return parse_hex_sequence();
536         case '0':
537         case '1':
538         case '2':
539         case '3':
540         case '4':
541         case '5':
542         case '6':
543         case '7':
544                 return parse_octal_sequence(ec);
545         case EOF:
546                 parse_error("reached end of file while parsing escape sequence");
547                 return EOF;
548         /* \E is not documented, but handled, by GCC.  It is acceptable according
549          * to Â§6.11.4, whereas \e is not. */
550         case 'E':
551         case 'e':
552                 if (c_mode & _GNUC)
553                         return 27;   /* hopefully 27 is ALWAYS the code for ESCAPE */
554                 break;
555
556         case 'U': return parse_universal_char(8);
557         case 'u': return parse_universal_char(4);
558
559         default:
560                 break;
561         }
562         /* Â§6.4.4.4:8 footnote 64 */
563         parse_error("unknown escape sequence");
564         return EOF;
565 }
566
567 static const char *identify_string(char *string)
568 {
569         const char *result = strset_insert(&stringset, string);
570         if (result != string) {
571                 obstack_free(&symbol_obstack, string);
572         }
573         return result;
574 }
575
576 static string_t sym_make_string(string_encoding_t const enc)
577 {
578         obstack_1grow(&symbol_obstack, '\0');
579         size_t      const len    = obstack_object_size(&symbol_obstack) - 1;
580         char       *const string = obstack_finish(&symbol_obstack);
581         char const *const result = identify_string(string);
582         return (string_t){ result, len, enc };
583 }
584
585 string_t make_string(char const *const string)
586 {
587         obstack_grow(&symbol_obstack, string, strlen(string));
588         return sym_make_string(STRING_ENCODING_CHAR);
589 }
590
591 static void parse_string(utf32 const delimiter, token_kind_t const kind,
592                          string_encoding_t const enc,
593                          char const *const context)
594 {
595         const unsigned start_linenr = input.position.lineno;
596
597         eat(delimiter);
598
599         while (true) {
600                 switch (input.c) {
601                 case '\\': {
602                         if (resolve_escape_sequences) {
603                                 utf32 const tc = parse_escape_sequence();
604                                 if (enc == STRING_ENCODING_CHAR) {
605                                         if (tc >= 0x100) {
606                                                 warningf(WARN_OTHER, &pp_token.base.source_position, "escape sequence out of range");
607                                         }
608                                         obstack_1grow(&symbol_obstack, tc);
609                                 } else {
610                                         obstack_grow_utf8(&symbol_obstack, tc);
611                                 }
612                         } else {
613                                 obstack_1grow(&symbol_obstack, (char)input.c);
614                                 next_char();
615                                 obstack_1grow(&symbol_obstack, (char)input.c);
616                                 next_char();
617                         }
618                         break;
619                 }
620
621                 case NEWLINE:
622                         errorf(&pp_token.base.source_position, "newline while parsing %s", context);
623                         break;
624
625                 case EOF: {
626                         source_position_t source_position;
627                         source_position.input_name = pp_token.base.source_position.input_name;
628                         source_position.lineno     = start_linenr;
629                         errorf(&source_position, "EOF while parsing %s", context);
630                         goto end_of_string;
631                 }
632
633                 default:
634                         if (input.c == delimiter) {
635                                 next_char();
636                                 goto end_of_string;
637                         } else {
638                                 obstack_grow_utf8(&symbol_obstack, input.c);
639                                 next_char();
640                                 break;
641                         }
642                 }
643         }
644
645 end_of_string:
646         pp_token.kind           = kind;
647         pp_token.literal.string = sym_make_string(enc);
648 }
649
650 static void parse_string_literal(string_encoding_t const enc)
651 {
652         parse_string('"', T_STRING_LITERAL, enc, "string literal");
653 }
654
655 static void parse_character_constant(string_encoding_t const enc)
656 {
657         parse_string('\'', T_CHARACTER_CONSTANT, enc, "character constant");
658         if (pp_token.literal.string.size == 0) {
659                 parse_error("empty character constant");
660         }
661 }
662
/**
 * Case labels (use as `case SYMBOL_CASES_WITHOUT_E_P:`) for '$', '_' and all
 * ASCII letters except e, p, E and P.  For '$' the expansion jumps to the
 * label `dollar_sign` when allow_dollar_in_symbol is off, so a function
 * using the macro has to provide that label.
 */
#define SYMBOL_CASES_WITHOUT_E_P \
	     '$': if (!allow_dollar_in_symbol) goto dollar_sign; \
	case 'a': case 'b': case 'c': case 'd': case 'f': case 'g': \
	case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': \
	case 'n': case 'o': case 'q': case 'r': case 's': case 't': \
	case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': \
	case 'A': case 'B': case 'C': case 'D': case 'F': case 'G': \
	case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': \
	case 'N': case 'O': case 'Q': case 'R': case 'S': case 'T': \
	case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': \
	case '_'

/** Like SYMBOL_CASES_WITHOUT_E_P, plus the letters e, p, E and P. */
#define SYMBOL_CASES \
	     SYMBOL_CASES_WITHOUT_E_P: \
	case 'e': case 'p': case 'E': case 'P'

/** Case labels (use as `case DIGIT_CASES:`) for the decimal digits. */
#define DIGIT_CASES \
	     '0': case '1': case '2': case '3': case '4': \
	case '5': case '6': case '7': case '8': case '9'
733
734 static void start_expanding(pp_definition_t *definition)
735 {
736         definition->parent_expansion = current_expansion;
737         definition->expand_pos       = 0;
738         definition->is_expanding     = true;
739         if (definition->list_len > 0) {
740                 definition->token_list[0].had_whitespace
741                         = info.had_whitespace;
742         }
743         current_expansion = definition;
744 }
745
746 static void finished_expanding(pp_definition_t *definition)
747 {
748         assert(definition->is_expanding);
749         pp_definition_t *parent = definition->parent_expansion;
750         definition->parent_expansion = NULL;
751         definition->is_expanding     = false;
752
753         /* stop further expanding once we expanded a parameter used in a
754          * sub macro-call */
755         if (definition == argument_expanding)
756                 argument_expanding = NULL;
757
758         assert(current_expansion == definition);
759         current_expansion = parent;
760 }
761
762 static inline void set_punctuator(token_kind_t const kind)
763 {
764         pp_token.kind        = kind;
765         pp_token.base.symbol = token_symbols[kind];
766 }
767
768 static inline void set_digraph(token_kind_t const kind, symbol_t *const symbol)
769 {
770         pp_token.kind        = kind;
771         pp_token.base.symbol = symbol;
772 }
773
774 /**
775  * returns next final token from a preprocessor macro expansion
776  */
777 static bool expand_next(void)
778 {
779         if (current_expansion == NULL)
780                 return false;
781
782 restart:;
783         size_t pos = current_expansion->expand_pos;
784         if (pos >= current_expansion->list_len) {
785                 finished_expanding(current_expansion);
786                 /* it was the outermost expansion, parse pptoken normally */
787                 if (current_expansion == NULL) {
788                         return false;
789                 }
790                 goto restart;
791         }
792         const saved_token_t *saved = &current_expansion->token_list[pos++];
793         pp_token = saved->token;
794
795         if (current_expansion->expand_pos > 0)
796                 info.had_whitespace = saved->had_whitespace;
797         pp_token.base.source_position = expansion_pos;
798         ++current_expansion->expand_pos;
799
800         return true;
801 }
802
803 /**
804  * Returns the next token kind found when continuing the current expansions
805  * without starting new sub-expansions.
806  */
807 static token_kind_t peek_expansion(void)
808 {
809         pp_definition_t *expansion = current_expansion;
810         while (expansion != NULL && expansion->expand_pos >= expansion->list_len) {
811                 expansion = expansion->parent_expansion;
812         }
813         if (expansion == NULL)
814                 return T_EOF;
815         return expansion->token_list[expansion->expand_pos].token.kind;
816 }
817
818 static void skip_line_comment(void)
819 {
820         info.had_whitespace = true;
821         while (true) {
822                 switch (input.c) {
823                 case EOF:
824                         return;
825
826                 case '\r':
827                 case '\n':
828                         return;
829
830                 default:
831                         next_char();
832                         break;
833                 }
834         }
835 }
836
837 static void skip_multiline_comment(void)
838 {
839         info.had_whitespace = true;
840
841         unsigned start_linenr = input.position.lineno;
842         while (true) {
843                 switch (input.c) {
844                 case '/':
845                         next_char();
846                         if (input.c == '*') {
847                                 /* TODO: nested comment, warn here */
848                         }
849                         break;
850                 case '*':
851                         next_char();
852                         if (input.c == '/') {
853                                 if (input.position.lineno != input.output_line)
854                                         info.whitespace_at_line_begin = input.position.colno;
855                                 next_char();
856                                 return;
857                         }
858                         break;
859
860                 case NEWLINE:
861                         break;
862
863                 case EOF: {
864                         source_position_t source_position;
865                         source_position.input_name = pp_token.base.source_position.input_name;
866                         source_position.lineno     = start_linenr;
867                         errorf(&source_position, "at end of file while looking for comment end");
868                         return;
869                 }
870
871                 default:
872                         next_char();
873                         break;
874                 }
875         }
876 }
877
878 static bool skip_till_newline(bool stop_at_non_whitespace)
879 {
880         bool res = false;
881         while (true) {
882                 switch (input.c) {
883                 case ' ':
884                 case '\t':
885                         next_char();
886                         continue;
887
888                 case '/':
889                         next_char();
890                         if (input.c == '/') {
891                                 next_char();
892                                 skip_line_comment();
893                                 continue;
894                         } else if (input.c == '*') {
895                                 next_char();
896                                 skip_multiline_comment();
897                                 continue;
898                         } else {
899                                 put_back(input.c);
900                                 input.c = '/';
901                         }
902                         return true;
903
904                 case NEWLINE:
905                         return res;
906
907                 default:
908                         if (stop_at_non_whitespace)
909                                 return false;
910                         res = true;
911                         next_char();
912                         continue;
913                 }
914         }
915 }
916
917 static void skip_whitespace(void)
918 {
919         while (true) {
920                 switch (input.c) {
921                 case ' ':
922                 case '\t':
923                         ++info.whitespace_at_line_begin;
924                         info.had_whitespace = true;
925                         next_char();
926                         continue;
927
928                 case NEWLINE:
929                         info.at_line_begin  = true;
930                         info.had_whitespace = true;
931                         info.whitespace_at_line_begin = 0;
932                         continue;
933
934                 case '/':
935                         next_char();
936                         if (input.c == '/') {
937                                 next_char();
938                                 skip_line_comment();
939                                 continue;
940                         } else if (input.c == '*') {
941                                 next_char();
942                                 skip_multiline_comment();
943                                 continue;
944                         } else {
945                                 put_back(input.c);
946                                 input.c = '/';
947                         }
948                         return;
949
950                 default:
951                         return;
952                 }
953         }
954 }
955
956 static inline void eat_pp(pp_token_kind_t const kind)
957 {
958         assert(pp_token.base.symbol->pp_ID == kind);
959         (void) kind;
960         next_input_token();
961 }
962
963 static inline void eat_token(token_kind_t const kind)
964 {
965         assert(pp_token.kind == kind);
966         (void)kind;
967         next_input_token();
968 }
969
970 static void parse_symbol(void)
971 {
972         assert(obstack_object_size(&symbol_obstack) == 0);
973         while (true) {
974                 switch (input.c) {
975                 case DIGIT_CASES:
976                 case SYMBOL_CASES:
977                         obstack_1grow(&symbol_obstack, (char) input.c);
978                         next_char();
979                         break;
980
981                 case '\\':
982                         next_char();
983                         switch (input.c) {
984                         {
985                                 unsigned n;
986                         case 'U': n = 8; goto universal;
987                         case 'u': n = 4; goto universal;
988 universal:
989                                 if (!resolve_escape_sequences) {
990                                         obstack_1grow(&symbol_obstack, '\\');
991                                         obstack_1grow(&symbol_obstack, input.c);
992                                 }
993                                 next_char();
994                                 utf32 const v = parse_universal_char(n);
995                                 if (!is_universal_char_valid_identifier(v)) {
996                                         if (is_universal_char_valid(v)) {
997                                                 errorf(&input.position,
998                                                            "universal character \\%c%0*X is not valid in an identifier",
999                                                            n == 4 ? 'u' : 'U', (int)n, v);
1000                                         }
1001                                 } else if (obstack_object_size(&symbol_obstack) == 0 && !is_universal_char_valid_identifier_start(v)) {
1002                                         errorf(&input.position,
1003                                                    "universal character \\%c%0*X is not valid as start of an identifier",
1004                                                    n == 4 ? 'u' : 'U', (int)n, v);
1005                                 } else if (resolve_escape_sequences) {
1006                                         obstack_grow_utf8(&symbol_obstack, v);
1007                                 }
1008                                 break;
1009                         }
1010
1011                         default:
1012                                 put_back(input.c);
1013                                 input.c = '\\';
1014                                 goto end_symbol;
1015                         }
1016
1017                 default:
1018 dollar_sign:
1019                         goto end_symbol;
1020                 }
1021         }
1022
1023 end_symbol:
1024         obstack_1grow(&symbol_obstack, '\0');
1025         char *string = obstack_finish(&symbol_obstack);
1026
1027         /* might be a wide string or character constant ( L"string"/L'c' ) */
1028         if (input.c == '"' && string[0] == 'L' && string[1] == '\0') {
1029                 obstack_free(&symbol_obstack, string);
1030                 parse_string_literal(STRING_ENCODING_WIDE);
1031                 return;
1032         } else if (input.c == '\'' && string[0] == 'L' && string[1] == '\0') {
1033                 obstack_free(&symbol_obstack, string);
1034                 parse_character_constant(STRING_ENCODING_WIDE);
1035                 return;
1036         }
1037
1038         symbol_t *symbol = symbol_table_insert(string);
1039
1040         pp_token.kind        = symbol->ID;
1041         pp_token.base.symbol = symbol;
1042
1043         /* we can free the memory from symbol obstack if we already had an entry in
1044          * the symbol table */
1045         if (symbol->string != string) {
1046                 obstack_free(&symbol_obstack, string);
1047         }
1048 }
1049
1050 static void parse_number(void)
1051 {
1052         obstack_1grow(&symbol_obstack, (char) input.c);
1053         next_char();
1054
1055         while (true) {
1056                 switch (input.c) {
1057                 case '.':
1058                 case DIGIT_CASES:
1059                 case SYMBOL_CASES_WITHOUT_E_P:
1060                         obstack_1grow(&symbol_obstack, (char) input.c);
1061                         next_char();
1062                         break;
1063
1064                 case 'e':
1065                 case 'p':
1066                 case 'E':
1067                 case 'P':
1068                         obstack_1grow(&symbol_obstack, (char) input.c);
1069                         next_char();
1070                         if (input.c == '+' || input.c == '-') {
1071                                 obstack_1grow(&symbol_obstack, (char) input.c);
1072                                 next_char();
1073                         }
1074                         break;
1075
1076                 default:
1077 dollar_sign:
1078                         goto end_number;
1079                 }
1080         }
1081
1082 end_number:
1083         pp_token.kind           = T_NUMBER;
1084         pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
1085 }
1086
/*
 * Helper macros for lexing punctuators that consist of several characters.
 * They are pieces of a switch statement and only make sense together:
 *
 *     MAYBE_PROLOG            advance one character and open a switch on it
 *     MAYBE / MAYBE_DIGRAPH   one case: consume the character, set the token
 *                             and return from the enclosing function
 *     ELSE_CODE / ELSE        the default case; also closes the switch
 */
#define MAYBE_PROLOG \
        next_char(); \
        switch (input.c) {

#define MAYBE(chr, token_kind) \
        case chr: \
                next_char(); \
                set_punctuator(token_kind); \
                return;

#define MAYBE_DIGRAPH(chr, token_kind, digraph_symbol) \
        case chr: \
                next_char(); \
                set_digraph(token_kind, digraph_symbol); \
                return;

#define ELSE_CODE(statements) \
        default: \
                statements \
        }

#define ELSE(token_kind) ELSE_CODE(set_punctuator(token_kind); return;)
1109
1110 /** identifies and returns the next preprocessing token contained in the
1111  * input stream. No macro expansion is performed. */
1112 static void next_input_token(void)
1113 {
1114         if (next_info.had_whitespace) {
1115                 info = next_info;
1116                 next_info.had_whitespace = false;
1117         } else {
1118                 info.at_line_begin  = false;
1119                 info.had_whitespace = false;
1120         }
1121 restart:
1122         pp_token.base.source_position = input.position;
1123         pp_token.base.symbol          = NULL;
1124
1125         switch (input.c) {
1126         case ' ':
1127         case '\t':
1128                 info.whitespace_at_line_begin++;
1129                 info.had_whitespace = true;
1130                 next_char();
1131                 goto restart;
1132
1133         case NEWLINE:
1134                 info.at_line_begin            = true;
1135                 info.had_whitespace           = true;
1136                 info.whitespace_at_line_begin = 0;
1137                 goto restart;
1138
1139         case SYMBOL_CASES:
1140                 parse_symbol();
1141                 return;
1142
1143         case DIGIT_CASES:
1144                 parse_number();
1145                 return;
1146
1147         case '"':
1148                 parse_string_literal(STRING_ENCODING_CHAR);
1149                 return;
1150
1151         case '\'':
1152                 parse_character_constant(STRING_ENCODING_CHAR);
1153                 return;
1154
1155         case '.':
1156                 MAYBE_PROLOG
1157                         case '0':
1158                         case '1':
1159                         case '2':
1160                         case '3':
1161                         case '4':
1162                         case '5':
1163                         case '6':
1164                         case '7':
1165                         case '8':
1166                         case '9':
1167                                 put_back(input.c);
1168                                 input.c = '.';
1169                                 parse_number();
1170                                 return;
1171
1172                         case '.':
1173                                 MAYBE_PROLOG
1174                                 MAYBE('.', T_DOTDOTDOT)
1175                                 ELSE_CODE(
1176                                         put_back(input.c);
1177                                         input.c = '.';
1178                                         set_punctuator('.');
1179                                         return;
1180                                 )
1181                 ELSE('.')
1182         case '&':
1183                 MAYBE_PROLOG
1184                 MAYBE('&', T_ANDAND)
1185                 MAYBE('=', T_ANDEQUAL)
1186                 ELSE('&')
1187         case '*':
1188                 MAYBE_PROLOG
1189                 MAYBE('=', T_ASTERISKEQUAL)
1190                 ELSE('*')
1191         case '+':
1192                 MAYBE_PROLOG
1193                 MAYBE('+', T_PLUSPLUS)
1194                 MAYBE('=', T_PLUSEQUAL)
1195                 ELSE('+')
1196         case '-':
1197                 MAYBE_PROLOG
1198                 MAYBE('>', T_MINUSGREATER)
1199                 MAYBE('-', T_MINUSMINUS)
1200                 MAYBE('=', T_MINUSEQUAL)
1201                 ELSE('-')
1202         case '!':
1203                 MAYBE_PROLOG
1204                 MAYBE('=', T_EXCLAMATIONMARKEQUAL)
1205                 ELSE('!')
1206         case '/':
1207                 MAYBE_PROLOG
1208                 MAYBE('=', T_SLASHEQUAL)
1209                 case '*':
1210                         next_char();
1211                         skip_multiline_comment();
1212                         goto restart;
1213                 case '/':
1214                         next_char();
1215                         skip_line_comment();
1216                         goto restart;
1217                 ELSE('/')
1218         case '%':
1219                 MAYBE_PROLOG
1220                 MAYBE_DIGRAPH('>', '}', symbol_percentgreater)
1221                 MAYBE('=', T_PERCENTEQUAL)
1222                 case ':':
1223                         MAYBE_PROLOG
1224                         case '%':
1225                                 MAYBE_PROLOG
1226                                 MAYBE_DIGRAPH(':', T_HASHHASH, symbol_percentcolonpercentcolon)
1227                                 ELSE_CODE(
1228                                         put_back(input.c);
1229                                         input.c = '%';
1230                                         goto digraph_percentcolon;
1231                                 )
1232                         ELSE_CODE(
1233 digraph_percentcolon:
1234                                 set_digraph('#', symbol_percentcolon);
1235                                 return;
1236                         )
1237                 ELSE('%')
1238         case '<':
1239                 MAYBE_PROLOG
1240                 MAYBE_DIGRAPH(':', '[', symbol_lesscolon)
1241                 MAYBE_DIGRAPH('%', '{', symbol_lesspercent)
1242                 MAYBE('=', T_LESSEQUAL)
1243                 case '<':
1244                         MAYBE_PROLOG
1245                         MAYBE('=', T_LESSLESSEQUAL)
1246                         ELSE(T_LESSLESS)
1247                 ELSE('<')
1248         case '>':
1249                 MAYBE_PROLOG
1250                 MAYBE('=', T_GREATEREQUAL)
1251                 case '>':
1252                         MAYBE_PROLOG
1253                         MAYBE('=', T_GREATERGREATEREQUAL)
1254                         ELSE(T_GREATERGREATER)
1255                 ELSE('>')
1256         case '^':
1257                 MAYBE_PROLOG
1258                 MAYBE('=', T_CARETEQUAL)
1259                 ELSE('^')
1260         case '|':
1261                 MAYBE_PROLOG
1262                 MAYBE('=', T_PIPEEQUAL)
1263                 MAYBE('|', T_PIPEPIPE)
1264                 ELSE('|')
1265         case ':':
1266                 MAYBE_PROLOG
1267                 MAYBE_DIGRAPH('>', ']', symbol_colongreater)
1268                 case ':':
1269                         if (c_mode & _CXX) {
1270                                 next_char();
1271                                 set_punctuator(T_COLONCOLON);
1272                                 return;
1273                         }
1274                         /* FALLTHROUGH */
1275                 ELSE(':')
1276         case '=':
1277                 MAYBE_PROLOG
1278                 MAYBE('=', T_EQUALEQUAL)
1279                 ELSE('=')
1280         case '#':
1281                 MAYBE_PROLOG
1282                 MAYBE('#', T_HASHHASH)
1283                 ELSE('#')
1284
1285         case '?':
1286         case '[':
1287         case ']':
1288         case '(':
1289         case ')':
1290         case '{':
1291         case '}':
1292         case '~':
1293         case ';':
1294         case ',':
1295                 set_punctuator(input.c);
1296                 next_char();
1297                 return;
1298
1299         case EOF:
1300                 if (input_stack != NULL) {
1301                         fclose(close_pp_input());
1302                         pop_restore_input();
1303                         fputc('\n', out);
1304                         if (input.c == (utf32)EOF)
1305                                 --input.position.lineno;
1306                         print_line_directive(&input.position, "2");
1307                         goto restart;
1308                 } else {
1309                         info.at_line_begin = true;
1310                         set_punctuator(T_EOF);
1311                 }
1312                 return;
1313
1314         case '\\':
1315                 next_char();
1316                 int next_c = input.c;
1317                 put_back(input.c);
1318                 input.c = '\\';
1319                 if (next_c == 'U' || next_c == 'u') {
1320                         parse_symbol();
1321                         return;
1322                 }
1323                 /* FALLTHROUGH */
1324         default:
1325 dollar_sign:
1326                 if (error_on_unknown_chars) {
1327                         errorf(&pp_token.base.source_position,
1328                                "unknown character '%lc' found\n", input.c);
1329                         next_char();
1330                         goto restart;
1331                 } else {
1332                         assert(obstack_object_size(&symbol_obstack) == 0);
1333                         obstack_grow_utf8(&symbol_obstack, input.c);
1334                         obstack_1grow(&symbol_obstack, '\0');
1335                         char     *const string = obstack_finish(&symbol_obstack);
1336                         symbol_t *const symbol = symbol_table_insert(string);
1337                         if (symbol->string != string)
1338                                 obstack_free(&symbol_obstack, string);
1339
1340                         pp_token.kind        = T_UNKNOWN_CHAR;
1341                         pp_token.base.symbol = symbol;
1342                         next_char();
1343                         return;
1344                 }
1345         }
1346 }
1347
1348 static void print_quoted_string(const char *const string)
1349 {
1350         fputc('"', out);
1351         for (const char *c = string; *c != 0; ++c) {
1352                 switch (*c) {
1353                 case '"': fputs("\\\"", out); break;
1354                 case '\\':  fputs("\\\\", out); break;
1355                 case '\a':  fputs("\\a", out); break;
1356                 case '\b':  fputs("\\b", out); break;
1357                 case '\f':  fputs("\\f", out); break;
1358                 case '\n':  fputs("\\n", out); break;
1359                 case '\r':  fputs("\\r", out); break;
1360                 case '\t':  fputs("\\t", out); break;
1361                 case '\v':  fputs("\\v", out); break;
1362                 case '\?':  fputs("\\?", out); break;
1363                 default:
1364                         if (!isprint(*c)) {
1365                                 fprintf(out, "\\%03o", (unsigned)*c);
1366                                 break;
1367                         }
1368                         fputc(*c, out);
1369                         break;
1370                 }
1371         }
1372         fputc('"', out);
1373 }
1374
1375 static void print_line_directive(const source_position_t *pos, const char *add)
1376 {
1377         if (!out)
1378                 return;
1379
1380         fprintf(out, "# %u ", pos->lineno);
1381         print_quoted_string(pos->input_name);
1382         if (add != NULL) {
1383                 fputc(' ', out);
1384                 fputs(add, out);
1385         }
1386         if (pos->is_system_header) {
1387                 fputs(" 3", out);
1388         }
1389
1390         printed_input_name = pos->input_name;
1391         input.output_line  = pos->lineno-1;
1392 }
1393
1394 static bool emit_newlines(void)
1395 {
1396         unsigned delta = pp_token.base.source_position.lineno - input.output_line;
1397         if (delta == 0)
1398                 return false;
1399
1400         if (delta >= 9) {
1401                 fputc('\n', out);
1402                 print_line_directive(&pp_token.base.source_position, NULL);
1403                 fputc('\n', out);
1404         } else {
1405                 for (unsigned i = 0; i < delta; ++i) {
1406                         fputc('\n', out);
1407                 }
1408         }
1409         input.output_line = pp_token.base.source_position.lineno;
1410
1411         for (unsigned i = 0; i < info.whitespace_at_line_begin; ++i)
1412                 fputc(' ', out);
1413
1414         return true;
1415 }
1416
1417 void set_preprocessor_output(FILE *output)
1418 {
1419         out = output;
1420         if (out != NULL) {
1421                 error_on_unknown_chars   = false;
1422                 resolve_escape_sequences = false;
1423         } else {
1424                 error_on_unknown_chars   = true;
1425                 resolve_escape_sequences = true;
1426         }
1427 }
1428
1429 void emit_pp_token(void)
1430 {
1431         if (!emit_newlines() &&
1432             (info.had_whitespace || tokens_would_paste(last_token, pp_token.kind)))
1433                 fputc(' ', out);
1434
1435         switch (pp_token.kind) {
1436         case T_NUMBER:
1437                 fputs(pp_token.literal.string.begin, out);
1438                 break;
1439
1440         case T_STRING_LITERAL:
1441                 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1442                 fputc('"', out);
1443                 fputs(pp_token.literal.string.begin, out);
1444                 fputc('"', out);
1445                 break;
1446
1447         case T_CHARACTER_CONSTANT:
1448                 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1449                 fputc('\'', out);
1450                 fputs(pp_token.literal.string.begin, out);
1451                 fputc('\'', out);
1452                 break;
1453
1454         case T_MACRO_PARAMETER:
1455                 panic("macro parameter not expanded");
1456
1457         default:
1458                 fputs(pp_token.base.symbol->string, out);
1459                 break;
1460         }
1461         last_token = pp_token.kind;
1462 }
1463
1464 static void eat_pp_directive(void)
1465 {
1466         while (!info.at_line_begin) {
1467                 next_input_token();
1468         }
1469 }
1470
1471 static bool strings_equal(const string_t *string1, const string_t *string2)
1472 {
1473         size_t size = string1->size;
1474         if (size != string2->size)
1475                 return false;
1476
1477         const char *c1 = string1->begin;
1478         const char *c2 = string2->begin;
1479         for (size_t i = 0; i < size; ++i, ++c1, ++c2) {
1480                 if (*c1 != *c2)
1481                         return false;
1482         }
1483         return true;
1484 }
1485
1486 static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
1487 {
1488         if (token1->kind != token2->kind)
1489                 return false;
1490
1491         switch (token1->kind) {
1492         case T_NUMBER:
1493         case T_CHARACTER_CONSTANT:
1494         case T_STRING_LITERAL:
1495                 return strings_equal(&token1->literal.string, &token2->literal.string);
1496
1497         case T_MACRO_PARAMETER:
1498                 return token1->macro_parameter.def->symbol
1499                     == token2->macro_parameter.def->symbol;
1500
1501         default:
1502                 return token1->base.symbol == token2->base.symbol;
1503         }
1504 }
1505
1506 static bool pp_definitions_equal(const pp_definition_t *definition1,
1507                                  const pp_definition_t *definition2)
1508 {
1509         if (definition1->list_len != definition2->list_len)
1510                 return false;
1511
1512         size_t               len = definition1->list_len;
1513         const saved_token_t *t1  = definition1->token_list;
1514         const saved_token_t *t2  = definition2->token_list;
1515         for (size_t i = 0; i < len; ++i, ++t1, ++t2) {
1516                 if (!pp_tokens_equal(&t1->token, &t2->token))
1517                         return false;
1518         }
1519         return true;
1520 }
1521
1522 static bool is_defineable_token(char const *const context)
1523 {
1524         if (info.at_line_begin) {
1525                 errorf(&pp_token.base.source_position, "unexpected end of line after %s", context);
1526         }
1527
1528         symbol_t *const symbol = pp_token.base.symbol;
1529         if (!symbol)
1530                 goto no_ident;
1531
1532         if (pp_token.kind != T_IDENTIFIER) {
1533                 switch (symbol->string[0]) {
1534                 case SYMBOL_CASES:
1535 dollar_sign:
1536                         break;
1537
1538                 default:
1539 no_ident:
1540                         errorf(&pp_token.base.source_position, "expected identifier after %s, got %K", context, &pp_token);
1541                         return false;
1542                 }
1543         }
1544
1545         /* TODO turn this into a flag in pp_def. */
1546         switch (symbol->pp_ID) {
1547         /* Â§6.10.8:4 */
1548         case TP_defined:
1549                 errorf(&pp_token.base.source_position, "%K cannot be used as macro name in %s", &pp_token, context);
1550                 return false;
1551
1552         default:
1553                 return true;
1554         }
1555 }
1556
1557 static void parse_define_directive(void)
1558 {
1559         eat_pp(TP_define);
1560         if (skip_mode) {
1561                 eat_pp_directive();
1562                 return;
1563         }
1564
1565         assert(obstack_object_size(&pp_obstack) == 0);
1566
1567         if (!is_defineable_token("#define"))
1568                 goto error_out;
1569         symbol_t *const symbol = pp_token.base.symbol;
1570
1571         pp_definition_t *new_definition
1572                 = obstack_alloc(&pp_obstack, sizeof(new_definition[0]));
1573         memset(new_definition, 0, sizeof(new_definition[0]));
1574         new_definition->symbol          = symbol;
1575         new_definition->source_position = input.position;
1576
1577         /* this is probably the only place where spaces are significant in the
1578          * lexer (except for the fact that they separate tokens). #define b(x)
1579          * is something else than #define b (x) */
1580         if (input.c == '(') {
1581                 next_input_token();
1582                 eat_token('(');
1583
1584                 while (true) {
1585                         switch (pp_token.kind) {
1586                         case T_DOTDOTDOT:
1587                                 new_definition->is_variadic = true;
1588                                 eat_token(T_DOTDOTDOT);
1589                                 if (pp_token.kind != ')') {
1590                                         errorf(&input.position,
1591                                                         "'...' not at end of macro argument list");
1592                                         goto error_out;
1593                                 }
1594                                 break;
1595
1596                         case T_IDENTIFIER: {
1597                                 pp_definition_t parameter;
1598                                 memset(&parameter, 0, sizeof(parameter));
1599                                 parameter.source_position = pp_token.base.source_position;
1600                                 parameter.symbol          = pp_token.base.symbol;
1601                                 parameter.is_parameter    = true;
1602                                 obstack_grow(&pp_obstack, &parameter, sizeof(parameter));
1603                                 eat_token(T_IDENTIFIER);
1604
1605                                 if (pp_token.kind == ',') {
1606                                         eat_token(',');
1607                                         break;
1608                                 }
1609
1610                                 if (pp_token.kind != ')') {
1611                                         errorf(&pp_token.base.source_position,
1612                                                "expected ',' or ')' after identifier, got %K",
1613                                                &pp_token);
1614                                         goto error_out;
1615                                 }
1616                                 break;
1617                         }
1618
1619                         case ')':
1620                                 eat_token(')');
1621                                 goto finish_argument_list;
1622
1623                         default:
1624                                 errorf(&pp_token.base.source_position,
1625                                        "expected identifier, '...' or ')' in #define argument list, got %K",
1626                                        &pp_token);
1627                                 goto error_out;
1628                         }
1629                 }
1630
1631         finish_argument_list:
1632                 new_definition->has_parameters = true;
1633                 size_t size = obstack_object_size(&pp_obstack);
1634                 new_definition->n_parameters
1635                         = size / sizeof(new_definition->parameters[0]);
1636                 new_definition->parameters = obstack_finish(&pp_obstack);
1637                 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1638                         pp_definition_t *param    = &new_definition->parameters[i];
1639                         symbol_t        *symbol   = param->symbol;
1640                         pp_definition_t *previous = symbol->pp_definition;
1641                         if (previous != NULL
1642                             && previous->function_definition == new_definition) {
1643                                 errorf(&param->source_position,
1644                                        "duplicate macro parameter '%Y'", symbol);
1645                                 param->symbol = sym_anonymous;
1646                                 continue;
1647                         }
1648                         param->parent_expansion    = previous;
1649                         param->function_definition = new_definition;
1650                         symbol->pp_definition      = param;
1651                 }
1652         } else {
1653                 next_input_token();
1654         }
1655
1656         /* construct token list */
1657         assert(obstack_object_size(&pp_obstack) == 0);
1658         while (!info.at_line_begin) {
1659                 if (pp_token.kind == T_IDENTIFIER) {
1660                         const symbol_t  *symbol     = pp_token.base.symbol;
1661                         pp_definition_t *definition = symbol->pp_definition;
1662                         if (definition != NULL
1663                             && definition->function_definition == new_definition) {
1664                             pp_token.kind                = T_MACRO_PARAMETER;
1665                             pp_token.macro_parameter.def = definition;
1666                         }
1667                 }
1668                 saved_token_t saved_token;
1669                 saved_token.token = pp_token;
1670                 saved_token.had_whitespace = info.had_whitespace;
1671                 obstack_grow(&pp_obstack, &saved_token, sizeof(saved_token));
1672                 next_input_token();
1673         }
1674
1675         new_definition->list_len   = obstack_object_size(&pp_obstack)
1676                 / sizeof(new_definition->token_list[0]);
1677         new_definition->token_list = obstack_finish(&pp_obstack);
1678
1679         if (new_definition->has_parameters) {
1680                 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1681                         pp_definition_t *param      = &new_definition->parameters[i];
1682                         symbol_t        *symbol     = param->symbol;
1683                         if (symbol == sym_anonymous)
1684                                 continue;
1685                         assert(symbol->pp_definition == param);
1686                         assert(param->function_definition == new_definition);
1687                         symbol->pp_definition   = param->parent_expansion;
1688                         param->parent_expansion = NULL;
1689                 }
1690         }
1691
1692         pp_definition_t *old_definition = symbol->pp_definition;
1693         if (old_definition != NULL) {
1694                 if (!pp_definitions_equal(old_definition, new_definition)) {
1695                         warningf(WARN_OTHER, &input.position, "multiple definition of macro '%Y' (first defined %P)", symbol, &old_definition->source_position);
1696                 } else {
1697                         /* reuse the old definition */
1698                         obstack_free(&pp_obstack, new_definition);
1699                         new_definition = old_definition;
1700                 }
1701         }
1702
1703         symbol->pp_definition = new_definition;
1704         return;
1705
1706 error_out:
1707         if (obstack_object_size(&pp_obstack) > 0) {
1708                 char *ptr = obstack_finish(&pp_obstack);
1709                 obstack_free(&pp_obstack, ptr);
1710         }
1711         eat_pp_directive();
1712 }
1713
1714 static void parse_undef_directive(void)
1715 {
1716         eat_pp(TP_undef);
1717         if (skip_mode) {
1718                 eat_pp_directive();
1719                 return;
1720         }
1721
1722         if (!is_defineable_token("#undef")) {
1723                 eat_pp_directive();
1724                 return;
1725         }
1726
1727         pp_token.base.symbol->pp_definition = NULL;
1728         next_input_token();
1729
1730         if (!info.at_line_begin) {
1731                 warningf(WARN_OTHER, &input.position, "extra tokens at end of #undef directive");
1732         }
1733         eat_pp_directive();
1734 }
1735
1736 /** behind an #include we can have the special headername lexems.
1737  * They're only allowed behind an #include so they're not recognized
1738  * by the normal next_preprocessing_token. We handle them as a special
1739  * exception here */
1740 static void parse_headername(void)
1741 {
1742         const source_position_t start_position = input.position;
1743         string_t                string         = { NULL, 0, STRING_ENCODING_CHAR };
1744         assert(obstack_object_size(&symbol_obstack) == 0);
1745
1746         if (info.at_line_begin) {
1747                 parse_error("expected headername after #include");
1748                 goto finish_error;
1749         }
1750
1751         /* check wether we have a "... or <... headername */
1752         switch (input.c) {
1753         {
1754                 utf32 delimiter;
1755         case '<': delimiter = '>'; goto parse_name;
1756         case '"': delimiter = '"'; goto parse_name;
1757 parse_name:
1758                 next_char();
1759                 while (true) {
1760                         switch (input.c) {
1761                         case NEWLINE:
1762                         case EOF:
1763                                 errorf(&pp_token.base.source_position, "header name without closing '%c'", (char)delimiter);
1764                                 goto finish_error;
1765
1766                         default:
1767                                 if (input.c == delimiter) {
1768                                         next_char();
1769                                         goto finished_headername;
1770                                 } else {
1771                                         obstack_1grow(&symbol_obstack, (char)input.c);
1772                                         next_char();
1773                                 }
1774                                 break;
1775                         }
1776                 }
1777                 /* we should never be here */
1778         }
1779
1780         default:
1781                 /* TODO: do normal pp_token parsing and concatenate results */
1782                 panic("pp_token concat include not implemented yet");
1783         }
1784
1785 finished_headername:
1786         string = sym_make_string(STRING_ENCODING_CHAR);
1787
1788 finish_error:
1789         pp_token.base.source_position = start_position;
1790         pp_token.kind                 = T_HEADERNAME;
1791         pp_token.literal.string       = string;
1792 }
1793
1794 static bool do_include(bool const system_include, bool const include_next, char const *const headername)
1795 {
1796         size_t const        headername_len = strlen(headername);
1797         searchpath_entry_t *entry;
1798         if (include_next) {
1799                 entry = input.path ? input.path->next : searchpath;
1800         } else {
1801                 if (!system_include) {
1802                         /* put dirname of current input on obstack */
1803                         const char *filename   = input.position.input_name;
1804                         const char *last_slash = strrchr(filename, '/');
1805                         const char *full_name;
1806                         if (last_slash != NULL) {
1807                                 size_t len = last_slash - filename;
1808                                 obstack_grow(&symbol_obstack, filename, len + 1);
1809                                 obstack_grow0(&symbol_obstack, headername, headername_len);
1810                                 char *complete_path = obstack_finish(&symbol_obstack);
1811                                 full_name = identify_string(complete_path);
1812                         } else {
1813                                 full_name = headername;
1814                         }
1815
1816                         FILE *file = fopen(full_name, "r");
1817                         if (file != NULL) {
1818                                 switch_pp_input(file, full_name, NULL);
1819                                 return true;
1820                         }
1821                 }
1822
1823                 entry = searchpath;
1824         }
1825
1826         assert(obstack_object_size(&symbol_obstack) == 0);
1827         /* check searchpath */
1828         for (; entry; entry = entry->next) {
1829             const char *path = entry->path;
1830             size_t      len  = strlen(path);
1831                 obstack_grow(&symbol_obstack, path, len);
1832                 if (path[len-1] != '/')
1833                         obstack_1grow(&symbol_obstack, '/');
1834                 obstack_grow(&symbol_obstack, headername, headername_len+1);
1835
1836                 char *complete_path = obstack_finish(&symbol_obstack);
1837                 FILE *file          = fopen(complete_path, "r");
1838                 if (file != NULL) {
1839                         const char *filename = identify_string(complete_path);
1840                         switch_pp_input(file, filename, entry);
1841                         return true;
1842                 } else {
1843                         obstack_free(&symbol_obstack, complete_path);
1844                 }
1845         }
1846
1847         return false;
1848 }
1849
1850 static void parse_include_directive(bool const include_next)
1851 {
1852         if (skip_mode) {
1853                 eat_pp_directive();
1854                 return;
1855         }
1856
1857         /* don't eat the TP_include here!
1858          * we need an alternative parsing for the next token */
1859         skip_till_newline(true);
1860         bool system_include = input.c == '<';
1861         parse_headername();
1862         string_t headername = pp_token.literal.string;
1863         if (headername.begin == NULL) {
1864                 eat_pp_directive();
1865                 return;
1866         }
1867
1868         bool had_nonwhitespace = skip_till_newline(false);
1869         if (had_nonwhitespace) {
1870                 warningf(WARN_OTHER, &pp_token.base.source_position,
1871                          "extra tokens at end of #include directive");
1872         }
1873
1874         if (n_inputs > INCLUDE_LIMIT) {
1875                 errorf(&pp_token.base.source_position, "#include nested too deeply");
1876                 /* eat \n or EOF */
1877                 next_input_token();
1878                 return;
1879         }
1880
1881         /* switch inputs */
1882         info.whitespace_at_line_begin = 0;
1883         info.had_whitespace           = false;
1884         info.at_line_begin            = true;
1885         emit_newlines();
1886         push_input();
1887         bool res = do_include(system_include, include_next, pp_token.literal.string.begin);
1888         if (res) {
1889                 next_input_token();
1890         } else {
1891                 errorf(&pp_token.base.source_position, "failed including '%S': %s", &pp_token.literal.string, strerror(errno));
1892                 pop_restore_input();
1893         }
1894 }
1895
1896 static pp_conditional_t *push_conditional(void)
1897 {
1898         pp_conditional_t *conditional
1899                 = obstack_alloc(&pp_obstack, sizeof(*conditional));
1900         memset(conditional, 0, sizeof(*conditional));
1901
1902         conditional->parent = conditional_stack;
1903         conditional_stack   = conditional;
1904
1905         return conditional;
1906 }
1907
1908 static void pop_conditional(void)
1909 {
1910         assert(conditional_stack != NULL);
1911         conditional_stack = conditional_stack->parent;
1912 }
1913
1914 void check_unclosed_conditionals(void)
1915 {
1916         while (conditional_stack != NULL) {
1917                 pp_conditional_t *conditional = conditional_stack;
1918
1919                 if (conditional->in_else) {
1920                         errorf(&conditional->source_position, "unterminated #else");
1921                 } else {
1922                         errorf(&conditional->source_position, "unterminated condition");
1923                 }
1924                 pop_conditional();
1925         }
1926 }
1927
1928 static void parse_ifdef_ifndef_directive(bool const is_ifdef)
1929 {
1930         bool condition;
1931         eat_pp(is_ifdef ? TP_ifdef : TP_ifndef);
1932
1933         if (skip_mode) {
1934                 eat_pp_directive();
1935                 pp_conditional_t *conditional = push_conditional();
1936                 conditional->source_position  = pp_token.base.source_position;
1937                 conditional->skip             = true;
1938                 return;
1939         }
1940
1941         if (pp_token.kind != T_IDENTIFIER || info.at_line_begin) {
1942                 errorf(&pp_token.base.source_position,
1943                        "expected identifier after #%s, got %K",
1944                        is_ifdef ? "ifdef" : "ifndef", &pp_token);
1945                 eat_pp_directive();
1946
1947                 /* just take the true case in the hope to avoid further errors */
1948                 condition = true;
1949         } else {
1950                 /* evaluate wether we are in true or false case */
1951                 condition = (bool)pp_token.base.symbol->pp_definition == is_ifdef;
1952                 eat_token(T_IDENTIFIER);
1953
1954                 if (!info.at_line_begin) {
1955                         errorf(&pp_token.base.source_position,
1956                                "extra tokens at end of #%s",
1957                                is_ifdef ? "ifdef" : "ifndef");
1958                         eat_pp_directive();
1959                 }
1960         }
1961
1962         pp_conditional_t *conditional = push_conditional();
1963         conditional->source_position  = pp_token.base.source_position;
1964         conditional->condition        = condition;
1965
1966         if (!condition) {
1967                 skip_mode = true;
1968         }
1969 }
1970
1971 static void parse_else_directive(void)
1972 {
1973         eat_pp(TP_else);
1974
1975         if (!info.at_line_begin) {
1976                 if (!skip_mode) {
1977                         warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #else");
1978                 }
1979                 eat_pp_directive();
1980         }
1981
1982         pp_conditional_t *conditional = conditional_stack;
1983         if (conditional == NULL) {
1984                 errorf(&pp_token.base.source_position, "#else without prior #if");
1985                 return;
1986         }
1987
1988         if (conditional->in_else) {
1989                 errorf(&pp_token.base.source_position,
1990                        "#else after #else (condition started %P)",
1991                        &conditional->source_position);
1992                 skip_mode = true;
1993                 return;
1994         }
1995
1996         conditional->in_else = true;
1997         if (!conditional->skip) {
1998                 skip_mode = conditional->condition;
1999         }
2000         conditional->source_position = pp_token.base.source_position;
2001 }
2002
2003 static void parse_endif_directive(void)
2004 {
2005         eat_pp(TP_endif);
2006
2007         if (!info.at_line_begin) {
2008                 if (!skip_mode) {
2009                         warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #endif");
2010                 }
2011                 eat_pp_directive();
2012         }
2013
2014         pp_conditional_t *conditional = conditional_stack;
2015         if (conditional == NULL) {
2016                 errorf(&pp_token.base.source_position, "#endif without prior #if");
2017                 return;
2018         }
2019
2020         if (!conditional->skip) {
2021                 skip_mode = false;
2022         }
2023         pop_conditional();
2024 }
2025
/** The STDC pragmas distinguished by parse_pragma_directive(). */
typedef enum stdc_pragma_kind_t {
	STDC_UNKNOWN = 0,      /**< no (or no recognized) STDC pragma */
	STDC_FP_CONTRACT,      /**< selected by TP_FP_CONTRACT */
	STDC_FENV_ACCESS,      /**< selected by TP_FENV_ACCESS */
	STDC_CX_LIMITED_RANGE, /**< selected by TP_CX_LIMITED_RANGE */
} stdc_pragma_kind_t;
2032
/** The argument values of a STDC pragma as read by parse_pragma_directive(). */
typedef enum stdc_pragma_value_kind_t {
	STDC_VALUE_UNKNOWN = 0, /**< anything but the values below */
	STDC_VALUE_ON,          /**< selected by TP_ON */
	STDC_VALUE_OFF,         /**< selected by TP_OFF */
	STDC_VALUE_DEFAULT,     /**< selected by TP_DEFAULT */
} stdc_pragma_value_kind_t;
2039
2040 static void parse_pragma_directive(void)
2041 {
2042         eat_pp(TP_pragma);
2043         if (skip_mode) {
2044                 eat_pp_directive();
2045                 return;
2046         }
2047
2048         if (pp_token.kind != T_IDENTIFIER) {
2049                 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
2050                          "expected identifier after #pragma");
2051                 eat_pp_directive();
2052                 return;
2053         }
2054
2055         stdc_pragma_kind_t kind = STDC_UNKNOWN;
2056         if (pp_token.base.symbol->pp_ID == TP_STDC && c_mode & _C99) {
2057                 /* a STDC pragma */
2058                 next_input_token();
2059
2060                 switch (pp_token.base.symbol->pp_ID) {
2061                 case TP_FP_CONTRACT:      kind = STDC_FP_CONTRACT;      break;
2062                 case TP_FENV_ACCESS:      kind = STDC_FENV_ACCESS;      break;
2063                 case TP_CX_LIMITED_RANGE: kind = STDC_CX_LIMITED_RANGE; break;
2064                 default:                  break;
2065                 }
2066                 if (kind != STDC_UNKNOWN) {
2067                         next_input_token();
2068                         stdc_pragma_value_kind_t value;
2069                         switch (pp_token.base.symbol->pp_ID) {
2070                         case TP_ON:      value = STDC_VALUE_ON;      break;
2071                         case TP_OFF:     value = STDC_VALUE_OFF;     break;
2072                         case TP_DEFAULT: value = STDC_VALUE_DEFAULT; break;
2073                         default:         value = STDC_VALUE_UNKNOWN; break;
2074                         }
2075                         if (value == STDC_VALUE_UNKNOWN) {
2076                                 kind = STDC_UNKNOWN;
2077                                 errorf(&pp_token.base.source_position, "bad STDC pragma argument");
2078                         }
2079                 }
2080         }
2081         eat_pp_directive();
2082         if (kind == STDC_UNKNOWN) {
2083                 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
2084                          "encountered unknown #pragma");
2085         }
2086 }
2087
2088 static void parse_line_directive(void)
2089 {
2090         if (pp_token.kind != T_NUMBER) {
2091                 if (!skip_mode)
2092                         parse_error("expected integer");
2093         } else {
2094                 char      *end;
2095                 long const line = strtol(pp_token.literal.string.begin, &end, 0);
2096                 if (*end == '\0') {
2097                         /* use offset -1 as this is about the next line */
2098                         input.position.lineno = line - 1;
2099                         /* force output of line */
2100                         input.output_line = input.position.lineno - 20;
2101                 } else {
2102                         if (!skip_mode) {
2103                                 errorf(&input.position, "'%S' is not a valid line number",
2104                                            &pp_token.literal.string);
2105                         }
2106                 }
2107                 next_input_token();
2108                 if (info.at_line_begin)
2109                         return;
2110         }
2111         if (pp_token.kind == T_STRING_LITERAL
2112             && pp_token.literal.string.encoding == STRING_ENCODING_CHAR) {
2113                 input.position.input_name       = pp_token.literal.string.begin;
2114                 input.position.is_system_header = false;
2115                 next_input_token();
2116
2117                 /* attempt to parse numeric flags as outputted by gcc preprocessor */
2118                 while (!info.at_line_begin && pp_token.kind == T_NUMBER) {
2119                         /* flags:
2120                          * 1 - indicates start of a new file
2121                          * 2 - indicates return from a file
2122                          * 3 - indicates system header
2123                          * 4 - indicates implicit extern "C" in C++ mode
2124                          *
2125                          * currently we're only interested in "3"
2126                          */
2127                         if (streq(pp_token.literal.string.begin, "3")) {
2128                                 input.position.is_system_header = true;
2129                         }
2130                         next_input_token();
2131                 }
2132         }
2133
2134         eat_pp_directive();
2135 }
2136
2137 static void parse_error_directive(void)
2138 {
2139         if (skip_mode) {
2140                 eat_pp_directive();
2141                 return;
2142         }
2143
2144         bool const old_resolve_escape_sequences = resolve_escape_sequences;
2145         resolve_escape_sequences = false;
2146
2147         source_position_t const pos = pp_token.base.source_position;
2148         do {
2149                 if (info.had_whitespace && obstack_object_size(&pp_obstack) != 0)
2150                         obstack_1grow(&pp_obstack, ' ');
2151
2152                 switch (pp_token.kind) {
2153                 case T_NUMBER: {
2154                         string_t const *const str = &pp_token.literal.string;
2155                         obstack_grow(&pp_obstack, str->begin, str->size);
2156                         break;
2157                 }
2158
2159                 {
2160                         char delim;
2161                 case T_STRING_LITERAL:     delim =  '"'; goto string;
2162                 case T_CHARACTER_CONSTANT: delim = '\''; goto string;
2163 string:;
2164                         string_t const *const str = &pp_token.literal.string;
2165                         char     const *const enc = get_string_encoding_prefix(str->encoding);
2166                         obstack_printf(&pp_obstack, "%s%c%s%c", enc, delim, str->begin, delim);
2167                         break;
2168                 }
2169
2170                 default: {
2171                         char const *const str = pp_token.base.symbol->string;
2172                         obstack_grow(&pp_obstack, str, strlen(str));
2173                         break;
2174                 }
2175                 }
2176
2177                 next_input_token();
2178         } while (!info.at_line_begin);
2179
2180         resolve_escape_sequences = old_resolve_escape_sequences;
2181
2182         obstack_1grow(&pp_obstack, '\0');
2183         char *const str = obstack_finish(&pp_obstack);
2184         errorf(&pos, "#%s", str);
2185         obstack_free(&pp_obstack, str);
2186 }
2187
2188 static void parse_preprocessing_directive(void)
2189 {
2190         eat_token('#');
2191
2192         if (info.at_line_begin) {
2193                 /* empty directive */
2194                 return;
2195         }
2196
2197         if (pp_token.base.symbol) {
2198                 switch (pp_token.base.symbol->pp_ID) {
2199                 case TP_define:       parse_define_directive();            break;
2200                 case TP_else:         parse_else_directive();              break;
2201                 case TP_endif:        parse_endif_directive();             break;
2202                 case TP_error:        parse_error_directive();             break;
2203                 case TP_ifdef:        parse_ifdef_ifndef_directive(true);  break;
2204                 case TP_ifndef:       parse_ifdef_ifndef_directive(false); break;
2205                 case TP_include:      parse_include_directive(false);      break;
2206                 case TP_include_next: parse_include_directive(true);       break;
2207                 case TP_line:         next_input_token(); goto line_directive;
2208                 case TP_pragma:       parse_pragma_directive();            break;
2209                 case TP_undef:        parse_undef_directive();             break;
2210                 default:              goto skip;
2211                 }
2212         } else if (pp_token.kind == T_NUMBER) {
2213 line_directive:
2214                 parse_line_directive();
2215         } else {
2216 skip:
2217                 if (!skip_mode) {
2218                         errorf(&pp_token.base.source_position, "invalid preprocessing directive #%K", &pp_token);
2219                 }
2220                 eat_pp_directive();
2221         }
2222
2223         assert(info.at_line_begin);
2224 }
2225
2226 static void finish_current_argument(void)
2227 {
2228         if (current_argument == NULL)
2229                 return;
2230         size_t size = obstack_object_size(&pp_obstack);
2231         current_argument->list_len   = size/sizeof(current_argument->token_list[0]);
2232         current_argument->token_list = obstack_finish(&pp_obstack);
2233 }
2234
2235 void next_preprocessing_token(void)
2236 {
2237 restart:
2238         if (!expand_next()) {
2239                 do {
2240                         next_input_token();
2241                         while (pp_token.kind == '#' && info.at_line_begin) {
2242                                 parse_preprocessing_directive();
2243                         }
2244                 } while (skip_mode && pp_token.kind != T_EOF);
2245         }
2246
2247         const token_kind_t kind = pp_token.kind;
2248         if (current_call == NULL || argument_expanding != NULL) {
2249                 symbol_t *const symbol = pp_token.base.symbol;
2250                 if (symbol) {
2251                         if (kind == T_MACRO_PARAMETER) {
2252                                 assert(current_expansion != NULL);
2253                                 start_expanding(pp_token.macro_parameter.def);
2254                                 goto restart;
2255                         }
2256
2257                         pp_definition_t *const pp_definition = symbol->pp_definition;
2258                         if (pp_definition != NULL && !pp_definition->is_expanding) {
2259                                 if (pp_definition->has_parameters) {
2260
2261                                         /* check if next token is a '(' */
2262                                         whitespace_info_t old_info   = info;
2263                                         token_kind_t      next_token = peek_expansion();
2264                                         if (next_token == T_EOF) {
2265                                                 info.at_line_begin  = false;
2266                                                 info.had_whitespace = false;
2267                                                 skip_whitespace();
2268                                                 if (input.c == '(') {
2269                                                         next_token = '(';
2270                                                 }
2271                                         }
2272
2273                                         if (next_token == '(') {
2274                                                 if (current_expansion == NULL)
2275                                                         expansion_pos = pp_token.base.source_position;
2276                                                 next_preprocessing_token();
2277                                                 assert(pp_token.kind == '(');
2278
2279                                                 pp_definition->parent_expansion = current_expansion;
2280                                                 current_call              = pp_definition;
2281                                                 current_call->expand_pos  = 0;
2282                                                 current_call->expand_info = old_info;
2283                                                 if (current_call->n_parameters > 0) {
2284                                                         current_argument = &current_call->parameters[0];
2285                                                         assert(argument_brace_count == 0);
2286                                                 }
2287                                                 goto restart;
2288                                         } else {
2289                                                 /* skip_whitespaces() skipped newlines and whitespace,
2290                                                  * remember results for next token */
2291                                                 next_info = info;
2292                                                 info      = old_info;
2293                                                 return;
2294                                         }
2295                                 } else {
2296                                         if (current_expansion == NULL)
2297                                                 expansion_pos = pp_token.base.source_position;
2298                                         start_expanding(pp_definition);
2299                                         goto restart;
2300                                 }
2301                         }
2302                 }
2303         }
2304
2305         if (current_call != NULL) {
2306                 /* current_call != NULL */
2307                 if (kind == '(') {
2308                         ++argument_brace_count;
2309                 } else if (kind == ')') {
2310                         if (argument_brace_count > 0) {
2311                                 --argument_brace_count;
2312                         } else {
2313                                 finish_current_argument();
2314                                 assert(kind == ')');
2315                                 start_expanding(current_call);
2316                                 info = current_call->expand_info;
2317                                 current_call     = NULL;
2318                                 current_argument = NULL;
2319                                 goto restart;
2320                         }
2321                 } else if (kind == ',' && argument_brace_count == 0) {
2322                         finish_current_argument();
2323                         current_call->expand_pos++;
2324                         if (current_call->expand_pos >= current_call->n_parameters) {
2325                                 errorf(&pp_token.base.source_position,
2326                                            "too many arguments passed for macro '%Y'",
2327                                            current_call->symbol);
2328                                 current_argument = NULL;
2329                         } else {
2330                                 current_argument
2331                                         = &current_call->parameters[current_call->expand_pos];
2332                         }
2333                         goto restart;
2334                 } else if (kind == T_MACRO_PARAMETER) {
2335                         /* parameters have to be fully expanded before being used as
2336                          * parameters for another macro-call */
2337                         assert(current_expansion != NULL);
2338                         pp_definition_t *argument = pp_token.macro_parameter.def;
2339                         argument_expanding = argument;
2340                         start_expanding(argument);
2341                         goto restart;
2342                 } else if (kind == T_EOF) {
2343                         errorf(&expansion_pos,
2344                                "reached end of file while parsing arguments for '%Y'",
2345                                current_call->symbol);
2346                         return;
2347                 }
2348                 if (current_argument != NULL) {
2349                         saved_token_t saved;
2350                         saved.token = pp_token;
2351                         saved.had_whitespace = info.had_whitespace;
2352                         obstack_grow(&pp_obstack, &saved, sizeof(saved));
2353                 }
2354                 goto restart;
2355         }
2356 }
2357
2358
2359 static void prepend_include_path(const char *path)
2360 {
2361         searchpath_entry_t *entry = OALLOCZ(&config_obstack, searchpath_entry_t);
2362         entry->path = path;
2363         entry->next = searchpath;
2364         searchpath  = entry;
2365 }
2366
2367 static void setup_include_path(void)
2368 {
2369         /* built-in paths */
2370         prepend_include_path("/usr/include");
2371
2372         /* parse environment variable */
2373         const char *cpath = getenv("CPATH");
2374         if (cpath != NULL && *cpath != '\0') {
2375                 const char *begin = cpath;
2376                 const char *c;
2377                 do {
2378                         c = begin;
2379                         while (*c != '\0' && *c != ':')
2380                                 ++c;
2381
2382                         size_t len = c-begin;
2383                         if (len == 0) {
2384                                 /* for gcc compatibility (Matze: I would expect that
2385                                  * nothing happens for an empty entry...) */
2386                                 prepend_include_path(".");
2387                         } else {
2388                                 char *const string = obstack_copy0(&config_obstack, begin, len);
2389                                 prepend_include_path(string);
2390                         }
2391
2392                         begin = c+1;
2393                         /* skip : */
2394                         if (*begin == ':')
2395                                 ++begin;
2396                 } while(*c != '\0');
2397         }
2398 }
2399
2400 static void input_error(unsigned const delta_lines, unsigned const delta_cols, char const *const message)
2401 {
2402         source_position_t pos = pp_token.base.source_position;
2403         pos.lineno += delta_lines;
2404         pos.colno  += delta_cols;
2405         errorf(&pos, "%s", message);
2406 }
2407
2408 void init_preprocessor(void)
2409 {
2410         init_symbols();
2411
2412         obstack_init(&config_obstack);
2413         obstack_init(&pp_obstack);
2414         obstack_init(&input_obstack);
2415         strset_init(&stringset);
2416
2417         setup_include_path();
2418
2419         set_input_error_callback(input_error);
2420 }
2421
2422 void exit_preprocessor(void)
2423 {
2424         obstack_free(&input_obstack, NULL);
2425         obstack_free(&pp_obstack, NULL);
2426         obstack_free(&config_obstack, NULL);
2427
2428         strset_destroy(&stringset);
2429 }
2430
2431 int pptest_main(int argc, char **argv);
2432 int pptest_main(int argc, char **argv)
2433 {
2434         init_symbol_table();
2435         init_preprocessor();
2436         init_tokens();
2437
2438         error_on_unknown_chars   = false;
2439         resolve_escape_sequences = false;
2440
2441         /* simplistic commandline parser */
2442         const char *filename = NULL;
2443         const char *output = NULL;
2444         for (int i = 1; i < argc; ++i) {
2445                 const char *opt = argv[i];
2446                 if (streq(opt, "-I")) {
2447                         prepend_include_path(argv[++i]);
2448                         continue;
2449                 } else if (streq(opt, "-E")) {
2450                         /* ignore */
2451                 } else if (streq(opt, "-o")) {
2452                         output = argv[++i];
2453                         continue;
2454                 } else if (opt[0] == '-') {
2455                         fprintf(stderr, "Unknown option '%s'\n", opt);
2456                 } else {
2457                         if (filename != NULL)
2458                                 fprintf(stderr, "Multiple inputs not supported\n");
2459                         filename = argv[i];
2460                 }
2461         }
2462         if (filename == NULL) {
2463                 fprintf(stderr, "No input specified\n");
2464                 return 1;
2465         }
2466
2467         if (output == NULL) {
2468                 out = stdout;
2469         } else {
2470                 out = fopen(output, "w");
2471                 if (out == NULL) {
2472                         fprintf(stderr, "Couldn't open output '%s'\n", output);
2473                         return 1;
2474                 }
2475         }
2476
2477         /* just here for gcc compatibility */
2478         fprintf(out, "# 1 \"%s\"\n", filename);
2479         fprintf(out, "# 1 \"<built-in>\"\n");
2480         fprintf(out, "# 1 \"<command-line>\"\n");
2481
2482         FILE *file = fopen(filename, "r");
2483         if (file == NULL) {
2484                 fprintf(stderr, "Couldn't open input '%s'\n", filename);
2485                 return 1;
2486         }
2487         switch_pp_input(file, filename, NULL);
2488
2489         for (;;) {
2490                 next_preprocessing_token();
2491                 if (pp_token.kind == T_EOF)
2492                         break;
2493                 emit_pp_token();
2494         }
2495
2496         fputc('\n', out);
2497         check_unclosed_conditionals();
2498         fclose(close_pp_input());
2499         if (out != stdout)
2500                 fclose(out);
2501
2502         exit_tokens();
2503         exit_preprocessor();
2504         exit_symbol_table();
2505
2506         return 0;
2507 }