40d3208fa4d186d398f0f19bf33a368a6cb84aeb
[cparser] / preprocessor.c
1 #include <config.h>
2
3 #include <assert.h>
4 #include <errno.h>
5 #include <string.h>
6 #include <stdbool.h>
7 #include <ctype.h>
8
9 #include "preprocessor.h"
10 #include "token_t.h"
11 #include "symbol_t.h"
12 #include "adt/util.h"
13 #include "adt/error.h"
14 #include "adt/strutil.h"
15 #include "adt/strset.h"
16 #include "lang_features.h"
17 #include "diagnostic.h"
18 #include "string_rep.h"
19 #include "input.h"
20
21 #define MAX_PUTBACK 3
22 #define INCLUDE_LIMIT 199  /* 199 is for gcc "compatibility" */
23
24 typedef struct saved_token_t {
25         token_t token;
26         bool    had_whitespace;
27 } saved_token_t;
28
/** Whitespace/line-start bookkeeping that accompanies a token. */
typedef struct whitespace_info_t whitespace_info_t;
struct whitespace_info_t {
        /** Set when whitespace preceded the current token. */
        bool     had_whitespace;
        /** Set when the current token is the first one on its line; a "#"
         * in that position starts a preprocessing directive. */
        bool     at_line_begin;
        /** Count of spaces seen before the first token of the line. */
        unsigned whitespace_at_line_begin;
};
38
39 struct pp_definition_t {
40         symbol_t          *symbol;
41         source_position_t  source_position;
42         pp_definition_t   *parent_expansion;
43         size_t             expand_pos;
44         whitespace_info_t  expand_info;
45         bool               is_variadic    : 1;
46         bool               is_expanding   : 1;
47         bool               has_parameters : 1;
48         bool               is_parameter   : 1;
49         pp_definition_t   *function_definition;
50         size_t             n_parameters;
51         pp_definition_t   *parameters;
52
53         /* replacement */
54         size_t             list_len;
55         saved_token_t     *token_list;
56 };
57
58 typedef struct pp_conditional_t pp_conditional_t;
59 struct pp_conditional_t {
60         source_position_t  source_position;
61         bool               condition;
62         bool               in_else;
63         /** conditional in skip mode (then+else gets skipped) */
64         bool               skip;
65         pp_conditional_t  *parent;
66 };
67
68 typedef struct pp_input_t pp_input_t;
69 struct pp_input_t {
70         FILE              *file;
71         input_t           *input;
72         utf32              c;
73         utf32              buf[1024+MAX_PUTBACK];
74         const utf32       *bufend;
75         const utf32       *bufpos;
76         source_position_t  position;
77         pp_input_t        *parent;
78         unsigned           output_line;
79 };
80
/** Node of a singly linked list of search path strings. */
typedef struct searchpath_entry_t {
        const char                *path;
        struct searchpath_entry_t *next;
} searchpath_entry_t;
86
87 static pp_input_t      input;
88
89 static pp_input_t     *input_stack;
90 static unsigned        n_inputs;
91 static struct obstack  input_obstack;
92
93 static pp_conditional_t *conditional_stack;
94
95 token_t                  pp_token;
96 bool                     allow_dollar_in_symbol   = true;
97 static bool              resolve_escape_sequences = true;
98 static bool              error_on_unknown_chars   = true;
99 static bool              skip_mode;
100 static FILE             *out;
101 static struct obstack    pp_obstack;
102 static struct obstack    config_obstack;
103 static const char       *printed_input_name = NULL;
104 static source_position_t expansion_pos;
105 static pp_definition_t  *current_expansion  = NULL;
106 static pp_definition_t  *current_call       = NULL;
107 static pp_definition_t  *current_argument   = NULL;
108 static pp_definition_t  *argument_expanding = NULL;
109 static unsigned          argument_brace_count;
110 static strset_t          stringset;
111 static token_kind_t      last_token;
112
113 static searchpath_entry_t *searchpath;
114
115 static whitespace_info_t next_info; /* valid if had_whitespace is true */
116 static whitespace_info_t info;
117
118 static inline void next_char(void);
119 static void next_input_token(void);
120 static void print_line_directive(const source_position_t *pos, const char *add);
121
122 static symbol_t *symbol_colongreater;
123 static symbol_t *symbol_lesscolon;
124 static symbol_t *symbol_lesspercent;
125 static symbol_t *symbol_percentcolon;
126 static symbol_t *symbol_percentcolonpercentcolon;
127 static symbol_t *symbol_percentgreater;
128
129 static void init_symbols(void)
130 {
131         symbol_colongreater             = symbol_table_insert(":>");
132         symbol_lesscolon                = symbol_table_insert("<:");
133         symbol_lesspercent              = symbol_table_insert("<%");
134         symbol_percentcolon             = symbol_table_insert("%:");
135         symbol_percentcolonpercentcolon = symbol_table_insert("%:%:");
136         symbol_percentgreater           = symbol_table_insert("%>");
137 }
138
139 void switch_input(FILE *const file, char const *const filename)
140 {
141         input.file                = file;
142         input.input               = input_from_stream(file, NULL);
143         input.bufend              = NULL;
144         input.bufpos              = NULL;
145         input.output_line         = 0;
146         input.position.input_name = filename;
147         input.position.lineno     = 1;
148
149         /* indicate that we're at a new input */
150         print_line_directive(&input.position, input_stack != NULL ? "1" : NULL);
151
152         /* place a virtual '\n' so we realize we're at line begin */
153         input.position.lineno = 0;
154         input.c               = '\n';
155 }
156
157 FILE* close_input(void)
158 {
159         input_free(input.input);
160
161         FILE* const file = input.file;
162         assert(file);
163
164         input.input  = NULL;
165         input.file   = NULL;
166         input.bufend = NULL;
167         input.bufpos = NULL;
168         input.c      = EOF;
169
170         return file;
171 }
172
173 static void push_input(void)
174 {
175         pp_input_t *saved_input
176                 = obstack_alloc(&input_obstack, sizeof(*saved_input));
177
178         memcpy(saved_input, &input, sizeof(*saved_input));
179
180         /* adjust buffer positions */
181         if (input.bufpos != NULL)
182                 saved_input->bufpos = saved_input->buf + (input.bufpos - input.buf);
183         if (input.bufend != NULL)
184                 saved_input->bufend = saved_input->buf + (input.bufend - input.buf);
185
186         saved_input->parent = input_stack;
187         input_stack         = saved_input;
188         ++n_inputs;
189 }
190
191 static void pop_restore_input(void)
192 {
193         assert(n_inputs > 0);
194         assert(input_stack != NULL);
195
196         pp_input_t *saved_input = input_stack;
197
198         memcpy(&input, saved_input, sizeof(input));
199         input.parent = NULL;
200
201         /* adjust buffer positions */
202         if (saved_input->bufpos != NULL)
203                 input.bufpos = input.buf + (saved_input->bufpos - saved_input->buf);
204         if (saved_input->bufend != NULL)
205                 input.bufend = input.buf + (saved_input->bufend - saved_input->buf);
206
207         input_stack = saved_input->parent;
208         obstack_free(&input_obstack, saved_input);
209         --n_inputs;
210 }
211
212 /**
213  * Prints a parse error message at the current token.
214  *
215  * @param msg   the error message
216  */
217 static void parse_error(const char *msg)
218 {
219         errorf(&pp_token.base.source_position,  "%s", msg);
220 }
221
222 static inline void next_real_char(void)
223 {
224         assert(input.bufpos <= input.bufend);
225         if (input.bufpos >= input.bufend) {
226                 size_t const n = decode(input.input, input.buf + MAX_PUTBACK, lengthof(input.buf) - MAX_PUTBACK);
227                 if (n == 0) {
228                         input.c = EOF;
229                         return;
230                 }
231                 input.bufpos = input.buf + MAX_PUTBACK;
232                 input.bufend = input.bufpos + n;
233         }
234         input.c = *input.bufpos++;
235         ++input.position.colno;
236 }
237
238 /**
239  * Put a character back into the buffer.
240  *
241  * @param pc  the character to put back
242  */
243 static inline void put_back(utf32 const pc)
244 {
245         assert(input.bufpos > input.buf);
246         *(--input.bufpos - input.buf + input.buf) = (char) pc;
247         --input.position.colno;
248 }
249
/*
 * NEWLINE is meant to be written as `case NEWLINE:` inside a switch over
 * input.c. It expands to the case labels '\r' and '\n' plus the code that
 * consumes one line ending ("\r", "\r\n" or "\n"), increments
 * input.position.lineno and resets input.position.colno to 1. The expansion
 * ends in the label `newline`, so the colon written at the use site
 * completes it and the statements following `case NEWLINE:` run after the
 * line ending has been consumed.
 * Because the expansion defines the label `newline`, it can be used only
 * once per function.
 */
#define NEWLINE \
        '\r': \
                next_char(); \
                if (input.c == '\n') { \
        case '\n': \
                        next_char(); \
                } \
                ++input.position.lineno; \
                input.position.colno = 1; \
                goto newline; \
                newline // Let it look like an ordinary case label.

/* Asserts that the current character is c_type, then advances to the next
 * character. */
#define eat(c_type) (assert(input.c == c_type), next_char())
263
264 static void maybe_concat_lines(void)
265 {
266         eat('\\');
267
268         switch (input.c) {
269         case NEWLINE:
270                 info.whitespace_at_line_begin = 0;
271                 return;
272
273         default:
274                 break;
275         }
276
277         put_back(input.c);
278         input.c = '\\';
279 }
280
281 /**
282  * Set c to the next input character, ie.
283  * after expanding trigraphs.
284  */
285 static inline void next_char(void)
286 {
287         next_real_char();
288
289         /* filter trigraphs and concatenated lines */
290         if (UNLIKELY(input.c == '\\')) {
291                 maybe_concat_lines();
292                 goto end_of_next_char;
293         }
294
295         if (LIKELY(input.c != '?'))
296                 goto end_of_next_char;
297
298         next_real_char();
299         if (LIKELY(input.c != '?')) {
300                 put_back(input.c);
301                 input.c = '?';
302                 goto end_of_next_char;
303         }
304
305         next_real_char();
306         switch (input.c) {
307         case '=': input.c = '#'; break;
308         case '(': input.c = '['; break;
309         case '/': input.c = '\\'; maybe_concat_lines(); break;
310         case ')': input.c = ']'; break;
311         case '\'': input.c = '^'; break;
312         case '<': input.c = '{'; break;
313         case '!': input.c = '|'; break;
314         case '>': input.c = '}'; break;
315         case '-': input.c = '~'; break;
316         default:
317                 put_back(input.c);
318                 put_back('?');
319                 input.c = '?';
320                 break;
321         }
322
323 end_of_next_char:;
324 #ifdef DEBUG_CHARS
325         printf("nchar '%c'\n", input.c);
326 #endif
327 }
328
329
330
/**
 * Returns true if the given char is an octal digit ('0' to '7').
 *
 * @param chr  the character to check
 */
static inline bool is_octal_digit(int chr)
{
        /* the decimal digits are guaranteed to have consecutive values */
        return '0' <= chr && chr <= '7';
}
352
/**
 * Returns the numeric value (0 to 15) of a decimal or hexadecimal digit
 * character. Any other character ends in panic().
 *
 * Only the decimal digits are guaranteed to be consecutive, so the letters
 * are matched one by one.
 */
static int digit_value(int digit)
{
        if ('0' <= digit && digit <= '9')
                return digit - '0';

        switch (digit) {
        case 'a': case 'A': return 10;
        case 'b': case 'B': return 11;
        case 'c': case 'C': return 12;
        case 'd': case 'D': return 13;
        case 'e': case 'E': return 14;
        case 'f': case 'F': return 15;
        default:
                panic("wrong character given");
        }
}
386
387 /**
388  * Parses an octal character sequence.
389  *
390  * @param first_digit  the already read first digit
391  */
392 static utf32 parse_octal_sequence(const utf32 first_digit)
393 {
394         assert(is_octal_digit(first_digit));
395         utf32 value = digit_value(first_digit);
396         if (!is_octal_digit(input.c)) return value;
397         value = 8 * value + digit_value(input.c);
398         next_char();
399         if (!is_octal_digit(input.c)) return value;
400         value = 8 * value + digit_value(input.c);
401         next_char();
402         return value;
403
404 }
405
406 /**
407  * Parses a hex character sequence.
408  */
409 static utf32 parse_hex_sequence(void)
410 {
411         utf32 value = 0;
412         while (isxdigit(input.c)) {
413                 value = 16 * value + digit_value(input.c);
414                 next_char();
415         }
416         return value;
417 }
418
419 static bool is_universal_char_valid(utf32 const v)
420 {
421         /* C11 Â§6.4.3:2 */
422         if (v < 0xA0U && v != 0x24 && v != 0x40 && v != 0x60)
423                 return false;
424         if (0xD800 <= v && v <= 0xDFFF)
425                 return false;
426         return true;
427 }
428
429 static utf32 parse_universal_char(unsigned const n_digits)
430 {
431         utf32 v = 0;
432         for (unsigned k = n_digits; k != 0; --k) {
433                 if (isxdigit(input.c)) {
434                         v = 16 * v + digit_value(input.c);
435                         if (!resolve_escape_sequences)
436                                 obstack_1grow(&symbol_obstack, input.c);
437                         next_char();
438                 } else {
439                         errorf(&input.position,
440                                "short universal character name, expected %u more digits",
441                                    k);
442                         break;
443                 }
444         }
445         if (!is_universal_char_valid(v)) {
446                 errorf(&input.position,
447                        "\\%c%0*X is not a valid universal character name",
448                        n_digits == 4 ? 'u' : 'U', (int)n_digits, v);
449         }
450         return v;
451 }
452
453 static bool is_universal_char_valid_identifier(utf32 const v)
454 {
455         /* C11 Annex D.1 */
456         if (                v == 0x000A8) return true;
457         if (                v == 0x000AA) return true;
458         if (                v == 0x000AD) return true;
459         if (                v == 0x000AF) return true;
460         if (0x000B2 <= v && v <= 0x000B5) return true;
461         if (0x000B7 <= v && v <= 0x000BA) return true;
462         if (0x000BC <= v && v <= 0x000BE) return true;
463         if (0x000C0 <= v && v <= 0x000D6) return true;
464         if (0x000D8 <= v && v <= 0x000F6) return true;
465         if (0x000F8 <= v && v <= 0x000FF) return true;
466         if (0x00100 <= v && v <= 0x0167F) return true;
467         if (0x01681 <= v && v <= 0x0180D) return true;
468         if (0x0180F <= v && v <= 0x01FFF) return true;
469         if (0x0200B <= v && v <= 0x0200D) return true;
470         if (0x0202A <= v && v <= 0x0202E) return true;
471         if (0x0203F <= v && v <= 0x02040) return true;
472         if (                v == 0x02054) return true;
473         if (0x02060 <= v && v <= 0x0206F) return true;
474         if (0x02070 <= v && v <= 0x0218F) return true;
475         if (0x02460 <= v && v <= 0x024FF) return true;
476         if (0x02776 <= v && v <= 0x02793) return true;
477         if (0x02C00 <= v && v <= 0x02DFF) return true;
478         if (0x02E80 <= v && v <= 0x02FFF) return true;
479         if (0x03004 <= v && v <= 0x03007) return true;
480         if (0x03021 <= v && v <= 0x0302F) return true;
481         if (0x03031 <= v && v <= 0x0303F) return true;
482         if (0x03040 <= v && v <= 0x0D7FF) return true;
483         if (0x0F900 <= v && v <= 0x0FD3D) return true;
484         if (0x0FD40 <= v && v <= 0x0FDCF) return true;
485         if (0x0FDF0 <= v && v <= 0x0FE44) return true;
486         if (0x0FE47 <= v && v <= 0x0FFFD) return true;
487         if (0x10000 <= v && v <= 0x1FFFD) return true;
488         if (0x20000 <= v && v <= 0x2FFFD) return true;
489         if (0x30000 <= v && v <= 0x3FFFD) return true;
490         if (0x40000 <= v && v <= 0x4FFFD) return true;
491         if (0x50000 <= v && v <= 0x5FFFD) return true;
492         if (0x60000 <= v && v <= 0x6FFFD) return true;
493         if (0x70000 <= v && v <= 0x7FFFD) return true;
494         if (0x80000 <= v && v <= 0x8FFFD) return true;
495         if (0x90000 <= v && v <= 0x9FFFD) return true;
496         if (0xA0000 <= v && v <= 0xAFFFD) return true;
497         if (0xB0000 <= v && v <= 0xBFFFD) return true;
498         if (0xC0000 <= v && v <= 0xCFFFD) return true;
499         if (0xD0000 <= v && v <= 0xDFFFD) return true;
500         if (0xE0000 <= v && v <= 0xEFFFD) return true;
501         return false;
502 }
503
504 static bool is_universal_char_valid_identifier_start(utf32 const v)
505 {
506         /* C11 Annex D.2 */
507         if (0x0300 <= v && v <= 0x036F) return false;
508         if (0x1DC0 <= v && v <= 0x1DFF) return false;
509         if (0x20D0 <= v && v <= 0x20FF) return false;
510         if (0xFE20 <= v && v <= 0xFE2F) return false;
511         return true;
512 }
513
514 /**
515  * Parse an escape sequence.
516  */
517 static utf32 parse_escape_sequence(void)
518 {
519         eat('\\');
520
521         utf32 const ec = input.c;
522         next_char();
523
524         switch (ec) {
525         case '"':  return '"';
526         case '\'': return '\'';
527         case '\\': return '\\';
528         case '?': return '\?';
529         case 'a': return '\a';
530         case 'b': return '\b';
531         case 'f': return '\f';
532         case 'n': return '\n';
533         case 'r': return '\r';
534         case 't': return '\t';
535         case 'v': return '\v';
536         case 'x':
537                 return parse_hex_sequence();
538         case '0':
539         case '1':
540         case '2':
541         case '3':
542         case '4':
543         case '5':
544         case '6':
545         case '7':
546                 return parse_octal_sequence(ec);
547         case EOF:
548                 parse_error("reached end of file while parsing escape sequence");
549                 return EOF;
550         /* \E is not documented, but handled, by GCC.  It is acceptable according
551          * to Â§6.11.4, whereas \e is not. */
552         case 'E':
553         case 'e':
554                 if (c_mode & _GNUC)
555                         return 27;   /* hopefully 27 is ALWAYS the code for ESCAPE */
556                 break;
557
558         case 'U': return parse_universal_char(8);
559         case 'u': return parse_universal_char(4);
560
561         default:
562                 break;
563         }
564         /* Â§6.4.4.4:8 footnote 64 */
565         parse_error("unknown escape sequence");
566         return EOF;
567 }
568
569 static const char *identify_string(char *string)
570 {
571         const char *result = strset_insert(&stringset, string);
572         if (result != string) {
573                 obstack_free(&symbol_obstack, string);
574         }
575         return result;
576 }
577
578 static string_t sym_make_string(string_encoding_t const enc)
579 {
580         obstack_1grow(&symbol_obstack, '\0');
581         size_t      const len    = obstack_object_size(&symbol_obstack) - 1;
582         char       *const string = obstack_finish(&symbol_obstack);
583         char const *const result = identify_string(string);
584         return (string_t){ result, len, enc };
585 }
586
587 string_t make_string(char const *const string)
588 {
589         obstack_grow(&symbol_obstack, string, strlen(string));
590         return sym_make_string(STRING_ENCODING_CHAR);
591 }
592
593 static void parse_string(utf32 const delimiter, token_kind_t const kind,
594                          string_encoding_t const enc,
595                          char const *const context)
596 {
597         const unsigned start_linenr = input.position.lineno;
598
599         eat(delimiter);
600
601         while (true) {
602                 switch (input.c) {
603                 case '\\': {
604                         if (resolve_escape_sequences) {
605                                 utf32 const tc = parse_escape_sequence();
606                                 if (enc == STRING_ENCODING_CHAR) {
607                                         if (tc >= 0x100) {
608                                                 warningf(WARN_OTHER, &pp_token.base.source_position, "escape sequence out of range");
609                                         }
610                                         obstack_1grow(&symbol_obstack, tc);
611                                 } else {
612                                         obstack_grow_utf8(&symbol_obstack, tc);
613                                 }
614                         } else {
615                                 obstack_1grow(&symbol_obstack, (char)input.c);
616                                 next_char();
617                                 obstack_1grow(&symbol_obstack, (char)input.c);
618                                 next_char();
619                         }
620                         break;
621                 }
622
623                 case NEWLINE:
624                         errorf(&pp_token.base.source_position, "newline while parsing %s", context);
625                         break;
626
627                 case EOF: {
628                         source_position_t source_position;
629                         source_position.input_name = pp_token.base.source_position.input_name;
630                         source_position.lineno     = start_linenr;
631                         errorf(&source_position, "EOF while parsing %s", context);
632                         goto end_of_string;
633                 }
634
635                 default:
636                         if (input.c == delimiter) {
637                                 next_char();
638                                 goto end_of_string;
639                         } else {
640                                 obstack_grow_utf8(&symbol_obstack, input.c);
641                                 next_char();
642                                 break;
643                         }
644                 }
645         }
646
647 end_of_string:
648         pp_token.kind           = kind;
649         pp_token.literal.string = sym_make_string(enc);
650 }
651
652 static void parse_string_literal(string_encoding_t const enc)
653 {
654         parse_string('"', T_STRING_LITERAL, enc, "string literal");
655 }
656
657 static void parse_character_constant(string_encoding_t const enc)
658 {
659         parse_string('\'', T_CHARACTER_CONSTANT, enc, "character constant");
660         if (pp_token.literal.string.size == 0) {
661                 parse_error("empty character constant");
662         }
663 }
664
/*
 * The following macros are written after the keyword `case` and followed by
 * a colon at the use site, which is why each of them starts with a bare
 * constant and ends without a colon.
 */

/* Characters of a symbol except 'e', 'E', 'p' and 'P'. A '$' jumps to the
 * label `dollar_sign` (to be provided at the use site) when
 * allow_dollar_in_symbol is false. */
#define SYMBOL_CASES_WITHOUT_E_P \
             '$': if (!allow_dollar_in_symbol) goto dollar_sign; \
        case 'a': case 'b': case 'c': case 'd': case 'f': case 'g': \
        case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': \
        case 'n': case 'o': case 'q': case 'r': case 's': case 't': \
        case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': \
        case 'A': case 'B': case 'C': case 'D': case 'F': case 'G': \
        case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': \
        case 'N': case 'O': case 'Q': case 'R': case 'S': case 'T': \
        case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': \
        case '_'

/* All characters of a symbol, including 'e', 'E', 'p' and 'P'. */
#define SYMBOL_CASES \
             SYMBOL_CASES_WITHOUT_E_P: \
        case 'e': case 'p': case 'E': case 'P'

/* The ten decimal digits. */
#define DIGIT_CASES \
             '0': case '1': case '2': case '3': case '4': \
        case '5': case '6': case '7': case '8': case '9'
735
736 static void start_expanding(pp_definition_t *definition)
737 {
738         definition->parent_expansion = current_expansion;
739         definition->expand_pos       = 0;
740         definition->is_expanding     = true;
741         if (definition->list_len > 0) {
742                 definition->token_list[0].had_whitespace
743                         = info.had_whitespace;
744         }
745         current_expansion = definition;
746 }
747
748 static void finished_expanding(pp_definition_t *definition)
749 {
750         assert(definition->is_expanding);
751         pp_definition_t *parent = definition->parent_expansion;
752         definition->parent_expansion = NULL;
753         definition->is_expanding     = false;
754
755         /* stop further expanding once we expanded a parameter used in a
756          * sub macro-call */
757         if (definition == argument_expanding)
758                 argument_expanding = NULL;
759
760         assert(current_expansion == definition);
761         current_expansion = parent;
762 }
763
764 static inline void set_punctuator(token_kind_t const kind)
765 {
766         pp_token.kind        = kind;
767         pp_token.base.symbol = token_symbols[kind];
768 }
769
770 static inline void set_digraph(token_kind_t const kind, symbol_t *const symbol)
771 {
772         pp_token.kind        = kind;
773         pp_token.base.symbol = symbol;
774 }
775
776 /**
777  * returns next final token from a preprocessor macro expansion
778  */
779 static bool expand_next(void)
780 {
781         if (current_expansion == NULL)
782                 return false;
783
784 restart:;
785         size_t pos = current_expansion->expand_pos;
786         if (pos >= current_expansion->list_len) {
787                 finished_expanding(current_expansion);
788                 /* it was the outermost expansion, parse pptoken normally */
789                 if (current_expansion == NULL) {
790                         return false;
791                 }
792                 goto restart;
793         }
794         const saved_token_t *saved = &current_expansion->token_list[pos++];
795         pp_token = saved->token;
796
797         if (current_expansion->expand_pos > 0)
798                 info.had_whitespace = saved->had_whitespace;
799         pp_token.base.source_position = expansion_pos;
800         ++current_expansion->expand_pos;
801
802         return true;
803 }
804
805 /**
806  * Returns the next token kind found when continuing the current expansions
807  * without starting new sub-expansions.
808  */
809 static token_kind_t peek_expansion(void)
810 {
811         pp_definition_t *expansion = current_expansion;
812         while (expansion != NULL && expansion->expand_pos >= expansion->list_len) {
813                 expansion = expansion->parent_expansion;
814         }
815         if (expansion == NULL)
816                 return T_EOF;
817         return expansion->token_list[expansion->expand_pos].token.kind;
818 }
819
820 static void skip_line_comment(void)
821 {
822         info.had_whitespace = true;
823         while (true) {
824                 switch (input.c) {
825                 case EOF:
826                         return;
827
828                 case '\r':
829                 case '\n':
830                         return;
831
832                 default:
833                         next_char();
834                         break;
835                 }
836         }
837 }
838
839 static void skip_multiline_comment(void)
840 {
841         info.had_whitespace = true;
842
843         unsigned start_linenr = input.position.lineno;
844         while (true) {
845                 switch (input.c) {
846                 case '/':
847                         next_char();
848                         if (input.c == '*') {
849                                 /* TODO: nested comment, warn here */
850                         }
851                         break;
852                 case '*':
853                         next_char();
854                         if (input.c == '/') {
855                                 if (input.position.lineno != input.output_line)
856                                         info.whitespace_at_line_begin = input.position.colno;
857                                 next_char();
858                                 return;
859                         }
860                         break;
861
862                 case NEWLINE:
863                         break;
864
865                 case EOF: {
866                         source_position_t source_position;
867                         source_position.input_name = pp_token.base.source_position.input_name;
868                         source_position.lineno     = start_linenr;
869                         errorf(&source_position, "at end of file while looking for comment end");
870                         return;
871                 }
872
873                 default:
874                         next_char();
875                         break;
876                 }
877         }
878 }
879
880 static bool skip_till_newline(bool stop_at_non_whitespace)
881 {
882         bool res = false;
883         while (true) {
884                 switch (input.c) {
885                 case ' ':
886                 case '\t':
887                         next_char();
888                         continue;
889
890                 case '/':
891                         next_char();
892                         if (input.c == '/') {
893                                 next_char();
894                                 skip_line_comment();
895                                 continue;
896                         } else if (input.c == '*') {
897                                 next_char();
898                                 skip_multiline_comment();
899                                 continue;
900                         } else {
901                                 put_back(input.c);
902                                 input.c = '/';
903                         }
904                         return true;
905
906                 case NEWLINE:
907                         return res;
908
909                 default:
910                         if (stop_at_non_whitespace)
911                                 return false;
912                         res = true;
913                         next_char();
914                         continue;
915                 }
916         }
917 }
918
919 static void skip_whitespace(void)
920 {
921         while (true) {
922                 switch (input.c) {
923                 case ' ':
924                 case '\t':
925                         ++info.whitespace_at_line_begin;
926                         info.had_whitespace = true;
927                         next_char();
928                         continue;
929
930                 case NEWLINE:
931                         info.at_line_begin  = true;
932                         info.had_whitespace = true;
933                         info.whitespace_at_line_begin = 0;
934                         continue;
935
936                 case '/':
937                         next_char();
938                         if (input.c == '/') {
939                                 next_char();
940                                 skip_line_comment();
941                                 continue;
942                         } else if (input.c == '*') {
943                                 next_char();
944                                 skip_multiline_comment();
945                                 continue;
946                         } else {
947                                 put_back(input.c);
948                                 input.c = '/';
949                         }
950                         return;
951
952                 default:
953                         return;
954                 }
955         }
956 }
957
958 static inline void eat_pp(pp_token_kind_t const kind)
959 {
960         assert(pp_token.base.symbol->pp_ID == kind);
961         (void) kind;
962         next_input_token();
963 }
964
965 static inline void eat_token(token_kind_t const kind)
966 {
967         assert(pp_token.kind == kind);
968         (void)kind;
969         next_input_token();
970 }
971
972 static void parse_symbol(void)
973 {
974         assert(obstack_object_size(&symbol_obstack) == 0);
975         while (true) {
976                 switch (input.c) {
977                 case DIGIT_CASES:
978                 case SYMBOL_CASES:
979                         obstack_1grow(&symbol_obstack, (char) input.c);
980                         next_char();
981                         break;
982
983                 case '\\':
984                         next_char();
985                         switch (input.c) {
986                         {
987                                 unsigned n;
988                         case 'U': n = 8; goto universal;
989                         case 'u': n = 4; goto universal;
990 universal:
991                                 if (!resolve_escape_sequences) {
992                                         obstack_1grow(&symbol_obstack, '\\');
993                                         obstack_1grow(&symbol_obstack, input.c);
994                                 }
995                                 next_char();
996                                 utf32 const v = parse_universal_char(n);
997                                 if (!is_universal_char_valid_identifier(v)) {
998                                         if (is_universal_char_valid(v)) {
999                                                 errorf(&input.position,
1000                                                            "universal character \\%c%0*X is not valid in an identifier",
1001                                                            n == 4 ? 'u' : 'U', (int)n, v);
1002                                         }
1003                                 } else if (obstack_object_size(&symbol_obstack) == 0 && !is_universal_char_valid_identifier_start(v)) {
1004                                         errorf(&input.position,
1005                                                    "universal character \\%c%0*X is not valid as start of an identifier",
1006                                                    n == 4 ? 'u' : 'U', (int)n, v);
1007                                 } else if (resolve_escape_sequences) {
1008                                         obstack_grow_utf8(&symbol_obstack, v);
1009                                 }
1010                                 break;
1011                         }
1012
1013                         default:
1014                                 put_back(input.c);
1015                                 input.c = '\\';
1016                                 goto end_symbol;
1017                         }
1018
1019                 default:
1020 dollar_sign:
1021                         goto end_symbol;
1022                 }
1023         }
1024
1025 end_symbol:
1026         obstack_1grow(&symbol_obstack, '\0');
1027         char *string = obstack_finish(&symbol_obstack);
1028
1029         /* might be a wide string or character constant ( L"string"/L'c' ) */
1030         if (input.c == '"' && string[0] == 'L' && string[1] == '\0') {
1031                 obstack_free(&symbol_obstack, string);
1032                 parse_string_literal(STRING_ENCODING_WIDE);
1033                 return;
1034         } else if (input.c == '\'' && string[0] == 'L' && string[1] == '\0') {
1035                 obstack_free(&symbol_obstack, string);
1036                 parse_character_constant(STRING_ENCODING_WIDE);
1037                 return;
1038         }
1039
1040         symbol_t *symbol = symbol_table_insert(string);
1041
1042         pp_token.kind        = symbol->ID;
1043         pp_token.base.symbol = symbol;
1044
1045         /* we can free the memory from symbol obstack if we already had an entry in
1046          * the symbol table */
1047         if (symbol->string != string) {
1048                 obstack_free(&symbol_obstack, string);
1049         }
1050 }
1051
1052 static void parse_number(void)
1053 {
1054         obstack_1grow(&symbol_obstack, (char) input.c);
1055         next_char();
1056
1057         while (true) {
1058                 switch (input.c) {
1059                 case '.':
1060                 case DIGIT_CASES:
1061                 case SYMBOL_CASES_WITHOUT_E_P:
1062                         obstack_1grow(&symbol_obstack, (char) input.c);
1063                         next_char();
1064                         break;
1065
1066                 case 'e':
1067                 case 'p':
1068                 case 'E':
1069                 case 'P':
1070                         obstack_1grow(&symbol_obstack, (char) input.c);
1071                         next_char();
1072                         if (input.c == '+' || input.c == '-') {
1073                                 obstack_1grow(&symbol_obstack, (char) input.c);
1074                                 next_char();
1075                         }
1076                         break;
1077
1078                 default:
1079 dollar_sign:
1080                         goto end_number;
1081                 }
1082         }
1083
1084 end_number:
1085         pp_token.kind           = T_NUMBER;
1086         pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
1087 }
1088
/*
 * Helper macros for lexing punctuators that share a common first character.
 * They are meant to be used together inside a function returning void:
 * MAYBE_PROLOG opens a switch, MAYBE/MAYBE_DIGRAPH add cases to it and
 * ELSE/ELSE_CODE supply the default case and close the switch.
 */

/** Advance to the next input character and open a switch on it. */
#define MAYBE_PROLOG \
        next_char(); \
        switch (input.c) {

/** If the current character is @p ch: consume it, make the current token the
 * punctuator @p kind and return from the enclosing function. */
#define MAYBE(ch, kind) \
        case ch: \
                next_char(); \
                set_punctuator(kind); \
                return;

/** Like MAYBE, but the token is set with set_digraph(kind, symbol). */
#define MAYBE_DIGRAPH(ch, kind, symbol) \
        case ch: \
                next_char(); \
                set_digraph(kind, symbol); \
                return;

/** Default case: execute @p code, return from the enclosing function and
 * close the switch opened by MAYBE_PROLOG. */
#define ELSE_CODE(code) \
        default: \
                code \
                return; \
        }

/** Default case that makes the current token the punctuator @p kind. */
#define ELSE(kind) ELSE_CODE(set_punctuator(kind);)
1112
1113 /** identifies and returns the next preprocessing token contained in the
1114  * input stream. No macro expansion is performed. */
1115 static void next_input_token(void)
1116 {
1117         if (next_info.had_whitespace) {
1118                 info = next_info;
1119                 next_info.had_whitespace = false;
1120         } else {
1121                 info.at_line_begin  = false;
1122                 info.had_whitespace = false;
1123         }
1124 restart:
1125         pp_token.base.source_position = input.position;
1126         pp_token.base.symbol          = NULL;
1127
1128         switch (input.c) {
1129         case ' ':
1130         case '\t':
1131                 info.whitespace_at_line_begin++;
1132                 info.had_whitespace = true;
1133                 next_char();
1134                 goto restart;
1135
1136         case NEWLINE:
1137                 info.at_line_begin            = true;
1138                 info.had_whitespace           = true;
1139                 info.whitespace_at_line_begin = 0;
1140                 goto restart;
1141
1142         case SYMBOL_CASES:
1143                 parse_symbol();
1144                 return;
1145
1146         case DIGIT_CASES:
1147                 parse_number();
1148                 return;
1149
1150         case '"':
1151                 parse_string_literal(STRING_ENCODING_CHAR);
1152                 return;
1153
1154         case '\'':
1155                 parse_character_constant(STRING_ENCODING_CHAR);
1156                 return;
1157
1158         case '.':
1159                 MAYBE_PROLOG
1160                         case '0':
1161                         case '1':
1162                         case '2':
1163                         case '3':
1164                         case '4':
1165                         case '5':
1166                         case '6':
1167                         case '7':
1168                         case '8':
1169                         case '9':
1170                                 put_back(input.c);
1171                                 input.c = '.';
1172                                 parse_number();
1173                                 return;
1174
1175                         case '.':
1176                                 MAYBE_PROLOG
1177                                 MAYBE('.', T_DOTDOTDOT)
1178                                 ELSE_CODE(
1179                                         put_back(input.c);
1180                                         input.c = '.';
1181                                         set_punctuator('.');
1182                                 )
1183                 ELSE('.')
1184         case '&':
1185                 MAYBE_PROLOG
1186                 MAYBE('&', T_ANDAND)
1187                 MAYBE('=', T_ANDEQUAL)
1188                 ELSE('&')
1189         case '*':
1190                 MAYBE_PROLOG
1191                 MAYBE('=', T_ASTERISKEQUAL)
1192                 ELSE('*')
1193         case '+':
1194                 MAYBE_PROLOG
1195                 MAYBE('+', T_PLUSPLUS)
1196                 MAYBE('=', T_PLUSEQUAL)
1197                 ELSE('+')
1198         case '-':
1199                 MAYBE_PROLOG
1200                 MAYBE('>', T_MINUSGREATER)
1201                 MAYBE('-', T_MINUSMINUS)
1202                 MAYBE('=', T_MINUSEQUAL)
1203                 ELSE('-')
1204         case '!':
1205                 MAYBE_PROLOG
1206                 MAYBE('=', T_EXCLAMATIONMARKEQUAL)
1207                 ELSE('!')
1208         case '/':
1209                 MAYBE_PROLOG
1210                 MAYBE('=', T_SLASHEQUAL)
1211                 case '*':
1212                         next_char();
1213                         skip_multiline_comment();
1214                         goto restart;
1215                 case '/':
1216                         next_char();
1217                         skip_line_comment();
1218                         goto restart;
1219                 ELSE('/')
1220         case '%':
1221                 MAYBE_PROLOG
1222                 MAYBE_DIGRAPH('>', '}', symbol_percentgreater)
1223                 MAYBE('=', T_PERCENTEQUAL)
1224                 case ':':
1225                         MAYBE_PROLOG
1226                         case '%':
1227                                 MAYBE_PROLOG
1228                                 MAYBE_DIGRAPH(':', T_HASHHASH, symbol_percentcolonpercentcolon)
1229                                 ELSE_CODE(
1230                                         put_back(input.c);
1231                                         input.c = '%';
1232                                         goto digraph_percentcolon;
1233                                 )
1234                         ELSE_CODE(
1235 digraph_percentcolon:
1236                                 set_digraph('#', symbol_percentcolon);
1237                         )
1238                 ELSE('%')
1239         case '<':
1240                 MAYBE_PROLOG
1241                 MAYBE_DIGRAPH(':', '[', symbol_lesscolon)
1242                 MAYBE_DIGRAPH('%', '{', symbol_lesspercent)
1243                 MAYBE('=', T_LESSEQUAL)
1244                 case '<':
1245                         MAYBE_PROLOG
1246                         MAYBE('=', T_LESSLESSEQUAL)
1247                         ELSE(T_LESSLESS)
1248                 ELSE('<')
1249         case '>':
1250                 MAYBE_PROLOG
1251                 MAYBE('=', T_GREATEREQUAL)
1252                 case '>':
1253                         MAYBE_PROLOG
1254                         MAYBE('=', T_GREATERGREATEREQUAL)
1255                         ELSE(T_GREATERGREATER)
1256                 ELSE('>')
1257         case '^':
1258                 MAYBE_PROLOG
1259                 MAYBE('=', T_CARETEQUAL)
1260                 ELSE('^')
1261         case '|':
1262                 MAYBE_PROLOG
1263                 MAYBE('=', T_PIPEEQUAL)
1264                 MAYBE('|', T_PIPEPIPE)
1265                 ELSE('|')
1266         case ':':
1267                 MAYBE_PROLOG
1268                 MAYBE_DIGRAPH('>', ']', symbol_colongreater)
1269                 case ':':
1270                         if (c_mode & _CXX) {
1271                                 next_char();
1272                                 set_punctuator(T_COLONCOLON);
1273                                 return;
1274                         }
1275                         /* FALLTHROUGH */
1276                 ELSE(':')
1277         case '=':
1278                 MAYBE_PROLOG
1279                 MAYBE('=', T_EQUALEQUAL)
1280                 ELSE('=')
1281         case '#':
1282                 MAYBE_PROLOG
1283                 MAYBE('#', T_HASHHASH)
1284                 ELSE('#')
1285
1286         case '?':
1287         case '[':
1288         case ']':
1289         case '(':
1290         case ')':
1291         case '{':
1292         case '}':
1293         case '~':
1294         case ';':
1295         case ',':
1296                 set_punctuator(input.c);
1297                 next_char();
1298                 return;
1299
1300         case EOF:
1301                 if (input_stack != NULL) {
1302                         fclose(close_input());
1303                         pop_restore_input();
1304                         fputc('\n', out);
1305                         if (input.c == (utf32)EOF)
1306                                 --input.position.lineno;
1307                         print_line_directive(&input.position, "2");
1308                         goto restart;
1309                 } else {
1310                         info.at_line_begin = true;
1311                         set_punctuator(T_EOF);
1312                 }
1313                 return;
1314
1315         case '\\':
1316                 next_char();
1317                 int next_c = input.c;
1318                 put_back(input.c);
1319                 input.c = '\\';
1320                 if (next_c == 'U' || next_c == 'u') {
1321                         parse_symbol();
1322                         return;
1323                 }
1324                 /* FALLTHROUGH */
1325         default:
1326 dollar_sign:
1327                 if (error_on_unknown_chars) {
1328                         errorf(&pp_token.base.source_position,
1329                                "unknown character '%lc' found\n", input.c);
1330                         next_char();
1331                         goto restart;
1332                 } else {
1333                         assert(obstack_object_size(&symbol_obstack) == 0);
1334                         obstack_grow_utf8(&symbol_obstack, input.c);
1335                         obstack_1grow(&symbol_obstack, '\0');
1336                         char     *const string = obstack_finish(&symbol_obstack);
1337                         symbol_t *const symbol = symbol_table_insert(string);
1338                         if (symbol->string != string)
1339                                 obstack_free(&symbol_obstack, string);
1340
1341                         pp_token.kind        = T_UNKNOWN_CHAR;
1342                         pp_token.base.symbol = symbol;
1343                         next_char();
1344                         return;
1345                 }
1346         }
1347 }
1348
1349 static void print_quoted_string(const char *const string)
1350 {
1351         fputc('"', out);
1352         for (const char *c = string; *c != 0; ++c) {
1353                 switch (*c) {
1354                 case '"': fputs("\\\"", out); break;
1355                 case '\\':  fputs("\\\\", out); break;
1356                 case '\a':  fputs("\\a", out); break;
1357                 case '\b':  fputs("\\b", out); break;
1358                 case '\f':  fputs("\\f", out); break;
1359                 case '\n':  fputs("\\n", out); break;
1360                 case '\r':  fputs("\\r", out); break;
1361                 case '\t':  fputs("\\t", out); break;
1362                 case '\v':  fputs("\\v", out); break;
1363                 case '\?':  fputs("\\?", out); break;
1364                 default:
1365                         if (!isprint(*c)) {
1366                                 fprintf(out, "\\%03o", (unsigned)*c);
1367                                 break;
1368                         }
1369                         fputc(*c, out);
1370                         break;
1371                 }
1372         }
1373         fputc('"', out);
1374 }
1375
1376 static void print_line_directive(const source_position_t *pos, const char *add)
1377 {
1378         if (!out)
1379                 return;
1380
1381         fprintf(out, "# %u ", pos->lineno);
1382         print_quoted_string(pos->input_name);
1383         if (add != NULL) {
1384                 fputc(' ', out);
1385                 fputs(add, out);
1386         }
1387
1388         printed_input_name = pos->input_name;
1389         input.output_line  = pos->lineno-1;
1390 }
1391
1392 static bool emit_newlines(void)
1393 {
1394         unsigned delta = pp_token.base.source_position.lineno - input.output_line;
1395         if (delta == 0)
1396                 return false;
1397
1398         if (delta >= 9) {
1399                 fputc('\n', out);
1400                 print_line_directive(&pp_token.base.source_position, NULL);
1401                 fputc('\n', out);
1402         } else {
1403                 for (unsigned i = 0; i < delta; ++i) {
1404                         fputc('\n', out);
1405                 }
1406         }
1407         input.output_line = pp_token.base.source_position.lineno;
1408
1409         for (unsigned i = 0; i < info.whitespace_at_line_begin; ++i)
1410                 fputc(' ', out);
1411
1412         return true;
1413 }
1414
1415 static void emit_pp_token(void)
1416 {
1417         if (!emit_newlines() &&
1418             (info.had_whitespace || tokens_would_paste(last_token, pp_token.kind)))
1419                 fputc(' ', out);
1420
1421         switch (pp_token.kind) {
1422         case T_NUMBER:
1423                 fputs(pp_token.literal.string.begin, out);
1424                 break;
1425
1426         case T_STRING_LITERAL:
1427                 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1428                 fputc('"', out);
1429                 fputs(pp_token.literal.string.begin, out);
1430                 fputc('"', out);
1431                 break;
1432
1433         case T_CHARACTER_CONSTANT:
1434                 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1435                 fputc('\'', out);
1436                 fputs(pp_token.literal.string.begin, out);
1437                 fputc('\'', out);
1438                 break;
1439
1440         case T_MACRO_PARAMETER:
1441                 panic("macro parameter not expanded");
1442
1443         default:
1444                 fputs(pp_token.base.symbol->string, out);
1445                 break;
1446         }
1447         last_token = pp_token.kind;
1448 }
1449
1450 static void eat_pp_directive(void)
1451 {
1452         while (!info.at_line_begin) {
1453                 next_input_token();
1454         }
1455 }
1456
1457 static bool strings_equal(const string_t *string1, const string_t *string2)
1458 {
1459         size_t size = string1->size;
1460         if (size != string2->size)
1461                 return false;
1462
1463         const char *c1 = string1->begin;
1464         const char *c2 = string2->begin;
1465         for (size_t i = 0; i < size; ++i, ++c1, ++c2) {
1466                 if (*c1 != *c2)
1467                         return false;
1468         }
1469         return true;
1470 }
1471
1472 static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
1473 {
1474         if (token1->kind != token2->kind)
1475                 return false;
1476
1477         switch (token1->kind) {
1478         case T_NUMBER:
1479         case T_CHARACTER_CONSTANT:
1480         case T_STRING_LITERAL:
1481                 return strings_equal(&token1->literal.string, &token2->literal.string);
1482
1483         case T_MACRO_PARAMETER:
1484                 return token1->macro_parameter.def->symbol
1485                     == token2->macro_parameter.def->symbol;
1486
1487         default:
1488                 return token1->base.symbol == token2->base.symbol;
1489         }
1490 }
1491
1492 static bool pp_definitions_equal(const pp_definition_t *definition1,
1493                                  const pp_definition_t *definition2)
1494 {
1495         if (definition1->list_len != definition2->list_len)
1496                 return false;
1497
1498         size_t               len = definition1->list_len;
1499         const saved_token_t *t1  = definition1->token_list;
1500         const saved_token_t *t2  = definition2->token_list;
1501         for (size_t i = 0; i < len; ++i, ++t1, ++t2) {
1502                 if (!pp_tokens_equal(&t1->token, &t2->token))
1503                         return false;
1504         }
1505         return true;
1506 }
1507
1508 static void parse_define_directive(void)
1509 {
1510         eat_pp(TP_define);
1511         if (skip_mode) {
1512                 eat_pp_directive();
1513                 return;
1514         }
1515
1516         assert(obstack_object_size(&pp_obstack) == 0);
1517
1518         if (pp_token.kind != T_IDENTIFIER || info.at_line_begin) {
1519                 errorf(&pp_token.base.source_position,
1520                        "expected identifier after #define, got %K", &pp_token);
1521                 goto error_out;
1522         }
1523         symbol_t *const symbol = pp_token.base.symbol;
1524
1525         pp_definition_t *new_definition
1526                 = obstack_alloc(&pp_obstack, sizeof(new_definition[0]));
1527         memset(new_definition, 0, sizeof(new_definition[0]));
1528         new_definition->symbol          = symbol;
1529         new_definition->source_position = input.position;
1530
1531         /* this is probably the only place where spaces are significant in the
1532          * lexer (except for the fact that they separate tokens). #define b(x)
1533          * is something else than #define b (x) */
1534         if (input.c == '(') {
1535                 eat_token(T_IDENTIFIER);
1536                 eat_token('(');
1537
1538                 while (true) {
1539                         switch (pp_token.kind) {
1540                         case T_DOTDOTDOT:
1541                                 new_definition->is_variadic = true;
1542                                 eat_token(T_DOTDOTDOT);
1543                                 if (pp_token.kind != ')') {
1544                                         errorf(&input.position,
1545                                                         "'...' not at end of macro argument list");
1546                                         goto error_out;
1547                                 }
1548                                 break;
1549
1550                         case T_IDENTIFIER: {
1551                                 pp_definition_t parameter;
1552                                 memset(&parameter, 0, sizeof(parameter));
1553                                 parameter.source_position = pp_token.base.source_position;
1554                                 parameter.symbol          = pp_token.base.symbol;
1555                                 parameter.is_parameter    = true;
1556                                 obstack_grow(&pp_obstack, &parameter, sizeof(parameter));
1557                                 eat_token(T_IDENTIFIER);
1558
1559                                 if (pp_token.kind == ',') {
1560                                         eat_token(',');
1561                                         break;
1562                                 }
1563
1564                                 if (pp_token.kind != ')') {
1565                                         errorf(&pp_token.base.source_position,
1566                                                "expected ',' or ')' after identifier, got %K",
1567                                                &pp_token);
1568                                         goto error_out;
1569                                 }
1570                                 break;
1571                         }
1572
1573                         case ')':
1574                                 eat_token(')');
1575                                 goto finish_argument_list;
1576
1577                         default:
1578                                 errorf(&pp_token.base.source_position,
1579                                        "expected identifier, '...' or ')' in #define argument list, got %K",
1580                                        &pp_token);
1581                                 goto error_out;
1582                         }
1583                 }
1584
1585         finish_argument_list:
1586                 new_definition->has_parameters = true;
1587                 size_t size = obstack_object_size(&pp_obstack);
1588                 new_definition->n_parameters
1589                         = size / sizeof(new_definition->parameters[0]);
1590                 new_definition->parameters = obstack_finish(&pp_obstack);
1591                 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1592                         pp_definition_t *param    = &new_definition->parameters[i];
1593                         symbol_t        *symbol   = param->symbol;
1594                         pp_definition_t *previous = symbol->pp_definition;
1595                         if (previous != NULL
1596                             && previous->function_definition == new_definition) {
1597                                 errorf(&param->source_position,
1598                                        "duplicate macro parameter '%Y'", symbol);
1599                                 param->symbol = sym_anonymous;
1600                                 continue;
1601                         }
1602                         param->parent_expansion    = previous;
1603                         param->function_definition = new_definition;
1604                         symbol->pp_definition      = param;
1605                 }
1606         } else {
1607                 eat_token(T_IDENTIFIER);
1608         }
1609
1610         /* construct token list */
1611         assert(obstack_object_size(&pp_obstack) == 0);
1612         while (!info.at_line_begin) {
1613                 if (pp_token.kind == T_IDENTIFIER) {
1614                         const symbol_t  *symbol     = pp_token.base.symbol;
1615                         pp_definition_t *definition = symbol->pp_definition;
1616                         if (definition != NULL
1617                             && definition->function_definition == new_definition) {
1618                             pp_token.kind                = T_MACRO_PARAMETER;
1619                             pp_token.macro_parameter.def = definition;
1620                         }
1621                 }
1622                 saved_token_t saved_token;
1623                 saved_token.token = pp_token;
1624                 saved_token.had_whitespace = info.had_whitespace;
1625                 obstack_grow(&pp_obstack, &saved_token, sizeof(saved_token));
1626                 next_input_token();
1627         }
1628
1629         new_definition->list_len   = obstack_object_size(&pp_obstack)
1630                 / sizeof(new_definition->token_list[0]);
1631         new_definition->token_list = obstack_finish(&pp_obstack);
1632
1633         if (new_definition->has_parameters) {
1634                 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1635                         pp_definition_t *param      = &new_definition->parameters[i];
1636                         symbol_t        *symbol     = param->symbol;
1637                         if (symbol == sym_anonymous)
1638                                 continue;
1639                         assert(symbol->pp_definition == param);
1640                         assert(param->function_definition == new_definition);
1641                         symbol->pp_definition   = param->parent_expansion;
1642                         param->parent_expansion = NULL;
1643                 }
1644         }
1645
1646         pp_definition_t *old_definition = symbol->pp_definition;
1647         if (old_definition != NULL) {
1648                 if (!pp_definitions_equal(old_definition, new_definition)) {
1649                         warningf(WARN_OTHER, &input.position, "multiple definition of macro '%Y' (first defined %P)", symbol, &old_definition->source_position);
1650                 } else {
1651                         /* reuse the old definition */
1652                         obstack_free(&pp_obstack, new_definition);
1653                         new_definition = old_definition;
1654                 }
1655         }
1656
1657         symbol->pp_definition = new_definition;
1658         return;
1659
1660 error_out:
1661         if (obstack_object_size(&pp_obstack) > 0) {
1662                 char *ptr = obstack_finish(&pp_obstack);
1663                 obstack_free(&pp_obstack, ptr);
1664         }
1665         eat_pp_directive();
1666 }
1667
1668 static void parse_undef_directive(void)
1669 {
1670         eat_pp(TP_undef);
1671         if (skip_mode) {
1672                 eat_pp_directive();
1673                 return;
1674         }
1675
1676         if (pp_token.kind != T_IDENTIFIER) {
1677                 errorf(&input.position,
1678                        "expected identifier after #undef, got %K", &pp_token);
1679                 eat_pp_directive();
1680                 return;
1681         }
1682
1683         pp_token.base.symbol->pp_definition = NULL;
1684         eat_token(T_IDENTIFIER);
1685
1686         if (!info.at_line_begin) {
1687                 warningf(WARN_OTHER, &input.position, "extra tokens at end of #undef directive");
1688         }
1689         eat_pp_directive();
1690 }
1691
1692 /** behind an #include we can have the special headername lexems.
1693  * They're only allowed behind an #include so they're not recognized
1694  * by the normal next_preprocessing_token. We handle them as a special
1695  * exception here */
1696 static void parse_headername(void)
1697 {
1698         const source_position_t start_position = input.position;
1699         string_t                string         = { NULL, 0, STRING_ENCODING_CHAR };
1700         assert(obstack_object_size(&symbol_obstack) == 0);
1701
1702         if (info.at_line_begin) {
1703                 parse_error("expected headername after #include");
1704                 goto finish_error;
1705         }
1706
1707         /* check wether we have a "... or <... headername */
1708         switch (input.c) {
1709         {
1710                 utf32 delimiter;
1711         case '<': delimiter = '>'; goto parse_name;
1712         case '"': delimiter = '"'; goto parse_name;
1713 parse_name:
1714                 next_char();
1715                 while (true) {
1716                         switch (input.c) {
1717                         case NEWLINE:
1718                         case EOF:
1719                                 errorf(&pp_token.base.source_position, "header name without closing '%c'", (char)delimiter);
1720                                 goto finish_error;
1721
1722                         default:
1723                                 if (input.c == delimiter) {
1724                                         next_char();
1725                                         goto finished_headername;
1726                                 } else {
1727                                         obstack_1grow(&symbol_obstack, (char)input.c);
1728                                         next_char();
1729                                 }
1730                                 break;
1731                         }
1732                 }
1733                 /* we should never be here */
1734         }
1735
1736         default:
1737                 /* TODO: do normal pp_token parsing and concatenate results */
1738                 panic("pp_token concat include not implemented yet");
1739         }
1740
1741 finished_headername:
1742         string = sym_make_string(STRING_ENCODING_CHAR);
1743
1744 finish_error:
1745         pp_token.base.source_position = start_position;
1746         pp_token.kind                 = T_HEADERNAME;
1747         pp_token.literal.string       = string;
1748 }
1749
1750 static bool do_include(bool system_include, const char *headername)
1751 {
1752         size_t headername_len = strlen(headername);
1753         if (!system_include) {
1754                 /* put dirname of current input on obstack */
1755                 const char *filename   = input.position.input_name;
1756                 const char *last_slash = strrchr(filename, '/');
1757                 if (last_slash != NULL) {
1758                         size_t len = last_slash - filename;
1759                         obstack_grow(&symbol_obstack, filename, len + 1);
1760                         obstack_grow0(&symbol_obstack, headername, headername_len);
1761                         char *complete_path = obstack_finish(&symbol_obstack);
1762                         headername = identify_string(complete_path);
1763                 }
1764
1765                 FILE *file = fopen(headername, "r");
1766                 if (file != NULL) {
1767                         switch_input(file, headername);
1768                         return true;
1769                 }
1770         }
1771
1772         assert(obstack_object_size(&symbol_obstack) == 0);
1773         /* check searchpath */
1774         for (searchpath_entry_t *entry = searchpath; entry != NULL;
1775              entry = entry->next) {
1776             const char *path = entry->path;
1777             size_t      len  = strlen(path);
1778                 obstack_grow(&symbol_obstack, path, len);
1779                 if (path[len-1] != '/')
1780                         obstack_1grow(&symbol_obstack, '/');
1781                 obstack_grow(&symbol_obstack, headername, headername_len+1);
1782
1783                 char *complete_path = obstack_finish(&symbol_obstack);
1784                 FILE *file          = fopen(complete_path, "r");
1785                 if (file != NULL) {
1786                         const char *filename = identify_string(complete_path);
1787                         switch_input(file, filename);
1788                         return true;
1789                 } else {
1790                         obstack_free(&symbol_obstack, complete_path);
1791                 }
1792         }
1793
1794         return false;
1795 }
1796
1797 static void parse_include_directive(void)
1798 {
1799         if (skip_mode) {
1800                 eat_pp_directive();
1801                 return;
1802         }
1803
1804         /* don't eat the TP_include here!
1805          * we need an alternative parsing for the next token */
1806         skip_till_newline(true);
1807         bool system_include = input.c == '<';
1808         parse_headername();
1809         string_t headername = pp_token.literal.string;
1810         if (headername.begin == NULL) {
1811                 eat_pp_directive();
1812                 return;
1813         }
1814
1815         bool had_nonwhitespace = skip_till_newline(false);
1816         if (had_nonwhitespace) {
1817                 warningf(WARN_OTHER, &pp_token.base.source_position,
1818                          "extra tokens at end of #include directive");
1819         }
1820
1821         if (n_inputs > INCLUDE_LIMIT) {
1822                 errorf(&pp_token.base.source_position, "#include nested too deeply");
1823                 /* eat \n or EOF */
1824                 next_input_token();
1825                 return;
1826         }
1827
1828         /* switch inputs */
1829         info.whitespace_at_line_begin = 0;
1830         info.had_whitespace           = false;
1831         info.at_line_begin            = true;
1832         emit_newlines();
1833         push_input();
1834         bool res = do_include(system_include, pp_token.literal.string.begin);
1835         if (res) {
1836                 next_input_token();
1837         } else {
1838                 errorf(&pp_token.base.source_position, "failed including '%S': %s", &pp_token.literal.string, strerror(errno));
1839                 pop_restore_input();
1840         }
1841 }
1842
1843 static pp_conditional_t *push_conditional(void)
1844 {
1845         pp_conditional_t *conditional
1846                 = obstack_alloc(&pp_obstack, sizeof(*conditional));
1847         memset(conditional, 0, sizeof(*conditional));
1848
1849         conditional->parent = conditional_stack;
1850         conditional_stack   = conditional;
1851
1852         return conditional;
1853 }
1854
1855 static void pop_conditional(void)
1856 {
1857         assert(conditional_stack != NULL);
1858         conditional_stack = conditional_stack->parent;
1859 }
1860
1861 static void check_unclosed_conditionals(void)
1862 {
1863         while (conditional_stack != NULL) {
1864                 pp_conditional_t *conditional = conditional_stack;
1865
1866                 if (conditional->in_else) {
1867                         errorf(&conditional->source_position, "unterminated #else");
1868                 } else {
1869                         errorf(&conditional->source_position, "unterminated condition");
1870                 }
1871                 pop_conditional();
1872         }
1873 }
1874
1875 static void parse_ifdef_ifndef_directive(bool const is_ifdef)
1876 {
1877         bool condition;
1878         eat_pp(is_ifdef ? TP_ifdef : TP_ifndef);
1879
1880         if (skip_mode) {
1881                 eat_pp_directive();
1882                 pp_conditional_t *conditional = push_conditional();
1883                 conditional->source_position  = pp_token.base.source_position;
1884                 conditional->skip             = true;
1885                 return;
1886         }
1887
1888         if (pp_token.kind != T_IDENTIFIER || info.at_line_begin) {
1889                 errorf(&pp_token.base.source_position,
1890                        "expected identifier after #%s, got %K",
1891                        is_ifdef ? "ifdef" : "ifndef", &pp_token);
1892                 eat_pp_directive();
1893
1894                 /* just take the true case in the hope to avoid further errors */
1895                 condition = true;
1896         } else {
1897                 /* evaluate wether we are in true or false case */
1898                 condition = (bool)pp_token.base.symbol->pp_definition == is_ifdef;
1899                 eat_token(T_IDENTIFIER);
1900
1901                 if (!info.at_line_begin) {
1902                         errorf(&pp_token.base.source_position,
1903                                "extra tokens at end of #%s",
1904                                is_ifdef ? "ifdef" : "ifndef");
1905                         eat_pp_directive();
1906                 }
1907         }
1908
1909         pp_conditional_t *conditional = push_conditional();
1910         conditional->source_position  = pp_token.base.source_position;
1911         conditional->condition        = condition;
1912
1913         if (!condition) {
1914                 skip_mode = true;
1915         }
1916 }
1917
1918 static void parse_else_directive(void)
1919 {
1920         eat_pp(TP_else);
1921
1922         if (!info.at_line_begin) {
1923                 if (!skip_mode) {
1924                         warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #else");
1925                 }
1926                 eat_pp_directive();
1927         }
1928
1929         pp_conditional_t *conditional = conditional_stack;
1930         if (conditional == NULL) {
1931                 errorf(&pp_token.base.source_position, "#else without prior #if");
1932                 return;
1933         }
1934
1935         if (conditional->in_else) {
1936                 errorf(&pp_token.base.source_position,
1937                        "#else after #else (condition started %P)",
1938                        &conditional->source_position);
1939                 skip_mode = true;
1940                 return;
1941         }
1942
1943         conditional->in_else = true;
1944         if (!conditional->skip) {
1945                 skip_mode = conditional->condition;
1946         }
1947         conditional->source_position = pp_token.base.source_position;
1948 }
1949
1950 static void parse_endif_directive(void)
1951 {
1952         eat_pp(TP_endif);
1953
1954         if (!info.at_line_begin) {
1955                 if (!skip_mode) {
1956                         warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #endif");
1957                 }
1958                 eat_pp_directive();
1959         }
1960
1961         pp_conditional_t *conditional = conditional_stack;
1962         if (conditional == NULL) {
1963                 errorf(&pp_token.base.source_position, "#endif without prior #if");
1964                 return;
1965         }
1966
1967         if (!conditional->skip) {
1968                 skip_mode = false;
1969         }
1970         pop_conditional();
1971 }
1972
/** The "#pragma STDC" pragmas that parse_pragma_directive() distinguishes. */
typedef enum stdc_pragma_kind_t {
	STDC_UNKNOWN          = 0, /**< not (or not a valid) STDC pragma */
	STDC_FP_CONTRACT      = 1, /**< #pragma STDC FP_CONTRACT */
	STDC_FENV_ACCESS      = 2, /**< #pragma STDC FENV_ACCESS */
	STDC_CX_LIMITED_RANGE = 3  /**< #pragma STDC CX_LIMITED_RANGE */
} stdc_pragma_kind_t;
1979
/** The argument of a "#pragma STDC" pragma. */
typedef enum stdc_pragma_value_kind_t {
	STDC_VALUE_UNKNOWN = 0, /**< anything but ON, OFF or DEFAULT */
	STDC_VALUE_ON      = 1, /**< ON */
	STDC_VALUE_OFF     = 2, /**< OFF */
	STDC_VALUE_DEFAULT = 3  /**< DEFAULT */
} stdc_pragma_value_kind_t;
1986
1987 static void parse_pragma_directive(void)
1988 {
1989         eat_pp(TP_pragma);
1990
1991         if (pp_token.kind != T_IDENTIFIER) {
1992                 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
1993                          "expected identifier after #pragma");
1994                 eat_pp_directive();
1995                 return;
1996         }
1997
1998         stdc_pragma_kind_t kind = STDC_UNKNOWN;
1999         if (pp_token.base.symbol->pp_ID == TP_STDC && c_mode & _C99) {
2000                 /* a STDC pragma */
2001                 next_input_token();
2002
2003                 switch (pp_token.base.symbol->pp_ID) {
2004                 case TP_FP_CONTRACT:      kind = STDC_FP_CONTRACT;      break;
2005                 case TP_FENV_ACCESS:      kind = STDC_FENV_ACCESS;      break;
2006                 case TP_CX_LIMITED_RANGE: kind = STDC_CX_LIMITED_RANGE; break;
2007                 default:                  break;
2008                 }
2009                 if (kind != STDC_UNKNOWN) {
2010                         next_input_token();
2011                         stdc_pragma_value_kind_t value;
2012                         switch (pp_token.base.symbol->pp_ID) {
2013                         case TP_ON:      value = STDC_VALUE_ON;      break;
2014                         case TP_OFF:     value = STDC_VALUE_OFF;     break;
2015                         case TP_DEFAULT: value = STDC_VALUE_DEFAULT; break;
2016                         default:         value = STDC_VALUE_UNKNOWN; break;
2017                         }
2018                         if (value == STDC_VALUE_UNKNOWN) {
2019                                 kind = STDC_UNKNOWN;
2020                                 errorf(&pp_token.base.source_position, "bad STDC pragma argument");
2021                         }
2022                 }
2023         }
2024         eat_pp_directive();
2025         if (kind == STDC_UNKNOWN) {
2026                 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
2027                          "encountered unknown #pragma");
2028         }
2029 }
2030
2031 static void parse_line_directive(void)
2032 {
2033         if (pp_token.kind != T_NUMBER) {
2034                 if (!skip_mode)
2035                         parse_error("expected integer");
2036         } else {
2037                 char      *end;
2038                 long const line = strtol(pp_token.literal.string.begin, &end, 0);
2039                 if (*end == '\0') {
2040                         /* use offset -1 as this is about the next line */
2041                         input.position.lineno = line - 1;
2042                         /* force output of line */
2043                         input.output_line = input.position.lineno - 20;
2044                 } else {
2045                         if (!skip_mode) {
2046                                 errorf(&input.position, "'%S' is not a valid line number",
2047                                            &pp_token.literal.string);
2048                         }
2049                 }
2050                 next_input_token();
2051         }
2052         if (pp_token.kind == T_STRING_LITERAL
2053             && pp_token.literal.string.encoding == STRING_ENCODING_CHAR) {
2054                 input.position.input_name       = pp_token.literal.string.begin;
2055                 input.position.is_system_header = false;
2056                 next_input_token();
2057
2058                 /* attempt to parse numeric flags as outputted by gcc preprocessor */
2059                 while (pp_token.kind == T_NUMBER) {
2060                         /* flags:
2061                          * 1 - indicates start of a new file
2062                          * 2 - indicates return from a file
2063                          * 3 - indicates system header
2064                          * 4 - indicates implicit extern "C" in C++ mode
2065                          *
2066                          * currently we're only interested in "3"
2067                          */
2068                         if (streq(pp_token.literal.string.begin, "3")) {
2069                                 input.position.is_system_header = true;
2070                         }
2071                         next_input_token();
2072                 }
2073         }
2074
2075         eat_pp_directive();
2076 }
2077
2078 static void parse_preprocessing_directive(void)
2079 {
2080         eat_token('#');
2081
2082         if (info.at_line_begin) {
2083                 /* empty directive */
2084                 return;
2085         }
2086
2087         if (pp_token.base.symbol) {
2088                 switch (pp_token.base.symbol->pp_ID) {
2089                 case TP_define:  parse_define_directive();            break;
2090                 case TP_else:    parse_else_directive();              break;
2091                 case TP_endif:   parse_endif_directive();             break;
2092                 case TP_ifdef:   parse_ifdef_ifndef_directive(true);  break;
2093                 case TP_ifndef:  parse_ifdef_ifndef_directive(false); break;
2094                 case TP_include: parse_include_directive();           break;
2095                 case TP_line:    next_input_token(); goto line_directive;
2096                 case TP_pragma:  parse_pragma_directive();            break;
2097                 case TP_undef:   parse_undef_directive();             break;
2098                 default:         goto skip;
2099                 }
2100         } else if (pp_token.kind == T_NUMBER) {
2101 line_directive:
2102                 parse_line_directive();
2103         } else {
2104 skip:
2105                 if (!skip_mode) {
2106                         errorf(&pp_token.base.source_position, "invalid preprocessing directive #%K", &pp_token);
2107                 }
2108                 eat_pp_directive();
2109         }
2110
2111         assert(info.at_line_begin);
2112 }
2113
2114 static void finish_current_argument(void)
2115 {
2116         if (current_argument == NULL)
2117                 return;
2118         size_t size = obstack_object_size(&pp_obstack);
2119         current_argument->list_len   = size/sizeof(current_argument->token_list[0]);
2120         current_argument->token_list = obstack_finish(&pp_obstack);
2121 }
2122
2123 void next_preprocessing_token(void)
2124 {
2125 restart:
2126         if (!expand_next()) {
2127                 do {
2128                         next_input_token();
2129                         while (pp_token.kind == '#' && info.at_line_begin) {
2130                                 parse_preprocessing_directive();
2131                         }
2132                 } while (skip_mode && pp_token.kind != T_EOF);
2133         }
2134
2135         const token_kind_t kind = pp_token.kind;
2136         if (current_call == NULL || argument_expanding != NULL) {
2137                 if (kind == T_IDENTIFIER) {
2138                         symbol_t        *const symbol        = pp_token.base.symbol;
2139                         pp_definition_t *const pp_definition = symbol->pp_definition;
2140                         if (pp_definition != NULL && !pp_definition->is_expanding) {
2141                                 if (pp_definition->has_parameters) {
2142
2143                                         /* check if next token is a '(' */
2144                                         whitespace_info_t old_info   = info;
2145                                         token_kind_t      next_token = peek_expansion();
2146                                         if (next_token == T_EOF) {
2147                                                 info.at_line_begin  = false;
2148                                                 info.had_whitespace = false;
2149                                                 skip_whitespace();
2150                                                 if (input.c == '(') {
2151                                                         next_token = '(';
2152                                                 }
2153                                         }
2154
2155                                         if (next_token == '(') {
2156                                                 if (current_expansion == NULL)
2157                                                         expansion_pos = pp_token.base.source_position;
2158                                                 next_preprocessing_token();
2159                                                 assert(pp_token.kind == '(');
2160
2161                                                 pp_definition->parent_expansion = current_expansion;
2162                                                 current_call              = pp_definition;
2163                                                 current_call->expand_pos  = 0;
2164                                                 current_call->expand_info = old_info;
2165                                                 if (current_call->n_parameters > 0) {
2166                                                         current_argument = &current_call->parameters[0];
2167                                                         assert(argument_brace_count == 0);
2168                                                 }
2169                                                 goto restart;
2170                                         } else {
2171                                                 /* skip_whitespaces() skipped newlines and whitespace,
2172                                                  * remember results for next token */
2173                                                 next_info = info;
2174                                                 info      = old_info;
2175                                                 return;
2176                                         }
2177                                 } else {
2178                                         if (current_expansion == NULL)
2179                                                 expansion_pos = pp_token.base.source_position;
2180                                         start_expanding(pp_definition);
2181                                         goto restart;
2182                                 }
2183                         }
2184                 } else if (kind == T_MACRO_PARAMETER) {
2185                         assert(current_expansion != NULL);
2186                         start_expanding(pp_token.macro_parameter.def);
2187                         goto restart;
2188                 }
2189         }
2190
2191         if (current_call != NULL) {
2192                 /* current_call != NULL */
2193                 if (kind == '(') {
2194                         ++argument_brace_count;
2195                 } else if (kind == ')') {
2196                         if (argument_brace_count > 0) {
2197                                 --argument_brace_count;
2198                         } else {
2199                                 finish_current_argument();
2200                                 assert(kind == ')');
2201                                 start_expanding(current_call);
2202                                 info = current_call->expand_info;
2203                                 current_call     = NULL;
2204                                 current_argument = NULL;
2205                                 goto restart;
2206                         }
2207                 } else if (kind == ',' && argument_brace_count == 0) {
2208                         finish_current_argument();
2209                         current_call->expand_pos++;
2210                         if (current_call->expand_pos >= current_call->n_parameters) {
2211                                 errorf(&pp_token.base.source_position,
2212                                            "too many arguments passed for macro '%Y'",
2213                                            current_call->symbol);
2214                                 current_argument = NULL;
2215                         } else {
2216                                 current_argument
2217                                         = &current_call->parameters[current_call->expand_pos];
2218                         }
2219                         goto restart;
2220                 } else if (kind == T_MACRO_PARAMETER) {
2221                         /* parameters have to be fully expanded before being used as
2222                          * parameters for another macro-call */
2223                         assert(current_expansion != NULL);
2224                         pp_definition_t *argument = pp_token.macro_parameter.def;
2225                         argument_expanding = argument;
2226                         start_expanding(argument);
2227                         goto restart;
2228                 } else if (kind == T_EOF) {
2229                         errorf(&expansion_pos,
2230                                "reached end of file while parsing arguments for '%Y'",
2231                                current_call->symbol);
2232                         return;
2233                 }
2234                 if (current_argument != NULL) {
2235                         saved_token_t saved;
2236                         saved.token = pp_token;
2237                         saved.had_whitespace = info.had_whitespace;
2238                         obstack_grow(&pp_obstack, &saved, sizeof(saved));
2239                 }
2240                 goto restart;
2241         }
2242 }
2243
2244
2245 static void prepend_include_path(const char *path)
2246 {
2247         searchpath_entry_t *entry = OALLOCZ(&config_obstack, searchpath_entry_t);
2248         entry->path = path;
2249         entry->next = searchpath;
2250         searchpath  = entry;
2251 }
2252
2253 static void setup_include_path(void)
2254 {
2255         /* built-in paths */
2256         prepend_include_path("/usr/include");
2257
2258         /* parse environment variable */
2259         const char *cpath = getenv("CPATH");
2260         if (cpath != NULL && *cpath != '\0') {
2261                 const char *begin = cpath;
2262                 const char *c;
2263                 do {
2264                         c = begin;
2265                         while (*c != '\0' && *c != ':')
2266                                 ++c;
2267
2268                         size_t len = c-begin;
2269                         if (len == 0) {
2270                                 /* for gcc compatibility (Matze: I would expect that
2271                                  * nothing happens for an empty entry...) */
2272                                 prepend_include_path(".");
2273                         } else {
2274                                 char *string = obstack_alloc(&config_obstack, len+1);
2275                                 memcpy(string, begin, len);
2276                                 string[len] = '\0';
2277
2278                                 prepend_include_path(string);
2279                         }
2280
2281                         begin = c+1;
2282                         /* skip : */
2283                         if (*begin == ':')
2284                                 ++begin;
2285                 } while(*c != '\0');
2286         }
2287 }
2288
2289 void init_preprocessor(void)
2290 {
2291         init_symbols();
2292
2293         obstack_init(&config_obstack);
2294         obstack_init(&pp_obstack);
2295         obstack_init(&input_obstack);
2296         strset_init(&stringset);
2297
2298         setup_include_path();
2299 }
2300
2301 void exit_preprocessor(void)
2302 {
2303         obstack_free(&input_obstack, NULL);
2304         obstack_free(&pp_obstack, NULL);
2305         obstack_free(&config_obstack, NULL);
2306
2307         strset_destroy(&stringset);
2308 }
2309
2310 int pptest_main(int argc, char **argv);
2311 int pptest_main(int argc, char **argv)
2312 {
2313         init_symbol_table();
2314         init_preprocessor();
2315         init_tokens();
2316
2317         error_on_unknown_chars   = false;
2318         resolve_escape_sequences = false;
2319
2320         /* simplistic commandline parser */
2321         const char *filename = NULL;
2322         const char *output = NULL;
2323         for (int i = 1; i < argc; ++i) {
2324                 const char *opt = argv[i];
2325                 if (streq(opt, "-I")) {
2326                         prepend_include_path(argv[++i]);
2327                         continue;
2328                 } else if (streq(opt, "-E")) {
2329                         /* ignore */
2330                 } else if (streq(opt, "-o")) {
2331                         output = argv[++i];
2332                         continue;
2333                 } else if (opt[0] == '-') {
2334                         fprintf(stderr, "Unknown option '%s'\n", opt);
2335                 } else {
2336                         if (filename != NULL)
2337                                 fprintf(stderr, "Multiple inputs not supported\n");
2338                         filename = argv[i];
2339                 }
2340         }
2341         if (filename == NULL) {
2342                 fprintf(stderr, "No input specified\n");
2343                 return 1;
2344         }
2345
2346         if (output == NULL) {
2347                 out = stdout;
2348         } else {
2349                 out = fopen(output, "w");
2350                 if (out == NULL) {
2351                         fprintf(stderr, "Couldn't open output '%s'\n", output);
2352                         return 1;
2353                 }
2354         }
2355
2356         /* just here for gcc compatibility */
2357         fprintf(out, "# 1 \"%s\"\n", filename);
2358         fprintf(out, "# 1 \"<built-in>\"\n");
2359         fprintf(out, "# 1 \"<command-line>\"\n");
2360
2361         FILE *file = fopen(filename, "r");
2362         if (file == NULL) {
2363                 fprintf(stderr, "Couldn't open input '%s'\n", filename);
2364                 return 1;
2365         }
2366         switch_input(file, filename);
2367
2368         for (;;) {
2369                 next_preprocessing_token();
2370                 if (pp_token.kind == T_EOF)
2371                         break;
2372                 emit_pp_token();
2373         }
2374
2375         fputc('\n', out);
2376         check_unclosed_conditionals();
2377         fclose(close_input());
2378         if (out != stdout)
2379                 fclose(out);
2380
2381         exit_tokens();
2382         exit_preprocessor();
2383         exit_symbol_table();
2384
2385         return 0;
2386 }