preprocessor: Do not use uninitialized values for the column in error messages.
[cparser] / preprocessor.c
1 #include <config.h>
2
3 #include <assert.h>
4 #include <errno.h>
5 #include <string.h>
6 #include <stdbool.h>
7 #include <ctype.h>
8
9 #include "preprocessor.h"
10 #include "token_t.h"
11 #include "symbol_t.h"
12 #include "adt/util.h"
13 #include "adt/error.h"
14 #include "adt/strutil.h"
15 #include "adt/strset.h"
16 #include "lang_features.h"
17 #include "diagnostic.h"
18 #include "string_rep.h"
19 #include "input.h"
20
/* Number of slots kept free at the front of pp_input_t.buf: the buffer is
 * declared with 1024+MAX_PUTBACK elements and decode() fills it starting at
 * buf + MAX_PUTBACK, which leaves room for put_back() to step backwards. */
21 #define MAX_PUTBACK 3
/* NOTE(review): presumably the maximum nesting depth of #include; the use of
 * this limit is not visible in this part of the file. */
22 #define INCLUDE_LIMIT 199  /* 199 is for gcc "compatibility" */
23
/** A token together with the information whether whitespace preceded it.
 * This is the element type of pp_definition_t.token_list. */
24 typedef struct saved_token_t {
25         token_t token;
26         bool    had_whitespace;
27 } saved_token_t;
28
/** Whitespace and line-start state recorded for a token. */
29 typedef struct whitespace_info_t {
30         /** current token had whitespace in front of it */
31         bool     had_whitespace;
32         /** current token is at the beginning of a line.
33          * => a "#" at line begin starts a preprocessing directive. */
34         bool     at_line_begin;
35         /** number of spaces before the first token in a line */
36         unsigned whitespace_at_line_begin;
37 } whitespace_info_t;
38
/**
 * A preprocessor definition and the state of its expansion.
 *
 * start_expanding() sets parent_expansion, expand_pos and is_expanding;
 * finished_expanding() resets parent_expansion and is_expanding again.
 * NOTE(review): the meaning of the parameter related fields is inferred from
 * their names only; their users are not visible in this part of the file.
 */
39 struct pp_definition_t {
40         symbol_t          *symbol;
41         source_position_t  source_position;
           /* expansion that was current when start_expanding() was called */
42         pp_definition_t   *parent_expansion;
           /* set to 0 by start_expanding(); presumably an index into token_list */
43         size_t             expand_pos;
44         whitespace_info_t  expand_info;
45         bool               is_variadic    : 1;
           /* true between start_expanding() and finished_expanding() */
46         bool               is_expanding   : 1;
47         bool               has_parameters : 1;
48         bool               is_parameter   : 1;
49         pp_definition_t   *function_definition;
50         size_t             n_parameters;
51         pp_definition_t   *parameters;
52
53         /* replacement */
           /* number of entries in token_list */
54         size_t             list_len;
55         saved_token_t     *token_list;
56 };
57
/** Entry of the conditional stack (see conditional_stack); entries are linked
 * through the parent pointer.
 * NOTE(review): presumably one entry per open #if/#ifdef block - confirm. */
58 typedef struct pp_conditional_t pp_conditional_t;
59 struct pp_conditional_t {
60         source_position_t  source_position;
61         bool               condition;
62         bool               in_else;
63         /** conditional in skip mode (then+else gets skipped) */
64         bool               skip;
65         pp_conditional_t  *parent;
66 };
67
/** State of one input that is being read. */
68 typedef struct pp_input_t pp_input_t;
69 struct pp_input_t {
           /* stream handed to switch_pp_input(), returned by close_pp_input() */
70         FILE               *file;
           /* decoder created by input_from_stream(), read through decode() */
71         input_t            *input;
           /* current character; EOF once the input is exhausted */
72         utf32               c;
           /* decoded characters; the first MAX_PUTBACK slots are put-back room */
73         utf32               buf[1024+MAX_PUTBACK];
           /* one past the last decoded character in buf (NULL before first read) */
74         const utf32        *bufend;
           /* next character to be read from buf (NULL before first read) */
75         const utf32        *bufpos;
           /* position of the current character */
76         source_position_t   position;
           /* next entry on input_stack (set by push_input()) */
77         pp_input_t         *parent;
78         unsigned            output_line;
           /* search path entry passed to switch_pp_input() */
79         searchpath_entry_t *path;
80 };
81
/** One directory of an include search path; entries form a singly linked
 * list through next. */
82 struct searchpath_entry_t {
83         const char         *path;
84         searchpath_entry_t *next;
85         bool                is_system_path;
86 };
87
/* the input that is currently being read (see next_real_char()) */
88 static pp_input_t      input;
89
/* inputs saved by push_input(), innermost first; n_inputs is the number of
 * saved inputs and input_obstack holds the saved copies */
90 static pp_input_t     *input_stack;
91 static unsigned        n_inputs;
92 static struct obstack  input_obstack;
93
94 static pp_conditional_t *conditional_stack;
95
/* current token: parse_string() stores its result here and parse_error()
 * reports at its source position */
96 token_t                  pp_token;
/* if false, the '$' label of SYMBOL_CASES_WITHOUT_E_P jumps to dollar_sign */
97 bool                     allow_dollar_in_symbol   = true;
/* if true parse_string() decodes escape sequences, otherwise it copies them */
98 static bool              resolve_escape_sequences = true;
99 static bool              error_on_unknown_chars   = true;
100 static bool              skip_mode;
101 static FILE             *out;
102 static struct obstack    pp_obstack;
103 static struct obstack    config_obstack;
104 static const char       *printed_input_name = NULL;
105 static source_position_t expansion_pos;
/* innermost definition being expanded; maintained by start_expanding() and
 * finished_expanding() */
106 static pp_definition_t  *current_expansion  = NULL;
107 static pp_definition_t  *current_call       = NULL;
108 static pp_definition_t  *current_argument   = NULL;
/* reset to NULL by finished_expanding() once this definition is done */
109 static pp_definition_t  *argument_expanding = NULL;
110 static unsigned          argument_brace_count;
/* set of all strings created through identify_string() */
111 static strset_t          stringset;
112 static token_kind_t      last_token;
113
/** A list of search path entries.
 * NOTE(review): anchor presumably points at the pointer where the next entry
 * gets linked in (the instances initialise it to &first) - confirm. */
114 struct searchpath_t {
115         searchpath_entry_t  *first;
116         searchpath_entry_t **anchor;
117         bool                 is_system_path;
118 };
119
/* The search paths, all initially empty (first == NULL, anchor == &first).
 * Only system_searchpath and after_searchpath are flagged as system paths. */
120 searchpath_t bracket_searchpath = { NULL, &bracket_searchpath.first, false };
121 searchpath_t quote_searchpath   = { NULL, &quote_searchpath.first,   false };
122 searchpath_t system_searchpath  = { NULL, &system_searchpath.first,  true  };
123 searchpath_t after_searchpath   = { NULL, &after_searchpath.first,   true  };
124
125 static whitespace_info_t next_info; /* valid if had_whitespace is true */
/* whitespace state of the current token */
126 static whitespace_info_t info;
127
/* forward declarations of functions defined further down in this file */
128 static inline void next_char(void);
129 static void next_input_token(void);
130 static void print_line_directive(const source_position_t *pos, const char *add);
131
/* symbols for the digraph spellings, filled in by init_symbols() */
132 static symbol_t *symbol_colongreater;
133 static symbol_t *symbol_lesscolon;
134 static symbol_t *symbol_lesspercent;
135 static symbol_t *symbol_percentcolon;
136 static symbol_t *symbol_percentcolonpercentcolon;
137 static symbol_t *symbol_percentgreater;
138
/* symbols for "L", "U", "u" and "u8", filled in by init_symbols() */
139 static symbol_t *symbol_L;
140 static symbol_t *symbol_U;
141 static symbol_t *symbol_u;
142 static symbol_t *symbol_u8;
143
144 static void init_symbols(void)
145 {
146         symbol_colongreater             = symbol_table_insert(":>");
147         symbol_lesscolon                = symbol_table_insert("<:");
148         symbol_lesspercent              = symbol_table_insert("<%");
149         symbol_percentcolon             = symbol_table_insert("%:");
150         symbol_percentcolonpercentcolon = symbol_table_insert("%:%:");
151         symbol_percentgreater           = symbol_table_insert("%>");
152
153         symbol_L  = symbol_table_insert("L");
154         symbol_U  = symbol_table_insert("U");
155         symbol_u  = symbol_table_insert("u");
156         symbol_u8 = symbol_table_insert("u8");
157 }
158
159 void switch_pp_input(FILE *const file, char const *const filename, searchpath_entry_t *const path, bool const is_system_header)
160 {
161         input.file                      = file;
162         input.input                     = input_from_stream(file, NULL);
163         input.bufend                    = NULL;
164         input.bufpos                    = NULL;
165         input.output_line               = 0;
166         input.position.input_name       = filename;
167         input.position.lineno           = 1;
168         input.position.is_system_header = is_system_header;
169         input.path                      = path;
170
171         /* indicate that we're at a new input */
172         print_line_directive(&input.position, input_stack != NULL ? "1" : NULL);
173
174         /* place a virtual '\n' so we realize we're at line begin */
175         input.position.lineno = 0;
176         input.c               = '\n';
177 }
178
179 FILE *close_pp_input(void)
180 {
181         input_free(input.input);
182
183         FILE* const file = input.file;
184         assert(file);
185
186         input.input  = NULL;
187         input.file   = NULL;
188         input.bufend = NULL;
189         input.bufpos = NULL;
190         input.c      = EOF;
191
192         return file;
193 }
194
195 static void push_input(void)
196 {
197         pp_input_t *const saved_input = obstack_copy(&input_obstack, &input, sizeof(input));
198
199         /* adjust buffer positions */
200         if (input.bufpos != NULL)
201                 saved_input->bufpos = saved_input->buf + (input.bufpos - input.buf);
202         if (input.bufend != NULL)
203                 saved_input->bufend = saved_input->buf + (input.bufend - input.buf);
204
205         saved_input->parent = input_stack;
206         input_stack         = saved_input;
207         ++n_inputs;
208 }
209
210 static void pop_restore_input(void)
211 {
212         assert(n_inputs > 0);
213         assert(input_stack != NULL);
214
215         pp_input_t *saved_input = input_stack;
216
217         memcpy(&input, saved_input, sizeof(input));
218         input.parent = NULL;
219
220         /* adjust buffer positions */
221         if (saved_input->bufpos != NULL)
222                 input.bufpos = input.buf + (saved_input->bufpos - saved_input->buf);
223         if (saved_input->bufend != NULL)
224                 input.bufend = input.buf + (saved_input->bufend - saved_input->buf);
225
226         input_stack = saved_input->parent;
227         obstack_free(&input_obstack, saved_input);
228         --n_inputs;
229 }
230
231 /**
232  * Prints a parse error message at the current token.
233  *
234  * @param msg   the error message
235  */
236 static void parse_error(const char *msg)
237 {
238         errorf(&pp_token.base.source_position,  "%s", msg);
239 }
240
241 static inline void next_real_char(void)
242 {
243         assert(input.bufpos <= input.bufend);
244         if (input.bufpos >= input.bufend) {
245                 size_t const n = decode(input.input, input.buf + MAX_PUTBACK, lengthof(input.buf) - MAX_PUTBACK);
246                 if (n == 0) {
247                         input.c = EOF;
248                         return;
249                 }
250                 input.bufpos = input.buf + MAX_PUTBACK;
251                 input.bufend = input.bufpos + n;
252         }
253         input.c = *input.bufpos++;
254         ++input.position.colno;
255 }
256
257 /**
258  * Put a character back into the buffer.
259  *
260  * @param pc  the character to put back
261  */
262 static inline void put_back(utf32 const pc)
263 {
264         assert(input.bufpos > input.buf);
265         *(--input.bufpos - input.buf + input.buf) = (char) pc;
266         --input.position.colno;
267 }
268
/*
 * Case label(s) matching a line ending; use as `case NEWLINE:` inside a
 * switch on input.c. The expansion handles "\r", "\r\n" and "\n": it consumes
 * the line ending with next_char(), increments input.position.lineno, sets
 * input.position.colno to 1 and then continues with the statements that follow
 * the `case NEWLINE:`.
 * The expansion defines the label `newline`, so it can be used at most once
 * per function.
 */
269 #define NEWLINE \
270         '\r': \
271                 next_char(); \
272                 if (input.c == '\n') { \
273         case '\n': \
274                         next_char(); \
275                 } \
276                 ++input.position.lineno; \
277                 input.position.colno = 1; \
278                 goto newline; \
279                 newline // Let it look like an ordinary case label.
280
/* Skips the current character, asserting that it is the expected one. */
281 #define eat(c_type) (assert(input.c == c_type), next_char())
282
283 static void maybe_concat_lines(void)
284 {
285         eat('\\');
286
287         switch (input.c) {
288         case NEWLINE:
289                 info.whitespace_at_line_begin = 0;
290                 return;
291
292         default:
293                 break;
294         }
295
296         put_back(input.c);
297         input.c = '\\';
298 }
299
300 /**
301  * Set c to the next input character, ie.
302  * after expanding trigraphs.
303  */
304 static inline void next_char(void)
305 {
306         next_real_char();
307
308         /* filter trigraphs and concatenated lines */
309         if (UNLIKELY(input.c == '\\')) {
310                 maybe_concat_lines();
311                 goto end_of_next_char;
312         }
313
314         if (LIKELY(input.c != '?'))
315                 goto end_of_next_char;
316
317         next_real_char();
318         if (LIKELY(input.c != '?')) {
319                 put_back(input.c);
320                 input.c = '?';
321                 goto end_of_next_char;
322         }
323
324         next_real_char();
325         switch (input.c) {
326         case '=': input.c = '#'; break;
327         case '(': input.c = '['; break;
328         case '/': input.c = '\\'; maybe_concat_lines(); break;
329         case ')': input.c = ']'; break;
330         case '\'': input.c = '^'; break;
331         case '<': input.c = '{'; break;
332         case '!': input.c = '|'; break;
333         case '>': input.c = '}'; break;
334         case '-': input.c = '~'; break;
335         default:
336                 put_back(input.c);
337                 put_back('?');
338                 input.c = '?';
339                 break;
340         }
341
342 end_of_next_char:;
343 #ifdef DEBUG_CHARS
344         printf("nchar '%c'\n", input.c);
345 #endif
346 }
347
348
349
/**
 * Returns true if the given character is an octal digit ('0' to '7').
 *
 * @param chr  the character to check
 */
static inline bool is_octal_digit(int chr)
{
	/* the decimal digits are contiguous in every C execution character set */
	return '0' <= chr && chr <= '7';
}
371
/**
 * Returns the numeric value (0 to 15) of a decimal or hexadecimal digit.
 * Any other character ends in panic().
 *
 * @param digit  the digit character, letters in either case
 */
static int digit_value(int digit)
{
	/* only the decimal digits are guaranteed to be contiguous ... */
	if ('0' <= digit && digit <= '9')
		return digit - '0';

	/* ... the letters are not, so they are listed one by one */
	switch (digit) {
	case 'a': case 'A': return 10;
	case 'b': case 'B': return 11;
	case 'c': case 'C': return 12;
	case 'd': case 'D': return 13;
	case 'e': case 'E': return 14;
	case 'f': case 'F': return 15;
	default:
		panic("wrong character given");
	}
}
405
406 /**
407  * Parses an octal character sequence.
408  *
409  * @param first_digit  the already read first digit
410  */
411 static utf32 parse_octal_sequence(const utf32 first_digit)
412 {
413         assert(is_octal_digit(first_digit));
414         utf32 value = digit_value(first_digit);
415         if (!is_octal_digit(input.c)) return value;
416         value = 8 * value + digit_value(input.c);
417         next_char();
418         if (!is_octal_digit(input.c)) return value;
419         value = 8 * value + digit_value(input.c);
420         next_char();
421         return value;
422
423 }
424
425 /**
426  * Parses a hex character sequence.
427  */
428 static utf32 parse_hex_sequence(void)
429 {
430         utf32 value = 0;
431         while (isxdigit(input.c)) {
432                 value = 16 * value + digit_value(input.c);
433                 next_char();
434         }
435         return value;
436 }
437
438 static bool is_universal_char_valid(utf32 const v)
439 {
440         /* C11 Â§6.4.3:2 */
441         if (v < 0xA0U && v != 0x24 && v != 0x40 && v != 0x60)
442                 return false;
443         if (0xD800 <= v && v <= 0xDFFF)
444                 return false;
445         return true;
446 }
447
448 static utf32 parse_universal_char(unsigned const n_digits)
449 {
450         utf32 v = 0;
451         for (unsigned k = n_digits; k != 0; --k) {
452                 if (isxdigit(input.c)) {
453                         v = 16 * v + digit_value(input.c);
454                         if (!resolve_escape_sequences)
455                                 obstack_1grow(&symbol_obstack, input.c);
456                         next_char();
457                 } else {
458                         errorf(&input.position,
459                                "short universal character name, expected %u more digits",
460                                    k);
461                         break;
462                 }
463         }
464         if (!is_universal_char_valid(v)) {
465                 errorf(&input.position,
466                        "\\%c%0*X is not a valid universal character name",
467                        n_digits == 4 ? 'u' : 'U', (int)n_digits, v);
468         }
469         return v;
470 }
471
472 static bool is_universal_char_valid_identifier_c99(utf32 const v)
473 {
474         static const utf32 single_chars[] = {
475                 0x00AA, 0x00BA, 0x0386, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0,
476                 0x1F59, 0x1F5B, 0x1F5D, 0x05BF, 0x09B2, 0x0A02, 0x0A5E, 0x0A74,
477                 0x0A8D, 0x0AD0, 0x0AE0, 0x0B9C, 0x0CDE, 0x0E84, 0x0E8A, 0x0E8D,
478                 0x0EA5, 0x0EA7, 0x0EC6, 0x0F00, 0x0F35, 0x0F37, 0x0F39, 0x0F97,
479                 0x0FB9, 0x00B5, 0x00B7, 0x02BB, 0x037A, 0x0559, 0x093D, 0x0B3D,
480                 0x1FBE, 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128
481         };
482
483         static const utf32 ranges[][2] = {
484                 {0x00C0, 0x00D6}, {0x00D8, 0x00F6}, {0x00F8, 0x01F5}, {0x01FA, 0x0217},
485                 {0x0250, 0x02A8}, {0x1E00, 0x1E9B}, {0x1EA0, 0x1EF9}, {0x0388, 0x038A},
486                 {0x038E, 0x03A1}, {0x03A3, 0x03CE}, {0x03D0, 0x03D6}, {0x03E2, 0x03F3},
487                 {0x1F00, 0x1F15}, {0x1F18, 0x1F1D}, {0x1F20, 0x1F45}, {0x1F48, 0x1F4D},
488                 {0x1F50, 0x1F57}, {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4}, {0x1FB6, 0x1FBC},
489                 {0x1FC2, 0x1FC4}, {0x1FC6, 0x1FCC}, {0x1FD0, 0x1FD3}, {0x1FD6, 0x1FDB},
490                 {0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4}, {0x1FF6, 0x1FFC}, {0x0401, 0x040C},
491                 {0x040E, 0x044F}, {0x0451, 0x045C}, {0x045E, 0x0481}, {0x0490, 0x04C4},
492                 {0x04C7, 0x04C8}, {0x04CB, 0x04CC}, {0x04D0, 0x04EB}, {0x04EE, 0x04F5},
493                 {0x04F8, 0x04F9}, {0x0531, 0x0556}, {0x0561, 0x0587}, {0x05B0, 0x05B9},
494                 {0x05BB, 0x05BD}, {0x05C1, 0x05C2}, {0x05D0, 0x05EA}, {0x05F0, 0x05F2},
495                 {0x0621, 0x063A}, {0x0640, 0x0652}, {0x0670, 0x06B7}, {0x06BA, 0x06BE},
496                 {0x06C0, 0x06CE}, {0x06D0, 0x06DC}, {0x06E5, 0x06E8}, {0x06EA, 0x06ED},
497                 {0x0901, 0x0903}, {0x0905, 0x0939}, {0x093E, 0x094D}, {0x0950, 0x0952},
498                 {0x0958, 0x0963}, {0x0981, 0x0983}, {0x0985, 0x098C}, {0x098F, 0x0990},
499                 {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B6, 0x09B9}, {0x09BE, 0x09C4},
500                 {0x09C7, 0x09C8}, {0x09CB, 0x09CD}, {0x09DC, 0x09DD}, {0x09DF, 0x09E3},
501                 {0x09F0, 0x09F1}, {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28},
502                 {0x0A2A, 0x0A30}, {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, {0x0A38, 0x0A39},
503                 {0x0A3E, 0x0A42}, {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A59, 0x0A5C},
504                 {0x0A81, 0x0A83}, {0x0A85, 0x0A8B}, {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8},
505                 {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3}, {0x0AB5, 0x0AB9}, {0x0ABD, 0x0AC5},
506                 {0x0AC7, 0x0AC9}, {0x0ACB, 0x0ACD}, {0x0B01, 0x0B03}, {0x0B05, 0x0B0C},
507                 {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, {0x0B32, 0x0B33},
508                 {0x0B36, 0x0B39}, {0x0B3E, 0x0B43}, {0x0B47, 0x0B48}, {0x0B4B, 0x0B4D},
509                 {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B61}, {0x0B82, 0x0B83}, {0x0B85, 0x0B8A},
510                 {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A}, {0x0B9E, 0x0B9F},
511                 {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA}, {0x0BAE, 0x0BB5}, {0x0BB7, 0x0BB9},
512                 {0x0BBE, 0x0BC2}, {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD}, {0x0C01, 0x0C03},
513                 {0x0C05, 0x0C0C}, {0x0C0E, 0x0C10}, {0x0C12, 0x0C28}, {0x0C2A, 0x0C33},
514                 {0x0C35, 0x0C39}, {0x0C3E, 0x0C44}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D},
515                 {0x0C60, 0x0C61}, {0x0C82, 0x0C83}, {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90},
516                 {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3}, {0x0CB5, 0x0CB9}, {0x0CBE, 0x0CC4},
517                 {0x0CC6, 0x0CC8}, {0x0CCA, 0x0CCD}, {0x0CE0, 0x0CE1}, {0x0D02, 0x0D03},
518                 {0x0D05, 0x0D0C}, {0x0D0E, 0x0D10}, {0x0D12, 0x0D28}, {0x0D2A, 0x0D39},
519                 {0x0D3E, 0x0D43}, {0x0D46, 0x0D48}, {0x0D4A, 0x0D4D}, {0x0D60, 0x0D61},
520                 {0x0E01, 0x0E3A}, {0x0E40, 0x0E5B}, {0x0E81, 0x0E82}, {0x0E87, 0x0E88},
521                 {0x0E94, 0x0E97}, {0x0E99, 0x0E9F}, {0x0EA1, 0x0EA3}, {0x0EAA, 0x0EAB},
522                 {0x0EAD, 0x0EAE}, {0x0EB0, 0x0EB9}, {0x0EBB, 0x0EBD}, {0x0EC0, 0x0EC4},
523                 {0x0EC8, 0x0ECD}, {0x0EDC, 0x0EDD}, {0x0F18, 0x0F19}, {0x0F3E, 0x0F47},
524                 {0x0F49, 0x0F69}, {0x0F71, 0x0F84}, {0x0F86, 0x0F8B}, {0x0F90, 0x0F95},
525                 {0x0F99, 0x0FAD}, {0x0FB1, 0x0FB7}, {0x10A0, 0x10C5}, {0x10D0, 0x10F6},
526                 {0x3041, 0x3093}, {0x309B, 0x309C}, {0x30A1, 0x30F6}, {0x30FB, 0x30FC},
527                 {0x3105, 0x312C}, {0x4E00, 0x9FA5}, {0xAC00, 0xD7A3}, {0x0660, 0x0669},
528                 {0x06F0, 0x06F9}, {0x0966, 0x096F}, {0x09E6, 0x09EF}, {0x0A66, 0x0A6F},
529                 {0x0AE6, 0x0AEF}, {0x0B66, 0x0B6F}, {0x0BE7, 0x0BEF}, {0x0C66, 0x0C6F},
530                 {0x0CE6, 0x0CEF}, {0x0D66, 0x0D6F}, {0x0E50, 0x0E59}, {0x0ED0, 0x0ED9},
531                 {0x0F20, 0x0F33}, {0x02B0, 0x02B8}, {0x02BD, 0x02C1}, {0x02D0, 0x02D1},
532                 {0x02E0, 0x02E4}, {0x203F, 0x2040}, {0x210A, 0x2113}, {0x2118, 0x211D},
533                 {0x212A, 0x2131}, {0x2133, 0x2138}, {0x2160, 0x2182}, {0x3005, 0x3007},
534                 {0x3021, 0x3029},
535         };
536         for (size_t i = 0; i < sizeof(ranges)/sizeof(ranges[0]); ++i) {
537                 if (ranges[i][0] <= v && v <= ranges[i][1])
538                         return true;
539         }
540         for (size_t i = 0; i < sizeof(single_chars)/sizeof(single_chars[0]); ++i) {
541                 if (v == single_chars[i])
542                         return true;
543         }
544         return false;
545 }
546
547 static bool is_universal_char_valid_identifier_c11(utf32 const v)
548 {
549         /* C11 Annex D.1 */
550         if (                v == 0x000A8) return true;
551         if (                v == 0x000AA) return true;
552         if (                v == 0x000AD) return true;
553         if (                v == 0x000AF) return true;
554         if (0x000B2 <= v && v <= 0x000B5) return true;
555         if (0x000B7 <= v && v <= 0x000BA) return true;
556         if (0x000BC <= v && v <= 0x000BE) return true;
557         if (0x000C0 <= v && v <= 0x000D6) return true;
558         if (0x000D8 <= v && v <= 0x000F6) return true;
559         if (0x000F8 <= v && v <= 0x000FF) return true;
560         if (0x00100 <= v && v <= 0x0167F) return true;
561         if (0x01681 <= v && v <= 0x0180D) return true;
562         if (0x0180F <= v && v <= 0x01FFF) return true;
563         if (0x0200B <= v && v <= 0x0200D) return true;
564         if (0x0202A <= v && v <= 0x0202E) return true;
565         if (0x0203F <= v && v <= 0x02040) return true;
566         if (                v == 0x02054) return true;
567         if (0x02060 <= v && v <= 0x0206F) return true;
568         if (0x02070 <= v && v <= 0x0218F) return true;
569         if (0x02460 <= v && v <= 0x024FF) return true;
570         if (0x02776 <= v && v <= 0x02793) return true;
571         if (0x02C00 <= v && v <= 0x02DFF) return true;
572         if (0x02E80 <= v && v <= 0x02FFF) return true;
573         if (0x03004 <= v && v <= 0x03007) return true;
574         if (0x03021 <= v && v <= 0x0302F) return true;
575         if (0x03031 <= v && v <= 0x0303F) return true;
576         if (0x03040 <= v && v <= 0x0D7FF) return true;
577         if (0x0F900 <= v && v <= 0x0FD3D) return true;
578         if (0x0FD40 <= v && v <= 0x0FDCF) return true;
579         if (0x0FDF0 <= v && v <= 0x0FE44) return true;
580         if (0x0FE47 <= v && v <= 0x0FFFD) return true;
581         if (0x10000 <= v && v <= 0x1FFFD) return true;
582         if (0x20000 <= v && v <= 0x2FFFD) return true;
583         if (0x30000 <= v && v <= 0x3FFFD) return true;
584         if (0x40000 <= v && v <= 0x4FFFD) return true;
585         if (0x50000 <= v && v <= 0x5FFFD) return true;
586         if (0x60000 <= v && v <= 0x6FFFD) return true;
587         if (0x70000 <= v && v <= 0x7FFFD) return true;
588         if (0x80000 <= v && v <= 0x8FFFD) return true;
589         if (0x90000 <= v && v <= 0x9FFFD) return true;
590         if (0xA0000 <= v && v <= 0xAFFFD) return true;
591         if (0xB0000 <= v && v <= 0xBFFFD) return true;
592         if (0xC0000 <= v && v <= 0xCFFFD) return true;
593         if (0xD0000 <= v && v <= 0xDFFFD) return true;
594         if (0xE0000 <= v && v <= 0xEFFFD) return true;
595         return false;
596 }
597
598 static bool is_universal_char_valid_identifier(utf32 const v)
599 {
600         if (c_mode & _C11)
601                 return is_universal_char_valid_identifier_c11(v);
602         return is_universal_char_valid_identifier_c99(v);
603 }
604
605 static bool is_universal_char_invalid_identifier_start(utf32 const v)
606 {
607         if (! (c_mode & _C11))
608                 return false;
609
610         /* C11 Annex D.2 */
611         if (0x0300 <= v && v <= 0x036F) return true;
612         if (0x1DC0 <= v && v <= 0x1DFF) return true;
613         if (0x20D0 <= v && v <= 0x20FF) return true;
614         if (0xFE20 <= v && v <= 0xFE2F) return true;
615         return false;
616 }
617
618 /**
619  * Parse an escape sequence.
620  */
621 static utf32 parse_escape_sequence(void)
622 {
623         eat('\\');
624
625         utf32 const ec = input.c;
626         next_char();
627
628         switch (ec) {
629         case '"':  return '"';
630         case '\'': return '\'';
631         case '\\': return '\\';
632         case '?': return '\?';
633         case 'a': return '\a';
634         case 'b': return '\b';
635         case 'f': return '\f';
636         case 'n': return '\n';
637         case 'r': return '\r';
638         case 't': return '\t';
639         case 'v': return '\v';
640         case 'x':
641                 return parse_hex_sequence();
642         case '0':
643         case '1':
644         case '2':
645         case '3':
646         case '4':
647         case '5':
648         case '6':
649         case '7':
650                 return parse_octal_sequence(ec);
651         case EOF:
652                 parse_error("reached end of file while parsing escape sequence");
653                 return EOF;
654         /* \E is not documented, but handled, by GCC.  It is acceptable according
655          * to Â§6.11.4, whereas \e is not. */
656         case 'E':
657         case 'e':
658                 if (c_mode & _GNUC)
659                         return 27;   /* hopefully 27 is ALWAYS the code for ESCAPE */
660                 break;
661
662         case 'U': return parse_universal_char(8);
663         case 'u': return parse_universal_char(4);
664
665         default:
666                 break;
667         }
668         /* Â§6.4.4.4:8 footnote 64 */
669         parse_error("unknown escape sequence");
670         return EOF;
671 }
672
673 static const char *identify_string(char *string)
674 {
675         const char *result = strset_insert(&stringset, string);
676         if (result != string) {
677                 obstack_free(&symbol_obstack, string);
678         }
679         return result;
680 }
681
682 static string_t sym_make_string(string_encoding_t const enc)
683 {
684         obstack_1grow(&symbol_obstack, '\0');
685         size_t      const len    = obstack_object_size(&symbol_obstack) - 1;
686         char       *const string = obstack_finish(&symbol_obstack);
687         char const *const result = identify_string(string);
688         return (string_t){ result, len, enc };
689 }
690
691 string_t make_string(char const *const string)
692 {
693         obstack_grow(&symbol_obstack, string, strlen(string));
694         return sym_make_string(STRING_ENCODING_CHAR);
695 }
696
697 static utf32 get_string_encoding_limit(string_encoding_t const enc)
698 {
699         switch (enc) {
700         case STRING_ENCODING_CHAR:   return 0xFF;
701         case STRING_ENCODING_CHAR16: return 0xFFFF;
702         case STRING_ENCODING_CHAR32: return 0xFFFFFFFF;
703         case STRING_ENCODING_UTF8:   return 0xFFFFFFFF;
704         case STRING_ENCODING_WIDE:   return 0xFFFFFFFF; // FIXME depends on settings
705         }
706         panic("invalid string encoding");
707 }
708
709 static void parse_string(utf32 const delimiter, token_kind_t const kind,
710                          string_encoding_t const enc,
711                          char const *const context)
712 {
713         eat(delimiter);
714
715         utf32 const limit = get_string_encoding_limit(enc);
716         while (true) {
717                 switch (input.c) {
718                 case '\\': {
719                         if (resolve_escape_sequences) {
720                                 utf32 const tc = parse_escape_sequence();
721                                 if (tc > limit) {
722                                         warningf(WARN_OTHER, &pp_token.base.source_position, "escape sequence out of range");
723                                 }
724                                 if (enc == STRING_ENCODING_CHAR) {
725                                         obstack_1grow(&symbol_obstack, tc);
726                                 } else {
727                                         obstack_grow_utf8(&symbol_obstack, tc);
728                                 }
729                         } else {
730                                 obstack_1grow(&symbol_obstack, (char)input.c);
731                                 next_char();
732                                 obstack_1grow(&symbol_obstack, (char)input.c);
733                                 next_char();
734                         }
735                         break;
736                 }
737
738                 case NEWLINE:
739                         errorf(&pp_token.base.source_position, "newline while parsing %s", context);
740                         break;
741
742                 case EOF:
743                         errorf(&pp_token.base.source_position, "EOF while parsing %s", context);
744                         goto end_of_string;
745
746                 default:
747                         if (input.c == delimiter) {
748                                 next_char();
749                                 goto end_of_string;
750                         } else {
751                                 obstack_grow_utf8(&symbol_obstack, input.c);
752                                 next_char();
753                                 break;
754                         }
755                 }
756         }
757
758 end_of_string:
759         pp_token.kind           = kind;
760         pp_token.literal.string = sym_make_string(enc);
761 }
762
763 static void parse_string_literal(string_encoding_t const enc)
764 {
765         parse_string('"', T_STRING_LITERAL, enc, "string literal");
766 }
767
768 static void parse_character_constant(string_encoding_t const enc)
769 {
770         parse_string('\'', T_CHARACTER_CONSTANT, enc, "character constant");
771         if (pp_token.literal.string.size == 0) {
772                 parse_error("empty character constant");
773         }
774 }
775
/*
 * Case labels for '$', '_' and all letters except e, E, p and P; use as
 * `case SYMBOL_CASES_WITHOUT_E_P:`.
 * The '$' label jumps to a label `dollar_sign`, which the using function has
 * to provide, if allow_dollar_in_symbol is false.
 * NOTE(review): e/E/p/P are presumably left out because they also occur as
 * exponent markers in numbers - confirm at the places where this is used.
 */
776 #define SYMBOL_CASES_WITHOUT_E_P \
777              '$': if (!allow_dollar_in_symbol) goto dollar_sign; \
778         case 'a': \
779         case 'b': \
780         case 'c': \
781         case 'd': \
782         case 'f': \
783         case 'g': \
784         case 'h': \
785         case 'i': \
786         case 'j': \
787         case 'k': \
788         case 'l': \
789         case 'm': \
790         case 'n': \
791         case 'o': \
792         case 'q': \
793         case 'r': \
794         case 's': \
795         case 't': \
796         case 'u': \
797         case 'v': \
798         case 'w': \
799         case 'x': \
800         case 'y': \
801         case 'z': \
802         case 'A': \
803         case 'B': \
804         case 'C': \
805         case 'D': \
806         case 'F': \
807         case 'G': \
808         case 'H': \
809         case 'I': \
810         case 'J': \
811         case 'K': \
812         case 'L': \
813         case 'M': \
814         case 'N': \
815         case 'O': \
816         case 'Q': \
817         case 'R': \
818         case 'S': \
819         case 'T': \
820         case 'U': \
821         case 'V': \
822         case 'W': \
823         case 'X': \
824         case 'Y': \
825         case 'Z': \
826         case '_'
827
/* All labels of SYMBOL_CASES_WITHOUT_E_P plus e, E, p and P; use as
 * `case SYMBOL_CASES:`. */
828 #define SYMBOL_CASES \
829              SYMBOL_CASES_WITHOUT_E_P: \
830         case 'e': \
831         case 'p': \
832         case 'E': \
833         case 'P'
834
/* Case labels for the decimal digits; use as `case DIGIT_CASES:`. */
835 #define DIGIT_CASES \
836              '0':  \
837         case '1':  \
838         case '2':  \
839         case '3':  \
840         case '4':  \
841         case '5':  \
842         case '6':  \
843         case '7':  \
844         case '8':  \
845         case '9'
846
847 static void start_expanding(pp_definition_t *definition)
848 {
849         definition->parent_expansion = current_expansion;
850         definition->expand_pos       = 0;
851         definition->is_expanding     = true;
852         if (definition->list_len > 0) {
853                 definition->token_list[0].had_whitespace
854                         = info.had_whitespace;
855         }
856         current_expansion = definition;
857 }
858
859 static void finished_expanding(pp_definition_t *definition)
860 {
861         assert(definition->is_expanding);
862         pp_definition_t *parent = definition->parent_expansion;
863         definition->parent_expansion = NULL;
864         definition->is_expanding     = false;
865
866         /* stop further expanding once we expanded a parameter used in a
867          * sub macro-call */
868         if (definition == argument_expanding)
869                 argument_expanding = NULL;
870
871         assert(current_expansion == definition);
872         current_expansion = parent;
873 }
874
875 static void grow_string_escaped(struct obstack *obst, const string_t *string, char const *delimiter)
876 {
877         char const *prefix = get_string_encoding_prefix(string->encoding);
878         obstack_printf(obst, "%s%s", prefix, delimiter);
879         size_t      size = string->size;
880         const char *str  = string->begin;
881         if (resolve_escape_sequences) {
882                 obstack_grow(obst, str, size);
883         } else {
884                 for (size_t i = 0; i < size; ++i) {
885                         const char c = str[i];
886                         if (c == '\\' || c == '"')
887                                 obstack_1grow(obst, '\\');
888                         obstack_1grow(obst, c);
889                 }
890         }
891         obstack_printf(obst, "%s", delimiter);
892 }
893
894 static void grow_token(struct obstack *obst, const token_t *token)
895 {
896         switch (token->kind) {
897         case T_NUMBER:
898                 obstack_grow(obst, token->literal.string.begin, token->literal.string.size);
899                 break;
900
901         case T_STRING_LITERAL: {
902                 char const *const delimiter = resolve_escape_sequences ? "\"" : "\\\"";
903                 grow_string_escaped(obst, &token->literal.string, delimiter);
904                 break;
905         }
906
907         case T_CHARACTER_CONSTANT:
908                 grow_string_escaped(obst, &token->literal.string, "'");
909                 break;
910
911         case T_IDENTIFIER:
912         default: {
913                 const char *str = token->base.symbol->string;
914                 size_t      len = strlen(str);
915                 obstack_grow(obst, str, len);
916                 break;
917         }
918         }
919 }
920
921 static void stringify(const pp_definition_t *definition)
922 {
923         assert(obstack_object_size(&symbol_obstack) == 0);
924
925         size_t list_len = definition->list_len;
926         for (size_t p = 0; p < list_len; ++p) {
927                 const saved_token_t *saved = &definition->token_list[p];
928                 if (p > 0 && saved->had_whitespace)
929                         obstack_1grow(&symbol_obstack, ' ');
930                 grow_token(&symbol_obstack, &saved->token);
931         }
932         pp_token.kind           = T_STRING_LITERAL;
933         pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
934 }
935
936 static inline void set_punctuator(token_kind_t const kind)
937 {
938         pp_token.kind        = kind;
939         pp_token.base.symbol = token_symbols[kind];
940 }
941
942 static inline void set_digraph(token_kind_t const kind, symbol_t *const symbol)
943 {
944         pp_token.kind        = kind;
945         pp_token.base.symbol = symbol;
946 }
947
948 /**
949  * returns next final token from a preprocessor macro expansion
950  */
951 static bool expand_next(void)
952 {
953         if (current_expansion == NULL)
954                 return false;
955
956 restart:;
957         size_t pos = current_expansion->expand_pos;
958         if (pos >= current_expansion->list_len) {
959                 finished_expanding(current_expansion);
960                 /* it was the outermost expansion, parse pptoken normally */
961                 if (current_expansion == NULL) {
962                         return false;
963                 }
964                 goto restart;
965         }
966         const saved_token_t *saved = &current_expansion->token_list[pos++];
967         pp_token = saved->token;
968         if (pp_token.kind == '#') {
969                 if (pos < current_expansion->list_len) {
970                         const saved_token_t *next = &current_expansion->token_list[pos];
971                         if (next->token.kind == T_MACRO_PARAMETER) {
972                                 pp_definition_t *def = next->token.macro_parameter.def;
973                                 assert(def != NULL && def->is_parameter);
974                                 stringify(def);
975                                 ++pos;
976                         }
977                 }
978         }
979
980         if (current_expansion->expand_pos > 0)
981                 info.had_whitespace = saved->had_whitespace;
982         current_expansion->expand_pos = pos;
983         pp_token.base.source_position = expansion_pos;
984
985         return true;
986 }
987
988 /**
989  * Returns the next token kind found when continuing the current expansions
990  * without starting new sub-expansions.
991  */
992 static token_kind_t peek_expansion(void)
993 {
994         for (pp_definition_t *e = current_expansion; e; e = e->parent_expansion) {
995                 if (e->expand_pos < e->list_len)
996                         return e->token_list[e->expand_pos].token.kind;
997         }
998         return T_EOF;
999 }
1000
1001 static void skip_line_comment(void)
1002 {
1003         info.had_whitespace = true;
1004         while (true) {
1005                 switch (input.c) {
1006                 case EOF:
1007                         return;
1008
1009                 case '\r':
1010                 case '\n':
1011                         return;
1012
1013                 default:
1014                         next_char();
1015                         break;
1016                 }
1017         }
1018 }
1019
1020 static void skip_multiline_comment(void)
1021 {
1022         info.had_whitespace = true;
1023
1024         source_position_t const start_pos = input.position;
1025         while (true) {
1026                 switch (input.c) {
1027                 case '/':
1028                         next_char();
1029                         if (input.c == '*') {
1030                                 /* TODO: nested comment, warn here */
1031                         }
1032                         break;
1033                 case '*':
1034                         next_char();
1035                         if (input.c == '/') {
1036                                 if (input.position.lineno != input.output_line)
1037                                         info.whitespace_at_line_begin = input.position.colno;
1038                                 next_char();
1039                                 return;
1040                         }
1041                         break;
1042
1043                 case NEWLINE:
1044                         break;
1045
1046                 case EOF:
1047                         errorf(&start_pos, "at end of file while looking for comment end");
1048                         return;
1049
1050                 default:
1051                         next_char();
1052                         break;
1053                 }
1054         }
1055 }
1056
1057 static bool skip_till_newline(bool stop_at_non_whitespace)
1058 {
1059         bool res = false;
1060         while (true) {
1061                 switch (input.c) {
1062                 case ' ':
1063                 case '\t':
1064                         next_char();
1065                         continue;
1066
1067                 case '/':
1068                         next_char();
1069                         if (input.c == '/') {
1070                                 next_char();
1071                                 skip_line_comment();
1072                                 continue;
1073                         } else if (input.c == '*') {
1074                                 next_char();
1075                                 skip_multiline_comment();
1076                                 continue;
1077                         } else {
1078                                 put_back(input.c);
1079                                 input.c = '/';
1080                         }
1081                         return true;
1082
1083                 case NEWLINE:
1084                         return res;
1085
1086                 default:
1087                         if (stop_at_non_whitespace)
1088                                 return false;
1089                         res = true;
1090                         next_char();
1091                         continue;
1092                 }
1093         }
1094 }
1095
1096 static void skip_whitespace(void)
1097 {
1098         while (true) {
1099                 switch (input.c) {
1100                 case ' ':
1101                 case '\t':
1102                         ++info.whitespace_at_line_begin;
1103                         info.had_whitespace = true;
1104                         next_char();
1105                         continue;
1106
1107                 case NEWLINE:
1108                         info.at_line_begin  = true;
1109                         info.had_whitespace = true;
1110                         info.whitespace_at_line_begin = 0;
1111                         continue;
1112
1113                 case '/':
1114                         next_char();
1115                         if (input.c == '/') {
1116                                 next_char();
1117                                 skip_line_comment();
1118                                 continue;
1119                         } else if (input.c == '*') {
1120                                 next_char();
1121                                 skip_multiline_comment();
1122                                 continue;
1123                         } else {
1124                                 put_back(input.c);
1125                                 input.c = '/';
1126                         }
1127                         return;
1128
1129                 default:
1130                         return;
1131                 }
1132         }
1133 }
1134
1135 static inline void eat_pp(pp_token_kind_t const kind)
1136 {
1137         assert(pp_token.base.symbol->pp_ID == kind);
1138         (void) kind;
1139         next_input_token();
1140 }
1141
1142 static inline void eat_token(token_kind_t const kind)
1143 {
1144         assert(pp_token.kind == kind);
1145         (void)kind;
1146         next_input_token();
1147 }
1148
1149 static string_encoding_t identify_encoding_prefix(symbol_t *const sym)
1150 {
1151         if (sym == symbol_L) return STRING_ENCODING_WIDE;
1152         if (c_mode & _C11) {
1153                 if (sym == symbol_U)  return STRING_ENCODING_CHAR32;
1154                 if (sym == symbol_u)  return STRING_ENCODING_CHAR16;
1155                 if (sym == symbol_u8) return STRING_ENCODING_UTF8;
1156         }
1157         return STRING_ENCODING_CHAR;
1158 }
1159
1160 static void parse_symbol(void)
1161 {
1162         assert(obstack_object_size(&symbol_obstack) == 0);
1163         while (true) {
1164                 switch (input.c) {
1165                 case DIGIT_CASES:
1166                 case SYMBOL_CASES:
1167                         obstack_1grow(&symbol_obstack, (char) input.c);
1168                         next_char();
1169                         break;
1170
1171                 case '\\':
1172                         next_char();
1173                         switch (input.c) {
1174                         {
1175                                 unsigned n;
1176                         case 'U': n = 8; goto universal;
1177                         case 'u': n = 4; goto universal;
1178 universal:
1179                                 if (!resolve_escape_sequences) {
1180                                         obstack_1grow(&symbol_obstack, '\\');
1181                                         obstack_1grow(&symbol_obstack, input.c);
1182                                 }
1183                                 next_char();
1184                                 utf32 const v = parse_universal_char(n);
1185                                 if (!is_universal_char_valid_identifier(v)) {
1186                                         if (is_universal_char_valid(v)) {
1187                                                 errorf(&input.position,
1188                                                            "universal character \\%c%0*X is not valid in an identifier",
1189                                                            n == 4 ? 'u' : 'U', (int)n, v);
1190                                         }
1191                                 } else if (obstack_object_size(&symbol_obstack) == 0 && is_universal_char_invalid_identifier_start(v)) {
1192                                         errorf(&input.position,
1193                                                    "universal character \\%c%0*X is not valid as start of an identifier",
1194                                                    n == 4 ? 'u' : 'U', (int)n, v);
1195                                 } else if (resolve_escape_sequences) {
1196                                         obstack_grow_utf8(&symbol_obstack, v);
1197                                 }
1198                                 break;
1199                         }
1200
1201                         default:
1202                                 put_back(input.c);
1203                                 input.c = '\\';
1204                                 goto end_symbol;
1205                         }
1206
1207                 default:
1208 dollar_sign:
1209                         goto end_symbol;
1210                 }
1211         }
1212
1213 end_symbol:
1214         obstack_1grow(&symbol_obstack, '\0');
1215         char *string = obstack_finish(&symbol_obstack);
1216
1217         symbol_t *symbol = symbol_table_insert(string);
1218
1219         /* Might be a prefixed string or character constant: L/U/u/u8"string". */
1220         if (input.c == '"') {
1221                 string_encoding_t const enc = identify_encoding_prefix(symbol);
1222                 if (enc != STRING_ENCODING_CHAR) {
1223                         parse_string_literal(enc);
1224                         return;
1225                 }
1226         } else if (input.c == '\'') {
1227                 string_encoding_t const enc = identify_encoding_prefix(symbol);
1228                 if (enc != STRING_ENCODING_CHAR) {
1229                         if (enc == STRING_ENCODING_UTF8) {
1230                                 errorf(&pp_token.base.source_position, "'u8' is not a valid encoding for a chracter constant");
1231                         }
1232                         parse_character_constant(enc);
1233                         return;
1234                 }
1235         }
1236
1237         pp_token.kind        = symbol->ID;
1238         pp_token.base.symbol = symbol;
1239
1240         /* we can free the memory from symbol obstack if we already had an entry in
1241          * the symbol table */
1242         if (symbol->string != string) {
1243                 obstack_free(&symbol_obstack, string);
1244         }
1245 }
1246
1247 static void parse_number(void)
1248 {
1249         obstack_1grow(&symbol_obstack, (char) input.c);
1250         next_char();
1251
1252         while (true) {
1253                 switch (input.c) {
1254                 case '.':
1255                 case DIGIT_CASES:
1256                 case SYMBOL_CASES_WITHOUT_E_P:
1257                         obstack_1grow(&symbol_obstack, (char) input.c);
1258                         next_char();
1259                         break;
1260
1261                 case 'e':
1262                 case 'p':
1263                 case 'E':
1264                 case 'P':
1265                         obstack_1grow(&symbol_obstack, (char) input.c);
1266                         next_char();
1267                         if (input.c == '+' || input.c == '-') {
1268                                 obstack_1grow(&symbol_obstack, (char) input.c);
1269                                 next_char();
1270                         }
1271                         break;
1272
1273                 default:
1274 dollar_sign:
1275                         goto end_number;
1276                 }
1277         }
1278
1279 end_number:
1280         pp_token.kind           = T_NUMBER;
1281         pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
1282 }
1283
     /*
      * Building blocks for the punctuator cases of next_input_token().
      *
      * MAYBE_PROLOG reads the next character and opens a switch over it.
      * Every MAYBE()/MAYBE_DIGRAPH() adds a case to that switch, and
      * ELSE_CODE()/ELSE() add the default case and close the switch again.
      * The opening brace of MAYBE_PROLOG is therefore balanced only by a
      * following ELSE_CODE()/ELSE().
      */

     /** Consume the current character and start a switch over the following one. */
1284 #define MAYBE_PROLOG \
1285         next_char(); \
1286         switch (input.c) {
1287
     /** If the character is @p ch: consume it, make the token the punctuator
      * @p kind and return from the enclosing function. */
1288 #define MAYBE(ch, kind) \
1289         case ch: \
1290                 next_char(); \
1291                 set_punctuator(kind); \
1292                 return;
1293
     /** Like MAYBE(), but the token gets @p symbol through set_digraph(). */
1294 #define MAYBE_DIGRAPH(ch, kind, symbol) \
1295         case ch: \
1296                 next_char(); \
1297                 set_digraph(kind, symbol); \
1298                 return;
1299
     /** Default case running @p code; closes the switch opened by MAYBE_PROLOG. */
1300 #define ELSE_CODE(code) \
1301         default: \
1302                 code \
1303         }
1304
     /** Default case: the token is the punctuator @p kind; the character that was
      * looked at is not consumed. */
1305 #define ELSE(kind) ELSE_CODE(set_punctuator(kind); return;)
1306
1307 /**
      * Identifies the next preprocessing token contained in the input stream and
      * stores it in pp_token. No macro expansion is performed.
      *
      * Whitespace and comments in front of the token are skipped and recorded in
      * `info`. The punctuator cases are built from the MAYBE_PROLOG / MAYBE /
      * ELSE macros, which open and close nested switches over the following
      * character.
      */
1309 static void next_input_token(void)
1310 {
             /* Take over whitespace information that was stored in next_info;
              * otherwise start without whitespace and not at line begin. */
1311         if (next_info.had_whitespace) {
1312                 info = next_info;
1313                 next_info.had_whitespace = false;
1314         } else {
1315                 info.at_line_begin  = false;
1316                 info.had_whitespace = false;
1317         }
1318 restart:
             /* The token starts at the current input position; this is redone
              * after everything that gets skipped. */
1319         pp_token.base.source_position = input.position;
1320         pp_token.base.symbol          = NULL;
1321
1322         switch (input.c) {
1323         case ' ':
1324         case '\t':
1325                 info.whitespace_at_line_begin++;
1326                 info.had_whitespace = true;
1327                 next_char();
1328                 goto restart;
1329
1330         case NEWLINE:
1331                 info.at_line_begin            = true;
1332                 info.had_whitespace           = true;
1333                 info.whitespace_at_line_begin = 0;
1334                 goto restart;
1335
1336         case SYMBOL_CASES:
1337                 parse_symbol();
1338                 return;
1339
1340         case DIGIT_CASES:
1341                 parse_number();
1342                 return;
1343
1344         case '"':
1345                 parse_string_literal(STRING_ENCODING_CHAR);
1346                 return;
1347
1348         case '\'':
1349                 parse_character_constant(STRING_ENCODING_CHAR);
1350                 return;
1351
1352         case '.':
1353                 MAYBE_PROLOG
                             /* '.' followed by a digit is a number: make the '.'
                              * the current character again for parse_number(). */
1354                         case '0':
1355                         case '1':
1356                         case '2':
1357                         case '3':
1358                         case '4':
1359                         case '5':
1360                         case '6':
1361                         case '7':
1362                         case '8':
1363                         case '9':
1364                                 put_back(input.c);
1365                                 input.c = '.';
1366                                 parse_number();
1367                                 return;
1368
                             /* ".." is only a token as part of "..."; otherwise
                              * the second '.' is restored and a single '.'
                              * punctuator is produced. */
1369                         case '.':
1370                                 MAYBE_PROLOG
1371                                 MAYBE('.', T_DOTDOTDOT)
1372                                 ELSE_CODE(
1373                                         put_back(input.c);
1374                                         input.c = '.';
1375                                         set_punctuator('.');
1376                                         return;
1377                                 )
1378                 ELSE('.')
1379         case '&':
1380                 MAYBE_PROLOG
1381                 MAYBE('&', T_ANDAND)
1382                 MAYBE('=', T_ANDEQUAL)
1383                 ELSE('&')
1384         case '*':
1385                 MAYBE_PROLOG
1386                 MAYBE('=', T_ASTERISKEQUAL)
1387                 ELSE('*')
1388         case '+':
1389                 MAYBE_PROLOG
1390                 MAYBE('+', T_PLUSPLUS)
1391                 MAYBE('=', T_PLUSEQUAL)
1392                 ELSE('+')
1393         case '-':
1394                 MAYBE_PROLOG
1395                 MAYBE('>', T_MINUSGREATER)
1396                 MAYBE('-', T_MINUSMINUS)
1397                 MAYBE('=', T_MINUSEQUAL)
1398                 ELSE('-')
1399         case '!':
1400                 MAYBE_PROLOG
1401                 MAYBE('=', T_EXCLAMATIONMARKEQUAL)
1402                 ELSE('!')
1403         case '/':
1404                 MAYBE_PROLOG
1405                 MAYBE('=', T_SLASHEQUAL)
                     /* Comments are skipped; scanning then starts over. */
1406                 case '*':
1407                         next_char();
1408                         skip_multiline_comment();
1409                         goto restart;
1410                 case '/':
1411                         next_char();
1412                         skip_line_comment();
1413                         goto restart;
1414                 ELSE('/')
1415         case '%':
1416                 MAYBE_PROLOG
1417                 MAYBE_DIGRAPH('>', '}', symbol_percentgreater)
1418                 MAYBE('=', T_PERCENTEQUAL)
                     /* "%:" is the digraph for '#', "%:%:" the one for "##".
                      * A '%' after "%:" that is not followed by ':' is restored
                      * and only "%:" is taken. */
1419                 case ':':
1420                         MAYBE_PROLOG
1421                         case '%':
1422                                 MAYBE_PROLOG
1423                                 MAYBE_DIGRAPH(':', T_HASHHASH, symbol_percentcolonpercentcolon)
1424                                 ELSE_CODE(
1425                                         put_back(input.c);
1426                                         input.c = '%';
1427                                         goto digraph_percentcolon;
1428                                 )
1429                         ELSE_CODE(
1430 digraph_percentcolon:
1431                                 set_digraph('#', symbol_percentcolon);
1432                                 return;
1433                         )
1434                 ELSE('%')
1435         case '<':
1436                 MAYBE_PROLOG
1437                 MAYBE_DIGRAPH(':', '[', symbol_lesscolon)
1438                 MAYBE_DIGRAPH('%', '{', symbol_lesspercent)
1439                 MAYBE('=', T_LESSEQUAL)
1440                 case '<':
1441                         MAYBE_PROLOG
1442                         MAYBE('=', T_LESSLESSEQUAL)
1443                         ELSE(T_LESSLESS)
1444                 ELSE('<')
1445         case '>':
1446                 MAYBE_PROLOG
1447                 MAYBE('=', T_GREATEREQUAL)
1448                 case '>':
1449                         MAYBE_PROLOG
1450                         MAYBE('=', T_GREATERGREATEREQUAL)
1451                         ELSE(T_GREATERGREATER)
1452                 ELSE('>')
1453         case '^':
1454                 MAYBE_PROLOG
1455                 MAYBE('=', T_CARETEQUAL)
1456                 ELSE('^')
1457         case '|':
1458                 MAYBE_PROLOG
1459                 MAYBE('=', T_PIPEEQUAL)
1460                 MAYBE('|', T_PIPEPIPE)
1461                 ELSE('|')
1462         case ':':
1463                 MAYBE_PROLOG
1464                 MAYBE_DIGRAPH('>', ']', symbol_colongreater)
                     /* "::" is a token only in C++ mode; otherwise the second
                      * ':' stays in the input and a single ':' is produced. */
1465                 case ':':
1466                         if (c_mode & _CXX) {
1467                                 next_char();
1468                                 set_punctuator(T_COLONCOLON);
1469                                 return;
1470                         }
1471                         /* FALLTHROUGH */
1472                 ELSE(':')
1473         case '=':
1474                 MAYBE_PROLOG
1475                 MAYBE('=', T_EQUALEQUAL)
1476                 ELSE('=')
1477         case '#':
1478                 MAYBE_PROLOG
1479                 MAYBE('#', T_HASHHASH)
1480                 ELSE('#')
1481
1482         case '?':
1483         case '[':
1484         case ']':
1485         case '(':
1486         case ')':
1487         case '{':
1488         case '}':
1489         case '~':
1490         case ';':
1491         case ',':
1492                 set_punctuator(input.c);
1493                 next_char();
1494                 return;
1495
1496         case EOF:
                     /* With a saved input on input_stack: close the current
                      * input, restore the saved one, print a line directive with
                      * flag "2" and continue scanning there. Otherwise produce
                      * T_EOF. */
1497                 if (input_stack != NULL) {
1498                         fclose(close_pp_input());
1499                         pop_restore_input();
1500                         if (out)
1501                                 fputc('\n', out);
1502                         if (input.c == (utf32)EOF)
1503                                 --input.position.lineno;
1504                         print_line_directive(&input.position, "2");
1505                         goto restart;
1506                 } else {
1507                         info.at_line_begin = true;
1508                         set_punctuator(T_EOF);
1509                 }
1510                 return;
1511
1512         case '\\':
                     /* Peek at the character after the backslash and restore the
                      * input: "\u" and "\U" start an identifier, anything else is
                      * handled as an unknown character. */
1513                 next_char();
1514                 int next_c = input.c;
1515                 put_back(input.c);
1516                 input.c = '\\';
1517                 if (next_c == 'U' || next_c == 'u') {
1518                         parse_symbol();
1519                         return;
1520                 }
1521                 /* FALLTHROUGH */
1522         default:
1523 dollar_sign:
                     /* Unknown character: either report and skip it, or hand it
                      * on as a T_UNKNOWN_CHAR token whose symbol is the character. */
1524                 if (error_on_unknown_chars) {
1525                         errorf(&pp_token.base.source_position, "unknown character '%lc' found", input.c);
1526                         next_char();
1527                         goto restart;
1528                 } else {
1529                         assert(obstack_object_size(&symbol_obstack) == 0);
1530                         obstack_grow_utf8(&symbol_obstack, input.c);
1531                         obstack_1grow(&symbol_obstack, '\0');
1532                         char     *const string = obstack_finish(&symbol_obstack);
1533                         symbol_t *const symbol = symbol_table_insert(string);
                             /* The symbol already existed: the copy just built
                              * is not needed. */
1534                         if (symbol->string != string)
1535                                 obstack_free(&symbol_obstack, string);
1536
1537                         pp_token.kind        = T_UNKNOWN_CHAR;
1538                         pp_token.base.symbol = symbol;
1539                         next_char();
1540                         return;
1541                 }
1542         }
1543 }
1544
1545 static void print_quoted_string(const char *const string)
1546 {
1547         fputc('"', out);
1548         for (const char *c = string; *c != 0; ++c) {
1549                 switch (*c) {
1550                 case '"': fputs("\\\"", out); break;
1551                 case '\\':  fputs("\\\\", out); break;
1552                 case '\a':  fputs("\\a", out); break;
1553                 case '\b':  fputs("\\b", out); break;
1554                 case '\f':  fputs("\\f", out); break;
1555                 case '\n':  fputs("\\n", out); break;
1556                 case '\r':  fputs("\\r", out); break;
1557                 case '\t':  fputs("\\t", out); break;
1558                 case '\v':  fputs("\\v", out); break;
1559                 case '\?':  fputs("\\?", out); break;
1560                 default:
1561                         if (!isprint(*c)) {
1562                                 fprintf(out, "\\%03o", (unsigned)*c);
1563                                 break;
1564                         }
1565                         fputc(*c, out);
1566                         break;
1567                 }
1568         }
1569         fputc('"', out);
1570 }
1571
1572 static void print_line_directive(const source_position_t *pos, const char *add)
1573 {
1574         if (!out)
1575                 return;
1576
1577         fprintf(out, "# %u ", pos->lineno);
1578         print_quoted_string(pos->input_name);
1579         if (add != NULL) {
1580                 fputc(' ', out);
1581                 fputs(add, out);
1582         }
1583         if (pos->is_system_header) {
1584                 fputs(" 3", out);
1585         }
1586
1587         printed_input_name = pos->input_name;
1588         input.output_line  = pos->lineno-1;
1589 }
1590
1591 static bool emit_newlines(void)
1592 {
1593         if (!out)
1594                 return true;
1595
1596         unsigned delta = pp_token.base.source_position.lineno - input.output_line;
1597         if (delta == 0)
1598                 return false;
1599
1600         if (delta >= 9) {
1601                 fputc('\n', out);
1602                 print_line_directive(&pp_token.base.source_position, NULL);
1603                 fputc('\n', out);
1604         } else {
1605                 for (unsigned i = 0; i < delta; ++i) {
1606                         fputc('\n', out);
1607                 }
1608         }
1609         input.output_line = pp_token.base.source_position.lineno;
1610
1611         unsigned whitespace = info.whitespace_at_line_begin;
1612         /* make sure there is at least 1 whitespace before a (macro-expanded)
1613          * '#' at line begin. I'm not sure why this is good, but gcc does it. */
1614         if (pp_token.kind == '#' && whitespace == 0)
1615                 ++whitespace;
1616         for (unsigned i = 0; i < whitespace; ++i)
1617                 fputc(' ', out);
1618
1619         return true;
1620 }
1621
1622 void set_preprocessor_output(FILE *output)
1623 {
1624         out = output;
1625         if (out != NULL) {
1626                 error_on_unknown_chars   = false;
1627                 resolve_escape_sequences = false;
1628         } else {
1629                 error_on_unknown_chars   = true;
1630                 resolve_escape_sequences = true;
1631         }
1632 }
1633
1634 void emit_pp_token(void)
1635 {
1636         if (!emit_newlines() &&
1637             (info.had_whitespace || tokens_would_paste(last_token, pp_token.kind)))
1638                 fputc(' ', out);
1639
1640         switch (pp_token.kind) {
1641         case T_NUMBER:
1642                 fputs(pp_token.literal.string.begin, out);
1643                 break;
1644
1645         case T_STRING_LITERAL:
1646                 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1647                 fputc('"', out);
1648                 fputs(pp_token.literal.string.begin, out);
1649                 fputc('"', out);
1650                 break;
1651
1652         case T_CHARACTER_CONSTANT:
1653                 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1654                 fputc('\'', out);
1655                 fputs(pp_token.literal.string.begin, out);
1656                 fputc('\'', out);
1657                 break;
1658
1659         case T_MACRO_PARAMETER:
1660                 panic("macro parameter not expanded");
1661
1662         default:
1663                 fputs(pp_token.base.symbol->string, out);
1664                 break;
1665         }
1666         last_token = pp_token.kind;
1667 }
1668
1669 static void eat_pp_directive(void)
1670 {
1671         while (!info.at_line_begin) {
1672                 next_input_token();
1673         }
1674 }
1675
1676 static bool strings_equal(const string_t *string1, const string_t *string2)
1677 {
1678         size_t size = string1->size;
1679         if (size != string2->size)
1680                 return false;
1681
1682         const char *c1 = string1->begin;
1683         const char *c2 = string2->begin;
1684         for (size_t i = 0; i < size; ++i, ++c1, ++c2) {
1685                 if (*c1 != *c2)
1686                         return false;
1687         }
1688         return true;
1689 }
1690
1691 static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
1692 {
1693         if (token1->kind != token2->kind)
1694                 return false;
1695
1696         switch (token1->kind) {
1697         case T_NUMBER:
1698         case T_CHARACTER_CONSTANT:
1699         case T_STRING_LITERAL:
1700                 return strings_equal(&token1->literal.string, &token2->literal.string);
1701
1702         case T_MACRO_PARAMETER:
1703                 return token1->macro_parameter.def->symbol
1704                     == token2->macro_parameter.def->symbol;
1705
1706         default:
1707                 return token1->base.symbol == token2->base.symbol;
1708         }
1709 }
1710
1711 static bool pp_definitions_equal(const pp_definition_t *definition1,
1712                                  const pp_definition_t *definition2)
1713 {
1714         if (definition1->list_len != definition2->list_len)
1715                 return false;
1716
1717         size_t               len = definition1->list_len;
1718         const saved_token_t *t1  = definition1->token_list;
1719         const saved_token_t *t2  = definition2->token_list;
1720         for (size_t i = 0; i < len; ++i, ++t1, ++t2) {
1721                 if (!pp_tokens_equal(&t1->token, &t2->token))
1722                         return false;
1723                 if (t1->had_whitespace != t2->had_whitespace)
1724                         return false;
1725         }
1726         return true;
1727 }
1728
1729 static void missing_macro_param_error(void)
1730 {
1731         errorf(&pp_token.base.source_position,
1732                "'#' is not followed by a macro parameter");
1733 }
1734
1735 static bool is_defineable_token(char const *const context)
1736 {
1737         if (info.at_line_begin) {
1738                 errorf(&pp_token.base.source_position, "unexpected end of line after %s", context);
1739         }
1740
1741         symbol_t *const symbol = pp_token.base.symbol;
1742         if (!symbol)
1743                 goto no_ident;
1744
1745         if (pp_token.kind != T_IDENTIFIER) {
1746                 switch (symbol->string[0]) {
1747                 case SYMBOL_CASES:
1748 dollar_sign:
1749                         break;
1750
1751                 default:
1752 no_ident:
1753                         errorf(&pp_token.base.source_position, "expected identifier after %s, got %K", context, &pp_token);
1754                         return false;
1755                 }
1756         }
1757
1758         /* TODO turn this into a flag in pp_def. */
1759         switch (symbol->pp_ID) {
1760         /* Â§6.10.8:4 */
1761         case TP_defined:
1762                 errorf(&pp_token.base.source_position, "%K cannot be used as macro name in %s", &pp_token, context);
1763                 return false;
1764
1765         default:
1766                 return true;
1767         }
1768 }
1769
/**
 * Parse a "#define" directive and install the resulting macro definition.
 *
 * In skip mode the rest of the directive is discarded. Otherwise the macro
 * name is checked with is_defineable_token(), an optional parameter list is
 * read (only if '(' directly follows the name) and the remaining tokens of
 * the line are saved as the replacement list.
 *
 * If the symbol already has a definition that pp_definitions_equal() accepts
 * as equal, the old definition is kept and the new one is released; if it
 * differs, a warning is emitted and the new definition replaces the old one.
 *
 * On an error in the name or the parameter list the partially grown obstack
 * object is discarded, the rest of the line is skipped and no definition is
 * installed.
 */
1770 static void parse_define_directive(void)
1771 {
1772         eat_pp(TP_define);
1773         if (skip_mode) {
1774                 eat_pp_directive();
1775                 return;
1776         }
1777
1778         assert(obstack_object_size(&pp_obstack) == 0);
1779
1780         if (!is_defineable_token("#define"))
1781                 goto error_out;
1782         symbol_t *const symbol = pp_token.base.symbol;
1783
1784         pp_definition_t *new_definition
1785                 = obstack_alloc(&pp_obstack, sizeof(new_definition[0]));
1786         memset(new_definition, 0, sizeof(new_definition[0]));
1787         new_definition->symbol          = symbol;
1788         new_definition->source_position = input.position;
1789
1790         /* this is probably the only place where spaces are significant in the
1791          * lexer (except for the fact that they separate tokens). #define b(x)
1792          * is something else than #define b (x) */
1793         if (input.c == '(') {
1794                 next_input_token();
1795                 eat_token('(');
1796
             /* read the parameter list; every parameter is appended to the
              * object currently growing on pp_obstack */
1797                 while (true) {
1798                         switch (pp_token.kind) {
1799                         case T_DOTDOTDOT:
1800                                 new_definition->is_variadic = true;
1801                                 eat_token(T_DOTDOTDOT);
1802                                 if (pp_token.kind != ')') {
1803                                         errorf(&input.position,
1804                                                         "'...' not at end of macro argument list");
1805                                         goto error_out;
1806                                 }
1807                                 break;
1808
1809                         case T_IDENTIFIER: {
1810                                 pp_definition_t parameter;
1811                                 memset(&parameter, 0, sizeof(parameter));
1812                                 parameter.source_position = pp_token.base.source_position;
1813                                 parameter.symbol          = pp_token.base.symbol;
1814                                 parameter.is_parameter    = true;
1815                                 obstack_grow(&pp_obstack, &parameter, sizeof(parameter));
1816                                 eat_token(T_IDENTIFIER);
1817
1818                                 if (pp_token.kind == ',') {
1819                                         eat_token(',');
1820                                         break;
1821                                 }
1822
1823                                 if (pp_token.kind != ')') {
1824                                         errorf(&pp_token.base.source_position,
1825                                                "expected ',' or ')' after identifier, got %K",
1826                                                &pp_token);
1827                                         goto error_out;
1828                                 }
1829                                 break;
1830                         }
1831
1832                         case ')':
1833                                 eat_token(')');
1834                                 goto finish_argument_list;
1835
1836                         default:
1837                                 errorf(&pp_token.base.source_position,
1838                                        "expected identifier, '...' or ')' in #define argument list, got %K",
1839                                        &pp_token);
1840                                 goto error_out;
1841                         }
1842                 }
1843
1844         finish_argument_list:
1845                 new_definition->has_parameters = true;
1846                 size_t size = obstack_object_size(&pp_obstack);
1847                 new_definition->n_parameters
1848                         = size / sizeof(new_definition->parameters[0]);
1849                 new_definition->parameters = obstack_finish(&pp_obstack);
             /* temporarily bind every parameter symbol to its parameter
              * definition so the replacement list can recognise parameters;
              * the previous binding is kept in parent_expansion and restored
              * after the replacement list has been read */
1850                 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1851                         pp_definition_t *param    = &new_definition->parameters[i];
1852                         symbol_t        *symbol   = param->symbol;
1853                         pp_definition_t *previous = symbol->pp_definition;
                     /* the symbol is already bound to an earlier parameter of
                      * this very macro: mark the duplicate as anonymous */
1854                         if (previous != NULL
1855                             && previous->function_definition == new_definition) {
1856                                 errorf(&param->source_position,
1857                                        "duplicate macro parameter '%Y'", symbol);
1858                                 param->symbol = sym_anonymous;
1859                                 continue;
1860                         }
1861                         param->parent_expansion    = previous;
1862                         param->function_definition = new_definition;
1863                         symbol->pp_definition      = param;
1864                 }
1865         } else {
1866                 next_input_token();
1867         }
1868
1869         /* construct token list */
1870         assert(obstack_object_size(&pp_obstack) == 0);
     /* set after a '#' in a macro with parameter list: the next token has to
      * be a macro parameter */
1871         bool next_must_be_param = false;
1872         while (!info.at_line_begin) {
             /* identifiers naming a parameter of this macro are stored as
              * T_MACRO_PARAMETER tokens */
1873                 if (pp_token.kind == T_IDENTIFIER) {
1874                         const symbol_t  *symbol     = pp_token.base.symbol;
1875                         pp_definition_t *definition = symbol->pp_definition;
1876                         if (definition != NULL
1877                             && definition->function_definition == new_definition) {
1878                             pp_token.kind                = T_MACRO_PARAMETER;
1879                             pp_token.macro_parameter.def = definition;
1880                         }
1881                 }
1882                 if (next_must_be_param && pp_token.kind != T_MACRO_PARAMETER) {
1883                         missing_macro_param_error();
1884                 }
1885                 saved_token_t saved_token;
1886                 saved_token.token = pp_token;
1887                 saved_token.had_whitespace = info.had_whitespace;
1888                 obstack_grow(&pp_obstack, &saved_token, sizeof(saved_token));
1889                 next_must_be_param
1890                         = new_definition->has_parameters && pp_token.kind == '#';
1891                 next_input_token();
1892         }
     /* a '#' was the last token of the replacement list */
1893         if (next_must_be_param)
1894                 missing_macro_param_error();
1895
1896         new_definition->list_len   = obstack_object_size(&pp_obstack)
1897                 / sizeof(new_definition->token_list[0]);
1898         new_definition->token_list = obstack_finish(&pp_obstack);
1899
     /* undo the temporary parameter bindings made above */
1900         if (new_definition->has_parameters) {
1901                 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1902                         pp_definition_t *param      = &new_definition->parameters[i];
1903                         symbol_t        *symbol     = param->symbol;
1904                         if (symbol == sym_anonymous)
1905                                 continue;
1906                         assert(symbol->pp_definition == param);
1907                         assert(param->function_definition == new_definition);
1908                         symbol->pp_definition   = param->parent_expansion;
1909                         param->parent_expansion = NULL;
1910                 }
1911         }
1912
1913         pp_definition_t *old_definition = symbol->pp_definition;
1914         if (old_definition != NULL) {
1915                 if (!pp_definitions_equal(old_definition, new_definition)) {
1916                         warningf(WARN_OTHER, &input.position, "multiple definition of macro '%Y' (first defined %P)", symbol, &old_definition->source_position);
1917                 } else {
1918                         /* reuse the old definition */
1919                         obstack_free(&pp_obstack, new_definition);
1920                         new_definition = old_definition;
1921                 }
1922         }
1923
1924         symbol->pp_definition = new_definition;
1925         return;
1926
1927 error_out:
     /* drop a partially grown object (e.g. an unfinished parameter list) and
      * skip the rest of the directive */
1928         if (obstack_object_size(&pp_obstack) > 0) {
1929                 char *ptr = obstack_finish(&pp_obstack);
1930                 obstack_free(&pp_obstack, ptr);
1931         }
1932         eat_pp_directive();
1933 }
1934
1935 static void parse_undef_directive(void)
1936 {
1937         eat_pp(TP_undef);
1938         if (skip_mode) {
1939                 eat_pp_directive();
1940                 return;
1941         }
1942
1943         if (!is_defineable_token("#undef")) {
1944                 eat_pp_directive();
1945                 return;
1946         }
1947
1948         pp_token.base.symbol->pp_definition = NULL;
1949         next_input_token();
1950
1951         if (!info.at_line_begin) {
1952                 warningf(WARN_OTHER, &input.position, "extra tokens at end of #undef directive");
1953         }
1954         eat_pp_directive();
1955 }
1956
1957 /**
 * Parse the header name of an #include directive.
 *
 * After an #include the special header-name lexemes "..." and <...> are
 * allowed. They are only valid there, so they are not recognized by the
 * normal next_preprocessing_token(); they are handled as a special case
 * here. If the input starts with neither '"' nor '<', the next
 * preprocessing token is read instead and a string literal or a '<' ... '>'
 * token sequence is accepted.
 *
 * @param system_include  set to true for the <...> form and to false for
 *                        the "..." form; only meaningful when the result is
 *                        not NULL
 * @return the header name, or NULL after an error has been reported
 */
1961 static const char *parse_headername(bool *system_include)
1962 {
1963         if (info.at_line_begin) {
1964                 parse_error("expected headername after #include");
1965                 return NULL;
1966         }
1967
1968         /* check whether we have a "... or <... header name */
     /* remember where the header name starts; it becomes the position of
      * pp_token once the name has been read successfully */
1969         source_position_t position = input.position;
1970         switch (input.c) {
     /* this block only provides a scope for 'delimiter'; it is entered by
      * jumping to one of the case labels inside it */
1971         {
1972                 utf32 delimiter;
1973         case '<': delimiter = '>'; *system_include = true;  goto parse_name;
1974         case '"': delimiter = '"'; *system_include = false; goto parse_name;
1975 parse_name:
1976                 assert(obstack_object_size(&symbol_obstack) == 0);
1977                 next_char();
             /* collect the characters up to the closing delimiter on
              * symbol_obstack */
1978                 while (true) {
1979                         switch (input.c) {
1980                         case NEWLINE:
1981                         case EOF:
1982                                 {
1983                                         char *dummy = obstack_finish(&symbol_obstack);
1984                                         obstack_free(&symbol_obstack, dummy);
1985                                 }
1986                                 errorf(&pp_token.base.source_position,
1987                                        "header name without closing '%c'", (char)delimiter);
1988                                 return NULL;
1989
1990                         default:
1991                                 if (input.c == delimiter) {
1992                                         next_char();
1993                                         goto finish_headername;
1994                                 } else {
1995                                         obstack_1grow(&symbol_obstack, (char)input.c);
1996                                         next_char();
1997                                 }
1998                                 break;
1999                         }
2000                 }
2001                 /* we should never be here */
2002         }
2003
2004         default:
2005                 next_preprocessing_token();
2006                 if (info.at_line_begin) {
2007                         /* TODO: if we are already in the new line then we parsed more than
2008                          * wanted. We reuse the token, but could produce following errors
2009                          * misbehaviours... */
2010                         goto error_invalid_input;
2011                 }
2012                 if (pp_token.kind == T_STRING_LITERAL) {
2013                         *system_include = false;
2014                         return pp_token.literal.string.begin;
2015                 } else if (pp_token.kind == '<') {
2016                         *system_include = true;
2017                         assert(obstack_object_size(&pp_obstack) == 0);
                     /* save all tokens up to the closing '>' on pp_obstack */
2018                         while (true) {
2019                                 next_preprocessing_token();
2020                                 if (info.at_line_begin) {
2021                                         /* TODO: we shouldn't have parsed/expanded something on the
2022                                          * next line yet... */
2023                                         char *dummy = obstack_finish(&pp_obstack);
2024                                         obstack_free(&pp_obstack, dummy);
2025                                         goto error_invalid_input;
2026                                 }
2027                                 if (pp_token.kind == '>')
2028                                         break;
2029
2030                                 saved_token_t saved;
2031                                 saved.token          = pp_token;
2032                                 saved.had_whitespace = info.had_whitespace;
2033                                 obstack_grow(&pp_obstack, &saved, sizeof(saved));
2034                         }
2035                         size_t size = obstack_object_size(&pp_obstack);
2036                         assert(size % sizeof(saved_token_t) == 0);
2037                         size_t n_tokens = size / sizeof(saved_token_t);
2038                         saved_token_t *tokens = obstack_finish(&pp_obstack);
2039                         assert(obstack_object_size(&symbol_obstack) == 0);
                     /* build the header name from the saved tokens on
                      * symbol_obstack; whitespace between tokens becomes a
                      * single space */
2040                         for (size_t i = 0; i < n_tokens; ++i) {
2041                                 const saved_token_t *saved = &tokens[i];
2042                                 if (i > 0 && saved->had_whitespace)
2043                                         obstack_1grow(&symbol_obstack, ' ');
2044                                 grow_token(&symbol_obstack, &saved->token);
2045                         }
2046                         obstack_free(&pp_obstack, tokens);
2047                         goto finish_headername;
2048                 } else {
2049 error_invalid_input:
2050                         {
2051                                 char *dummy = obstack_finish(&symbol_obstack);
2052                                 obstack_free(&symbol_obstack, dummy);
2053                         }
2054
2055                         errorf(&pp_token.base.source_position,
2056                                "expected \"FILENAME\" or <FILENAME> after #include");
2057                         return NULL;
2058                 }
2059         }
2060
2061 finish_headername:
     /* terminate the collected name and pass it through identify_string() */
2062         obstack_1grow(&symbol_obstack, '\0');
2063         char *const  headername = obstack_finish(&symbol_obstack);
2064         const char  *identified = identify_string(headername);
2065         pp_token.base.source_position = position;
2066         return identified;
2067 }
2068
2069 static bool do_include(bool const bracket_include, bool const include_next, char const *const headername)
2070 {
2071         size_t const        headername_len = strlen(headername);
2072         searchpath_entry_t *entry;
2073         if (include_next) {
2074                 entry = input.path      ? input.path->next
2075                       : bracket_include ? bracket_searchpath.first
2076                       : quote_searchpath.first;
2077         } else {
2078                 if (!bracket_include) {
2079                         /* put dirname of current input on obstack */
2080                         const char *filename   = input.position.input_name;
2081                         const char *last_slash = strrchr(filename, '/');
2082                         const char *full_name;
2083                         if (last_slash != NULL) {
2084                                 size_t len = last_slash - filename;
2085                                 obstack_grow(&symbol_obstack, filename, len + 1);
2086                                 obstack_grow0(&symbol_obstack, headername, headername_len);
2087                                 char *complete_path = obstack_finish(&symbol_obstack);
2088                                 full_name = identify_string(complete_path);
2089                         } else {
2090                                 full_name = headername;
2091                         }
2092
2093                         FILE *file = fopen(full_name, "r");
2094                         if (file != NULL) {
2095                                 switch_pp_input(file, full_name, NULL, false);
2096                                 return true;
2097                         }
2098                         entry = quote_searchpath.first;
2099                 } else {
2100                         entry = bracket_searchpath.first;
2101                 }
2102         }
2103
2104         assert(obstack_object_size(&symbol_obstack) == 0);
2105         /* check searchpath */
2106         for (; entry; entry = entry->next) {
2107             const char *path = entry->path;
2108             size_t      len  = strlen(path);
2109                 obstack_grow(&symbol_obstack, path, len);
2110                 if (path[len-1] != '/')
2111                         obstack_1grow(&symbol_obstack, '/');
2112                 obstack_grow(&symbol_obstack, headername, headername_len+1);
2113
2114                 char *complete_path = obstack_finish(&symbol_obstack);
2115                 FILE *file          = fopen(complete_path, "r");
2116                 if (file != NULL) {
2117                         const char *filename = identify_string(complete_path);
2118                         switch_pp_input(file, filename, entry, entry->is_system_path);
2119                         return true;
2120                 } else {
2121                         obstack_free(&symbol_obstack, complete_path);
2122                 }
2123         }
2124
2125         return false;
2126 }
2127
2128 static void parse_include_directive(bool const include_next)
2129 {
2130         if (skip_mode) {
2131                 eat_pp_directive();
2132                 return;
2133         }
2134
2135         /* do not eat the TP_include, since it would already parse the next token
2136          * which needs special handling here. */
2137         skip_till_newline(true);
2138         bool system_include;
2139         const char *headername = parse_headername(&system_include);
2140         if (headername == NULL) {
2141                 eat_pp_directive();
2142                 return;
2143         }
2144
2145         bool had_nonwhitespace = skip_till_newline(false);
2146         if (had_nonwhitespace) {
2147                 warningf(WARN_OTHER, &input.position,
2148                          "extra tokens at end of #include directive");
2149         }
2150
2151         if (n_inputs > INCLUDE_LIMIT) {
2152                 errorf(&pp_token.base.source_position, "#include nested too deeply");
2153                 /* eat \n or EOF */
2154                 next_input_token();
2155                 return;
2156         }
2157
2158         /* switch inputs */
2159         info.whitespace_at_line_begin = 0;
2160         info.had_whitespace           = false;
2161         info.at_line_begin            = true;
2162         emit_newlines();
2163         push_input();
2164         bool res = do_include(system_include, include_next, headername);
2165         if (res) {
2166                 next_input_token();
2167         } else {
2168                 errorf(&pp_token.base.source_position, "failed including '%s': %s", headername, strerror(errno));
2169                 pop_restore_input();
2170         }
2171 }
2172
2173 static pp_conditional_t *push_conditional(void)
2174 {
2175         pp_conditional_t *conditional
2176                 = obstack_alloc(&pp_obstack, sizeof(*conditional));
2177         memset(conditional, 0, sizeof(*conditional));
2178
2179         conditional->parent = conditional_stack;
2180         conditional_stack   = conditional;
2181
2182         return conditional;
2183 }
2184
2185 static void pop_conditional(void)
2186 {
2187         assert(conditional_stack != NULL);
2188         conditional_stack = conditional_stack->parent;
2189 }
2190
2191 void check_unclosed_conditionals(void)
2192 {
2193         while (conditional_stack != NULL) {
2194                 pp_conditional_t *conditional = conditional_stack;
2195
2196                 if (conditional->in_else) {
2197                         errorf(&conditional->source_position, "unterminated #else");
2198                 } else {
2199                         errorf(&conditional->source_position, "unterminated condition");
2200                 }
2201                 pop_conditional();
2202         }
2203 }
2204
2205 static void parse_ifdef_ifndef_directive(bool const is_ifdef)
2206 {
2207         bool condition;
2208         eat_pp(is_ifdef ? TP_ifdef : TP_ifndef);
2209
2210         if (skip_mode) {
2211                 eat_pp_directive();
2212                 pp_conditional_t *conditional = push_conditional();
2213                 conditional->source_position  = pp_token.base.source_position;
2214                 conditional->skip             = true;
2215                 return;
2216         }
2217
2218         if (pp_token.kind != T_IDENTIFIER || info.at_line_begin) {
2219                 errorf(&pp_token.base.source_position,
2220                        "expected identifier after #%s, got %K",
2221                        is_ifdef ? "ifdef" : "ifndef", &pp_token);
2222                 eat_pp_directive();
2223
2224                 /* just take the true case in the hope to avoid further errors */
2225                 condition = true;
2226         } else {
2227                 /* evaluate wether we are in true or false case */
2228                 condition = (bool)pp_token.base.symbol->pp_definition == is_ifdef;
2229                 eat_token(T_IDENTIFIER);
2230
2231                 if (!info.at_line_begin) {
2232                         errorf(&pp_token.base.source_position,
2233                                "extra tokens at end of #%s",
2234                                is_ifdef ? "ifdef" : "ifndef");
2235                         eat_pp_directive();
2236                 }
2237         }
2238
2239         pp_conditional_t *conditional = push_conditional();
2240         conditional->source_position  = pp_token.base.source_position;
2241         conditional->condition        = condition;
2242
2243         if (!condition) {
2244                 skip_mode = true;
2245         }
2246 }
2247
2248 static void parse_else_directive(void)
2249 {
2250         eat_pp(TP_else);
2251
2252         if (!info.at_line_begin) {
2253                 if (!skip_mode) {
2254                         warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #else");
2255                 }
2256                 eat_pp_directive();
2257         }
2258
2259         pp_conditional_t *conditional = conditional_stack;
2260         if (conditional == NULL) {
2261                 errorf(&pp_token.base.source_position, "#else without prior #if");
2262                 return;
2263         }
2264
2265         if (conditional->in_else) {
2266                 errorf(&pp_token.base.source_position,
2267                        "#else after #else (condition started %P)",
2268                        &conditional->source_position);
2269                 skip_mode = true;
2270                 return;
2271         }
2272
2273         conditional->in_else = true;
2274         if (!conditional->skip) {
2275                 skip_mode = conditional->condition;
2276         }
2277         conditional->source_position = pp_token.base.source_position;
2278 }
2279
2280 static void parse_endif_directive(void)
2281 {
2282         eat_pp(TP_endif);
2283
2284         if (!info.at_line_begin) {
2285                 if (!skip_mode) {
2286                         warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #endif");
2287                 }
2288                 eat_pp_directive();
2289         }
2290
2291         pp_conditional_t *conditional = conditional_stack;
2292         if (conditional == NULL) {
2293                 errorf(&pp_token.base.source_position, "#endif without prior #if");
2294                 return;
2295         }
2296
2297         if (!conditional->skip) {
2298                 skip_mode = false;
2299         }
2300         pop_conditional();
2301 }
2302
/** The switch named by a "#pragma STDC" directive. */
typedef enum stdc_pragma_kind_t {
        STDC_UNKNOWN          = 0, /**< no (valid) STDC pragma recognized */
        STDC_FP_CONTRACT      = 1, /**< FP_CONTRACT */
        STDC_FENV_ACCESS      = 2, /**< FENV_ACCESS */
        STDC_CX_LIMITED_RANGE = 3  /**< CX_LIMITED_RANGE */
} stdc_pragma_kind_t;

/** The argument given to a "#pragma STDC" switch. */
typedef enum stdc_pragma_value_kind_t {
        STDC_VALUE_UNKNOWN = 0, /**< anything but ON, OFF or DEFAULT */
        STDC_VALUE_ON      = 1, /**< ON */
        STDC_VALUE_OFF     = 2, /**< OFF */
        STDC_VALUE_DEFAULT = 3  /**< DEFAULT */
} stdc_pragma_value_kind_t;
2316
2317 static void parse_pragma_directive(void)
2318 {
2319         eat_pp(TP_pragma);
2320         if (skip_mode) {
2321                 eat_pp_directive();
2322                 return;
2323         }
2324
2325         if (pp_token.kind != T_IDENTIFIER) {
2326                 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
2327                          "expected identifier after #pragma");
2328                 eat_pp_directive();
2329                 return;
2330         }
2331
2332         stdc_pragma_kind_t kind = STDC_UNKNOWN;
2333         if (pp_token.base.symbol->pp_ID == TP_STDC && c_mode & _C99) {
2334                 /* a STDC pragma */
2335                 next_input_token();
2336
2337                 switch (pp_token.base.symbol->pp_ID) {
2338                 case TP_FP_CONTRACT:      kind = STDC_FP_CONTRACT;      break;
2339                 case TP_FENV_ACCESS:      kind = STDC_FENV_ACCESS;      break;
2340                 case TP_CX_LIMITED_RANGE: kind = STDC_CX_LIMITED_RANGE; break;
2341                 default:                  break;
2342                 }
2343                 if (kind != STDC_UNKNOWN) {
2344                         next_input_token();
2345                         stdc_pragma_value_kind_t value;
2346                         switch (pp_token.base.symbol->pp_ID) {
2347                         case TP_ON:      value = STDC_VALUE_ON;      break;
2348                         case TP_OFF:     value = STDC_VALUE_OFF;     break;
2349                         case TP_DEFAULT: value = STDC_VALUE_DEFAULT; break;
2350                         default:         value = STDC_VALUE_UNKNOWN; break;
2351                         }
2352                         if (value == STDC_VALUE_UNKNOWN) {
2353                                 kind = STDC_UNKNOWN;
2354                                 errorf(&pp_token.base.source_position, "bad STDC pragma argument");
2355                         }
2356                 }
2357         }
2358         eat_pp_directive();
2359         if (kind == STDC_UNKNOWN) {
2360                 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
2361                          "encountered unknown #pragma");
2362         }
2363 }
2364
2365 static void parse_line_directive(void)
2366 {
2367         if (pp_token.kind != T_NUMBER) {
2368                 if (!skip_mode)
2369                         parse_error("expected integer");
2370         } else {
2371                 char      *end;
2372                 long const line = strtol(pp_token.literal.string.begin, &end, 0);
2373                 if (*end == '\0') {
2374                         /* use offset -1 as this is about the next line */
2375                         input.position.lineno = line - 1;
2376                         /* force output of line */
2377                         input.output_line = input.position.lineno - 20;
2378                 } else {
2379                         if (!skip_mode) {
2380                                 errorf(&input.position, "'%S' is not a valid line number",
2381                                            &pp_token.literal.string);
2382                         }
2383                 }
2384                 next_input_token();
2385                 if (info.at_line_begin)
2386                         return;
2387         }
2388         if (pp_token.kind == T_STRING_LITERAL
2389             && pp_token.literal.string.encoding == STRING_ENCODING_CHAR) {
2390                 input.position.input_name       = pp_token.literal.string.begin;
2391                 input.position.is_system_header = false;
2392                 next_input_token();
2393
2394                 /* attempt to parse numeric flags as outputted by gcc preprocessor */
2395                 while (!info.at_line_begin && pp_token.kind == T_NUMBER) {
2396                         /* flags:
2397                          * 1 - indicates start of a new file
2398                          * 2 - indicates return from a file
2399                          * 3 - indicates system header
2400                          * 4 - indicates implicit extern "C" in C++ mode
2401                          *
2402                          * currently we're only interested in "3"
2403                          */
2404                         if (streq(pp_token.literal.string.begin, "3")) {
2405                                 input.position.is_system_header = true;
2406                         }
2407                         next_input_token();
2408                 }
2409         }
2410
2411         eat_pp_directive();
2412 }
2413
2414 static void parse_error_directive(void)
2415 {
2416         if (skip_mode) {
2417                 eat_pp_directive();
2418                 return;
2419         }
2420
2421         bool const old_resolve_escape_sequences = resolve_escape_sequences;
2422         resolve_escape_sequences = false;
2423
2424         source_position_t const pos = pp_token.base.source_position;
2425         do {
2426                 if (info.had_whitespace && obstack_object_size(&pp_obstack) != 0)
2427                         obstack_1grow(&pp_obstack, ' ');
2428
2429                 switch (pp_token.kind) {
2430                 case T_NUMBER: {
2431                         string_t const *const str = &pp_token.literal.string;
2432                         obstack_grow(&pp_obstack, str->begin, str->size);
2433                         break;
2434                 }
2435
2436                 {
2437                         char delim;
2438                 case T_STRING_LITERAL:     delim =  '"'; goto string;
2439                 case T_CHARACTER_CONSTANT: delim = '\''; goto string;
2440 string:;
2441                         string_t const *const str = &pp_token.literal.string;
2442                         char     const *const enc = get_string_encoding_prefix(str->encoding);
2443                         obstack_printf(&pp_obstack, "%s%c%s%c", enc, delim, str->begin, delim);
2444                         break;
2445                 }
2446
2447                 default: {
2448                         char const *const str = pp_token.base.symbol->string;
2449                         obstack_grow(&pp_obstack, str, strlen(str));
2450                         break;
2451                 }
2452                 }
2453
2454                 next_input_token();
2455         } while (!info.at_line_begin);
2456
2457         resolve_escape_sequences = old_resolve_escape_sequences;
2458
2459         obstack_1grow(&pp_obstack, '\0');
2460         char *const str = obstack_finish(&pp_obstack);
2461         errorf(&pos, "#%s", str);
2462         obstack_free(&pp_obstack, str);
2463 }
2464
2465 static void parse_preprocessing_directive(void)
2466 {
2467         eat_token('#');
2468
2469         if (info.at_line_begin) {
2470                 /* empty directive */
2471                 return;
2472         }
2473
2474         if (pp_token.base.symbol) {
2475                 switch (pp_token.base.symbol->pp_ID) {
2476                 case TP_define:       parse_define_directive();            break;
2477                 case TP_else:         parse_else_directive();              break;
2478                 case TP_endif:        parse_endif_directive();             break;
2479                 case TP_error:        parse_error_directive();             break;
2480                 case TP_ifdef:        parse_ifdef_ifndef_directive(true);  break;
2481                 case TP_ifndef:       parse_ifdef_ifndef_directive(false); break;
2482                 case TP_include:      parse_include_directive(false);      break;
2483                 case TP_include_next: parse_include_directive(true);       break;
2484                 case TP_line:         next_input_token(); goto line_directive;
2485                 case TP_pragma:       parse_pragma_directive();            break;
2486                 case TP_undef:        parse_undef_directive();             break;
2487                 default:              goto skip;
2488                 }
2489         } else if (pp_token.kind == T_NUMBER) {
2490 line_directive:
2491                 parse_line_directive();
2492         } else {
2493 skip:
2494                 if (!skip_mode) {
2495                         errorf(&pp_token.base.source_position, "invalid preprocessing directive #%K", &pp_token);
2496                 }
2497                 eat_pp_directive();
2498         }
2499
2500         assert(info.at_line_begin);
2501 }
2502
2503 static void finish_current_argument(void)
2504 {
2505         if (current_argument == NULL)
2506                 return;
2507         size_t size = obstack_object_size(&pp_obstack);
2508         current_argument->list_len   = size/sizeof(current_argument->token_list[0]);
2509         current_argument->token_list = obstack_finish(&pp_obstack);
2510 }
2511
2512 void next_preprocessing_token(void)
2513 {
2514 restart:
2515         if (!expand_next()) {
2516                 do {
2517                         next_input_token();
2518                         while (pp_token.kind == '#' && info.at_line_begin) {
2519                                 parse_preprocessing_directive();
2520                         }
2521                 } while (skip_mode && pp_token.kind != T_EOF);
2522         }
2523
2524         const token_kind_t kind = pp_token.kind;
2525         if (current_call == NULL || argument_expanding != NULL) {
2526                 symbol_t *const symbol = pp_token.base.symbol;
2527                 if (symbol) {
2528                         if (kind == T_MACRO_PARAMETER) {
2529                                 assert(current_expansion != NULL);
2530                                 start_expanding(pp_token.macro_parameter.def);
2531                                 goto restart;
2532                         }
2533
2534                         pp_definition_t *const pp_definition = symbol->pp_definition;
2535                         if (pp_definition != NULL && !pp_definition->is_expanding) {
2536                                 if (pp_definition->has_parameters) {
2537
2538                                         /* check if next token is a '(' */
2539                                         whitespace_info_t old_info   = info;
2540                                         token_kind_t      next_token = peek_expansion();
2541                                         if (next_token == T_EOF) {
2542                                                 info.at_line_begin  = false;
2543                                                 info.had_whitespace = false;
2544                                                 skip_whitespace();
2545                                                 if (input.c == '(') {
2546                                                         next_token = '(';
2547                                                 }
2548                                         }
2549
2550                                         if (next_token == '(') {
2551                                                 if (current_expansion == NULL)
2552                                                         expansion_pos = pp_token.base.source_position;
2553                                                 next_preprocessing_token();
2554                                                 assert(pp_token.kind == '(');
2555
2556                                                 pp_definition->parent_expansion = current_expansion;
2557                                                 current_call              = pp_definition;
2558                                                 current_call->expand_pos  = 0;
2559                                                 current_call->expand_info = old_info;
2560                                                 if (current_call->n_parameters > 0) {
2561                                                         current_argument = &current_call->parameters[0];
2562                                                         assert(argument_brace_count == 0);
2563                                                 }
2564                                                 goto restart;
2565                                         } else {
2566                                                 /* skip_whitespaces() skipped newlines and whitespace,
2567                                                  * remember results for next token */
2568                                                 next_info = info;
2569                                                 info      = old_info;
2570                                                 return;
2571                                         }
2572                                 } else {
2573                                         if (current_expansion == NULL)
2574                                                 expansion_pos = pp_token.base.source_position;
2575                                         start_expanding(pp_definition);
2576                                         goto restart;
2577                                 }
2578                         }
2579                 }
2580         }
2581
2582         if (current_call != NULL) {
2583                 /* current_call != NULL */
2584                 if (kind == '(') {
2585                         ++argument_brace_count;
2586                 } else if (kind == ')') {
2587                         if (argument_brace_count > 0) {
2588                                 --argument_brace_count;
2589                         } else {
2590                                 finish_current_argument();
2591                                 assert(kind == ')');
2592                                 start_expanding(current_call);
2593                                 info = current_call->expand_info;
2594                                 current_call     = NULL;
2595                                 current_argument = NULL;
2596                                 goto restart;
2597                         }
2598                 } else if (kind == ',' && argument_brace_count == 0) {
2599                         finish_current_argument();
2600                         current_call->expand_pos++;
2601                         if (current_call->expand_pos >= current_call->n_parameters) {
2602                                 errorf(&pp_token.base.source_position,
2603                                            "too many arguments passed for macro '%Y'",
2604                                            current_call->symbol);
2605                                 current_argument = NULL;
2606                         } else {
2607                                 current_argument
2608                                         = &current_call->parameters[current_call->expand_pos];
2609                         }
2610                         goto restart;
2611                 } else if (kind == T_MACRO_PARAMETER) {
2612                         /* parameters have to be fully expanded before being used as
2613                          * parameters for another macro-call */
2614                         assert(current_expansion != NULL);
2615                         pp_definition_t *argument = pp_token.macro_parameter.def;
2616                         argument_expanding = argument;
2617                         start_expanding(argument);
2618                         goto restart;
2619                 } else if (kind == T_EOF) {
2620                         errorf(&expansion_pos,
2621                                "reached end of file while parsing arguments for '%Y'",
2622                                current_call->symbol);
2623                         return;
2624                 }
2625                 if (current_argument != NULL) {
2626                         saved_token_t saved;
2627                         saved.token = pp_token;
2628                         saved.had_whitespace = info.had_whitespace;
2629                         obstack_grow(&pp_obstack, &saved, sizeof(saved));
2630                 }
2631                 goto restart;
2632         }
2633 }
2634
2635 void append_include_path(searchpath_t *paths, const char *path)
2636 {
2637         searchpath_entry_t *entry = OALLOCZ(&config_obstack, searchpath_entry_t);
2638         entry->path           = path;
2639         entry->is_system_path = paths->is_system_path;
2640
2641         *paths->anchor = entry;
2642         paths->anchor  = &entry->next;
2643 }
2644
2645 static void append_env_paths(searchpath_t *paths, const char *envvar)
2646 {
2647         const char *val = getenv(envvar);
2648         if (val != NULL && *val != '\0') {
2649                 const char *begin = val;
2650                 const char *c;
2651                 do {
2652                         c = begin;
2653                         while (*c != '\0' && *c != ':')
2654                                 ++c;
2655
2656                         size_t len = c-begin;
2657                         if (len == 0) {
2658                                 /* use "." for gcc compatibility (Matze: I would expect that
2659                                  * nothing happens for an empty entry...) */
2660                                 append_include_path(paths, ".");
2661                         } else {
2662                                 char *const string = obstack_copy0(&config_obstack, begin, len);
2663                                 append_include_path(paths, string);
2664                         }
2665
2666                         begin = c+1;
2667                         /* skip : */
2668                         if (*begin == ':')
2669                                 ++begin;
2670                 } while(*c != '\0');
2671         }
2672 }
2673
2674 static void append_searchpath(searchpath_t *path, const searchpath_t *append)
2675 {
2676         *path->anchor = append->first;
2677 }
2678
2679 static void setup_include_path(void)
2680 {
2681         /* built-in paths */
2682         append_include_path(&system_searchpath, "/usr/include");
2683
2684         /* parse environment variable */
2685         append_env_paths(&bracket_searchpath, "CPATH");
2686         append_env_paths(&system_searchpath,
2687                          c_mode & _CXX ? "CPLUS_INCLUDE_PATH" : "C_INCLUDE_PATH");
2688
2689         /* append system search path to bracket searchpath */
2690         append_searchpath(&system_searchpath,  &after_searchpath);
2691         append_searchpath(&bracket_searchpath, &system_searchpath);
2692         append_searchpath(&quote_searchpath, &bracket_searchpath);
2693 }
2694
2695 static void input_error(unsigned const delta_lines, unsigned const delta_cols, char const *const message)
2696 {
2697         source_position_t pos = pp_token.base.source_position;
2698         pos.lineno += delta_lines;
2699         pos.colno  += delta_cols;
2700         errorf(&pos, "%s", message);
2701 }
2702
2703 void init_include_paths(void)
2704 {
2705         obstack_init(&config_obstack);
2706 }
2707
2708 void init_preprocessor(void)
2709 {
2710         init_symbols();
2711
2712         obstack_init(&pp_obstack);
2713         obstack_init(&input_obstack);
2714         strset_init(&stringset);
2715
2716         setup_include_path();
2717
2718         set_input_error_callback(input_error);
2719 }
2720
2721 void exit_preprocessor(void)
2722 {
2723         obstack_free(&input_obstack, NULL);
2724         obstack_free(&pp_obstack, NULL);
2725         obstack_free(&config_obstack, NULL);
2726
2727         strset_destroy(&stringset);
2728 }
2729
2730 int pptest_main(int argc, char **argv);
2731 int pptest_main(int argc, char **argv)
2732 {
2733         init_symbol_table();
2734         init_include_paths();
2735         init_preprocessor();
2736         init_tokens();
2737
2738         error_on_unknown_chars   = false;
2739         resolve_escape_sequences = false;
2740
2741         /* simplistic commandline parser */
2742         const char *filename = NULL;
2743         const char *output = NULL;
2744         for (int i = 1; i < argc; ++i) {
2745                 const char *opt = argv[i];
2746                 if (streq(opt, "-I")) {
2747                         append_include_path(&bracket_searchpath, argv[++i]);
2748                         continue;
2749                 } else if (streq(opt, "-E")) {
2750                         /* ignore */
2751                 } else if (streq(opt, "-o")) {
2752                         output = argv[++i];
2753                         continue;
2754                 } else if (opt[0] == '-') {
2755                         fprintf(stderr, "Unknown option '%s'\n", opt);
2756                 } else {
2757                         if (filename != NULL)
2758                                 fprintf(stderr, "Multiple inputs not supported\n");
2759                         filename = argv[i];
2760                 }
2761         }
2762         if (filename == NULL) {
2763                 fprintf(stderr, "No input specified\n");
2764                 return 1;
2765         }
2766
2767         if (output == NULL) {
2768                 out = stdout;
2769         } else {
2770                 out = fopen(output, "w");
2771                 if (out == NULL) {
2772                         fprintf(stderr, "Couldn't open output '%s'\n", output);
2773                         return 1;
2774                 }
2775         }
2776
2777         /* just here for gcc compatibility */
2778         fprintf(out, "# 1 \"%s\"\n", filename);
2779         fprintf(out, "# 1 \"<built-in>\"\n");
2780         fprintf(out, "# 1 \"<command-line>\"\n");
2781
2782         FILE *file = fopen(filename, "r");
2783         if (file == NULL) {
2784                 fprintf(stderr, "Couldn't open input '%s'\n", filename);
2785                 return 1;
2786         }
2787         switch_pp_input(file, filename, NULL, false);
2788
2789         for (;;) {
2790                 next_preprocessing_token();
2791                 if (pp_token.kind == T_EOF)
2792                         break;
2793                 emit_pp_token();
2794         }
2795
2796         fputc('\n', out);
2797         check_unclosed_conditionals();
2798         fclose(close_pp_input());
2799         if (out != stdout)
2800                 fclose(out);
2801
2802         exit_tokens();
2803         exit_preprocessor();
2804         exit_symbol_table();
2805
2806         return 0;
2807 }