Record and handle whether a search path is a system path.
[cparser] / preprocessor.c
1 #include <config.h>
2
3 #include <assert.h>
4 #include <errno.h>
5 #include <string.h>
6 #include <stdbool.h>
7 #include <ctype.h>
8
9 #include "preprocessor.h"
10 #include "token_t.h"
11 #include "symbol_t.h"
12 #include "adt/util.h"
13 #include "adt/error.h"
14 #include "adt/strutil.h"
15 #include "adt/strset.h"
16 #include "lang_features.h"
17 #include "diagnostic.h"
18 #include "string_rep.h"
19 #include "input.h"
20
/** Number of slots kept free at the start of pp_input_t::buf so that
 * characters can still be put back right after the buffer was refilled. */
#define MAX_PUTBACK 3
/** NOTE(review): presumably the maximum include nesting depth; the check
 * itself is not part of this excerpt. */
#define INCLUDE_LIMIT 199  /* 199 is for gcc "compatibility" */
23
/** A token as stored in a replacement list (pp_definition_t::token_list). */
typedef struct saved_token_t {
        token_t token;
        /** whether whitespace preceded the token; stringify() turns it into a
         * single space and expand_next() copies it to info.had_whitespace */
        bool    had_whitespace;
} saved_token_t;
28
/** Whitespace and line-position facts recorded for a token. */
typedef struct whitespace_info_t {
        /** current token had whitespace in front of it */
        bool     had_whitespace;
        /** current token is at the beginning of a line.
         * => a "#" at line begin starts a preprocessing directive. */
        bool     at_line_begin;
        /** number of spaces before the first token in a line */
        unsigned whitespace_at_line_begin;
} whitespace_info_t;
38
/**
 * A list of replacement tokens together with the state needed while it is
 * being expanded.  Macro parameters are represented by this type as well
 * (is_parameter).
 */
struct pp_definition_t {
        symbol_t          *symbol;
        source_position_t  source_position;
        /** expansion that was active when this one was started; restored by
         * finished_expanding() */
        pp_definition_t   *parent_expansion;
        /** index into token_list of the next token expand_next() delivers */
        size_t             expand_pos;
        whitespace_info_t  expand_info;
        bool               is_variadic    : 1;
        /** set by start_expanding(), cleared by finished_expanding() */
        bool               is_expanding   : 1;
        bool               has_parameters : 1;
        /** set for definitions that T_MACRO_PARAMETER tokens refer to */
        bool               is_parameter   : 1;
        pp_definition_t   *function_definition;
        size_t             n_parameters;
        pp_definition_t   *parameters;

        /* replacement */
        /** number of entries in token_list */
        size_t             list_len;
        saved_token_t     *token_list;
};
57
/* NOTE(review): presumably one entry per open conditional directive, linked
 * through `parent`; the code that uses it is not part of this excerpt. */
typedef struct pp_conditional_t pp_conditional_t;
struct pp_conditional_t {
        source_position_t  source_position;
        bool               condition;
        bool               in_else;
        /** conditional in skip mode (then+else gets skipped) */
        bool               skip;
        pp_conditional_t  *parent;
};
67
/** State of one input stream that is being read character by character. */
typedef struct pp_input_t pp_input_t;
struct pp_input_t {
        /** stream handed to switch_pp_input(); returned by close_pp_input() */
        FILE               *file;
        /** decoder created from `file` by input_from_stream() */
        input_t            *input;
        /** current character, EOF once the input is exhausted */
        utf32               c;
        /** decoded characters; the first MAX_PUTBACK slots are never filled by
         * decode() and serve as room for put_back() */
        utf32               buf[1024+MAX_PUTBACK];
        /** one past the last valid character in buf (NULL before first read) */
        const utf32        *bufend;
        /** next character to read from buf (NULL before first read) */
        const utf32        *bufpos;
        source_position_t   position;
        /** next entry on the stack of saved inputs */
        pp_input_t         *parent;
        unsigned            output_line;
        /** search path entry handed to switch_pp_input() */
        searchpath_entry_t *path;
};
81
/** One element of a singly linked list of search path directories. */
struct searchpath_entry_t {
        const char         *path;
        searchpath_entry_t *next;
        /** whether this entry counts as a system path */
        bool                is_system_path;
};
87
/** the input that is currently being read */
static pp_input_t      input;

/** saved inputs, most recently pushed first (see push_input()) */
static pp_input_t     *input_stack;
/** number of entries on input_stack */
static unsigned        n_inputs;
/** storage for the entries of input_stack */
static struct obstack  input_obstack;

static pp_conditional_t *conditional_stack;

/** the token the preprocessor produced last */
token_t                  pp_token;
/** when false, '$' is not accepted as part of a symbol (see SYMBOL_CASES) */
bool                     allow_dollar_in_symbol   = true;
/** when true, parse_string() replaces escape sequences by the character they
 * denote; otherwise they are copied verbatim */
static bool              resolve_escape_sequences = true;
static bool              error_on_unknown_chars   = true;
static bool              skip_mode;
static FILE             *out;
static struct obstack    pp_obstack;
static struct obstack    config_obstack;
static const char       *printed_input_name = NULL;
/** source position given to every token delivered by expand_next() */
static source_position_t expansion_pos;
/** innermost expansion in progress, NULL if none */
static pp_definition_t  *current_expansion  = NULL;
static pp_definition_t  *current_call       = NULL;
static pp_definition_t  *current_argument   = NULL;
/** reset to NULL by finished_expanding() once this definition is done */
static pp_definition_t  *argument_expanding = NULL;
static unsigned          argument_brace_count;
/** set used by identify_string() to share equal strings */
static strset_t          stringset;
static token_kind_t      last_token;
113
/** A list of search path entries. */
struct searchpath_t {
        /** head of the list */
        searchpath_entry_t  *first;
        /** initially points at `first`; NOTE(review): presumably the link that
         * the next appended entry is stored through -- confirm at the code
         * that appends entries */
        searchpath_entry_t **anchor;
        /** whether the entries of this list are system paths */
        bool                 is_system_path;
};

/* The initialisers below are positional: { first, anchor, is_system_path }. */
searchpath_t bracket_searchpath = { NULL, &bracket_searchpath.first, false };
searchpath_t quote_searchpath   = { NULL, &quote_searchpath.first,   false };
searchpath_t system_searchpath  = { NULL, &system_searchpath.first,  true  };
123
static whitespace_info_t next_info; /* valid if had_whitespace is true */
/** whitespace information of the current token */
static whitespace_info_t info;

/* forward declarations of functions used before their definition */
static inline void next_char(void);
static void next_input_token(void);
static void print_line_directive(const source_position_t *pos, const char *add);

/* symbols of the digraph spellings, filled in by init_symbols() */
static symbol_t *symbol_colongreater;
static symbol_t *symbol_lesscolon;
static symbol_t *symbol_lesspercent;
static symbol_t *symbol_percentcolon;
static symbol_t *symbol_percentcolonpercentcolon;
static symbol_t *symbol_percentgreater;
137
138 static void init_symbols(void)
139 {
140         symbol_colongreater             = symbol_table_insert(":>");
141         symbol_lesscolon                = symbol_table_insert("<:");
142         symbol_lesspercent              = symbol_table_insert("<%");
143         symbol_percentcolon             = symbol_table_insert("%:");
144         symbol_percentcolonpercentcolon = symbol_table_insert("%:%:");
145         symbol_percentgreater           = symbol_table_insert("%>");
146 }
147
148 void switch_pp_input(FILE *const file, char const *const filename, searchpath_entry_t *const path, bool const is_system_header)
149 {
150         input.file                      = file;
151         input.input                     = input_from_stream(file, NULL);
152         input.bufend                    = NULL;
153         input.bufpos                    = NULL;
154         input.output_line               = 0;
155         input.position.input_name       = filename;
156         input.position.lineno           = 1;
157         input.position.is_system_header = is_system_header;
158         input.path                      = path;
159
160         /* indicate that we're at a new input */
161         print_line_directive(&input.position, input_stack != NULL ? "1" : NULL);
162
163         /* place a virtual '\n' so we realize we're at line begin */
164         input.position.lineno = 0;
165         input.c               = '\n';
166 }
167
168 FILE *close_pp_input(void)
169 {
170         input_free(input.input);
171
172         FILE* const file = input.file;
173         assert(file);
174
175         input.input  = NULL;
176         input.file   = NULL;
177         input.bufend = NULL;
178         input.bufpos = NULL;
179         input.c      = EOF;
180
181         return file;
182 }
183
184 static void push_input(void)
185 {
186         pp_input_t *const saved_input = obstack_copy(&input_obstack, &input, sizeof(input));
187
188         /* adjust buffer positions */
189         if (input.bufpos != NULL)
190                 saved_input->bufpos = saved_input->buf + (input.bufpos - input.buf);
191         if (input.bufend != NULL)
192                 saved_input->bufend = saved_input->buf + (input.bufend - input.buf);
193
194         saved_input->parent = input_stack;
195         input_stack         = saved_input;
196         ++n_inputs;
197 }
198
199 static void pop_restore_input(void)
200 {
201         assert(n_inputs > 0);
202         assert(input_stack != NULL);
203
204         pp_input_t *saved_input = input_stack;
205
206         memcpy(&input, saved_input, sizeof(input));
207         input.parent = NULL;
208
209         /* adjust buffer positions */
210         if (saved_input->bufpos != NULL)
211                 input.bufpos = input.buf + (saved_input->bufpos - saved_input->buf);
212         if (saved_input->bufend != NULL)
213                 input.bufend = input.buf + (saved_input->bufend - saved_input->buf);
214
215         input_stack = saved_input->parent;
216         obstack_free(&input_obstack, saved_input);
217         --n_inputs;
218 }
219
220 /**
221  * Prints a parse error message at the current token.
222  *
223  * @param msg   the error message
224  */
225 static void parse_error(const char *msg)
226 {
227         errorf(&pp_token.base.source_position,  "%s", msg);
228 }
229
230 static inline void next_real_char(void)
231 {
232         assert(input.bufpos <= input.bufend);
233         if (input.bufpos >= input.bufend) {
234                 size_t const n = decode(input.input, input.buf + MAX_PUTBACK, lengthof(input.buf) - MAX_PUTBACK);
235                 if (n == 0) {
236                         input.c = EOF;
237                         return;
238                 }
239                 input.bufpos = input.buf + MAX_PUTBACK;
240                 input.bufend = input.bufpos + n;
241         }
242         input.c = *input.bufpos++;
243         ++input.position.colno;
244 }
245
246 /**
247  * Put a character back into the buffer.
248  *
249  * @param pc  the character to put back
250  */
251 static inline void put_back(utf32 const pc)
252 {
253         assert(input.bufpos > input.buf);
254         *(--input.bufpos - input.buf + input.buf) = (char) pc;
255         --input.position.colno;
256 }
257
/**
 * To be used as `case NEWLINE:` inside a switch over input.c.  Matches
 * "\r", "\n" and "\r\n", consumes the line break, increments the line number
 * and resets the column to 1.  The statements written after `case NEWLINE:`
 * run once the line break has been consumed: the macro ends in the identifier
 * `newline`, which together with the ':' written by the user forms a goto
 * label.  As labels have function scope the macro can be used only once per
 * function.
 */
#define NEWLINE \
        '\r': \
                next_char(); \
                if (input.c == '\n') { \
        case '\n': \
                        next_char(); \
                } \
                ++input.position.lineno; \
                input.position.colno = 1; \
                goto newline; \
                newline // Let it look like an ordinary case label.
269
/** Asserts that the current character is c_type, then advances to the next
 * character. */
#define eat(c_type) (assert(input.c == c_type), next_char())
271
272 static void maybe_concat_lines(void)
273 {
274         eat('\\');
275
276         switch (input.c) {
277         case NEWLINE:
278                 info.whitespace_at_line_begin = 0;
279                 return;
280
281         default:
282                 break;
283         }
284
285         put_back(input.c);
286         input.c = '\\';
287 }
288
289 /**
290  * Set c to the next input character, ie.
291  * after expanding trigraphs.
292  */
293 static inline void next_char(void)
294 {
295         next_real_char();
296
297         /* filter trigraphs and concatenated lines */
298         if (UNLIKELY(input.c == '\\')) {
299                 maybe_concat_lines();
300                 goto end_of_next_char;
301         }
302
303         if (LIKELY(input.c != '?'))
304                 goto end_of_next_char;
305
306         next_real_char();
307         if (LIKELY(input.c != '?')) {
308                 put_back(input.c);
309                 input.c = '?';
310                 goto end_of_next_char;
311         }
312
313         next_real_char();
314         switch (input.c) {
315         case '=': input.c = '#'; break;
316         case '(': input.c = '['; break;
317         case '/': input.c = '\\'; maybe_concat_lines(); break;
318         case ')': input.c = ']'; break;
319         case '\'': input.c = '^'; break;
320         case '<': input.c = '{'; break;
321         case '!': input.c = '|'; break;
322         case '>': input.c = '}'; break;
323         case '-': input.c = '~'; break;
324         default:
325                 put_back(input.c);
326                 put_back('?');
327                 input.c = '?';
328                 break;
329         }
330
331 end_of_next_char:;
332 #ifdef DEBUG_CHARS
333         printf("nchar '%c'\n", input.c);
334 #endif
335 }
336
337
338
/**
 * Returns true if the given character is an octal digit ('0' to '7').
 *
 * @param chr  the character to check
 */
static inline bool is_octal_digit(int chr)
{
        /* the decimal digits have consecutive values in every C character set */
        return '0' <= chr && chr <= '7';
}
360
/**
 * Returns the value of a decimal or hexadecimal digit.
 * Calls panic() for any other character.
 *
 * @param digit  the digit character ('0'-'9', 'a'-'f' or 'A'-'F')
 */
static int digit_value(int digit)
{
        /* only the decimal digits are guaranteed to be consecutive */
        if ('0' <= digit && digit <= '9')
                return digit - '0';

        /* the letters have to be listed one by one to stay portable */
        switch (digit) {
        case 'a': case 'A': return 10;
        case 'b': case 'B': return 11;
        case 'c': case 'C': return 12;
        case 'd': case 'D': return 13;
        case 'e': case 'E': return 14;
        case 'f': case 'F': return 15;
        default:
                panic("wrong character given");
        }
}
394
395 /**
396  * Parses an octal character sequence.
397  *
398  * @param first_digit  the already read first digit
399  */
400 static utf32 parse_octal_sequence(const utf32 first_digit)
401 {
402         assert(is_octal_digit(first_digit));
403         utf32 value = digit_value(first_digit);
404         if (!is_octal_digit(input.c)) return value;
405         value = 8 * value + digit_value(input.c);
406         next_char();
407         if (!is_octal_digit(input.c)) return value;
408         value = 8 * value + digit_value(input.c);
409         next_char();
410         return value;
411
412 }
413
414 /**
415  * Parses a hex character sequence.
416  */
417 static utf32 parse_hex_sequence(void)
418 {
419         utf32 value = 0;
420         while (isxdigit(input.c)) {
421                 value = 16 * value + digit_value(input.c);
422                 next_char();
423         }
424         return value;
425 }
426
427 static bool is_universal_char_valid(utf32 const v)
428 {
429         /* C11 Â§6.4.3:2 */
430         if (v < 0xA0U && v != 0x24 && v != 0x40 && v != 0x60)
431                 return false;
432         if (0xD800 <= v && v <= 0xDFFF)
433                 return false;
434         return true;
435 }
436
437 static utf32 parse_universal_char(unsigned const n_digits)
438 {
439         utf32 v = 0;
440         for (unsigned k = n_digits; k != 0; --k) {
441                 if (isxdigit(input.c)) {
442                         v = 16 * v + digit_value(input.c);
443                         if (!resolve_escape_sequences)
444                                 obstack_1grow(&symbol_obstack, input.c);
445                         next_char();
446                 } else {
447                         errorf(&input.position,
448                                "short universal character name, expected %u more digits",
449                                    k);
450                         break;
451                 }
452         }
453         if (!is_universal_char_valid(v)) {
454                 errorf(&input.position,
455                        "\\%c%0*X is not a valid universal character name",
456                        n_digits == 4 ? 'u' : 'U', (int)n_digits, v);
457         }
458         return v;
459 }
460
461 static bool is_universal_char_valid_identifier(utf32 const v)
462 {
463         /* C11 Annex D.1 */
464         if (                v == 0x000A8) return true;
465         if (                v == 0x000AA) return true;
466         if (                v == 0x000AD) return true;
467         if (                v == 0x000AF) return true;
468         if (0x000B2 <= v && v <= 0x000B5) return true;
469         if (0x000B7 <= v && v <= 0x000BA) return true;
470         if (0x000BC <= v && v <= 0x000BE) return true;
471         if (0x000C0 <= v && v <= 0x000D6) return true;
472         if (0x000D8 <= v && v <= 0x000F6) return true;
473         if (0x000F8 <= v && v <= 0x000FF) return true;
474         if (0x00100 <= v && v <= 0x0167F) return true;
475         if (0x01681 <= v && v <= 0x0180D) return true;
476         if (0x0180F <= v && v <= 0x01FFF) return true;
477         if (0x0200B <= v && v <= 0x0200D) return true;
478         if (0x0202A <= v && v <= 0x0202E) return true;
479         if (0x0203F <= v && v <= 0x02040) return true;
480         if (                v == 0x02054) return true;
481         if (0x02060 <= v && v <= 0x0206F) return true;
482         if (0x02070 <= v && v <= 0x0218F) return true;
483         if (0x02460 <= v && v <= 0x024FF) return true;
484         if (0x02776 <= v && v <= 0x02793) return true;
485         if (0x02C00 <= v && v <= 0x02DFF) return true;
486         if (0x02E80 <= v && v <= 0x02FFF) return true;
487         if (0x03004 <= v && v <= 0x03007) return true;
488         if (0x03021 <= v && v <= 0x0302F) return true;
489         if (0x03031 <= v && v <= 0x0303F) return true;
490         if (0x03040 <= v && v <= 0x0D7FF) return true;
491         if (0x0F900 <= v && v <= 0x0FD3D) return true;
492         if (0x0FD40 <= v && v <= 0x0FDCF) return true;
493         if (0x0FDF0 <= v && v <= 0x0FE44) return true;
494         if (0x0FE47 <= v && v <= 0x0FFFD) return true;
495         if (0x10000 <= v && v <= 0x1FFFD) return true;
496         if (0x20000 <= v && v <= 0x2FFFD) return true;
497         if (0x30000 <= v && v <= 0x3FFFD) return true;
498         if (0x40000 <= v && v <= 0x4FFFD) return true;
499         if (0x50000 <= v && v <= 0x5FFFD) return true;
500         if (0x60000 <= v && v <= 0x6FFFD) return true;
501         if (0x70000 <= v && v <= 0x7FFFD) return true;
502         if (0x80000 <= v && v <= 0x8FFFD) return true;
503         if (0x90000 <= v && v <= 0x9FFFD) return true;
504         if (0xA0000 <= v && v <= 0xAFFFD) return true;
505         if (0xB0000 <= v && v <= 0xBFFFD) return true;
506         if (0xC0000 <= v && v <= 0xCFFFD) return true;
507         if (0xD0000 <= v && v <= 0xDFFFD) return true;
508         if (0xE0000 <= v && v <= 0xEFFFD) return true;
509         return false;
510 }
511
512 static bool is_universal_char_valid_identifier_start(utf32 const v)
513 {
514         /* C11 Annex D.2 */
515         if (0x0300 <= v && v <= 0x036F) return false;
516         if (0x1DC0 <= v && v <= 0x1DFF) return false;
517         if (0x20D0 <= v && v <= 0x20FF) return false;
518         if (0xFE20 <= v && v <= 0xFE2F) return false;
519         return true;
520 }
521
522 /**
523  * Parse an escape sequence.
524  */
525 static utf32 parse_escape_sequence(void)
526 {
527         eat('\\');
528
529         utf32 const ec = input.c;
530         next_char();
531
532         switch (ec) {
533         case '"':  return '"';
534         case '\'': return '\'';
535         case '\\': return '\\';
536         case '?': return '\?';
537         case 'a': return '\a';
538         case 'b': return '\b';
539         case 'f': return '\f';
540         case 'n': return '\n';
541         case 'r': return '\r';
542         case 't': return '\t';
543         case 'v': return '\v';
544         case 'x':
545                 return parse_hex_sequence();
546         case '0':
547         case '1':
548         case '2':
549         case '3':
550         case '4':
551         case '5':
552         case '6':
553         case '7':
554                 return parse_octal_sequence(ec);
555         case EOF:
556                 parse_error("reached end of file while parsing escape sequence");
557                 return EOF;
558         /* \E is not documented, but handled, by GCC.  It is acceptable according
559          * to Â§6.11.4, whereas \e is not. */
560         case 'E':
561         case 'e':
562                 if (c_mode & _GNUC)
563                         return 27;   /* hopefully 27 is ALWAYS the code for ESCAPE */
564                 break;
565
566         case 'U': return parse_universal_char(8);
567         case 'u': return parse_universal_char(4);
568
569         default:
570                 break;
571         }
572         /* Â§6.4.4.4:8 footnote 64 */
573         parse_error("unknown escape sequence");
574         return EOF;
575 }
576
577 static const char *identify_string(char *string)
578 {
579         const char *result = strset_insert(&stringset, string);
580         if (result != string) {
581                 obstack_free(&symbol_obstack, string);
582         }
583         return result;
584 }
585
586 static string_t sym_make_string(string_encoding_t const enc)
587 {
588         obstack_1grow(&symbol_obstack, '\0');
589         size_t      const len    = obstack_object_size(&symbol_obstack) - 1;
590         char       *const string = obstack_finish(&symbol_obstack);
591         char const *const result = identify_string(string);
592         return (string_t){ result, len, enc };
593 }
594
595 string_t make_string(char const *const string)
596 {
597         obstack_grow(&symbol_obstack, string, strlen(string));
598         return sym_make_string(STRING_ENCODING_CHAR);
599 }
600
601 static void parse_string(utf32 const delimiter, token_kind_t const kind,
602                          string_encoding_t const enc,
603                          char const *const context)
604 {
605         const unsigned start_linenr = input.position.lineno;
606
607         eat(delimiter);
608
609         while (true) {
610                 switch (input.c) {
611                 case '\\': {
612                         if (resolve_escape_sequences) {
613                                 utf32 const tc = parse_escape_sequence();
614                                 if (enc == STRING_ENCODING_CHAR) {
615                                         if (tc >= 0x100) {
616                                                 warningf(WARN_OTHER, &pp_token.base.source_position, "escape sequence out of range");
617                                         }
618                                         obstack_1grow(&symbol_obstack, tc);
619                                 } else {
620                                         obstack_grow_utf8(&symbol_obstack, tc);
621                                 }
622                         } else {
623                                 obstack_1grow(&symbol_obstack, (char)input.c);
624                                 next_char();
625                                 obstack_1grow(&symbol_obstack, (char)input.c);
626                                 next_char();
627                         }
628                         break;
629                 }
630
631                 case NEWLINE:
632                         errorf(&pp_token.base.source_position, "newline while parsing %s", context);
633                         break;
634
635                 case EOF: {
636                         source_position_t source_position;
637                         source_position.input_name = pp_token.base.source_position.input_name;
638                         source_position.lineno     = start_linenr;
639                         errorf(&source_position, "EOF while parsing %s", context);
640                         goto end_of_string;
641                 }
642
643                 default:
644                         if (input.c == delimiter) {
645                                 next_char();
646                                 goto end_of_string;
647                         } else {
648                                 obstack_grow_utf8(&symbol_obstack, input.c);
649                                 next_char();
650                                 break;
651                         }
652                 }
653         }
654
655 end_of_string:
656         pp_token.kind           = kind;
657         pp_token.literal.string = sym_make_string(enc);
658 }
659
660 static void parse_string_literal(string_encoding_t const enc)
661 {
662         parse_string('"', T_STRING_LITERAL, enc, "string literal");
663 }
664
665 static void parse_character_constant(string_encoding_t const enc)
666 {
667         parse_string('\'', T_CHARACTER_CONSTANT, enc, "character constant");
668         if (pp_token.literal.string.size == 0) {
669                 parse_error("empty character constant");
670         }
671 }
672
/**
 * To be used as `case SYMBOL_CASES_WITHOUT_E_P:`.  Matches '$' (which jumps
 * to the label dollar_sign of the using function when '$' is not allowed in
 * symbols), '_' and all letters except 'e', 'p', 'E' and 'P'.
 */
#define SYMBOL_CASES_WITHOUT_E_P \
             '$': if (!allow_dollar_in_symbol) goto dollar_sign; \
        case 'a': case 'b': case 'c': case 'd': case 'f': case 'g': \
        case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': \
        case 'n': case 'o': case 'q': case 'r': case 's': case 't': \
        case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': \
        case 'A': case 'B': case 'C': case 'D': case 'F': case 'G': \
        case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': \
        case 'N': case 'O': case 'Q': case 'R': case 'S': case 'T': \
        case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': \
        case '_'
724
/**
 * To be used as `case SYMBOL_CASES:`.  Matches everything
 * SYMBOL_CASES_WITHOUT_E_P matches plus 'e', 'p', 'E' and 'P'.
 */
#define SYMBOL_CASES \
             SYMBOL_CASES_WITHOUT_E_P: \
        case 'e': case 'p': case 'E': case 'P'
731
/** To be used as `case DIGIT_CASES:`.  Matches the decimal digits. */
#define DIGIT_CASES \
             '0': case '1': case '2': case '3': case '4': \
        case '5': case '6': case '7': case '8': case '9'
743
744 static void start_expanding(pp_definition_t *definition)
745 {
746         definition->parent_expansion = current_expansion;
747         definition->expand_pos       = 0;
748         definition->is_expanding     = true;
749         if (definition->list_len > 0) {
750                 definition->token_list[0].had_whitespace
751                         = info.had_whitespace;
752         }
753         current_expansion = definition;
754 }
755
756 static void finished_expanding(pp_definition_t *definition)
757 {
758         assert(definition->is_expanding);
759         pp_definition_t *parent = definition->parent_expansion;
760         definition->parent_expansion = NULL;
761         definition->is_expanding     = false;
762
763         /* stop further expanding once we expanded a parameter used in a
764          * sub macro-call */
765         if (definition == argument_expanding)
766                 argument_expanding = NULL;
767
768         assert(current_expansion == definition);
769         current_expansion = parent;
770 }
771
772 static void grow_string_escaped(struct obstack *obst, const string_t *string, char const *delimiter)
773 {
774         char const *prefix = get_string_encoding_prefix(string->encoding);
775         obstack_printf(obst, "%s%s", prefix, delimiter);
776         size_t      size = string->size;
777         const char *str  = string->begin;
778         if (resolve_escape_sequences) {
779                 obstack_grow(obst, str, size);
780         } else {
781                 for (size_t i = 0; i < size; ++i) {
782                         const char c = str[i];
783                         if (c == '\\' || c == '"')
784                                 obstack_1grow(obst, '\\');
785                         obstack_1grow(obst, c);
786                 }
787         }
788         obstack_printf(obst, "%s", delimiter);
789 }
790
791 static void grow_token(struct obstack *obst, const token_t *token)
792 {
793         switch (token->kind) {
794         case T_NUMBER:
795                 obstack_grow(obst, token->literal.string.begin, token->literal.string.size);
796                 break;
797
798         case T_STRING_LITERAL: {
799                 char const *const delimiter = resolve_escape_sequences ? "\"" : "\\\"";
800                 grow_string_escaped(obst, &token->literal.string, delimiter);
801                 break;
802         }
803
804         case T_CHARACTER_CONSTANT:
805                 grow_string_escaped(obst, &token->literal.string, "'");
806                 break;
807
808         case T_IDENTIFIER:
809         default: {
810                 const char *str = token->base.symbol->string;
811                 size_t      len = strlen(str);
812                 obstack_grow(obst, str, len);
813                 break;
814         }
815         }
816 }
817
818 static void stringify(const pp_definition_t *definition)
819 {
820         assert(obstack_object_size(&symbol_obstack) == 0);
821
822         size_t list_len = definition->list_len;
823         for (size_t p = 0; p < list_len; ++p) {
824                 const saved_token_t *saved = &definition->token_list[p];
825                 if (p > 0 && saved->had_whitespace)
826                         obstack_1grow(&symbol_obstack, ' ');
827                 grow_token(&symbol_obstack, &saved->token);
828         }
829         pp_token.kind           = T_STRING_LITERAL;
830         pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
831 }
832
833 static inline void set_punctuator(token_kind_t const kind)
834 {
835         pp_token.kind        = kind;
836         pp_token.base.symbol = token_symbols[kind];
837 }
838
839 static inline void set_digraph(token_kind_t const kind, symbol_t *const symbol)
840 {
841         pp_token.kind        = kind;
842         pp_token.base.symbol = symbol;
843 }
844
845 /**
846  * returns next final token from a preprocessor macro expansion
847  */
848 static bool expand_next(void)
849 {
850         if (current_expansion == NULL)
851                 return false;
852
853 restart:;
854         size_t pos = current_expansion->expand_pos;
855         if (pos >= current_expansion->list_len) {
856                 finished_expanding(current_expansion);
857                 /* it was the outermost expansion, parse pptoken normally */
858                 if (current_expansion == NULL) {
859                         return false;
860                 }
861                 goto restart;
862         }
863         const saved_token_t *saved = &current_expansion->token_list[pos++];
864         pp_token = saved->token;
865         if (pp_token.kind == '#') {
866                 if (pos < current_expansion->list_len) {
867                         const saved_token_t *next = &current_expansion->token_list[pos];
868                         if (next->token.kind == T_MACRO_PARAMETER) {
869                                 pp_definition_t *def = next->token.macro_parameter.def;
870                                 assert(def != NULL && def->is_parameter);
871                                 stringify(def);
872                                 ++pos;
873                         }
874                 }
875         }
876
877         if (current_expansion->expand_pos > 0)
878                 info.had_whitespace = saved->had_whitespace;
879         current_expansion->expand_pos = pos;
880         pp_token.base.source_position = expansion_pos;
881
882         return true;
883 }
884
885 /**
886  * Returns the next token kind found when continuing the current expansions
887  * without starting new sub-expansions.
888  */
889 static token_kind_t peek_expansion(void)
890 {
891         for (pp_definition_t *e = current_expansion; e; e = e->parent_expansion) {
892                 if (e->expand_pos < e->list_len)
893                         return e->token_list[e->expand_pos].token.kind;
894         }
895         return T_EOF;
896 }
897
898 static void skip_line_comment(void)
899 {
900         info.had_whitespace = true;
901         while (true) {
902                 switch (input.c) {
903                 case EOF:
904                         return;
905
906                 case '\r':
907                 case '\n':
908                         return;
909
910                 default:
911                         next_char();
912                         break;
913                 }
914         }
915 }
916
917 static void skip_multiline_comment(void)
918 {
919         info.had_whitespace = true;
920
921         unsigned start_linenr = input.position.lineno;
922         while (true) {
923                 switch (input.c) {
924                 case '/':
925                         next_char();
926                         if (input.c == '*') {
927                                 /* TODO: nested comment, warn here */
928                         }
929                         break;
930                 case '*':
931                         next_char();
932                         if (input.c == '/') {
933                                 if (input.position.lineno != input.output_line)
934                                         info.whitespace_at_line_begin = input.position.colno;
935                                 next_char();
936                                 return;
937                         }
938                         break;
939
940                 case NEWLINE:
941                         break;
942
943                 case EOF: {
944                         source_position_t source_position;
945                         source_position.input_name = pp_token.base.source_position.input_name;
946                         source_position.lineno     = start_linenr;
947                         errorf(&source_position, "at end of file while looking for comment end");
948                         return;
949                 }
950
951                 default:
952                         next_char();
953                         break;
954                 }
955         }
956 }
957
958 static bool skip_till_newline(bool stop_at_non_whitespace)
959 {
960         bool res = false;
961         while (true) {
962                 switch (input.c) {
963                 case ' ':
964                 case '\t':
965                         next_char();
966                         continue;
967
968                 case '/':
969                         next_char();
970                         if (input.c == '/') {
971                                 next_char();
972                                 skip_line_comment();
973                                 continue;
974                         } else if (input.c == '*') {
975                                 next_char();
976                                 skip_multiline_comment();
977                                 continue;
978                         } else {
979                                 put_back(input.c);
980                                 input.c = '/';
981                         }
982                         return true;
983
984                 case NEWLINE:
985                         return res;
986
987                 default:
988                         if (stop_at_non_whitespace)
989                                 return false;
990                         res = true;
991                         next_char();
992                         continue;
993                 }
994         }
995 }
996
997 static void skip_whitespace(void)
998 {
999         while (true) {
1000                 switch (input.c) {
1001                 case ' ':
1002                 case '\t':
1003                         ++info.whitespace_at_line_begin;
1004                         info.had_whitespace = true;
1005                         next_char();
1006                         continue;
1007
1008                 case NEWLINE:
1009                         info.at_line_begin  = true;
1010                         info.had_whitespace = true;
1011                         info.whitespace_at_line_begin = 0;
1012                         continue;
1013
1014                 case '/':
1015                         next_char();
1016                         if (input.c == '/') {
1017                                 next_char();
1018                                 skip_line_comment();
1019                                 continue;
1020                         } else if (input.c == '*') {
1021                                 next_char();
1022                                 skip_multiline_comment();
1023                                 continue;
1024                         } else {
1025                                 put_back(input.c);
1026                                 input.c = '/';
1027                         }
1028                         return;
1029
1030                 default:
1031                         return;
1032                 }
1033         }
1034 }
1035
1036 static inline void eat_pp(pp_token_kind_t const kind)
1037 {
1038         assert(pp_token.base.symbol->pp_ID == kind);
1039         (void) kind;
1040         next_input_token();
1041 }
1042
1043 static inline void eat_token(token_kind_t const kind)
1044 {
1045         assert(pp_token.kind == kind);
1046         (void)kind;
1047         next_input_token();
1048 }
1049
1050 static void parse_symbol(void)
1051 {
1052         assert(obstack_object_size(&symbol_obstack) == 0);
1053         while (true) {
1054                 switch (input.c) {
1055                 case DIGIT_CASES:
1056                 case SYMBOL_CASES:
1057                         obstack_1grow(&symbol_obstack, (char) input.c);
1058                         next_char();
1059                         break;
1060
1061                 case '\\':
1062                         next_char();
1063                         switch (input.c) {
1064                         {
1065                                 unsigned n;
1066                         case 'U': n = 8; goto universal;
1067                         case 'u': n = 4; goto universal;
1068 universal:
1069                                 if (!resolve_escape_sequences) {
1070                                         obstack_1grow(&symbol_obstack, '\\');
1071                                         obstack_1grow(&symbol_obstack, input.c);
1072                                 }
1073                                 next_char();
1074                                 utf32 const v = parse_universal_char(n);
1075                                 if (!is_universal_char_valid_identifier(v)) {
1076                                         if (is_universal_char_valid(v)) {
1077                                                 errorf(&input.position,
1078                                                            "universal character \\%c%0*X is not valid in an identifier",
1079                                                            n == 4 ? 'u' : 'U', (int)n, v);
1080                                         }
1081                                 } else if (obstack_object_size(&symbol_obstack) == 0 && !is_universal_char_valid_identifier_start(v)) {
1082                                         errorf(&input.position,
1083                                                    "universal character \\%c%0*X is not valid as start of an identifier",
1084                                                    n == 4 ? 'u' : 'U', (int)n, v);
1085                                 } else if (resolve_escape_sequences) {
1086                                         obstack_grow_utf8(&symbol_obstack, v);
1087                                 }
1088                                 break;
1089                         }
1090
1091                         default:
1092                                 put_back(input.c);
1093                                 input.c = '\\';
1094                                 goto end_symbol;
1095                         }
1096
1097                 default:
1098 dollar_sign:
1099                         goto end_symbol;
1100                 }
1101         }
1102
1103 end_symbol:
1104         obstack_1grow(&symbol_obstack, '\0');
1105         char *string = obstack_finish(&symbol_obstack);
1106
1107         /* might be a wide string or character constant ( L"string"/L'c' ) */
1108         if (input.c == '"' && string[0] == 'L' && string[1] == '\0') {
1109                 obstack_free(&symbol_obstack, string);
1110                 parse_string_literal(STRING_ENCODING_WIDE);
1111                 return;
1112         } else if (input.c == '\'' && string[0] == 'L' && string[1] == '\0') {
1113                 obstack_free(&symbol_obstack, string);
1114                 parse_character_constant(STRING_ENCODING_WIDE);
1115                 return;
1116         }
1117
1118         symbol_t *symbol = symbol_table_insert(string);
1119
1120         pp_token.kind        = symbol->ID;
1121         pp_token.base.symbol = symbol;
1122
1123         /* we can free the memory from symbol obstack if we already had an entry in
1124          * the symbol table */
1125         if (symbol->string != string) {
1126                 obstack_free(&symbol_obstack, string);
1127         }
1128 }
1129
1130 static void parse_number(void)
1131 {
1132         obstack_1grow(&symbol_obstack, (char) input.c);
1133         next_char();
1134
1135         while (true) {
1136                 switch (input.c) {
1137                 case '.':
1138                 case DIGIT_CASES:
1139                 case SYMBOL_CASES_WITHOUT_E_P:
1140                         obstack_1grow(&symbol_obstack, (char) input.c);
1141                         next_char();
1142                         break;
1143
1144                 case 'e':
1145                 case 'p':
1146                 case 'E':
1147                 case 'P':
1148                         obstack_1grow(&symbol_obstack, (char) input.c);
1149                         next_char();
1150                         if (input.c == '+' || input.c == '-') {
1151                                 obstack_1grow(&symbol_obstack, (char) input.c);
1152                                 next_char();
1153                         }
1154                         break;
1155
1156                 default:
1157 dollar_sign:
1158                         goto end_number;
1159                 }
1160         }
1161
1162 end_number:
1163         pp_token.kind           = T_NUMBER;
1164         pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
1165 }
1166
/*
 * Building blocks for the punctuator decision trees in next_input_token().
 *
 * MAYBE_PROLOG   consumes the current character and opens a switch over the
 *                following one.
 * MAYBE          case for a follow-up character that completes a longer
 *                punctuator: consumes it, sets the token and returns.
 * MAYBE_DIGRAPH  same as MAYBE, but the token is set through set_digraph().
 * ELSE_CODE      default case running arbitrary code; also closes the switch
 *                opened by MAYBE_PROLOG.
 * ELSE           default case that sets a punctuator and returns.
 */
#define MAYBE_PROLOG \
        next_char(); \
        switch (input.c) {

#define MAYBE(follow, punctuator) \
        case follow: \
                next_char(); \
                set_punctuator(punctuator); \
                return;

#define MAYBE_DIGRAPH(follow, punctuator, spelling) \
        case follow: \
                next_char(); \
                set_digraph(punctuator, spelling); \
                return;

#define ELSE_CODE(statements) \
        default: \
                statements \
        }

#define ELSE(punctuator) ELSE_CODE(set_punctuator(punctuator); return;)
1189
1190 /** identifies and returns the next preprocessing token contained in the
1191  * input stream. No macro expansion is performed. */
1192 static void next_input_token(void)
1193 {
1194         if (next_info.had_whitespace) {
1195                 info = next_info;
1196                 next_info.had_whitespace = false;
1197         } else {
1198                 info.at_line_begin  = false;
1199                 info.had_whitespace = false;
1200         }
1201 restart:
1202         pp_token.base.source_position = input.position;
1203         pp_token.base.symbol          = NULL;
1204
1205         switch (input.c) {
1206         case ' ':
1207         case '\t':
1208                 info.whitespace_at_line_begin++;
1209                 info.had_whitespace = true;
1210                 next_char();
1211                 goto restart;
1212
1213         case NEWLINE:
1214                 info.at_line_begin            = true;
1215                 info.had_whitespace           = true;
1216                 info.whitespace_at_line_begin = 0;
1217                 goto restart;
1218
1219         case SYMBOL_CASES:
1220                 parse_symbol();
1221                 return;
1222
1223         case DIGIT_CASES:
1224                 parse_number();
1225                 return;
1226
1227         case '"':
1228                 parse_string_literal(STRING_ENCODING_CHAR);
1229                 return;
1230
1231         case '\'':
1232                 parse_character_constant(STRING_ENCODING_CHAR);
1233                 return;
1234
1235         case '.':
1236                 MAYBE_PROLOG
1237                         case '0':
1238                         case '1':
1239                         case '2':
1240                         case '3':
1241                         case '4':
1242                         case '5':
1243                         case '6':
1244                         case '7':
1245                         case '8':
1246                         case '9':
1247                                 put_back(input.c);
1248                                 input.c = '.';
1249                                 parse_number();
1250                                 return;
1251
1252                         case '.':
1253                                 MAYBE_PROLOG
1254                                 MAYBE('.', T_DOTDOTDOT)
1255                                 ELSE_CODE(
1256                                         put_back(input.c);
1257                                         input.c = '.';
1258                                         set_punctuator('.');
1259                                         return;
1260                                 )
1261                 ELSE('.')
1262         case '&':
1263                 MAYBE_PROLOG
1264                 MAYBE('&', T_ANDAND)
1265                 MAYBE('=', T_ANDEQUAL)
1266                 ELSE('&')
1267         case '*':
1268                 MAYBE_PROLOG
1269                 MAYBE('=', T_ASTERISKEQUAL)
1270                 ELSE('*')
1271         case '+':
1272                 MAYBE_PROLOG
1273                 MAYBE('+', T_PLUSPLUS)
1274                 MAYBE('=', T_PLUSEQUAL)
1275                 ELSE('+')
1276         case '-':
1277                 MAYBE_PROLOG
1278                 MAYBE('>', T_MINUSGREATER)
1279                 MAYBE('-', T_MINUSMINUS)
1280                 MAYBE('=', T_MINUSEQUAL)
1281                 ELSE('-')
1282         case '!':
1283                 MAYBE_PROLOG
1284                 MAYBE('=', T_EXCLAMATIONMARKEQUAL)
1285                 ELSE('!')
1286         case '/':
1287                 MAYBE_PROLOG
1288                 MAYBE('=', T_SLASHEQUAL)
1289                 case '*':
1290                         next_char();
1291                         skip_multiline_comment();
1292                         goto restart;
1293                 case '/':
1294                         next_char();
1295                         skip_line_comment();
1296                         goto restart;
1297                 ELSE('/')
1298         case '%':
1299                 MAYBE_PROLOG
1300                 MAYBE_DIGRAPH('>', '}', symbol_percentgreater)
1301                 MAYBE('=', T_PERCENTEQUAL)
1302                 case ':':
1303                         MAYBE_PROLOG
1304                         case '%':
1305                                 MAYBE_PROLOG
1306                                 MAYBE_DIGRAPH(':', T_HASHHASH, symbol_percentcolonpercentcolon)
1307                                 ELSE_CODE(
1308                                         put_back(input.c);
1309                                         input.c = '%';
1310                                         goto digraph_percentcolon;
1311                                 )
1312                         ELSE_CODE(
1313 digraph_percentcolon:
1314                                 set_digraph('#', symbol_percentcolon);
1315                                 return;
1316                         )
1317                 ELSE('%')
1318         case '<':
1319                 MAYBE_PROLOG
1320                 MAYBE_DIGRAPH(':', '[', symbol_lesscolon)
1321                 MAYBE_DIGRAPH('%', '{', symbol_lesspercent)
1322                 MAYBE('=', T_LESSEQUAL)
1323                 case '<':
1324                         MAYBE_PROLOG
1325                         MAYBE('=', T_LESSLESSEQUAL)
1326                         ELSE(T_LESSLESS)
1327                 ELSE('<')
1328         case '>':
1329                 MAYBE_PROLOG
1330                 MAYBE('=', T_GREATEREQUAL)
1331                 case '>':
1332                         MAYBE_PROLOG
1333                         MAYBE('=', T_GREATERGREATEREQUAL)
1334                         ELSE(T_GREATERGREATER)
1335                 ELSE('>')
1336         case '^':
1337                 MAYBE_PROLOG
1338                 MAYBE('=', T_CARETEQUAL)
1339                 ELSE('^')
1340         case '|':
1341                 MAYBE_PROLOG
1342                 MAYBE('=', T_PIPEEQUAL)
1343                 MAYBE('|', T_PIPEPIPE)
1344                 ELSE('|')
1345         case ':':
1346                 MAYBE_PROLOG
1347                 MAYBE_DIGRAPH('>', ']', symbol_colongreater)
1348                 case ':':
1349                         if (c_mode & _CXX) {
1350                                 next_char();
1351                                 set_punctuator(T_COLONCOLON);
1352                                 return;
1353                         }
1354                         /* FALLTHROUGH */
1355                 ELSE(':')
1356         case '=':
1357                 MAYBE_PROLOG
1358                 MAYBE('=', T_EQUALEQUAL)
1359                 ELSE('=')
1360         case '#':
1361                 MAYBE_PROLOG
1362                 MAYBE('#', T_HASHHASH)
1363                 ELSE('#')
1364
1365         case '?':
1366         case '[':
1367         case ']':
1368         case '(':
1369         case ')':
1370         case '{':
1371         case '}':
1372         case '~':
1373         case ';':
1374         case ',':
1375                 set_punctuator(input.c);
1376                 next_char();
1377                 return;
1378
1379         case EOF:
1380                 if (input_stack != NULL) {
1381                         fclose(close_pp_input());
1382                         pop_restore_input();
1383                         if (out)
1384                                 fputc('\n', out);
1385                         if (input.c == (utf32)EOF)
1386                                 --input.position.lineno;
1387                         print_line_directive(&input.position, "2");
1388                         goto restart;
1389                 } else {
1390                         info.at_line_begin = true;
1391                         set_punctuator(T_EOF);
1392                 }
1393                 return;
1394
1395         case '\\':
1396                 next_char();
1397                 int next_c = input.c;
1398                 put_back(input.c);
1399                 input.c = '\\';
1400                 if (next_c == 'U' || next_c == 'u') {
1401                         parse_symbol();
1402                         return;
1403                 }
1404                 /* FALLTHROUGH */
1405         default:
1406 dollar_sign:
1407                 if (error_on_unknown_chars) {
1408                         errorf(&pp_token.base.source_position,
1409                                "unknown character '%lc' found\n", input.c);
1410                         next_char();
1411                         goto restart;
1412                 } else {
1413                         assert(obstack_object_size(&symbol_obstack) == 0);
1414                         obstack_grow_utf8(&symbol_obstack, input.c);
1415                         obstack_1grow(&symbol_obstack, '\0');
1416                         char     *const string = obstack_finish(&symbol_obstack);
1417                         symbol_t *const symbol = symbol_table_insert(string);
1418                         if (symbol->string != string)
1419                                 obstack_free(&symbol_obstack, string);
1420
1421                         pp_token.kind        = T_UNKNOWN_CHAR;
1422                         pp_token.base.symbol = symbol;
1423                         next_char();
1424                         return;
1425                 }
1426         }
1427 }
1428
1429 static void print_quoted_string(const char *const string)
1430 {
1431         fputc('"', out);
1432         for (const char *c = string; *c != 0; ++c) {
1433                 switch (*c) {
1434                 case '"': fputs("\\\"", out); break;
1435                 case '\\':  fputs("\\\\", out); break;
1436                 case '\a':  fputs("\\a", out); break;
1437                 case '\b':  fputs("\\b", out); break;
1438                 case '\f':  fputs("\\f", out); break;
1439                 case '\n':  fputs("\\n", out); break;
1440                 case '\r':  fputs("\\r", out); break;
1441                 case '\t':  fputs("\\t", out); break;
1442                 case '\v':  fputs("\\v", out); break;
1443                 case '\?':  fputs("\\?", out); break;
1444                 default:
1445                         if (!isprint(*c)) {
1446                                 fprintf(out, "\\%03o", (unsigned)*c);
1447                                 break;
1448                         }
1449                         fputc(*c, out);
1450                         break;
1451                 }
1452         }
1453         fputc('"', out);
1454 }
1455
1456 static void print_line_directive(const source_position_t *pos, const char *add)
1457 {
1458         if (!out)
1459                 return;
1460
1461         fprintf(out, "# %u ", pos->lineno);
1462         print_quoted_string(pos->input_name);
1463         if (add != NULL) {
1464                 fputc(' ', out);
1465                 fputs(add, out);
1466         }
1467         if (pos->is_system_header) {
1468                 fputs(" 3", out);
1469         }
1470
1471         printed_input_name = pos->input_name;
1472         input.output_line  = pos->lineno-1;
1473 }
1474
1475 static bool emit_newlines(void)
1476 {
1477         if (!out)
1478                 return true;
1479
1480         unsigned delta = pp_token.base.source_position.lineno - input.output_line;
1481         if (delta == 0)
1482                 return false;
1483
1484         if (delta >= 9) {
1485                 fputc('\n', out);
1486                 print_line_directive(&pp_token.base.source_position, NULL);
1487                 fputc('\n', out);
1488         } else {
1489                 for (unsigned i = 0; i < delta; ++i) {
1490                         fputc('\n', out);
1491                 }
1492         }
1493         input.output_line = pp_token.base.source_position.lineno;
1494
1495         unsigned whitespace = info.whitespace_at_line_begin;
1496         /* make sure there is at least 1 whitespace before a (macro-expanded)
1497          * '#' at line begin. I'm not sure why this is good, but gcc does it. */
1498         if (pp_token.kind == '#' && whitespace == 0)
1499                 ++whitespace;
1500         for (unsigned i = 0; i < whitespace; ++i)
1501                 fputc(' ', out);
1502
1503         return true;
1504 }
1505
1506 void set_preprocessor_output(FILE *output)
1507 {
1508         out = output;
1509         if (out != NULL) {
1510                 error_on_unknown_chars   = false;
1511                 resolve_escape_sequences = false;
1512         } else {
1513                 error_on_unknown_chars   = true;
1514                 resolve_escape_sequences = true;
1515         }
1516 }
1517
1518 void emit_pp_token(void)
1519 {
1520         if (!emit_newlines() &&
1521             (info.had_whitespace || tokens_would_paste(last_token, pp_token.kind)))
1522                 fputc(' ', out);
1523
1524         switch (pp_token.kind) {
1525         case T_NUMBER:
1526                 fputs(pp_token.literal.string.begin, out);
1527                 break;
1528
1529         case T_STRING_LITERAL:
1530                 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1531                 fputc('"', out);
1532                 fputs(pp_token.literal.string.begin, out);
1533                 fputc('"', out);
1534                 break;
1535
1536         case T_CHARACTER_CONSTANT:
1537                 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1538                 fputc('\'', out);
1539                 fputs(pp_token.literal.string.begin, out);
1540                 fputc('\'', out);
1541                 break;
1542
1543         case T_MACRO_PARAMETER:
1544                 panic("macro parameter not expanded");
1545
1546         default:
1547                 fputs(pp_token.base.symbol->string, out);
1548                 break;
1549         }
1550         last_token = pp_token.kind;
1551 }
1552
1553 static void eat_pp_directive(void)
1554 {
1555         while (!info.at_line_begin) {
1556                 next_input_token();
1557         }
1558 }
1559
1560 static bool strings_equal(const string_t *string1, const string_t *string2)
1561 {
1562         size_t size = string1->size;
1563         if (size != string2->size)
1564                 return false;
1565
1566         const char *c1 = string1->begin;
1567         const char *c2 = string2->begin;
1568         for (size_t i = 0; i < size; ++i, ++c1, ++c2) {
1569                 if (*c1 != *c2)
1570                         return false;
1571         }
1572         return true;
1573 }
1574
1575 static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
1576 {
1577         if (token1->kind != token2->kind)
1578                 return false;
1579
1580         switch (token1->kind) {
1581         case T_NUMBER:
1582         case T_CHARACTER_CONSTANT:
1583         case T_STRING_LITERAL:
1584                 return strings_equal(&token1->literal.string, &token2->literal.string);
1585
1586         case T_MACRO_PARAMETER:
1587                 return token1->macro_parameter.def->symbol
1588                     == token2->macro_parameter.def->symbol;
1589
1590         default:
1591                 return token1->base.symbol == token2->base.symbol;
1592         }
1593 }
1594
1595 static bool pp_definitions_equal(const pp_definition_t *definition1,
1596                                  const pp_definition_t *definition2)
1597 {
1598         if (definition1->list_len != definition2->list_len)
1599                 return false;
1600
1601         size_t               len = definition1->list_len;
1602         const saved_token_t *t1  = definition1->token_list;
1603         const saved_token_t *t2  = definition2->token_list;
1604         for (size_t i = 0; i < len; ++i, ++t1, ++t2) {
1605                 if (!pp_tokens_equal(&t1->token, &t2->token))
1606                         return false;
1607                 if (t1->had_whitespace != t2->had_whitespace)
1608                         return false;
1609         }
1610         return true;
1611 }
1612
1613 static void missing_macro_param_error(void)
1614 {
1615         errorf(&pp_token.base.source_position,
1616                "'#' is not followed by a macro parameter");
1617 }
1618
1619 static bool is_defineable_token(char const *const context)
1620 {
1621         if (info.at_line_begin) {
1622                 errorf(&pp_token.base.source_position, "unexpected end of line after %s", context);
1623         }
1624
1625         symbol_t *const symbol = pp_token.base.symbol;
1626         if (!symbol)
1627                 goto no_ident;
1628
1629         if (pp_token.kind != T_IDENTIFIER) {
1630                 switch (symbol->string[0]) {
1631                 case SYMBOL_CASES:
1632 dollar_sign:
1633                         break;
1634
1635                 default:
1636 no_ident:
1637                         errorf(&pp_token.base.source_position, "expected identifier after %s, got %K", context, &pp_token);
1638                         return false;
1639                 }
1640         }
1641
1642         /* TODO turn this into a flag in pp_def. */
1643         switch (symbol->pp_ID) {
1644         /* Â§6.10.8:4 */
1645         case TP_defined:
1646                 errorf(&pp_token.base.source_position, "%K cannot be used as macro name in %s", &pp_token, context);
1647                 return false;
1648
1649         default:
1650                 return true;
1651         }
1652 }
1653
1654 static void parse_define_directive(void)
1655 {
1656         eat_pp(TP_define);
1657         if (skip_mode) {
1658                 eat_pp_directive();
1659                 return;
1660         }
1661
1662         assert(obstack_object_size(&pp_obstack) == 0);
1663
1664         if (!is_defineable_token("#define"))
1665                 goto error_out;
1666         symbol_t *const symbol = pp_token.base.symbol;
1667
1668         pp_definition_t *new_definition
1669                 = obstack_alloc(&pp_obstack, sizeof(new_definition[0]));
1670         memset(new_definition, 0, sizeof(new_definition[0]));
1671         new_definition->symbol          = symbol;
1672         new_definition->source_position = input.position;
1673
1674         /* this is probably the only place where spaces are significant in the
1675          * lexer (except for the fact that they separate tokens). #define b(x)
1676          * is something else than #define b (x) */
1677         if (input.c == '(') {
1678                 next_input_token();
1679                 eat_token('(');
1680
1681                 while (true) {
1682                         switch (pp_token.kind) {
1683                         case T_DOTDOTDOT:
1684                                 new_definition->is_variadic = true;
1685                                 eat_token(T_DOTDOTDOT);
1686                                 if (pp_token.kind != ')') {
1687                                         errorf(&input.position,
1688                                                         "'...' not at end of macro argument list");
1689                                         goto error_out;
1690                                 }
1691                                 break;
1692
1693                         case T_IDENTIFIER: {
1694                                 pp_definition_t parameter;
1695                                 memset(&parameter, 0, sizeof(parameter));
1696                                 parameter.source_position = pp_token.base.source_position;
1697                                 parameter.symbol          = pp_token.base.symbol;
1698                                 parameter.is_parameter    = true;
1699                                 obstack_grow(&pp_obstack, &parameter, sizeof(parameter));
1700                                 eat_token(T_IDENTIFIER);
1701
1702                                 if (pp_token.kind == ',') {
1703                                         eat_token(',');
1704                                         break;
1705                                 }
1706
1707                                 if (pp_token.kind != ')') {
1708                                         errorf(&pp_token.base.source_position,
1709                                                "expected ',' or ')' after identifier, got %K",
1710                                                &pp_token);
1711                                         goto error_out;
1712                                 }
1713                                 break;
1714                         }
1715
1716                         case ')':
1717                                 eat_token(')');
1718                                 goto finish_argument_list;
1719
1720                         default:
1721                                 errorf(&pp_token.base.source_position,
1722                                        "expected identifier, '...' or ')' in #define argument list, got %K",
1723                                        &pp_token);
1724                                 goto error_out;
1725                         }
1726                 }
1727
1728         finish_argument_list:
1729                 new_definition->has_parameters = true;
1730                 size_t size = obstack_object_size(&pp_obstack);
1731                 new_definition->n_parameters
1732                         = size / sizeof(new_definition->parameters[0]);
1733                 new_definition->parameters = obstack_finish(&pp_obstack);
1734                 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1735                         pp_definition_t *param    = &new_definition->parameters[i];
1736                         symbol_t        *symbol   = param->symbol;
1737                         pp_definition_t *previous = symbol->pp_definition;
1738                         if (previous != NULL
1739                             && previous->function_definition == new_definition) {
1740                                 errorf(&param->source_position,
1741                                        "duplicate macro parameter '%Y'", symbol);
1742                                 param->symbol = sym_anonymous;
1743                                 continue;
1744                         }
1745                         param->parent_expansion    = previous;
1746                         param->function_definition = new_definition;
1747                         symbol->pp_definition      = param;
1748                 }
1749         } else {
1750                 next_input_token();
1751         }
1752
1753         /* construct token list */
1754         assert(obstack_object_size(&pp_obstack) == 0);
1755         bool next_must_be_param = false;
1756         while (!info.at_line_begin) {
1757                 if (pp_token.kind == T_IDENTIFIER) {
1758                         const symbol_t  *symbol     = pp_token.base.symbol;
1759                         pp_definition_t *definition = symbol->pp_definition;
1760                         if (definition != NULL
1761                             && definition->function_definition == new_definition) {
1762                             pp_token.kind                = T_MACRO_PARAMETER;
1763                             pp_token.macro_parameter.def = definition;
1764                         }
1765                 }
1766                 if (next_must_be_param && pp_token.kind != T_MACRO_PARAMETER) {
1767                         missing_macro_param_error();
1768                 }
1769                 saved_token_t saved_token;
1770                 saved_token.token = pp_token;
1771                 saved_token.had_whitespace = info.had_whitespace;
1772                 obstack_grow(&pp_obstack, &saved_token, sizeof(saved_token));
1773                 next_must_be_param
1774                         = new_definition->has_parameters && pp_token.kind == '#';
1775                 next_input_token();
1776         }
1777         if (next_must_be_param)
1778                 missing_macro_param_error();
1779
1780         new_definition->list_len   = obstack_object_size(&pp_obstack)
1781                 / sizeof(new_definition->token_list[0]);
1782         new_definition->token_list = obstack_finish(&pp_obstack);
1783
1784         if (new_definition->has_parameters) {
1785                 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1786                         pp_definition_t *param      = &new_definition->parameters[i];
1787                         symbol_t        *symbol     = param->symbol;
1788                         if (symbol == sym_anonymous)
1789                                 continue;
1790                         assert(symbol->pp_definition == param);
1791                         assert(param->function_definition == new_definition);
1792                         symbol->pp_definition   = param->parent_expansion;
1793                         param->parent_expansion = NULL;
1794                 }
1795         }
1796
1797         pp_definition_t *old_definition = symbol->pp_definition;
1798         if (old_definition != NULL) {
1799                 if (!pp_definitions_equal(old_definition, new_definition)) {
1800                         warningf(WARN_OTHER, &input.position, "multiple definition of macro '%Y' (first defined %P)", symbol, &old_definition->source_position);
1801                 } else {
1802                         /* reuse the old definition */
1803                         obstack_free(&pp_obstack, new_definition);
1804                         new_definition = old_definition;
1805                 }
1806         }
1807
1808         symbol->pp_definition = new_definition;
1809         return;
1810
1811 error_out:
1812         if (obstack_object_size(&pp_obstack) > 0) {
1813                 char *ptr = obstack_finish(&pp_obstack);
1814                 obstack_free(&pp_obstack, ptr);
1815         }
1816         eat_pp_directive();
1817 }
1818
1819 static void parse_undef_directive(void)
1820 {
1821         eat_pp(TP_undef);
1822         if (skip_mode) {
1823                 eat_pp_directive();
1824                 return;
1825         }
1826
1827         if (!is_defineable_token("#undef")) {
1828                 eat_pp_directive();
1829                 return;
1830         }
1831
1832         pp_token.base.symbol->pp_definition = NULL;
1833         next_input_token();
1834
1835         if (!info.at_line_begin) {
1836                 warningf(WARN_OTHER, &input.position, "extra tokens at end of #undef directive");
1837         }
1838         eat_pp_directive();
1839 }
1840
1841 /** behind an #include we can have the special headername lexems.
1842  * They're only allowed behind an #include so they're not recognized
1843  * by the normal next_preprocessing_token. We handle them as a special
1844  * exception here */
1845 static const char *parse_headername(bool *system_include)
1846 {
1847         if (info.at_line_begin) {
1848                 parse_error("expected headername after #include");
1849                 return NULL;
1850         }
1851
1852         /* check wether we have a "... or <... headername */
1853         source_position_t position = input.position;
1854         switch (input.c) {
1855         {
1856                 utf32 delimiter;
1857         case '<': delimiter = '>'; *system_include = true;  goto parse_name;
1858         case '"': delimiter = '"'; *system_include = false; goto parse_name;
1859 parse_name:
1860                 assert(obstack_object_size(&symbol_obstack) == 0);
1861                 next_char();
1862                 while (true) {
1863                         switch (input.c) {
1864                         case NEWLINE:
1865                         case EOF:
1866                                 {
1867                                         char *dummy = obstack_finish(&symbol_obstack);
1868                                         obstack_free(&symbol_obstack, dummy);
1869                                 }
1870                                 errorf(&pp_token.base.source_position,
1871                                        "header name without closing '%c'", (char)delimiter);
1872                                 return NULL;
1873
1874                         default:
1875                                 if (input.c == delimiter) {
1876                                         next_char();
1877                                         goto finish_headername;
1878                                 } else {
1879                                         obstack_1grow(&symbol_obstack, (char)input.c);
1880                                         next_char();
1881                                 }
1882                                 break;
1883                         }
1884                 }
1885                 /* we should never be here */
1886         }
1887
1888         default:
1889                 next_preprocessing_token();
1890                 if (info.at_line_begin) {
1891                         /* TODO: if we are already in the new line then we parsed more than
1892                          * wanted. We reuse the token, but could produce following errors
1893                          * misbehaviours... */
1894                         goto error_invalid_input;
1895                 }
1896                 if (pp_token.kind == T_STRING_LITERAL) {
1897                         *system_include = false;
1898                         return pp_token.literal.string.begin;
1899                 } else if (pp_token.kind == '<') {
1900                         *system_include = true;
1901                         assert(obstack_object_size(&pp_obstack) == 0);
1902                         while (true) {
1903                                 next_preprocessing_token();
1904                                 if (info.at_line_begin) {
1905                                         /* TODO: we shouldn't have parsed/expanded something on the
1906                                          * next line yet... */
1907                                         char *dummy = obstack_finish(&pp_obstack);
1908                                         obstack_free(&pp_obstack, dummy);
1909                                         goto error_invalid_input;
1910                                 }
1911                                 if (pp_token.kind == '>')
1912                                         break;
1913
1914                                 saved_token_t saved;
1915                                 saved.token          = pp_token;
1916                                 saved.had_whitespace = info.had_whitespace;
1917                                 obstack_grow(&pp_obstack, &saved, sizeof(saved));
1918                         }
1919                         size_t size = obstack_object_size(&pp_obstack);
1920                         assert(size % sizeof(saved_token_t) == 0);
1921                         size_t n_tokens = size / sizeof(saved_token_t);
1922                         saved_token_t *tokens = obstack_finish(&pp_obstack);
1923                         assert(obstack_object_size(&symbol_obstack) == 0);
1924                         for (size_t i = 0; i < n_tokens; ++i) {
1925                                 const saved_token_t *saved = &tokens[i];
1926                                 if (i > 0 && saved->had_whitespace)
1927                                         obstack_1grow(&symbol_obstack, ' ');
1928                                 grow_token(&symbol_obstack, &saved->token);
1929                         }
1930                         obstack_free(&pp_obstack, tokens);
1931                         goto finish_headername;
1932                 } else {
1933 error_invalid_input:
1934                         {
1935                                 char *dummy = obstack_finish(&symbol_obstack);
1936                                 obstack_free(&symbol_obstack, dummy);
1937                         }
1938
1939                         errorf(&pp_token.base.source_position,
1940                                "expected \"FILENAME\" or <FILENAME> after #include");
1941                         return NULL;
1942                 }
1943         }
1944
1945 finish_headername:
1946         obstack_1grow(&symbol_obstack, '\0');
1947         char *const  headername = obstack_finish(&symbol_obstack);
1948         const char  *identified = identify_string(headername);
1949         pp_token.base.source_position = position;
1950         return identified;
1951 }
1952
1953 static bool do_include(bool const bracket_include, bool const include_next, char const *const headername)
1954 {
1955         size_t const        headername_len = strlen(headername);
1956         searchpath_entry_t *entry;
1957         if (include_next) {
1958                 entry = input.path      ? input.path->next
1959                       : bracket_include ? bracket_searchpath.first
1960                       : quote_searchpath.first;
1961         } else {
1962                 if (!bracket_include) {
1963                         /* put dirname of current input on obstack */
1964                         const char *filename   = input.position.input_name;
1965                         const char *last_slash = strrchr(filename, '/');
1966                         const char *full_name;
1967                         if (last_slash != NULL) {
1968                                 size_t len = last_slash - filename;
1969                                 obstack_grow(&symbol_obstack, filename, len + 1);
1970                                 obstack_grow0(&symbol_obstack, headername, headername_len);
1971                                 char *complete_path = obstack_finish(&symbol_obstack);
1972                                 full_name = identify_string(complete_path);
1973                         } else {
1974                                 full_name = headername;
1975                         }
1976
1977                         FILE *file = fopen(full_name, "r");
1978                         if (file != NULL) {
1979                                 switch_pp_input(file, full_name, NULL, false);
1980                                 return true;
1981                         }
1982                         entry = quote_searchpath.first;
1983                 } else {
1984                         entry = bracket_searchpath.first;
1985                 }
1986         }
1987
1988         assert(obstack_object_size(&symbol_obstack) == 0);
1989         /* check searchpath */
1990         for (; entry; entry = entry->next) {
1991             const char *path = entry->path;
1992             size_t      len  = strlen(path);
1993                 obstack_grow(&symbol_obstack, path, len);
1994                 if (path[len-1] != '/')
1995                         obstack_1grow(&symbol_obstack, '/');
1996                 obstack_grow(&symbol_obstack, headername, headername_len+1);
1997
1998                 char *complete_path = obstack_finish(&symbol_obstack);
1999                 FILE *file          = fopen(complete_path, "r");
2000                 if (file != NULL) {
2001                         const char *filename = identify_string(complete_path);
2002                         switch_pp_input(file, filename, entry, entry->is_system_path);
2003                         return true;
2004                 } else {
2005                         obstack_free(&symbol_obstack, complete_path);
2006                 }
2007         }
2008
2009         return false;
2010 }
2011
2012 static void parse_include_directive(bool const include_next)
2013 {
2014         if (skip_mode) {
2015                 eat_pp_directive();
2016                 return;
2017         }
2018
2019         /* do not eat the TP_include, since it would already parse the next token
2020          * which needs special handling here. */
2021         skip_till_newline(true);
2022         bool system_include;
2023         const char *headername = parse_headername(&system_include);
2024         if (headername == NULL) {
2025                 eat_pp_directive();
2026                 return;
2027         }
2028
2029         bool had_nonwhitespace = skip_till_newline(false);
2030         if (had_nonwhitespace) {
2031                 warningf(WARN_OTHER, &input.position,
2032                          "extra tokens at end of #include directive");
2033         }
2034
2035         if (n_inputs > INCLUDE_LIMIT) {
2036                 errorf(&pp_token.base.source_position, "#include nested too deeply");
2037                 /* eat \n or EOF */
2038                 next_input_token();
2039                 return;
2040         }
2041
2042         /* switch inputs */
2043         info.whitespace_at_line_begin = 0;
2044         info.had_whitespace           = false;
2045         info.at_line_begin            = true;
2046         emit_newlines();
2047         push_input();
2048         bool res = do_include(system_include, include_next, headername);
2049         if (res) {
2050                 next_input_token();
2051         } else {
2052                 errorf(&pp_token.base.source_position, "failed including '%s': %s", headername, strerror(errno));
2053                 pop_restore_input();
2054         }
2055 }
2056
2057 static pp_conditional_t *push_conditional(void)
2058 {
2059         pp_conditional_t *conditional
2060                 = obstack_alloc(&pp_obstack, sizeof(*conditional));
2061         memset(conditional, 0, sizeof(*conditional));
2062
2063         conditional->parent = conditional_stack;
2064         conditional_stack   = conditional;
2065
2066         return conditional;
2067 }
2068
2069 static void pop_conditional(void)
2070 {
2071         assert(conditional_stack != NULL);
2072         conditional_stack = conditional_stack->parent;
2073 }
2074
2075 void check_unclosed_conditionals(void)
2076 {
2077         while (conditional_stack != NULL) {
2078                 pp_conditional_t *conditional = conditional_stack;
2079
2080                 if (conditional->in_else) {
2081                         errorf(&conditional->source_position, "unterminated #else");
2082                 } else {
2083                         errorf(&conditional->source_position, "unterminated condition");
2084                 }
2085                 pop_conditional();
2086         }
2087 }
2088
2089 static void parse_ifdef_ifndef_directive(bool const is_ifdef)
2090 {
2091         bool condition;
2092         eat_pp(is_ifdef ? TP_ifdef : TP_ifndef);
2093
2094         if (skip_mode) {
2095                 eat_pp_directive();
2096                 pp_conditional_t *conditional = push_conditional();
2097                 conditional->source_position  = pp_token.base.source_position;
2098                 conditional->skip             = true;
2099                 return;
2100         }
2101
2102         if (pp_token.kind != T_IDENTIFIER || info.at_line_begin) {
2103                 errorf(&pp_token.base.source_position,
2104                        "expected identifier after #%s, got %K",
2105                        is_ifdef ? "ifdef" : "ifndef", &pp_token);
2106                 eat_pp_directive();
2107
2108                 /* just take the true case in the hope to avoid further errors */
2109                 condition = true;
2110         } else {
2111                 /* evaluate wether we are in true or false case */
2112                 condition = (bool)pp_token.base.symbol->pp_definition == is_ifdef;
2113                 eat_token(T_IDENTIFIER);
2114
2115                 if (!info.at_line_begin) {
2116                         errorf(&pp_token.base.source_position,
2117                                "extra tokens at end of #%s",
2118                                is_ifdef ? "ifdef" : "ifndef");
2119                         eat_pp_directive();
2120                 }
2121         }
2122
2123         pp_conditional_t *conditional = push_conditional();
2124         conditional->source_position  = pp_token.base.source_position;
2125         conditional->condition        = condition;
2126
2127         if (!condition) {
2128                 skip_mode = true;
2129         }
2130 }
2131
2132 static void parse_else_directive(void)
2133 {
2134         eat_pp(TP_else);
2135
2136         if (!info.at_line_begin) {
2137                 if (!skip_mode) {
2138                         warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #else");
2139                 }
2140                 eat_pp_directive();
2141         }
2142
2143         pp_conditional_t *conditional = conditional_stack;
2144         if (conditional == NULL) {
2145                 errorf(&pp_token.base.source_position, "#else without prior #if");
2146                 return;
2147         }
2148
2149         if (conditional->in_else) {
2150                 errorf(&pp_token.base.source_position,
2151                        "#else after #else (condition started %P)",
2152                        &conditional->source_position);
2153                 skip_mode = true;
2154                 return;
2155         }
2156
2157         conditional->in_else = true;
2158         if (!conditional->skip) {
2159                 skip_mode = conditional->condition;
2160         }
2161         conditional->source_position = pp_token.base.source_position;
2162 }
2163
2164 static void parse_endif_directive(void)
2165 {
2166         eat_pp(TP_endif);
2167
2168         if (!info.at_line_begin) {
2169                 if (!skip_mode) {
2170                         warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #endif");
2171                 }
2172                 eat_pp_directive();
2173         }
2174
2175         pp_conditional_t *conditional = conditional_stack;
2176         if (conditional == NULL) {
2177                 errorf(&pp_token.base.source_position, "#endif without prior #if");
2178                 return;
2179         }
2180
2181         if (!conditional->skip) {
2182                 skip_mode = false;
2183         }
2184         pop_conditional();
2185 }
2186
/** The "#pragma STDC" pragmas distinguished by parse_pragma_directive(). */
typedef enum stdc_pragma_kind_t {
        STDC_UNKNOWN          = 0, /**< not a (known) STDC pragma */
        STDC_FP_CONTRACT      = 1, /**< selected by TP_FP_CONTRACT */
        STDC_FENV_ACCESS      = 2, /**< selected by TP_FENV_ACCESS */
        STDC_CX_LIMITED_RANGE = 3  /**< selected by TP_CX_LIMITED_RANGE */
} stdc_pragma_kind_t;
2193
/** The argument values of a "#pragma STDC" pragma. */
typedef enum stdc_pragma_value_kind_t {
        STDC_VALUE_UNKNOWN = 0, /**< missing or unrecognized argument */
        STDC_VALUE_ON      = 1, /**< selected by TP_ON */
        STDC_VALUE_OFF     = 2, /**< selected by TP_OFF */
        STDC_VALUE_DEFAULT = 3  /**< selected by TP_DEFAULT */
} stdc_pragma_value_kind_t;
2200
2201 static void parse_pragma_directive(void)
2202 {
2203         eat_pp(TP_pragma);
2204         if (skip_mode) {
2205                 eat_pp_directive();
2206                 return;
2207         }
2208
2209         if (pp_token.kind != T_IDENTIFIER) {
2210                 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
2211                          "expected identifier after #pragma");
2212                 eat_pp_directive();
2213                 return;
2214         }
2215
2216         stdc_pragma_kind_t kind = STDC_UNKNOWN;
2217         if (pp_token.base.symbol->pp_ID == TP_STDC && c_mode & _C99) {
2218                 /* a STDC pragma */
2219                 next_input_token();
2220
2221                 switch (pp_token.base.symbol->pp_ID) {
2222                 case TP_FP_CONTRACT:      kind = STDC_FP_CONTRACT;      break;
2223                 case TP_FENV_ACCESS:      kind = STDC_FENV_ACCESS;      break;
2224                 case TP_CX_LIMITED_RANGE: kind = STDC_CX_LIMITED_RANGE; break;
2225                 default:                  break;
2226                 }
2227                 if (kind != STDC_UNKNOWN) {
2228                         next_input_token();
2229                         stdc_pragma_value_kind_t value;
2230                         switch (pp_token.base.symbol->pp_ID) {
2231                         case TP_ON:      value = STDC_VALUE_ON;      break;
2232                         case TP_OFF:     value = STDC_VALUE_OFF;     break;
2233                         case TP_DEFAULT: value = STDC_VALUE_DEFAULT; break;
2234                         default:         value = STDC_VALUE_UNKNOWN; break;
2235                         }
2236                         if (value == STDC_VALUE_UNKNOWN) {
2237                                 kind = STDC_UNKNOWN;
2238                                 errorf(&pp_token.base.source_position, "bad STDC pragma argument");
2239                         }
2240                 }
2241         }
2242         eat_pp_directive();
2243         if (kind == STDC_UNKNOWN) {
2244                 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
2245                          "encountered unknown #pragma");
2246         }
2247 }
2248
2249 static void parse_line_directive(void)
2250 {
2251         if (pp_token.kind != T_NUMBER) {
2252                 if (!skip_mode)
2253                         parse_error("expected integer");
2254         } else {
2255                 char      *end;
2256                 long const line = strtol(pp_token.literal.string.begin, &end, 0);
2257                 if (*end == '\0') {
2258                         /* use offset -1 as this is about the next line */
2259                         input.position.lineno = line - 1;
2260                         /* force output of line */
2261                         input.output_line = input.position.lineno - 20;
2262                 } else {
2263                         if (!skip_mode) {
2264                                 errorf(&input.position, "'%S' is not a valid line number",
2265                                            &pp_token.literal.string);
2266                         }
2267                 }
2268                 next_input_token();
2269                 if (info.at_line_begin)
2270                         return;
2271         }
2272         if (pp_token.kind == T_STRING_LITERAL
2273             && pp_token.literal.string.encoding == STRING_ENCODING_CHAR) {
2274                 input.position.input_name       = pp_token.literal.string.begin;
2275                 input.position.is_system_header = false;
2276                 next_input_token();
2277
2278                 /* attempt to parse numeric flags as outputted by gcc preprocessor */
2279                 while (!info.at_line_begin && pp_token.kind == T_NUMBER) {
2280                         /* flags:
2281                          * 1 - indicates start of a new file
2282                          * 2 - indicates return from a file
2283                          * 3 - indicates system header
2284                          * 4 - indicates implicit extern "C" in C++ mode
2285                          *
2286                          * currently we're only interested in "3"
2287                          */
2288                         if (streq(pp_token.literal.string.begin, "3")) {
2289                                 input.position.is_system_header = true;
2290                         }
2291                         next_input_token();
2292                 }
2293         }
2294
2295         eat_pp_directive();
2296 }
2297
2298 static void parse_error_directive(void)
2299 {
2300         if (skip_mode) {
2301                 eat_pp_directive();
2302                 return;
2303         }
2304
2305         bool const old_resolve_escape_sequences = resolve_escape_sequences;
2306         resolve_escape_sequences = false;
2307
2308         source_position_t const pos = pp_token.base.source_position;
2309         do {
2310                 if (info.had_whitespace && obstack_object_size(&pp_obstack) != 0)
2311                         obstack_1grow(&pp_obstack, ' ');
2312
2313                 switch (pp_token.kind) {
2314                 case T_NUMBER: {
2315                         string_t const *const str = &pp_token.literal.string;
2316                         obstack_grow(&pp_obstack, str->begin, str->size);
2317                         break;
2318                 }
2319
2320                 {
2321                         char delim;
2322                 case T_STRING_LITERAL:     delim =  '"'; goto string;
2323                 case T_CHARACTER_CONSTANT: delim = '\''; goto string;
2324 string:;
2325                         string_t const *const str = &pp_token.literal.string;
2326                         char     const *const enc = get_string_encoding_prefix(str->encoding);
2327                         obstack_printf(&pp_obstack, "%s%c%s%c", enc, delim, str->begin, delim);
2328                         break;
2329                 }
2330
2331                 default: {
2332                         char const *const str = pp_token.base.symbol->string;
2333                         obstack_grow(&pp_obstack, str, strlen(str));
2334                         break;
2335                 }
2336                 }
2337
2338                 next_input_token();
2339         } while (!info.at_line_begin);
2340
2341         resolve_escape_sequences = old_resolve_escape_sequences;
2342
2343         obstack_1grow(&pp_obstack, '\0');
2344         char *const str = obstack_finish(&pp_obstack);
2345         errorf(&pos, "#%s", str);
2346         obstack_free(&pp_obstack, str);
2347 }
2348
2349 static void parse_preprocessing_directive(void)
2350 {
2351         eat_token('#');
2352
2353         if (info.at_line_begin) {
2354                 /* empty directive */
2355                 return;
2356         }
2357
2358         if (pp_token.base.symbol) {
2359                 switch (pp_token.base.symbol->pp_ID) {
2360                 case TP_define:       parse_define_directive();            break;
2361                 case TP_else:         parse_else_directive();              break;
2362                 case TP_endif:        parse_endif_directive();             break;
2363                 case TP_error:        parse_error_directive();             break;
2364                 case TP_ifdef:        parse_ifdef_ifndef_directive(true);  break;
2365                 case TP_ifndef:       parse_ifdef_ifndef_directive(false); break;
2366                 case TP_include:      parse_include_directive(false);      break;
2367                 case TP_include_next: parse_include_directive(true);       break;
2368                 case TP_line:         next_input_token(); goto line_directive;
2369                 case TP_pragma:       parse_pragma_directive();            break;
2370                 case TP_undef:        parse_undef_directive();             break;
2371                 default:              goto skip;
2372                 }
2373         } else if (pp_token.kind == T_NUMBER) {
2374 line_directive:
2375                 parse_line_directive();
2376         } else {
2377 skip:
2378                 if (!skip_mode) {
2379                         errorf(&pp_token.base.source_position, "invalid preprocessing directive #%K", &pp_token);
2380                 }
2381                 eat_pp_directive();
2382         }
2383
2384         assert(info.at_line_begin);
2385 }
2386
2387 static void finish_current_argument(void)
2388 {
2389         if (current_argument == NULL)
2390                 return;
2391         size_t size = obstack_object_size(&pp_obstack);
2392         current_argument->list_len   = size/sizeof(current_argument->token_list[0]);
2393         current_argument->token_list = obstack_finish(&pp_obstack);
2394 }
2395
2396 void next_preprocessing_token(void)
2397 {
2398 restart:
2399         if (!expand_next()) {
2400                 do {
2401                         next_input_token();
2402                         while (pp_token.kind == '#' && info.at_line_begin) {
2403                                 parse_preprocessing_directive();
2404                         }
2405                 } while (skip_mode && pp_token.kind != T_EOF);
2406         }
2407
2408         const token_kind_t kind = pp_token.kind;
2409         if (current_call == NULL || argument_expanding != NULL) {
2410                 symbol_t *const symbol = pp_token.base.symbol;
2411                 if (symbol) {
2412                         if (kind == T_MACRO_PARAMETER) {
2413                                 assert(current_expansion != NULL);
2414                                 start_expanding(pp_token.macro_parameter.def);
2415                                 goto restart;
2416                         }
2417
2418                         pp_definition_t *const pp_definition = symbol->pp_definition;
2419                         if (pp_definition != NULL && !pp_definition->is_expanding) {
2420                                 if (pp_definition->has_parameters) {
2421
2422                                         /* check if next token is a '(' */
2423                                         whitespace_info_t old_info   = info;
2424                                         token_kind_t      next_token = peek_expansion();
2425                                         if (next_token == T_EOF) {
2426                                                 info.at_line_begin  = false;
2427                                                 info.had_whitespace = false;
2428                                                 skip_whitespace();
2429                                                 if (input.c == '(') {
2430                                                         next_token = '(';
2431                                                 }
2432                                         }
2433
2434                                         if (next_token == '(') {
2435                                                 if (current_expansion == NULL)
2436                                                         expansion_pos = pp_token.base.source_position;
2437                                                 next_preprocessing_token();
2438                                                 assert(pp_token.kind == '(');
2439
2440                                                 pp_definition->parent_expansion = current_expansion;
2441                                                 current_call              = pp_definition;
2442                                                 current_call->expand_pos  = 0;
2443                                                 current_call->expand_info = old_info;
2444                                                 if (current_call->n_parameters > 0) {
2445                                                         current_argument = &current_call->parameters[0];
2446                                                         assert(argument_brace_count == 0);
2447                                                 }
2448                                                 goto restart;
2449                                         } else {
2450                                                 /* skip_whitespaces() skipped newlines and whitespace,
2451                                                  * remember results for next token */
2452                                                 next_info = info;
2453                                                 info      = old_info;
2454                                                 return;
2455                                         }
2456                                 } else {
2457                                         if (current_expansion == NULL)
2458                                                 expansion_pos = pp_token.base.source_position;
2459                                         start_expanding(pp_definition);
2460                                         goto restart;
2461                                 }
2462                         }
2463                 }
2464         }
2465
2466         if (current_call != NULL) {
2467                 /* current_call != NULL */
2468                 if (kind == '(') {
2469                         ++argument_brace_count;
2470                 } else if (kind == ')') {
2471                         if (argument_brace_count > 0) {
2472                                 --argument_brace_count;
2473                         } else {
2474                                 finish_current_argument();
2475                                 assert(kind == ')');
2476                                 start_expanding(current_call);
2477                                 info = current_call->expand_info;
2478                                 current_call     = NULL;
2479                                 current_argument = NULL;
2480                                 goto restart;
2481                         }
2482                 } else if (kind == ',' && argument_brace_count == 0) {
2483                         finish_current_argument();
2484                         current_call->expand_pos++;
2485                         if (current_call->expand_pos >= current_call->n_parameters) {
2486                                 errorf(&pp_token.base.source_position,
2487                                            "too many arguments passed for macro '%Y'",
2488                                            current_call->symbol);
2489                                 current_argument = NULL;
2490                         } else {
2491                                 current_argument
2492                                         = &current_call->parameters[current_call->expand_pos];
2493                         }
2494                         goto restart;
2495                 } else if (kind == T_MACRO_PARAMETER) {
2496                         /* parameters have to be fully expanded before being used as
2497                          * parameters for another macro-call */
2498                         assert(current_expansion != NULL);
2499                         pp_definition_t *argument = pp_token.macro_parameter.def;
2500                         argument_expanding = argument;
2501                         start_expanding(argument);
2502                         goto restart;
2503                 } else if (kind == T_EOF) {
2504                         errorf(&expansion_pos,
2505                                "reached end of file while parsing arguments for '%Y'",
2506                                current_call->symbol);
2507                         return;
2508                 }
2509                 if (current_argument != NULL) {
2510                         saved_token_t saved;
2511                         saved.token = pp_token;
2512                         saved.had_whitespace = info.had_whitespace;
2513                         obstack_grow(&pp_obstack, &saved, sizeof(saved));
2514                 }
2515                 goto restart;
2516         }
2517 }
2518
2519 void append_include_path(searchpath_t *paths, const char *path)
2520 {
2521         searchpath_entry_t *entry = OALLOCZ(&config_obstack, searchpath_entry_t);
2522         entry->path           = path;
2523         entry->is_system_path = paths->is_system_path;
2524
2525         *paths->anchor = entry;
2526         paths->anchor  = &entry->next;
2527 }
2528
2529 static void append_env_paths(searchpath_t *paths, const char *envvar)
2530 {
2531         const char *val = getenv(envvar);
2532         if (val != NULL && *val != '\0') {
2533                 const char *begin = val;
2534                 const char *c;
2535                 do {
2536                         c = begin;
2537                         while (*c != '\0' && *c != ':')
2538                                 ++c;
2539
2540                         size_t len = c-begin;
2541                         if (len == 0) {
2542                                 /* use "." for gcc compatibility (Matze: I would expect that
2543                                  * nothing happens for an empty entry...) */
2544                                 append_include_path(paths, ".");
2545                         } else {
2546                                 char *const string = obstack_copy0(&config_obstack, begin, len);
2547                                 append_include_path(paths, string);
2548                         }
2549
2550                         begin = c+1;
2551                         /* skip : */
2552                         if (*begin == ':')
2553                                 ++begin;
2554                 } while(*c != '\0');
2555         }
2556 }
2557
2558 static void append_searchpath(searchpath_t *path, const searchpath_t *append)
2559 {
2560         *path->anchor = append->first;
2561 }
2562
2563 static void setup_include_path(void)
2564 {
2565         /* built-in paths */
2566         append_include_path(&system_searchpath, "/usr/include");
2567
2568         /* parse environment variable */
2569         append_env_paths(&bracket_searchpath, "CPATH");
2570         append_env_paths(&system_searchpath,
2571                          c_mode & _CXX ? "CPLUS_INCLUDE_PATH" : "C_INCLUDE_PATH");
2572
2573         /* append system search path to bracket searchpath */
2574         append_searchpath(&bracket_searchpath, &system_searchpath);
2575         append_searchpath(&quote_searchpath, &bracket_searchpath);
2576 }
2577
2578 static void input_error(unsigned const delta_lines, unsigned const delta_cols, char const *const message)
2579 {
2580         source_position_t pos = pp_token.base.source_position;
2581         pos.lineno += delta_lines;
2582         pos.colno  += delta_cols;
2583         errorf(&pos, "%s", message);
2584 }
2585
2586 void init_include_paths(void)
2587 {
2588         obstack_init(&config_obstack);
2589 }
2590
2591 void init_preprocessor(void)
2592 {
2593         init_symbols();
2594
2595         obstack_init(&pp_obstack);
2596         obstack_init(&input_obstack);
2597         strset_init(&stringset);
2598
2599         setup_include_path();
2600
2601         set_input_error_callback(input_error);
2602 }
2603
2604 void exit_preprocessor(void)
2605 {
2606         obstack_free(&input_obstack, NULL);
2607         obstack_free(&pp_obstack, NULL);
2608         obstack_free(&config_obstack, NULL);
2609
2610         strset_destroy(&stringset);
2611 }
2612
2613 int pptest_main(int argc, char **argv);
2614 int pptest_main(int argc, char **argv)
2615 {
2616         init_symbol_table();
2617         init_include_paths();
2618         init_preprocessor();
2619         init_tokens();
2620
2621         error_on_unknown_chars   = false;
2622         resolve_escape_sequences = false;
2623
2624         /* simplistic commandline parser */
2625         const char *filename = NULL;
2626         const char *output = NULL;
2627         for (int i = 1; i < argc; ++i) {
2628                 const char *opt = argv[i];
2629                 if (streq(opt, "-I")) {
2630                         append_include_path(&bracket_searchpath, argv[++i]);
2631                         continue;
2632                 } else if (streq(opt, "-E")) {
2633                         /* ignore */
2634                 } else if (streq(opt, "-o")) {
2635                         output = argv[++i];
2636                         continue;
2637                 } else if (opt[0] == '-') {
2638                         fprintf(stderr, "Unknown option '%s'\n", opt);
2639                 } else {
2640                         if (filename != NULL)
2641                                 fprintf(stderr, "Multiple inputs not supported\n");
2642                         filename = argv[i];
2643                 }
2644         }
2645         if (filename == NULL) {
2646                 fprintf(stderr, "No input specified\n");
2647                 return 1;
2648         }
2649
2650         if (output == NULL) {
2651                 out = stdout;
2652         } else {
2653                 out = fopen(output, "w");
2654                 if (out == NULL) {
2655                         fprintf(stderr, "Couldn't open output '%s'\n", output);
2656                         return 1;
2657                 }
2658         }
2659
2660         /* just here for gcc compatibility */
2661         fprintf(out, "# 1 \"%s\"\n", filename);
2662         fprintf(out, "# 1 \"<built-in>\"\n");
2663         fprintf(out, "# 1 \"<command-line>\"\n");
2664
2665         FILE *file = fopen(filename, "r");
2666         if (file == NULL) {
2667                 fprintf(stderr, "Couldn't open input '%s'\n", filename);
2668                 return 1;
2669         }
2670         switch_pp_input(file, filename, NULL, false);
2671
2672         for (;;) {
2673                 next_preprocessing_token();
2674                 if (pp_token.kind == T_EOF)
2675                         break;
2676                 emit_pp_token();
2677         }
2678
2679         fputc('\n', out);
2680         check_unclosed_conditionals();
2681         fclose(close_pp_input());
2682         if (out != stdout)
2683                 fclose(out);
2684
2685         exit_tokens();
2686         exit_preprocessor();
2687         exit_symbol_table();
2688
2689         return 0;
2690 }