Test skip_mode in parse_pragma_directive().
[cparser] / preprocessor.c
1 #include <config.h>
2
3 #include <assert.h>
4 #include <errno.h>
5 #include <string.h>
6 #include <stdbool.h>
7 #include <ctype.h>
8
9 #include "preprocessor.h"
10 #include "token_t.h"
11 #include "symbol_t.h"
12 #include "adt/util.h"
13 #include "adt/error.h"
14 #include "adt/strutil.h"
15 #include "adt/strset.h"
16 #include "lang_features.h"
17 #include "diagnostic.h"
18 #include "string_rep.h"
19 #include "input.h"
20
/** Slots kept free at the start of pp_input_t::buf so that put_back() can
 * step back in front of freshly decoded data. */
#define MAX_PUTBACK   3
/** 199 is for gcc "compatibility"
 * (presumably the maximum include nesting depth; the use is not in view). */
#define INCLUDE_LIMIT 199
23
24 typedef struct saved_token_t {
25         token_t token;
26         bool    had_whitespace;
27 } saved_token_t;
28
/** Whitespace/line-position facts about the current token. */
typedef struct whitespace_info_t whitespace_info_t;
struct whitespace_info_t {
        /** whitespace preceded the current token */
        bool     had_whitespace;
        /** the current token is the first one on its line;
         * a "#" in that position starts a preprocessing directive */
        bool     at_line_begin;
        /** count of spaces in front of the first token of the line */
        unsigned whitespace_at_line_begin;
};
38
39 struct pp_definition_t {
40         symbol_t          *symbol;
41         source_position_t  source_position;
42         pp_definition_t   *parent_expansion;
43         size_t             expand_pos;
44         whitespace_info_t  expand_info;
45         bool               is_variadic    : 1;
46         bool               is_expanding   : 1;
47         bool               has_parameters : 1;
48         bool               is_parameter   : 1;
49         pp_definition_t   *function_definition;
50         size_t             n_parameters;
51         pp_definition_t   *parameters;
52
53         /* replacement */
54         size_t             list_len;
55         saved_token_t     *token_list;
56 };
57
58 typedef struct pp_conditional_t pp_conditional_t;
59 struct pp_conditional_t {
60         source_position_t  source_position;
61         bool               condition;
62         bool               in_else;
63         /** conditional in skip mode (then+else gets skipped) */
64         bool               skip;
65         pp_conditional_t  *parent;
66 };
67
68 typedef struct pp_input_t pp_input_t;
69 struct pp_input_t {
70         FILE              *file;
71         input_t           *input;
72         utf32              c;
73         utf32              buf[1024+MAX_PUTBACK];
74         const utf32       *bufend;
75         const utf32       *bufpos;
76         source_position_t  position;
77         pp_input_t        *parent;
78         unsigned           output_line;
79 };
80
/** Node of a singly linked list of search paths. */
typedef struct searchpath_entry_t {
        const char                *path;
        struct searchpath_entry_t *next;
} searchpath_entry_t;
86
87 static pp_input_t      input;
88
89 static pp_input_t     *input_stack;
90 static unsigned        n_inputs;
91 static struct obstack  input_obstack;
92
93 static pp_conditional_t *conditional_stack;
94
95 token_t                  pp_token;
96 bool                     allow_dollar_in_symbol   = true;
97 static bool              resolve_escape_sequences = true;
98 static bool              error_on_unknown_chars   = true;
99 static bool              skip_mode;
100 static FILE             *out;
101 static struct obstack    pp_obstack;
102 static struct obstack    config_obstack;
103 static const char       *printed_input_name = NULL;
104 static source_position_t expansion_pos;
105 static pp_definition_t  *current_expansion  = NULL;
106 static pp_definition_t  *current_call       = NULL;
107 static pp_definition_t  *current_argument   = NULL;
108 static pp_definition_t  *argument_expanding = NULL;
109 static unsigned          argument_brace_count;
110 static strset_t          stringset;
111 static token_kind_t      last_token;
112
113 static searchpath_entry_t *searchpath;
114
115 static whitespace_info_t next_info; /* valid if had_whitespace is true */
116 static whitespace_info_t info;
117
118 static inline void next_char(void);
119 static void next_input_token(void);
120 static void print_line_directive(const source_position_t *pos, const char *add);
121
122 static symbol_t *symbol_colongreater;
123 static symbol_t *symbol_lesscolon;
124 static symbol_t *symbol_lesspercent;
125 static symbol_t *symbol_percentcolon;
126 static symbol_t *symbol_percentcolonpercentcolon;
127 static symbol_t *symbol_percentgreater;
128
129 static void init_symbols(void)
130 {
131         symbol_colongreater             = symbol_table_insert(":>");
132         symbol_lesscolon                = symbol_table_insert("<:");
133         symbol_lesspercent              = symbol_table_insert("<%");
134         symbol_percentcolon             = symbol_table_insert("%:");
135         symbol_percentcolonpercentcolon = symbol_table_insert("%:%:");
136         symbol_percentgreater           = symbol_table_insert("%>");
137 }
138
139 void switch_pp_input(FILE *const file, char const *const filename)
140 {
141         input.file                = file;
142         input.input               = input_from_stream(file, NULL);
143         input.bufend              = NULL;
144         input.bufpos              = NULL;
145         input.output_line         = 0;
146         input.position.input_name = filename;
147         input.position.lineno     = 1;
148
149         /* indicate that we're at a new input */
150         print_line_directive(&input.position, input_stack != NULL ? "1" : NULL);
151
152         /* place a virtual '\n' so we realize we're at line begin */
153         input.position.lineno = 0;
154         input.c               = '\n';
155 }
156
157 FILE *close_pp_input(void)
158 {
159         input_free(input.input);
160
161         FILE* const file = input.file;
162         assert(file);
163
164         input.input  = NULL;
165         input.file   = NULL;
166         input.bufend = NULL;
167         input.bufpos = NULL;
168         input.c      = EOF;
169
170         return file;
171 }
172
173 static void push_input(void)
174 {
175         pp_input_t *const saved_input = obstack_copy(&input_obstack, &input, sizeof(input));
176
177         /* adjust buffer positions */
178         if (input.bufpos != NULL)
179                 saved_input->bufpos = saved_input->buf + (input.bufpos - input.buf);
180         if (input.bufend != NULL)
181                 saved_input->bufend = saved_input->buf + (input.bufend - input.buf);
182
183         saved_input->parent = input_stack;
184         input_stack         = saved_input;
185         ++n_inputs;
186 }
187
188 static void pop_restore_input(void)
189 {
190         assert(n_inputs > 0);
191         assert(input_stack != NULL);
192
193         pp_input_t *saved_input = input_stack;
194
195         memcpy(&input, saved_input, sizeof(input));
196         input.parent = NULL;
197
198         /* adjust buffer positions */
199         if (saved_input->bufpos != NULL)
200                 input.bufpos = input.buf + (saved_input->bufpos - saved_input->buf);
201         if (saved_input->bufend != NULL)
202                 input.bufend = input.buf + (saved_input->bufend - saved_input->buf);
203
204         input_stack = saved_input->parent;
205         obstack_free(&input_obstack, saved_input);
206         --n_inputs;
207 }
208
209 /**
210  * Prints a parse error message at the current token.
211  *
212  * @param msg   the error message
213  */
214 static void parse_error(const char *msg)
215 {
216         errorf(&pp_token.base.source_position,  "%s", msg);
217 }
218
219 static inline void next_real_char(void)
220 {
221         assert(input.bufpos <= input.bufend);
222         if (input.bufpos >= input.bufend) {
223                 size_t const n = decode(input.input, input.buf + MAX_PUTBACK, lengthof(input.buf) - MAX_PUTBACK);
224                 if (n == 0) {
225                         input.c = EOF;
226                         return;
227                 }
228                 input.bufpos = input.buf + MAX_PUTBACK;
229                 input.bufend = input.bufpos + n;
230         }
231         input.c = *input.bufpos++;
232         ++input.position.colno;
233 }
234
235 /**
236  * Put a character back into the buffer.
237  *
238  * @param pc  the character to put back
239  */
240 static inline void put_back(utf32 const pc)
241 {
242         assert(input.bufpos > input.buf);
243         *(--input.bufpos - input.buf + input.buf) = (char) pc;
244         --input.position.colno;
245 }
246
/**
 * Case label plus code that consumes one line terminator ("\r", "\r\n" or
 * "\n") and moves the input position to column 1 of the following line.
 * Written as `case NEWLINE:` inside a switch over input.c; the statements
 * after it run once the terminator has been consumed.
 * It defines the label `newline`, so it can appear only once per function.
 */
#define NEWLINE \
        '\r': \
                next_char(); \
                if (input.c == '\n') { \
        case '\n': \
                        next_char(); \
                } \
                input.position.colno = 1; \
                ++input.position.lineno; \
                goto newline; \
                newline /* makes the use site read like a plain case label */
258
/**
 * Consumes the current character, asserting that it is c_type.
 * The argument is parenthesized so that any expression can be passed.
 */
#define eat(c_type) (assert(input.c == (c_type)), next_char())
260
261 static void maybe_concat_lines(void)
262 {
263         eat('\\');
264
265         switch (input.c) {
266         case NEWLINE:
267                 info.whitespace_at_line_begin = 0;
268                 return;
269
270         default:
271                 break;
272         }
273
274         put_back(input.c);
275         input.c = '\\';
276 }
277
278 /**
279  * Set c to the next input character, ie.
280  * after expanding trigraphs.
281  */
282 static inline void next_char(void)
283 {
284         next_real_char();
285
286         /* filter trigraphs and concatenated lines */
287         if (UNLIKELY(input.c == '\\')) {
288                 maybe_concat_lines();
289                 goto end_of_next_char;
290         }
291
292         if (LIKELY(input.c != '?'))
293                 goto end_of_next_char;
294
295         next_real_char();
296         if (LIKELY(input.c != '?')) {
297                 put_back(input.c);
298                 input.c = '?';
299                 goto end_of_next_char;
300         }
301
302         next_real_char();
303         switch (input.c) {
304         case '=': input.c = '#'; break;
305         case '(': input.c = '['; break;
306         case '/': input.c = '\\'; maybe_concat_lines(); break;
307         case ')': input.c = ']'; break;
308         case '\'': input.c = '^'; break;
309         case '<': input.c = '{'; break;
310         case '!': input.c = '|'; break;
311         case '>': input.c = '}'; break;
312         case '-': input.c = '~'; break;
313         default:
314                 put_back(input.c);
315                 put_back('?');
316                 input.c = '?';
317                 break;
318         }
319
320 end_of_next_char:;
321 #ifdef DEBUG_CHARS
322         printf("nchar '%c'\n", input.c);
323 #endif
324 }
325
326
327
/**
 * Tests whether a character is one of the octal digits '0' to '7'.
 *
 * @param chr  the character to check
 * @return true for '0'..'7', false for everything else
 */
static inline bool is_octal_digit(int chr)
{
        /* the decimal digits are contiguous in every C character set */
        return '0' <= chr && chr <= '7';
}
349
/**
 * Maps a decimal or hexadecimal digit character to its numeric value.
 * Any other character ends in panic().
 *
 * @param digit  one of '0'..'9', 'a'..'f', 'A'..'F'
 * @return the value 0..15
 */
static int digit_value(int digit)
{
        /* the decimal digits are guaranteed to be contiguous ... */
        if ('0' <= digit && digit <= '9')
                return digit - '0';

        /* ... the letters are not, so they are listed one by one */
        switch (digit) {
        case 'a': case 'A': return 10;
        case 'b': case 'B': return 11;
        case 'c': case 'C': return 12;
        case 'd': case 'D': return 13;
        case 'e': case 'E': return 14;
        case 'f': case 'F': return 15;
        default:
                panic("wrong character given");
        }
}
383
384 /**
385  * Parses an octal character sequence.
386  *
387  * @param first_digit  the already read first digit
388  */
389 static utf32 parse_octal_sequence(const utf32 first_digit)
390 {
391         assert(is_octal_digit(first_digit));
392         utf32 value = digit_value(first_digit);
393         if (!is_octal_digit(input.c)) return value;
394         value = 8 * value + digit_value(input.c);
395         next_char();
396         if (!is_octal_digit(input.c)) return value;
397         value = 8 * value + digit_value(input.c);
398         next_char();
399         return value;
400
401 }
402
403 /**
404  * Parses a hex character sequence.
405  */
406 static utf32 parse_hex_sequence(void)
407 {
408         utf32 value = 0;
409         while (isxdigit(input.c)) {
410                 value = 16 * value + digit_value(input.c);
411                 next_char();
412         }
413         return value;
414 }
415
416 static bool is_universal_char_valid(utf32 const v)
417 {
418         /* C11 Â§6.4.3:2 */
419         if (v < 0xA0U && v != 0x24 && v != 0x40 && v != 0x60)
420                 return false;
421         if (0xD800 <= v && v <= 0xDFFF)
422                 return false;
423         return true;
424 }
425
426 static utf32 parse_universal_char(unsigned const n_digits)
427 {
428         utf32 v = 0;
429         for (unsigned k = n_digits; k != 0; --k) {
430                 if (isxdigit(input.c)) {
431                         v = 16 * v + digit_value(input.c);
432                         if (!resolve_escape_sequences)
433                                 obstack_1grow(&symbol_obstack, input.c);
434                         next_char();
435                 } else {
436                         errorf(&input.position,
437                                "short universal character name, expected %u more digits",
438                                    k);
439                         break;
440                 }
441         }
442         if (!is_universal_char_valid(v)) {
443                 errorf(&input.position,
444                        "\\%c%0*X is not a valid universal character name",
445                        n_digits == 4 ? 'u' : 'U', (int)n_digits, v);
446         }
447         return v;
448 }
449
450 static bool is_universal_char_valid_identifier(utf32 const v)
451 {
452         /* C11 Annex D.1 */
453         if (                v == 0x000A8) return true;
454         if (                v == 0x000AA) return true;
455         if (                v == 0x000AD) return true;
456         if (                v == 0x000AF) return true;
457         if (0x000B2 <= v && v <= 0x000B5) return true;
458         if (0x000B7 <= v && v <= 0x000BA) return true;
459         if (0x000BC <= v && v <= 0x000BE) return true;
460         if (0x000C0 <= v && v <= 0x000D6) return true;
461         if (0x000D8 <= v && v <= 0x000F6) return true;
462         if (0x000F8 <= v && v <= 0x000FF) return true;
463         if (0x00100 <= v && v <= 0x0167F) return true;
464         if (0x01681 <= v && v <= 0x0180D) return true;
465         if (0x0180F <= v && v <= 0x01FFF) return true;
466         if (0x0200B <= v && v <= 0x0200D) return true;
467         if (0x0202A <= v && v <= 0x0202E) return true;
468         if (0x0203F <= v && v <= 0x02040) return true;
469         if (                v == 0x02054) return true;
470         if (0x02060 <= v && v <= 0x0206F) return true;
471         if (0x02070 <= v && v <= 0x0218F) return true;
472         if (0x02460 <= v && v <= 0x024FF) return true;
473         if (0x02776 <= v && v <= 0x02793) return true;
474         if (0x02C00 <= v && v <= 0x02DFF) return true;
475         if (0x02E80 <= v && v <= 0x02FFF) return true;
476         if (0x03004 <= v && v <= 0x03007) return true;
477         if (0x03021 <= v && v <= 0x0302F) return true;
478         if (0x03031 <= v && v <= 0x0303F) return true;
479         if (0x03040 <= v && v <= 0x0D7FF) return true;
480         if (0x0F900 <= v && v <= 0x0FD3D) return true;
481         if (0x0FD40 <= v && v <= 0x0FDCF) return true;
482         if (0x0FDF0 <= v && v <= 0x0FE44) return true;
483         if (0x0FE47 <= v && v <= 0x0FFFD) return true;
484         if (0x10000 <= v && v <= 0x1FFFD) return true;
485         if (0x20000 <= v && v <= 0x2FFFD) return true;
486         if (0x30000 <= v && v <= 0x3FFFD) return true;
487         if (0x40000 <= v && v <= 0x4FFFD) return true;
488         if (0x50000 <= v && v <= 0x5FFFD) return true;
489         if (0x60000 <= v && v <= 0x6FFFD) return true;
490         if (0x70000 <= v && v <= 0x7FFFD) return true;
491         if (0x80000 <= v && v <= 0x8FFFD) return true;
492         if (0x90000 <= v && v <= 0x9FFFD) return true;
493         if (0xA0000 <= v && v <= 0xAFFFD) return true;
494         if (0xB0000 <= v && v <= 0xBFFFD) return true;
495         if (0xC0000 <= v && v <= 0xCFFFD) return true;
496         if (0xD0000 <= v && v <= 0xDFFFD) return true;
497         if (0xE0000 <= v && v <= 0xEFFFD) return true;
498         return false;
499 }
500
501 static bool is_universal_char_valid_identifier_start(utf32 const v)
502 {
503         /* C11 Annex D.2 */
504         if (0x0300 <= v && v <= 0x036F) return false;
505         if (0x1DC0 <= v && v <= 0x1DFF) return false;
506         if (0x20D0 <= v && v <= 0x20FF) return false;
507         if (0xFE20 <= v && v <= 0xFE2F) return false;
508         return true;
509 }
510
511 /**
512  * Parse an escape sequence.
513  */
514 static utf32 parse_escape_sequence(void)
515 {
516         eat('\\');
517
518         utf32 const ec = input.c;
519         next_char();
520
521         switch (ec) {
522         case '"':  return '"';
523         case '\'': return '\'';
524         case '\\': return '\\';
525         case '?': return '\?';
526         case 'a': return '\a';
527         case 'b': return '\b';
528         case 'f': return '\f';
529         case 'n': return '\n';
530         case 'r': return '\r';
531         case 't': return '\t';
532         case 'v': return '\v';
533         case 'x':
534                 return parse_hex_sequence();
535         case '0':
536         case '1':
537         case '2':
538         case '3':
539         case '4':
540         case '5':
541         case '6':
542         case '7':
543                 return parse_octal_sequence(ec);
544         case EOF:
545                 parse_error("reached end of file while parsing escape sequence");
546                 return EOF;
547         /* \E is not documented, but handled, by GCC.  It is acceptable according
548          * to Â§6.11.4, whereas \e is not. */
549         case 'E':
550         case 'e':
551                 if (c_mode & _GNUC)
552                         return 27;   /* hopefully 27 is ALWAYS the code for ESCAPE */
553                 break;
554
555         case 'U': return parse_universal_char(8);
556         case 'u': return parse_universal_char(4);
557
558         default:
559                 break;
560         }
561         /* Â§6.4.4.4:8 footnote 64 */
562         parse_error("unknown escape sequence");
563         return EOF;
564 }
565
566 static const char *identify_string(char *string)
567 {
568         const char *result = strset_insert(&stringset, string);
569         if (result != string) {
570                 obstack_free(&symbol_obstack, string);
571         }
572         return result;
573 }
574
575 static string_t sym_make_string(string_encoding_t const enc)
576 {
577         obstack_1grow(&symbol_obstack, '\0');
578         size_t      const len    = obstack_object_size(&symbol_obstack) - 1;
579         char       *const string = obstack_finish(&symbol_obstack);
580         char const *const result = identify_string(string);
581         return (string_t){ result, len, enc };
582 }
583
584 string_t make_string(char const *const string)
585 {
586         obstack_grow(&symbol_obstack, string, strlen(string));
587         return sym_make_string(STRING_ENCODING_CHAR);
588 }
589
590 static void parse_string(utf32 const delimiter, token_kind_t const kind,
591                          string_encoding_t const enc,
592                          char const *const context)
593 {
594         const unsigned start_linenr = input.position.lineno;
595
596         eat(delimiter);
597
598         while (true) {
599                 switch (input.c) {
600                 case '\\': {
601                         if (resolve_escape_sequences) {
602                                 utf32 const tc = parse_escape_sequence();
603                                 if (enc == STRING_ENCODING_CHAR) {
604                                         if (tc >= 0x100) {
605                                                 warningf(WARN_OTHER, &pp_token.base.source_position, "escape sequence out of range");
606                                         }
607                                         obstack_1grow(&symbol_obstack, tc);
608                                 } else {
609                                         obstack_grow_utf8(&symbol_obstack, tc);
610                                 }
611                         } else {
612                                 obstack_1grow(&symbol_obstack, (char)input.c);
613                                 next_char();
614                                 obstack_1grow(&symbol_obstack, (char)input.c);
615                                 next_char();
616                         }
617                         break;
618                 }
619
620                 case NEWLINE:
621                         errorf(&pp_token.base.source_position, "newline while parsing %s", context);
622                         break;
623
624                 case EOF: {
625                         source_position_t source_position;
626                         source_position.input_name = pp_token.base.source_position.input_name;
627                         source_position.lineno     = start_linenr;
628                         errorf(&source_position, "EOF while parsing %s", context);
629                         goto end_of_string;
630                 }
631
632                 default:
633                         if (input.c == delimiter) {
634                                 next_char();
635                                 goto end_of_string;
636                         } else {
637                                 obstack_grow_utf8(&symbol_obstack, input.c);
638                                 next_char();
639                                 break;
640                         }
641                 }
642         }
643
644 end_of_string:
645         pp_token.kind           = kind;
646         pp_token.literal.string = sym_make_string(enc);
647 }
648
649 static void parse_string_literal(string_encoding_t const enc)
650 {
651         parse_string('"', T_STRING_LITERAL, enc, "string literal");
652 }
653
654 static void parse_character_constant(string_encoding_t const enc)
655 {
656         parse_string('\'', T_CHARACTER_CONSTANT, enc, "character constant");
657         if (pp_token.literal.string.size == 0) {
658                 parse_error("empty character constant");
659         }
660 }
661
/**
 * Case labels for the characters of a symbol, except 'e', 'E', 'p' and 'P'.
 * Written as `case SYMBOL_CASES_WITHOUT_E_P:`. The '$' case jumps to the
 * label `dollar_sign`, which the using function has to provide, unless
 * allow_dollar_in_symbol is set.
 */
#define SYMBOL_CASES_WITHOUT_E_P \
             '$': if (!allow_dollar_in_symbol) goto dollar_sign; \
        case 'a': case 'b': case 'c': case 'd': case 'f': case 'g': \
        case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': \
        case 'n': case 'o': case 'q': case 'r': case 's': case 't': \
        case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': \
        case 'A': case 'B': case 'C': case 'D': case 'F': case 'G': \
        case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': \
        case 'N': case 'O': case 'Q': case 'R': case 'S': case 'T': \
        case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': \
        case '_'
713
/**
 * Case labels for all characters of a symbol: SYMBOL_CASES_WITHOUT_E_P plus
 * 'e', 'p', 'E' and 'P'. Written as `case SYMBOL_CASES:`.
 */
#define SYMBOL_CASES \
             SYMBOL_CASES_WITHOUT_E_P: \
        case 'e': case 'p': case 'E': case 'P'
720
/**
 * Case labels for the decimal digits. Written as `case DIGIT_CASES:`.
 */
#define DIGIT_CASES \
             '0': case '1': case '2': case '3': case '4': \
        case '5': case '6': case '7': case '8': case '9'
732
733 static void start_expanding(pp_definition_t *definition)
734 {
735         definition->parent_expansion = current_expansion;
736         definition->expand_pos       = 0;
737         definition->is_expanding     = true;
738         if (definition->list_len > 0) {
739                 definition->token_list[0].had_whitespace
740                         = info.had_whitespace;
741         }
742         current_expansion = definition;
743 }
744
745 static void finished_expanding(pp_definition_t *definition)
746 {
747         assert(definition->is_expanding);
748         pp_definition_t *parent = definition->parent_expansion;
749         definition->parent_expansion = NULL;
750         definition->is_expanding     = false;
751
752         /* stop further expanding once we expanded a parameter used in a
753          * sub macro-call */
754         if (definition == argument_expanding)
755                 argument_expanding = NULL;
756
757         assert(current_expansion == definition);
758         current_expansion = parent;
759 }
760
761 static inline void set_punctuator(token_kind_t const kind)
762 {
763         pp_token.kind        = kind;
764         pp_token.base.symbol = token_symbols[kind];
765 }
766
767 static inline void set_digraph(token_kind_t const kind, symbol_t *const symbol)
768 {
769         pp_token.kind        = kind;
770         pp_token.base.symbol = symbol;
771 }
772
773 /**
774  * returns next final token from a preprocessor macro expansion
775  */
776 static bool expand_next(void)
777 {
778         if (current_expansion == NULL)
779                 return false;
780
781 restart:;
782         size_t pos = current_expansion->expand_pos;
783         if (pos >= current_expansion->list_len) {
784                 finished_expanding(current_expansion);
785                 /* it was the outermost expansion, parse pptoken normally */
786                 if (current_expansion == NULL) {
787                         return false;
788                 }
789                 goto restart;
790         }
791         const saved_token_t *saved = &current_expansion->token_list[pos++];
792         pp_token = saved->token;
793
794         if (current_expansion->expand_pos > 0)
795                 info.had_whitespace = saved->had_whitespace;
796         pp_token.base.source_position = expansion_pos;
797         ++current_expansion->expand_pos;
798
799         return true;
800 }
801
802 /**
803  * Returns the next token kind found when continuing the current expansions
804  * without starting new sub-expansions.
805  */
806 static token_kind_t peek_expansion(void)
807 {
808         pp_definition_t *expansion = current_expansion;
809         while (expansion != NULL && expansion->expand_pos >= expansion->list_len) {
810                 expansion = expansion->parent_expansion;
811         }
812         if (expansion == NULL)
813                 return T_EOF;
814         return expansion->token_list[expansion->expand_pos].token.kind;
815 }
816
817 static void skip_line_comment(void)
818 {
819         info.had_whitespace = true;
820         while (true) {
821                 switch (input.c) {
822                 case EOF:
823                         return;
824
825                 case '\r':
826                 case '\n':
827                         return;
828
829                 default:
830                         next_char();
831                         break;
832                 }
833         }
834 }
835
836 static void skip_multiline_comment(void)
837 {
838         info.had_whitespace = true;
839
840         unsigned start_linenr = input.position.lineno;
841         while (true) {
842                 switch (input.c) {
843                 case '/':
844                         next_char();
845                         if (input.c == '*') {
846                                 /* TODO: nested comment, warn here */
847                         }
848                         break;
849                 case '*':
850                         next_char();
851                         if (input.c == '/') {
852                                 if (input.position.lineno != input.output_line)
853                                         info.whitespace_at_line_begin = input.position.colno;
854                                 next_char();
855                                 return;
856                         }
857                         break;
858
859                 case NEWLINE:
860                         break;
861
862                 case EOF: {
863                         source_position_t source_position;
864                         source_position.input_name = pp_token.base.source_position.input_name;
865                         source_position.lineno     = start_linenr;
866                         errorf(&source_position, "at end of file while looking for comment end");
867                         return;
868                 }
869
870                 default:
871                         next_char();
872                         break;
873                 }
874         }
875 }
876
877 static bool skip_till_newline(bool stop_at_non_whitespace)
878 {
879         bool res = false;
880         while (true) {
881                 switch (input.c) {
882                 case ' ':
883                 case '\t':
884                         next_char();
885                         continue;
886
887                 case '/':
888                         next_char();
889                         if (input.c == '/') {
890                                 next_char();
891                                 skip_line_comment();
892                                 continue;
893                         } else if (input.c == '*') {
894                                 next_char();
895                                 skip_multiline_comment();
896                                 continue;
897                         } else {
898                                 put_back(input.c);
899                                 input.c = '/';
900                         }
901                         return true;
902
903                 case NEWLINE:
904                         return res;
905
906                 default:
907                         if (stop_at_non_whitespace)
908                                 return false;
909                         res = true;
910                         next_char();
911                         continue;
912                 }
913         }
914 }
915
916 static void skip_whitespace(void)
917 {
918         while (true) {
919                 switch (input.c) {
920                 case ' ':
921                 case '\t':
922                         ++info.whitespace_at_line_begin;
923                         info.had_whitespace = true;
924                         next_char();
925                         continue;
926
927                 case NEWLINE:
928                         info.at_line_begin  = true;
929                         info.had_whitespace = true;
930                         info.whitespace_at_line_begin = 0;
931                         continue;
932
933                 case '/':
934                         next_char();
935                         if (input.c == '/') {
936                                 next_char();
937                                 skip_line_comment();
938                                 continue;
939                         } else if (input.c == '*') {
940                                 next_char();
941                                 skip_multiline_comment();
942                                 continue;
943                         } else {
944                                 put_back(input.c);
945                                 input.c = '/';
946                         }
947                         return;
948
949                 default:
950                         return;
951                 }
952         }
953 }
954
955 static inline void eat_pp(pp_token_kind_t const kind)
956 {
957         assert(pp_token.base.symbol->pp_ID == kind);
958         (void) kind;
959         next_input_token();
960 }
961
962 static inline void eat_token(token_kind_t const kind)
963 {
964         assert(pp_token.kind == kind);
965         (void)kind;
966         next_input_token();
967 }
968
969 static void parse_symbol(void)
970 {
971         assert(obstack_object_size(&symbol_obstack) == 0);
972         while (true) {
973                 switch (input.c) {
974                 case DIGIT_CASES:
975                 case SYMBOL_CASES:
976                         obstack_1grow(&symbol_obstack, (char) input.c);
977                         next_char();
978                         break;
979
980                 case '\\':
981                         next_char();
982                         switch (input.c) {
983                         {
984                                 unsigned n;
985                         case 'U': n = 8; goto universal;
986                         case 'u': n = 4; goto universal;
987 universal:
988                                 if (!resolve_escape_sequences) {
989                                         obstack_1grow(&symbol_obstack, '\\');
990                                         obstack_1grow(&symbol_obstack, input.c);
991                                 }
992                                 next_char();
993                                 utf32 const v = parse_universal_char(n);
994                                 if (!is_universal_char_valid_identifier(v)) {
995                                         if (is_universal_char_valid(v)) {
996                                                 errorf(&input.position,
997                                                            "universal character \\%c%0*X is not valid in an identifier",
998                                                            n == 4 ? 'u' : 'U', (int)n, v);
999                                         }
1000                                 } else if (obstack_object_size(&symbol_obstack) == 0 && !is_universal_char_valid_identifier_start(v)) {
1001                                         errorf(&input.position,
1002                                                    "universal character \\%c%0*X is not valid as start of an identifier",
1003                                                    n == 4 ? 'u' : 'U', (int)n, v);
1004                                 } else if (resolve_escape_sequences) {
1005                                         obstack_grow_utf8(&symbol_obstack, v);
1006                                 }
1007                                 break;
1008                         }
1009
1010                         default:
1011                                 put_back(input.c);
1012                                 input.c = '\\';
1013                                 goto end_symbol;
1014                         }
1015
1016                 default:
1017 dollar_sign:
1018                         goto end_symbol;
1019                 }
1020         }
1021
1022 end_symbol:
1023         obstack_1grow(&symbol_obstack, '\0');
1024         char *string = obstack_finish(&symbol_obstack);
1025
1026         /* might be a wide string or character constant ( L"string"/L'c' ) */
1027         if (input.c == '"' && string[0] == 'L' && string[1] == '\0') {
1028                 obstack_free(&symbol_obstack, string);
1029                 parse_string_literal(STRING_ENCODING_WIDE);
1030                 return;
1031         } else if (input.c == '\'' && string[0] == 'L' && string[1] == '\0') {
1032                 obstack_free(&symbol_obstack, string);
1033                 parse_character_constant(STRING_ENCODING_WIDE);
1034                 return;
1035         }
1036
1037         symbol_t *symbol = symbol_table_insert(string);
1038
1039         pp_token.kind        = symbol->ID;
1040         pp_token.base.symbol = symbol;
1041
1042         /* we can free the memory from symbol obstack if we already had an entry in
1043          * the symbol table */
1044         if (symbol->string != string) {
1045                 obstack_free(&symbol_obstack, string);
1046         }
1047 }
1048
1049 static void parse_number(void)
1050 {
1051         obstack_1grow(&symbol_obstack, (char) input.c);
1052         next_char();
1053
1054         while (true) {
1055                 switch (input.c) {
1056                 case '.':
1057                 case DIGIT_CASES:
1058                 case SYMBOL_CASES_WITHOUT_E_P:
1059                         obstack_1grow(&symbol_obstack, (char) input.c);
1060                         next_char();
1061                         break;
1062
1063                 case 'e':
1064                 case 'p':
1065                 case 'E':
1066                 case 'P':
1067                         obstack_1grow(&symbol_obstack, (char) input.c);
1068                         next_char();
1069                         if (input.c == '+' || input.c == '-') {
1070                                 obstack_1grow(&symbol_obstack, (char) input.c);
1071                                 next_char();
1072                         }
1073                         break;
1074
1075                 default:
1076 dollar_sign:
1077                         goto end_number;
1078                 }
1079         }
1080
1081 end_number:
1082         pp_token.kind           = T_NUMBER;
1083         pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
1084 }
1085
/*
 * Helper macros for lexing multi-character punctuators. They build a switch
 * over the character following the one that was just matched:
 *   MAYBE_PROLOG         reads the next character and opens the switch
 *   MAYBE / MAYBE_DIGRAPH  one alternative; consumes the character, sets the
 *                        token and returns from the enclosing function
 *   ELSE_CODE / ELSE     the default alternative; also closes the switch
 */

/** Advance to the next character and open a switch over it. */
#define MAYBE_PROLOG                                                          \
        next_char();                                                          \
        switch (input.c) {

/** Alternative: character @p ch completes the punctuator @p kind. */
#define MAYBE(ch, kind)                                                       \
        case ch:                                                              \
                next_char();                                                  \
                set_punctuator(kind);                                         \
                return;

/** Alternative: character @p ch completes a digraph spelled @p symbol. */
#define MAYBE_DIGRAPH(ch, kind, symbol)                                       \
        case ch:                                                              \
                next_char();                                                  \
                set_digraph(kind, symbol);                                    \
                return;

/** Default alternative running @p code; closes the switch. */
#define ELSE_CODE(code)                                                       \
        default:                                                              \
                code                                                          \
        }

/** Default alternative: the shorter punctuator @p kind was meant. */
#define ELSE(kind) ELSE_CODE(set_punctuator(kind); return;)
1108
1109 /** identifies and returns the next preprocessing token contained in the
1110  * input stream. No macro expansion is performed. */
1111 static void next_input_token(void)
1112 {
1113         if (next_info.had_whitespace) {
1114                 info = next_info;
1115                 next_info.had_whitespace = false;
1116         } else {
1117                 info.at_line_begin  = false;
1118                 info.had_whitespace = false;
1119         }
1120 restart:
1121         pp_token.base.source_position = input.position;
1122         pp_token.base.symbol          = NULL;
1123
1124         switch (input.c) {
1125         case ' ':
1126         case '\t':
1127                 info.whitespace_at_line_begin++;
1128                 info.had_whitespace = true;
1129                 next_char();
1130                 goto restart;
1131
1132         case NEWLINE:
1133                 info.at_line_begin            = true;
1134                 info.had_whitespace           = true;
1135                 info.whitespace_at_line_begin = 0;
1136                 goto restart;
1137
1138         case SYMBOL_CASES:
1139                 parse_symbol();
1140                 return;
1141
1142         case DIGIT_CASES:
1143                 parse_number();
1144                 return;
1145
1146         case '"':
1147                 parse_string_literal(STRING_ENCODING_CHAR);
1148                 return;
1149
1150         case '\'':
1151                 parse_character_constant(STRING_ENCODING_CHAR);
1152                 return;
1153
1154         case '.':
1155                 MAYBE_PROLOG
1156                         case '0':
1157                         case '1':
1158                         case '2':
1159                         case '3':
1160                         case '4':
1161                         case '5':
1162                         case '6':
1163                         case '7':
1164                         case '8':
1165                         case '9':
1166                                 put_back(input.c);
1167                                 input.c = '.';
1168                                 parse_number();
1169                                 return;
1170
1171                         case '.':
1172                                 MAYBE_PROLOG
1173                                 MAYBE('.', T_DOTDOTDOT)
1174                                 ELSE_CODE(
1175                                         put_back(input.c);
1176                                         input.c = '.';
1177                                         set_punctuator('.');
1178                                         return;
1179                                 )
1180                 ELSE('.')
1181         case '&':
1182                 MAYBE_PROLOG
1183                 MAYBE('&', T_ANDAND)
1184                 MAYBE('=', T_ANDEQUAL)
1185                 ELSE('&')
1186         case '*':
1187                 MAYBE_PROLOG
1188                 MAYBE('=', T_ASTERISKEQUAL)
1189                 ELSE('*')
1190         case '+':
1191                 MAYBE_PROLOG
1192                 MAYBE('+', T_PLUSPLUS)
1193                 MAYBE('=', T_PLUSEQUAL)
1194                 ELSE('+')
1195         case '-':
1196                 MAYBE_PROLOG
1197                 MAYBE('>', T_MINUSGREATER)
1198                 MAYBE('-', T_MINUSMINUS)
1199                 MAYBE('=', T_MINUSEQUAL)
1200                 ELSE('-')
1201         case '!':
1202                 MAYBE_PROLOG
1203                 MAYBE('=', T_EXCLAMATIONMARKEQUAL)
1204                 ELSE('!')
1205         case '/':
1206                 MAYBE_PROLOG
1207                 MAYBE('=', T_SLASHEQUAL)
1208                 case '*':
1209                         next_char();
1210                         skip_multiline_comment();
1211                         goto restart;
1212                 case '/':
1213                         next_char();
1214                         skip_line_comment();
1215                         goto restart;
1216                 ELSE('/')
1217         case '%':
1218                 MAYBE_PROLOG
1219                 MAYBE_DIGRAPH('>', '}', symbol_percentgreater)
1220                 MAYBE('=', T_PERCENTEQUAL)
1221                 case ':':
1222                         MAYBE_PROLOG
1223                         case '%':
1224                                 MAYBE_PROLOG
1225                                 MAYBE_DIGRAPH(':', T_HASHHASH, symbol_percentcolonpercentcolon)
1226                                 ELSE_CODE(
1227                                         put_back(input.c);
1228                                         input.c = '%';
1229                                         goto digraph_percentcolon;
1230                                 )
1231                         ELSE_CODE(
1232 digraph_percentcolon:
1233                                 set_digraph('#', symbol_percentcolon);
1234                                 return;
1235                         )
1236                 ELSE('%')
1237         case '<':
1238                 MAYBE_PROLOG
1239                 MAYBE_DIGRAPH(':', '[', symbol_lesscolon)
1240                 MAYBE_DIGRAPH('%', '{', symbol_lesspercent)
1241                 MAYBE('=', T_LESSEQUAL)
1242                 case '<':
1243                         MAYBE_PROLOG
1244                         MAYBE('=', T_LESSLESSEQUAL)
1245                         ELSE(T_LESSLESS)
1246                 ELSE('<')
1247         case '>':
1248                 MAYBE_PROLOG
1249                 MAYBE('=', T_GREATEREQUAL)
1250                 case '>':
1251                         MAYBE_PROLOG
1252                         MAYBE('=', T_GREATERGREATEREQUAL)
1253                         ELSE(T_GREATERGREATER)
1254                 ELSE('>')
1255         case '^':
1256                 MAYBE_PROLOG
1257                 MAYBE('=', T_CARETEQUAL)
1258                 ELSE('^')
1259         case '|':
1260                 MAYBE_PROLOG
1261                 MAYBE('=', T_PIPEEQUAL)
1262                 MAYBE('|', T_PIPEPIPE)
1263                 ELSE('|')
1264         case ':':
1265                 MAYBE_PROLOG
1266                 MAYBE_DIGRAPH('>', ']', symbol_colongreater)
1267                 case ':':
1268                         if (c_mode & _CXX) {
1269                                 next_char();
1270                                 set_punctuator(T_COLONCOLON);
1271                                 return;
1272                         }
1273                         /* FALLTHROUGH */
1274                 ELSE(':')
1275         case '=':
1276                 MAYBE_PROLOG
1277                 MAYBE('=', T_EQUALEQUAL)
1278                 ELSE('=')
1279         case '#':
1280                 MAYBE_PROLOG
1281                 MAYBE('#', T_HASHHASH)
1282                 ELSE('#')
1283
1284         case '?':
1285         case '[':
1286         case ']':
1287         case '(':
1288         case ')':
1289         case '{':
1290         case '}':
1291         case '~':
1292         case ';':
1293         case ',':
1294                 set_punctuator(input.c);
1295                 next_char();
1296                 return;
1297
1298         case EOF:
1299                 if (input_stack != NULL) {
1300                         fclose(close_pp_input());
1301                         pop_restore_input();
1302                         fputc('\n', out);
1303                         if (input.c == (utf32)EOF)
1304                                 --input.position.lineno;
1305                         print_line_directive(&input.position, "2");
1306                         goto restart;
1307                 } else {
1308                         info.at_line_begin = true;
1309                         set_punctuator(T_EOF);
1310                 }
1311                 return;
1312
1313         case '\\':
1314                 next_char();
1315                 int next_c = input.c;
1316                 put_back(input.c);
1317                 input.c = '\\';
1318                 if (next_c == 'U' || next_c == 'u') {
1319                         parse_symbol();
1320                         return;
1321                 }
1322                 /* FALLTHROUGH */
1323         default:
1324 dollar_sign:
1325                 if (error_on_unknown_chars) {
1326                         errorf(&pp_token.base.source_position,
1327                                "unknown character '%lc' found\n", input.c);
1328                         next_char();
1329                         goto restart;
1330                 } else {
1331                         assert(obstack_object_size(&symbol_obstack) == 0);
1332                         obstack_grow_utf8(&symbol_obstack, input.c);
1333                         obstack_1grow(&symbol_obstack, '\0');
1334                         char     *const string = obstack_finish(&symbol_obstack);
1335                         symbol_t *const symbol = symbol_table_insert(string);
1336                         if (symbol->string != string)
1337                                 obstack_free(&symbol_obstack, string);
1338
1339                         pp_token.kind        = T_UNKNOWN_CHAR;
1340                         pp_token.base.symbol = symbol;
1341                         next_char();
1342                         return;
1343                 }
1344         }
1345 }
1346
1347 static void print_quoted_string(const char *const string)
1348 {
1349         fputc('"', out);
1350         for (const char *c = string; *c != 0; ++c) {
1351                 switch (*c) {
1352                 case '"': fputs("\\\"", out); break;
1353                 case '\\':  fputs("\\\\", out); break;
1354                 case '\a':  fputs("\\a", out); break;
1355                 case '\b':  fputs("\\b", out); break;
1356                 case '\f':  fputs("\\f", out); break;
1357                 case '\n':  fputs("\\n", out); break;
1358                 case '\r':  fputs("\\r", out); break;
1359                 case '\t':  fputs("\\t", out); break;
1360                 case '\v':  fputs("\\v", out); break;
1361                 case '\?':  fputs("\\?", out); break;
1362                 default:
1363                         if (!isprint(*c)) {
1364                                 fprintf(out, "\\%03o", (unsigned)*c);
1365                                 break;
1366                         }
1367                         fputc(*c, out);
1368                         break;
1369                 }
1370         }
1371         fputc('"', out);
1372 }
1373
1374 static void print_line_directive(const source_position_t *pos, const char *add)
1375 {
1376         if (!out)
1377                 return;
1378
1379         fprintf(out, "# %u ", pos->lineno);
1380         print_quoted_string(pos->input_name);
1381         if (add != NULL) {
1382                 fputc(' ', out);
1383                 fputs(add, out);
1384         }
1385
1386         printed_input_name = pos->input_name;
1387         input.output_line  = pos->lineno-1;
1388 }
1389
1390 static bool emit_newlines(void)
1391 {
1392         unsigned delta = pp_token.base.source_position.lineno - input.output_line;
1393         if (delta == 0)
1394                 return false;
1395
1396         if (delta >= 9) {
1397                 fputc('\n', out);
1398                 print_line_directive(&pp_token.base.source_position, NULL);
1399                 fputc('\n', out);
1400         } else {
1401                 for (unsigned i = 0; i < delta; ++i) {
1402                         fputc('\n', out);
1403                 }
1404         }
1405         input.output_line = pp_token.base.source_position.lineno;
1406
1407         for (unsigned i = 0; i < info.whitespace_at_line_begin; ++i)
1408                 fputc(' ', out);
1409
1410         return true;
1411 }
1412
1413 void set_preprocessor_output(FILE *output)
1414 {
1415         out = output;
1416         if (out != NULL) {
1417                 error_on_unknown_chars   = false;
1418                 resolve_escape_sequences = false;
1419         } else {
1420                 error_on_unknown_chars   = true;
1421                 resolve_escape_sequences = true;
1422         }
1423 }
1424
1425 void emit_pp_token(void)
1426 {
1427         if (!emit_newlines() &&
1428             (info.had_whitespace || tokens_would_paste(last_token, pp_token.kind)))
1429                 fputc(' ', out);
1430
1431         switch (pp_token.kind) {
1432         case T_NUMBER:
1433                 fputs(pp_token.literal.string.begin, out);
1434                 break;
1435
1436         case T_STRING_LITERAL:
1437                 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1438                 fputc('"', out);
1439                 fputs(pp_token.literal.string.begin, out);
1440                 fputc('"', out);
1441                 break;
1442
1443         case T_CHARACTER_CONSTANT:
1444                 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1445                 fputc('\'', out);
1446                 fputs(pp_token.literal.string.begin, out);
1447                 fputc('\'', out);
1448                 break;
1449
1450         case T_MACRO_PARAMETER:
1451                 panic("macro parameter not expanded");
1452
1453         default:
1454                 fputs(pp_token.base.symbol->string, out);
1455                 break;
1456         }
1457         last_token = pp_token.kind;
1458 }
1459
1460 static void eat_pp_directive(void)
1461 {
1462         while (!info.at_line_begin) {
1463                 next_input_token();
1464         }
1465 }
1466
1467 static bool strings_equal(const string_t *string1, const string_t *string2)
1468 {
1469         size_t size = string1->size;
1470         if (size != string2->size)
1471                 return false;
1472
1473         const char *c1 = string1->begin;
1474         const char *c2 = string2->begin;
1475         for (size_t i = 0; i < size; ++i, ++c1, ++c2) {
1476                 if (*c1 != *c2)
1477                         return false;
1478         }
1479         return true;
1480 }
1481
1482 static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
1483 {
1484         if (token1->kind != token2->kind)
1485                 return false;
1486
1487         switch (token1->kind) {
1488         case T_NUMBER:
1489         case T_CHARACTER_CONSTANT:
1490         case T_STRING_LITERAL:
1491                 return strings_equal(&token1->literal.string, &token2->literal.string);
1492
1493         case T_MACRO_PARAMETER:
1494                 return token1->macro_parameter.def->symbol
1495                     == token2->macro_parameter.def->symbol;
1496
1497         default:
1498                 return token1->base.symbol == token2->base.symbol;
1499         }
1500 }
1501
1502 static bool pp_definitions_equal(const pp_definition_t *definition1,
1503                                  const pp_definition_t *definition2)
1504 {
1505         if (definition1->list_len != definition2->list_len)
1506                 return false;
1507
1508         size_t               len = definition1->list_len;
1509         const saved_token_t *t1  = definition1->token_list;
1510         const saved_token_t *t2  = definition2->token_list;
1511         for (size_t i = 0; i < len; ++i, ++t1, ++t2) {
1512                 if (!pp_tokens_equal(&t1->token, &t2->token))
1513                         return false;
1514         }
1515         return true;
1516 }
1517
1518 static bool is_defineable_token(char const *const context)
1519 {
1520         if (info.at_line_begin) {
1521                 errorf(&pp_token.base.source_position, "unexpected end of line after %s", context);
1522         }
1523
1524         symbol_t *const symbol = pp_token.base.symbol;
1525         if (!symbol)
1526                 goto no_ident;
1527
1528         if (pp_token.kind != T_IDENTIFIER) {
1529                 switch (symbol->string[0]) {
1530                 case SYMBOL_CASES:
1531 dollar_sign:
1532                         break;
1533
1534                 default:
1535 no_ident:
1536                         errorf(&pp_token.base.source_position, "expected identifier after %s, got %K", context, &pp_token);
1537                         return false;
1538                 }
1539         }
1540
1541         /* TODO turn this into a flag in pp_def. */
1542         switch (symbol->pp_ID) {
1543         /* Â§6.10.8:4 */
1544         case TP_defined:
1545                 errorf(&pp_token.base.source_position, "%K cannot be used as macro name in %s", &pp_token, context);
1546                 return false;
1547
1548         default:
1549                 return true;
1550         }
1551 }
1552
1553 static void parse_define_directive(void)
1554 {
1555         eat_pp(TP_define);
1556         if (skip_mode) {
1557                 eat_pp_directive();
1558                 return;
1559         }
1560
1561         assert(obstack_object_size(&pp_obstack) == 0);
1562
1563         if (!is_defineable_token("#define"))
1564                 goto error_out;
1565         symbol_t *const symbol = pp_token.base.symbol;
1566
1567         pp_definition_t *new_definition
1568                 = obstack_alloc(&pp_obstack, sizeof(new_definition[0]));
1569         memset(new_definition, 0, sizeof(new_definition[0]));
1570         new_definition->symbol          = symbol;
1571         new_definition->source_position = input.position;
1572
1573         /* this is probably the only place where spaces are significant in the
1574          * lexer (except for the fact that they separate tokens). #define b(x)
1575          * is something else than #define b (x) */
1576         if (input.c == '(') {
1577                 next_input_token();
1578                 eat_token('(');
1579
1580                 while (true) {
1581                         switch (pp_token.kind) {
1582                         case T_DOTDOTDOT:
1583                                 new_definition->is_variadic = true;
1584                                 eat_token(T_DOTDOTDOT);
1585                                 if (pp_token.kind != ')') {
1586                                         errorf(&input.position,
1587                                                         "'...' not at end of macro argument list");
1588                                         goto error_out;
1589                                 }
1590                                 break;
1591
1592                         case T_IDENTIFIER: {
1593                                 pp_definition_t parameter;
1594                                 memset(&parameter, 0, sizeof(parameter));
1595                                 parameter.source_position = pp_token.base.source_position;
1596                                 parameter.symbol          = pp_token.base.symbol;
1597                                 parameter.is_parameter    = true;
1598                                 obstack_grow(&pp_obstack, &parameter, sizeof(parameter));
1599                                 eat_token(T_IDENTIFIER);
1600
1601                                 if (pp_token.kind == ',') {
1602                                         eat_token(',');
1603                                         break;
1604                                 }
1605
1606                                 if (pp_token.kind != ')') {
1607                                         errorf(&pp_token.base.source_position,
1608                                                "expected ',' or ')' after identifier, got %K",
1609                                                &pp_token);
1610                                         goto error_out;
1611                                 }
1612                                 break;
1613                         }
1614
1615                         case ')':
1616                                 eat_token(')');
1617                                 goto finish_argument_list;
1618
1619                         default:
1620                                 errorf(&pp_token.base.source_position,
1621                                        "expected identifier, '...' or ')' in #define argument list, got %K",
1622                                        &pp_token);
1623                                 goto error_out;
1624                         }
1625                 }
1626
1627         finish_argument_list:
1628                 new_definition->has_parameters = true;
1629                 size_t size = obstack_object_size(&pp_obstack);
1630                 new_definition->n_parameters
1631                         = size / sizeof(new_definition->parameters[0]);
1632                 new_definition->parameters = obstack_finish(&pp_obstack);
1633                 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1634                         pp_definition_t *param    = &new_definition->parameters[i];
1635                         symbol_t        *symbol   = param->symbol;
1636                         pp_definition_t *previous = symbol->pp_definition;
1637                         if (previous != NULL
1638                             && previous->function_definition == new_definition) {
1639                                 errorf(&param->source_position,
1640                                        "duplicate macro parameter '%Y'", symbol);
1641                                 param->symbol = sym_anonymous;
1642                                 continue;
1643                         }
1644                         param->parent_expansion    = previous;
1645                         param->function_definition = new_definition;
1646                         symbol->pp_definition      = param;
1647                 }
1648         } else {
1649                 next_input_token();
1650         }
1651
1652         /* construct token list */
1653         assert(obstack_object_size(&pp_obstack) == 0);
1654         while (!info.at_line_begin) {
1655                 if (pp_token.kind == T_IDENTIFIER) {
1656                         const symbol_t  *symbol     = pp_token.base.symbol;
1657                         pp_definition_t *definition = symbol->pp_definition;
1658                         if (definition != NULL
1659                             && definition->function_definition == new_definition) {
1660                             pp_token.kind                = T_MACRO_PARAMETER;
1661                             pp_token.macro_parameter.def = definition;
1662                         }
1663                 }
1664                 saved_token_t saved_token;
1665                 saved_token.token = pp_token;
1666                 saved_token.had_whitespace = info.had_whitespace;
1667                 obstack_grow(&pp_obstack, &saved_token, sizeof(saved_token));
1668                 next_input_token();
1669         }
1670
1671         new_definition->list_len   = obstack_object_size(&pp_obstack)
1672                 / sizeof(new_definition->token_list[0]);
1673         new_definition->token_list = obstack_finish(&pp_obstack);
1674
1675         if (new_definition->has_parameters) {
1676                 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1677                         pp_definition_t *param      = &new_definition->parameters[i];
1678                         symbol_t        *symbol     = param->symbol;
1679                         if (symbol == sym_anonymous)
1680                                 continue;
1681                         assert(symbol->pp_definition == param);
1682                         assert(param->function_definition == new_definition);
1683                         symbol->pp_definition   = param->parent_expansion;
1684                         param->parent_expansion = NULL;
1685                 }
1686         }
1687
1688         pp_definition_t *old_definition = symbol->pp_definition;
1689         if (old_definition != NULL) {
1690                 if (!pp_definitions_equal(old_definition, new_definition)) {
1691                         warningf(WARN_OTHER, &input.position, "multiple definition of macro '%Y' (first defined %P)", symbol, &old_definition->source_position);
1692                 } else {
1693                         /* reuse the old definition */
1694                         obstack_free(&pp_obstack, new_definition);
1695                         new_definition = old_definition;
1696                 }
1697         }
1698
1699         symbol->pp_definition = new_definition;
1700         return;
1701
1702 error_out:
1703         if (obstack_object_size(&pp_obstack) > 0) {
1704                 char *ptr = obstack_finish(&pp_obstack);
1705                 obstack_free(&pp_obstack, ptr);
1706         }
1707         eat_pp_directive();
1708 }
1709
1710 static void parse_undef_directive(void)
1711 {
1712         eat_pp(TP_undef);
1713         if (skip_mode) {
1714                 eat_pp_directive();
1715                 return;
1716         }
1717
1718         if (!is_defineable_token("#undef")) {
1719                 eat_pp_directive();
1720                 return;
1721         }
1722
1723         pp_token.base.symbol->pp_definition = NULL;
1724         next_input_token();
1725
1726         if (!info.at_line_begin) {
1727                 warningf(WARN_OTHER, &input.position, "extra tokens at end of #undef directive");
1728         }
1729         eat_pp_directive();
1730 }
1731
1732 /** behind an #include we can have the special headername lexems.
1733  * They're only allowed behind an #include so they're not recognized
1734  * by the normal next_preprocessing_token. We handle them as a special
1735  * exception here */
1736 static void parse_headername(void)
1737 {
1738         const source_position_t start_position = input.position;
1739         string_t                string         = { NULL, 0, STRING_ENCODING_CHAR };
1740         assert(obstack_object_size(&symbol_obstack) == 0);
1741
1742         if (info.at_line_begin) {
1743                 parse_error("expected headername after #include");
1744                 goto finish_error;
1745         }
1746
1747         /* check wether we have a "... or <... headername */
1748         switch (input.c) {
1749         {
1750                 utf32 delimiter;
1751         case '<': delimiter = '>'; goto parse_name;
1752         case '"': delimiter = '"'; goto parse_name;
1753 parse_name:
1754                 next_char();
1755                 while (true) {
1756                         switch (input.c) {
1757                         case NEWLINE:
1758                         case EOF:
1759                                 errorf(&pp_token.base.source_position, "header name without closing '%c'", (char)delimiter);
1760                                 goto finish_error;
1761
1762                         default:
1763                                 if (input.c == delimiter) {
1764                                         next_char();
1765                                         goto finished_headername;
1766                                 } else {
1767                                         obstack_1grow(&symbol_obstack, (char)input.c);
1768                                         next_char();
1769                                 }
1770                                 break;
1771                         }
1772                 }
1773                 /* we should never be here */
1774         }
1775
1776         default:
1777                 /* TODO: do normal pp_token parsing and concatenate results */
1778                 panic("pp_token concat include not implemented yet");
1779         }
1780
1781 finished_headername:
1782         string = sym_make_string(STRING_ENCODING_CHAR);
1783
1784 finish_error:
1785         pp_token.base.source_position = start_position;
1786         pp_token.kind                 = T_HEADERNAME;
1787         pp_token.literal.string       = string;
1788 }
1789
1790 static bool do_include(bool system_include, char const *const headername)
1791 {
1792         size_t headername_len = strlen(headername);
1793         if (!system_include) {
1794                 /* put dirname of current input on obstack */
1795                 const char *filename   = input.position.input_name;
1796                 const char *last_slash = strrchr(filename, '/');
1797                 const char *full_name;
1798                 if (last_slash != NULL) {
1799                         size_t len = last_slash - filename;
1800                         obstack_grow(&symbol_obstack, filename, len + 1);
1801                         obstack_grow0(&symbol_obstack, headername, headername_len);
1802                         char *complete_path = obstack_finish(&symbol_obstack);
1803                         full_name = identify_string(complete_path);
1804                 } else {
1805                         full_name = headername;
1806                 }
1807
1808                 FILE *file = fopen(full_name, "r");
1809                 if (file != NULL) {
1810                         switch_pp_input(file, full_name);
1811                         return true;
1812                 }
1813         }
1814
1815         assert(obstack_object_size(&symbol_obstack) == 0);
1816         /* check searchpath */
1817         for (searchpath_entry_t *entry = searchpath; entry != NULL;
1818              entry = entry->next) {
1819             const char *path = entry->path;
1820             size_t      len  = strlen(path);
1821                 obstack_grow(&symbol_obstack, path, len);
1822                 if (path[len-1] != '/')
1823                         obstack_1grow(&symbol_obstack, '/');
1824                 obstack_grow(&symbol_obstack, headername, headername_len+1);
1825
1826                 char *complete_path = obstack_finish(&symbol_obstack);
1827                 FILE *file          = fopen(complete_path, "r");
1828                 if (file != NULL) {
1829                         const char *filename = identify_string(complete_path);
1830                         switch_pp_input(file, filename);
1831                         return true;
1832                 } else {
1833                         obstack_free(&symbol_obstack, complete_path);
1834                 }
1835         }
1836
1837         return false;
1838 }
1839
1840 static void parse_include_directive(void)
1841 {
1842         if (skip_mode) {
1843                 eat_pp_directive();
1844                 return;
1845         }
1846
1847         /* don't eat the TP_include here!
1848          * we need an alternative parsing for the next token */
1849         skip_till_newline(true);
1850         bool system_include = input.c == '<';
1851         parse_headername();
1852         string_t headername = pp_token.literal.string;
1853         if (headername.begin == NULL) {
1854                 eat_pp_directive();
1855                 return;
1856         }
1857
1858         bool had_nonwhitespace = skip_till_newline(false);
1859         if (had_nonwhitespace) {
1860                 warningf(WARN_OTHER, &pp_token.base.source_position,
1861                          "extra tokens at end of #include directive");
1862         }
1863
1864         if (n_inputs > INCLUDE_LIMIT) {
1865                 errorf(&pp_token.base.source_position, "#include nested too deeply");
1866                 /* eat \n or EOF */
1867                 next_input_token();
1868                 return;
1869         }
1870
1871         /* switch inputs */
1872         info.whitespace_at_line_begin = 0;
1873         info.had_whitespace           = false;
1874         info.at_line_begin            = true;
1875         emit_newlines();
1876         push_input();
1877         bool res = do_include(system_include, pp_token.literal.string.begin);
1878         if (res) {
1879                 next_input_token();
1880         } else {
1881                 errorf(&pp_token.base.source_position, "failed including '%S': %s", &pp_token.literal.string, strerror(errno));
1882                 pop_restore_input();
1883         }
1884 }
1885
1886 static pp_conditional_t *push_conditional(void)
1887 {
1888         pp_conditional_t *conditional
1889                 = obstack_alloc(&pp_obstack, sizeof(*conditional));
1890         memset(conditional, 0, sizeof(*conditional));
1891
1892         conditional->parent = conditional_stack;
1893         conditional_stack   = conditional;
1894
1895         return conditional;
1896 }
1897
1898 static void pop_conditional(void)
1899 {
1900         assert(conditional_stack != NULL);
1901         conditional_stack = conditional_stack->parent;
1902 }
1903
1904 void check_unclosed_conditionals(void)
1905 {
1906         while (conditional_stack != NULL) {
1907                 pp_conditional_t *conditional = conditional_stack;
1908
1909                 if (conditional->in_else) {
1910                         errorf(&conditional->source_position, "unterminated #else");
1911                 } else {
1912                         errorf(&conditional->source_position, "unterminated condition");
1913                 }
1914                 pop_conditional();
1915         }
1916 }
1917
1918 static void parse_ifdef_ifndef_directive(bool const is_ifdef)
1919 {
1920         bool condition;
1921         eat_pp(is_ifdef ? TP_ifdef : TP_ifndef);
1922
1923         if (skip_mode) {
1924                 eat_pp_directive();
1925                 pp_conditional_t *conditional = push_conditional();
1926                 conditional->source_position  = pp_token.base.source_position;
1927                 conditional->skip             = true;
1928                 return;
1929         }
1930
1931         if (pp_token.kind != T_IDENTIFIER || info.at_line_begin) {
1932                 errorf(&pp_token.base.source_position,
1933                        "expected identifier after #%s, got %K",
1934                        is_ifdef ? "ifdef" : "ifndef", &pp_token);
1935                 eat_pp_directive();
1936
1937                 /* just take the true case in the hope to avoid further errors */
1938                 condition = true;
1939         } else {
1940                 /* evaluate wether we are in true or false case */
1941                 condition = (bool)pp_token.base.symbol->pp_definition == is_ifdef;
1942                 eat_token(T_IDENTIFIER);
1943
1944                 if (!info.at_line_begin) {
1945                         errorf(&pp_token.base.source_position,
1946                                "extra tokens at end of #%s",
1947                                is_ifdef ? "ifdef" : "ifndef");
1948                         eat_pp_directive();
1949                 }
1950         }
1951
1952         pp_conditional_t *conditional = push_conditional();
1953         conditional->source_position  = pp_token.base.source_position;
1954         conditional->condition        = condition;
1955
1956         if (!condition) {
1957                 skip_mode = true;
1958         }
1959 }
1960
1961 static void parse_else_directive(void)
1962 {
1963         eat_pp(TP_else);
1964
1965         if (!info.at_line_begin) {
1966                 if (!skip_mode) {
1967                         warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #else");
1968                 }
1969                 eat_pp_directive();
1970         }
1971
1972         pp_conditional_t *conditional = conditional_stack;
1973         if (conditional == NULL) {
1974                 errorf(&pp_token.base.source_position, "#else without prior #if");
1975                 return;
1976         }
1977
1978         if (conditional->in_else) {
1979                 errorf(&pp_token.base.source_position,
1980                        "#else after #else (condition started %P)",
1981                        &conditional->source_position);
1982                 skip_mode = true;
1983                 return;
1984         }
1985
1986         conditional->in_else = true;
1987         if (!conditional->skip) {
1988                 skip_mode = conditional->condition;
1989         }
1990         conditional->source_position = pp_token.base.source_position;
1991 }
1992
1993 static void parse_endif_directive(void)
1994 {
1995         eat_pp(TP_endif);
1996
1997         if (!info.at_line_begin) {
1998                 if (!skip_mode) {
1999                         warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #endif");
2000                 }
2001                 eat_pp_directive();
2002         }
2003
2004         pp_conditional_t *conditional = conditional_stack;
2005         if (conditional == NULL) {
2006                 errorf(&pp_token.base.source_position, "#endif without prior #if");
2007                 return;
2008         }
2009
2010         if (!conditional->skip) {
2011                 skip_mode = false;
2012         }
2013         pop_conditional();
2014 }
2015
/** Pragma names that parse_pragma_directive() recognises after "#pragma STDC". */
typedef enum stdc_pragma_kind_t {
	STDC_UNKNOWN,          /**< not (or not yet) recognised */
	STDC_FP_CONTRACT,      /**< selected by the FP_CONTRACT identifier */
	STDC_FENV_ACCESS,      /**< selected by the FENV_ACCESS identifier */
	STDC_CX_LIMITED_RANGE  /**< selected by the CX_LIMITED_RANGE identifier */
} stdc_pragma_kind_t;
2022
/** Argument values that parse_pragma_directive() accepts for a STDC pragma. */
typedef enum stdc_pragma_value_kind_t {
	STDC_VALUE_UNKNOWN,  /**< anything that is not ON, OFF or DEFAULT */
	STDC_VALUE_ON,       /**< selected by the ON identifier */
	STDC_VALUE_OFF,      /**< selected by the OFF identifier */
	STDC_VALUE_DEFAULT   /**< selected by the DEFAULT identifier */
} stdc_pragma_value_kind_t;
2029
2030 static void parse_pragma_directive(void)
2031 {
2032         eat_pp(TP_pragma);
2033         if (skip_mode) {
2034                 eat_pp_directive();
2035                 return;
2036         }
2037
2038         if (pp_token.kind != T_IDENTIFIER) {
2039                 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
2040                          "expected identifier after #pragma");
2041                 eat_pp_directive();
2042                 return;
2043         }
2044
2045         stdc_pragma_kind_t kind = STDC_UNKNOWN;
2046         if (pp_token.base.symbol->pp_ID == TP_STDC && c_mode & _C99) {
2047                 /* a STDC pragma */
2048                 next_input_token();
2049
2050                 switch (pp_token.base.symbol->pp_ID) {
2051                 case TP_FP_CONTRACT:      kind = STDC_FP_CONTRACT;      break;
2052                 case TP_FENV_ACCESS:      kind = STDC_FENV_ACCESS;      break;
2053                 case TP_CX_LIMITED_RANGE: kind = STDC_CX_LIMITED_RANGE; break;
2054                 default:                  break;
2055                 }
2056                 if (kind != STDC_UNKNOWN) {
2057                         next_input_token();
2058                         stdc_pragma_value_kind_t value;
2059                         switch (pp_token.base.symbol->pp_ID) {
2060                         case TP_ON:      value = STDC_VALUE_ON;      break;
2061                         case TP_OFF:     value = STDC_VALUE_OFF;     break;
2062                         case TP_DEFAULT: value = STDC_VALUE_DEFAULT; break;
2063                         default:         value = STDC_VALUE_UNKNOWN; break;
2064                         }
2065                         if (value == STDC_VALUE_UNKNOWN) {
2066                                 kind = STDC_UNKNOWN;
2067                                 errorf(&pp_token.base.source_position, "bad STDC pragma argument");
2068                         }
2069                 }
2070         }
2071         eat_pp_directive();
2072         if (kind == STDC_UNKNOWN) {
2073                 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
2074                          "encountered unknown #pragma");
2075         }
2076 }
2077
2078 static void parse_line_directive(void)
2079 {
2080         if (pp_token.kind != T_NUMBER) {
2081                 if (!skip_mode)
2082                         parse_error("expected integer");
2083         } else {
2084                 char      *end;
2085                 long const line = strtol(pp_token.literal.string.begin, &end, 0);
2086                 if (*end == '\0') {
2087                         /* use offset -1 as this is about the next line */
2088                         input.position.lineno = line - 1;
2089                         /* force output of line */
2090                         input.output_line = input.position.lineno - 20;
2091                 } else {
2092                         if (!skip_mode) {
2093                                 errorf(&input.position, "'%S' is not a valid line number",
2094                                            &pp_token.literal.string);
2095                         }
2096                 }
2097                 next_input_token();
2098                 if (info.at_line_begin)
2099                         return;
2100         }
2101         if (pp_token.kind == T_STRING_LITERAL
2102             && pp_token.literal.string.encoding == STRING_ENCODING_CHAR) {
2103                 input.position.input_name       = pp_token.literal.string.begin;
2104                 input.position.is_system_header = false;
2105                 next_input_token();
2106
2107                 /* attempt to parse numeric flags as outputted by gcc preprocessor */
2108                 while (!info.at_line_begin && pp_token.kind == T_NUMBER) {
2109                         /* flags:
2110                          * 1 - indicates start of a new file
2111                          * 2 - indicates return from a file
2112                          * 3 - indicates system header
2113                          * 4 - indicates implicit extern "C" in C++ mode
2114                          *
2115                          * currently we're only interested in "3"
2116                          */
2117                         if (streq(pp_token.literal.string.begin, "3")) {
2118                                 input.position.is_system_header = true;
2119                         }
2120                         next_input_token();
2121                 }
2122         }
2123
2124         eat_pp_directive();
2125 }
2126
2127 static void parse_error_directive(void)
2128 {
2129         if (skip_mode) {
2130                 eat_pp_directive();
2131                 return;
2132         }
2133
2134         bool const old_resolve_escape_sequences = resolve_escape_sequences;
2135         resolve_escape_sequences = false;
2136
2137         source_position_t const pos = pp_token.base.source_position;
2138         do {
2139                 if (info.had_whitespace && obstack_object_size(&pp_obstack) != 0)
2140                         obstack_1grow(&pp_obstack, ' ');
2141
2142                 switch (pp_token.kind) {
2143                 case T_NUMBER: {
2144                         string_t const *const str = &pp_token.literal.string;
2145                         obstack_grow(&pp_obstack, str->begin, str->size);
2146                         break;
2147                 }
2148
2149                 {
2150                         char delim;
2151                 case T_STRING_LITERAL:     delim =  '"'; goto string;
2152                 case T_CHARACTER_CONSTANT: delim = '\''; goto string;
2153 string:;
2154                         string_t const *const str = &pp_token.literal.string;
2155                         char     const *const enc = get_string_encoding_prefix(str->encoding);
2156                         obstack_printf(&pp_obstack, "%s%c%s%c", enc, delim, str->begin, delim);
2157                         break;
2158                 }
2159
2160                 default: {
2161                         char const *const str = pp_token.base.symbol->string;
2162                         obstack_grow(&pp_obstack, str, strlen(str));
2163                         break;
2164                 }
2165                 }
2166
2167                 next_input_token();
2168         } while (!info.at_line_begin);
2169
2170         resolve_escape_sequences = old_resolve_escape_sequences;
2171
2172         obstack_1grow(&pp_obstack, '\0');
2173         char *const str = obstack_finish(&pp_obstack);
2174         errorf(&pos, "#%s", str);
2175         obstack_free(&pp_obstack, str);
2176 }
2177
2178 static void parse_preprocessing_directive(void)
2179 {
2180         eat_token('#');
2181
2182         if (info.at_line_begin) {
2183                 /* empty directive */
2184                 return;
2185         }
2186
2187         if (pp_token.base.symbol) {
2188                 switch (pp_token.base.symbol->pp_ID) {
2189                 case TP_define:  parse_define_directive();            break;
2190                 case TP_else:    parse_else_directive();              break;
2191                 case TP_endif:   parse_endif_directive();             break;
2192                 case TP_error:   parse_error_directive();             break;
2193                 case TP_ifdef:   parse_ifdef_ifndef_directive(true);  break;
2194                 case TP_ifndef:  parse_ifdef_ifndef_directive(false); break;
2195                 case TP_include: parse_include_directive();           break;
2196                 case TP_line:    next_input_token(); goto line_directive;
2197                 case TP_pragma:  parse_pragma_directive();            break;
2198                 case TP_undef:   parse_undef_directive();             break;
2199                 default:         goto skip;
2200                 }
2201         } else if (pp_token.kind == T_NUMBER) {
2202 line_directive:
2203                 parse_line_directive();
2204         } else {
2205 skip:
2206                 if (!skip_mode) {
2207                         errorf(&pp_token.base.source_position, "invalid preprocessing directive #%K", &pp_token);
2208                 }
2209                 eat_pp_directive();
2210         }
2211
2212         assert(info.at_line_begin);
2213 }
2214
2215 static void finish_current_argument(void)
2216 {
2217         if (current_argument == NULL)
2218                 return;
2219         size_t size = obstack_object_size(&pp_obstack);
2220         current_argument->list_len   = size/sizeof(current_argument->token_list[0]);
2221         current_argument->token_list = obstack_finish(&pp_obstack);
2222 }
2223
/**
 * Advance pp_token to the next token after directive handling and macro
 * expansion.
 *
 * Unless expand_next() delivers a token (presumably from a running macro
 * expansion -- confirm against its definition), tokens are read from the
 * input; directive lines are processed on the way and, while skip_mode is
 * set, tokens are discarded until the end of the input.
 *
 * Identifiers with a macro definition that is not currently expanding start
 * an expansion. For a function-like macro this only happens if a '(' follows;
 * then the tokens of each argument are collected on pp_obstack and stored in
 * the macro's parameter definitions before the expansion starts.
 */
void next_preprocessing_token(void)
{
restart:
	if (!expand_next()) {
		do {
			next_input_token();
			/* a '#' at the start of a line introduces a directive */
			while (pp_token.kind == '#' && info.at_line_begin) {
				parse_preprocessing_directive();
			}
		} while (skip_mode && pp_token.kind != T_EOF);
	}

	const token_kind_t kind = pp_token.kind;
	/* macros are not looked up while the arguments of a call are being
	 * collected, except while an argument is itself being expanded */
	if (current_call == NULL || argument_expanding != NULL) {
		symbol_t *const symbol = pp_token.base.symbol;
		if (symbol) {
			if (kind == T_MACRO_PARAMETER) {
				assert(current_expansion != NULL);
				start_expanding(pp_token.macro_parameter.def);
				goto restart;
			}

			pp_definition_t *const pp_definition = symbol->pp_definition;
			if (pp_definition != NULL && !pp_definition->is_expanding) {
				if (pp_definition->has_parameters) {

					/* check if next token is a '(' */
					whitespace_info_t old_info   = info;
					token_kind_t      next_token = peek_expansion();
					if (next_token == T_EOF) {
						/* nothing left to peek at in the expansion, so
						 * look at the raw input instead */
						info.at_line_begin  = false;
						info.had_whitespace = false;
						skip_whitespace();
						if (input.c == '(') {
							next_token = '(';
						}
					}

					if (next_token == '(') {
						/* remember where the outermost expansion began */
						if (current_expansion == NULL)
							expansion_pos = pp_token.base.source_position;
						next_preprocessing_token();
						assert(pp_token.kind == '(');

						pp_definition->parent_expansion = current_expansion;
						current_call              = pp_definition;
						/* during argument collection expand_pos is the
						 * index of the argument being collected */
						current_call->expand_pos  = 0;
						current_call->expand_info = old_info;
						if (current_call->n_parameters > 0) {
							current_argument = &current_call->parameters[0];
							assert(argument_brace_count == 0);
						}
						goto restart;
					} else {
						/* skip_whitespaces() skipped newlines and whitespace,
						 * remember results for next token */
						next_info = info;
						info      = old_info;
						return;
					}
				} else {
					/* remember where the outermost expansion began */
					if (current_expansion == NULL)
						expansion_pos = pp_token.base.source_position;
					start_expanding(pp_definition);
					goto restart;
				}
			}
		}
	}

	if (current_call != NULL) {
		/* current_call != NULL */
		if (kind == '(') {
			++argument_brace_count;
		} else if (kind == ')') {
			if (argument_brace_count > 0) {
				--argument_brace_count;
			} else {
				/* the ')' that closes the call: expand the macro now */
				finish_current_argument();
				assert(kind == ')');
				start_expanding(current_call);
				info = current_call->expand_info;
				current_call     = NULL;
				current_argument = NULL;
				goto restart;
			}
		} else if (kind == ',' && argument_brace_count == 0) {
			/* a ',' outside nested parentheses separates two arguments */
			finish_current_argument();
			current_call->expand_pos++;
			if (current_call->expand_pos >= current_call->n_parameters) {
				errorf(&pp_token.base.source_position,
					   "too many arguments passed for macro '%Y'",
					   current_call->symbol);
				/* tokens of surplus arguments are not saved */
				current_argument = NULL;
			} else {
				current_argument
					= &current_call->parameters[current_call->expand_pos];
			}
			goto restart;
		} else if (kind == T_MACRO_PARAMETER) {
			/* parameters have to be fully expanded before being used as
			 * parameters for another macro-call */
			assert(current_expansion != NULL);
			pp_definition_t *argument = pp_token.macro_parameter.def;
			argument_expanding = argument;
			start_expanding(argument);
			goto restart;
		} else if (kind == T_EOF) {
			errorf(&expansion_pos,
			       "reached end of file while parsing arguments for '%Y'",
			       current_call->symbol);
			return;
		}
		/* every other token becomes part of the current argument */
		if (current_argument != NULL) {
			saved_token_t saved;
			saved.token = pp_token;
			saved.had_whitespace = info.had_whitespace;
			obstack_grow(&pp_obstack, &saved, sizeof(saved));
		}
		goto restart;
	}
}
2346
2347
2348 static void prepend_include_path(const char *path)
2349 {
2350         searchpath_entry_t *entry = OALLOCZ(&config_obstack, searchpath_entry_t);
2351         entry->path = path;
2352         entry->next = searchpath;
2353         searchpath  = entry;
2354 }
2355
2356 static void setup_include_path(void)
2357 {
2358         /* built-in paths */
2359         prepend_include_path("/usr/include");
2360
2361         /* parse environment variable */
2362         const char *cpath = getenv("CPATH");
2363         if (cpath != NULL && *cpath != '\0') {
2364                 const char *begin = cpath;
2365                 const char *c;
2366                 do {
2367                         c = begin;
2368                         while (*c != '\0' && *c != ':')
2369                                 ++c;
2370
2371                         size_t len = c-begin;
2372                         if (len == 0) {
2373                                 /* for gcc compatibility (Matze: I would expect that
2374                                  * nothing happens for an empty entry...) */
2375                                 prepend_include_path(".");
2376                         } else {
2377                                 char *const string = obstack_copy0(&config_obstack, begin, len);
2378                                 prepend_include_path(string);
2379                         }
2380
2381                         begin = c+1;
2382                         /* skip : */
2383                         if (*begin == ':')
2384                                 ++begin;
2385                 } while(*c != '\0');
2386         }
2387 }
2388
2389 static void input_error(unsigned const delta_lines, unsigned const delta_cols, char const *const message)
2390 {
2391         source_position_t pos = pp_token.base.source_position;
2392         pos.lineno += delta_lines;
2393         pos.colno  += delta_cols;
2394         errorf(&pos, "%s", message);
2395 }
2396
/**
 * Set up the preprocessor: symbols, the three obstacks, the string set, the
 * include search path and the input error callback.
 * exit_preprocessor() releases the obstacks and the string set again.
 */
void init_preprocessor(void)
{
	init_symbols();

	obstack_init(&config_obstack);
	obstack_init(&pp_obstack);
	obstack_init(&input_obstack);
	strset_init(&stringset);

	/* allocates its entries on config_obstack, so it has to come after
	 * the obstack initialisation */
	setup_include_path();

	set_input_error_callback(input_error);
}
2410
/**
 * Release what init_preprocessor() set up: the three obstacks are freed
 * completely and the string set is destroyed.
 */
void exit_preprocessor(void)
{
	obstack_free(&input_obstack, NULL);
	obstack_free(&pp_obstack, NULL);
	obstack_free(&config_obstack, NULL);

	strset_destroy(&stringset);
}
2419
2420 int pptest_main(int argc, char **argv);
2421 int pptest_main(int argc, char **argv)
2422 {
2423         init_symbol_table();
2424         init_preprocessor();
2425         init_tokens();
2426
2427         error_on_unknown_chars   = false;
2428         resolve_escape_sequences = false;
2429
2430         /* simplistic commandline parser */
2431         const char *filename = NULL;
2432         const char *output = NULL;
2433         for (int i = 1; i < argc; ++i) {
2434                 const char *opt = argv[i];
2435                 if (streq(opt, "-I")) {
2436                         prepend_include_path(argv[++i]);
2437                         continue;
2438                 } else if (streq(opt, "-E")) {
2439                         /* ignore */
2440                 } else if (streq(opt, "-o")) {
2441                         output = argv[++i];
2442                         continue;
2443                 } else if (opt[0] == '-') {
2444                         fprintf(stderr, "Unknown option '%s'\n", opt);
2445                 } else {
2446                         if (filename != NULL)
2447                                 fprintf(stderr, "Multiple inputs not supported\n");
2448                         filename = argv[i];
2449                 }
2450         }
2451         if (filename == NULL) {
2452                 fprintf(stderr, "No input specified\n");
2453                 return 1;
2454         }
2455
2456         if (output == NULL) {
2457                 out = stdout;
2458         } else {
2459                 out = fopen(output, "w");
2460                 if (out == NULL) {
2461                         fprintf(stderr, "Couldn't open output '%s'\n", output);
2462                         return 1;
2463                 }
2464         }
2465
2466         /* just here for gcc compatibility */
2467         fprintf(out, "# 1 \"%s\"\n", filename);
2468         fprintf(out, "# 1 \"<built-in>\"\n");
2469         fprintf(out, "# 1 \"<command-line>\"\n");
2470
2471         FILE *file = fopen(filename, "r");
2472         if (file == NULL) {
2473                 fprintf(stderr, "Couldn't open input '%s'\n", filename);
2474                 return 1;
2475         }
2476         switch_pp_input(file, filename);
2477
2478         for (;;) {
2479                 next_preprocessing_token();
2480                 if (pp_token.kind == T_EOF)
2481                         break;
2482                 emit_pp_token();
2483         }
2484
2485         fputc('\n', out);
2486         check_unclosed_conditionals();
2487         fclose(close_pp_input());
2488         if (out != stdout)
2489                 fclose(out);
2490
2491         exit_tokens();
2492         exit_preprocessor();
2493         exit_symbol_table();
2494
2495         return 0;
2496 }