implement macro calling
[cparser] / preprocessor.c
1 #include <config.h>
2
3 #include <assert.h>
4 #include <errno.h>
5 #include <string.h>
6 #include <stdbool.h>
7 #include <ctype.h>
8
9 #include "token_t.h"
10 #include "symbol_t.h"
11 #include "adt/util.h"
12 #include "adt/error.h"
13 #include "adt/strutil.h"
14 #include "adt/strset.h"
15 #include "lang_features.h"
16 #include "diagnostic.h"
17 #include "string_rep.h"
18 #include "input.h"
19
/* number of slots reserved at the start of pp_input_t.buf; decoded data is
 * always placed behind them so that put_back() has room in front of it */
#define MAX_PUTBACK 3
/* NOTE(review): not referenced in this part of the file; presumably the
 * maximum nesting depth of included files -- confirm at the use site */
#define INCLUDE_LIMIT 199  /* 199 is for gcc "compatibility" */
22
/** One entry of a definition's replacement list (pp_definition_t.token_list). */
typedef struct saved_token_t {
        token_t token;
        /** copied into info.had_whitespace by expand_next() when the token is
         * replayed (except for the first token of an expansion) */
        bool    had_whitespace;
} saved_token_t;
27
/** Whitespace and line-start state recorded for a token by the lexer. */
typedef struct whitespace_info_t {
        /** current token had whitespace in front of it */
        bool     had_whitespace;
        /** current token is at the beginning of a line.
         * => a "#" at line begin starts a preprocessing directive. */
        bool     at_line_begin;
        /** number of spaces before the first token in a line */
        unsigned whitespace_at_line_begin;
} whitespace_info_t;
37
/**
 * A preprocessor definition together with the bookkeeping that is used while
 * its replacement list is replayed (see start_expanding() and expand_next()).
 */
struct pp_definition_t {
        symbol_t          *symbol;
        source_position_t  source_position;
        /** expansion that was active when this one was started; it becomes
         * current_expansion again in finished_expanding() */
        pp_definition_t   *parent_expansion;
        /** index into token_list of the next token expand_next() returns */
        size_t             expand_pos;
        whitespace_info_t  expand_info;
        bool               is_variadic    : 1;
        /** set by start_expanding(), cleared by finished_expanding() */
        bool               is_expanding   : 1;
        bool               has_parameters : 1;
        bool               is_parameter   : 1;
        pp_definition_t   *function_definition;
        /* NOTE(review): presumably the number of entries in parameters --
         * confirm where the definition is parsed */
        size_t             n_parameters;
        pp_definition_t   *parameters;

        /* replacement */
        /** number of entries in token_list */
        size_t             list_len;
        saved_token_t     *token_list;
};
56
/* NOTE(review): not used in this part of the file; presumably one level of
 * conditional-directive nesting, linked to the enclosing level through
 * parent (see conditional_stack) -- confirm at the use site */
typedef struct pp_conditional_t pp_conditional_t;
struct pp_conditional_t {
        source_position_t  source_position;
        bool               condition;
        bool               in_else;
        /** conditional in skip mode (then+else gets skipped) */
        bool               skip;
        pp_conditional_t  *parent;
};
66
/** State of one input: the stream, its decoder and the decoded look-ahead. */
typedef struct pp_input_t pp_input_t;
struct pp_input_t {
        /** underlying stdio stream; closed by close_input() */
        FILE              *file;
        /** decoder created by input_from_stream(); decode() reads from it */
        input_t           *input;
        /** current character, EOF once the input is exhausted */
        utf32              c;
        /** decoded characters; the first MAX_PUTBACK slots are kept free for
         * put_back() */
        utf32              buf[1024+MAX_PUTBACK];
        /** one past the last valid character in buf */
        const utf32       *bufend;
        /** next character in buf that next_real_char() will return */
        const utf32       *bufpos;
        /** position of the current character */
        source_position_t  position;
        /** next entry of input_stack when this input is saved there */
        pp_input_t        *parent;
        /* NOTE(review): only reset and compared in this part of the file;
         * presumably the line number last written to the output -- confirm */
        unsigned           output_line;
};
79
/* Node of a singly linked list of paths.
 * NOTE(review): presumably the directories searched for included files --
 * confirm where searchpath is consumed */
typedef struct searchpath_entry_t searchpath_entry_t;
struct searchpath_entry_t {
        const char         *path;
        searchpath_entry_t *next;
};
85
/** the input that is currently being read */
static pp_input_t      input;

/** saved outer inputs, most recently pushed first (see push_input() and
 * pop_restore_input()) */
static pp_input_t     *input_stack;
/** number of entries on input_stack */
static unsigned        n_inputs;
/** memory for the entries of input_stack */
static struct obstack  input_obstack;

static pp_conditional_t *conditional_stack;

/** the token most recently produced by the lexer or by expand_next() */
static token_t           pp_token;
/** if true parse_string() stores the value of an escape sequence, otherwise
 * it copies the escape sequence verbatim */
static bool              resolve_escape_sequences = false;
static bool              error_on_unknown_chars   = true;
static bool              skip_mode;
static FILE             *out;
static struct obstack    pp_obstack;
static struct obstack    config_obstack;
static const char       *printed_input_name = NULL;
/** source position that expand_next() stamps onto every replayed token */
static source_position_t expansion_pos;
/** innermost definition whose replacement list is being replayed, or NULL */
static pp_definition_t  *current_expansion  = NULL;
static pp_definition_t  *current_call       = NULL;
static pp_definition_t  *current_argument   = NULL;
/** reset to NULL by finished_expanding() once this definition is done */
static pp_definition_t  *argument_expanding = NULL;
static unsigned          argument_brace_count;
/** set of interned strings used by identify_string() */
static strset_t          stringset;
static token_kind_t      last_token;

static searchpath_entry_t *searchpath;

static whitespace_info_t next_info; /* valid if had_whitespace is true */
/** whitespace state belonging to the current token */
static whitespace_info_t info;

static inline void next_char(void);
static void next_input_token(void);
static void print_line_directive(const source_position_t *pos, const char *add);

/* symbols for the digraph spellings, filled in by init_symbols() */
static symbol_t *symbol_colongreater;
static symbol_t *symbol_lesscolon;
static symbol_t *symbol_lesspercent;
static symbol_t *symbol_percentcolon;
static symbol_t *symbol_percentcolonpercentcolon;
static symbol_t *symbol_percentgreater;
126
127 static void init_symbols(void)
128 {
129         symbol_colongreater             = symbol_table_insert(":>");
130         symbol_lesscolon                = symbol_table_insert("<:");
131         symbol_lesspercent              = symbol_table_insert("<%");
132         symbol_percentcolon             = symbol_table_insert("%:");
133         symbol_percentcolonpercentcolon = symbol_table_insert("%:%:");
134         symbol_percentgreater           = symbol_table_insert("%>");
135 }
136
137 static void switch_input(FILE *file, const char *filename)
138 {
139         input.file                = file;
140         input.input               = input_from_stream(file, NULL);
141         input.bufend              = NULL;
142         input.bufpos              = NULL;
143         input.output_line         = 0;
144         input.position.input_name = filename;
145         input.position.lineno     = 1;
146
147         /* indicate that we're at a new input */
148         print_line_directive(&input.position, input_stack != NULL ? "1" : NULL);
149
150         /* place a virtual '\n' so we realize we're at line begin */
151         input.position.lineno = 0;
152         input.c               = '\n';
153 }
154
155 static void close_input(void)
156 {
157         input_free(input.input);
158         assert(input.file != NULL);
159
160         fclose(input.file);
161         input.input  = NULL;
162         input.file   = NULL;
163         input.bufend = NULL;
164         input.bufpos = NULL;
165         input.c      = EOF;
166 }
167
168 static void push_input(void)
169 {
170         pp_input_t *saved_input
171                 = obstack_alloc(&input_obstack, sizeof(*saved_input));
172
173         memcpy(saved_input, &input, sizeof(*saved_input));
174
175         /* adjust buffer positions */
176         if (input.bufpos != NULL)
177                 saved_input->bufpos = saved_input->buf + (input.bufpos - input.buf);
178         if (input.bufend != NULL)
179                 saved_input->bufend = saved_input->buf + (input.bufend - input.buf);
180
181         saved_input->parent = input_stack;
182         input_stack         = saved_input;
183         ++n_inputs;
184 }
185
186 static void pop_restore_input(void)
187 {
188         assert(n_inputs > 0);
189         assert(input_stack != NULL);
190
191         pp_input_t *saved_input = input_stack;
192
193         memcpy(&input, saved_input, sizeof(input));
194         input.parent = NULL;
195
196         /* adjust buffer positions */
197         if (saved_input->bufpos != NULL)
198                 input.bufpos = input.buf + (saved_input->bufpos - saved_input->buf);
199         if (saved_input->bufend != NULL)
200                 input.bufend = input.buf + (saved_input->bufend - saved_input->buf);
201
202         input_stack = saved_input->parent;
203         obstack_free(&input_obstack, saved_input);
204         --n_inputs;
205 }
206
207 /**
208  * Prints a parse error message at the current token.
209  *
210  * @param msg   the error message
211  */
212 static void parse_error(const char *msg)
213 {
214         errorf(&pp_token.base.source_position,  "%s", msg);
215 }
216
217 static inline void next_real_char(void)
218 {
219         assert(input.bufpos <= input.bufend);
220         if (input.bufpos >= input.bufend) {
221                 size_t const n = decode(input.input, input.buf + MAX_PUTBACK, lengthof(input.buf) - MAX_PUTBACK);
222                 if (n == 0) {
223                         input.c = EOF;
224                         return;
225                 }
226                 input.bufpos = input.buf + MAX_PUTBACK;
227                 input.bufend = input.bufpos + n;
228         }
229         input.c = *input.bufpos++;
230         ++input.position.colno;
231 }
232
233 /**
234  * Put a character back into the buffer.
235  *
236  * @param pc  the character to put back
237  */
238 static inline void put_back(utf32 const pc)
239 {
240         assert(input.bufpos > input.buf);
241         *(--input.bufpos - input.buf + input.buf) = (char) pc;
242         --input.position.colno;
243 }
244
/**
 * Case-label list that consumes a line ending inside a switch over input.c;
 * use it as "case NEWLINE:".  It matches "\r", "\r\n" and "\n": the '\n'
 * label sits inside the if-body so that a lone '\n' and the '\n' of a "\r\n"
 * pair share the same next_char() call.  Afterwards input.position.lineno is
 * incremented, input.position.colno is reset to 1 and execution continues
 * with the statements written after "case NEWLINE:".
 * The expansion defines the label "newline", so it can be used only once per
 * function.
 */
#define NEWLINE \
        '\r': \
                next_char(); \
                if (input.c == '\n') { \
        case '\n': \
                        next_char(); \
                } \
                ++input.position.lineno; \
                input.position.colno = 1; \
                goto newline; \
                newline // Let it look like an ordinary case label.
256
/** Asserts that the current character is c_type, then advances to the next
 * character. */
#define eat(c_type) (assert(input.c == c_type), next_char())
258
259 static void maybe_concat_lines(void)
260 {
261         eat('\\');
262
263         switch (input.c) {
264         case NEWLINE:
265                 info.whitespace_at_line_begin = 0;
266                 return;
267
268         default:
269                 break;
270         }
271
272         put_back(input.c);
273         input.c = '\\';
274 }
275
276 /**
277  * Set c to the next input character, ie.
278  * after expanding trigraphs.
279  */
280 static inline void next_char(void)
281 {
282         next_real_char();
283
284         /* filter trigraphs and concatenated lines */
285         if (UNLIKELY(input.c == '\\')) {
286                 maybe_concat_lines();
287                 goto end_of_next_char;
288         }
289
290         if (LIKELY(input.c != '?'))
291                 goto end_of_next_char;
292
293         next_real_char();
294         if (LIKELY(input.c != '?')) {
295                 put_back(input.c);
296                 input.c = '?';
297                 goto end_of_next_char;
298         }
299
300         next_real_char();
301         switch (input.c) {
302         case '=': input.c = '#'; break;
303         case '(': input.c = '['; break;
304         case '/': input.c = '\\'; maybe_concat_lines(); break;
305         case ')': input.c = ']'; break;
306         case '\'': input.c = '^'; break;
307         case '<': input.c = '{'; break;
308         case '!': input.c = '|'; break;
309         case '>': input.c = '}'; break;
310         case '-': input.c = '~'; break;
311         default:
312                 put_back(input.c);
313                 put_back('?');
314                 input.c = '?';
315                 break;
316         }
317
318 end_of_next_char:;
319 #ifdef DEBUG_CHARS
320         printf("nchar '%c'\n", input.c);
321 #endif
322 }
323
324
325
/**
 * Returns true if the given char is an octal digit ('0' to '7').
 *
 * @param chr  the character to check
 */
static inline bool is_octal_digit(int chr)
{
        /* the decimal digits are guaranteed to have consecutive values */
        return chr >= '0' && chr <= '7';
}
347
/**
 * Returns the numeric value (0-15) of a decimal or hexadecimal digit
 * character; calls panic() for any other character.
 */
static int digit_value(int digit)
{
        /* only the decimal digits are guaranteed to be consecutive, so the
         * letters are still listed one by one */
        if (digit >= '0' && digit <= '9')
                return digit - '0';

        switch (digit) {
        case 'A': case 'a': return 10;
        case 'B': case 'b': return 11;
        case 'C': case 'c': return 12;
        case 'D': case 'd': return 13;
        case 'E': case 'e': return 14;
        case 'F': case 'f': return 15;
        default:
                panic("wrong character given");
        }
}
381
382 /**
383  * Parses an octal character sequence.
384  *
385  * @param first_digit  the already read first digit
386  */
387 static utf32 parse_octal_sequence(const utf32 first_digit)
388 {
389         assert(is_octal_digit(first_digit));
390         utf32 value = digit_value(first_digit);
391         if (!is_octal_digit(input.c)) return value;
392         value = 8 * value + digit_value(input.c);
393         next_char();
394         if (!is_octal_digit(input.c)) return value;
395         value = 8 * value + digit_value(input.c);
396         next_char();
397         return value;
398
399 }
400
401 /**
402  * Parses a hex character sequence.
403  */
404 static utf32 parse_hex_sequence(void)
405 {
406         utf32 value = 0;
407         while (isxdigit(input.c)) {
408                 value = 16 * value + digit_value(input.c);
409                 next_char();
410         }
411         return value;
412 }
413
414 /**
415  * Parse an escape sequence.
416  */
417 static utf32 parse_escape_sequence(void)
418 {
419         eat('\\');
420
421         utf32 const ec = input.c;
422         next_char();
423
424         switch (ec) {
425         case '"':  return '"';
426         case '\'': return '\'';
427         case '\\': return '\\';
428         case '?': return '\?';
429         case 'a': return '\a';
430         case 'b': return '\b';
431         case 'f': return '\f';
432         case 'n': return '\n';
433         case 'r': return '\r';
434         case 't': return '\t';
435         case 'v': return '\v';
436         case 'x':
437                 return parse_hex_sequence();
438         case '0':
439         case '1':
440         case '2':
441         case '3':
442         case '4':
443         case '5':
444         case '6':
445         case '7':
446                 return parse_octal_sequence(ec);
447         case EOF:
448                 parse_error("reached end of file while parsing escape sequence");
449                 return EOF;
450         /* \E is not documented, but handled, by GCC.  It is acceptable according
451          * to Â§6.11.4, whereas \e is not. */
452         case 'E':
453         case 'e':
454                 if (c_mode & _GNUC)
455                         return 27;   /* hopefully 27 is ALWAYS the code for ESCAPE */
456                 break;
457         case 'u':
458         case 'U':
459                 parse_error("universal character parsing not implemented yet");
460                 return EOF;
461         default:
462                 break;
463         }
464         /* Â§6.4.4.4:8 footnote 64 */
465         parse_error("unknown escape sequence");
466         return EOF;
467 }
468
469 static const char *identify_string(char *string)
470 {
471         const char *result = strset_insert(&stringset, string);
472         if (result != string) {
473                 obstack_free(&symbol_obstack, string);
474         }
475         return result;
476 }
477
478 static string_t sym_make_string(string_encoding_t const enc)
479 {
480         obstack_1grow(&symbol_obstack, '\0');
481         size_t      const len    = obstack_object_size(&symbol_obstack) - 1;
482         char       *const string = obstack_finish(&symbol_obstack);
483         char const *const result = identify_string(string);
484         return (string_t){ result, len, enc };
485 }
486
487 static void parse_string(utf32 const delimiter, token_kind_t const kind,
488                          string_encoding_t const enc,
489                          char const *const context)
490 {
491         const unsigned start_linenr = input.position.lineno;
492
493         eat(delimiter);
494
495         while (true) {
496                 switch (input.c) {
497                 case '\\': {
498                         if (resolve_escape_sequences) {
499                                 utf32 const tc = parse_escape_sequence();
500                                 if (enc == STRING_ENCODING_CHAR) {
501                                         if (tc >= 0x100) {
502                                                 warningf(WARN_OTHER, &pp_token.base.source_position, "escape sequence out of range");
503                                         }
504                                         obstack_1grow(&symbol_obstack, tc);
505                                 } else {
506                                         obstack_grow_utf8(&symbol_obstack, tc);
507                                 }
508                         } else {
509                                 obstack_1grow(&symbol_obstack, (char)input.c);
510                                 next_char();
511                                 obstack_1grow(&symbol_obstack, (char)input.c);
512                                 next_char();
513                         }
514                         break;
515                 }
516
517                 case NEWLINE:
518                         errorf(&pp_token.base.source_position, "newline while parsing %s", context);
519                         break;
520
521                 case EOF: {
522                         source_position_t source_position;
523                         source_position.input_name = pp_token.base.source_position.input_name;
524                         source_position.lineno     = start_linenr;
525                         errorf(&source_position, "EOF while parsing %s", context);
526                         goto end_of_string;
527                 }
528
529                 default:
530                         if (input.c == delimiter) {
531                                 next_char();
532                                 goto end_of_string;
533                         } else {
534                                 obstack_grow_utf8(&symbol_obstack, input.c);
535                                 next_char();
536                                 break;
537                         }
538                 }
539         }
540
541 end_of_string:
542         pp_token.kind           = kind;
543         pp_token.literal.string = sym_make_string(enc);
544 }
545
546 static void parse_string_literal(string_encoding_t const enc)
547 {
548         parse_string('"', T_STRING_LITERAL, enc, "string literal");
549 }
550
551 static void parse_character_constant(string_encoding_t const enc)
552 {
553         parse_string('\'', T_CHARACTER_CONSTANT, enc, "character constant");
554         if (pp_token.literal.string.size == 0) {
555                 parse_error("empty character constant");
556         }
557 }
558
/**
 * Case-label list (use as "case SYMBOL_CASES_WITHOUT_E_P:") matching '_' and
 * all ASCII letters except 'e', 'p', 'E' and 'P'.  Those four are left out so
 * that parse_number() can handle them separately (they may be followed by a
 * sign).
 */
#define SYMBOL_CASES_WITHOUT_E_P \
             'a': \
        case 'b': \
        case 'c': \
        case 'd': \
        case 'f': \
        case 'g': \
        case 'h': \
        case 'i': \
        case 'j': \
        case 'k': \
        case 'l': \
        case 'm': \
        case 'n': \
        case 'o': \
        case 'q': \
        case 'r': \
        case 's': \
        case 't': \
        case 'u': \
        case 'v': \
        case 'w': \
        case 'x': \
        case 'y': \
        case 'z': \
        case 'A': \
        case 'B': \
        case 'C': \
        case 'D': \
        case 'F': \
        case 'G': \
        case 'H': \
        case 'I': \
        case 'J': \
        case 'K': \
        case 'L': \
        case 'M': \
        case 'N': \
        case 'O': \
        case 'Q': \
        case 'R': \
        case 'S': \
        case 'T': \
        case 'U': \
        case 'V': \
        case 'W': \
        case 'X': \
        case 'Y': \
        case 'Z': \
        case '_'
609
/**
 * Case-label list (use as "case SYMBOL_CASES:") matching '_' and every ASCII
 * letter, i.e. the characters that can start an identifier here.
 */
#define SYMBOL_CASES \
             SYMBOL_CASES_WITHOUT_E_P: \
        case 'e': \
        case 'p': \
        case 'E': \
        case 'P'
616
/** Case-label list (use as "case DIGIT_CASES:") matching the decimal digits. */
#define DIGIT_CASES \
             '0':  \
        case '1':  \
        case '2':  \
        case '3':  \
        case '4':  \
        case '5':  \
        case '6':  \
        case '7':  \
        case '8':  \
        case '9'
628
629 static void start_expanding(pp_definition_t *definition)
630 {
631         definition->parent_expansion = current_expansion;
632         definition->expand_pos       = 0;
633         definition->is_expanding     = true;
634         if (definition->list_len > 0) {
635                 definition->token_list[0].had_whitespace
636                         = info.had_whitespace;
637         }
638         current_expansion = definition;
639 }
640
641 static void finished_expanding(pp_definition_t *definition)
642 {
643         assert(definition->is_expanding);
644         pp_definition_t *parent = definition->parent_expansion;
645         definition->parent_expansion = NULL;
646         definition->is_expanding     = false;
647
648         /* stop further expanding once we expanded a parameter used in a
649          * sub macro-call */
650         if (definition == argument_expanding)
651                 argument_expanding = NULL;
652
653         assert(current_expansion == definition);
654         current_expansion = parent;
655 }
656
657 static inline void set_punctuator(token_kind_t const kind)
658 {
659         pp_token.kind        = kind;
660         pp_token.base.symbol = token_symbols[kind];
661 }
662
663 static inline void set_digraph(token_kind_t const kind, symbol_t *const symbol)
664 {
665         pp_token.kind        = kind;
666         pp_token.base.symbol = symbol;
667 }
668
669 /**
670  * returns next final token from a preprocessor macro expansion
671  */
672 static bool expand_next(void)
673 {
674         if (current_expansion == NULL)
675                 return false;
676
677 restart:;
678         size_t pos = current_expansion->expand_pos;
679         if (pos >= current_expansion->list_len) {
680                 finished_expanding(current_expansion);
681                 /* it was the outermost expansion, parse pptoken normally */
682                 if (current_expansion == NULL) {
683                         return false;
684                 }
685                 goto restart;
686         }
687         const saved_token_t *saved = &current_expansion->token_list[pos++];
688         pp_token = saved->token;
689
690         if (current_expansion->expand_pos > 0)
691                 info.had_whitespace = saved->had_whitespace;
692         pp_token.base.source_position = expansion_pos;
693         ++current_expansion->expand_pos;
694
695         return true;
696 }
697
698 /**
699  * Returns the next token kind found when continuing the current expansions
700  * without starting new sub-expansions.
701  */
702 static token_kind_t peek_expansion(void)
703 {
704         pp_definition_t *expansion = current_expansion;
705         while (expansion != NULL && expansion->expand_pos >= expansion->list_len) {
706                 expansion = expansion->parent_expansion;
707         }
708         if (expansion == NULL)
709                 return T_EOF;
710         return expansion->token_list[expansion->expand_pos].token.kind;
711 }
712
713 static void skip_line_comment(void)
714 {
715         info.had_whitespace = true;
716         while (true) {
717                 switch (input.c) {
718                 case EOF:
719                         return;
720
721                 case '\r':
722                 case '\n':
723                         return;
724
725                 default:
726                         next_char();
727                         break;
728                 }
729         }
730 }
731
732 static void skip_multiline_comment(void)
733 {
734         info.had_whitespace = true;
735
736         unsigned start_linenr = input.position.lineno;
737         while (true) {
738                 switch (input.c) {
739                 case '/':
740                         next_char();
741                         if (input.c == '*') {
742                                 /* TODO: nested comment, warn here */
743                         }
744                         break;
745                 case '*':
746                         next_char();
747                         if (input.c == '/') {
748                                 if (input.position.lineno != input.output_line)
749                                         info.whitespace_at_line_begin = input.position.colno;
750                                 next_char();
751                                 return;
752                         }
753                         break;
754
755                 case NEWLINE:
756                         break;
757
758                 case EOF: {
759                         source_position_t source_position;
760                         source_position.input_name = pp_token.base.source_position.input_name;
761                         source_position.lineno     = start_linenr;
762                         errorf(&source_position, "at end of file while looking for comment end");
763                         return;
764                 }
765
766                 default:
767                         next_char();
768                         break;
769                 }
770         }
771 }
772
773 static bool skip_till_newline(bool stop_at_non_whitespace)
774 {
775         bool res = false;
776         while (true) {
777                 switch (input.c) {
778                 case ' ':
779                 case '\t':
780                         next_char();
781                         continue;
782
783                 case '/':
784                         next_char();
785                         if (input.c == '/') {
786                                 next_char();
787                                 skip_line_comment();
788                                 continue;
789                         } else if (input.c == '*') {
790                                 next_char();
791                                 skip_multiline_comment();
792                                 continue;
793                         } else {
794                                 put_back(input.c);
795                                 input.c = '/';
796                         }
797                         return true;
798
799                 case NEWLINE:
800                         return res;
801
802                 default:
803                         if (stop_at_non_whitespace)
804                                 return false;
805                         res = true;
806                         next_char();
807                         continue;
808                 }
809         }
810 }
811
812 static void skip_whitespace(void)
813 {
814         while (true) {
815                 switch (input.c) {
816                 case ' ':
817                 case '\t':
818                         ++info.whitespace_at_line_begin;
819                         info.had_whitespace = true;
820                         next_char();
821                         continue;
822
823                 case NEWLINE:
824                         info.at_line_begin  = true;
825                         info.had_whitespace = true;
826                         info.whitespace_at_line_begin = 0;
827                         continue;
828
829                 case '/':
830                         next_char();
831                         if (input.c == '/') {
832                                 next_char();
833                                 skip_line_comment();
834                                 continue;
835                         } else if (input.c == '*') {
836                                 next_char();
837                                 skip_multiline_comment();
838                                 continue;
839                         } else {
840                                 put_back(input.c);
841                                 input.c = '/';
842                         }
843                         return;
844
845                 default:
846                         return;
847                 }
848         }
849 }
850
851 static inline void eat_pp(pp_token_kind_t const kind)
852 {
853         assert(pp_token.base.symbol->pp_ID == kind);
854         (void) kind;
855         next_input_token();
856 }
857
858 static inline void eat_token(token_kind_t const kind)
859 {
860         assert(pp_token.kind == kind);
861         (void)kind;
862         next_input_token();
863 }
864
865 static void parse_symbol(void)
866 {
867         obstack_1grow(&symbol_obstack, (char) input.c);
868         next_char();
869
870         while (true) {
871                 switch (input.c) {
872                 case DIGIT_CASES:
873                 case SYMBOL_CASES:
874                         obstack_1grow(&symbol_obstack, (char) input.c);
875                         next_char();
876                         break;
877
878                 default:
879                         goto end_symbol;
880                 }
881         }
882
883 end_symbol:
884         obstack_1grow(&symbol_obstack, '\0');
885         char *string = obstack_finish(&symbol_obstack);
886
887         /* might be a wide string or character constant ( L"string"/L'c' ) */
888         if (input.c == '"' && string[0] == 'L' && string[1] == '\0') {
889                 obstack_free(&symbol_obstack, string);
890                 parse_string_literal(STRING_ENCODING_WIDE);
891                 return;
892         } else if (input.c == '\'' && string[0] == 'L' && string[1] == '\0') {
893                 obstack_free(&symbol_obstack, string);
894                 parse_character_constant(STRING_ENCODING_WIDE);
895                 return;
896         }
897
898         symbol_t *symbol = symbol_table_insert(string);
899
900         pp_token.kind        = symbol->ID;
901         pp_token.base.symbol = symbol;
902
903         /* we can free the memory from symbol obstack if we already had an entry in
904          * the symbol table */
905         if (symbol->string != string) {
906                 obstack_free(&symbol_obstack, string);
907         }
908 }
909
910 static void parse_number(void)
911 {
912         obstack_1grow(&symbol_obstack, (char) input.c);
913         next_char();
914
915         while (true) {
916                 switch (input.c) {
917                 case '.':
918                 case DIGIT_CASES:
919                 case SYMBOL_CASES_WITHOUT_E_P:
920                         obstack_1grow(&symbol_obstack, (char) input.c);
921                         next_char();
922                         break;
923
924                 case 'e':
925                 case 'p':
926                 case 'E':
927                 case 'P':
928                         obstack_1grow(&symbol_obstack, (char) input.c);
929                         next_char();
930                         if (input.c == '+' || input.c == '-') {
931                                 obstack_1grow(&symbol_obstack, (char) input.c);
932                                 next_char();
933                         }
934                         break;
935
936                 default:
937                         goto end_number;
938                 }
939         }
940
941 end_number:
942         pp_token.kind           = T_NUMBER;
943         pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
944 }
945
/*
 * Helpers for lexing punctuators that share a prefix.  They are meant to be
 * used in this order inside a function returning void:
 *   MAYBE_PROLOG   MAYBE(...)/MAYBE_DIGRAPH(...)...   ELSE(...)/ELSE_CODE(...)
 */

/** Consumes the current character and opens a switch over the next one. */
#define MAYBE_PROLOG \
        next_char(); \
        switch (input.c) {

/** If the next character is ch: consume it, make the token the punctuator
 * kind and return from the enclosing function. */
#define MAYBE(ch, kind) \
        case ch: \
                next_char(); \
                set_punctuator(kind); \
                return;

/** Like MAYBE, but records symbol as the token's symbol (see set_digraph()). */
#define MAYBE_DIGRAPH(ch, kind, symbol) \
        case ch: \
                next_char(); \
                set_digraph(kind, symbol); \
                return;

/** Default branch: runs code, returns from the enclosing function and closes
 * the switch opened by MAYBE_PROLOG. */
#define ELSE_CODE(code) \
        default: \
                code \
                return; \
        }

/** Default branch that makes the token the punctuator kind. */
#define ELSE(kind) ELSE_CODE(set_punctuator(kind);)
969
970 /** identifies and returns the next preprocessing token contained in the
971  * input stream. No macro expansion is performed.
 *
 * The token is stored in the global pp_token, the whitespace/line-begin
 * state belonging to it in the global info. Blanks, tabs, newlines and
 * comments are skipped. When the current input is exhausted and input_stack
 * is not NULL, the previous input is restored and lexing continues there;
 * otherwise a T_EOF token is produced. */
972 static void next_input_token(void)
973 {
        /* take over whitespace state stored in next_info, if there is any;
         * otherwise the token starts without leading whitespace and not at the
         * beginning of a line */
974         if (next_info.had_whitespace) {
975                 info = next_info;
976                 next_info.had_whitespace = false;
977         } else {
978                 info.at_line_begin  = false;
979                 info.had_whitespace = false;
980         }
        /* re-entered after whitespace, newlines, comments, skipped unknown
         * characters and after returning from an included input */
981 restart:
982         pp_token.base.source_position = input.position;
983         pp_token.base.symbol          = NULL;
984
985         switch (input.c) {
986         case ' ':
987         case '\t':
988                 info.whitespace_at_line_begin++;
989                 info.had_whitespace = true;
990                 next_char();
991                 goto restart;
992
        /* NOTE(review): there is no explicit next_char() here; presumably the
         * NEWLINE case macro consumes the line break itself -- confirm against
         * its definition */
993         case NEWLINE:
994                 info.at_line_begin            = true;
995                 info.had_whitespace           = true;
996                 info.whitespace_at_line_begin = 0;
997                 goto restart;
998
999         case SYMBOL_CASES:
1000                 parse_symbol();
1001                 return;
1002
1003         case DIGIT_CASES:
1004                 parse_number();
1005                 return;
1006
1007         case '"':
1008                 parse_string_literal(STRING_ENCODING_CHAR);
1009                 return;
1010
1011         case '\'':
1012                 parse_character_constant(STRING_ENCODING_CHAR);
1013                 return;
1014
1015         case '.':
1016                 MAYBE_PROLOG
                        /* '.' followed by a digit is a number: push the digit
                         * back and let parse_number() start over at the '.' */
1017                         case '0':
1018                         case '1':
1019                         case '2':
1020                         case '3':
1021                         case '4':
1022                         case '5':
1023                         case '6':
1024                         case '7':
1025                         case '8':
1026                         case '9':
1027                                 put_back(input.c);
1028                                 input.c = '.';
1029                                 parse_number();
1030                                 return;
1031
                        /* ".." is only a token when a third '.' follows;
                         * otherwise the second '.' is made current again and a
                         * single '.' is produced */
1032                         case '.':
1033                                 MAYBE_PROLOG
1034                                 MAYBE('.', T_DOTDOTDOT)
1035                                 ELSE_CODE(
1036                                         put_back(input.c);
1037                                         input.c = '.';
1038                                         set_punctuator('.');
1039                                 )
1040                 ELSE('.')
1041         case '&':
1042                 MAYBE_PROLOG
1043                 MAYBE('&', T_ANDAND)
1044                 MAYBE('=', T_ANDEQUAL)
1045                 ELSE('&')
1046         case '*':
1047                 MAYBE_PROLOG
1048                 MAYBE('=', T_ASTERISKEQUAL)
1049                 ELSE('*')
1050         case '+':
1051                 MAYBE_PROLOG
1052                 MAYBE('+', T_PLUSPLUS)
1053                 MAYBE('=', T_PLUSEQUAL)
1054                 ELSE('+')
1055         case '-':
1056                 MAYBE_PROLOG
1057                 MAYBE('>', T_MINUSGREATER)
1058                 MAYBE('-', T_MINUSMINUS)
1059                 MAYBE('=', T_MINUSEQUAL)
1060                 ELSE('-')
1061         case '!':
1062                 MAYBE_PROLOG
1063                 MAYBE('=', T_EXCLAMATIONMARKEQUAL)
1064                 ELSE('!')
1065         case '/':
1066                 MAYBE_PROLOG
1067                 MAYBE('=', T_SLASHEQUAL)
                /* comments produce no token: skip them and lex again */
1068                 case '*':
1069                         next_char();
1070                         skip_multiline_comment();
1071                         goto restart;
1072                 case '/':
1073                         next_char();
1074                         skip_line_comment();
1075                         goto restart;
1076                 ELSE('/')
1077         case '%':
1078                 MAYBE_PROLOG
1079                 MAYBE_DIGRAPH('>', '}', symbol_percentgreater)
1080                 MAYBE('=', T_PERCENTEQUAL)
                /* "%:" is the digraph for '#', "%:%:" the one for "##". After
                 * "%:%" without a following ':' the second '%' is made current
                 * again and only "%:" is produced. */
1081                 case ':':
1082                         MAYBE_PROLOG
1083                         case '%':
1084                                 MAYBE_PROLOG
1085                                 MAYBE_DIGRAPH(':', T_HASHHASH, symbol_percentcolonpercentcolon)
1086                                 ELSE_CODE(
1087                                         put_back(input.c);
1088                                         input.c = '%';
1089                                         goto digraph_percentcolon;
1090                                 )
1091                         ELSE_CODE(
1092 digraph_percentcolon:
1093                                 set_digraph('#', symbol_percentcolon);
1094                         )
1095                 ELSE('%')
1096         case '<':
1097                 MAYBE_PROLOG
1098                 MAYBE_DIGRAPH(':', '[', symbol_lesscolon)
1099                 MAYBE_DIGRAPH('%', '{', symbol_lesspercent)
1100                 MAYBE('=', T_LESSEQUAL)
1101                 case '<':
1102                         MAYBE_PROLOG
1103                         MAYBE('=', T_LESSLESSEQUAL)
1104                         ELSE(T_LESSLESS)
1105                 ELSE('<')
1106         case '>':
1107                 MAYBE_PROLOG
1108                 MAYBE('=', T_GREATEREQUAL)
1109                 case '>':
1110                         MAYBE_PROLOG
1111                         MAYBE('=', T_GREATERGREATEREQUAL)
1112                         ELSE(T_GREATERGREATER)
1113                 ELSE('>')
1114         case '^':
1115                 MAYBE_PROLOG
1116                 MAYBE('=', T_CARETEQUAL)
1117                 ELSE('^')
1118         case '|':
1119                 MAYBE_PROLOG
1120                 MAYBE('=', T_PIPEEQUAL)
1121                 MAYBE('|', T_PIPEPIPE)
1122                 ELSE('|')
1123         case ':':
1124                 MAYBE_PROLOG
1125                 MAYBE_DIGRAPH('>', ']', symbol_colongreater)
                /* "::" is a single token only when the _CXX bit is set in
                 * c_mode; otherwise the second ':' stays in the input and a
                 * plain ':' is produced by the default branch */
1126                 case ':':
1127                         if (c_mode & _CXX) {
1128                                 next_char();
1129                                 set_punctuator(T_COLONCOLON);
1130                                 return;
1131                         }
1132                         /* FALLTHROUGH */
1133                 ELSE(':')
1134         case '=':
1135                 MAYBE_PROLOG
1136                 MAYBE('=', T_EQUALEQUAL)
1137                 ELSE('=')
1138         case '#':
1139                 MAYBE_PROLOG
1140                 MAYBE('#', T_HASHHASH)
1141                 ELSE('#')
1142
        /* single character punctuators: the character value is the token kind */
1143         case '?':
1144         case '[':
1145         case ']':
1146         case '(':
1147         case ')':
1148         case '{':
1149         case '}':
1150         case '~':
1151         case ';':
1152         case ',':
1153                 set_punctuator(input.c);
1154                 next_char();
1155                 return;
1156
1157         case EOF:
                /* end of an input with saved inputs on input_stack: close it,
                 * restore the previous input, write a newline and a line
                 * directive with flag "2" to the output and continue lexing
                 * there */
1158                 if (input_stack != NULL) {
1159                         close_input();
1160                         pop_restore_input();
1161                         fputc('\n', out);
                        /* the restored input is at its end as well: report the
                         * previous line number in the directive */
1162                         if (input.c == (utf32)EOF)
1163                                 --input.position.lineno;
1164                         print_line_directive(&input.position, "2");
1165                         goto restart;
1166                 } else {
                        /* T_EOF counts as being at a line begin, which ends
                         * loops that run until info.at_line_begin is set */
1167                         info.at_line_begin = true;
1168                         set_punctuator(T_EOF);
1169                 }
1170                 return;
1171
1172         default:
                /* a character that starts no known token: either report it
                 * and skip it, or hand it on as a T_UNKNOWN_CHAR token whose
                 * symbol is the UTF-8 encoded character */
1173                 if (error_on_unknown_chars) {
1174                         errorf(&pp_token.base.source_position,
1175                                "unknown character '%lc' found\n", input.c);
1176                         next_char();
1177                         goto restart;
1178                 } else {
1179                         assert(obstack_object_size(&symbol_obstack) == 0);
1180                         obstack_grow_utf8(&symbol_obstack, input.c);
1181                         obstack_1grow(&symbol_obstack, '\0');
1182                         char     *const string = obstack_finish(&symbol_obstack);
1183                         symbol_t *const symbol = symbol_table_insert(string);
                        /* the symbol table returned a symbol with a different
                         * string: our copy is not needed */
1184                         if (symbol->string != string)
1185                                 obstack_free(&symbol_obstack, string);
1186
1187                         pp_token.kind        = T_UNKNOWN_CHAR;
1188                         pp_token.base.symbol = symbol;
1189                         next_char();
1190                         return;
1191                 }
1192         }
1193 }
1194
1195 static void print_quoted_string(const char *const string)
1196 {
1197         fputc('"', out);
1198         for (const char *c = string; *c != 0; ++c) {
1199                 switch (*c) {
1200                 case '"': fputs("\\\"", out); break;
1201                 case '\\':  fputs("\\\\", out); break;
1202                 case '\a':  fputs("\\a", out); break;
1203                 case '\b':  fputs("\\b", out); break;
1204                 case '\f':  fputs("\\f", out); break;
1205                 case '\n':  fputs("\\n", out); break;
1206                 case '\r':  fputs("\\r", out); break;
1207                 case '\t':  fputs("\\t", out); break;
1208                 case '\v':  fputs("\\v", out); break;
1209                 case '\?':  fputs("\\?", out); break;
1210                 default:
1211                         if (!isprint(*c)) {
1212                                 fprintf(out, "\\%03o", (unsigned)*c);
1213                                 break;
1214                         }
1215                         fputc(*c, out);
1216                         break;
1217                 }
1218         }
1219         fputc('"', out);
1220 }
1221
1222 static void print_line_directive(const source_position_t *pos, const char *add)
1223 {
1224         fprintf(out, "# %u ", pos->lineno);
1225         print_quoted_string(pos->input_name);
1226         if (add != NULL) {
1227                 fputc(' ', out);
1228                 fputs(add, out);
1229         }
1230
1231         printed_input_name = pos->input_name;
1232         input.output_line  = pos->lineno-1;
1233 }
1234
1235 static bool emit_newlines(void)
1236 {
1237         unsigned delta = pp_token.base.source_position.lineno - input.output_line;
1238         if (delta == 0)
1239                 return false;
1240
1241         if (delta >= 9) {
1242                 fputc('\n', out);
1243                 print_line_directive(&pp_token.base.source_position, NULL);
1244                 fputc('\n', out);
1245         } else {
1246                 for (unsigned i = 0; i < delta; ++i) {
1247                         fputc('\n', out);
1248                 }
1249         }
1250         input.output_line = pp_token.base.source_position.lineno;
1251
1252         for (unsigned i = 0; i < info.whitespace_at_line_begin; ++i)
1253                 fputc(' ', out);
1254
1255         return true;
1256 }
1257
1258 static void emit_pp_token(void)
1259 {
1260         if (!emit_newlines() &&
1261             (info.had_whitespace || tokens_would_paste(last_token, pp_token.kind)))
1262                 fputc(' ', out);
1263
1264         switch (pp_token.kind) {
1265         case T_NUMBER:
1266                 fputs(pp_token.literal.string.begin, out);
1267                 break;
1268
1269         case T_STRING_LITERAL:
1270                 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1271                 fputc('"', out);
1272                 fputs(pp_token.literal.string.begin, out);
1273                 fputc('"', out);
1274                 break;
1275
1276         case T_CHARACTER_CONSTANT:
1277                 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1278                 fputc('\'', out);
1279                 fputs(pp_token.literal.string.begin, out);
1280                 fputc('\'', out);
1281                 break;
1282
1283         case T_MACRO_PARAMETER:
1284                 panic("macro parameter not expanded");
1285
1286         default:
1287                 fputs(pp_token.base.symbol->string, out);
1288                 break;
1289         }
1290         last_token = pp_token.kind;
1291 }
1292
1293 static void eat_pp_directive(void)
1294 {
1295         while (!info.at_line_begin) {
1296                 next_input_token();
1297         }
1298 }
1299
1300 static bool strings_equal(const string_t *string1, const string_t *string2)
1301 {
1302         size_t size = string1->size;
1303         if (size != string2->size)
1304                 return false;
1305
1306         const char *c1 = string1->begin;
1307         const char *c2 = string2->begin;
1308         for (size_t i = 0; i < size; ++i, ++c1, ++c2) {
1309                 if (*c1 != *c2)
1310                         return false;
1311         }
1312         return true;
1313 }
1314
1315 static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
1316 {
1317         if (token1->kind != token2->kind)
1318                 return false;
1319
1320         switch (token1->kind) {
1321         case T_NUMBER:
1322         case T_CHARACTER_CONSTANT:
1323         case T_STRING_LITERAL:
1324                 return strings_equal(&token1->literal.string, &token2->literal.string);
1325
1326         case T_MACRO_PARAMETER:
1327                 return token1->macro_parameter.def->symbol
1328                     == token2->macro_parameter.def->symbol;
1329
1330         default:
1331                 return token1->base.symbol == token2->base.symbol;
1332         }
1333 }
1334
1335 static bool pp_definitions_equal(const pp_definition_t *definition1,
1336                                  const pp_definition_t *definition2)
1337 {
1338         if (definition1->list_len != definition2->list_len)
1339                 return false;
1340
1341         size_t               len = definition1->list_len;
1342         const saved_token_t *t1  = definition1->token_list;
1343         const saved_token_t *t2  = definition2->token_list;
1344         for (size_t i = 0; i < len; ++i, ++t1, ++t2) {
1345                 if (!pp_tokens_equal(&t1->token, &t2->token))
1346                         return false;
1347         }
1348         return true;
1349 }
1350
1351 static void parse_define_directive(void)
1352 {
1353         eat_pp(TP_define);
1354         if (skip_mode) {
1355                 eat_pp_directive();
1356                 return;
1357         }
1358
1359         assert(obstack_object_size(&pp_obstack) == 0);
1360
1361         if (pp_token.kind != T_IDENTIFIER || info.at_line_begin) {
1362                 errorf(&pp_token.base.source_position,
1363                        "expected identifier after #define, got %K", &pp_token);
1364                 goto error_out;
1365         }
1366         symbol_t *const symbol = pp_token.base.symbol;
1367
1368         pp_definition_t *new_definition
1369                 = obstack_alloc(&pp_obstack, sizeof(new_definition[0]));
1370         memset(new_definition, 0, sizeof(new_definition[0]));
1371         new_definition->symbol          = symbol;
1372         new_definition->source_position = input.position;
1373
1374         /* this is probably the only place where spaces are significant in the
1375          * lexer (except for the fact that they separate tokens). #define b(x)
1376          * is something else than #define b (x) */
1377         if (input.c == '(') {
1378                 eat_token(T_IDENTIFIER);
1379                 eat_token('(');
1380
1381                 while (true) {
1382                         switch (pp_token.kind) {
1383                         case T_DOTDOTDOT:
1384                                 new_definition->is_variadic = true;
1385                                 eat_token(T_DOTDOTDOT);
1386                                 if (pp_token.kind != ')') {
1387                                         errorf(&input.position,
1388                                                         "'...' not at end of macro argument list");
1389                                         goto error_out;
1390                                 }
1391                                 break;
1392
1393                         case T_IDENTIFIER: {
1394                                 pp_definition_t parameter;
1395                                 memset(&parameter, 0, sizeof(parameter));
1396                                 parameter.source_position = pp_token.base.source_position;
1397                                 parameter.symbol          = pp_token.base.symbol;
1398                                 parameter.is_parameter    = true;
1399                                 obstack_grow(&pp_obstack, &parameter, sizeof(parameter));
1400                                 eat_token(T_IDENTIFIER);
1401
1402                                 if (pp_token.kind == ',') {
1403                                         eat_token(',');
1404                                         break;
1405                                 }
1406
1407                                 if (pp_token.kind != ')') {
1408                                         errorf(&pp_token.base.source_position,
1409                                                "expected ',' or ')' after identifier, got %K",
1410                                                &pp_token);
1411                                         goto error_out;
1412                                 }
1413                                 break;
1414                         }
1415
1416                         case ')':
1417                                 eat_token(')');
1418                                 goto finish_argument_list;
1419
1420                         default:
1421                                 errorf(&pp_token.base.source_position,
1422                                        "expected identifier, '...' or ')' in #define argument list, got %K",
1423                                        &pp_token);
1424                                 goto error_out;
1425                         }
1426                 }
1427
1428         finish_argument_list:
1429                 new_definition->has_parameters = true;
1430                 size_t size = obstack_object_size(&pp_obstack);
1431                 new_definition->n_parameters
1432                         = size / sizeof(new_definition->parameters[0]);
1433                 new_definition->parameters = obstack_finish(&pp_obstack);
1434                 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1435                         pp_definition_t *param    = &new_definition->parameters[i];
1436                         symbol_t        *symbol   = param->symbol;
1437                         pp_definition_t *previous = symbol->pp_definition;
1438                         if (previous != NULL
1439                             && previous->function_definition == new_definition) {
1440                                 errorf(&param->source_position,
1441                                        "duplicate macro parameter '%Y'", symbol);
1442                                 param->symbol = sym_anonymous;
1443                                 continue;
1444                         }
1445                         param->parent_expansion    = previous;
1446                         param->function_definition = new_definition;
1447                         symbol->pp_definition      = param;
1448                 }
1449         } else {
1450                 eat_token(T_IDENTIFIER);
1451         }
1452
1453         /* construct token list */
1454         assert(obstack_object_size(&pp_obstack) == 0);
1455         while (!info.at_line_begin) {
1456                 if (pp_token.kind == T_IDENTIFIER) {
1457                         const symbol_t  *symbol     = pp_token.base.symbol;
1458                         pp_definition_t *definition = symbol->pp_definition;
1459                         if (definition != NULL
1460                             && definition->function_definition == new_definition) {
1461                             pp_token.kind                = T_MACRO_PARAMETER;
1462                             pp_token.macro_parameter.def = definition;
1463                         }
1464                 }
1465                 saved_token_t saved_token;
1466                 saved_token.token = pp_token;
1467                 saved_token.had_whitespace = info.had_whitespace;
1468                 obstack_grow(&pp_obstack, &saved_token, sizeof(saved_token));
1469                 next_input_token();
1470         }
1471
1472         new_definition->list_len   = obstack_object_size(&pp_obstack)
1473                 / sizeof(new_definition->token_list[0]);
1474         new_definition->token_list = obstack_finish(&pp_obstack);
1475
1476         if (new_definition->has_parameters) {
1477                 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1478                         pp_definition_t *param      = &new_definition->parameters[i];
1479                         symbol_t        *symbol     = param->symbol;
1480                         if (symbol == sym_anonymous)
1481                                 continue;
1482                         assert(symbol->pp_definition == param);
1483                         assert(param->function_definition == new_definition);
1484                         symbol->pp_definition   = param->parent_expansion;
1485                         param->parent_expansion = NULL;
1486                 }
1487         }
1488
1489         pp_definition_t *old_definition = symbol->pp_definition;
1490         if (old_definition != NULL) {
1491                 if (!pp_definitions_equal(old_definition, new_definition)) {
1492                         warningf(WARN_OTHER, &input.position, "multiple definition of macro '%Y' (first defined %P)", symbol, &old_definition->source_position);
1493                 } else {
1494                         /* reuse the old definition */
1495                         obstack_free(&pp_obstack, new_definition);
1496                         new_definition = old_definition;
1497                 }
1498         }
1499
1500         symbol->pp_definition = new_definition;
1501         return;
1502
1503 error_out:
1504         if (obstack_object_size(&pp_obstack) > 0) {
1505                 char *ptr = obstack_finish(&pp_obstack);
1506                 obstack_free(&pp_obstack, ptr);
1507         }
1508         eat_pp_directive();
1509 }
1510
1511 static void parse_undef_directive(void)
1512 {
1513         eat_pp(TP_undef);
1514         if (skip_mode) {
1515                 eat_pp_directive();
1516                 return;
1517         }
1518
1519         if (pp_token.kind != T_IDENTIFIER) {
1520                 errorf(&input.position,
1521                        "expected identifier after #undef, got %K", &pp_token);
1522                 eat_pp_directive();
1523                 return;
1524         }
1525
1526         pp_token.base.symbol->pp_definition = NULL;
1527         eat_token(T_IDENTIFIER);
1528
1529         if (!info.at_line_begin) {
1530                 warningf(WARN_OTHER, &input.position, "extra tokens at end of #undef directive");
1531         }
1532         eat_pp_directive();
1533 }
1534
1535 /** behind an #include we can have the special headername lexems.
1536  * They're only allowed behind an #include so they're not recognized
1537  * by the normal next_preprocessing_token. We handle them as a special
1538  * exception here */
1539 static void parse_headername(void)
1540 {
1541         const source_position_t start_position = input.position;
1542         string_t                string         = { NULL, 0, STRING_ENCODING_CHAR };
1543         assert(obstack_object_size(&symbol_obstack) == 0);
1544
1545         if (info.at_line_begin) {
1546                 parse_error("expected headername after #include");
1547                 goto finish_error;
1548         }
1549
1550         /* check wether we have a "... or <... headername */
1551         switch (input.c) {
1552         {
1553                 utf32 delimiter;
1554         case '<': delimiter = '>'; goto parse_name;
1555         case '"': delimiter = '"'; goto parse_name;
1556 parse_name:
1557                 next_char();
1558                 while (true) {
1559                         switch (input.c) {
1560                         case NEWLINE:
1561                         case EOF:
1562                                 errorf(&pp_token.base.source_position, "header name without closing '%c'", (char)delimiter);
1563                                 goto finish_error;
1564
1565                         default:
1566                                 if (input.c == delimiter) {
1567                                         next_char();
1568                                         goto finished_headername;
1569                                 } else {
1570                                         obstack_1grow(&symbol_obstack, (char)input.c);
1571                                         next_char();
1572                                 }
1573                                 break;
1574                         }
1575                 }
1576                 /* we should never be here */
1577         }
1578
1579         default:
1580                 /* TODO: do normal pp_token parsing and concatenate results */
1581                 panic("pp_token concat include not implemented yet");
1582         }
1583
1584 finished_headername:
1585         string = sym_make_string(STRING_ENCODING_CHAR);
1586
1587 finish_error:
1588         pp_token.base.source_position = start_position;
1589         pp_token.kind                 = T_HEADERNAME;
1590         pp_token.literal.string       = string;
1591 }
1592
1593 static bool do_include(bool system_include, const char *headername)
1594 {
1595         size_t headername_len = strlen(headername);
1596         if (!system_include) {
1597                 /* put dirname of current input on obstack */
1598                 const char *filename   = input.position.input_name;
1599                 const char *last_slash = strrchr(filename, '/');
1600                 if (last_slash != NULL) {
1601                         size_t len = last_slash - filename;
1602                         obstack_grow(&symbol_obstack, filename, len + 1);
1603                         obstack_grow0(&symbol_obstack, headername, headername_len);
1604                         char *complete_path = obstack_finish(&symbol_obstack);
1605                         headername = identify_string(complete_path);
1606                 }
1607
1608                 FILE *file = fopen(headername, "r");
1609                 if (file != NULL) {
1610                         switch_input(file, headername);
1611                         return true;
1612                 }
1613         }
1614
1615         assert(obstack_object_size(&symbol_obstack) == 0);
1616         /* check searchpath */
1617         for (searchpath_entry_t *entry = searchpath; entry != NULL;
1618              entry = entry->next) {
1619             const char *path = entry->path;
1620             size_t      len  = strlen(path);
1621                 obstack_grow(&symbol_obstack, path, len);
1622                 if (path[len-1] != '/')
1623                         obstack_1grow(&symbol_obstack, '/');
1624                 obstack_grow(&symbol_obstack, headername, headername_len+1);
1625
1626                 char *complete_path = obstack_finish(&symbol_obstack);
1627                 FILE *file          = fopen(complete_path, "r");
1628                 if (file != NULL) {
1629                         const char *filename = identify_string(complete_path);
1630                         switch_input(file, filename);
1631                         return true;
1632                 } else {
1633                         obstack_free(&symbol_obstack, complete_path);
1634                 }
1635         }
1636
1637         return false;
1638 }
1639
1640 static void parse_include_directive(void)
1641 {
1642         if (skip_mode) {
1643                 eat_pp_directive();
1644                 return;
1645         }
1646
1647         /* don't eat the TP_include here!
1648          * we need an alternative parsing for the next token */
1649         skip_till_newline(true);
1650         bool system_include = input.c == '<';
1651         parse_headername();
1652         string_t headername = pp_token.literal.string;
1653         if (headername.begin == NULL) {
1654                 eat_pp_directive();
1655                 return;
1656         }
1657
1658         bool had_nonwhitespace = skip_till_newline(false);
1659         if (had_nonwhitespace) {
1660                 warningf(WARN_OTHER, &pp_token.base.source_position,
1661                          "extra tokens at end of #include directive");
1662         }
1663
1664         if (n_inputs > INCLUDE_LIMIT) {
1665                 errorf(&pp_token.base.source_position, "#include nested too deeply");
1666                 /* eat \n or EOF */
1667                 next_input_token();
1668                 return;
1669         }
1670
1671         /* switch inputs */
1672         info.whitespace_at_line_begin = 0;
1673         info.had_whitespace           = false;
1674         info.at_line_begin            = true;
1675         emit_newlines();
1676         push_input();
1677         bool res = do_include(system_include, pp_token.literal.string.begin);
1678         if (res) {
1679                 next_input_token();
1680         } else {
1681                 errorf(&pp_token.base.source_position, "failed including '%S': %s", &pp_token.literal.string, strerror(errno));
1682                 pop_restore_input();
1683         }
1684 }
1685
1686 static pp_conditional_t *push_conditional(void)
1687 {
1688         pp_conditional_t *conditional
1689                 = obstack_alloc(&pp_obstack, sizeof(*conditional));
1690         memset(conditional, 0, sizeof(*conditional));
1691
1692         conditional->parent = conditional_stack;
1693         conditional_stack   = conditional;
1694
1695         return conditional;
1696 }
1697
1698 static void pop_conditional(void)
1699 {
1700         assert(conditional_stack != NULL);
1701         conditional_stack = conditional_stack->parent;
1702 }
1703
1704 static void check_unclosed_conditionals(void)
1705 {
1706         while (conditional_stack != NULL) {
1707                 pp_conditional_t *conditional = conditional_stack;
1708
1709                 if (conditional->in_else) {
1710                         errorf(&conditional->source_position, "unterminated #else");
1711                 } else {
1712                         errorf(&conditional->source_position, "unterminated condition");
1713                 }
1714                 pop_conditional();
1715         }
1716 }
1717
1718 static void parse_ifdef_ifndef_directive(bool const is_ifdef)
1719 {
1720         bool condition;
1721         eat_pp(is_ifdef ? TP_ifdef : TP_ifndef);
1722
1723         if (skip_mode) {
1724                 eat_pp_directive();
1725                 pp_conditional_t *conditional = push_conditional();
1726                 conditional->source_position  = pp_token.base.source_position;
1727                 conditional->skip             = true;
1728                 return;
1729         }
1730
1731         if (pp_token.kind != T_IDENTIFIER || info.at_line_begin) {
1732                 errorf(&pp_token.base.source_position,
1733                        "expected identifier after #%s, got %K",
1734                        is_ifdef ? "ifdef" : "ifndef", &pp_token);
1735                 eat_pp_directive();
1736
1737                 /* just take the true case in the hope to avoid further errors */
1738                 condition = true;
1739         } else {
1740                 /* evaluate wether we are in true or false case */
1741                 condition = (bool)pp_token.base.symbol->pp_definition == is_ifdef;
1742                 eat_token(T_IDENTIFIER);
1743
1744                 if (!info.at_line_begin) {
1745                         errorf(&pp_token.base.source_position,
1746                                "extra tokens at end of #%s",
1747                                is_ifdef ? "ifdef" : "ifndef");
1748                         eat_pp_directive();
1749                 }
1750         }
1751
1752         pp_conditional_t *conditional = push_conditional();
1753         conditional->source_position  = pp_token.base.source_position;
1754         conditional->condition        = condition;
1755
1756         if (!condition) {
1757                 skip_mode = true;
1758         }
1759 }
1760
1761 static void parse_else_directive(void)
1762 {
1763         eat_pp(TP_else);
1764
1765         if (!info.at_line_begin) {
1766                 if (!skip_mode) {
1767                         warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #else");
1768                 }
1769                 eat_pp_directive();
1770         }
1771
1772         pp_conditional_t *conditional = conditional_stack;
1773         if (conditional == NULL) {
1774                 errorf(&pp_token.base.source_position, "#else without prior #if");
1775                 return;
1776         }
1777
1778         if (conditional->in_else) {
1779                 errorf(&pp_token.base.source_position,
1780                        "#else after #else (condition started %P)",
1781                        &conditional->source_position);
1782                 skip_mode = true;
1783                 return;
1784         }
1785
1786         conditional->in_else = true;
1787         if (!conditional->skip) {
1788                 skip_mode = conditional->condition;
1789         }
1790         conditional->source_position = pp_token.base.source_position;
1791 }
1792
1793 static void parse_endif_directive(void)
1794 {
1795         eat_pp(TP_endif);
1796
1797         if (!info.at_line_begin) {
1798                 if (!skip_mode) {
1799                         warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #endif");
1800                 }
1801                 eat_pp_directive();
1802         }
1803
1804         pp_conditional_t *conditional = conditional_stack;
1805         if (conditional == NULL) {
1806                 errorf(&pp_token.base.source_position, "#endif without prior #if");
1807                 return;
1808         }
1809
1810         if (!conditional->skip) {
1811                 skip_mode = false;
1812         }
1813         pop_conditional();
1814 }
1815
1816 static void parse_preprocessing_directive(void)
1817 {
1818         eat_token('#');
1819
1820         if (info.at_line_begin) {
1821                 /* empty directive */
1822                 return;
1823         }
1824
1825         if (pp_token.base.symbol) {
1826                 switch (pp_token.base.symbol->pp_ID) {
1827                 case TP_define:  parse_define_directive();            break;
1828                 case TP_else:    parse_else_directive();              break;
1829                 case TP_endif:   parse_endif_directive();             break;
1830                 case TP_ifdef:   parse_ifdef_ifndef_directive(true);  break;
1831                 case TP_ifndef:  parse_ifdef_ifndef_directive(false); break;
1832                 case TP_include: parse_include_directive();           break;
1833                 case TP_undef:   parse_undef_directive();             break;
1834                 default:         goto skip;
1835                 }
1836         } else {
1837 skip:
1838                 if (!skip_mode) {
1839                         errorf(&pp_token.base.source_position, "invalid preprocessing directive #%K", &pp_token);
1840                 }
1841                 eat_pp_directive();
1842         }
1843
1844         assert(info.at_line_begin);
1845 }
1846
1847 static void finish_current_argument(void)
1848 {
1849         if (current_argument == NULL)
1850                 return;
1851         size_t size = obstack_object_size(&pp_obstack);
1852         current_argument->list_len   = size/sizeof(current_argument->token_list[0]);
1853         current_argument->token_list = obstack_finish(&pp_obstack);
1854 }
1855
1856 static void next_preprocessing_token(void)
1857 {
1858 restart:
1859         if (!expand_next()) {
1860                 do {
1861                         next_input_token();
1862                         while (pp_token.kind == '#' && info.at_line_begin) {
1863                                 parse_preprocessing_directive();
1864                         }
1865                 } while (skip_mode && pp_token.kind != T_EOF);
1866         }
1867
1868         const token_kind_t kind = pp_token.kind;
1869         if (current_call == NULL || argument_expanding != NULL) {
1870                 if (kind == T_IDENTIFIER) {
1871                         symbol_t        *const symbol        = pp_token.base.symbol;
1872                         pp_definition_t *const pp_definition = symbol->pp_definition;
1873                         if (pp_definition != NULL && !pp_definition->is_expanding) {
1874                                 if (pp_definition->has_parameters) {
1875
1876                                         /* check if next token is a '(' */
1877                                         whitespace_info_t old_info   = info;
1878                                         token_kind_t      next_token = peek_expansion();
1879                                         if (next_token == T_EOF) {
1880                                                 info.at_line_begin  = false;
1881                                                 info.had_whitespace = false;
1882                                                 skip_whitespace();
1883                                                 if (input.c == '(') {
1884                                                         next_token = '(';
1885                                                 }
1886                                         }
1887
1888                                         if (next_token == '(') {
1889                                                 if (current_expansion == NULL)
1890                                                         expansion_pos = pp_token.base.source_position;
1891                                                 next_preprocessing_token();
1892                                                 assert(pp_token.kind == '(');
1893
1894                                                 pp_definition->parent_expansion = current_expansion;
1895                                                 current_call              = pp_definition;
1896                                                 current_call->expand_pos  = 0;
1897                                                 current_call->expand_info = old_info;
1898                                                 if (current_call->n_parameters > 0) {
1899                                                         current_argument = &current_call->parameters[0];
1900                                                         assert(argument_brace_count == 0);
1901                                                 }
1902                                                 goto restart;
1903                                         } else {
1904                                                 /* skip_whitespaces() skipped newlines and whitespace,
1905                                                  * remember results for next token */
1906                                                 next_info = info;
1907                                                 info      = old_info;
1908                                                 return;
1909                                         }
1910                                 } else {
1911                                         if (current_expansion == NULL)
1912                                                 expansion_pos = pp_token.base.source_position;
1913                                         start_expanding(pp_definition);
1914                                         goto restart;
1915                                 }
1916                         }
1917                 } else if (kind == T_MACRO_PARAMETER) {
1918                         assert(current_expansion != NULL);
1919                         start_expanding(pp_token.macro_parameter.def);
1920                         goto restart;
1921                 }
1922         }
1923
1924         if (current_call != NULL) {
1925                 /* current_call != NULL */
1926                 if (kind == '(') {
1927                         ++argument_brace_count;
1928                 } else if (kind == ')') {
1929                         if (argument_brace_count > 0) {
1930                                 --argument_brace_count;
1931                         } else {
1932                                 finish_current_argument();
1933                                 assert(kind == ')');
1934                                 start_expanding(current_call);
1935                                 info = current_call->expand_info;
1936                                 current_call     = NULL;
1937                                 current_argument = NULL;
1938                                 goto restart;
1939                         }
1940                 } else if (kind == ',' && argument_brace_count == 0) {
1941                         finish_current_argument();
1942                         current_call->expand_pos++;
1943                         if (current_call->expand_pos >= current_call->n_parameters) {
1944                                 errorf(&pp_token.base.source_position,
1945                                            "too many arguments passed for macro '%Y'",
1946                                            current_call->symbol);
1947                                 current_argument = NULL;
1948                         } else {
1949                                 current_argument
1950                                         = &current_call->parameters[current_call->expand_pos];
1951                         }
1952                         goto restart;
1953                 } else if (kind == T_MACRO_PARAMETER) {
1954                         /* parameters have to be fully expanded before being used as
1955                          * parameters for another macro-call */
1956                         assert(current_expansion != NULL);
1957                         pp_definition_t *argument = pp_token.macro_parameter.def;
1958                         argument_expanding = argument;
1959                         start_expanding(argument);
1960                         goto restart;
1961                 } else if (kind == T_EOF) {
1962                         errorf(&expansion_pos,
1963                                "reached end of file while parsing arguments for '%Y'",
1964                                current_call->symbol);
1965                         return;
1966                 }
1967                 if (current_argument != NULL) {
1968                         saved_token_t saved;
1969                         saved.token = pp_token;
1970                         saved.had_whitespace = info.had_whitespace;
1971                         obstack_grow(&pp_obstack, &saved, sizeof(saved));
1972                 }
1973                 goto restart;
1974         }
1975 }
1976
1977
1978 static void prepend_include_path(const char *path)
1979 {
1980         searchpath_entry_t *entry = OALLOCZ(&config_obstack, searchpath_entry_t);
1981         entry->path = path;
1982         entry->next = searchpath;
1983         searchpath  = entry;
1984 }
1985
1986 static void setup_include_path(void)
1987 {
1988         /* built-in paths */
1989         prepend_include_path("/usr/include");
1990
1991         /* parse environment variable */
1992         const char *cpath = getenv("CPATH");
1993         if (cpath != NULL && *cpath != '\0') {
1994                 const char *begin = cpath;
1995                 const char *c;
1996                 do {
1997                         c = begin;
1998                         while (*c != '\0' && *c != ':')
1999                                 ++c;
2000
2001                         size_t len = c-begin;
2002                         if (len == 0) {
2003                                 /* for gcc compatibility (Matze: I would expect that
2004                                  * nothing happens for an empty entry...) */
2005                                 prepend_include_path(".");
2006                         } else {
2007                                 char *string = obstack_alloc(&config_obstack, len+1);
2008                                 memcpy(string, begin, len);
2009                                 string[len] = '\0';
2010
2011                                 prepend_include_path(string);
2012                         }
2013
2014                         begin = c+1;
2015                         /* skip : */
2016                         if (*begin == ':')
2017                                 ++begin;
2018                 } while(*c != '\0');
2019         }
2020 }
2021
2022 int pptest_main(int argc, char **argv);
2023 int pptest_main(int argc, char **argv)
2024 {
2025         init_symbol_table();
2026         init_tokens();
2027         init_symbols();
2028
2029         obstack_init(&config_obstack);
2030         obstack_init(&pp_obstack);
2031         obstack_init(&input_obstack);
2032         strset_init(&stringset);
2033
2034         error_on_unknown_chars = false;
2035
2036         setup_include_path();
2037
2038         /* simplistic commandline parser */
2039         const char *filename = NULL;
2040         const char *output = NULL;
2041         for (int i = 1; i < argc; ++i) {
2042                 const char *opt = argv[i];
2043                 if (streq(opt, "-I")) {
2044                         prepend_include_path(argv[++i]);
2045                         continue;
2046                 } else if (streq(opt, "-E")) {
2047                         /* ignore */
2048                 } else if (streq(opt, "-o")) {
2049                         output = argv[++i];
2050                         continue;
2051                 } else if (opt[0] == '-') {
2052                         fprintf(stderr, "Unknown option '%s'\n", opt);
2053                 } else {
2054                         if (filename != NULL)
2055                                 fprintf(stderr, "Multiple inputs not supported\n");
2056                         filename = argv[i];
2057                 }
2058         }
2059         if (filename == NULL) {
2060                 fprintf(stderr, "No input specified\n");
2061                 return 1;
2062         }
2063
2064         if (output == NULL) {
2065                 out = stdout;
2066         } else {
2067                 out = fopen(output, "w");
2068                 if (out == NULL) {
2069                         fprintf(stderr, "Couldn't open output '%s'\n", output);
2070                         return 1;
2071                 }
2072         }
2073
2074         /* just here for gcc compatibility */
2075         fprintf(out, "# 1 \"%s\"\n", filename);
2076         fprintf(out, "# 1 \"<built-in>\"\n");
2077         fprintf(out, "# 1 \"<command-line>\"\n");
2078
2079         FILE *file = fopen(filename, "r");
2080         if (file == NULL) {
2081                 fprintf(stderr, "Couldn't open input '%s'\n", filename);
2082                 return 1;
2083         }
2084         switch_input(file, filename);
2085
2086         for (;;) {
2087                 next_preprocessing_token();
2088                 if (pp_token.kind == T_EOF)
2089                         break;
2090                 emit_pp_token();
2091         }
2092
2093         fputc('\n', out);
2094         check_unclosed_conditionals();
2095         close_input();
2096         if (out != stdout)
2097                 fclose(out);
2098
2099         obstack_free(&input_obstack, NULL);
2100         obstack_free(&pp_obstack, NULL);
2101         obstack_free(&config_obstack, NULL);
2102
2103         strset_destroy(&stringset);
2104
2105         exit_tokens();
2106         exit_symbol_table();
2107
2108         return 0;
2109 }