nsz Git - cparser/blob - preprocessor.c

   1 #include <config.h>
   2
   3 #include <assert.h>
   4 #include <errno.h>
   5 #include <string.h>
   6 #include <stdbool.h>
   7 #include <ctype.h>
   8
   9 #include "token_t.h"
  10 #include "symbol_t.h"
  11 #include "adt/util.h"
  12 #include "adt/error.h"
  13 #include "adt/strutil.h"
  14 #include "adt/strset.h"
  15 #include "lang_features.h"
  16 #include "diagnostic.h"
  17 #include "string_rep.h"
  18 #include "input.h"
  19
/* Number of slots kept free at the front of pp_input_t.buf: the buffer is
 * always refilled starting at buf + MAX_PUTBACK, so put_back() has room to
 * step in front of freshly decoded data. */
#define MAX_PUTBACK 3
#define INCLUDE_LIMIT 199  /* 199 is for gcc "compatibility" */
  22
/** One entry of a macro replacement list (see pp_definition_t.token_list). */
typedef struct saved_token_t {
        token_t token;
        /** copied into info.had_whitespace when the token is replayed */
        bool    had_whitespace;
} saved_token_t;
  27
/** Whitespace/layout state recorded for a token. */
typedef struct whitespace_info_t {
        /** current token had whitespace in front of it */
        bool     had_whitespace;
        /** current token is at the beginning of a line.
         * => a "#" at line begin starts a preprocessing directive. */
        bool     at_line_begin;
        /** number of spaces before the first token in a line */
        unsigned whitespace_at_line_begin;
} whitespace_info_t;
  37
/**
 * A preprocessor definition together with the state of its current
 * expansion.
 */
struct pp_definition_t {
        symbol_t          *symbol;
        source_position_t  source_position;
        /** expansion that was active when this one was started; set by
         * start_expanding(), cleared by finished_expanding() */
        pp_definition_t   *parent_expansion;
        /** index into token_list of the next token to hand out */
        size_t             expand_pos;
        whitespace_info_t  expand_info;
        bool               is_variadic    : 1;
        /** true between start_expanding() and finished_expanding() */
        bool               is_expanding   : 1;
        bool               has_parameters : 1;
        bool               is_parameter   : 1;
        pp_definition_t   *function_definition;
        size_t             n_parameters;
        pp_definition_t   *parameters;

        /* replacement */
        /** number of entries in token_list */
        size_t             list_len;
        saved_token_t     *token_list;
};
  56
/** Entry of the conditional stack (see conditional_stack); entries are
 * linked through the parent pointer. */
typedef struct pp_conditional_t pp_conditional_t;
struct pp_conditional_t {
        source_position_t  source_position;
        bool               condition;
        bool               in_else;
        /** conditional in skip mode (then+else gets skipped) */
        bool               skip;
        pp_conditional_t  *parent;
};
  66
/** State of one input being read: the stream, a buffer of decoded
 * characters and the current position. */
typedef struct pp_input_t pp_input_t;
struct pp_input_t {
        FILE              *file;
        input_t           *input;
        /** current character (EOF once the input is exhausted) */
        utf32              c;
        /** decoded characters; the first MAX_PUTBACK slots are kept free for
         * put_back() */
        utf32              buf[1024+MAX_PUTBACK];
        /** one past the last valid character in buf */
        const utf32       *bufend;
        /** next character in buf to be read */
        const utf32       *bufpos;
        source_position_t  position;
        /** next entry on the input stack (see push_input()) */
        pp_input_t        *parent;
        unsigned           output_line;
};
  79
/** Node of a singly linked list of search paths. */
typedef struct searchpath_entry_t searchpath_entry_t;
struct searchpath_entry_t {
        const char         *path;
        searchpath_entry_t *next;
};
  85
/* the input currently being read */
static pp_input_t      input;

/* saved inputs (see push_input()/pop_restore_input()); n_inputs counts the
 * entries, which are allocated on input_obstack */
static pp_input_t     *input_stack;
static unsigned        n_inputs;
static struct obstack  input_obstack;

static pp_conditional_t *conditional_stack;

/* the token most recently produced */
static token_t           pp_token;
static bool              resolve_escape_sequences = false;
static bool              error_on_unknown_chars   = true;
static bool              skip_mode;
static FILE             *out;
static struct obstack    pp_obstack;
static struct obstack    config_obstack;
static const char       *printed_input_name = NULL;
/* source position stamped onto every token produced by expand_next() */
static source_position_t expansion_pos;
/* innermost expansion in progress, NULL if none */
static pp_definition_t  *current_expansion  = NULL;
static pp_definition_t  *current_call       = NULL;
static pp_definition_t  *current_argument   = NULL;
static pp_definition_t  *argument_expanding = NULL;
static unsigned          argument_brace_count;
/* set used by identify_string() to unify string contents */
static strset_t          stringset;
static token_kind_t      last_token;

static searchpath_entry_t *searchpath;

static whitespace_info_t next_info; /* valid if had_whitespace is true */
static whitespace_info_t info;
 115
/* forward declarations */
static inline void next_char(void);
static void next_input_token(void);
static void print_line_directive(const source_position_t *pos, const char *add);

/* symbols for the digraph spellings, filled in by init_symbols() */
static symbol_t *symbol_colongreater;
static symbol_t *symbol_lesscolon;
static symbol_t *symbol_lesspercent;
static symbol_t *symbol_percentcolon;
static symbol_t *symbol_percentcolonpercentcolon;
static symbol_t *symbol_percentgreater;

/* defined in another translation unit; tested by the '$' case label of
 * SYMBOL_CASES_WITHOUT_E_P */
extern bool      allow_dollar_in_symbol;
 128
 129 static void init_symbols(void)
 130 {
 131         symbol_colongreater             = symbol_table_insert(":>");
 132         symbol_lesscolon                = symbol_table_insert("<:");
 133         symbol_lesspercent              = symbol_table_insert("<%");
 134         symbol_percentcolon             = symbol_table_insert("%:");
 135         symbol_percentcolonpercentcolon = symbol_table_insert("%:%:");
 136         symbol_percentgreater           = symbol_table_insert("%>");
 137 }
 138
 139 static void switch_input(FILE *file, const char *filename)
 140 {
 141         input.file                = file;
 142         input.input               = input_from_stream(file, NULL);
 143         input.bufend              = NULL;
 144         input.bufpos              = NULL;
 145         input.output_line         = 0;
 146         input.position.input_name = filename;
 147         input.position.lineno     = 1;
 148
 149         /* indicate that we're at a new input */
 150         print_line_directive(&input.position, input_stack != NULL ? "1" : NULL);
 151
 152         /* place a virtual '\n' so we realize we're at line begin */
 153         input.position.lineno = 0;
 154         input.c               = '\n';
 155 }
 156
 157 static void close_input(void)
 158 {
 159         input_free(input.input);
 160         assert(input.file != NULL);
 161
 162         fclose(input.file);
 163         input.input  = NULL;
 164         input.file   = NULL;
 165         input.bufend = NULL;
 166         input.bufpos = NULL;
 167         input.c      = EOF;
 168 }
 169
 170 static void push_input(void)
 171 {
 172         pp_input_t *saved_input
 173                 = obstack_alloc(&input_obstack, sizeof(*saved_input));
 174
 175         memcpy(saved_input, &input, sizeof(*saved_input));
 176
 177         /* adjust buffer positions */
 178         if (input.bufpos != NULL)
 179                 saved_input->bufpos = saved_input->buf + (input.bufpos - input.buf);
 180         if (input.bufend != NULL)
 181                 saved_input->bufend = saved_input->buf + (input.bufend - input.buf);
 182
 183         saved_input->parent = input_stack;
 184         input_stack         = saved_input;
 185         ++n_inputs;
 186 }
 187
 188 static void pop_restore_input(void)
 189 {
 190         assert(n_inputs > 0);
 191         assert(input_stack != NULL);
 192
 193         pp_input_t *saved_input = input_stack;
 194
 195         memcpy(&input, saved_input, sizeof(input));
 196         input.parent = NULL;
 197
 198         /* adjust buffer positions */
 199         if (saved_input->bufpos != NULL)
 200                 input.bufpos = input.buf + (saved_input->bufpos - saved_input->buf);
 201         if (saved_input->bufend != NULL)
 202                 input.bufend = input.buf + (saved_input->bufend - saved_input->buf);
 203
 204         input_stack = saved_input->parent;
 205         obstack_free(&input_obstack, saved_input);
 206         --n_inputs;
 207 }
 208
 209 /**
 210  * Prints a parse error message at the current token.
 211  *
 212  * @param msg   the error message
 213  */
 214 static void parse_error(const char *msg)
 215 {
 216         errorf(&pp_token.base.source_position,  "%s", msg);
 217 }
 218
 219 static inline void next_real_char(void)
 220 {
 221         assert(input.bufpos <= input.bufend);
 222         if (input.bufpos >= input.bufend) {
 223                 size_t const n = decode(input.input, input.buf + MAX_PUTBACK, lengthof(input.buf) - MAX_PUTBACK);
 224                 if (n == 0) {
 225                         input.c = EOF;
 226                         return;
 227                 }
 228                 input.bufpos = input.buf + MAX_PUTBACK;
 229                 input.bufend = input.bufpos + n;
 230         }
 231         input.c = *input.bufpos++;
 232         ++input.position.colno;
 233 }
 234
 235 /**
 236  * Put a character back into the buffer.
 237  *
 238  * @param pc  the character to put back
 239  */
 240 static inline void put_back(utf32 const pc)
 241 {
 242         assert(input.bufpos > input.buf);
 243         *(--input.bufpos - input.buf + input.buf) = (char) pc;
 244         --input.position.colno;
 245 }
 246
/*
 * Case-label helper, used as `case NEWLINE:` inside a switch over input.c.
 * It matches '\r', '\n' and "\r\n", consumes the line ending, moves
 * input.position to column 1 of the next line and then continues with the
 * statements written after the label.  The expansion ends in a label named
 * `newline`, so it can appear at most once per function.
 */
#define NEWLINE \
        '\r': \
                next_char(); \
                if (input.c == '\n') { \
        case '\n': \
                        next_char(); \
                } \
                ++input.position.lineno; \
                input.position.colno = 1; \
                goto newline; \
                newline // Let it look like an ordinary case label.
 258
/* Asserts that the current character is c_type, then advances to the next
 * character. */
#define eat(c_type) (assert(input.c == c_type), next_char())
 260
/**
 * Handles a backslash in input.c.  If a line ending follows, both are
 * consumed (the lines are joined) and input.c is the first character of the
 * next line.  Otherwise the character after the backslash is put back and
 * input.c stays '\\'.
 */
static void maybe_concat_lines(void)
{
        eat('\\');

        switch (input.c) {
        case NEWLINE:
                info.whitespace_at_line_begin = 0;
                return;

        default:
                break;
        }

        /* not a line continuation: undo the lookahead */
        put_back(input.c);
        input.c = '\\';
}
 277
/**
 * Sets input.c to the next input character after replacing trigraphs and
 * joining lines that end in a backslash.
 */
static inline void next_char(void)
{
        next_real_char();

        /* filter trigraphs and concatenated lines */
        if (UNLIKELY(input.c == '\\')) {
                maybe_concat_lines();
                goto end_of_next_char;
        }

        if (LIKELY(input.c != '?'))
                goto end_of_next_char;

        /* one '?' seen: a trigraph needs a second one */
        next_real_char();
        if (LIKELY(input.c != '?')) {
                put_back(input.c);
                input.c = '?';
                goto end_of_next_char;
        }

        /* "??" seen: the third character selects the replacement */
        next_real_char();
        switch (input.c) {
        case '=': input.c = '#'; break;
        case '(': input.c = '['; break;
        /* "??/" is a backslash and may therefore continue the line */
        case '/': input.c = '\\'; maybe_concat_lines(); break;
        case ')': input.c = ']'; break;
        case '\'': input.c = '^'; break;
        case '<': input.c = '{'; break;
        case '!': input.c = '|'; break;
        case '>': input.c = '}'; break;
        case '-': input.c = '~'; break;
        default:
                /* no trigraph: put back the third character and the second
                 * '?', and deliver the first '?' */
                put_back(input.c);
                put_back('?');
                input.c = '?';
                break;
        }

end_of_next_char:;
#ifdef DEBUG_CHARS
        printf("nchar '%c'\n", input.c);
#endif
}
 325
 326
 327
 328 /**
 329  * Returns true if the given char is a octal digit.
 330  *
 331  * @param char  the character to check
 332  */
 333 static inline bool is_octal_digit(int chr)
 334 {
 335         switch (chr) {
 336         case '0':
 337         case '1':
 338         case '2':
 339         case '3':
 340         case '4':
 341         case '5':
 342         case '6':
 343         case '7':
 344                 return true;
 345         default:
 346                 return false;
 347         }
 348 }
 349
 350 /**
 351  * Returns the value of a digit.
 352  * The only portable way to do it ...
 353  */
 354 static int digit_value(int digit)
 355 {
 356         switch (digit) {
 357         case '0': return 0;
 358         case '1': return 1;
 359         case '2': return 2;
 360         case '3': return 3;
 361         case '4': return 4;
 362         case '5': return 5;
 363         case '6': return 6;
 364         case '7': return 7;
 365         case '8': return 8;
 366         case '9': return 9;
 367         case 'a':
 368         case 'A': return 10;
 369         case 'b':
 370         case 'B': return 11;
 371         case 'c':
 372         case 'C': return 12;
 373         case 'd':
 374         case 'D': return 13;
 375         case 'e':
 376         case 'E': return 14;
 377         case 'f':
 378         case 'F': return 15;
 379         default:
 380                 panic("wrong character given");
 381         }
 382 }
 383
 384 /**
 385  * Parses an octal character sequence.
 386  *
 387  * @param first_digit  the already read first digit
 388  */
 389 static utf32 parse_octal_sequence(const utf32 first_digit)
 390 {
 391         assert(is_octal_digit(first_digit));
 392         utf32 value = digit_value(first_digit);
 393         if (!is_octal_digit(input.c)) return value;
 394         value = 8 * value + digit_value(input.c);
 395         next_char();
 396         if (!is_octal_digit(input.c)) return value;
 397         value = 8 * value + digit_value(input.c);
 398         next_char();
 399         return value;
 400
 401 }
 402
 403 /**
 404  * Parses a hex character sequence.
 405  */
 406 static utf32 parse_hex_sequence(void)
 407 {
 408         utf32 value = 0;
 409         while (isxdigit(input.c)) {
 410                 value = 16 * value + digit_value(input.c);
 411                 next_char();
 412         }
 413         return value;
 414 }
 415
 416 static bool is_universal_char_valid(utf32 const v)
 417 {
 418         /* C11 §6.4.3:2 */
 419         if (v < 0xA0U && v != 0x24 && v != 0x40 && v != 0x60)
 420                 return false;
 421         if (0xD800 <= v && v <= 0xDFFF)
 422                 return false;
 423         return true;
 424 }
 425
 426 static utf32 parse_universal_char(unsigned const n_digits)
 427 {
 428         utf32 v = 0;
 429         for (unsigned k = n_digits; k != 0; --k) {
 430                 if (isxdigit(input.c)) {
 431                         v = 16 * v + digit_value(input.c);
 432                         if (!resolve_escape_sequences)
 433                                 obstack_1grow(&symbol_obstack, input.c);
 434                         next_char();
 435                 } else {
 436                         errorf(&input.position,
 437                                "short universal character name, expected %u more digits",
 438                                    k);
 439                         break;
 440                 }
 441         }
 442         if (!is_universal_char_valid(v)) {
 443                 errorf(&input.position,
 444                        "\\%c%0*X is not a valid universal character name",
 445                        n_digits == 4 ? 'u' : 'U', (int)n_digits, v);
 446         }
 447         return v;
 448 }
 449
 450 static bool is_universal_char_valid_identifier(utf32 const v)
 451 {
 452         /* C11 Annex D.1 */
 453         if (                v == 0x000A8) return true;
 454         if (                v == 0x000AA) return true;
 455         if (                v == 0x000AD) return true;
 456         if (                v == 0x000AF) return true;
 457         if (0x000B2 <= v && v <= 0x000B5) return true;
 458         if (0x000B7 <= v && v <= 0x000BA) return true;
 459         if (0x000BC <= v && v <= 0x000BE) return true;
 460         if (0x000C0 <= v && v <= 0x000D6) return true;
 461         if (0x000D8 <= v && v <= 0x000F6) return true;
 462         if (0x000F8 <= v && v <= 0x000FF) return true;
 463         if (0x00100 <= v && v <= 0x0167F) return true;
 464         if (0x01681 <= v && v <= 0x0180D) return true;
 465         if (0x0180F <= v && v <= 0x01FFF) return true;
 466         if (0x0200B <= v && v <= 0x0200D) return true;
 467         if (0x0202A <= v && v <= 0x0202E) return true;
 468         if (0x0203F <= v && v <= 0x02040) return true;
 469         if (                v == 0x02054) return true;
 470         if (0x02060 <= v && v <= 0x0206F) return true;
 471         if (0x02070 <= v && v <= 0x0218F) return true;
 472         if (0x02460 <= v && v <= 0x024FF) return true;
 473         if (0x02776 <= v && v <= 0x02793) return true;
 474         if (0x02C00 <= v && v <= 0x02DFF) return true;
 475         if (0x02E80 <= v && v <= 0x02FFF) return true;
 476         if (0x03004 <= v && v <= 0x03007) return true;
 477         if (0x03021 <= v && v <= 0x0302F) return true;
 478         if (0x03031 <= v && v <= 0x0303F) return true;
 479         if (0x03040 <= v && v <= 0x0D7FF) return true;
 480         if (0x0F900 <= v && v <= 0x0FD3D) return true;
 481         if (0x0FD40 <= v && v <= 0x0FDCF) return true;
 482         if (0x0FDF0 <= v && v <= 0x0FE44) return true;
 483         if (0x0FE47 <= v && v <= 0x0FFFD) return true;
 484         if (0x10000 <= v && v <= 0x1FFFD) return true;
 485         if (0x20000 <= v && v <= 0x2FFFD) return true;
 486         if (0x30000 <= v && v <= 0x3FFFD) return true;
 487         if (0x40000 <= v && v <= 0x4FFFD) return true;
 488         if (0x50000 <= v && v <= 0x5FFFD) return true;
 489         if (0x60000 <= v && v <= 0x6FFFD) return true;
 490         if (0x70000 <= v && v <= 0x7FFFD) return true;
 491         if (0x80000 <= v && v <= 0x8FFFD) return true;
 492         if (0x90000 <= v && v <= 0x9FFFD) return true;
 493         if (0xA0000 <= v && v <= 0xAFFFD) return true;
 494         if (0xB0000 <= v && v <= 0xBFFFD) return true;
 495         if (0xC0000 <= v && v <= 0xCFFFD) return true;
 496         if (0xD0000 <= v && v <= 0xDFFFD) return true;
 497         if (0xE0000 <= v && v <= 0xEFFFD) return true;
 498         return false;
 499 }
 500
 501 static bool is_universal_char_valid_identifier_start(utf32 const v)
 502 {
 503         /* C11 Annex D.2 */
 504         if (0x0300 <= v && v <= 0x036F) return false;
 505         if (0x1DC0 <= v && v <= 0x1DFF) return false;
 506         if (0x20D0 <= v && v <= 0x20FF) return false;
 507         if (0xFE20 <= v && v <= 0xFE2F) return false;
 508         return true;
 509 }
 510
 511 /**
 512  * Parse an escape sequence.
 513  */
 514 static utf32 parse_escape_sequence(void)
 515 {
 516         eat('\\');
 517
 518         utf32 const ec = input.c;
 519         next_char();
 520
 521         switch (ec) {
 522         case '"':  return '"';
 523         case '\'': return '\'';
 524         case '\\': return '\\';
 525         case '?': return '\?';
 526         case 'a': return '\a';
 527         case 'b': return '\b';
 528         case 'f': return '\f';
 529         case 'n': return '\n';
 530         case 'r': return '\r';
 531         case 't': return '\t';
 532         case 'v': return '\v';
 533         case 'x':
 534                 return parse_hex_sequence();
 535         case '0':
 536         case '1':
 537         case '2':
 538         case '3':
 539         case '4':
 540         case '5':
 541         case '6':
 542         case '7':
 543                 return parse_octal_sequence(ec);
 544         case EOF:
 545                 parse_error("reached end of file while parsing escape sequence");
 546                 return EOF;
 547         /* \E is not documented, but handled, by GCC.  It is acceptable according
 548          * to §6.11.4, whereas \e is not. */
 549         case 'E':
 550         case 'e':
 551                 if (c_mode & _GNUC)
 552                         return 27;   /* hopefully 27 is ALWAYS the code for ESCAPE */
 553                 break;
 554
 555         case 'U': return parse_universal_char(8);
 556         case 'u': return parse_universal_char(4);
 557
 558         default:
 559                 break;
 560         }
 561         /* §6.4.4.4:8 footnote 64 */
 562         parse_error("unknown escape sequence");
 563         return EOF;
 564 }
 565
 566 static const char *identify_string(char *string)
 567 {
 568         const char *result = strset_insert(&stringset, string);
 569         if (result != string) {
 570                 obstack_free(&symbol_obstack, string);
 571         }
 572         return result;
 573 }
 574
 575 static string_t sym_make_string(string_encoding_t const enc)
 576 {
 577         obstack_1grow(&symbol_obstack, '\0');
 578         size_t      const len    = obstack_object_size(&symbol_obstack) - 1;
 579         char       *const string = obstack_finish(&symbol_obstack);
 580         char const *const result = identify_string(string);
 581         return (string_t){ result, len, enc };
 582 }
 583
 584 static void parse_string(utf32 const delimiter, token_kind_t const kind,
 585                          string_encoding_t const enc,
 586                          char const *const context)
 587 {
 588         const unsigned start_linenr = input.position.lineno;
 589
 590         eat(delimiter);
 591
 592         while (true) {
 593                 switch (input.c) {
 594                 case '\\': {
 595                         if (resolve_escape_sequences) {
 596                                 utf32 const tc = parse_escape_sequence();
 597                                 if (enc == STRING_ENCODING_CHAR) {
 598                                         if (tc >= 0x100) {
 599                                                 warningf(WARN_OTHER, &pp_token.base.source_position, "escape sequence out of range");
 600                                         }
 601                                         obstack_1grow(&symbol_obstack, tc);
 602                                 } else {
 603                                         obstack_grow_utf8(&symbol_obstack, tc);
 604                                 }
 605                         } else {
 606                                 obstack_1grow(&symbol_obstack, (char)input.c);
 607                                 next_char();
 608                                 obstack_1grow(&symbol_obstack, (char)input.c);
 609                                 next_char();
 610                         }
 611                         break;
 612                 }
 613
 614                 case NEWLINE:
 615                         errorf(&pp_token.base.source_position, "newline while parsing %s", context);
 616                         break;
 617
 618                 case EOF: {
 619                         source_position_t source_position;
 620                         source_position.input_name = pp_token.base.source_position.input_name;
 621                         source_position.lineno     = start_linenr;
 622                         errorf(&source_position, "EOF while parsing %s", context);
 623                         goto end_of_string;
 624                 }
 625
 626                 default:
 627                         if (input.c == delimiter) {
 628                                 next_char();
 629                                 goto end_of_string;
 630                         } else {
 631                                 obstack_grow_utf8(&symbol_obstack, input.c);
 632                                 next_char();
 633                                 break;
 634                         }
 635                 }
 636         }
 637
 638 end_of_string:
 639         pp_token.kind           = kind;
 640         pp_token.literal.string = sym_make_string(enc);
 641 }
 642
 643 static void parse_string_literal(string_encoding_t const enc)
 644 {
 645         parse_string('"', T_STRING_LITERAL, enc, "string literal");
 646 }
 647
 648 static void parse_character_constant(string_encoding_t const enc)
 649 {
 650         parse_string('\'', T_CHARACTER_CONSTANT, enc, "character constant");
 651         if (pp_token.literal.string.size == 0) {
 652                 parse_error("empty character constant");
 653         }
 654 }
 655
/*
 * Case labels for '$', '_' and all letters except e, E, p and P; used as
 * `case SYMBOL_CASES_WITHOUT_E_P:`.  When allow_dollar_in_symbol is false
 * the '$' label jumps to a label named `dollar_sign`, which the function
 * using the macro has to provide.
 * NOTE(review): e/E/p/P are presumably left out so that callers can treat
 * exponent characters separately - confirm at the use sites.
 */
#define SYMBOL_CASES_WITHOUT_E_P \
             '$': if (!allow_dollar_in_symbol) goto dollar_sign; \
        case 'a': \
        case 'b': \
        case 'c': \
        case 'd': \
        case 'f': \
        case 'g': \
        case 'h': \
        case 'i': \
        case 'j': \
        case 'k': \
        case 'l': \
        case 'm': \
        case 'n': \
        case 'o': \
        case 'q': \
        case 'r': \
        case 's': \
        case 't': \
        case 'u': \
        case 'v': \
        case 'w': \
        case 'x': \
        case 'y': \
        case 'z': \
        case 'A': \
        case 'B': \
        case 'C': \
        case 'D': \
        case 'F': \
        case 'G': \
        case 'H': \
        case 'I': \
        case 'J': \
        case 'K': \
        case 'L': \
        case 'M': \
        case 'N': \
        case 'O': \
        case 'Q': \
        case 'R': \
        case 'S': \
        case 'T': \
        case 'U': \
        case 'V': \
        case 'W': \
        case 'X': \
        case 'Y': \
        case 'Z': \
        case '_'
 707
/* SYMBOL_CASES_WITHOUT_E_P plus the letters e, p, E and P; used as
 * `case SYMBOL_CASES:`. */
#define SYMBOL_CASES \
             SYMBOL_CASES_WITHOUT_E_P: \
        case 'e': \
        case 'p': \
        case 'E': \
        case 'P'
 714
/* Case labels for the decimal digits; used as `case DIGIT_CASES:`. */
#define DIGIT_CASES \
             '0':  \
        case '1':  \
        case '2':  \
        case '3':  \
        case '4':  \
        case '5':  \
        case '6':  \
        case '7':  \
        case '8':  \
        case '9'
 726
 727 static void start_expanding(pp_definition_t *definition)
 728 {
 729         definition->parent_expansion = current_expansion;
 730         definition->expand_pos       = 0;
 731         definition->is_expanding     = true;
 732         if (definition->list_len > 0) {
 733                 definition->token_list[0].had_whitespace
 734                         = info.had_whitespace;
 735         }
 736         current_expansion = definition;
 737 }
 738
 739 static void finished_expanding(pp_definition_t *definition)
 740 {
 741         assert(definition->is_expanding);
 742         pp_definition_t *parent = definition->parent_expansion;
 743         definition->parent_expansion = NULL;
 744         definition->is_expanding     = false;
 745
 746         /* stop further expanding once we expanded a parameter used in a
 747          * sub macro-call */
 748         if (definition == argument_expanding)
 749                 argument_expanding = NULL;
 750
 751         assert(current_expansion == definition);
 752         current_expansion = parent;
 753 }
 754
 755 static inline void set_punctuator(token_kind_t const kind)
 756 {
 757         pp_token.kind        = kind;
 758         pp_token.base.symbol = token_symbols[kind];
 759 }
 760
 761 static inline void set_digraph(token_kind_t const kind, symbol_t *const symbol)
 762 {
 763         pp_token.kind        = kind;
 764         pp_token.base.symbol = symbol;
 765 }
 766
 767 /**
 768  * returns next final token from a preprocessor macro expansion
 769  */
 770 static bool expand_next(void)
 771 {
 772         if (current_expansion == NULL)
 773                 return false;
 774
 775 restart:;
 776         size_t pos = current_expansion->expand_pos;
 777         if (pos >= current_expansion->list_len) {
 778                 finished_expanding(current_expansion);
 779                 /* it was the outermost expansion, parse pptoken normally */
 780                 if (current_expansion == NULL) {
 781                         return false;
 782                 }
 783                 goto restart;
 784         }
 785         const saved_token_t *saved = &current_expansion->token_list[pos++];
 786         pp_token = saved->token;
 787
 788         if (current_expansion->expand_pos > 0)
 789                 info.had_whitespace = saved->had_whitespace;
 790         pp_token.base.source_position = expansion_pos;
 791         ++current_expansion->expand_pos;
 792
 793         return true;
 794 }
 795
 796 /**
 797  * Returns the next token kind found when continuing the current expansions
 798  * without starting new sub-expansions.
 799  */
 800 static token_kind_t peek_expansion(void)
 801 {
 802         pp_definition_t *expansion = current_expansion;
 803         while (expansion != NULL && expansion->expand_pos >= expansion->list_len) {
 804                 expansion = expansion->parent_expansion;
 805         }
 806         if (expansion == NULL)
 807                 return T_EOF;
 808         return expansion->token_list[expansion->expand_pos].token.kind;
 809 }
 810
 811 static void skip_line_comment(void)
 812 {
 813         info.had_whitespace = true;
 814         while (true) {
 815                 switch (input.c) {
 816                 case EOF:
 817                         return;
 818
 819                 case '\r':
 820                 case '\n':
 821                         return;
 822
 823                 default:
 824                         next_char();
 825                         break;
 826                 }
 827         }
 828 }
 829
 830 static void skip_multiline_comment(void)
 831 {
 832         info.had_whitespace = true;
 833
 834         unsigned start_linenr = input.position.lineno;
 835         while (true) {
 836                 switch (input.c) {
 837                 case '/':
 838                         next_char();
 839                         if (input.c == '*') {
 840                                 /* TODO: nested comment, warn here */
 841                         }
 842                         break;
 843                 case '*':
 844                         next_char();
 845                         if (input.c == '/') {
 846                                 if (input.position.lineno != input.output_line)
 847                                         info.whitespace_at_line_begin = input.position.colno;
 848                                 next_char();
 849                                 return;
 850                         }
 851                         break;
 852
 853                 case NEWLINE:
 854                         break;
 855
 856                 case EOF: {
 857                         source_position_t source_position;
 858                         source_position.input_name = pp_token.base.source_position.input_name;
 859                         source_position.lineno     = start_linenr;
 860                         errorf(&source_position, "at end of file while looking for comment end");
 861                         return;
 862                 }
 863
 864                 default:
 865                         next_char();
 866                         break;
 867                 }
 868         }
 869 }
 870
 871 static bool skip_till_newline(bool stop_at_non_whitespace)
 872 {
 873         bool res = false;
 874         while (true) {
 875                 switch (input.c) {
 876                 case ' ':
 877                 case '\t':
 878                         next_char();
 879                         continue;
 880
 881                 case '/':
 882                         next_char();
 883                         if (input.c == '/') {
 884                                 next_char();
 885                                 skip_line_comment();
 886                                 continue;
 887                         } else if (input.c == '*') {
 888                                 next_char();
 889                                 skip_multiline_comment();
 890                                 continue;
 891                         } else {
 892                                 put_back(input.c);
 893                                 input.c = '/';
 894                         }
 895                         return true;
 896
 897                 case NEWLINE:
 898                         return res;
 899
 900                 default:
 901                         if (stop_at_non_whitespace)
 902                                 return false;
 903                         res = true;
 904                         next_char();
 905                         continue;
 906                 }
 907         }
 908 }
 909
 910 static void skip_whitespace(void)
 911 {
 912         while (true) {
 913                 switch (input.c) {
 914                 case ' ':
 915                 case '\t':
 916                         ++info.whitespace_at_line_begin;
 917                         info.had_whitespace = true;
 918                         next_char();
 919                         continue;
 920
 921                 case NEWLINE:
 922                         info.at_line_begin  = true;
 923                         info.had_whitespace = true;
 924                         info.whitespace_at_line_begin = 0;
 925                         continue;
 926
 927                 case '/':
 928                         next_char();
 929                         if (input.c == '/') {
 930                                 next_char();
 931                                 skip_line_comment();
 932                                 continue;
 933                         } else if (input.c == '*') {
 934                                 next_char();
 935                                 skip_multiline_comment();
 936                                 continue;
 937                         } else {
 938                                 put_back(input.c);
 939                                 input.c = '/';
 940                         }
 941                         return;
 942
 943                 default:
 944                         return;
 945                 }
 946         }
 947 }
 948
 949 static inline void eat_pp(pp_token_kind_t const kind)
 950 {
 951         assert(pp_token.base.symbol->pp_ID == kind);
 952         (void) kind;
 953         next_input_token();
 954 }
 955
 956 static inline void eat_token(token_kind_t const kind)
 957 {
 958         assert(pp_token.kind == kind);
 959         (void)kind;
 960         next_input_token();
 961 }
 962
 963 static void parse_symbol(void)
 964 {
 965         assert(obstack_object_size(&symbol_obstack) == 0);
 966         while (true) {
 967                 switch (input.c) {
 968                 case DIGIT_CASES:
 969                 case SYMBOL_CASES:
 970                         obstack_1grow(&symbol_obstack, (char) input.c);
 971                         next_char();
 972                         break;
 973
 974                 case '\\':
 975                         next_char();
 976                         switch (input.c) {
 977                         {
 978                                 unsigned n;
 979                         case 'U': n = 8; goto universal;
 980                         case 'u': n = 4; goto universal;
 981 universal:
 982                                 if (!resolve_escape_sequences) {
 983                                         obstack_1grow(&symbol_obstack, '\\');
 984                                         obstack_1grow(&symbol_obstack, input.c);
 985                                 }
 986                                 next_char();
 987                                 utf32 const v = parse_universal_char(n);
 988                                 if (!is_universal_char_valid_identifier(v)) {
 989                                         if (is_universal_char_valid(v)) {
 990                                                 errorf(&input.position,
 991                                                            "universal character \\%c%0*X is not valid in an identifier",
 992                                                            n == 4 ? 'u' : 'U', (int)n, v);
 993                                         }
 994                                 } else if (obstack_object_size(&symbol_obstack) == 0 && !is_universal_char_valid_identifier_start(v)) {
 995                                         errorf(&input.position,
 996                                                    "universal character \\%c%0*X is not valid as start of an identifier",
 997                                                    n == 4 ? 'u' : 'U', (int)n, v);
 998                                 } else if (resolve_escape_sequences) {
 999                                         obstack_grow_utf8(&symbol_obstack, v);
1000                                 }
1001                                 break;
1002                         }
1003
1004                         default:
1005                                 put_back(input.c);
1006                                 input.c = '\\';
1007                                 goto end_symbol;
1008                         }
1009
1010                 default:
1011 dollar_sign:
1012                         goto end_symbol;
1013                 }
1014         }
1015
1016 end_symbol:
1017         obstack_1grow(&symbol_obstack, '\0');
1018         char *string = obstack_finish(&symbol_obstack);
1019
1020         /* might be a wide string or character constant ( L"string"/L'c' ) */
1021         if (input.c == '"' && string[0] == 'L' && string[1] == '\0') {
1022                 obstack_free(&symbol_obstack, string);
1023                 parse_string_literal(STRING_ENCODING_WIDE);
1024                 return;
1025         } else if (input.c == '\'' && string[0] == 'L' && string[1] == '\0') {
1026                 obstack_free(&symbol_obstack, string);
1027                 parse_character_constant(STRING_ENCODING_WIDE);
1028                 return;
1029         }
1030
1031         symbol_t *symbol = symbol_table_insert(string);
1032
1033         pp_token.kind        = symbol->ID;
1034         pp_token.base.symbol = symbol;
1035
1036         /* we can free the memory from symbol obstack if we already had an entry in
1037          * the symbol table */
1038         if (symbol->string != string) {
1039                 obstack_free(&symbol_obstack, string);
1040         }
1041 }
1042
1043 static void parse_number(void)
1044 {
1045         obstack_1grow(&symbol_obstack, (char) input.c);
1046         next_char();
1047
1048         while (true) {
1049                 switch (input.c) {
1050                 case '.':
1051                 case DIGIT_CASES:
1052                 case SYMBOL_CASES_WITHOUT_E_P:
1053                         obstack_1grow(&symbol_obstack, (char) input.c);
1054                         next_char();
1055                         break;
1056
1057                 case 'e':
1058                 case 'p':
1059                 case 'E':
1060                 case 'P':
1061                         obstack_1grow(&symbol_obstack, (char) input.c);
1062                         next_char();
1063                         if (input.c == '+' || input.c == '-') {
1064                                 obstack_1grow(&symbol_obstack, (char) input.c);
1065                                 next_char();
1066                         }
1067                         break;
1068
1069                 default:
1070 dollar_sign:
1071                         goto end_number;
1072                 }
1073         }
1074
1075 end_number:
1076         pp_token.kind           = T_NUMBER;
1077         pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
1078 }
1079
/* Helper macros used by next_input_token() to lex punctuators which consist
 * of more than one character.  MAYBE_PROLOG opens a switch statement which
 * has to be closed by a matching ELSE_CODE()/ELSE(), so these macros only
 * work as a group. */

/* Consumes the current character and opens a switch on the character that
 * follows it. */
1080 #define MAYBE_PROLOG \
1081         next_char(); \
1082         switch (input.c) {
1083
/* If the character is ch: consume it, make the token the punctuator kind
 * and return from the enclosing function. */
1084 #define MAYBE(ch, kind) \
1085         case ch: \
1086                 next_char(); \
1087                 set_punctuator(kind); \
1088                 return;
1089
/* Like MAYBE, but records the token through set_digraph(kind, symbol). */
1090 #define MAYBE_DIGRAPH(ch, kind, symbol) \
1091         case ch: \
1092                 next_char(); \
1093                 set_digraph(kind, symbol); \
1094                 return;
1095
/* Default branch: runs code, returns from the enclosing function and closes
 * the switch opened by MAYBE_PROLOG. */
1096 #define ELSE_CODE(code) \
1097         default: \
1098                 code \
1099                 return; \
1100         }
1101
/* Default branch which just makes the token the punctuator kind. */
1102 #define ELSE(kind) ELSE_CODE(set_punctuator(kind);)
1103
1104 /** Identifies the next preprocessing token contained in the
1105  * input stream and stores it in pp_token. No macro expansion is performed.
      *
      * Blanks, tabs, line ends and comments in front of the token are skipped;
      * what was skipped is recorded in info (had_whitespace, at_line_begin,
      * whitespace_at_line_begin).  When the end of an included input is reached
      * the previous input is restored and lexing continues there; at the end of
      * the outermost input the token is T_EOF. */
1106 static void next_input_token(void)
1107 {
             /* take over whitespace information which was stored in next_info,
              * otherwise start with a clean state for this token */
1108         if (next_info.had_whitespace) {
1109                 info = next_info;
1110                 next_info.had_whitespace = false;
1111         } else {
1112                 info.at_line_begin  = false;
1113                 info.had_whitespace = false;
1114         }
1115 restart:
             /* (re)start of the token: remember where it begins */
1116         pp_token.base.source_position = input.position;
1117         pp_token.base.symbol          = NULL;
1118
1119         switch (input.c) {
1120         case ' ':
1121         case '\t':
1122                 info.whitespace_at_line_begin++;
1123                 info.had_whitespace = true;
1124                 next_char();
1125                 goto restart;
1126
             /* no next_char() here; NOTE(review): presumably the NEWLINE macro
              * consumes the line end itself -- confirm at its definition */
1127         case NEWLINE:
1128                 info.at_line_begin            = true;
1129                 info.had_whitespace           = true;
1130                 info.whitespace_at_line_begin = 0;
1131                 goto restart;
1132
1133         case SYMBOL_CASES:
1134                 parse_symbol();
1135                 return;
1136
1137         case DIGIT_CASES:
1138                 parse_number();
1139                 return;
1140
1141         case '"':
1142                 parse_string_literal(STRING_ENCODING_CHAR);
1143                 return;
1144
1145         case '\'':
1146                 parse_character_constant(STRING_ENCODING_CHAR);
1147                 return;
1148
             /* '.' can start a number (".5"), be part of "..." or stand alone */
1149         case '.':
1150                 MAYBE_PROLOG
1151                         case '0':
1152                         case '1':
1153                         case '2':
1154                         case '3':
1155                         case '4':
1156                         case '5':
1157                         case '6':
1158                         case '7':
1159                         case '8':
1160                         case '9':
                                     /* restore the '.' so that parse_number() sees
                                      * the number from its first character */
1161                                 put_back(input.c);
1162                                 input.c = '.';
1163                                 parse_number();
1164                                 return;
1165
                             /* ".." which is not followed by a third '.': emit a
                              * single '.' and leave the second one in the input */
1166                         case '.':
1167                                 MAYBE_PROLOG
1168                                 MAYBE('.', T_DOTDOTDOT)
1169                                 ELSE_CODE(
1170                                         put_back(input.c);
1171                                         input.c = '.';
1172                                         set_punctuator('.');
1173                                 )
1174                 ELSE('.')
1175         case '&':
1176                 MAYBE_PROLOG
1177                 MAYBE('&', T_ANDAND)
1178                 MAYBE('=', T_ANDEQUAL)
1179                 ELSE('&')
1180         case '*':
1181                 MAYBE_PROLOG
1182                 MAYBE('=', T_ASTERISKEQUAL)
1183                 ELSE('*')
1184         case '+':
1185                 MAYBE_PROLOG
1186                 MAYBE('+', T_PLUSPLUS)
1187                 MAYBE('=', T_PLUSEQUAL)
1188                 ELSE('+')
1189         case '-':
1190                 MAYBE_PROLOG
1191                 MAYBE('>', T_MINUSGREATER)
1192                 MAYBE('-', T_MINUSMINUS)
1193                 MAYBE('=', T_MINUSEQUAL)
1194                 ELSE('-')
1195         case '!':
1196                 MAYBE_PROLOG
1197                 MAYBE('=', T_EXCLAMATIONMARKEQUAL)
1198                 ELSE('!')
             /* '/' is either "/=", the start of a comment (which is skipped
              * before lexing restarts) or a plain '/' */
1199         case '/':
1200                 MAYBE_PROLOG
1201                 MAYBE('=', T_SLASHEQUAL)
1202                 case '*':
1203                         next_char();
1204                         skip_multiline_comment();
1205                         goto restart;
1206                 case '/':
1207                         next_char();
1208                         skip_line_comment();
1209                         goto restart;
1210                 ELSE('/')
             /* '%' additionally starts the digraphs "%>", "%:" and "%:%:" */
1211         case '%':
1212                 MAYBE_PROLOG
1213                 MAYBE_DIGRAPH('>', '}', symbol_percentgreater)
1214                 MAYBE('=', T_PERCENTEQUAL)
1215                 case ':':
1216                         MAYBE_PROLOG
1217                         case '%':
1218                                 MAYBE_PROLOG
1219                                 MAYBE_DIGRAPH(':', T_HASHHASH, symbol_percentcolonpercentcolon)
                                     /* "%:%" without a final ':' : give back the
                                      * '%' and emit just the "%:" digraph */
1220                                 ELSE_CODE(
1221                                         put_back(input.c);
1222                                         input.c = '%';
1223                                         goto digraph_percentcolon;
1224                                 )
1225                         ELSE_CODE(
1226 digraph_percentcolon:
1227                                 set_digraph('#', symbol_percentcolon);
1228                         )
1229                 ELSE('%')
1230         case '<':
1231                 MAYBE_PROLOG
1232                 MAYBE_DIGRAPH(':', '[', symbol_lesscolon)
1233                 MAYBE_DIGRAPH('%', '{', symbol_lesspercent)
1234                 MAYBE('=', T_LESSEQUAL)
1235                 case '<':
1236                         MAYBE_PROLOG
1237                         MAYBE('=', T_LESSLESSEQUAL)
1238                         ELSE(T_LESSLESS)
1239                 ELSE('<')
1240         case '>':
1241                 MAYBE_PROLOG
1242                 MAYBE('=', T_GREATEREQUAL)
1243                 case '>':
1244                         MAYBE_PROLOG
1245                         MAYBE('=', T_GREATERGREATEREQUAL)
1246                         ELSE(T_GREATERGREATER)
1247                 ELSE('>')
1248         case '^':
1249                 MAYBE_PROLOG
1250                 MAYBE('=', T_CARETEQUAL)
1251                 ELSE('^')
1252         case '|':
1253                 MAYBE_PROLOG
1254                 MAYBE('=', T_PIPEEQUAL)
1255                 MAYBE('|', T_PIPEPIPE)
1256                 ELSE('|')
1257         case ':':
1258                 MAYBE_PROLOG
1259                 MAYBE_DIGRAPH('>', ']', symbol_colongreater)
                     /* "::" is a token of its own only if _CXX is set in c_mode;
                      * otherwise the second ':' stays in the input and a single
                      * ':' is emitted */
1260                 case ':':
1261                         if (c_mode & _CXX) {
1262                                 next_char();
1263                                 set_punctuator(T_COLONCOLON);
1264                                 return;
1265                         }
1266                         /* FALLTHROUGH */
1267                 ELSE(':')
1268         case '=':
1269                 MAYBE_PROLOG
1270                 MAYBE('=', T_EQUALEQUAL)
1271                 ELSE('=')
1272         case '#':
1273                 MAYBE_PROLOG
1274                 MAYBE('#', T_HASHHASH)
1275                 ELSE('#')
1276
             /* punctuators which always consist of a single character */
1277         case '?':
1278         case '[':
1279         case ']':
1280         case '(':
1281         case ')':
1282         case '{':
1283         case '}':
1284         case '~':
1285         case ';':
1286         case ',':
1287                 set_punctuator(input.c);
1288                 next_char();
1289                 return;
1290
1291         case EOF:
                     /* a non-empty input stack means the current input is finished
                      * while an outer one is pending: close it, restore the
                      * previous input, print a line directive and continue there.
                      * NOTE(review): the "2" presumably is the "returning to a
                      * file" flag of GCC-style line markers -- confirm. */
1292                 if (input_stack != NULL) {
1293                         close_input();
1294                         pop_restore_input();
1295                         fputc('\n', out);
1296                         if (input.c == (utf32)EOF)
1297                                 --input.position.lineno;
1298                         print_line_directive(&input.position, "2");
1299                         goto restart;
1300                 } else {
                             /* flagging the line begin makes loops which wait for
                              * info.at_line_begin stop at the end of the input */
1301                         info.at_line_begin = true;
1302                         set_punctuator(T_EOF);
1303                 }
1304                 return;
1305
             /* a backslash only starts a token if it introduces a universal
              * character name; peek at the next character and restore the
              * backslash as current character before deciding */
1306         case '\\':
1307                 next_char();
1308                 int next_c = input.c;
1309                 put_back(input.c);
1310                 input.c = '\\';
1311                 if (next_c == 'U' || next_c == 'u') {
1312                         parse_symbol();
1313                         return;
1314                 }
1315                 /* FALLTHROUGH */
1316         default:
1317 dollar_sign:
                     /* unknown character: either report it and skip it, or hand it
                      * on as a T_UNKNOWN_CHAR token whose symbol is the character's
                      * UTF-8 spelling */
1318                 if (error_on_unknown_chars) {
1319                         errorf(&pp_token.base.source_position,
1320                                "unknown character '%lc' found\n", input.c);
1321                         next_char();
1322                         goto restart;
1323                 } else {
1324                         assert(obstack_object_size(&symbol_obstack) == 0);
1325                         obstack_grow_utf8(&symbol_obstack, input.c);
1326                         obstack_1grow(&symbol_obstack, '\0');
1327                         char     *const string = obstack_finish(&symbol_obstack);
1328                         symbol_t *const symbol = symbol_table_insert(string);
                             /* the spelling is only needed if the symbol is new */
1329                         if (symbol->string != string)
1330                                 obstack_free(&symbol_obstack, string);
1331
1332                         pp_token.kind        = T_UNKNOWN_CHAR;
1333                         pp_token.base.symbol = symbol;
1334                         next_char();
1335                         return;
1336                 }
1337         }
1338 }
1339
1340 static void print_quoted_string(const char *const string)
1341 {
1342         fputc('"', out);
1343         for (const char *c = string; *c != 0; ++c) {
1344                 switch (*c) {
1345                 case '"': fputs("\\\"", out); break;
1346                 case '\\':  fputs("\\\\", out); break;
1347                 case '\a':  fputs("\\a", out); break;
1348                 case '\b':  fputs("\\b", out); break;
1349                 case '\f':  fputs("\\f", out); break;
1350                 case '\n':  fputs("\\n", out); break;
1351                 case '\r':  fputs("\\r", out); break;
1352                 case '\t':  fputs("\\t", out); break;
1353                 case '\v':  fputs("\\v", out); break;
1354                 case '\?':  fputs("\\?", out); break;
1355                 default:
1356                         if (!isprint(*c)) {
1357                                 fprintf(out, "\\%03o", (unsigned)*c);
1358                                 break;
1359                         }
1360                         fputc(*c, out);
1361                         break;
1362                 }
1363         }
1364         fputc('"', out);
1365 }
1366
1367 static void print_line_directive(const source_position_t *pos, const char *add)
1368 {
1369         fprintf(out, "# %u ", pos->lineno);
1370         print_quoted_string(pos->input_name);
1371         if (add != NULL) {
1372                 fputc(' ', out);
1373                 fputs(add, out);
1374         }
1375
1376         printed_input_name = pos->input_name;
1377         input.output_line  = pos->lineno-1;
1378 }
1379
1380 static bool emit_newlines(void)
1381 {
1382         unsigned delta = pp_token.base.source_position.lineno - input.output_line;
1383         if (delta == 0)
1384                 return false;
1385
1386         if (delta >= 9) {
1387                 fputc('\n', out);
1388                 print_line_directive(&pp_token.base.source_position, NULL);
1389                 fputc('\n', out);
1390         } else {
1391                 for (unsigned i = 0; i < delta; ++i) {
1392                         fputc('\n', out);
1393                 }
1394         }
1395         input.output_line = pp_token.base.source_position.lineno;
1396
1397         for (unsigned i = 0; i < info.whitespace_at_line_begin; ++i)
1398                 fputc(' ', out);
1399
1400         return true;
1401 }
1402
1403 static void emit_pp_token(void)
1404 {
1405         if (!emit_newlines() &&
1406             (info.had_whitespace || tokens_would_paste(last_token, pp_token.kind)))
1407                 fputc(' ', out);
1408
1409         switch (pp_token.kind) {
1410         case T_NUMBER:
1411                 fputs(pp_token.literal.string.begin, out);
1412                 break;
1413
1414         case T_STRING_LITERAL:
1415                 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1416                 fputc('"', out);
1417                 fputs(pp_token.literal.string.begin, out);
1418                 fputc('"', out);
1419                 break;
1420
1421         case T_CHARACTER_CONSTANT:
1422                 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1423                 fputc('\'', out);
1424                 fputs(pp_token.literal.string.begin, out);
1425                 fputc('\'', out);
1426                 break;
1427
1428         case T_MACRO_PARAMETER:
1429                 panic("macro parameter not expanded");
1430
1431         default:
1432                 fputs(pp_token.base.symbol->string, out);
1433                 break;
1434         }
1435         last_token = pp_token.kind;
1436 }
1437
1438 static void eat_pp_directive(void)
1439 {
1440         while (!info.at_line_begin) {
1441                 next_input_token();
1442         }
1443 }
1444
1445 static bool strings_equal(const string_t *string1, const string_t *string2)
1446 {
1447         size_t size = string1->size;
1448         if (size != string2->size)
1449                 return false;
1450
1451         const char *c1 = string1->begin;
1452         const char *c2 = string2->begin;
1453         for (size_t i = 0; i < size; ++i, ++c1, ++c2) {
1454                 if (*c1 != *c2)
1455                         return false;
1456         }
1457         return true;
1458 }
1459
1460 static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
1461 {
1462         if (token1->kind != token2->kind)
1463                 return false;
1464
1465         switch (token1->kind) {
1466         case T_NUMBER:
1467         case T_CHARACTER_CONSTANT:
1468         case T_STRING_LITERAL:
1469                 return strings_equal(&token1->literal.string, &token2->literal.string);
1470
1471         case T_MACRO_PARAMETER:
1472                 return token1->macro_parameter.def->symbol
1473                     == token2->macro_parameter.def->symbol;
1474
1475         default:
1476                 return token1->base.symbol == token2->base.symbol;
1477         }
1478 }
1479
1480 static bool pp_definitions_equal(const pp_definition_t *definition1,
1481                                  const pp_definition_t *definition2)
1482 {
1483         if (definition1->list_len != definition2->list_len)
1484                 return false;
1485
1486         size_t               len = definition1->list_len;
1487         const saved_token_t *t1  = definition1->token_list;
1488         const saved_token_t *t2  = definition2->token_list;
1489         for (size_t i = 0; i < len; ++i, ++t1, ++t2) {
1490                 if (!pp_tokens_equal(&t1->token, &t2->token))
1491                         return false;
1492         }
1493         return true;
1494 }
1495
1496 static void parse_define_directive(void)
1497 {
1498         eat_pp(TP_define);
1499         if (skip_mode) {
1500                 eat_pp_directive();
1501                 return;
1502         }
1503
1504         assert(obstack_object_size(&pp_obstack) == 0);
1505
1506         if (pp_token.kind != T_IDENTIFIER || info.at_line_begin) {
1507                 errorf(&pp_token.base.source_position,
1508                        "expected identifier after #define, got %K", &pp_token);
1509                 goto error_out;
1510         }
1511         symbol_t *const symbol = pp_token.base.symbol;
1512
1513         pp_definition_t *new_definition
1514                 = obstack_alloc(&pp_obstack, sizeof(new_definition[0]));
1515         memset(new_definition, 0, sizeof(new_definition[0]));
1516         new_definition->symbol          = symbol;
1517         new_definition->source_position = input.position;
1518
1519         /* this is probably the only place where spaces are significant in the
1520          * lexer (except for the fact that they separate tokens). #define b(x)
1521          * is something else than #define b (x) */
1522         if (input.c == '(') {
1523                 eat_token(T_IDENTIFIER);
1524                 eat_token('(');
1525
1526                 while (true) {
1527                         switch (pp_token.kind) {
1528                         case T_DOTDOTDOT:
1529                                 new_definition->is_variadic = true;
1530                                 eat_token(T_DOTDOTDOT);
1531                                 if (pp_token.kind != ')') {
1532                                         errorf(&input.position,
1533                                                         "'...' not at end of macro argument list");
1534                                         goto error_out;
1535                                 }
1536                                 break;
1537
1538                         case T_IDENTIFIER: {
1539                                 pp_definition_t parameter;
1540                                 memset(&parameter, 0, sizeof(parameter));
1541                                 parameter.source_position = pp_token.base.source_position;
1542                                 parameter.symbol          = pp_token.base.symbol;
1543                                 parameter.is_parameter    = true;
1544                                 obstack_grow(&pp_obstack, &parameter, sizeof(parameter));
1545                                 eat_token(T_IDENTIFIER);
1546
1547                                 if (pp_token.kind == ',') {
1548                                         eat_token(',');
1549                                         break;
1550                                 }
1551
1552                                 if (pp_token.kind != ')') {
1553                                         errorf(&pp_token.base.source_position,
1554                                                "expected ',' or ')' after identifier, got %K",
1555                                                &pp_token);
1556                                         goto error_out;
1557                                 }
1558                                 break;
1559                         }
1560
1561                         case ')':
1562                                 eat_token(')');
1563                                 goto finish_argument_list;
1564
1565                         default:
1566                                 errorf(&pp_token.base.source_position,
1567                                        "expected identifier, '...' or ')' in #define argument list, got %K",
1568                                        &pp_token);
1569                                 goto error_out;
1570                         }
1571                 }
1572
1573         finish_argument_list:
1574                 new_definition->has_parameters = true;
1575                 size_t size = obstack_object_size(&pp_obstack);
1576                 new_definition->n_parameters
1577                         = size / sizeof(new_definition->parameters[0]);
1578                 new_definition->parameters = obstack_finish(&pp_obstack);
1579                 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1580                         pp_definition_t *param    = &new_definition->parameters[i];
1581                         symbol_t        *symbol   = param->symbol;
1582                         pp_definition_t *previous = symbol->pp_definition;
1583                         if (previous != NULL
1584                             && previous->function_definition == new_definition) {
1585                                 errorf(&param->source_position,
1586                                        "duplicate macro parameter '%Y'", symbol);
1587                                 param->symbol = sym_anonymous;
1588                                 continue;
1589                         }
1590                         param->parent_expansion    = previous;
1591                         param->function_definition = new_definition;
1592                         symbol->pp_definition      = param;
1593                 }
1594         } else {
1595                 eat_token(T_IDENTIFIER);
1596         }
1597
1598         /* construct token list */
1599         assert(obstack_object_size(&pp_obstack) == 0);
1600         while (!info.at_line_begin) {
1601                 if (pp_token.kind == T_IDENTIFIER) {
1602                         const symbol_t  *symbol     = pp_token.base.symbol;
1603                         pp_definition_t *definition = symbol->pp_definition;
1604                         if (definition != NULL
1605                             && definition->function_definition == new_definition) {
1606                             pp_token.kind                = T_MACRO_PARAMETER;
1607                             pp_token.macro_parameter.def = definition;
1608                         }
1609                 }
1610                 saved_token_t saved_token;
1611                 saved_token.token = pp_token;
1612                 saved_token.had_whitespace = info.had_whitespace;
1613                 obstack_grow(&pp_obstack, &saved_token, sizeof(saved_token));
1614                 next_input_token();
1615         }
1616
1617         new_definition->list_len   = obstack_object_size(&pp_obstack)
1618                 / sizeof(new_definition->token_list[0]);
1619         new_definition->token_list = obstack_finish(&pp_obstack);
1620
1621         if (new_definition->has_parameters) {
1622                 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1623                         pp_definition_t *param      = &new_definition->parameters[i];
1624                         symbol_t        *symbol     = param->symbol;
1625                         if (symbol == sym_anonymous)
1626                                 continue;
1627                         assert(symbol->pp_definition == param);
1628                         assert(param->function_definition == new_definition);
1629                         symbol->pp_definition   = param->parent_expansion;
1630                         param->parent_expansion = NULL;
1631                 }
1632         }
1633
1634         pp_definition_t *old_definition = symbol->pp_definition;
1635         if (old_definition != NULL) {
1636                 if (!pp_definitions_equal(old_definition, new_definition)) {
1637                         warningf(WARN_OTHER, &input.position, "multiple definition of macro '%Y' (first defined %P)", symbol, &old_definition->source_position);
1638                 } else {
1639                         /* reuse the old definition */
1640                         obstack_free(&pp_obstack, new_definition);
1641                         new_definition = old_definition;
1642                 }
1643         }
1644
1645         symbol->pp_definition = new_definition;
1646         return;
1647
1648 error_out:
1649         if (obstack_object_size(&pp_obstack) > 0) {
1650                 char *ptr = obstack_finish(&pp_obstack);
1651                 obstack_free(&pp_obstack, ptr);
1652         }
1653         eat_pp_directive();
1654 }
1655
1656 static void parse_undef_directive(void)
1657 {
1658         eat_pp(TP_undef);
1659         if (skip_mode) {
1660                 eat_pp_directive();
1661                 return;
1662         }
1663
1664         if (pp_token.kind != T_IDENTIFIER) {
1665                 errorf(&input.position,
1666                        "expected identifier after #undef, got %K", &pp_token);
1667                 eat_pp_directive();
1668                 return;
1669         }
1670
1671         pp_token.base.symbol->pp_definition = NULL;
1672         eat_token(T_IDENTIFIER);
1673
1674         if (!info.at_line_begin) {
1675                 warningf(WARN_OTHER, &input.position, "extra tokens at end of #undef directive");
1676         }
1677         eat_pp_directive();
1678 }
1679
1680 /** After an #include we can have the special headername lexemes.
1681  * They're only allowed after an #include, so they're not recognized
1682  * by the normal next_preprocessing_token. We handle them as a special
1683  * exception here. */
1684 static void parse_headername(void)
1685 {
1686         const source_position_t start_position = input.position;
1687         string_t                string         = { NULL, 0, STRING_ENCODING_CHAR };
1688         assert(obstack_object_size(&symbol_obstack) == 0);
1689
1690         if (info.at_line_begin) {
1691                 parse_error("expected headername after #include");
1692                 goto finish_error;
1693         }
1694
1695         /* check wether we have a "... or <... headername */
1696         switch (input.c) {
1697         {
1698                 utf32 delimiter;
1699         case '<': delimiter = '>'; goto parse_name;
1700         case '"': delimiter = '"'; goto parse_name;
1701 parse_name:
1702                 next_char();
1703                 while (true) {
1704                         switch (input.c) {
1705                         case NEWLINE:
1706                         case EOF:
1707                                 errorf(&pp_token.base.source_position, "header name without closing '%c'", (char)delimiter);
1708                                 goto finish_error;
1709
1710                         default:
1711                                 if (input.c == delimiter) {
1712                                         next_char();
1713                                         goto finished_headername;
1714                                 } else {
1715                                         obstack_1grow(&symbol_obstack, (char)input.c);
1716                                         next_char();
1717                                 }
1718                                 break;
1719                         }
1720                 }
1721                 /* we should never be here */
1722         }
1723
1724         default:
1725                 /* TODO: do normal pp_token parsing and concatenate results */
1726                 panic("pp_token concat include not implemented yet");
1727         }
1728
1729 finished_headername:
1730         string = sym_make_string(STRING_ENCODING_CHAR);
1731
1732 finish_error:
1733         pp_token.base.source_position = start_position;
1734         pp_token.kind                 = T_HEADERNAME;
1735         pp_token.literal.string       = string;
1736 }
1737
1738 static bool do_include(bool system_include, const char *headername)
1739 {
1740         size_t headername_len = strlen(headername);
1741         if (!system_include) {
1742                 /* put dirname of current input on obstack */
1743                 const char *filename   = input.position.input_name;
1744                 const char *last_slash = strrchr(filename, '/');
1745                 if (last_slash != NULL) {
1746                         size_t len = last_slash - filename;
1747                         obstack_grow(&symbol_obstack, filename, len + 1);
1748                         obstack_grow0(&symbol_obstack, headername, headername_len);
1749                         char *complete_path = obstack_finish(&symbol_obstack);
1750                         headername = identify_string(complete_path);
1751                 }
1752
1753                 FILE *file = fopen(headername, "r");
1754                 if (file != NULL) {
1755                         switch_input(file, headername);
1756                         return true;
1757                 }
1758         }
1759
1760         assert(obstack_object_size(&symbol_obstack) == 0);
1761         /* check searchpath */
1762         for (searchpath_entry_t *entry = searchpath; entry != NULL;
1763              entry = entry->next) {
1764             const char *path = entry->path;
1765             size_t      len  = strlen(path);
1766                 obstack_grow(&symbol_obstack, path, len);
1767                 if (path[len-1] != '/')
1768                         obstack_1grow(&symbol_obstack, '/');
1769                 obstack_grow(&symbol_obstack, headername, headername_len+1);
1770
1771                 char *complete_path = obstack_finish(&symbol_obstack);
1772                 FILE *file          = fopen(complete_path, "r");
1773                 if (file != NULL) {
1774                         const char *filename = identify_string(complete_path);
1775                         switch_input(file, filename);
1776                         return true;
1777                 } else {
1778                         obstack_free(&symbol_obstack, complete_path);
1779                 }
1780         }
1781
1782         return false;
1783 }
1784
1785 static void parse_include_directive(void)
1786 {
1787         if (skip_mode) {
1788                 eat_pp_directive();
1789                 return;
1790         }
1791
1792         /* don't eat the TP_include here!
1793          * we need an alternative parsing for the next token */
1794         skip_till_newline(true);
1795         bool system_include = input.c == '<';
1796         parse_headername();
1797         string_t headername = pp_token.literal.string;
1798         if (headername.begin == NULL) {
1799                 eat_pp_directive();
1800                 return;
1801         }
1802
1803         bool had_nonwhitespace = skip_till_newline(false);
1804         if (had_nonwhitespace) {
1805                 warningf(WARN_OTHER, &pp_token.base.source_position,
1806                          "extra tokens at end of #include directive");
1807         }
1808
1809         if (n_inputs > INCLUDE_LIMIT) {
1810                 errorf(&pp_token.base.source_position, "#include nested too deeply");
1811                 /* eat \n or EOF */
1812                 next_input_token();
1813                 return;
1814         }
1815
1816         /* switch inputs */
1817         info.whitespace_at_line_begin = 0;
1818         info.had_whitespace           = false;
1819         info.at_line_begin            = true;
1820         emit_newlines();
1821         push_input();
1822         bool res = do_include(system_include, pp_token.literal.string.begin);
1823         if (res) {
1824                 next_input_token();
1825         } else {
1826                 errorf(&pp_token.base.source_position, "failed including '%S': %s", &pp_token.literal.string, strerror(errno));
1827                 pop_restore_input();
1828         }
1829 }
1830
1831 static pp_conditional_t *push_conditional(void)
1832 {
1833         pp_conditional_t *conditional
1834                 = obstack_alloc(&pp_obstack, sizeof(*conditional));
1835         memset(conditional, 0, sizeof(*conditional));
1836
1837         conditional->parent = conditional_stack;
1838         conditional_stack   = conditional;
1839
1840         return conditional;
1841 }
1842
1843 static void pop_conditional(void)
1844 {
1845         assert(conditional_stack != NULL);
1846         conditional_stack = conditional_stack->parent;
1847 }
1848
1849 static void check_unclosed_conditionals(void)
1850 {
1851         while (conditional_stack != NULL) {
1852                 pp_conditional_t *conditional = conditional_stack;
1853
1854                 if (conditional->in_else) {
1855                         errorf(&conditional->source_position, "unterminated #else");
1856                 } else {
1857                         errorf(&conditional->source_position, "unterminated condition");
1858                 }
1859                 pop_conditional();
1860         }
1861 }
1862
1863 static void parse_ifdef_ifndef_directive(bool const is_ifdef)
1864 {
1865         bool condition;
1866         eat_pp(is_ifdef ? TP_ifdef : TP_ifndef);
1867
1868         if (skip_mode) {
1869                 eat_pp_directive();
1870                 pp_conditional_t *conditional = push_conditional();
1871                 conditional->source_position  = pp_token.base.source_position;
1872                 conditional->skip             = true;
1873                 return;
1874         }
1875
1876         if (pp_token.kind != T_IDENTIFIER || info.at_line_begin) {
1877                 errorf(&pp_token.base.source_position,
1878                        "expected identifier after #%s, got %K",
1879                        is_ifdef ? "ifdef" : "ifndef", &pp_token);
1880                 eat_pp_directive();
1881
1882                 /* just take the true case in the hope to avoid further errors */
1883                 condition = true;
1884         } else {
1885                 /* evaluate wether we are in true or false case */
1886                 condition = (bool)pp_token.base.symbol->pp_definition == is_ifdef;
1887                 eat_token(T_IDENTIFIER);
1888
1889                 if (!info.at_line_begin) {
1890                         errorf(&pp_token.base.source_position,
1891                                "extra tokens at end of #%s",
1892                                is_ifdef ? "ifdef" : "ifndef");
1893                         eat_pp_directive();
1894                 }
1895         }
1896
1897         pp_conditional_t *conditional = push_conditional();
1898         conditional->source_position  = pp_token.base.source_position;
1899         conditional->condition        = condition;
1900
1901         if (!condition) {
1902                 skip_mode = true;
1903         }
1904 }
1905
1906 static void parse_else_directive(void)
1907 {
1908         eat_pp(TP_else);
1909
1910         if (!info.at_line_begin) {
1911                 if (!skip_mode) {
1912                         warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #else");
1913                 }
1914                 eat_pp_directive();
1915         }
1916
1917         pp_conditional_t *conditional = conditional_stack;
1918         if (conditional == NULL) {
1919                 errorf(&pp_token.base.source_position, "#else without prior #if");
1920                 return;
1921         }
1922
1923         if (conditional->in_else) {
1924                 errorf(&pp_token.base.source_position,
1925                        "#else after #else (condition started %P)",
1926                        &conditional->source_position);
1927                 skip_mode = true;
1928                 return;
1929         }
1930
1931         conditional->in_else = true;
1932         if (!conditional->skip) {
1933                 skip_mode = conditional->condition;
1934         }
1935         conditional->source_position = pp_token.base.source_position;
1936 }
1937
1938 static void parse_endif_directive(void)
1939 {
1940         eat_pp(TP_endif);
1941
1942         if (!info.at_line_begin) {
1943                 if (!skip_mode) {
1944                         warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #endif");
1945                 }
1946                 eat_pp_directive();
1947         }
1948
1949         pp_conditional_t *conditional = conditional_stack;
1950         if (conditional == NULL) {
1951                 errorf(&pp_token.base.source_position, "#endif without prior #if");
1952                 return;
1953         }
1954
1955         if (!conditional->skip) {
1956                 skip_mode = false;
1957         }
1958         pop_conditional();
1959 }
1960
/** Which "#pragma STDC" was named; STDC_UNKNOWN stands for none of them. */
typedef enum stdc_pragma_kind_t {
	STDC_UNKNOWN          = 0,
	STDC_FP_CONTRACT      = 1,
	STDC_FENV_ACCESS      = 2,
	STDC_CX_LIMITED_RANGE = 3
} stdc_pragma_kind_t;
1967
/** Argument of a "#pragma STDC"; STDC_VALUE_UNKNOWN marks anything else. */
typedef enum stdc_pragma_value_kind_t {
	STDC_VALUE_UNKNOWN = 0,
	STDC_VALUE_ON      = 1,
	STDC_VALUE_OFF     = 2,
	STDC_VALUE_DEFAULT = 3
} stdc_pragma_value_kind_t;
1974
1975 static void parse_pragma_directive(void)
1976 {
1977         eat_pp(TP_pragma);
1978
1979         if (pp_token.kind != T_IDENTIFIER) {
1980                 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
1981                          "expected identifier after #pragma");
1982                 eat_pp_directive();
1983                 return;
1984         }
1985
1986         stdc_pragma_kind_t kind = STDC_UNKNOWN;
1987         if (pp_token.base.symbol->pp_ID == TP_STDC && c_mode & _C99) {
1988                 /* a STDC pragma */
1989                 next_input_token();
1990
1991                 switch (pp_token.base.symbol->pp_ID) {
1992                 case TP_FP_CONTRACT:      kind = STDC_FP_CONTRACT;      break;
1993                 case TP_FENV_ACCESS:      kind = STDC_FENV_ACCESS;      break;
1994                 case TP_CX_LIMITED_RANGE: kind = STDC_CX_LIMITED_RANGE; break;
1995                 default:                  break;
1996                 }
1997                 if (kind != STDC_UNKNOWN) {
1998                         next_input_token();
1999                         stdc_pragma_value_kind_t value;
2000                         switch (pp_token.base.symbol->pp_ID) {
2001                         case TP_ON:      value = STDC_VALUE_ON;      break;
2002                         case TP_OFF:     value = STDC_VALUE_OFF;     break;
2003                         case TP_DEFAULT: value = STDC_VALUE_DEFAULT; break;
2004                         default:         value = STDC_VALUE_UNKNOWN; break;
2005                         }
2006                         if (value == STDC_VALUE_UNKNOWN) {
2007                                 kind = STDC_UNKNOWN;
2008                                 errorf(&pp_token.base.source_position, "bad STDC pragma argument");
2009                         }
2010                 }
2011         }
2012         eat_pp_directive();
2013         if (kind == STDC_UNKNOWN) {
2014                 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
2015                          "encountered unknown #pragma");
2016         }
2017 }
2018
2019 static void parse_line_directive(void)
2020 {
2021         if (pp_token.kind != T_NUMBER) {
2022                 if (!skip_mode)
2023                         parse_error("expected integer");
2024         } else {
2025                 char      *end;
2026                 long const line = strtol(pp_token.literal.string.begin, &end, 0);
2027                 if (*end == '\0') {
2028                         /* use offset -1 as this is about the next line */
2029                         input.position.lineno = line - 1;
2030                         /* force output of line */
2031                         input.output_line = input.position.lineno - 20;
2032                 } else {
2033                         if (!skip_mode) {
2034                                 errorf(&input.position, "'%S' is not a valid line number",
2035                                            &pp_token.literal.string);
2036                         }
2037                 }
2038                 next_input_token();
2039         }
2040         if (pp_token.kind == T_STRING_LITERAL
2041             && pp_token.literal.string.encoding == STRING_ENCODING_CHAR) {
2042                 input.position.input_name       = pp_token.literal.string.begin;
2043                 input.position.is_system_header = false;
2044                 next_input_token();
2045
2046                 /* attempt to parse numeric flags as outputted by gcc preprocessor */
2047                 while (pp_token.kind == T_NUMBER) {
2048                         /* flags:
2049                          * 1 - indicates start of a new file
2050                          * 2 - indicates return from a file
2051                          * 3 - indicates system header
2052                          * 4 - indicates implicit extern "C" in C++ mode
2053                          *
2054                          * currently we're only interested in "3"
2055                          */
2056                         if (streq(pp_token.literal.string.begin, "3")) {
2057                                 input.position.is_system_header = true;
2058                         }
2059                         next_input_token();
2060                 }
2061         }
2062
2063         eat_pp_directive();
2064 }
2065
2066 static void parse_preprocessing_directive(void)
2067 {
2068         eat_token('#');
2069
2070         if (info.at_line_begin) {
2071                 /* empty directive */
2072                 return;
2073         }
2074
2075         if (pp_token.base.symbol) {
2076                 switch (pp_token.base.symbol->pp_ID) {
2077                 case TP_define:  parse_define_directive();            break;
2078                 case TP_else:    parse_else_directive();              break;
2079                 case TP_endif:   parse_endif_directive();             break;
2080                 case TP_ifdef:   parse_ifdef_ifndef_directive(true);  break;
2081                 case TP_ifndef:  parse_ifdef_ifndef_directive(false); break;
2082                 case TP_include: parse_include_directive();           break;
2083                 case TP_line:    next_input_token(); goto line_directive;
2084                 case TP_pragma:  parse_pragma_directive();            break;
2085                 case TP_undef:   parse_undef_directive();             break;
2086                 default:         goto skip;
2087                 }
2088         } else if (pp_token.kind == T_NUMBER) {
2089 line_directive:
2090                 parse_line_directive();
2091         } else {
2092 skip:
2093                 if (!skip_mode) {
2094                         errorf(&pp_token.base.source_position, "invalid preprocessing directive #%K", &pp_token);
2095                 }
2096                 eat_pp_directive();
2097         }
2098
2099         assert(info.at_line_begin);
2100 }
2101
2102 static void finish_current_argument(void)
2103 {
2104         if (current_argument == NULL)
2105                 return;
2106         size_t size = obstack_object_size(&pp_obstack);
2107         current_argument->list_len   = size/sizeof(current_argument->token_list[0]);
2108         current_argument->token_list = obstack_finish(&pp_obstack);
2109 }
2110
2111 static void next_preprocessing_token(void)
2112 {
2113 restart:
2114         if (!expand_next()) {
2115                 do {
2116                         next_input_token();
2117                         while (pp_token.kind == '#' && info.at_line_begin) {
2118                                 parse_preprocessing_directive();
2119                         }
2120                 } while (skip_mode && pp_token.kind != T_EOF);
2121         }
2122
2123         const token_kind_t kind = pp_token.kind;
2124         if (current_call == NULL || argument_expanding != NULL) {
2125                 if (kind == T_IDENTIFIER) {
2126                         symbol_t        *const symbol        = pp_token.base.symbol;
2127                         pp_definition_t *const pp_definition = symbol->pp_definition;
2128                         if (pp_definition != NULL && !pp_definition->is_expanding) {
2129                                 if (pp_definition->has_parameters) {
2130
2131                                         /* check if next token is a '(' */
2132                                         whitespace_info_t old_info   = info;
2133                                         token_kind_t      next_token = peek_expansion();
2134                                         if (next_token == T_EOF) {
2135                                                 info.at_line_begin  = false;
2136                                                 info.had_whitespace = false;
2137                                                 skip_whitespace();
2138                                                 if (input.c == '(') {
2139                                                         next_token = '(';
2140                                                 }
2141                                         }
2142
2143                                         if (next_token == '(') {
2144                                                 if (current_expansion == NULL)
2145                                                         expansion_pos = pp_token.base.source_position;
2146                                                 next_preprocessing_token();
2147                                                 assert(pp_token.kind == '(');
2148
2149                                                 pp_definition->parent_expansion = current_expansion;
2150                                                 current_call              = pp_definition;
2151                                                 current_call->expand_pos  = 0;
2152                                                 current_call->expand_info = old_info;
2153                                                 if (current_call->n_parameters > 0) {
2154                                                         current_argument = &current_call->parameters[0];
2155                                                         assert(argument_brace_count == 0);
2156                                                 }
2157                                                 goto restart;
2158                                         } else {
2159                                                 /* skip_whitespaces() skipped newlines and whitespace,
2160                                                  * remember results for next token */
2161                                                 next_info = info;
2162                                                 info      = old_info;
2163                                                 return;
2164                                         }
2165                                 } else {
2166                                         if (current_expansion == NULL)
2167                                                 expansion_pos = pp_token.base.source_position;
2168                                         start_expanding(pp_definition);
2169                                         goto restart;
2170                                 }
2171                         }
2172                 } else if (kind == T_MACRO_PARAMETER) {
2173                         assert(current_expansion != NULL);
2174                         start_expanding(pp_token.macro_parameter.def);
2175                         goto restart;
2176                 }
2177         }
2178
2179         if (current_call != NULL) {
2180                 /* current_call != NULL */
2181                 if (kind == '(') {
2182                         ++argument_brace_count;
2183                 } else if (kind == ')') {
2184                         if (argument_brace_count > 0) {
2185                                 --argument_brace_count;
2186                         } else {
2187                                 finish_current_argument();
2188                                 assert(kind == ')');
2189                                 start_expanding(current_call);
2190                                 info = current_call->expand_info;
2191                                 current_call     = NULL;
2192                                 current_argument = NULL;
2193                                 goto restart;
2194                         }
2195                 } else if (kind == ',' && argument_brace_count == 0) {
2196                         finish_current_argument();
2197                         current_call->expand_pos++;
2198                         if (current_call->expand_pos >= current_call->n_parameters) {
2199                                 errorf(&pp_token.base.source_position,
2200                                            "too many arguments passed for macro '%Y'",
2201                                            current_call->symbol);
2202                                 current_argument = NULL;
2203                         } else {
2204                                 current_argument
2205                                         = &current_call->parameters[current_call->expand_pos];
2206                         }
2207                         goto restart;
2208                 } else if (kind == T_MACRO_PARAMETER) {
2209                         /* parameters have to be fully expanded before being used as
2210                          * parameters for another macro-call */
2211                         assert(current_expansion != NULL);
2212                         pp_definition_t *argument = pp_token.macro_parameter.def;
2213                         argument_expanding = argument;
2214                         start_expanding(argument);
2215                         goto restart;
2216                 } else if (kind == T_EOF) {
2217                         errorf(&expansion_pos,
2218                                "reached end of file while parsing arguments for '%Y'",
2219                                current_call->symbol);
2220                         return;
2221                 }
2222                 if (current_argument != NULL) {
2223                         saved_token_t saved;
2224                         saved.token = pp_token;
2225                         saved.had_whitespace = info.had_whitespace;
2226                         obstack_grow(&pp_obstack, &saved, sizeof(saved));
2227                 }
2228                 goto restart;
2229         }
2230 }
2231
2232
2233 static void prepend_include_path(const char *path)
2234 {
2235         searchpath_entry_t *entry = OALLOCZ(&config_obstack, searchpath_entry_t);
2236         entry->path = path;
2237         entry->next = searchpath;
2238         searchpath  = entry;
2239 }
2240
2241 static void setup_include_path(void)
2242 {
2243         /* built-in paths */
2244         prepend_include_path("/usr/include");
2245
2246         /* parse environment variable */
2247         const char *cpath = getenv("CPATH");
2248         if (cpath != NULL && *cpath != '\0') {
2249                 const char *begin = cpath;
2250                 const char *c;
2251                 do {
2252                         c = begin;
2253                         while (*c != '\0' && *c != ':')
2254                                 ++c;
2255
2256                         size_t len = c-begin;
2257                         if (len == 0) {
2258                                 /* for gcc compatibility (Matze: I would expect that
2259                                  * nothing happens for an empty entry...) */
2260                                 prepend_include_path(".");
2261                         } else {
2262                                 char *string = obstack_alloc(&config_obstack, len+1);
2263                                 memcpy(string, begin, len);
2264                                 string[len] = '\0';
2265
2266                                 prepend_include_path(string);
2267                         }
2268
2269                         begin = c+1;
2270                         /* skip : */
2271                         if (*begin == ':')
2272                                 ++begin;
2273                 } while(*c != '\0');
2274         }
2275 }
2276
2277 int pptest_main(int argc, char **argv);
2278 int pptest_main(int argc, char **argv)
2279 {
2280         init_symbol_table();
2281         init_tokens();
2282         init_symbols();
2283
2284         obstack_init(&config_obstack);
2285         obstack_init(&pp_obstack);
2286         obstack_init(&input_obstack);
2287         strset_init(&stringset);
2288
2289         error_on_unknown_chars = false;
2290
2291         setup_include_path();
2292
2293         /* simplistic commandline parser */
2294         const char *filename = NULL;
2295         const char *output = NULL;
2296         for (int i = 1; i < argc; ++i) {
2297                 const char *opt = argv[i];
2298                 if (streq(opt, "-I")) {
2299                         prepend_include_path(argv[++i]);
2300                         continue;
2301                 } else if (streq(opt, "-E")) {
2302                         /* ignore */
2303                 } else if (streq(opt, "-o")) {
2304                         output = argv[++i];
2305                         continue;
2306                 } else if (opt[0] == '-') {
2307                         fprintf(stderr, "Unknown option '%s'\n", opt);
2308                 } else {
2309                         if (filename != NULL)
2310                                 fprintf(stderr, "Multiple inputs not supported\n");
2311                         filename = argv[i];
2312                 }
2313         }
2314         if (filename == NULL) {
2315                 fprintf(stderr, "No input specified\n");
2316                 return 1;
2317         }
2318
2319         if (output == NULL) {
2320                 out = stdout;
2321         } else {
2322                 out = fopen(output, "w");
2323                 if (out == NULL) {
2324                         fprintf(stderr, "Couldn't open output '%s'\n", output);
2325                         return 1;
2326                 }
2327         }
2328
2329         /* just here for gcc compatibility */
2330         fprintf(out, "# 1 \"%s\"\n", filename);
2331         fprintf(out, "# 1 \"<built-in>\"\n");
2332         fprintf(out, "# 1 \"<command-line>\"\n");
2333
2334         FILE *file = fopen(filename, "r");
2335         if (file == NULL) {
2336                 fprintf(stderr, "Couldn't open input '%s'\n", filename);
2337                 return 1;
2338         }
2339         switch_input(file, filename);
2340
2341         for (;;) {
2342                 next_preprocessing_token();
2343                 if (pp_token.kind == T_EOF)
2344                         break;
2345                 emit_pp_token();
2346         }
2347
2348         fputc('\n', out);
2349         check_unclosed_conditionals();
2350         close_input();
2351         if (out != stdout)
2352                 fclose(out);
2353
2354         obstack_free(&input_obstack, NULL);
2355         obstack_free(&pp_obstack, NULL);
2356         obstack_free(&config_obstack, NULL);
2357
2358         strset_destroy(&stringset);
2359
2360         exit_tokens();
2361         exit_symbol_table();
2362
2363         return 0;
2364 }