nsz Git - cparser/blob - preprocessor.c

   1 #include <config.h>
   2
   3 #include <assert.h>
   4 #include <errno.h>
   5 #include <string.h>
   6 #include <stdbool.h>
   7 #include <ctype.h>
   8
   9 #include "preprocessor.h"
  10 #include "token_t.h"
  11 #include "symbol_t.h"
  12 #include "adt/util.h"
  13 #include "adt/error.h"
  14 #include "adt/strutil.h"
  15 #include "adt/strset.h"
  16 #include "lang_features.h"
  17 #include "diagnostic.h"
  18 #include "string_rep.h"
  19 #include "input.h"
  20
/* Number of slots kept free at the front of pp_input_t.buf: next_real_char()
 * refills the buffer starting at buf + MAX_PUTBACK, so put_back() can still
 * step backwards right after a refill. */
#define MAX_PUTBACK 3
/* NOTE(review): presumably the maximum nesting depth of #include; the use
 * site is not in this part of the file. */
#define INCLUDE_LIMIT 199  /* 199 is for gcc "compatibility" */
  23
/** Element of a replacement list (pp_definition_t.token_list): a token plus
 * the whitespace flag that expand_next() restores into info.had_whitespace. */
typedef struct saved_token_t {
        token_t token;
        /** whether the token had whitespace in front of it */
        bool    had_whitespace;
} saved_token_t;
  28
/** Whitespace/layout facts gathered in front of a token (see the file-scope
 * variables info and next_info). */
typedef struct whitespace_info_t {
        /** current token had whitespace in front of it */
        bool     had_whitespace;
        /** current token is at the beginning of a line.
         * => a "#" at line begin starts a preprocessing directive. */
        bool     at_line_begin;
        /** number of spaces before the first token in a line */
        unsigned whitespace_at_line_begin;
} whitespace_info_t;
  38
/** A preprocessor definition together with the state of its expansion.
 * NOTE(review): judging by is_parameter/parameters this type presumably
 * describes both macros and macro parameters - confirm against the users. */
struct pp_definition_t {
        symbol_t          *symbol;
        source_position_t  source_position;
        /** expansion that was current when start_expanding() activated this
         * one; finished_expanding() makes it current again */
        pp_definition_t   *parent_expansion;
        /** index into token_list of the next token expand_next() hands out */
        size_t             expand_pos;
        whitespace_info_t  expand_info;
        bool               is_variadic    : 1;
        /** set by start_expanding(), cleared by finished_expanding() */
        bool               is_expanding   : 1;
        bool               has_parameters : 1;
        bool               is_parameter   : 1;
        pp_definition_t   *function_definition;
        size_t             n_parameters;
        pp_definition_t   *parameters;

        /* replacement */
        /** number of entries in token_list */
        size_t             list_len;
        saved_token_t     *token_list;
};
  57
/** One entry of the conditional stack (see conditional_stack); entries are
 * chained through parent.
 * NOTE(review): presumably one entry per open #if/#ifdef/#ifndef - the code
 * that pushes and pops entries is not in this part of the file. */
typedef struct pp_conditional_t pp_conditional_t;
struct pp_conditional_t {
        source_position_t  source_position;
        bool               condition;
        bool               in_else;
        /** conditional in skip mode (then+else gets skipped) */
        bool               skip;
        /** next entry further down the stack */
        pp_conditional_t  *parent;
};
  67
/** State of one input being read: the stream, its decoder, a buffer of
 * decoded characters and the current source position. */
typedef struct pp_input_t pp_input_t;
struct pp_input_t {
        /** stream handed to switch_input(); returned again by close_input() */
        FILE              *file;
        /** decoder created from file by input_from_stream() */
        input_t           *input;
        /** current character, or EOF once the decoder delivers nothing more */
        utf32              c;
        /** decoded characters; the first MAX_PUTBACK slots are never filled by
         * a refill so that put_back() always has room */
        utf32              buf[1024+MAX_PUTBACK];
        /** one past the last valid character in buf */
        const utf32       *bufend;
        /** next character in buf to be read */
        const utf32       *bufpos;
        /** position (input name, line, column) of the current character */
        source_position_t  position;
        /** next entry on input_stack (only meaningful for saved copies) */
        pp_input_t        *parent;
        unsigned           output_line;
};
  80
/** Node of a singly linked list of paths (see the file-scope variable
 * searchpath).
 * NOTE(review): presumably the include search path - confirm at the users. */
typedef struct searchpath_entry_t searchpath_entry_t;
struct searchpath_entry_t {
        const char         *path;
        searchpath_entry_t *next;
};
  86
/* The input currently being read. */
static pp_input_t      input;

/* Suspended inputs: push_input() copies `input` into a node allocated on
 * input_obstack and links it in front of input_stack; pop_restore_input()
 * copies the top node back. n_inputs counts the nodes. */
static pp_input_t     *input_stack;
static unsigned        n_inputs;
static struct obstack  input_obstack;

static pp_conditional_t *conditional_stack;

/* The token most recently produced; diagnostics in this file report at
 * pp_token.base.source_position. */
token_t                  pp_token;
/* When false, the SYMBOL_CASES macros jump to a `dollar_sign` label instead of
 * treating '$' as a symbol character. */
bool                     allow_dollar_in_symbol   = true;
/* When true, parse_string() replaces escape sequences by their value; when
 * false it copies them into the literal unchanged. */
static bool              resolve_escape_sequences = true;
static bool              error_on_unknown_chars   = true;
static bool              skip_mode;
static FILE             *out;
static struct obstack    pp_obstack;
static struct obstack    config_obstack;
static const char       *printed_input_name = NULL;
/* Source position stamped onto every token delivered by expand_next(). */
static source_position_t expansion_pos;
/* Innermost definition currently being expanded (NULL: none); outer ones are
 * reachable through parent_expansion. */
static pp_definition_t  *current_expansion  = NULL;
static pp_definition_t  *current_call       = NULL;
static pp_definition_t  *current_argument   = NULL;
/* Reset to NULL by finished_expanding() when that definition finishes. */
static pp_definition_t  *argument_expanding = NULL;
static unsigned          argument_brace_count;
/* Set used by identify_string() to keep a single copy of each string. */
static strset_t          stringset;
static token_kind_t      last_token;

static searchpath_entry_t *searchpath;

static whitespace_info_t next_info; /* valid if had_whitespace is true */
/* Whitespace facts for the token currently being read. */
static whitespace_info_t info;

/* Forward declarations of functions defined further down in this file. */
static inline void next_char(void);
static void next_input_token(void);
static void print_line_directive(const source_position_t *pos, const char *add);
 121
/* Symbols for the digraph spellings; filled in by init_symbols() and meant to
 * be passed to set_digraph(). */
static symbol_t *symbol_colongreater;
static symbol_t *symbol_lesscolon;
static symbol_t *symbol_lesspercent;
static symbol_t *symbol_percentcolon;
static symbol_t *symbol_percentcolonpercentcolon;
static symbol_t *symbol_percentgreater;
 128
 129 static void init_symbols(void)
 130 {
 131         symbol_colongreater             = symbol_table_insert(":>");
 132         symbol_lesscolon                = symbol_table_insert("<:");
 133         symbol_lesspercent              = symbol_table_insert("<%");
 134         symbol_percentcolon             = symbol_table_insert("%:");
 135         symbol_percentcolonpercentcolon = symbol_table_insert("%:%:");
 136         symbol_percentgreater           = symbol_table_insert("%>");
 137 }
 138
 139 void switch_input(FILE *const file, char const *const filename)
 140 {
 141         input.file                = file;
 142         input.input               = input_from_stream(file, NULL);
 143         input.bufend              = NULL;
 144         input.bufpos              = NULL;
 145         input.output_line         = 0;
 146         input.position.input_name = filename;
 147         input.position.lineno     = 1;
 148
 149         /* indicate that we're at a new input */
 150         print_line_directive(&input.position, input_stack != NULL ? "1" : NULL);
 151
 152         /* place a virtual '\n' so we realize we're at line begin */
 153         input.position.lineno = 0;
 154         input.c               = '\n';
 155 }
 156
 157 FILE* close_input(void)
 158 {
 159         input_free(input.input);
 160
 161         FILE* const file = input.file;
 162         assert(file);
 163
 164         input.input  = NULL;
 165         input.file   = NULL;
 166         input.bufend = NULL;
 167         input.bufpos = NULL;
 168         input.c      = EOF;
 169
 170         return file;
 171 }
 172
 173 static void push_input(void)
 174 {
 175         pp_input_t *saved_input
 176                 = obstack_alloc(&input_obstack, sizeof(*saved_input));
 177
 178         memcpy(saved_input, &input, sizeof(*saved_input));
 179
 180         /* adjust buffer positions */
 181         if (input.bufpos != NULL)
 182                 saved_input->bufpos = saved_input->buf + (input.bufpos - input.buf);
 183         if (input.bufend != NULL)
 184                 saved_input->bufend = saved_input->buf + (input.bufend - input.buf);
 185
 186         saved_input->parent = input_stack;
 187         input_stack         = saved_input;
 188         ++n_inputs;
 189 }
 190
 191 static void pop_restore_input(void)
 192 {
 193         assert(n_inputs > 0);
 194         assert(input_stack != NULL);
 195
 196         pp_input_t *saved_input = input_stack;
 197
 198         memcpy(&input, saved_input, sizeof(input));
 199         input.parent = NULL;
 200
 201         /* adjust buffer positions */
 202         if (saved_input->bufpos != NULL)
 203                 input.bufpos = input.buf + (saved_input->bufpos - saved_input->buf);
 204         if (saved_input->bufend != NULL)
 205                 input.bufend = input.buf + (saved_input->bufend - saved_input->buf);
 206
 207         input_stack = saved_input->parent;
 208         obstack_free(&input_obstack, saved_input);
 209         --n_inputs;
 210 }
 211
 212 /**
 213  * Prints a parse error message at the current token.
 214  *
 215  * @param msg   the error message
 216  */
 217 static void parse_error(const char *msg)
 218 {
 219         errorf(&pp_token.base.source_position,  "%s", msg);
 220 }
 221
 222 static inline void next_real_char(void)
 223 {
 224         assert(input.bufpos <= input.bufend);
 225         if (input.bufpos >= input.bufend) {
 226                 size_t const n = decode(input.input, input.buf + MAX_PUTBACK, lengthof(input.buf) - MAX_PUTBACK);
 227                 if (n == 0) {
 228                         input.c = EOF;
 229                         return;
 230                 }
 231                 input.bufpos = input.buf + MAX_PUTBACK;
 232                 input.bufend = input.bufpos + n;
 233         }
 234         input.c = *input.bufpos++;
 235         ++input.position.colno;
 236 }
 237
 238 /**
 239  * Put a character back into the buffer.
 240  *
 241  * @param pc  the character to put back
 242  */
 243 static inline void put_back(utf32 const pc)
 244 {
 245         assert(input.bufpos > input.buf);
 246         *(--input.bufpos - input.buf + input.buf) = (char) pc;
 247         --input.position.colno;
 248 }
 249
/*
 * To be used as `case NEWLINE:` inside a switch over input.c.
 *
 * Expands to the case labels for '\r' and '\n' plus the code that consumes
 * one line ending ("\r", "\n" or "\r\n"), increments input.position.lineno
 * and resets input.position.colno to 1. The statements written after
 * `case NEWLINE:` therefore run with input.c already holding the character
 * that follows the line ending.
 *
 * The final goto/label pair only exists so that the expansion ends in a
 * label name, which the `:` written at the use site completes. As `newline`
 * is an ordinary label, the macro can be used only once per function.
 */
#define NEWLINE \
        '\r': \
                next_char(); \
                if (input.c == '\n') { \
        case '\n': \
                        next_char(); \
                } \
                ++input.position.lineno; \
                input.position.colno = 1; \
                goto newline; \
                newline // Let it look like an ordinary case label.
 261
 262 #define eat(c_type) (assert(input.c == c_type), next_char())
 263
/**
 * Handles a backslash that has just been read (input.c == '\\').
 *
 * If a line ending follows, backslash and line ending are dropped (the two
 * source lines are concatenated) and input.c holds the first character of the
 * following line. Otherwise the character read as lookahead is pushed back
 * and input.c is '\\' again.
 */
static void maybe_concat_lines(void)
{
        eat('\\');

        switch (input.c) {
        case NEWLINE:
                /* NEWLINE already consumed the line ending and updated the
                 * position; the continued line starts without indentation */
                info.whitespace_at_line_begin = 0;
                return;

        default:
                break;
        }

        /* an ordinary backslash: undo the lookahead */
        put_back(input.c);
        input.c = '\\';
}
 280
/**
 * Set c to the next input character, ie.
 * after expanding trigraphs.
 *
 * Reads one raw character and then folds backslash-newline sequences and
 * trigraphs ("??" followed by a third character) into the character they
 * stand for. Needs at most two characters of lookahead, which are returned to
 * the buffer with put_back() if they turn out not to belong to a trigraph.
 */
static inline void next_char(void)
{
        next_real_char();

        /* filter trigraphs and concatenated lines */
        if (UNLIKELY(input.c == '\\')) {
                maybe_concat_lines();
                goto end_of_next_char;
        }

        if (LIKELY(input.c != '?'))
                goto end_of_next_char;

        /* one '?' seen: a trigraph needs a second one right behind it */
        next_real_char();
        if (LIKELY(input.c != '?')) {
                /* no trigraph: return the lookahead and deliver the '?' */
                put_back(input.c);
                input.c = '?';
                goto end_of_next_char;
        }

        /* "??" seen: the third character selects the replacement */
        next_real_char();
        switch (input.c) {
        case '=': input.c = '#'; break;
        case '(': input.c = '['; break;
        /* "??/" is a backslash and may therefore start a line splice */
        case '/': input.c = '\\'; maybe_concat_lines(); break;
        case ')': input.c = ']'; break;
        case '\'': input.c = '^'; break;
        case '<': input.c = '{'; break;
        case '!': input.c = '|'; break;
        case '>': input.c = '}'; break;
        case '-': input.c = '~'; break;
        default:
                /* not a trigraph: return the third character and the second
                 * '?' to the buffer and deliver the first '?' */
                put_back(input.c);
                put_back('?');
                input.c = '?';
                break;
        }

end_of_next_char:;
#ifdef DEBUG_CHARS
        printf("nchar '%c'\n", input.c);
#endif
}
 328
 329
 330
 331 /**
 332  * Returns true if the given char is a octal digit.
 333  *
 334  * @param char  the character to check
 335  */
 336 static inline bool is_octal_digit(int chr)
 337 {
 338         switch (chr) {
 339         case '0':
 340         case '1':
 341         case '2':
 342         case '3':
 343         case '4':
 344         case '5':
 345         case '6':
 346         case '7':
 347                 return true;
 348         default:
 349                 return false;
 350         }
 351 }
 352
 353 /**
 354  * Returns the value of a digit.
 355  * The only portable way to do it ...
 356  */
 357 static int digit_value(int digit)
 358 {
 359         switch (digit) {
 360         case '0': return 0;
 361         case '1': return 1;
 362         case '2': return 2;
 363         case '3': return 3;
 364         case '4': return 4;
 365         case '5': return 5;
 366         case '6': return 6;
 367         case '7': return 7;
 368         case '8': return 8;
 369         case '9': return 9;
 370         case 'a':
 371         case 'A': return 10;
 372         case 'b':
 373         case 'B': return 11;
 374         case 'c':
 375         case 'C': return 12;
 376         case 'd':
 377         case 'D': return 13;
 378         case 'e':
 379         case 'E': return 14;
 380         case 'f':
 381         case 'F': return 15;
 382         default:
 383                 panic("wrong character given");
 384         }
 385 }
 386
 387 /**
 388  * Parses an octal character sequence.
 389  *
 390  * @param first_digit  the already read first digit
 391  */
 392 static utf32 parse_octal_sequence(const utf32 first_digit)
 393 {
 394         assert(is_octal_digit(first_digit));
 395         utf32 value = digit_value(first_digit);
 396         if (!is_octal_digit(input.c)) return value;
 397         value = 8 * value + digit_value(input.c);
 398         next_char();
 399         if (!is_octal_digit(input.c)) return value;
 400         value = 8 * value + digit_value(input.c);
 401         next_char();
 402         return value;
 403
 404 }
 405
 406 /**
 407  * Parses a hex character sequence.
 408  */
 409 static utf32 parse_hex_sequence(void)
 410 {
 411         utf32 value = 0;
 412         while (isxdigit(input.c)) {
 413                 value = 16 * value + digit_value(input.c);
 414                 next_char();
 415         }
 416         return value;
 417 }
 418
 419 static bool is_universal_char_valid(utf32 const v)
 420 {
 421         /* C11 §6.4.3:2 */
 422         if (v < 0xA0U && v != 0x24 && v != 0x40 && v != 0x60)
 423                 return false;
 424         if (0xD800 <= v && v <= 0xDFFF)
 425                 return false;
 426         return true;
 427 }
 428
 429 static utf32 parse_universal_char(unsigned const n_digits)
 430 {
 431         utf32 v = 0;
 432         for (unsigned k = n_digits; k != 0; --k) {
 433                 if (isxdigit(input.c)) {
 434                         v = 16 * v + digit_value(input.c);
 435                         if (!resolve_escape_sequences)
 436                                 obstack_1grow(&symbol_obstack, input.c);
 437                         next_char();
 438                 } else {
 439                         errorf(&input.position,
 440                                "short universal character name, expected %u more digits",
 441                                    k);
 442                         break;
 443                 }
 444         }
 445         if (!is_universal_char_valid(v)) {
 446                 errorf(&input.position,
 447                        "\\%c%0*X is not a valid universal character name",
 448                        n_digits == 4 ? 'u' : 'U', (int)n_digits, v);
 449         }
 450         return v;
 451 }
 452
 453 static bool is_universal_char_valid_identifier(utf32 const v)
 454 {
 455         /* C11 Annex D.1 */
 456         if (                v == 0x000A8) return true;
 457         if (                v == 0x000AA) return true;
 458         if (                v == 0x000AD) return true;
 459         if (                v == 0x000AF) return true;
 460         if (0x000B2 <= v && v <= 0x000B5) return true;
 461         if (0x000B7 <= v && v <= 0x000BA) return true;
 462         if (0x000BC <= v && v <= 0x000BE) return true;
 463         if (0x000C0 <= v && v <= 0x000D6) return true;
 464         if (0x000D8 <= v && v <= 0x000F6) return true;
 465         if (0x000F8 <= v && v <= 0x000FF) return true;
 466         if (0x00100 <= v && v <= 0x0167F) return true;
 467         if (0x01681 <= v && v <= 0x0180D) return true;
 468         if (0x0180F <= v && v <= 0x01FFF) return true;
 469         if (0x0200B <= v && v <= 0x0200D) return true;
 470         if (0x0202A <= v && v <= 0x0202E) return true;
 471         if (0x0203F <= v && v <= 0x02040) return true;
 472         if (                v == 0x02054) return true;
 473         if (0x02060 <= v && v <= 0x0206F) return true;
 474         if (0x02070 <= v && v <= 0x0218F) return true;
 475         if (0x02460 <= v && v <= 0x024FF) return true;
 476         if (0x02776 <= v && v <= 0x02793) return true;
 477         if (0x02C00 <= v && v <= 0x02DFF) return true;
 478         if (0x02E80 <= v && v <= 0x02FFF) return true;
 479         if (0x03004 <= v && v <= 0x03007) return true;
 480         if (0x03021 <= v && v <= 0x0302F) return true;
 481         if (0x03031 <= v && v <= 0x0303F) return true;
 482         if (0x03040 <= v && v <= 0x0D7FF) return true;
 483         if (0x0F900 <= v && v <= 0x0FD3D) return true;
 484         if (0x0FD40 <= v && v <= 0x0FDCF) return true;
 485         if (0x0FDF0 <= v && v <= 0x0FE44) return true;
 486         if (0x0FE47 <= v && v <= 0x0FFFD) return true;
 487         if (0x10000 <= v && v <= 0x1FFFD) return true;
 488         if (0x20000 <= v && v <= 0x2FFFD) return true;
 489         if (0x30000 <= v && v <= 0x3FFFD) return true;
 490         if (0x40000 <= v && v <= 0x4FFFD) return true;
 491         if (0x50000 <= v && v <= 0x5FFFD) return true;
 492         if (0x60000 <= v && v <= 0x6FFFD) return true;
 493         if (0x70000 <= v && v <= 0x7FFFD) return true;
 494         if (0x80000 <= v && v <= 0x8FFFD) return true;
 495         if (0x90000 <= v && v <= 0x9FFFD) return true;
 496         if (0xA0000 <= v && v <= 0xAFFFD) return true;
 497         if (0xB0000 <= v && v <= 0xBFFFD) return true;
 498         if (0xC0000 <= v && v <= 0xCFFFD) return true;
 499         if (0xD0000 <= v && v <= 0xDFFFD) return true;
 500         if (0xE0000 <= v && v <= 0xEFFFD) return true;
 501         return false;
 502 }
 503
 504 static bool is_universal_char_valid_identifier_start(utf32 const v)
 505 {
 506         /* C11 Annex D.2 */
 507         if (0x0300 <= v && v <= 0x036F) return false;
 508         if (0x1DC0 <= v && v <= 0x1DFF) return false;
 509         if (0x20D0 <= v && v <= 0x20FF) return false;
 510         if (0xFE20 <= v && v <= 0xFE2F) return false;
 511         return true;
 512 }
 513
 514 /**
 515  * Parse an escape sequence.
 516  */
 517 static utf32 parse_escape_sequence(void)
 518 {
 519         eat('\\');
 520
 521         utf32 const ec = input.c;
 522         next_char();
 523
 524         switch (ec) {
 525         case '"':  return '"';
 526         case '\'': return '\'';
 527         case '\\': return '\\';
 528         case '?': return '\?';
 529         case 'a': return '\a';
 530         case 'b': return '\b';
 531         case 'f': return '\f';
 532         case 'n': return '\n';
 533         case 'r': return '\r';
 534         case 't': return '\t';
 535         case 'v': return '\v';
 536         case 'x':
 537                 return parse_hex_sequence();
 538         case '0':
 539         case '1':
 540         case '2':
 541         case '3':
 542         case '4':
 543         case '5':
 544         case '6':
 545         case '7':
 546                 return parse_octal_sequence(ec);
 547         case EOF:
 548                 parse_error("reached end of file while parsing escape sequence");
 549                 return EOF;
 550         /* \E is not documented, but handled, by GCC.  It is acceptable according
 551          * to §6.11.4, whereas \e is not. */
 552         case 'E':
 553         case 'e':
 554                 if (c_mode & _GNUC)
 555                         return 27;   /* hopefully 27 is ALWAYS the code for ESCAPE */
 556                 break;
 557
 558         case 'U': return parse_universal_char(8);
 559         case 'u': return parse_universal_char(4);
 560
 561         default:
 562                 break;
 563         }
 564         /* §6.4.4.4:8 footnote 64 */
 565         parse_error("unknown escape sequence");
 566         return EOF;
 567 }
 568
 569 static const char *identify_string(char *string)
 570 {
 571         const char *result = strset_insert(&stringset, string);
 572         if (result != string) {
 573                 obstack_free(&symbol_obstack, string);
 574         }
 575         return result;
 576 }
 577
 578 static string_t sym_make_string(string_encoding_t const enc)
 579 {
 580         obstack_1grow(&symbol_obstack, '\0');
 581         size_t      const len    = obstack_object_size(&symbol_obstack) - 1;
 582         char       *const string = obstack_finish(&symbol_obstack);
 583         char const *const result = identify_string(string);
 584         return (string_t){ result, len, enc };
 585 }
 586
 587 string_t make_string(char const *const string)
 588 {
 589         obstack_grow(&symbol_obstack, string, strlen(string));
 590         return sym_make_string(STRING_ENCODING_CHAR);
 591 }
 592
 593 static void parse_string(utf32 const delimiter, token_kind_t const kind,
 594                          string_encoding_t const enc,
 595                          char const *const context)
 596 {
 597         const unsigned start_linenr = input.position.lineno;
 598
 599         eat(delimiter);
 600
 601         while (true) {
 602                 switch (input.c) {
 603                 case '\\': {
 604                         if (resolve_escape_sequences) {
 605                                 utf32 const tc = parse_escape_sequence();
 606                                 if (enc == STRING_ENCODING_CHAR) {
 607                                         if (tc >= 0x100) {
 608                                                 warningf(WARN_OTHER, &pp_token.base.source_position, "escape sequence out of range");
 609                                         }
 610                                         obstack_1grow(&symbol_obstack, tc);
 611                                 } else {
 612                                         obstack_grow_utf8(&symbol_obstack, tc);
 613                                 }
 614                         } else {
 615                                 obstack_1grow(&symbol_obstack, (char)input.c);
 616                                 next_char();
 617                                 obstack_1grow(&symbol_obstack, (char)input.c);
 618                                 next_char();
 619                         }
 620                         break;
 621                 }
 622
 623                 case NEWLINE:
 624                         errorf(&pp_token.base.source_position, "newline while parsing %s", context);
 625                         break;
 626
 627                 case EOF: {
 628                         source_position_t source_position;
 629                         source_position.input_name = pp_token.base.source_position.input_name;
 630                         source_position.lineno     = start_linenr;
 631                         errorf(&source_position, "EOF while parsing %s", context);
 632                         goto end_of_string;
 633                 }
 634
 635                 default:
 636                         if (input.c == delimiter) {
 637                                 next_char();
 638                                 goto end_of_string;
 639                         } else {
 640                                 obstack_grow_utf8(&symbol_obstack, input.c);
 641                                 next_char();
 642                                 break;
 643                         }
 644                 }
 645         }
 646
 647 end_of_string:
 648         pp_token.kind           = kind;
 649         pp_token.literal.string = sym_make_string(enc);
 650 }
 651
 652 static void parse_string_literal(string_encoding_t const enc)
 653 {
 654         parse_string('"', T_STRING_LITERAL, enc, "string literal");
 655 }
 656
 657 static void parse_character_constant(string_encoding_t const enc)
 658 {
 659         parse_string('\'', T_CHARACTER_CONSTANT, enc, "character constant");
 660         if (pp_token.literal.string.size == 0) {
 661                 parse_error("empty character constant");
 662         }
 663 }
 664
/*
 * To be used as `case SYMBOL_CASES_WITHOUT_E_P:` in a switch over a
 * character. Expands to case labels for '$', '_' and all ASCII letters except
 * 'e', 'E', 'p' and 'P'.
 *
 * The '$' label carries code: if allow_dollar_in_symbol is false it jumps to
 * a label named `dollar_sign`, which the function using the macro has to
 * provide; otherwise it falls through to the other labels.
 */
#define SYMBOL_CASES_WITHOUT_E_P \
             '$': if (!allow_dollar_in_symbol) goto dollar_sign; \
        case 'a': \
        case 'b': \
        case 'c': \
        case 'd': \
        case 'f': \
        case 'g': \
        case 'h': \
        case 'i': \
        case 'j': \
        case 'k': \
        case 'l': \
        case 'm': \
        case 'n': \
        case 'o': \
        case 'q': \
        case 'r': \
        case 's': \
        case 't': \
        case 'u': \
        case 'v': \
        case 'w': \
        case 'x': \
        case 'y': \
        case 'z': \
        case 'A': \
        case 'B': \
        case 'C': \
        case 'D': \
        case 'F': \
        case 'G': \
        case 'H': \
        case 'I': \
        case 'J': \
        case 'K': \
        case 'L': \
        case 'M': \
        case 'N': \
        case 'O': \
        case 'Q': \
        case 'R': \
        case 'S': \
        case 'T': \
        case 'U': \
        case 'V': \
        case 'W': \
        case 'X': \
        case 'Y': \
        case 'Z': \
        case '_'
 716
/*
 * To be used as `case SYMBOL_CASES:`. Same as SYMBOL_CASES_WITHOUT_E_P
 * (including its need for a `dollar_sign` label) with the letters 'e', 'p',
 * 'E' and 'P' added.
 */
#define SYMBOL_CASES \
             SYMBOL_CASES_WITHOUT_E_P: \
        case 'e': \
        case 'p': \
        case 'E': \
        case 'P'
 723
/* To be used as `case DIGIT_CASES:`; expands to case labels for the decimal
 * digits '0' to '9'. */
#define DIGIT_CASES \
             '0':  \
        case '1':  \
        case '2':  \
        case '3':  \
        case '4':  \
        case '5':  \
        case '6':  \
        case '7':  \
        case '8':  \
        case '9'
 735
 736 static void start_expanding(pp_definition_t *definition)
 737 {
 738         definition->parent_expansion = current_expansion;
 739         definition->expand_pos       = 0;
 740         definition->is_expanding     = true;
 741         if (definition->list_len > 0) {
 742                 definition->token_list[0].had_whitespace
 743                         = info.had_whitespace;
 744         }
 745         current_expansion = definition;
 746 }
 747
 748 static void finished_expanding(pp_definition_t *definition)
 749 {
 750         assert(definition->is_expanding);
 751         pp_definition_t *parent = definition->parent_expansion;
 752         definition->parent_expansion = NULL;
 753         definition->is_expanding     = false;
 754
 755         /* stop further expanding once we expanded a parameter used in a
 756          * sub macro-call */
 757         if (definition == argument_expanding)
 758                 argument_expanding = NULL;
 759
 760         assert(current_expansion == definition);
 761         current_expansion = parent;
 762 }
 763
 764 static inline void set_punctuator(token_kind_t const kind)
 765 {
 766         pp_token.kind        = kind;
 767         pp_token.base.symbol = token_symbols[kind];
 768 }
 769
 770 static inline void set_digraph(token_kind_t const kind, symbol_t *const symbol)
 771 {
 772         pp_token.kind        = kind;
 773         pp_token.base.symbol = symbol;
 774 }
 775
 776 /**
 777  * returns next final token from a preprocessor macro expansion
 778  */
 779 static bool expand_next(void)
 780 {
 781         if (current_expansion == NULL)
 782                 return false;
 783
 784 restart:;
 785         size_t pos = current_expansion->expand_pos;
 786         if (pos >= current_expansion->list_len) {
 787                 finished_expanding(current_expansion);
 788                 /* it was the outermost expansion, parse pptoken normally */
 789                 if (current_expansion == NULL) {
 790                         return false;
 791                 }
 792                 goto restart;
 793         }
 794         const saved_token_t *saved = &current_expansion->token_list[pos++];
 795         pp_token = saved->token;
 796
 797         if (current_expansion->expand_pos > 0)
 798                 info.had_whitespace = saved->had_whitespace;
 799         pp_token.base.source_position = expansion_pos;
 800         ++current_expansion->expand_pos;
 801
 802         return true;
 803 }
 804
 805 /**
 806  * Returns the next token kind found when continuing the current expansions
 807  * without starting new sub-expansions.
 808  */
 809 static token_kind_t peek_expansion(void)
 810 {
 811         pp_definition_t *expansion = current_expansion;
 812         while (expansion != NULL && expansion->expand_pos >= expansion->list_len) {
 813                 expansion = expansion->parent_expansion;
 814         }
 815         if (expansion == NULL)
 816                 return T_EOF;
 817         return expansion->token_list[expansion->expand_pos].token.kind;
 818 }
 819
 820 static void skip_line_comment(void)
 821 {
 822         info.had_whitespace = true;
 823         while (true) {
 824                 switch (input.c) {
 825                 case EOF:
 826                         return;
 827
 828                 case '\r':
 829                 case '\n':
 830                         return;
 831
 832                 default:
 833                         next_char();
 834                         break;
 835                 }
 836         }
 837 }
 838
 839 static void skip_multiline_comment(void)
 840 {
 841         info.had_whitespace = true;
 842
 843         unsigned start_linenr = input.position.lineno;
 844         while (true) {
 845                 switch (input.c) {
 846                 case '/':
 847                         next_char();
 848                         if (input.c == '*') {
 849                                 /* TODO: nested comment, warn here */
 850                         }
 851                         break;
 852                 case '*':
 853                         next_char();
 854                         if (input.c == '/') {
 855                                 if (input.position.lineno != input.output_line)
 856                                         info.whitespace_at_line_begin = input.position.colno;
 857                                 next_char();
 858                                 return;
 859                         }
 860                         break;
 861
 862                 case NEWLINE:
 863                         break;
 864
 865                 case EOF: {
 866                         source_position_t source_position;
 867                         source_position.input_name = pp_token.base.source_position.input_name;
 868                         source_position.lineno     = start_linenr;
 869                         errorf(&source_position, "at end of file while looking for comment end");
 870                         return;
 871                 }
 872
 873                 default:
 874                         next_char();
 875                         break;
 876                 }
 877         }
 878 }
 879
 880 static bool skip_till_newline(bool stop_at_non_whitespace)
 881 {
 882         bool res = false;
 883         while (true) {
 884                 switch (input.c) {
 885                 case ' ':
 886                 case '\t':
 887                         next_char();
 888                         continue;
 889
 890                 case '/':
 891                         next_char();
 892                         if (input.c == '/') {
 893                                 next_char();
 894                                 skip_line_comment();
 895                                 continue;
 896                         } else if (input.c == '*') {
 897                                 next_char();
 898                                 skip_multiline_comment();
 899                                 continue;
 900                         } else {
 901                                 put_back(input.c);
 902                                 input.c = '/';
 903                         }
 904                         return true;
 905
 906                 case NEWLINE:
 907                         return res;
 908
 909                 default:
 910                         if (stop_at_non_whitespace)
 911                                 return false;
 912                         res = true;
 913                         next_char();
 914                         continue;
 915                 }
 916         }
 917 }
 918
 919 static void skip_whitespace(void)
 920 {
 921         while (true) {
 922                 switch (input.c) {
 923                 case ' ':
 924                 case '\t':
 925                         ++info.whitespace_at_line_begin;
 926                         info.had_whitespace = true;
 927                         next_char();
 928                         continue;
 929
 930                 case NEWLINE:
 931                         info.at_line_begin  = true;
 932                         info.had_whitespace = true;
 933                         info.whitespace_at_line_begin = 0;
 934                         continue;
 935
 936                 case '/':
 937                         next_char();
 938                         if (input.c == '/') {
 939                                 next_char();
 940                                 skip_line_comment();
 941                                 continue;
 942                         } else if (input.c == '*') {
 943                                 next_char();
 944                                 skip_multiline_comment();
 945                                 continue;
 946                         } else {
 947                                 put_back(input.c);
 948                                 input.c = '/';
 949                         }
 950                         return;
 951
 952                 default:
 953                         return;
 954                 }
 955         }
 956 }
 957
 958 static inline void eat_pp(pp_token_kind_t const kind)
 959 {
 960         assert(pp_token.base.symbol->pp_ID == kind);
 961         (void) kind;
 962         next_input_token();
 963 }
 964
 965 static inline void eat_token(token_kind_t const kind)
 966 {
 967         assert(pp_token.kind == kind);
 968         (void)kind;
 969         next_input_token();
 970 }
 971
 972 static void parse_symbol(void)
 973 {
 974         assert(obstack_object_size(&symbol_obstack) == 0);
 975         while (true) {
 976                 switch (input.c) {
 977                 case DIGIT_CASES:
 978                 case SYMBOL_CASES:
 979                         obstack_1grow(&symbol_obstack, (char) input.c);
 980                         next_char();
 981                         break;
 982
 983                 case '\\':
 984                         next_char();
 985                         switch (input.c) {
 986                         {
 987                                 unsigned n;
 988                         case 'U': n = 8; goto universal;
 989                         case 'u': n = 4; goto universal;
 990 universal:
 991                                 if (!resolve_escape_sequences) {
 992                                         obstack_1grow(&symbol_obstack, '\\');
 993                                         obstack_1grow(&symbol_obstack, input.c);
 994                                 }
 995                                 next_char();
 996                                 utf32 const v = parse_universal_char(n);
 997                                 if (!is_universal_char_valid_identifier(v)) {
 998                                         if (is_universal_char_valid(v)) {
 999                                                 errorf(&input.position,
1000                                                            "universal character \\%c%0*X is not valid in an identifier",
1001                                                            n == 4 ? 'u' : 'U', (int)n, v);
1002                                         }
1003                                 } else if (obstack_object_size(&symbol_obstack) == 0 && !is_universal_char_valid_identifier_start(v)) {
1004                                         errorf(&input.position,
1005                                                    "universal character \\%c%0*X is not valid as start of an identifier",
1006                                                    n == 4 ? 'u' : 'U', (int)n, v);
1007                                 } else if (resolve_escape_sequences) {
1008                                         obstack_grow_utf8(&symbol_obstack, v);
1009                                 }
1010                                 break;
1011                         }
1012
1013                         default:
1014                                 put_back(input.c);
1015                                 input.c = '\\';
1016                                 goto end_symbol;
1017                         }
1018
1019                 default:
1020 dollar_sign:
1021                         goto end_symbol;
1022                 }
1023         }
1024
1025 end_symbol:
1026         obstack_1grow(&symbol_obstack, '\0');
1027         char *string = obstack_finish(&symbol_obstack);
1028
1029         /* might be a wide string or character constant ( L"string"/L'c' ) */
1030         if (input.c == '"' && string[0] == 'L' && string[1] == '\0') {
1031                 obstack_free(&symbol_obstack, string);
1032                 parse_string_literal(STRING_ENCODING_WIDE);
1033                 return;
1034         } else if (input.c == '\'' && string[0] == 'L' && string[1] == '\0') {
1035                 obstack_free(&symbol_obstack, string);
1036                 parse_character_constant(STRING_ENCODING_WIDE);
1037                 return;
1038         }
1039
1040         symbol_t *symbol = symbol_table_insert(string);
1041
1042         pp_token.kind        = symbol->ID;
1043         pp_token.base.symbol = symbol;
1044
1045         /* we can free the memory from symbol obstack if we already had an entry in
1046          * the symbol table */
1047         if (symbol->string != string) {
1048                 obstack_free(&symbol_obstack, string);
1049         }
1050 }
1051
1052 static void parse_number(void)
1053 {
1054         obstack_1grow(&symbol_obstack, (char) input.c);
1055         next_char();
1056
1057         while (true) {
1058                 switch (input.c) {
1059                 case '.':
1060                 case DIGIT_CASES:
1061                 case SYMBOL_CASES_WITHOUT_E_P:
1062                         obstack_1grow(&symbol_obstack, (char) input.c);
1063                         next_char();
1064                         break;
1065
1066                 case 'e':
1067                 case 'p':
1068                 case 'E':
1069                 case 'P':
1070                         obstack_1grow(&symbol_obstack, (char) input.c);
1071                         next_char();
1072                         if (input.c == '+' || input.c == '-') {
1073                                 obstack_1grow(&symbol_obstack, (char) input.c);
1074                                 next_char();
1075                         }
1076                         break;
1077
1078                 default:
1079 dollar_sign:
1080                         goto end_number;
1081                 }
1082         }
1083
1084 end_number:
1085         pp_token.kind           = T_NUMBER;
1086         pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
1087 }
1088
/*
 * Helper macros for lexing punctuators that consist of several characters.
 * They are meant to be used together inside a function returning void:
 * MAYBE_PROLOG opens a switch statement which is only closed again by
 * ELSE_CODE/ELSE, so every MAYBE_PROLOG needs exactly one matching
 * ELSE_CODE or ELSE.
 */

/** Advances to the next input character and opens a switch over it. */
#define MAYBE_PROLOG \
        next_char(); \
        switch (input.c) {

/** If the lookahead character is @p ch: consume it, emit the punctuator
 * @p kind and return from the enclosing function. */
#define MAYBE(ch, kind) \
        case ch: \
                next_char(); \
                set_punctuator(kind); \
                return;

/** Like MAYBE, but emits a digraph through set_digraph(kind, symbol). */
#define MAYBE_DIGRAPH(ch, kind, symbol) \
        case ch: \
                next_char(); \
                set_digraph(kind, symbol); \
                return;

/** Default branch for any other lookahead character: runs @p code and
 * closes the switch opened by MAYBE_PROLOG. */
#define ELSE_CODE(code) \
        default: \
                code \
        }

/** Default branch that emits the punctuator @p kind without consuming the
 * lookahead character and returns from the enclosing function. */
#define ELSE(kind) ELSE_CODE(set_punctuator(kind); return;)
1111
/**
 * Identifies the next preprocessing token contained in the input stream and
 * stores it in the global pp_token. No macro expansion is performed.
 *
 * Whitespace, line breaks and comments in front of the token are skipped and
 * recorded in the global `info`. When an included file ends, its input is
 * closed, the previous input is restored and lexing continues there; only
 * the end of the outermost input yields a T_EOF token.
 *
 * NOTE(review): the dollar_sign label is not referenced in this function's
 * visible text; it is presumably the target of a goto inside the
 * SYMBOL_CASES macro - confirm before removing it.
 */
static void next_input_token(void)
{
        /* take over whitespace information that was stashed in next_info,
         * otherwise start with a clean state for this token */
        if (next_info.had_whitespace) {
                info = next_info;
                next_info.had_whitespace = false;
        } else {
                info.at_line_begin  = false;
                info.had_whitespace = false;
        }
restart:
        /* (re)start of a token: executed again after skipped whitespace,
         * comments, unknown characters and at the end of an included file */
        pp_token.base.source_position = input.position;
        pp_token.base.symbol          = NULL;

        switch (input.c) {
        case ' ':
        case '\t':
                info.whitespace_at_line_begin++;
                info.had_whitespace = true;
                next_char();
                goto restart;

        /* NOTE(review): no next_char() here; the NEWLINE macro presumably
         * consumes the line break itself - confirm at its definition */
        case NEWLINE:
                info.at_line_begin            = true;
                info.had_whitespace           = true;
                info.whitespace_at_line_begin = 0;
                goto restart;

        case SYMBOL_CASES:
                parse_symbol();
                return;

        case DIGIT_CASES:
                parse_number();
                return;

        case '"':
                parse_string_literal(STRING_ENCODING_CHAR);
                return;

        case '\'':
                parse_character_constant(STRING_ENCODING_CHAR);
                return;

        /* '.' may start a number (".5"), the "..." punctuator or be a plain
         * dot; two dots that are not followed by a third one yield a single
         * '.' token with the second dot put back into the input */
        case '.':
                MAYBE_PROLOG
                        case '0':
                        case '1':
                        case '2':
                        case '3':
                        case '4':
                        case '5':
                        case '6':
                        case '7':
                        case '8':
                        case '9':
                                put_back(input.c);
                                input.c = '.';
                                parse_number();
                                return;

                        case '.':
                                MAYBE_PROLOG
                                MAYBE('.', T_DOTDOTDOT)
                                ELSE_CODE(
                                        put_back(input.c);
                                        input.c = '.';
                                        set_punctuator('.');
                                        return;
                                )
                ELSE('.')
        case '&':
                MAYBE_PROLOG
                MAYBE('&', T_ANDAND)
                MAYBE('=', T_ANDEQUAL)
                ELSE('&')
        case '*':
                MAYBE_PROLOG
                MAYBE('=', T_ASTERISKEQUAL)
                ELSE('*')
        case '+':
                MAYBE_PROLOG
                MAYBE('+', T_PLUSPLUS)
                MAYBE('=', T_PLUSEQUAL)
                ELSE('+')
        case '-':
                MAYBE_PROLOG
                MAYBE('>', T_MINUSGREATER)
                MAYBE('-', T_MINUSMINUS)
                MAYBE('=', T_MINUSEQUAL)
                ELSE('-')
        case '!':
                MAYBE_PROLOG
                MAYBE('=', T_EXCLAMATIONMARKEQUAL)
                ELSE('!')
        /* '/' may also start a comment, which is skipped like whitespace */
        case '/':
                MAYBE_PROLOG
                MAYBE('=', T_SLASHEQUAL)
                case '*':
                        next_char();
                        skip_multiline_comment();
                        goto restart;
                case '/':
                        next_char();
                        skip_line_comment();
                        goto restart;
                ELSE('/')
        /* '%' starts the digraphs "%>", "%:" and "%:%:"; a "%:%" that is not
         * followed by ':' yields "%:" with the second '%' put back */
        case '%':
                MAYBE_PROLOG
                MAYBE_DIGRAPH('>', '}', symbol_percentgreater)
                MAYBE('=', T_PERCENTEQUAL)
                case ':':
                        MAYBE_PROLOG
                        case '%':
                                MAYBE_PROLOG
                                MAYBE_DIGRAPH(':', T_HASHHASH, symbol_percentcolonpercentcolon)
                                ELSE_CODE(
                                        put_back(input.c);
                                        input.c = '%';
                                        goto digraph_percentcolon;
                                )
                        ELSE_CODE(
digraph_percentcolon:
                                set_digraph('#', symbol_percentcolon);
                                return;
                        )
                ELSE('%')
        case '<':
                MAYBE_PROLOG
                MAYBE_DIGRAPH(':', '[', symbol_lesscolon)
                MAYBE_DIGRAPH('%', '{', symbol_lesspercent)
                MAYBE('=', T_LESSEQUAL)
                case '<':
                        MAYBE_PROLOG
                        MAYBE('=', T_LESSLESSEQUAL)
                        ELSE(T_LESSLESS)
                ELSE('<')
        case '>':
                MAYBE_PROLOG
                MAYBE('=', T_GREATEREQUAL)
                case '>':
                        MAYBE_PROLOG
                        MAYBE('=', T_GREATERGREATEREQUAL)
                        ELSE(T_GREATERGREATER)
                ELSE('>')
        case '^':
                MAYBE_PROLOG
                MAYBE('=', T_CARETEQUAL)
                ELSE('^')
        case '|':
                MAYBE_PROLOG
                MAYBE('=', T_PIPEEQUAL)
                MAYBE('|', T_PIPEPIPE)
                ELSE('|')
        /* "::" is only a token when the _CXX bit is set in c_mode; otherwise
         * the second ':' stays in the input and a single ':' is emitted */
        case ':':
                MAYBE_PROLOG
                MAYBE_DIGRAPH('>', ']', symbol_colongreater)
                case ':':
                        if (c_mode & _CXX) {
                                next_char();
                                set_punctuator(T_COLONCOLON);
                                return;
                        }
                        /* FALLTHROUGH */
                ELSE(':')
        case '=':
                MAYBE_PROLOG
                MAYBE('=', T_EQUALEQUAL)
                ELSE('=')
        case '#':
                MAYBE_PROLOG
                MAYBE('#', T_HASHHASH)
                ELSE('#')

        /* punctuators that are never part of a longer token */
        case '?':
        case '[':
        case ']':
        case '(':
        case ')':
        case '{':
        case '}':
        case '~':
        case ';':
        case ',':
                set_punctuator(input.c);
                next_char();
                return;

        case EOF:
                if (input_stack != NULL) {
                        /* end of an included file: close it, continue with the
                         * input that was active before and print a line
                         * directive with the flag "2" for the restored
                         * position */
                        fclose(close_input());
                        pop_restore_input();
                        fputc('\n', out);
                        if (input.c == (utf32)EOF)
                                --input.position.lineno;
                        print_line_directive(&input.position, "2");
                        goto restart;
                } else {
                        /* end of the outermost input */
                        info.at_line_begin = true;
                        set_punctuator(T_EOF);
                }
                return;

        case '\\':
                /* peek at the character behind the backslash and restore the
                 * input; "\u" and "\U" start an identifier that begins with a
                 * universal character name */
                next_char();
                int next_c = input.c;
                put_back(input.c);
                input.c = '\\';
                if (next_c == 'U' || next_c == 'u') {
                        parse_symbol();
                        return;
                }
                /* FALLTHROUGH */
        default:
dollar_sign:
                if (error_on_unknown_chars) {
                        /* report the character, drop it and try again */
                        errorf(&pp_token.base.source_position,
                               "unknown character '%lc' found\n", input.c);
                        next_char();
                        goto restart;
                } else {
                        /* pass the character on as a T_UNKNOWN_CHAR token whose
                         * symbol is the UTF-8 encoding of the character */
                        assert(obstack_object_size(&symbol_obstack) == 0);
                        obstack_grow_utf8(&symbol_obstack, input.c);
                        obstack_1grow(&symbol_obstack, '\0');
                        char     *const string = obstack_finish(&symbol_obstack);
                        symbol_t *const symbol = symbol_table_insert(string);
                        if (symbol->string != string)
                                obstack_free(&symbol_obstack, string);

                        pp_token.kind        = T_UNKNOWN_CHAR;
                        pp_token.base.symbol = symbol;
                        next_char();
                        return;
                }
        }
}
1349
1350 static void print_quoted_string(const char *const string)
1351 {
1352         fputc('"', out);
1353         for (const char *c = string; *c != 0; ++c) {
1354                 switch (*c) {
1355                 case '"': fputs("\\\"", out); break;
1356                 case '\\':  fputs("\\\\", out); break;
1357                 case '\a':  fputs("\\a", out); break;
1358                 case '\b':  fputs("\\b", out); break;
1359                 case '\f':  fputs("\\f", out); break;
1360                 case '\n':  fputs("\\n", out); break;
1361                 case '\r':  fputs("\\r", out); break;
1362                 case '\t':  fputs("\\t", out); break;
1363                 case '\v':  fputs("\\v", out); break;
1364                 case '\?':  fputs("\\?", out); break;
1365                 default:
1366                         if (!isprint(*c)) {
1367                                 fprintf(out, "\\%03o", (unsigned)*c);
1368                                 break;
1369                         }
1370                         fputc(*c, out);
1371                         break;
1372                 }
1373         }
1374         fputc('"', out);
1375 }
1376
1377 static void print_line_directive(const source_position_t *pos, const char *add)
1378 {
1379         if (!out)
1380                 return;
1381
1382         fprintf(out, "# %u ", pos->lineno);
1383         print_quoted_string(pos->input_name);
1384         if (add != NULL) {
1385                 fputc(' ', out);
1386                 fputs(add, out);
1387         }
1388
1389         printed_input_name = pos->input_name;
1390         input.output_line  = pos->lineno-1;
1391 }
1392
1393 static bool emit_newlines(void)
1394 {
1395         unsigned delta = pp_token.base.source_position.lineno - input.output_line;
1396         if (delta == 0)
1397                 return false;
1398
1399         if (delta >= 9) {
1400                 fputc('\n', out);
1401                 print_line_directive(&pp_token.base.source_position, NULL);
1402                 fputc('\n', out);
1403         } else {
1404                 for (unsigned i = 0; i < delta; ++i) {
1405                         fputc('\n', out);
1406                 }
1407         }
1408         input.output_line = pp_token.base.source_position.lineno;
1409
1410         for (unsigned i = 0; i < info.whitespace_at_line_begin; ++i)
1411                 fputc(' ', out);
1412
1413         return true;
1414 }
1415
1416 static void emit_pp_token(void)
1417 {
1418         if (!emit_newlines() &&
1419             (info.had_whitespace || tokens_would_paste(last_token, pp_token.kind)))
1420                 fputc(' ', out);
1421
1422         switch (pp_token.kind) {
1423         case T_NUMBER:
1424                 fputs(pp_token.literal.string.begin, out);
1425                 break;
1426
1427         case T_STRING_LITERAL:
1428                 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1429                 fputc('"', out);
1430                 fputs(pp_token.literal.string.begin, out);
1431                 fputc('"', out);
1432                 break;
1433
1434         case T_CHARACTER_CONSTANT:
1435                 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1436                 fputc('\'', out);
1437                 fputs(pp_token.literal.string.begin, out);
1438                 fputc('\'', out);
1439                 break;
1440
1441         case T_MACRO_PARAMETER:
1442                 panic("macro parameter not expanded");
1443
1444         default:
1445                 fputs(pp_token.base.symbol->string, out);
1446                 break;
1447         }
1448         last_token = pp_token.kind;
1449 }
1450
1451 static void eat_pp_directive(void)
1452 {
1453         while (!info.at_line_begin) {
1454                 next_input_token();
1455         }
1456 }
1457
1458 static bool strings_equal(const string_t *string1, const string_t *string2)
1459 {
1460         size_t size = string1->size;
1461         if (size != string2->size)
1462                 return false;
1463
1464         const char *c1 = string1->begin;
1465         const char *c2 = string2->begin;
1466         for (size_t i = 0; i < size; ++i, ++c1, ++c2) {
1467                 if (*c1 != *c2)
1468                         return false;
1469         }
1470         return true;
1471 }
1472
1473 static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
1474 {
1475         if (token1->kind != token2->kind)
1476                 return false;
1477
1478         switch (token1->kind) {
1479         case T_NUMBER:
1480         case T_CHARACTER_CONSTANT:
1481         case T_STRING_LITERAL:
1482                 return strings_equal(&token1->literal.string, &token2->literal.string);
1483
1484         case T_MACRO_PARAMETER:
1485                 return token1->macro_parameter.def->symbol
1486                     == token2->macro_parameter.def->symbol;
1487
1488         default:
1489                 return token1->base.symbol == token2->base.symbol;
1490         }
1491 }
1492
1493 static bool pp_definitions_equal(const pp_definition_t *definition1,
1494                                  const pp_definition_t *definition2)
1495 {
1496         if (definition1->list_len != definition2->list_len)
1497                 return false;
1498
1499         size_t               len = definition1->list_len;
1500         const saved_token_t *t1  = definition1->token_list;
1501         const saved_token_t *t2  = definition2->token_list;
1502         for (size_t i = 0; i < len; ++i, ++t1, ++t2) {
1503                 if (!pp_tokens_equal(&t1->token, &t2->token))
1504                         return false;
1505         }
1506         return true;
1507 }
1508
1509 static void parse_define_directive(void)
1510 {
1511         eat_pp(TP_define);
1512         if (skip_mode) {
1513                 eat_pp_directive();
1514                 return;
1515         }
1516
1517         assert(obstack_object_size(&pp_obstack) == 0);
1518
1519         if (pp_token.kind != T_IDENTIFIER || info.at_line_begin) {
1520                 errorf(&pp_token.base.source_position,
1521                        "expected identifier after #define, got %K", &pp_token);
1522                 goto error_out;
1523         }
1524         symbol_t *const symbol = pp_token.base.symbol;
1525
1526         pp_definition_t *new_definition
1527                 = obstack_alloc(&pp_obstack, sizeof(new_definition[0]));
1528         memset(new_definition, 0, sizeof(new_definition[0]));
1529         new_definition->symbol          = symbol;
1530         new_definition->source_position = input.position;
1531
1532         /* this is probably the only place where spaces are significant in the
1533          * lexer (except for the fact that they separate tokens). #define b(x)
1534          * is something else than #define b (x) */
1535         if (input.c == '(') {
1536                 eat_token(T_IDENTIFIER);
1537                 eat_token('(');
1538
1539                 while (true) {
1540                         switch (pp_token.kind) {
1541                         case T_DOTDOTDOT:
1542                                 new_definition->is_variadic = true;
1543                                 eat_token(T_DOTDOTDOT);
1544                                 if (pp_token.kind != ')') {
1545                                         errorf(&input.position,
1546                                                         "'...' not at end of macro argument list");
1547                                         goto error_out;
1548                                 }
1549                                 break;
1550
1551                         case T_IDENTIFIER: {
1552                                 pp_definition_t parameter;
1553                                 memset(&parameter, 0, sizeof(parameter));
1554                                 parameter.source_position = pp_token.base.source_position;
1555                                 parameter.symbol          = pp_token.base.symbol;
1556                                 parameter.is_parameter    = true;
1557                                 obstack_grow(&pp_obstack, &parameter, sizeof(parameter));
1558                                 eat_token(T_IDENTIFIER);
1559
1560                                 if (pp_token.kind == ',') {
1561                                         eat_token(',');
1562                                         break;
1563                                 }
1564
1565                                 if (pp_token.kind != ')') {
1566                                         errorf(&pp_token.base.source_position,
1567                                                "expected ',' or ')' after identifier, got %K",
1568                                                &pp_token);
1569                                         goto error_out;
1570                                 }
1571                                 break;
1572                         }
1573
1574                         case ')':
1575                                 eat_token(')');
1576                                 goto finish_argument_list;
1577
1578                         default:
1579                                 errorf(&pp_token.base.source_position,
1580                                        "expected identifier, '...' or ')' in #define argument list, got %K",
1581                                        &pp_token);
1582                                 goto error_out;
1583                         }
1584                 }
1585
1586         finish_argument_list:
1587                 new_definition->has_parameters = true;
1588                 size_t size = obstack_object_size(&pp_obstack);
1589                 new_definition->n_parameters
1590                         = size / sizeof(new_definition->parameters[0]);
1591                 new_definition->parameters = obstack_finish(&pp_obstack);
1592                 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1593                         pp_definition_t *param    = &new_definition->parameters[i];
1594                         symbol_t        *symbol   = param->symbol;
1595                         pp_definition_t *previous = symbol->pp_definition;
1596                         if (previous != NULL
1597                             && previous->function_definition == new_definition) {
1598                                 errorf(&param->source_position,
1599                                        "duplicate macro parameter '%Y'", symbol);
1600                                 param->symbol = sym_anonymous;
1601                                 continue;
1602                         }
1603                         param->parent_expansion    = previous;
1604                         param->function_definition = new_definition;
1605                         symbol->pp_definition      = param;
1606                 }
1607         } else {
1608                 eat_token(T_IDENTIFIER);
1609         }
1610
1611         /* construct token list */
1612         assert(obstack_object_size(&pp_obstack) == 0);
1613         while (!info.at_line_begin) {
1614                 if (pp_token.kind == T_IDENTIFIER) {
1615                         const symbol_t  *symbol     = pp_token.base.symbol;
1616                         pp_definition_t *definition = symbol->pp_definition;
1617                         if (definition != NULL
1618                             && definition->function_definition == new_definition) {
1619                             pp_token.kind                = T_MACRO_PARAMETER;
1620                             pp_token.macro_parameter.def = definition;
1621                         }
1622                 }
1623                 saved_token_t saved_token;
1624                 saved_token.token = pp_token;
1625                 saved_token.had_whitespace = info.had_whitespace;
1626                 obstack_grow(&pp_obstack, &saved_token, sizeof(saved_token));
1627                 next_input_token();
1628         }
1629
1630         new_definition->list_len   = obstack_object_size(&pp_obstack)
1631                 / sizeof(new_definition->token_list[0]);
1632         new_definition->token_list = obstack_finish(&pp_obstack);
1633
1634         if (new_definition->has_parameters) {
1635                 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1636                         pp_definition_t *param      = &new_definition->parameters[i];
1637                         symbol_t        *symbol     = param->symbol;
1638                         if (symbol == sym_anonymous)
1639                                 continue;
1640                         assert(symbol->pp_definition == param);
1641                         assert(param->function_definition == new_definition);
1642                         symbol->pp_definition   = param->parent_expansion;
1643                         param->parent_expansion = NULL;
1644                 }
1645         }
1646
1647         pp_definition_t *old_definition = symbol->pp_definition;
1648         if (old_definition != NULL) {
1649                 if (!pp_definitions_equal(old_definition, new_definition)) {
1650                         warningf(WARN_OTHER, &input.position, "multiple definition of macro '%Y' (first defined %P)", symbol, &old_definition->source_position);
1651                 } else {
1652                         /* reuse the old definition */
1653                         obstack_free(&pp_obstack, new_definition);
1654                         new_definition = old_definition;
1655                 }
1656         }
1657
1658         symbol->pp_definition = new_definition;
1659         return;
1660
1661 error_out:
1662         if (obstack_object_size(&pp_obstack) > 0) {
1663                 char *ptr = obstack_finish(&pp_obstack);
1664                 obstack_free(&pp_obstack, ptr);
1665         }
1666         eat_pp_directive();
1667 }
1668
1669 static void parse_undef_directive(void)
1670 {
1671         eat_pp(TP_undef);
1672         if (skip_mode) {
1673                 eat_pp_directive();
1674                 return;
1675         }
1676
1677         if (pp_token.kind != T_IDENTIFIER) {
1678                 errorf(&input.position,
1679                        "expected identifier after #undef, got %K", &pp_token);
1680                 eat_pp_directive();
1681                 return;
1682         }
1683
1684         pp_token.base.symbol->pp_definition = NULL;
1685         eat_token(T_IDENTIFIER);
1686
1687         if (!info.at_line_begin) {
1688                 warningf(WARN_OTHER, &input.position, "extra tokens at end of #undef directive");
1689         }
1690         eat_pp_directive();
1691 }
1692
1693 /** behind an #include we can have the special headername lexems.
1694  * They're only allowed behind an #include so they're not recognized
1695  * by the normal next_preprocessing_token. We handle them as a special
1696  * exception here */
1697 static void parse_headername(void)
1698 {
1699         const source_position_t start_position = input.position;
1700         string_t                string         = { NULL, 0, STRING_ENCODING_CHAR };
1701         assert(obstack_object_size(&symbol_obstack) == 0);
1702
1703         if (info.at_line_begin) {
1704                 parse_error("expected headername after #include");
1705                 goto finish_error;
1706         }
1707
1708         /* check wether we have a "... or <... headername */
1709         switch (input.c) {
1710         {
1711                 utf32 delimiter;
1712         case '<': delimiter = '>'; goto parse_name;
1713         case '"': delimiter = '"'; goto parse_name;
1714 parse_name:
1715                 next_char();
1716                 while (true) {
1717                         switch (input.c) {
1718                         case NEWLINE:
1719                         case EOF:
1720                                 errorf(&pp_token.base.source_position, "header name without closing '%c'", (char)delimiter);
1721                                 goto finish_error;
1722
1723                         default:
1724                                 if (input.c == delimiter) {
1725                                         next_char();
1726                                         goto finished_headername;
1727                                 } else {
1728                                         obstack_1grow(&symbol_obstack, (char)input.c);
1729                                         next_char();
1730                                 }
1731                                 break;
1732                         }
1733                 }
1734                 /* we should never be here */
1735         }
1736
1737         default:
1738                 /* TODO: do normal pp_token parsing and concatenate results */
1739                 panic("pp_token concat include not implemented yet");
1740         }
1741
1742 finished_headername:
1743         string = sym_make_string(STRING_ENCODING_CHAR);
1744
1745 finish_error:
1746         pp_token.base.source_position = start_position;
1747         pp_token.kind                 = T_HEADERNAME;
1748         pp_token.literal.string       = string;
1749 }
1750
1751 static bool do_include(bool system_include, const char *headername)
1752 {
1753         size_t headername_len = strlen(headername);
1754         if (!system_include) {
1755                 /* put dirname of current input on obstack */
1756                 const char *filename   = input.position.input_name;
1757                 const char *last_slash = strrchr(filename, '/');
1758                 if (last_slash != NULL) {
1759                         size_t len = last_slash - filename;
1760                         obstack_grow(&symbol_obstack, filename, len + 1);
1761                         obstack_grow0(&symbol_obstack, headername, headername_len);
1762                         char *complete_path = obstack_finish(&symbol_obstack);
1763                         headername = identify_string(complete_path);
1764                 }
1765
1766                 FILE *file = fopen(headername, "r");
1767                 if (file != NULL) {
1768                         switch_input(file, headername);
1769                         return true;
1770                 }
1771         }
1772
1773         assert(obstack_object_size(&symbol_obstack) == 0);
1774         /* check searchpath */
1775         for (searchpath_entry_t *entry = searchpath; entry != NULL;
1776              entry = entry->next) {
1777             const char *path = entry->path;
1778             size_t      len  = strlen(path);
1779                 obstack_grow(&symbol_obstack, path, len);
1780                 if (path[len-1] != '/')
1781                         obstack_1grow(&symbol_obstack, '/');
1782                 obstack_grow(&symbol_obstack, headername, headername_len+1);
1783
1784                 char *complete_path = obstack_finish(&symbol_obstack);
1785                 FILE *file          = fopen(complete_path, "r");
1786                 if (file != NULL) {
1787                         const char *filename = identify_string(complete_path);
1788                         switch_input(file, filename);
1789                         return true;
1790                 } else {
1791                         obstack_free(&symbol_obstack, complete_path);
1792                 }
1793         }
1794
1795         return false;
1796 }
1797
1798 static void parse_include_directive(void)
1799 {
1800         if (skip_mode) {
1801                 eat_pp_directive();
1802                 return;
1803         }
1804
1805         /* don't eat the TP_include here!
1806          * we need an alternative parsing for the next token */
1807         skip_till_newline(true);
1808         bool system_include = input.c == '<';
1809         parse_headername();
1810         string_t headername = pp_token.literal.string;
1811         if (headername.begin == NULL) {
1812                 eat_pp_directive();
1813                 return;
1814         }
1815
1816         bool had_nonwhitespace = skip_till_newline(false);
1817         if (had_nonwhitespace) {
1818                 warningf(WARN_OTHER, &pp_token.base.source_position,
1819                          "extra tokens at end of #include directive");
1820         }
1821
1822         if (n_inputs > INCLUDE_LIMIT) {
1823                 errorf(&pp_token.base.source_position, "#include nested too deeply");
1824                 /* eat \n or EOF */
1825                 next_input_token();
1826                 return;
1827         }
1828
1829         /* switch inputs */
1830         info.whitespace_at_line_begin = 0;
1831         info.had_whitespace           = false;
1832         info.at_line_begin            = true;
1833         emit_newlines();
1834         push_input();
1835         bool res = do_include(system_include, pp_token.literal.string.begin);
1836         if (res) {
1837                 next_input_token();
1838         } else {
1839                 errorf(&pp_token.base.source_position, "failed including '%S': %s", &pp_token.literal.string, strerror(errno));
1840                 pop_restore_input();
1841         }
1842 }
1843
1844 static pp_conditional_t *push_conditional(void)
1845 {
1846         pp_conditional_t *conditional
1847                 = obstack_alloc(&pp_obstack, sizeof(*conditional));
1848         memset(conditional, 0, sizeof(*conditional));
1849
1850         conditional->parent = conditional_stack;
1851         conditional_stack   = conditional;
1852
1853         return conditional;
1854 }
1855
1856 static void pop_conditional(void)
1857 {
1858         assert(conditional_stack != NULL);
1859         conditional_stack = conditional_stack->parent;
1860 }
1861
1862 static void check_unclosed_conditionals(void)
1863 {
1864         while (conditional_stack != NULL) {
1865                 pp_conditional_t *conditional = conditional_stack;
1866
1867                 if (conditional->in_else) {
1868                         errorf(&conditional->source_position, "unterminated #else");
1869                 } else {
1870                         errorf(&conditional->source_position, "unterminated condition");
1871                 }
1872                 pop_conditional();
1873         }
1874 }
1875
1876 static void parse_ifdef_ifndef_directive(bool const is_ifdef)
1877 {
1878         bool condition;
1879         eat_pp(is_ifdef ? TP_ifdef : TP_ifndef);
1880
1881         if (skip_mode) {
1882                 eat_pp_directive();
1883                 pp_conditional_t *conditional = push_conditional();
1884                 conditional->source_position  = pp_token.base.source_position;
1885                 conditional->skip             = true;
1886                 return;
1887         }
1888
1889         if (pp_token.kind != T_IDENTIFIER || info.at_line_begin) {
1890                 errorf(&pp_token.base.source_position,
1891                        "expected identifier after #%s, got %K",
1892                        is_ifdef ? "ifdef" : "ifndef", &pp_token);
1893                 eat_pp_directive();
1894
1895                 /* just take the true case in the hope to avoid further errors */
1896                 condition = true;
1897         } else {
1898                 /* evaluate wether we are in true or false case */
1899                 condition = (bool)pp_token.base.symbol->pp_definition == is_ifdef;
1900                 eat_token(T_IDENTIFIER);
1901
1902                 if (!info.at_line_begin) {
1903                         errorf(&pp_token.base.source_position,
1904                                "extra tokens at end of #%s",
1905                                is_ifdef ? "ifdef" : "ifndef");
1906                         eat_pp_directive();
1907                 }
1908         }
1909
1910         pp_conditional_t *conditional = push_conditional();
1911         conditional->source_position  = pp_token.base.source_position;
1912         conditional->condition        = condition;
1913
1914         if (!condition) {
1915                 skip_mode = true;
1916         }
1917 }
1918
1919 static void parse_else_directive(void)
1920 {
1921         eat_pp(TP_else);
1922
1923         if (!info.at_line_begin) {
1924                 if (!skip_mode) {
1925                         warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #else");
1926                 }
1927                 eat_pp_directive();
1928         }
1929
1930         pp_conditional_t *conditional = conditional_stack;
1931         if (conditional == NULL) {
1932                 errorf(&pp_token.base.source_position, "#else without prior #if");
1933                 return;
1934         }
1935
1936         if (conditional->in_else) {
1937                 errorf(&pp_token.base.source_position,
1938                        "#else after #else (condition started %P)",
1939                        &conditional->source_position);
1940                 skip_mode = true;
1941                 return;
1942         }
1943
1944         conditional->in_else = true;
1945         if (!conditional->skip) {
1946                 skip_mode = conditional->condition;
1947         }
1948         conditional->source_position = pp_token.base.source_position;
1949 }
1950
1951 static void parse_endif_directive(void)
1952 {
1953         eat_pp(TP_endif);
1954
1955         if (!info.at_line_begin) {
1956                 if (!skip_mode) {
1957                         warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #endif");
1958                 }
1959                 eat_pp_directive();
1960         }
1961
1962         pp_conditional_t *conditional = conditional_stack;
1963         if (conditional == NULL) {
1964                 errorf(&pp_token.base.source_position, "#endif without prior #if");
1965                 return;
1966         }
1967
1968         if (!conditional->skip) {
1969                 skip_mode = false;
1970         }
1971         pop_conditional();
1972 }
1973
/** The pragmas that may follow "#pragma STDC"; STDC_UNKNOWN stands for
 * everything that was not recognized. */
typedef enum stdc_pragma_kind_t {
	STDC_UNKNOWN = 0,
	STDC_FP_CONTRACT,
	STDC_FENV_ACCESS,
	STDC_CX_LIMITED_RANGE,
} stdc_pragma_kind_t;
1980
/** The argument of a "#pragma STDC" pragma; STDC_VALUE_UNKNOWN stands for
 * anything other than ON, OFF or DEFAULT. */
typedef enum stdc_pragma_value_kind_t {
	STDC_VALUE_UNKNOWN = 0,
	STDC_VALUE_ON,
	STDC_VALUE_OFF,
	STDC_VALUE_DEFAULT,
} stdc_pragma_value_kind_t;
1987
1988 static void parse_pragma_directive(void)
1989 {
1990         eat_pp(TP_pragma);
1991
1992         if (pp_token.kind != T_IDENTIFIER) {
1993                 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
1994                          "expected identifier after #pragma");
1995                 eat_pp_directive();
1996                 return;
1997         }
1998
1999         stdc_pragma_kind_t kind = STDC_UNKNOWN;
2000         if (pp_token.base.symbol->pp_ID == TP_STDC && c_mode & _C99) {
2001                 /* a STDC pragma */
2002                 next_input_token();
2003
2004                 switch (pp_token.base.symbol->pp_ID) {
2005                 case TP_FP_CONTRACT:      kind = STDC_FP_CONTRACT;      break;
2006                 case TP_FENV_ACCESS:      kind = STDC_FENV_ACCESS;      break;
2007                 case TP_CX_LIMITED_RANGE: kind = STDC_CX_LIMITED_RANGE; break;
2008                 default:                  break;
2009                 }
2010                 if (kind != STDC_UNKNOWN) {
2011                         next_input_token();
2012                         stdc_pragma_value_kind_t value;
2013                         switch (pp_token.base.symbol->pp_ID) {
2014                         case TP_ON:      value = STDC_VALUE_ON;      break;
2015                         case TP_OFF:     value = STDC_VALUE_OFF;     break;
2016                         case TP_DEFAULT: value = STDC_VALUE_DEFAULT; break;
2017                         default:         value = STDC_VALUE_UNKNOWN; break;
2018                         }
2019                         if (value == STDC_VALUE_UNKNOWN) {
2020                                 kind = STDC_UNKNOWN;
2021                                 errorf(&pp_token.base.source_position, "bad STDC pragma argument");
2022                         }
2023                 }
2024         }
2025         eat_pp_directive();
2026         if (kind == STDC_UNKNOWN) {
2027                 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
2028                          "encountered unknown #pragma");
2029         }
2030 }
2031
2032 static void parse_line_directive(void)
2033 {
2034         if (pp_token.kind != T_NUMBER) {
2035                 if (!skip_mode)
2036                         parse_error("expected integer");
2037         } else {
2038                 char      *end;
2039                 long const line = strtol(pp_token.literal.string.begin, &end, 0);
2040                 if (*end == '\0') {
2041                         /* use offset -1 as this is about the next line */
2042                         input.position.lineno = line - 1;
2043                         /* force output of line */
2044                         input.output_line = input.position.lineno - 20;
2045                 } else {
2046                         if (!skip_mode) {
2047                                 errorf(&input.position, "'%S' is not a valid line number",
2048                                            &pp_token.literal.string);
2049                         }
2050                 }
2051                 next_input_token();
2052         }
2053         if (pp_token.kind == T_STRING_LITERAL
2054             && pp_token.literal.string.encoding == STRING_ENCODING_CHAR) {
2055                 input.position.input_name       = pp_token.literal.string.begin;
2056                 input.position.is_system_header = false;
2057                 next_input_token();
2058
2059                 /* attempt to parse numeric flags as outputted by gcc preprocessor */
2060                 while (pp_token.kind == T_NUMBER) {
2061                         /* flags:
2062                          * 1 - indicates start of a new file
2063                          * 2 - indicates return from a file
2064                          * 3 - indicates system header
2065                          * 4 - indicates implicit extern "C" in C++ mode
2066                          *
2067                          * currently we're only interested in "3"
2068                          */
2069                         if (streq(pp_token.literal.string.begin, "3")) {
2070                                 input.position.is_system_header = true;
2071                         }
2072                         next_input_token();
2073                 }
2074         }
2075
2076         eat_pp_directive();
2077 }
2078
2079 static void parse_preprocessing_directive(void)
2080 {
2081         eat_token('#');
2082
2083         if (info.at_line_begin) {
2084                 /* empty directive */
2085                 return;
2086         }
2087
2088         if (pp_token.base.symbol) {
2089                 switch (pp_token.base.symbol->pp_ID) {
2090                 case TP_define:  parse_define_directive();            break;
2091                 case TP_else:    parse_else_directive();              break;
2092                 case TP_endif:   parse_endif_directive();             break;
2093                 case TP_ifdef:   parse_ifdef_ifndef_directive(true);  break;
2094                 case TP_ifndef:  parse_ifdef_ifndef_directive(false); break;
2095                 case TP_include: parse_include_directive();           break;
2096                 case TP_line:    next_input_token(); goto line_directive;
2097                 case TP_pragma:  parse_pragma_directive();            break;
2098                 case TP_undef:   parse_undef_directive();             break;
2099                 default:         goto skip;
2100                 }
2101         } else if (pp_token.kind == T_NUMBER) {
2102 line_directive:
2103                 parse_line_directive();
2104         } else {
2105 skip:
2106                 if (!skip_mode) {
2107                         errorf(&pp_token.base.source_position, "invalid preprocessing directive #%K", &pp_token);
2108                 }
2109                 eat_pp_directive();
2110         }
2111
2112         assert(info.at_line_begin);
2113 }
2114
2115 static void finish_current_argument(void)
2116 {
2117         if (current_argument == NULL)
2118                 return;
2119         size_t size = obstack_object_size(&pp_obstack);
2120         current_argument->list_len   = size/sizeof(current_argument->token_list[0]);
2121         current_argument->token_list = obstack_finish(&pp_obstack);
2122 }
2123
2124 void next_preprocessing_token(void)
2125 {
2126 restart:
2127         if (!expand_next()) {
2128                 do {
2129                         next_input_token();
2130                         while (pp_token.kind == '#' && info.at_line_begin) {
2131                                 parse_preprocessing_directive();
2132                         }
2133                 } while (skip_mode && pp_token.kind != T_EOF);
2134         }
2135
2136         const token_kind_t kind = pp_token.kind;
2137         if (current_call == NULL || argument_expanding != NULL) {
2138                 if (kind == T_IDENTIFIER) {
2139                         symbol_t        *const symbol        = pp_token.base.symbol;
2140                         pp_definition_t *const pp_definition = symbol->pp_definition;
2141                         if (pp_definition != NULL && !pp_definition->is_expanding) {
2142                                 if (pp_definition->has_parameters) {
2143
2144                                         /* check if next token is a '(' */
2145                                         whitespace_info_t old_info   = info;
2146                                         token_kind_t      next_token = peek_expansion();
2147                                         if (next_token == T_EOF) {
2148                                                 info.at_line_begin  = false;
2149                                                 info.had_whitespace = false;
2150                                                 skip_whitespace();
2151                                                 if (input.c == '(') {
2152                                                         next_token = '(';
2153                                                 }
2154                                         }
2155
2156                                         if (next_token == '(') {
2157                                                 if (current_expansion == NULL)
2158                                                         expansion_pos = pp_token.base.source_position;
2159                                                 next_preprocessing_token();
2160                                                 assert(pp_token.kind == '(');
2161
2162                                                 pp_definition->parent_expansion = current_expansion;
2163                                                 current_call              = pp_definition;
2164                                                 current_call->expand_pos  = 0;
2165                                                 current_call->expand_info = old_info;
2166                                                 if (current_call->n_parameters > 0) {
2167                                                         current_argument = &current_call->parameters[0];
2168                                                         assert(argument_brace_count == 0);
2169                                                 }
2170                                                 goto restart;
2171                                         } else {
2172                                                 /* skip_whitespaces() skipped newlines and whitespace,
2173                                                  * remember results for next token */
2174                                                 next_info = info;
2175                                                 info      = old_info;
2176                                                 return;
2177                                         }
2178                                 } else {
2179                                         if (current_expansion == NULL)
2180                                                 expansion_pos = pp_token.base.source_position;
2181                                         start_expanding(pp_definition);
2182                                         goto restart;
2183                                 }
2184                         }
2185                 } else if (kind == T_MACRO_PARAMETER) {
2186                         assert(current_expansion != NULL);
2187                         start_expanding(pp_token.macro_parameter.def);
2188                         goto restart;
2189                 }
2190         }
2191
2192         if (current_call != NULL) {
2193                 /* current_call != NULL */
2194                 if (kind == '(') {
2195                         ++argument_brace_count;
2196                 } else if (kind == ')') {
2197                         if (argument_brace_count > 0) {
2198                                 --argument_brace_count;
2199                         } else {
2200                                 finish_current_argument();
2201                                 assert(kind == ')');
2202                                 start_expanding(current_call);
2203                                 info = current_call->expand_info;
2204                                 current_call     = NULL;
2205                                 current_argument = NULL;
2206                                 goto restart;
2207                         }
2208                 } else if (kind == ',' && argument_brace_count == 0) {
2209                         finish_current_argument();
2210                         current_call->expand_pos++;
2211                         if (current_call->expand_pos >= current_call->n_parameters) {
2212                                 errorf(&pp_token.base.source_position,
2213                                            "too many arguments passed for macro '%Y'",
2214                                            current_call->symbol);
2215                                 current_argument = NULL;
2216                         } else {
2217                                 current_argument
2218                                         = &current_call->parameters[current_call->expand_pos];
2219                         }
2220                         goto restart;
2221                 } else if (kind == T_MACRO_PARAMETER) {
2222                         /* parameters have to be fully expanded before being used as
2223                          * parameters for another macro-call */
2224                         assert(current_expansion != NULL);
2225                         pp_definition_t *argument = pp_token.macro_parameter.def;
2226                         argument_expanding = argument;
2227                         start_expanding(argument);
2228                         goto restart;
2229                 } else if (kind == T_EOF) {
2230                         errorf(&expansion_pos,
2231                                "reached end of file while parsing arguments for '%Y'",
2232                                current_call->symbol);
2233                         return;
2234                 }
2235                 if (current_argument != NULL) {
2236                         saved_token_t saved;
2237                         saved.token = pp_token;
2238                         saved.had_whitespace = info.had_whitespace;
2239                         obstack_grow(&pp_obstack, &saved, sizeof(saved));
2240                 }
2241                 goto restart;
2242         }
2243 }
2244
2245
2246 static void prepend_include_path(const char *path)
2247 {
2248         searchpath_entry_t *entry = OALLOCZ(&config_obstack, searchpath_entry_t);
2249         entry->path = path;
2250         entry->next = searchpath;
2251         searchpath  = entry;
2252 }
2253
2254 static void setup_include_path(void)
2255 {
2256         /* built-in paths */
2257         prepend_include_path("/usr/include");
2258
2259         /* parse environment variable */
2260         const char *cpath = getenv("CPATH");
2261         if (cpath != NULL && *cpath != '\0') {
2262                 const char *begin = cpath;
2263                 const char *c;
2264                 do {
2265                         c = begin;
2266                         while (*c != '\0' && *c != ':')
2267                                 ++c;
2268
2269                         size_t len = c-begin;
2270                         if (len == 0) {
2271                                 /* for gcc compatibility (Matze: I would expect that
2272                                  * nothing happens for an empty entry...) */
2273                                 prepend_include_path(".");
2274                         } else {
2275                                 char *string = obstack_alloc(&config_obstack, len+1);
2276                                 memcpy(string, begin, len);
2277                                 string[len] = '\0';
2278
2279                                 prepend_include_path(string);
2280                         }
2281
2282                         begin = c+1;
2283                         /* skip : */
2284                         if (*begin == ':')
2285                                 ++begin;
2286                 } while(*c != '\0');
2287         }
2288 }
2289
2290 void init_preprocessor(void)
2291 {
2292         init_symbols();
2293
2294         obstack_init(&config_obstack);
2295         obstack_init(&pp_obstack);
2296         obstack_init(&input_obstack);
2297         strset_init(&stringset);
2298
2299         setup_include_path();
2300 }
2301
2302 void exit_preprocessor(void)
2303 {
2304         obstack_free(&input_obstack, NULL);
2305         obstack_free(&pp_obstack, NULL);
2306         obstack_free(&config_obstack, NULL);
2307
2308         strset_destroy(&stringset);
2309 }
2310
2311 int pptest_main(int argc, char **argv);
2312 int pptest_main(int argc, char **argv)
2313 {
2314         init_symbol_table();
2315         init_preprocessor();
2316         init_tokens();
2317
2318         error_on_unknown_chars   = false;
2319         resolve_escape_sequences = false;
2320
2321         /* simplistic commandline parser */
2322         const char *filename = NULL;
2323         const char *output = NULL;
2324         for (int i = 1; i < argc; ++i) {
2325                 const char *opt = argv[i];
2326                 if (streq(opt, "-I")) {
2327                         prepend_include_path(argv[++i]);
2328                         continue;
2329                 } else if (streq(opt, "-E")) {
2330                         /* ignore */
2331                 } else if (streq(opt, "-o")) {
2332                         output = argv[++i];
2333                         continue;
2334                 } else if (opt[0] == '-') {
2335                         fprintf(stderr, "Unknown option '%s'\n", opt);
2336                 } else {
2337                         if (filename != NULL)
2338                                 fprintf(stderr, "Multiple inputs not supported\n");
2339                         filename = argv[i];
2340                 }
2341         }
2342         if (filename == NULL) {
2343                 fprintf(stderr, "No input specified\n");
2344                 return 1;
2345         }
2346
2347         if (output == NULL) {
2348                 out = stdout;
2349         } else {
2350                 out = fopen(output, "w");
2351                 if (out == NULL) {
2352                         fprintf(stderr, "Couldn't open output '%s'\n", output);
2353                         return 1;
2354                 }
2355         }
2356
2357         /* just here for gcc compatibility */
2358         fprintf(out, "# 1 \"%s\"\n", filename);
2359         fprintf(out, "# 1 \"<built-in>\"\n");
2360         fprintf(out, "# 1 \"<command-line>\"\n");
2361
2362         FILE *file = fopen(filename, "r");
2363         if (file == NULL) {
2364                 fprintf(stderr, "Couldn't open input '%s'\n", filename);
2365                 return 1;
2366         }
2367         switch_input(file, filename);
2368
2369         for (;;) {
2370                 next_preprocessing_token();
2371                 if (pp_token.kind == T_EOF)
2372                         break;
2373                 emit_pp_token();
2374         }
2375
2376         fputc('\n', out);
2377         check_unclosed_conditionals();
2378         fclose(close_input());
2379         if (out != stdout)
2380                 fclose(out);
2381
2382         exit_tokens();
2383         exit_preprocessor();
2384         exit_symbol_table();
2385
2386         return 0;
2387 }