nsz Git - cparser/blob - preprocessor.c

   1 #include <config.h>
   2
   3 #include <assert.h>
   4 #include <errno.h>
   5 #include <string.h>
   6 #include <stdbool.h>
   7 #include <ctype.h>
   8
   9 #include "token_t.h"
  10 #include "symbol_t.h"
  11 #include "adt/util.h"
  12 #include "adt/error.h"
  13 #include "adt/strutil.h"
  14 #include "adt/strset.h"
  15 #include "lang_features.h"
  16 #include "diagnostic.h"
  17 #include "string_rep.h"
  18 #include "input.h"
  19
/* Number of slots kept free at the start of pp_input_t.buf: freshly decoded
 * characters are stored behind them, so put_back() always has room to push
 * characters back in front of the read position. */
#define MAX_PUTBACK 3
#define INCLUDE_LIMIT 199  /* 199 is for gcc "compatibility" */
  22
/** One token of a macro replacement list (see pp_definition_t.token_list). */
typedef struct saved_token_t {
        token_t token;
        /** whitespace flag stored with the token; expand_next() copies it to
         * info.had_whitespace for every token but the first of an expansion */
        bool    had_whitespace;
} saved_token_t;
  27
/** Whitespace facts collected in front of a token (filled in by
 * skip_whitespace() and the comment skippers). */
typedef struct whitespace_info_t {
        /** current token had whitespace in front of it */
        bool     had_whitespace;
        /** current token is at the beginning of a line.
         * => a "#" at line begin starts a preprocessing directive. */
        bool     at_line_begin;
        /** number of spaces before the first token in a line */
        unsigned whitespace_at_line_begin;
} whitespace_info_t;
  37
/** A preprocessor definition together with the state of its expansion. */
struct pp_definition_t {
        symbol_t          *symbol;
        source_position_t  source_position;
        /** expansion that was active when start_expanding() was called for this
         * definition; restored by finished_expanding() */
        pp_definition_t   *parent_expansion;
        /** index into token_list of the next token expand_next() hands out */
        size_t             expand_pos;
        whitespace_info_t  expand_info;
        bool               is_variadic    : 1;
        /** set by start_expanding(), cleared by finished_expanding() */
        bool               is_expanding   : 1;
        bool               has_parameters : 1;
        bool               is_parameter   : 1;
        pp_definition_t   *function_definition;
        size_t             n_parameters;
        pp_definition_t   *parameters;

        /* replacement */
        /** number of entries in token_list */
        size_t             list_len;
        saved_token_t     *token_list;
};
  56
/** Entry of a linked stack of conditionals (see conditional_stack); `parent`
 * points to the enclosing entry.
 * NOTE(review): presumably one entry per open #if/#ifdef/#ifndef -- the code
 * using this type is outside this part of the file. */
typedef struct pp_conditional_t pp_conditional_t;
struct pp_conditional_t {
        source_position_t  source_position;
        bool               condition;
        bool               in_else;
        /** conditional in skip mode (then+else gets skipped) */
        bool               skip;
        pp_conditional_t  *parent;
};
  66
/** State of one input stream that is read character by character. */
typedef struct pp_input_t pp_input_t;
struct pp_input_t {
        /** underlying stream; closed by close_input() */
        FILE              *file;
        /** decoder created by input_from_stream() for `file` */
        input_t           *input;
        /** current character (EOF once the input is exhausted) */
        utf32              c;
        /** decoded characters; the first MAX_PUTBACK slots stay free for
         * put_back() */
        utf32              buf[1024+MAX_PUTBACK];
        /** one past the last decoded character in buf */
        const utf32       *bufend;
        /** next character to be read from buf */
        const utf32       *bufpos;
        /** position of the current character */
        source_position_t  position;
        /** next older entry when stored on input_stack */
        pp_input_t        *parent;
        unsigned           output_line;
};
  79
/** Node of a singly linked list of paths (see `searchpath`).
 * NOTE(review): presumably the include search path -- confirm at the users. */
typedef struct searchpath_entry_t searchpath_entry_t;
struct searchpath_entry_t {
        const char         *path;
        searchpath_entry_t *next;
};
  85
/** the input that is currently being read (see next_real_char()) */
static pp_input_t      input;

/** inputs suspended by push_input(), newest first; n_inputs is the number of
 * entries, which are allocated on input_obstack */
static pp_input_t     *input_stack;
static unsigned        n_inputs;
static struct obstack  input_obstack;

static pp_conditional_t *conditional_stack;

/** the token currently being built or delivered */
static token_t           pp_token;
static bool              resolve_escape_sequences = false;
static bool              error_on_unknown_chars   = true;
static bool              skip_mode;
static FILE             *out;
static struct obstack    pp_obstack;
static struct obstack    config_obstack;
static const char       *printed_input_name = NULL;
/** source position stamped onto every token produced by expand_next() */
static source_position_t expansion_pos;
/** innermost expansion that currently delivers tokens, NULL if none */
static pp_definition_t  *current_expansion  = NULL;
static pp_definition_t  *current_call       = NULL;
static pp_definition_t  *current_argument   = NULL;
static pp_definition_t  *argument_expanding = NULL;
static unsigned          argument_brace_count;
/** set used by identify_string() to unify equal strings */
static strset_t          stringset;
static token_kind_t      last_token;

static searchpath_entry_t *searchpath;

static whitespace_info_t next_info; /* valid if had_whitespace is true */
static whitespace_info_t info;

static inline void next_char(void);
static void next_input_token(void);
static void print_line_directive(const source_position_t *pos, const char *add);

/* symbols of the digraph spellings, created by init_symbols() */
static symbol_t *symbol_colongreater;
static symbol_t *symbol_lesscolon;
static symbol_t *symbol_lesspercent;
static symbol_t *symbol_percentcolon;
static symbol_t *symbol_percentcolonpercentcolon;
static symbol_t *symbol_percentgreater;
 126
 127 static void init_symbols(void)
 128 {
 129         symbol_colongreater             = symbol_table_insert(":>");
 130         symbol_lesscolon                = symbol_table_insert("<:");
 131         symbol_lesspercent              = symbol_table_insert("<%");
 132         symbol_percentcolon             = symbol_table_insert("%:");
 133         symbol_percentcolonpercentcolon = symbol_table_insert("%:%:");
 134         symbol_percentgreater           = symbol_table_insert("%>");
 135 }
 136
 137 static void switch_input(FILE *file, const char *filename)
 138 {
 139         input.file                = file;
 140         input.input               = input_from_stream(file, NULL);
 141         input.bufend              = NULL;
 142         input.bufpos              = NULL;
 143         input.output_line         = 0;
 144         input.position.input_name = filename;
 145         input.position.lineno     = 1;
 146
 147         /* indicate that we're at a new input */
 148         print_line_directive(&input.position, input_stack != NULL ? "1" : NULL);
 149
 150         /* place a virtual '\n' so we realize we're at line begin */
 151         input.position.lineno = 0;
 152         input.c               = '\n';
 153 }
 154
 155 static void close_input(void)
 156 {
 157         input_free(input.input);
 158         assert(input.file != NULL);
 159
 160         fclose(input.file);
 161         input.input  = NULL;
 162         input.file   = NULL;
 163         input.bufend = NULL;
 164         input.bufpos = NULL;
 165         input.c      = EOF;
 166 }
 167
 168 static void push_input(void)
 169 {
 170         pp_input_t *saved_input
 171                 = obstack_alloc(&input_obstack, sizeof(*saved_input));
 172
 173         memcpy(saved_input, &input, sizeof(*saved_input));
 174
 175         /* adjust buffer positions */
 176         if (input.bufpos != NULL)
 177                 saved_input->bufpos = saved_input->buf + (input.bufpos - input.buf);
 178         if (input.bufend != NULL)
 179                 saved_input->bufend = saved_input->buf + (input.bufend - input.buf);
 180
 181         saved_input->parent = input_stack;
 182         input_stack         = saved_input;
 183         ++n_inputs;
 184 }
 185
 186 static void pop_restore_input(void)
 187 {
 188         assert(n_inputs > 0);
 189         assert(input_stack != NULL);
 190
 191         pp_input_t *saved_input = input_stack;
 192
 193         memcpy(&input, saved_input, sizeof(input));
 194         input.parent = NULL;
 195
 196         /* adjust buffer positions */
 197         if (saved_input->bufpos != NULL)
 198                 input.bufpos = input.buf + (saved_input->bufpos - saved_input->buf);
 199         if (saved_input->bufend != NULL)
 200                 input.bufend = input.buf + (saved_input->bufend - saved_input->buf);
 201
 202         input_stack = saved_input->parent;
 203         obstack_free(&input_obstack, saved_input);
 204         --n_inputs;
 205 }
 206
 207 /**
 208  * Prints a parse error message at the current token.
 209  *
 210  * @param msg   the error message
 211  */
 212 static void parse_error(const char *msg)
 213 {
 214         errorf(&pp_token.base.source_position,  "%s", msg);
 215 }
 216
 217 static inline void next_real_char(void)
 218 {
 219         assert(input.bufpos <= input.bufend);
 220         if (input.bufpos >= input.bufend) {
 221                 size_t const n = decode(input.input, input.buf + MAX_PUTBACK, lengthof(input.buf) - MAX_PUTBACK);
 222                 if (n == 0) {
 223                         input.c = EOF;
 224                         return;
 225                 }
 226                 input.bufpos = input.buf + MAX_PUTBACK;
 227                 input.bufend = input.bufpos + n;
 228         }
 229         input.c = *input.bufpos++;
 230         ++input.position.colno;
 231 }
 232
 233 /**
 234  * Put a character back into the buffer.
 235  *
 236  * @param pc  the character to put back
 237  */
 238 static inline void put_back(utf32 const pc)
 239 {
 240         assert(input.bufpos > input.buf);
 241         *(--input.bufpos - input.buf + input.buf) = (char) pc;
 242         --input.position.colno;
 243 }
 244
/**
 * Case label for line endings, to be used as `case NEWLINE:` in a switch over
 * input.c.  It matches '\r', '\n' and "\r\n", consumes the line ending,
 * increments input.position.lineno and sets input.position.colno to 1.
 * Execution then continues with the statements written after
 * `case NEWLINE:`, because the trailing `newline` turns into a label there.
 * As it defines the label `newline`, it can be used only once per function.
 */
#define NEWLINE \
        '\r': \
                next_char(); \
                if (input.c == '\n') { \
        case '\n': \
                        next_char(); \
                } \
                ++input.position.lineno; \
                input.position.colno = 1; \
                goto newline; \
                newline // Let it look like an ordinary case label.
 256
/** Asserts that the current character is c_type, then advances to the next
 * character. */
#define eat(c_type) (assert(input.c == c_type), next_char())
 258
/**
 * Handles a backslash in the input; must be called with input.c == '\\'.
 *
 * If a line ending follows, backslash and line ending are both consumed and
 * input.c is the character after the line ending.  Otherwise the character
 * read after the backslash is put back and input.c stays '\\'.
 */
static void maybe_concat_lines(void)
{
        eat('\\');

        switch (input.c) {
        case NEWLINE:
                /* the line ending was consumed by the NEWLINE label */
                info.whitespace_at_line_begin = 0;
                return;

        default:
                break;
        }

        /* an ordinary backslash: undo the lookahead */
        put_back(input.c);
        input.c = '\\';
}
 275
/**
 * Set c to the next input character, i.e. the character seen after trigraph
 * replacement and line splicing.
 *
 * One raw character is read with next_real_char() and then filtered:
 *  - a backslash is handed to maybe_concat_lines(), which removes a line
 *    ending that follows it;
 *  - "??x" is replaced by the character the trigraph stands for;
 *  - every other character is delivered unchanged.
 */
static inline void next_char(void)
{
        next_real_char();

        /* filter trigraphs and concatenated lines */
        if (UNLIKELY(input.c == '\\')) {
                maybe_concat_lines();
                goto end_of_next_char;
        }

        if (LIKELY(input.c != '?'))
                goto end_of_next_char;

        /* one '?' seen: look at the following character */
        next_real_char();
        if (LIKELY(input.c != '?')) {
                put_back(input.c);
                input.c = '?';
                goto end_of_next_char;
        }

        /* "??" seen: the third character decides whether it is a trigraph */
        next_real_char();
        switch (input.c) {
        case '=': input.c = '#'; break;
        case '(': input.c = '['; break;
        /* "??/" is a backslash and may therefore start a line splice */
        case '/': input.c = '\\'; maybe_concat_lines(); break;
        case ')': input.c = ']'; break;
        case '\'': input.c = '^'; break;
        case '<': input.c = '{'; break;
        case '!': input.c = '|'; break;
        case '>': input.c = '}'; break;
        case '-': input.c = '~'; break;
        default:
                /* no trigraph: deliver the first '?' now; the second '?' and the
                 * lookahead character are read again by the following calls */
                put_back(input.c);
                put_back('?');
                input.c = '?';
                break;
        }

end_of_next_char:;
#ifdef DEBUG_CHARS
        printf("nchar '%c'\n", input.c);
#endif
}
 323
 324
 325
 326 /**
 327  * Returns true if the given char is a octal digit.
 328  *
 329  * @param char  the character to check
 330  */
 331 static inline bool is_octal_digit(int chr)
 332 {
 333         switch (chr) {
 334         case '0':
 335         case '1':
 336         case '2':
 337         case '3':
 338         case '4':
 339         case '5':
 340         case '6':
 341         case '7':
 342                 return true;
 343         default:
 344                 return false;
 345         }
 346 }
 347
 348 /**
 349  * Returns the value of a digit.
 350  * The only portable way to do it ...
 351  */
 352 static int digit_value(int digit)
 353 {
 354         switch (digit) {
 355         case '0': return 0;
 356         case '1': return 1;
 357         case '2': return 2;
 358         case '3': return 3;
 359         case '4': return 4;
 360         case '5': return 5;
 361         case '6': return 6;
 362         case '7': return 7;
 363         case '8': return 8;
 364         case '9': return 9;
 365         case 'a':
 366         case 'A': return 10;
 367         case 'b':
 368         case 'B': return 11;
 369         case 'c':
 370         case 'C': return 12;
 371         case 'd':
 372         case 'D': return 13;
 373         case 'e':
 374         case 'E': return 14;
 375         case 'f':
 376         case 'F': return 15;
 377         default:
 378                 panic("wrong character given");
 379         }
 380 }
 381
 382 /**
 383  * Parses an octal character sequence.
 384  *
 385  * @param first_digit  the already read first digit
 386  */
 387 static utf32 parse_octal_sequence(const utf32 first_digit)
 388 {
 389         assert(is_octal_digit(first_digit));
 390         utf32 value = digit_value(first_digit);
 391         if (!is_octal_digit(input.c)) return value;
 392         value = 8 * value + digit_value(input.c);
 393         next_char();
 394         if (!is_octal_digit(input.c)) return value;
 395         value = 8 * value + digit_value(input.c);
 396         next_char();
 397         return value;
 398
 399 }
 400
 401 /**
 402  * Parses a hex character sequence.
 403  */
 404 static utf32 parse_hex_sequence(void)
 405 {
 406         utf32 value = 0;
 407         while (isxdigit(input.c)) {
 408                 value = 16 * value + digit_value(input.c);
 409                 next_char();
 410         }
 411         return value;
 412 }
 413
 414 static bool is_universal_char_valid(utf32 const v)
 415 {
 416         /* C11 §6.4.3:2 */
 417         if (v < 0xA0U && v != 0x24 && v != 0x40 && v != 0x60)
 418                 return false;
 419         if (0xD800 <= v && v <= 0xDFFF)
 420                 return false;
 421         return true;
 422 }
 423
 424 static utf32 parse_universal_char(unsigned const n_digits)
 425 {
 426         utf32 v = 0;
 427         for (unsigned k = n_digits; k != 0; --k) {
 428                 if (isxdigit(input.c)) {
 429                         v = 16 * v + digit_value(input.c);
 430                         if (!resolve_escape_sequences)
 431                                 obstack_1grow(&symbol_obstack, input.c);
 432                         next_char();
 433                 } else {
 434                         errorf(&input.position,
 435                                "short universal character name, expected %u more digits",
 436                                    k);
 437                         break;
 438                 }
 439         }
 440         if (!is_universal_char_valid(v)) {
 441                 errorf(&input.position,
 442                        "\\%c%0*X is not a valid universal character name",
 443                        n_digits == 4 ? 'u' : 'U', (int)n_digits, v);
 444         }
 445         return v;
 446 }
 447
 448 static bool is_universal_char_valid_identifier(utf32 const v)
 449 {
 450         /* C11 Annex D.1 */
 451         if (                v == 0x000A8) return true;
 452         if (                v == 0x000AA) return true;
 453         if (                v == 0x000AD) return true;
 454         if (                v == 0x000AF) return true;
 455         if (0x000B2 <= v && v <= 0x000B5) return true;
 456         if (0x000B7 <= v && v <= 0x000BA) return true;
 457         if (0x000BC <= v && v <= 0x000BE) return true;
 458         if (0x000C0 <= v && v <= 0x000D6) return true;
 459         if (0x000D8 <= v && v <= 0x000F6) return true;
 460         if (0x000F8 <= v && v <= 0x000FF) return true;
 461         if (0x00100 <= v && v <= 0x0167F) return true;
 462         if (0x01681 <= v && v <= 0x0180D) return true;
 463         if (0x0180F <= v && v <= 0x01FFF) return true;
 464         if (0x0200B <= v && v <= 0x0200D) return true;
 465         if (0x0202A <= v && v <= 0x0202E) return true;
 466         if (0x0203F <= v && v <= 0x02040) return true;
 467         if (                v == 0x02054) return true;
 468         if (0x02060 <= v && v <= 0x0206F) return true;
 469         if (0x02070 <= v && v <= 0x0218F) return true;
 470         if (0x02460 <= v && v <= 0x024FF) return true;
 471         if (0x02776 <= v && v <= 0x02793) return true;
 472         if (0x02C00 <= v && v <= 0x02DFF) return true;
 473         if (0x02E80 <= v && v <= 0x02FFF) return true;
 474         if (0x03004 <= v && v <= 0x03007) return true;
 475         if (0x03021 <= v && v <= 0x0302F) return true;
 476         if (0x03031 <= v && v <= 0x0303F) return true;
 477         if (0x03040 <= v && v <= 0x0D7FF) return true;
 478         if (0x0F900 <= v && v <= 0x0FD3D) return true;
 479         if (0x0FD40 <= v && v <= 0x0FDCF) return true;
 480         if (0x0FDF0 <= v && v <= 0x0FE44) return true;
 481         if (0x0FE47 <= v && v <= 0x0FFFD) return true;
 482         if (0x10000 <= v && v <= 0x1FFFD) return true;
 483         if (0x20000 <= v && v <= 0x2FFFD) return true;
 484         if (0x30000 <= v && v <= 0x3FFFD) return true;
 485         if (0x40000 <= v && v <= 0x4FFFD) return true;
 486         if (0x50000 <= v && v <= 0x5FFFD) return true;
 487         if (0x60000 <= v && v <= 0x6FFFD) return true;
 488         if (0x70000 <= v && v <= 0x7FFFD) return true;
 489         if (0x80000 <= v && v <= 0x8FFFD) return true;
 490         if (0x90000 <= v && v <= 0x9FFFD) return true;
 491         if (0xA0000 <= v && v <= 0xAFFFD) return true;
 492         if (0xB0000 <= v && v <= 0xBFFFD) return true;
 493         if (0xC0000 <= v && v <= 0xCFFFD) return true;
 494         if (0xD0000 <= v && v <= 0xDFFFD) return true;
 495         if (0xE0000 <= v && v <= 0xEFFFD) return true;
 496         return false;
 497 }
 498
 499 static bool is_universal_char_valid_identifier_start(utf32 const v)
 500 {
 501         /* C11 Annex D.2 */
 502         if (0x0300 <= v && v <= 0x036F) return false;
 503         if (0x1DC0 <= v && v <= 0x1DFF) return false;
 504         if (0x20D0 <= v && v <= 0x20FF) return false;
 505         if (0xFE20 <= v && v <= 0xFE2F) return false;
 506         return true;
 507 }
 508
 509 /**
 510  * Parse an escape sequence.
 511  */
 512 static utf32 parse_escape_sequence(void)
 513 {
 514         eat('\\');
 515
 516         utf32 const ec = input.c;
 517         next_char();
 518
 519         switch (ec) {
 520         case '"':  return '"';
 521         case '\'': return '\'';
 522         case '\\': return '\\';
 523         case '?': return '\?';
 524         case 'a': return '\a';
 525         case 'b': return '\b';
 526         case 'f': return '\f';
 527         case 'n': return '\n';
 528         case 'r': return '\r';
 529         case 't': return '\t';
 530         case 'v': return '\v';
 531         case 'x':
 532                 return parse_hex_sequence();
 533         case '0':
 534         case '1':
 535         case '2':
 536         case '3':
 537         case '4':
 538         case '5':
 539         case '6':
 540         case '7':
 541                 return parse_octal_sequence(ec);
 542         case EOF:
 543                 parse_error("reached end of file while parsing escape sequence");
 544                 return EOF;
 545         /* \E is not documented, but handled, by GCC.  It is acceptable according
 546          * to §6.11.4, whereas \e is not. */
 547         case 'E':
 548         case 'e':
 549                 if (c_mode & _GNUC)
 550                         return 27;   /* hopefully 27 is ALWAYS the code for ESCAPE */
 551                 break;
 552
 553         case 'U': return parse_universal_char(8);
 554         case 'u': return parse_universal_char(4);
 555
 556         default:
 557                 break;
 558         }
 559         /* §6.4.4.4:8 footnote 64 */
 560         parse_error("unknown escape sequence");
 561         return EOF;
 562 }
 563
 564 static const char *identify_string(char *string)
 565 {
 566         const char *result = strset_insert(&stringset, string);
 567         if (result != string) {
 568                 obstack_free(&symbol_obstack, string);
 569         }
 570         return result;
 571 }
 572
 573 static string_t sym_make_string(string_encoding_t const enc)
 574 {
 575         obstack_1grow(&symbol_obstack, '\0');
 576         size_t      const len    = obstack_object_size(&symbol_obstack) - 1;
 577         char       *const string = obstack_finish(&symbol_obstack);
 578         char const *const result = identify_string(string);
 579         return (string_t){ result, len, enc };
 580 }
 581
 582 static void parse_string(utf32 const delimiter, token_kind_t const kind,
 583                          string_encoding_t const enc,
 584                          char const *const context)
 585 {
 586         const unsigned start_linenr = input.position.lineno;
 587
 588         eat(delimiter);
 589
 590         while (true) {
 591                 switch (input.c) {
 592                 case '\\': {
 593                         if (resolve_escape_sequences) {
 594                                 utf32 const tc = parse_escape_sequence();
 595                                 if (enc == STRING_ENCODING_CHAR) {
 596                                         if (tc >= 0x100) {
 597                                                 warningf(WARN_OTHER, &pp_token.base.source_position, "escape sequence out of range");
 598                                         }
 599                                         obstack_1grow(&symbol_obstack, tc);
 600                                 } else {
 601                                         obstack_grow_utf8(&symbol_obstack, tc);
 602                                 }
 603                         } else {
 604                                 obstack_1grow(&symbol_obstack, (char)input.c);
 605                                 next_char();
 606                                 obstack_1grow(&symbol_obstack, (char)input.c);
 607                                 next_char();
 608                         }
 609                         break;
 610                 }
 611
 612                 case NEWLINE:
 613                         errorf(&pp_token.base.source_position, "newline while parsing %s", context);
 614                         break;
 615
 616                 case EOF: {
 617                         source_position_t source_position;
 618                         source_position.input_name = pp_token.base.source_position.input_name;
 619                         source_position.lineno     = start_linenr;
 620                         errorf(&source_position, "EOF while parsing %s", context);
 621                         goto end_of_string;
 622                 }
 623
 624                 default:
 625                         if (input.c == delimiter) {
 626                                 next_char();
 627                                 goto end_of_string;
 628                         } else {
 629                                 obstack_grow_utf8(&symbol_obstack, input.c);
 630                                 next_char();
 631                                 break;
 632                         }
 633                 }
 634         }
 635
 636 end_of_string:
 637         pp_token.kind           = kind;
 638         pp_token.literal.string = sym_make_string(enc);
 639 }
 640
 641 static void parse_string_literal(string_encoding_t const enc)
 642 {
 643         parse_string('"', T_STRING_LITERAL, enc, "string literal");
 644 }
 645
 646 static void parse_character_constant(string_encoding_t const enc)
 647 {
 648         parse_string('\'', T_CHARACTER_CONSTANT, enc, "character constant");
 649         if (pp_token.literal.string.size == 0) {
 650                 parse_error("empty character constant");
 651         }
 652 }
 653
/**
 * Case labels for the letters and '_' except 'e', 'E', 'p' and 'P'.
 * Written without the leading `case` and the trailing ':' so that it is
 * used as `case SYMBOL_CASES_WITHOUT_E_P:`.
 */
#define SYMBOL_CASES_WITHOUT_E_P \
             'a': \
        case 'b': \
        case 'c': \
        case 'd': \
        case 'f': \
        case 'g': \
        case 'h': \
        case 'i': \
        case 'j': \
        case 'k': \
        case 'l': \
        case 'm': \
        case 'n': \
        case 'o': \
        case 'q': \
        case 'r': \
        case 's': \
        case 't': \
        case 'u': \
        case 'v': \
        case 'w': \
        case 'x': \
        case 'y': \
        case 'z': \
        case 'A': \
        case 'B': \
        case 'C': \
        case 'D': \
        case 'F': \
        case 'G': \
        case 'H': \
        case 'I': \
        case 'J': \
        case 'K': \
        case 'L': \
        case 'M': \
        case 'N': \
        case 'O': \
        case 'Q': \
        case 'R': \
        case 'S': \
        case 'T': \
        case 'U': \
        case 'V': \
        case 'W': \
        case 'X': \
        case 'Y': \
        case 'Z': \
        case '_'

/** Case labels for all letters and '_'; used as `case SYMBOL_CASES:`. */
#define SYMBOL_CASES \
             SYMBOL_CASES_WITHOUT_E_P: \
        case 'e': \
        case 'p': \
        case 'E': \
        case 'P'

/** Case labels for the decimal digits; used as `case DIGIT_CASES:`. */
#define DIGIT_CASES \
             '0':  \
        case '1':  \
        case '2':  \
        case '3':  \
        case '4':  \
        case '5':  \
        case '6':  \
        case '7':  \
        case '8':  \
        case '9'
 723
 724 static void start_expanding(pp_definition_t *definition)
 725 {
 726         definition->parent_expansion = current_expansion;
 727         definition->expand_pos       = 0;
 728         definition->is_expanding     = true;
 729         if (definition->list_len > 0) {
 730                 definition->token_list[0].had_whitespace
 731                         = info.had_whitespace;
 732         }
 733         current_expansion = definition;
 734 }
 735
 736 static void finished_expanding(pp_definition_t *definition)
 737 {
 738         assert(definition->is_expanding);
 739         pp_definition_t *parent = definition->parent_expansion;
 740         definition->parent_expansion = NULL;
 741         definition->is_expanding     = false;
 742
 743         /* stop further expanding once we expanded a parameter used in a
 744          * sub macro-call */
 745         if (definition == argument_expanding)
 746                 argument_expanding = NULL;
 747
 748         assert(current_expansion == definition);
 749         current_expansion = parent;
 750 }
 751
 752 static inline void set_punctuator(token_kind_t const kind)
 753 {
 754         pp_token.kind        = kind;
 755         pp_token.base.symbol = token_symbols[kind];
 756 }
 757
 758 static inline void set_digraph(token_kind_t const kind, symbol_t *const symbol)
 759 {
 760         pp_token.kind        = kind;
 761         pp_token.base.symbol = symbol;
 762 }
 763
 764 /**
 765  * returns next final token from a preprocessor macro expansion
 766  */
 767 static bool expand_next(void)
 768 {
 769         if (current_expansion == NULL)
 770                 return false;
 771
 772 restart:;
 773         size_t pos = current_expansion->expand_pos;
 774         if (pos >= current_expansion->list_len) {
 775                 finished_expanding(current_expansion);
 776                 /* it was the outermost expansion, parse pptoken normally */
 777                 if (current_expansion == NULL) {
 778                         return false;
 779                 }
 780                 goto restart;
 781         }
 782         const saved_token_t *saved = &current_expansion->token_list[pos++];
 783         pp_token = saved->token;
 784
 785         if (current_expansion->expand_pos > 0)
 786                 info.had_whitespace = saved->had_whitespace;
 787         pp_token.base.source_position = expansion_pos;
 788         ++current_expansion->expand_pos;
 789
 790         return true;
 791 }
 792
 793 /**
 794  * Returns the next token kind found when continuing the current expansions
 795  * without starting new sub-expansions.
 796  */
 797 static token_kind_t peek_expansion(void)
 798 {
 799         pp_definition_t *expansion = current_expansion;
 800         while (expansion != NULL && expansion->expand_pos >= expansion->list_len) {
 801                 expansion = expansion->parent_expansion;
 802         }
 803         if (expansion == NULL)
 804                 return T_EOF;
 805         return expansion->token_list[expansion->expand_pos].token.kind;
 806 }
 807
 808 static void skip_line_comment(void)
 809 {
 810         info.had_whitespace = true;
 811         while (true) {
 812                 switch (input.c) {
 813                 case EOF:
 814                         return;
 815
 816                 case '\r':
 817                 case '\n':
 818                         return;
 819
 820                 default:
 821                         next_char();
 822                         break;
 823                 }
 824         }
 825 }
 826
 827 static void skip_multiline_comment(void)
 828 {
 829         info.had_whitespace = true;
 830
 831         unsigned start_linenr = input.position.lineno;
 832         while (true) {
 833                 switch (input.c) {
 834                 case '/':
 835                         next_char();
 836                         if (input.c == '*') {
 837                                 /* TODO: nested comment, warn here */
 838                         }
 839                         break;
 840                 case '*':
 841                         next_char();
 842                         if (input.c == '/') {
 843                                 if (input.position.lineno != input.output_line)
 844                                         info.whitespace_at_line_begin = input.position.colno;
 845                                 next_char();
 846                                 return;
 847                         }
 848                         break;
 849
 850                 case NEWLINE:
 851                         break;
 852
 853                 case EOF: {
 854                         source_position_t source_position;
 855                         source_position.input_name = pp_token.base.source_position.input_name;
 856                         source_position.lineno     = start_linenr;
 857                         errorf(&source_position, "at end of file while looking for comment end");
 858                         return;
 859                 }
 860
 861                 default:
 862                         next_char();
 863                         break;
 864                 }
 865         }
 866 }
 867
/**
 * Skips the rest of the current line including its line ending.  Blanks,
 * tabs and comments count as whitespace.
 *
 * @param stop_at_non_whitespace  if true, stop at the first character that
 *                                is not whitespace instead of skipping it
 * @return true if a non-whitespace character was skipped before the line
 *         ending, or if a '/' that does not start a comment was found;
 *         false otherwise (also when stopping early because of
 *         stop_at_non_whitespace)
 */
static bool skip_till_newline(bool stop_at_non_whitespace)
{
        bool res = false;
        while (true) {
                switch (input.c) {
                case ' ':
                case '\t':
                        next_char();
                        continue;

                case '/':
                        next_char();
                        if (input.c == '/') {
                                next_char();
                                skip_line_comment();
                                continue;
                        } else if (input.c == '*') {
                                next_char();
                                skip_multiline_comment();
                                continue;
                        } else {
                                /* a plain '/': undo the lookahead */
                                put_back(input.c);
                                input.c = '/';
                        }
                        /* NOTE(review): returns here with '/' as current character,
                         * without skipping the rest of the line and regardless of
                         * stop_at_non_whitespace -- confirm this is intended */
                        return true;

                case NEWLINE:
                        /* the line ending was consumed by the NEWLINE label */
                        return res;

                default:
                        if (stop_at_non_whitespace)
                                return false;
                        res = true;
                        next_char();
                        continue;
                }
        }
}
 906
 907 static void skip_whitespace(void)
 908 {
 909         while (true) {
 910                 switch (input.c) {
 911                 case ' ':
 912                 case '\t':
 913                         ++info.whitespace_at_line_begin;
 914                         info.had_whitespace = true;
 915                         next_char();
 916                         continue;
 917
 918                 case NEWLINE:
 919                         info.at_line_begin  = true;
 920                         info.had_whitespace = true;
 921                         info.whitespace_at_line_begin = 0;
 922                         continue;
 923
 924                 case '/':
 925                         next_char();
 926                         if (input.c == '/') {
 927                                 next_char();
 928                                 skip_line_comment();
 929                                 continue;
 930                         } else if (input.c == '*') {
 931                                 next_char();
 932                                 skip_multiline_comment();
 933                                 continue;
 934                         } else {
 935                                 put_back(input.c);
 936                                 input.c = '/';
 937                         }
 938                         return;
 939
 940                 default:
 941                         return;
 942                 }
 943         }
 944 }
 945
 946 static inline void eat_pp(pp_token_kind_t const kind)
 947 {
 948         assert(pp_token.base.symbol->pp_ID == kind);
 949         (void) kind;
 950         next_input_token();
 951 }
 952
 953 static inline void eat_token(token_kind_t const kind)
 954 {
 955         assert(pp_token.kind == kind);
 956         (void)kind;
 957         next_input_token();
 958 }
 959
 960 static void parse_symbol(void)
 961 {
 962         assert(obstack_object_size(&symbol_obstack) == 0);
 963         while (true) {
 964                 switch (input.c) {
 965                 case DIGIT_CASES:
 966                 case SYMBOL_CASES:
 967                         obstack_1grow(&symbol_obstack, (char) input.c);
 968                         next_char();
 969                         break;
 970
 971                 case '\\':
 972                         next_char();
 973                         switch (input.c) {
 974                         {
 975                                 unsigned n;
 976                         case 'U': n = 8; goto universal;
 977                         case 'u': n = 4; goto universal;
 978 universal:
 979                                 if (!resolve_escape_sequences) {
 980                                         obstack_1grow(&symbol_obstack, '\\');
 981                                         obstack_1grow(&symbol_obstack, input.c);
 982                                 }
 983                                 next_char();
 984                                 utf32 const v = parse_universal_char(n);
 985                                 if (!is_universal_char_valid_identifier(v)) {
 986                                         if (is_universal_char_valid(v)) {
 987                                                 errorf(&input.position,
 988                                                            "universal character \\%c%0*X is not valid in an identifier",
 989                                                            n == 4 ? 'u' : 'U', (int)n, v);
 990                                         }
 991                                 } else if (obstack_object_size(&symbol_obstack) == 0 && !is_universal_char_valid_identifier_start(v)) {
 992                                         errorf(&input.position,
 993                                                    "universal character \\%c%0*X is not valid as start of an identifier",
 994                                                    n == 4 ? 'u' : 'U', (int)n, v);
 995                                 } else if (resolve_escape_sequences) {
 996                                         obstack_grow_utf8(&symbol_obstack, v);
 997                                 }
 998                                 break;
 999                         }
1000
1001                         default:
1002                                 put_back(input.c);
1003                                 input.c = '\\';
1004                                 goto end_symbol;
1005                         }
1006
1007                 default:
1008                         goto end_symbol;
1009                 }
1010         }
1011
1012 end_symbol:
1013         obstack_1grow(&symbol_obstack, '\0');
1014         char *string = obstack_finish(&symbol_obstack);
1015
1016         /* might be a wide string or character constant ( L"string"/L'c' ) */
1017         if (input.c == '"' && string[0] == 'L' && string[1] == '\0') {
1018                 obstack_free(&symbol_obstack, string);
1019                 parse_string_literal(STRING_ENCODING_WIDE);
1020                 return;
1021         } else if (input.c == '\'' && string[0] == 'L' && string[1] == '\0') {
1022                 obstack_free(&symbol_obstack, string);
1023                 parse_character_constant(STRING_ENCODING_WIDE);
1024                 return;
1025         }
1026
1027         symbol_t *symbol = symbol_table_insert(string);
1028
1029         pp_token.kind        = symbol->ID;
1030         pp_token.base.symbol = symbol;
1031
1032         /* we can free the memory from symbol obstack if we already had an entry in
1033          * the symbol table */
1034         if (symbol->string != string) {
1035                 obstack_free(&symbol_obstack, string);
1036         }
1037 }
1038
1039 static void parse_number(void)
1040 {
1041         obstack_1grow(&symbol_obstack, (char) input.c);
1042         next_char();
1043
1044         while (true) {
1045                 switch (input.c) {
1046                 case '.':
1047                 case DIGIT_CASES:
1048                 case SYMBOL_CASES_WITHOUT_E_P:
1049                         obstack_1grow(&symbol_obstack, (char) input.c);
1050                         next_char();
1051                         break;
1052
1053                 case 'e':
1054                 case 'p':
1055                 case 'E':
1056                 case 'P':
1057                         obstack_1grow(&symbol_obstack, (char) input.c);
1058                         next_char();
1059                         if (input.c == '+' || input.c == '-') {
1060                                 obstack_1grow(&symbol_obstack, (char) input.c);
1061                                 next_char();
1062                         }
1063                         break;
1064
1065                 default:
1066                         goto end_number;
1067                 }
1068         }
1069
1070 end_number:
1071         pp_token.kind           = T_NUMBER;
1072         pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
1073 }
1074
/*
 * Helper macros for lexing multi-character punctuators in
 * next_input_token(). They expand to fragments of a switch statement and
 * are only meaningful in sequence:
 *
 *     MAYBE_PROLOG  MAYBE(...)*  ELSE(...) or ELSE_CODE(...)
 *
 * MAYBE_PROLOG opens a switch that ELSE/ELSE_CODE closes again, so the
 * individual macros are intentionally unbalanced.
 */

/* consume the current character and open a switch on the following one */
#define MAYBE_PROLOG \
        next_char(); \
        switch (input.c) {

/* if the next character is ch: consume it, emit punctuator kind, return */
#define MAYBE(ch, kind) \
        case ch: \
                next_char(); \
                set_punctuator(kind); \
                return;

/* like MAYBE, but emits a digraph via set_digraph(kind, symbol) */
#define MAYBE_DIGRAPH(ch, kind, symbol) \
        case ch: \
                next_char(); \
                set_digraph(kind, symbol); \
                return;

/* default branch: run code, return, and close the switch opened by
 * MAYBE_PROLOG */
#define ELSE_CODE(code) \
        default: \
                code \
                return; \
        }

/* default branch that emits punctuator kind without consuming anything */
#define ELSE(kind) ELSE_CODE(set_punctuator(kind);)
1098
/** Identifies the next preprocessing token contained in the input stream
 * and stores it in pp_token. No macro expansion is performed.
 *
 * Whitespace and comments in front of the token are skipped; what was
 * skipped is recorded in info (had_whitespace, at_line_begin,
 * whitespace_at_line_begin). At the end of an included file the previous
 * input is restored and lexing continues there; at the end of the outermost
 * input a T_EOF token is produced. */
static void next_input_token(void)
{
        /* start with the whitespace state saved in next_info if it has any,
         * otherwise with a clean state */
        if (next_info.had_whitespace) {
                info = next_info;
                next_info.had_whitespace = false;
        } else {
                info.at_line_begin  = false;
                info.had_whitespace = false;
        }
restart:
        /* (re)start a token at the current input position */
        pp_token.base.source_position = input.position;
        pp_token.base.symbol          = NULL;

        switch (input.c) {
        case ' ':
        case '\t':
                info.whitespace_at_line_begin++;
                info.had_whitespace = true;
                next_char();
                goto restart;

        /* NOTE(review): no next_char() here, so the NEWLINE macro (defined
         * outside this chunk) presumably consumes the line end - confirm */
        case NEWLINE:
                info.at_line_begin            = true;
                info.had_whitespace           = true;
                info.whitespace_at_line_begin = 0;
                goto restart;

        case SYMBOL_CASES:
                parse_symbol();
                return;

        case DIGIT_CASES:
                parse_number();
                return;

        case '"':
                parse_string_literal(STRING_ENCODING_CHAR);
                return;

        case '\'':
                parse_character_constant(STRING_ENCODING_CHAR);
                return;

        /* '.' may start a number (".5"), the ellipsis "..." or be a plain dot */
        case '.':
                MAYBE_PROLOG
                        case '0':
                        case '1':
                        case '2':
                        case '3':
                        case '4':
                        case '5':
                        case '6':
                        case '7':
                        case '8':
                        case '9':
                                /* restore the dot so parse_number() sees it */
                                put_back(input.c);
                                input.c = '.';
                                parse_number();
                                return;

                        /* ".." not followed by a third dot: emit one '.' and
                         * leave the second one as current character */
                        case '.':
                                MAYBE_PROLOG
                                MAYBE('.', T_DOTDOTDOT)
                                ELSE_CODE(
                                        put_back(input.c);
                                        input.c = '.';
                                        set_punctuator('.');
                                )
                ELSE('.')
        case '&':
                MAYBE_PROLOG
                MAYBE('&', T_ANDAND)
                MAYBE('=', T_ANDEQUAL)
                ELSE('&')
        case '*':
                MAYBE_PROLOG
                MAYBE('=', T_ASTERISKEQUAL)
                ELSE('*')
        case '+':
                MAYBE_PROLOG
                MAYBE('+', T_PLUSPLUS)
                MAYBE('=', T_PLUSEQUAL)
                ELSE('+')
        case '-':
                MAYBE_PROLOG
                MAYBE('>', T_MINUSGREATER)
                MAYBE('-', T_MINUSMINUS)
                MAYBE('=', T_MINUSEQUAL)
                ELSE('-')
        case '!':
                MAYBE_PROLOG
                MAYBE('=', T_EXCLAMATIONMARKEQUAL)
                ELSE('!')
        /* '/' may also start a comment, which is skipped before lexing
         * restarts */
        case '/':
                MAYBE_PROLOG
                MAYBE('=', T_SLASHEQUAL)
                case '*':
                        next_char();
                        skip_multiline_comment();
                        goto restart;
                case '/':
                        next_char();
                        skip_line_comment();
                        goto restart;
                ELSE('/')
        /* digraphs: "%>" is '}', "%:" is '#', "%:%:" is "##" */
        case '%':
                MAYBE_PROLOG
                MAYBE_DIGRAPH('>', '}', symbol_percentgreater)
                MAYBE('=', T_PERCENTEQUAL)
                case ':':
                        MAYBE_PROLOG
                        case '%':
                                MAYBE_PROLOG
                                MAYBE_DIGRAPH(':', T_HASHHASH, symbol_percentcolonpercentcolon)
                                ELSE_CODE(
                                        put_back(input.c);
                                        input.c = '%';
                                        goto digraph_percentcolon;
                                )
                        ELSE_CODE(
digraph_percentcolon:
                                set_digraph('#', symbol_percentcolon);
                        )
                ELSE('%')
        /* digraphs: "<:" is '[', "<%" is '{' */
        case '<':
                MAYBE_PROLOG
                MAYBE_DIGRAPH(':', '[', symbol_lesscolon)
                MAYBE_DIGRAPH('%', '{', symbol_lesspercent)
                MAYBE('=', T_LESSEQUAL)
                case '<':
                        MAYBE_PROLOG
                        MAYBE('=', T_LESSLESSEQUAL)
                        ELSE(T_LESSLESS)
                ELSE('<')
        case '>':
                MAYBE_PROLOG
                MAYBE('=', T_GREATEREQUAL)
                case '>':
                        MAYBE_PROLOG
                        MAYBE('=', T_GREATERGREATEREQUAL)
                        ELSE(T_GREATERGREATER)
                ELSE('>')
        case '^':
                MAYBE_PROLOG
                MAYBE('=', T_CARETEQUAL)
                ELSE('^')
        case '|':
                MAYBE_PROLOG
                MAYBE('=', T_PIPEEQUAL)
                MAYBE('|', T_PIPEPIPE)
                ELSE('|')
        /* digraph ":>" is ']'; "::" is a single token only when c_mode has
         * the _CXX bit set */
        case ':':
                MAYBE_PROLOG
                MAYBE_DIGRAPH('>', ']', symbol_colongreater)
                case ':':
                        if (c_mode & _CXX) {
                                next_char();
                                set_punctuator(T_COLONCOLON);
                                return;
                        }
                        /* FALLTHROUGH */
                ELSE(':')
        case '=':
                MAYBE_PROLOG
                MAYBE('=', T_EQUALEQUAL)
                ELSE('=')
        case '#':
                MAYBE_PROLOG
                MAYBE('#', T_HASHHASH)
                ELSE('#')

        /* single-character punctuators */
        case '?':
        case '[':
        case ']':
        case '(':
        case ')':
        case '{':
        case '}':
        case '~':
        case ';':
        case ',':
                set_punctuator(input.c);
                next_char();
                return;

        case EOF:
                if (input_stack != NULL) {
                        /* end of an included input: return to the previous
                         * one and print a line directive with flag "2" */
                        close_input();
                        pop_restore_input();
                        fputc('\n', out);
                        if (input.c == (utf32)EOF)
                                --input.position.lineno;
                        print_line_directive(&input.position, "2");
                        goto restart;
                } else {
                        /* end of the outermost input */
                        info.at_line_begin = true;
                        set_punctuator(T_EOF);
                }
                return;

        case '\\':
                /* peek at the character after the backslash; "\u" and "\U"
                 * start an identifier with a universal character name */
                next_char();
                int next_c = input.c;
                put_back(input.c);
                input.c = '\\';
                if (next_c == 'U' || next_c == 'u') {
                        parse_symbol();
                        return;
                }
                /* FALLTHROUGH */
        default:
                if (error_on_unknown_chars) {
                        /* report the character, drop it and try again */
                        errorf(&pp_token.base.source_position,
                               "unknown character '%lc' found\n", input.c);
                        next_char();
                        goto restart;
                } else {
                        /* pass the character on as a T_UNKNOWN_CHAR token whose
                         * symbol holds its UTF-8 encoding */
                        assert(obstack_object_size(&symbol_obstack) == 0);
                        obstack_grow_utf8(&symbol_obstack, input.c);
                        obstack_1grow(&symbol_obstack, '\0');
                        char     *const string = obstack_finish(&symbol_obstack);
                        symbol_t *const symbol = symbol_table_insert(string);
                        if (symbol->string != string)
                                obstack_free(&symbol_obstack, string);

                        pp_token.kind        = T_UNKNOWN_CHAR;
                        pp_token.base.symbol = symbol;
                        next_char();
                        return;
                }
        }
}
1333
1334 static void print_quoted_string(const char *const string)
1335 {
1336         fputc('"', out);
1337         for (const char *c = string; *c != 0; ++c) {
1338                 switch (*c) {
1339                 case '"': fputs("\\\"", out); break;
1340                 case '\\':  fputs("\\\\", out); break;
1341                 case '\a':  fputs("\\a", out); break;
1342                 case '\b':  fputs("\\b", out); break;
1343                 case '\f':  fputs("\\f", out); break;
1344                 case '\n':  fputs("\\n", out); break;
1345                 case '\r':  fputs("\\r", out); break;
1346                 case '\t':  fputs("\\t", out); break;
1347                 case '\v':  fputs("\\v", out); break;
1348                 case '\?':  fputs("\\?", out); break;
1349                 default:
1350                         if (!isprint(*c)) {
1351                                 fprintf(out, "\\%03o", (unsigned)*c);
1352                                 break;
1353                         }
1354                         fputc(*c, out);
1355                         break;
1356                 }
1357         }
1358         fputc('"', out);
1359 }
1360
1361 static void print_line_directive(const source_position_t *pos, const char *add)
1362 {
1363         fprintf(out, "# %u ", pos->lineno);
1364         print_quoted_string(pos->input_name);
1365         if (add != NULL) {
1366                 fputc(' ', out);
1367                 fputs(add, out);
1368         }
1369
1370         printed_input_name = pos->input_name;
1371         input.output_line  = pos->lineno-1;
1372 }
1373
1374 static bool emit_newlines(void)
1375 {
1376         unsigned delta = pp_token.base.source_position.lineno - input.output_line;
1377         if (delta == 0)
1378                 return false;
1379
1380         if (delta >= 9) {
1381                 fputc('\n', out);
1382                 print_line_directive(&pp_token.base.source_position, NULL);
1383                 fputc('\n', out);
1384         } else {
1385                 for (unsigned i = 0; i < delta; ++i) {
1386                         fputc('\n', out);
1387                 }
1388         }
1389         input.output_line = pp_token.base.source_position.lineno;
1390
1391         for (unsigned i = 0; i < info.whitespace_at_line_begin; ++i)
1392                 fputc(' ', out);
1393
1394         return true;
1395 }
1396
1397 static void emit_pp_token(void)
1398 {
1399         if (!emit_newlines() &&
1400             (info.had_whitespace || tokens_would_paste(last_token, pp_token.kind)))
1401                 fputc(' ', out);
1402
1403         switch (pp_token.kind) {
1404         case T_NUMBER:
1405                 fputs(pp_token.literal.string.begin, out);
1406                 break;
1407
1408         case T_STRING_LITERAL:
1409                 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1410                 fputc('"', out);
1411                 fputs(pp_token.literal.string.begin, out);
1412                 fputc('"', out);
1413                 break;
1414
1415         case T_CHARACTER_CONSTANT:
1416                 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1417                 fputc('\'', out);
1418                 fputs(pp_token.literal.string.begin, out);
1419                 fputc('\'', out);
1420                 break;
1421
1422         case T_MACRO_PARAMETER:
1423                 panic("macro parameter not expanded");
1424
1425         default:
1426                 fputs(pp_token.base.symbol->string, out);
1427                 break;
1428         }
1429         last_token = pp_token.kind;
1430 }
1431
1432 static void eat_pp_directive(void)
1433 {
1434         while (!info.at_line_begin) {
1435                 next_input_token();
1436         }
1437 }
1438
1439 static bool strings_equal(const string_t *string1, const string_t *string2)
1440 {
1441         size_t size = string1->size;
1442         if (size != string2->size)
1443                 return false;
1444
1445         const char *c1 = string1->begin;
1446         const char *c2 = string2->begin;
1447         for (size_t i = 0; i < size; ++i, ++c1, ++c2) {
1448                 if (*c1 != *c2)
1449                         return false;
1450         }
1451         return true;
1452 }
1453
1454 static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
1455 {
1456         if (token1->kind != token2->kind)
1457                 return false;
1458
1459         switch (token1->kind) {
1460         case T_NUMBER:
1461         case T_CHARACTER_CONSTANT:
1462         case T_STRING_LITERAL:
1463                 return strings_equal(&token1->literal.string, &token2->literal.string);
1464
1465         case T_MACRO_PARAMETER:
1466                 return token1->macro_parameter.def->symbol
1467                     == token2->macro_parameter.def->symbol;
1468
1469         default:
1470                 return token1->base.symbol == token2->base.symbol;
1471         }
1472 }
1473
1474 static bool pp_definitions_equal(const pp_definition_t *definition1,
1475                                  const pp_definition_t *definition2)
1476 {
1477         if (definition1->list_len != definition2->list_len)
1478                 return false;
1479
1480         size_t               len = definition1->list_len;
1481         const saved_token_t *t1  = definition1->token_list;
1482         const saved_token_t *t2  = definition2->token_list;
1483         for (size_t i = 0; i < len; ++i, ++t1, ++t2) {
1484                 if (!pp_tokens_equal(&t1->token, &t2->token))
1485                         return false;
1486         }
1487         return true;
1488 }
1489
/**
 * Parses a #define directive; the current token is the "define" keyword.
 *
 * In skip mode the directive is ignored. Otherwise a new pp_definition_t is
 * built on pp_obstack from the macro name, the optional parameter list and
 * the replacement tokens up to the end of the line, and is installed as the
 * definition of the macro's symbol. Redefining a macro with a different
 * definition produces a warning; an identical redefinition keeps the old
 * definition object. On a syntax error the rest of the directive is skipped
 * and the symbol's definition is left unchanged.
 */
static void parse_define_directive(void)
{
        eat_pp(TP_define);
        if (skip_mode) {
                eat_pp_directive();
                return;
        }

        assert(obstack_object_size(&pp_obstack) == 0);

        /* an identifier on the next line does not belong to this directive */
        if (pp_token.kind != T_IDENTIFIER || info.at_line_begin) {
                errorf(&pp_token.base.source_position,
                       "expected identifier after #define, got %K", &pp_token);
                goto error_out;
        }
        symbol_t *const symbol = pp_token.base.symbol;

        pp_definition_t *new_definition
                = obstack_alloc(&pp_obstack, sizeof(new_definition[0]));
        memset(new_definition, 0, sizeof(new_definition[0]));
        new_definition->symbol          = symbol;
        new_definition->source_position = input.position;

        /* this is probably the only place where spaces are significant in the
         * lexer (except for the fact that they separate tokens). #define b(x)
         * is something else than #define b (x) */
        if (input.c == '(') {
                eat_token(T_IDENTIFIER);
                eat_token('(');

                /* collect the parameters as an array growing on pp_obstack */
                while (true) {
                        switch (pp_token.kind) {
                        case T_DOTDOTDOT:
                                new_definition->is_variadic = true;
                                eat_token(T_DOTDOTDOT);
                                if (pp_token.kind != ')') {
                                        errorf(&input.position,
                                                        "'...' not at end of macro argument list");
                                        goto error_out;
                                }
                                break;

                        case T_IDENTIFIER: {
                                pp_definition_t parameter;
                                memset(&parameter, 0, sizeof(parameter));
                                parameter.source_position = pp_token.base.source_position;
                                parameter.symbol          = pp_token.base.symbol;
                                parameter.is_parameter    = true;
                                obstack_grow(&pp_obstack, &parameter, sizeof(parameter));
                                eat_token(T_IDENTIFIER);

                                if (pp_token.kind == ',') {
                                        eat_token(',');
                                        break;
                                }

                                if (pp_token.kind != ')') {
                                        errorf(&pp_token.base.source_position,
                                               "expected ',' or ')' after identifier, got %K",
                                               &pp_token);
                                        goto error_out;
                                }
                                break;
                        }

                        case ')':
                                eat_token(')');
                                goto finish_argument_list;

                        default:
                                errorf(&pp_token.base.source_position,
                                       "expected identifier, '...' or ')' in #define argument list, got %K",
                                       &pp_token);
                                goto error_out;
                        }
                }

        finish_argument_list:
                new_definition->has_parameters = true;
                size_t size = obstack_object_size(&pp_obstack);
                new_definition->n_parameters
                        = size / sizeof(new_definition->parameters[0]);
                new_definition->parameters = obstack_finish(&pp_obstack);
                /* Temporarily install each parameter as the definition of its
                 * symbol so the replacement list can recognize parameter
                 * names; the shadowed definition is kept in parent_expansion
                 * and restored further down. */
                for (size_t i = 0; i < new_definition->n_parameters; ++i) {
                        pp_definition_t *param    = &new_definition->parameters[i];
                        symbol_t        *symbol   = param->symbol;
                        pp_definition_t *previous = symbol->pp_definition;
                        /* the symbol already names a parameter of this macro */
                        if (previous != NULL
                            && previous->function_definition == new_definition) {
                                errorf(&param->source_position,
                                       "duplicate macro parameter '%Y'", symbol);
                                param->symbol = sym_anonymous;
                                continue;
                        }
                        param->parent_expansion    = previous;
                        param->function_definition = new_definition;
                        symbol->pp_definition      = param;
                }
        } else {
                eat_token(T_IDENTIFIER);
        }

        /* construct token list */
        assert(obstack_object_size(&pp_obstack) == 0);
        while (!info.at_line_begin) {
                /* identifiers naming a parameter of this macro are stored as
                 * T_MACRO_PARAMETER tokens referring to the parameter */
                if (pp_token.kind == T_IDENTIFIER) {
                        const symbol_t  *symbol     = pp_token.base.symbol;
                        pp_definition_t *definition = symbol->pp_definition;
                        if (definition != NULL
                            && definition->function_definition == new_definition) {
                            pp_token.kind                = T_MACRO_PARAMETER;
                            pp_token.macro_parameter.def = definition;
                        }
                }
                saved_token_t saved_token;
                saved_token.token = pp_token;
                saved_token.had_whitespace = info.had_whitespace;
                obstack_grow(&pp_obstack, &saved_token, sizeof(saved_token));
                next_input_token();
        }

        new_definition->list_len   = obstack_object_size(&pp_obstack)
                / sizeof(new_definition->token_list[0]);
        new_definition->token_list = obstack_finish(&pp_obstack);

        /* restore the definitions that were shadowed by the parameters */
        if (new_definition->has_parameters) {
                for (size_t i = 0; i < new_definition->n_parameters; ++i) {
                        pp_definition_t *param      = &new_definition->parameters[i];
                        symbol_t        *symbol     = param->symbol;
                        /* duplicate parameters were never installed */
                        if (symbol == sym_anonymous)
                                continue;
                        assert(symbol->pp_definition == param);
                        assert(param->function_definition == new_definition);
                        symbol->pp_definition   = param->parent_expansion;
                        param->parent_expansion = NULL;
                }
        }

        pp_definition_t *old_definition = symbol->pp_definition;
        if (old_definition != NULL) {
                if (!pp_definitions_equal(old_definition, new_definition)) {
                        warningf(WARN_OTHER, &input.position, "multiple definition of macro '%Y' (first defined %P)", symbol, &old_definition->source_position);
                } else {
                        /* reuse the old definition */
                        obstack_free(&pp_obstack, new_definition);
                        new_definition = old_definition;
                }
        }

        symbol->pp_definition = new_definition;
        return;

error_out:
        /* discard a partially grown parameter array, if any */
        /* NOTE(review): when reached from the argument list parser,
         * new_definition itself stays allocated on pp_obstack - confirm
         * whether that is intended */
        if (obstack_object_size(&pp_obstack) > 0) {
                char *ptr = obstack_finish(&pp_obstack);
                obstack_free(&pp_obstack, ptr);
        }
        eat_pp_directive();
}
1649
1650 static void parse_undef_directive(void)
1651 {
1652         eat_pp(TP_undef);
1653         if (skip_mode) {
1654                 eat_pp_directive();
1655                 return;
1656         }
1657
1658         if (pp_token.kind != T_IDENTIFIER) {
1659                 errorf(&input.position,
1660                        "expected identifier after #undef, got %K", &pp_token);
1661                 eat_pp_directive();
1662                 return;
1663         }
1664
1665         pp_token.base.symbol->pp_definition = NULL;
1666         eat_token(T_IDENTIFIER);
1667
1668         if (!info.at_line_begin) {
1669                 warningf(WARN_OTHER, &input.position, "extra tokens at end of #undef directive");
1670         }
1671         eat_pp_directive();
1672 }
1673
1674 /** behind an #include we can have the special headername lexems.
1675  * They're only allowed behind an #include so they're not recognized
1676  * by the normal next_preprocessing_token. We handle them as a special
1677  * exception here */
1678 static void parse_headername(void)
1679 {
1680         const source_position_t start_position = input.position;
1681         string_t                string         = { NULL, 0, STRING_ENCODING_CHAR };
1682         assert(obstack_object_size(&symbol_obstack) == 0);
1683
1684         if (info.at_line_begin) {
1685                 parse_error("expected headername after #include");
1686                 goto finish_error;
1687         }
1688
1689         /* check wether we have a "... or <... headername */
1690         switch (input.c) {
1691         {
1692                 utf32 delimiter;
1693         case '<': delimiter = '>'; goto parse_name;
1694         case '"': delimiter = '"'; goto parse_name;
1695 parse_name:
1696                 next_char();
1697                 while (true) {
1698                         switch (input.c) {
1699                         case NEWLINE:
1700                         case EOF:
1701                                 errorf(&pp_token.base.source_position, "header name without closing '%c'", (char)delimiter);
1702                                 goto finish_error;
1703
1704                         default:
1705                                 if (input.c == delimiter) {
1706                                         next_char();
1707                                         goto finished_headername;
1708                                 } else {
1709                                         obstack_1grow(&symbol_obstack, (char)input.c);
1710                                         next_char();
1711                                 }
1712                                 break;
1713                         }
1714                 }
1715                 /* we should never be here */
1716         }
1717
1718         default:
1719                 /* TODO: do normal pp_token parsing and concatenate results */
1720                 panic("pp_token concat include not implemented yet");
1721         }
1722
1723 finished_headername:
1724         string = sym_make_string(STRING_ENCODING_CHAR);
1725
1726 finish_error:
1727         pp_token.base.source_position = start_position;
1728         pp_token.kind                 = T_HEADERNAME;
1729         pp_token.literal.string       = string;
1730 }
1731
1732 static bool do_include(bool system_include, const char *headername)
1733 {
1734         size_t headername_len = strlen(headername);
1735         if (!system_include) {
1736                 /* put dirname of current input on obstack */
1737                 const char *filename   = input.position.input_name;
1738                 const char *last_slash = strrchr(filename, '/');
1739                 if (last_slash != NULL) {
1740                         size_t len = last_slash - filename;
1741                         obstack_grow(&symbol_obstack, filename, len + 1);
1742                         obstack_grow0(&symbol_obstack, headername, headername_len);
1743                         char *complete_path = obstack_finish(&symbol_obstack);
1744                         headername = identify_string(complete_path);
1745                 }
1746
1747                 FILE *file = fopen(headername, "r");
1748                 if (file != NULL) {
1749                         switch_input(file, headername);
1750                         return true;
1751                 }
1752         }
1753
1754         assert(obstack_object_size(&symbol_obstack) == 0);
1755         /* check searchpath */
1756         for (searchpath_entry_t *entry = searchpath; entry != NULL;
1757              entry = entry->next) {
1758             const char *path = entry->path;
1759             size_t      len  = strlen(path);
1760                 obstack_grow(&symbol_obstack, path, len);
1761                 if (path[len-1] != '/')
1762                         obstack_1grow(&symbol_obstack, '/');
1763                 obstack_grow(&symbol_obstack, headername, headername_len+1);
1764
1765                 char *complete_path = obstack_finish(&symbol_obstack);
1766                 FILE *file          = fopen(complete_path, "r");
1767                 if (file != NULL) {
1768                         const char *filename = identify_string(complete_path);
1769                         switch_input(file, filename);
1770                         return true;
1771                 } else {
1772                         obstack_free(&symbol_obstack, complete_path);
1773                 }
1774         }
1775
1776         return false;
1777 }
1778
1779 static void parse_include_directive(void)
1780 {
1781         if (skip_mode) {
1782                 eat_pp_directive();
1783                 return;
1784         }
1785
1786         /* don't eat the TP_include here!
1787          * we need an alternative parsing for the next token */
1788         skip_till_newline(true);
1789         bool system_include = input.c == '<';
1790         parse_headername();
1791         string_t headername = pp_token.literal.string;
1792         if (headername.begin == NULL) {
1793                 eat_pp_directive();
1794                 return;
1795         }
1796
1797         bool had_nonwhitespace = skip_till_newline(false);
1798         if (had_nonwhitespace) {
1799                 warningf(WARN_OTHER, &pp_token.base.source_position,
1800                          "extra tokens at end of #include directive");
1801         }
1802
1803         if (n_inputs > INCLUDE_LIMIT) {
1804                 errorf(&pp_token.base.source_position, "#include nested too deeply");
1805                 /* eat \n or EOF */
1806                 next_input_token();
1807                 return;
1808         }
1809
1810         /* switch inputs */
1811         info.whitespace_at_line_begin = 0;
1812         info.had_whitespace           = false;
1813         info.at_line_begin            = true;
1814         emit_newlines();
1815         push_input();
1816         bool res = do_include(system_include, pp_token.literal.string.begin);
1817         if (res) {
1818                 next_input_token();
1819         } else {
1820                 errorf(&pp_token.base.source_position, "failed including '%S': %s", &pp_token.literal.string, strerror(errno));
1821                 pop_restore_input();
1822         }
1823 }
1824
1825 static pp_conditional_t *push_conditional(void)
1826 {
1827         pp_conditional_t *conditional
1828                 = obstack_alloc(&pp_obstack, sizeof(*conditional));
1829         memset(conditional, 0, sizeof(*conditional));
1830
1831         conditional->parent = conditional_stack;
1832         conditional_stack   = conditional;
1833
1834         return conditional;
1835 }
1836
1837 static void pop_conditional(void)
1838 {
1839         assert(conditional_stack != NULL);
1840         conditional_stack = conditional_stack->parent;
1841 }
1842
1843 static void check_unclosed_conditionals(void)
1844 {
1845         while (conditional_stack != NULL) {
1846                 pp_conditional_t *conditional = conditional_stack;
1847
1848                 if (conditional->in_else) {
1849                         errorf(&conditional->source_position, "unterminated #else");
1850                 } else {
1851                         errorf(&conditional->source_position, "unterminated condition");
1852                 }
1853                 pop_conditional();
1854         }
1855 }
1856
1857 static void parse_ifdef_ifndef_directive(bool const is_ifdef)
1858 {
1859         bool condition;
1860         eat_pp(is_ifdef ? TP_ifdef : TP_ifndef);
1861
1862         if (skip_mode) {
1863                 eat_pp_directive();
1864                 pp_conditional_t *conditional = push_conditional();
1865                 conditional->source_position  = pp_token.base.source_position;
1866                 conditional->skip             = true;
1867                 return;
1868         }
1869
1870         if (pp_token.kind != T_IDENTIFIER || info.at_line_begin) {
1871                 errorf(&pp_token.base.source_position,
1872                        "expected identifier after #%s, got %K",
1873                        is_ifdef ? "ifdef" : "ifndef", &pp_token);
1874                 eat_pp_directive();
1875
1876                 /* just take the true case in the hope to avoid further errors */
1877                 condition = true;
1878         } else {
1879                 /* evaluate wether we are in true or false case */
1880                 condition = (bool)pp_token.base.symbol->pp_definition == is_ifdef;
1881                 eat_token(T_IDENTIFIER);
1882
1883                 if (!info.at_line_begin) {
1884                         errorf(&pp_token.base.source_position,
1885                                "extra tokens at end of #%s",
1886                                is_ifdef ? "ifdef" : "ifndef");
1887                         eat_pp_directive();
1888                 }
1889         }
1890
1891         pp_conditional_t *conditional = push_conditional();
1892         conditional->source_position  = pp_token.base.source_position;
1893         conditional->condition        = condition;
1894
1895         if (!condition) {
1896                 skip_mode = true;
1897         }
1898 }
1899
1900 static void parse_else_directive(void)
1901 {
1902         eat_pp(TP_else);
1903
1904         if (!info.at_line_begin) {
1905                 if (!skip_mode) {
1906                         warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #else");
1907                 }
1908                 eat_pp_directive();
1909         }
1910
1911         pp_conditional_t *conditional = conditional_stack;
1912         if (conditional == NULL) {
1913                 errorf(&pp_token.base.source_position, "#else without prior #if");
1914                 return;
1915         }
1916
1917         if (conditional->in_else) {
1918                 errorf(&pp_token.base.source_position,
1919                        "#else after #else (condition started %P)",
1920                        &conditional->source_position);
1921                 skip_mode = true;
1922                 return;
1923         }
1924
1925         conditional->in_else = true;
1926         if (!conditional->skip) {
1927                 skip_mode = conditional->condition;
1928         }
1929         conditional->source_position = pp_token.base.source_position;
1930 }
1931
1932 static void parse_endif_directive(void)
1933 {
1934         eat_pp(TP_endif);
1935
1936         if (!info.at_line_begin) {
1937                 if (!skip_mode) {
1938                         warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #endif");
1939                 }
1940                 eat_pp_directive();
1941         }
1942
1943         pp_conditional_t *conditional = conditional_stack;
1944         if (conditional == NULL) {
1945                 errorf(&pp_token.base.source_position, "#endif without prior #if");
1946                 return;
1947         }
1948
1949         if (!conditional->skip) {
1950                 skip_mode = false;
1951         }
1952         pop_conditional();
1953 }
1954
/** Which "#pragma STDC ..." was recognised by parse_pragma_directive. */
typedef enum stdc_pragma_kind_t {
	STDC_UNKNOWN = 0,      /**< not a (valid) STDC pragma */
	STDC_FP_CONTRACT,      /**< #pragma STDC FP_CONTRACT */
	STDC_FENV_ACCESS,      /**< #pragma STDC FENV_ACCESS */
	STDC_CX_LIMITED_RANGE  /**< #pragma STDC CX_LIMITED_RANGE */
} stdc_pragma_kind_t;
1961
/** The argument given to a "#pragma STDC ..." directive. */
typedef enum stdc_pragma_value_kind_t {
	STDC_VALUE_UNKNOWN = 0,  /**< anything that is not ON, OFF or DEFAULT */
	STDC_VALUE_ON,           /**< ON */
	STDC_VALUE_OFF,          /**< OFF */
	STDC_VALUE_DEFAULT       /**< DEFAULT */
} stdc_pragma_value_kind_t;
1968
1969 static void parse_pragma_directive(void)
1970 {
1971         eat_pp(TP_pragma);
1972
1973         if (pp_token.kind != T_IDENTIFIER) {
1974                 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
1975                          "expected identifier after #pragma");
1976                 eat_pp_directive();
1977                 return;
1978         }
1979
1980         stdc_pragma_kind_t kind = STDC_UNKNOWN;
1981         if (pp_token.base.symbol->pp_ID == TP_STDC && c_mode & _C99) {
1982                 /* a STDC pragma */
1983                 next_input_token();
1984
1985                 switch (pp_token.base.symbol->pp_ID) {
1986                 case TP_FP_CONTRACT:      kind = STDC_FP_CONTRACT;      break;
1987                 case TP_FENV_ACCESS:      kind = STDC_FENV_ACCESS;      break;
1988                 case TP_CX_LIMITED_RANGE: kind = STDC_CX_LIMITED_RANGE; break;
1989                 default:                  break;
1990                 }
1991                 if (kind != STDC_UNKNOWN) {
1992                         next_input_token();
1993                         stdc_pragma_value_kind_t value;
1994                         switch (pp_token.base.symbol->pp_ID) {
1995                         case TP_ON:      value = STDC_VALUE_ON;      break;
1996                         case TP_OFF:     value = STDC_VALUE_OFF;     break;
1997                         case TP_DEFAULT: value = STDC_VALUE_DEFAULT; break;
1998                         default:         value = STDC_VALUE_UNKNOWN; break;
1999                         }
2000                         if (value == STDC_VALUE_UNKNOWN) {
2001                                 kind = STDC_UNKNOWN;
2002                                 errorf(&pp_token.base.source_position, "bad STDC pragma argument");
2003                         }
2004                 }
2005         }
2006         eat_pp_directive();
2007         if (kind == STDC_UNKNOWN) {
2008                 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
2009                          "encountered unknown #pragma");
2010         }
2011 }
2012
2013 static void parse_line_directive(void)
2014 {
2015         if (pp_token.kind != T_NUMBER) {
2016                 if (!skip_mode)
2017                         parse_error("expected integer");
2018         } else {
2019                 char      *end;
2020                 long const line = strtol(pp_token.literal.string.begin, &end, 0);
2021                 if (*end == '\0') {
2022                         /* use offset -1 as this is about the next line */
2023                         input.position.lineno = line - 1;
2024                         /* force output of line */
2025                         input.output_line = input.position.lineno - 20;
2026                 } else {
2027                         if (!skip_mode) {
2028                                 errorf(&input.position, "'%S' is not a valid line number",
2029                                            &pp_token.literal.string);
2030                         }
2031                 }
2032                 next_input_token();
2033         }
2034         if (pp_token.kind == T_STRING_LITERAL
2035             && pp_token.literal.string.encoding == STRING_ENCODING_CHAR) {
2036                 input.position.input_name       = pp_token.literal.string.begin;
2037                 input.position.is_system_header = false;
2038                 next_input_token();
2039
2040                 /* attempt to parse numeric flags as outputted by gcc preprocessor */
2041                 while (pp_token.kind == T_NUMBER) {
2042                         /* flags:
2043                          * 1 - indicates start of a new file
2044                          * 2 - indicates return from a file
2045                          * 3 - indicates system header
2046                          * 4 - indicates implicit extern "C" in C++ mode
2047                          *
2048                          * currently we're only interested in "3"
2049                          */
2050                         if (streq(pp_token.literal.string.begin, "3")) {
2051                                 input.position.is_system_header = true;
2052                         }
2053                         next_input_token();
2054                 }
2055         }
2056
2057         eat_pp_directive();
2058 }
2059
2060 static void parse_preprocessing_directive(void)
2061 {
2062         eat_token('#');
2063
2064         if (info.at_line_begin) {
2065                 /* empty directive */
2066                 return;
2067         }
2068
2069         if (pp_token.base.symbol) {
2070                 switch (pp_token.base.symbol->pp_ID) {
2071                 case TP_define:  parse_define_directive();            break;
2072                 case TP_else:    parse_else_directive();              break;
2073                 case TP_endif:   parse_endif_directive();             break;
2074                 case TP_ifdef:   parse_ifdef_ifndef_directive(true);  break;
2075                 case TP_ifndef:  parse_ifdef_ifndef_directive(false); break;
2076                 case TP_include: parse_include_directive();           break;
2077                 case TP_line:    next_input_token(); goto line_directive;
2078                 case TP_pragma:  parse_pragma_directive();            break;
2079                 case TP_undef:   parse_undef_directive();             break;
2080                 default:         goto skip;
2081                 }
2082         } else if (pp_token.kind == T_NUMBER) {
2083 line_directive:
2084                 parse_line_directive();
2085         } else {
2086 skip:
2087                 if (!skip_mode) {
2088                         errorf(&pp_token.base.source_position, "invalid preprocessing directive #%K", &pp_token);
2089                 }
2090                 eat_pp_directive();
2091         }
2092
2093         assert(info.at_line_begin);
2094 }
2095
2096 static void finish_current_argument(void)
2097 {
2098         if (current_argument == NULL)
2099                 return;
2100         size_t size = obstack_object_size(&pp_obstack);
2101         current_argument->list_len   = size/sizeof(current_argument->token_list[0]);
2102         current_argument->token_list = obstack_finish(&pp_obstack);
2103 }
2104
2105 static void next_preprocessing_token(void)
2106 {
2107 restart:
2108         if (!expand_next()) {
2109                 do {
2110                         next_input_token();
2111                         while (pp_token.kind == '#' && info.at_line_begin) {
2112                                 parse_preprocessing_directive();
2113                         }
2114                 } while (skip_mode && pp_token.kind != T_EOF);
2115         }
2116
2117         const token_kind_t kind = pp_token.kind;
2118         if (current_call == NULL || argument_expanding != NULL) {
2119                 if (kind == T_IDENTIFIER) {
2120                         symbol_t        *const symbol        = pp_token.base.symbol;
2121                         pp_definition_t *const pp_definition = symbol->pp_definition;
2122                         if (pp_definition != NULL && !pp_definition->is_expanding) {
2123                                 if (pp_definition->has_parameters) {
2124
2125                                         /* check if next token is a '(' */
2126                                         whitespace_info_t old_info   = info;
2127                                         token_kind_t      next_token = peek_expansion();
2128                                         if (next_token == T_EOF) {
2129                                                 info.at_line_begin  = false;
2130                                                 info.had_whitespace = false;
2131                                                 skip_whitespace();
2132                                                 if (input.c == '(') {
2133                                                         next_token = '(';
2134                                                 }
2135                                         }
2136
2137                                         if (next_token == '(') {
2138                                                 if (current_expansion == NULL)
2139                                                         expansion_pos = pp_token.base.source_position;
2140                                                 next_preprocessing_token();
2141                                                 assert(pp_token.kind == '(');
2142
2143                                                 pp_definition->parent_expansion = current_expansion;
2144                                                 current_call              = pp_definition;
2145                                                 current_call->expand_pos  = 0;
2146                                                 current_call->expand_info = old_info;
2147                                                 if (current_call->n_parameters > 0) {
2148                                                         current_argument = &current_call->parameters[0];
2149                                                         assert(argument_brace_count == 0);
2150                                                 }
2151                                                 goto restart;
2152                                         } else {
2153                                                 /* skip_whitespaces() skipped newlines and whitespace,
2154                                                  * remember results for next token */
2155                                                 next_info = info;
2156                                                 info      = old_info;
2157                                                 return;
2158                                         }
2159                                 } else {
2160                                         if (current_expansion == NULL)
2161                                                 expansion_pos = pp_token.base.source_position;
2162                                         start_expanding(pp_definition);
2163                                         goto restart;
2164                                 }
2165                         }
2166                 } else if (kind == T_MACRO_PARAMETER) {
2167                         assert(current_expansion != NULL);
2168                         start_expanding(pp_token.macro_parameter.def);
2169                         goto restart;
2170                 }
2171         }
2172
2173         if (current_call != NULL) {
2174                 /* current_call != NULL */
2175                 if (kind == '(') {
2176                         ++argument_brace_count;
2177                 } else if (kind == ')') {
2178                         if (argument_brace_count > 0) {
2179                                 --argument_brace_count;
2180                         } else {
2181                                 finish_current_argument();
2182                                 assert(kind == ')');
2183                                 start_expanding(current_call);
2184                                 info = current_call->expand_info;
2185                                 current_call     = NULL;
2186                                 current_argument = NULL;
2187                                 goto restart;
2188                         }
2189                 } else if (kind == ',' && argument_brace_count == 0) {
2190                         finish_current_argument();
2191                         current_call->expand_pos++;
2192                         if (current_call->expand_pos >= current_call->n_parameters) {
2193                                 errorf(&pp_token.base.source_position,
2194                                            "too many arguments passed for macro '%Y'",
2195                                            current_call->symbol);
2196                                 current_argument = NULL;
2197                         } else {
2198                                 current_argument
2199                                         = &current_call->parameters[current_call->expand_pos];
2200                         }
2201                         goto restart;
2202                 } else if (kind == T_MACRO_PARAMETER) {
2203                         /* parameters have to be fully expanded before being used as
2204                          * parameters for another macro-call */
2205                         assert(current_expansion != NULL);
2206                         pp_definition_t *argument = pp_token.macro_parameter.def;
2207                         argument_expanding = argument;
2208                         start_expanding(argument);
2209                         goto restart;
2210                 } else if (kind == T_EOF) {
2211                         errorf(&expansion_pos,
2212                                "reached end of file while parsing arguments for '%Y'",
2213                                current_call->symbol);
2214                         return;
2215                 }
2216                 if (current_argument != NULL) {
2217                         saved_token_t saved;
2218                         saved.token = pp_token;
2219                         saved.had_whitespace = info.had_whitespace;
2220                         obstack_grow(&pp_obstack, &saved, sizeof(saved));
2221                 }
2222                 goto restart;
2223         }
2224 }
2225
2226
2227 static void prepend_include_path(const char *path)
2228 {
2229         searchpath_entry_t *entry = OALLOCZ(&config_obstack, searchpath_entry_t);
2230         entry->path = path;
2231         entry->next = searchpath;
2232         searchpath  = entry;
2233 }
2234
2235 static void setup_include_path(void)
2236 {
2237         /* built-in paths */
2238         prepend_include_path("/usr/include");
2239
2240         /* parse environment variable */
2241         const char *cpath = getenv("CPATH");
2242         if (cpath != NULL && *cpath != '\0') {
2243                 const char *begin = cpath;
2244                 const char *c;
2245                 do {
2246                         c = begin;
2247                         while (*c != '\0' && *c != ':')
2248                                 ++c;
2249
2250                         size_t len = c-begin;
2251                         if (len == 0) {
2252                                 /* for gcc compatibility (Matze: I would expect that
2253                                  * nothing happens for an empty entry...) */
2254                                 prepend_include_path(".");
2255                         } else {
2256                                 char *string = obstack_alloc(&config_obstack, len+1);
2257                                 memcpy(string, begin, len);
2258                                 string[len] = '\0';
2259
2260                                 prepend_include_path(string);
2261                         }
2262
2263                         begin = c+1;
2264                         /* skip : */
2265                         if (*begin == ':')
2266                                 ++begin;
2267                 } while(*c != '\0');
2268         }
2269 }
2270
2271 int pptest_main(int argc, char **argv);
2272 int pptest_main(int argc, char **argv)
2273 {
2274         init_symbol_table();
2275         init_tokens();
2276         init_symbols();
2277
2278         obstack_init(&config_obstack);
2279         obstack_init(&pp_obstack);
2280         obstack_init(&input_obstack);
2281         strset_init(&stringset);
2282
2283         error_on_unknown_chars = false;
2284
2285         setup_include_path();
2286
2287         /* simplistic commandline parser */
2288         const char *filename = NULL;
2289         const char *output = NULL;
2290         for (int i = 1; i < argc; ++i) {
2291                 const char *opt = argv[i];
2292                 if (streq(opt, "-I")) {
2293                         prepend_include_path(argv[++i]);
2294                         continue;
2295                 } else if (streq(opt, "-E")) {
2296                         /* ignore */
2297                 } else if (streq(opt, "-o")) {
2298                         output = argv[++i];
2299                         continue;
2300                 } else if (opt[0] == '-') {
2301                         fprintf(stderr, "Unknown option '%s'\n", opt);
2302                 } else {
2303                         if (filename != NULL)
2304                                 fprintf(stderr, "Multiple inputs not supported\n");
2305                         filename = argv[i];
2306                 }
2307         }
2308         if (filename == NULL) {
2309                 fprintf(stderr, "No input specified\n");
2310                 return 1;
2311         }
2312
2313         if (output == NULL) {
2314                 out = stdout;
2315         } else {
2316                 out = fopen(output, "w");
2317                 if (out == NULL) {
2318                         fprintf(stderr, "Couldn't open output '%s'\n", output);
2319                         return 1;
2320                 }
2321         }
2322
2323         /* just here for gcc compatibility */
2324         fprintf(out, "# 1 \"%s\"\n", filename);
2325         fprintf(out, "# 1 \"<built-in>\"\n");
2326         fprintf(out, "# 1 \"<command-line>\"\n");
2327
2328         FILE *file = fopen(filename, "r");
2329         if (file == NULL) {
2330                 fprintf(stderr, "Couldn't open input '%s'\n", filename);
2331                 return 1;
2332         }
2333         switch_input(file, filename);
2334
2335         for (;;) {
2336                 next_preprocessing_token();
2337                 if (pp_token.kind == T_EOF)
2338                         break;
2339                 emit_pp_token();
2340         }
2341
2342         fputc('\n', out);
2343         check_unclosed_conditionals();
2344         close_input();
2345         if (out != stdout)
2346                 fclose(out);
2347
2348         obstack_free(&input_obstack, NULL);
2349         obstack_free(&pp_obstack, NULL);
2350         obstack_free(&config_obstack, NULL);
2351
2352         strset_destroy(&stringset);
2353
2354         exit_tokens();
2355         exit_symbol_table();
2356
2357         return 0;
2358 }