nsz Git - cparser/blob - preprocessor.c

   1 #include <config.h>
   2
   3 #include <assert.h>
   4 #include <errno.h>
   5 #include <string.h>
   6 #include <stdbool.h>
   7 #include <ctype.h>
   8
   9 #include "preprocessor.h"
  10 #include "token_t.h"
  11 #include "symbol_t.h"
  12 #include "adt/util.h"
  13 #include "adt/error.h"
  14 #include "adt/strutil.h"
  15 #include "adt/strset.h"
  16 #include "lang_features.h"
  17 #include "diagnostic.h"
  18 #include "string_rep.h"
  19 #include "input.h"
  20
  /* Number of slots kept free at the front of pp_input_t.buf: next_real_char()
   * decodes fresh input starting at buf + MAX_PUTBACK, so put_back() has room
   * to push characters back in front of it. */
  21 #define MAX_PUTBACK 3
  /* Presumably the maximum nesting depth of included files -- TODO confirm
   * against the code using it (not visible in this part of the file). */
  22 #define INCLUDE_LIMIT 199  /* 199 is for gcc "compatibility" */
  23
  /** One entry of a macro replacement list (see pp_definition_t.token_list). */
  24 typedef struct saved_token_t {
  /* the stored token itself */
  25         token_t token;
  /* whether whitespace preceded the token; stringify() emits a single space
   * for it between two tokens */
  26         bool    had_whitespace;
  27 } saved_token_t;
  28
  /** Whitespace and line-start bookkeeping attached to a token. */
  29 typedef struct whitespace_info_t {
  30         /** current token had whitespace in front of it */
  31         bool     had_whitespace;
  32         /** current token is at the beginning of a line.
  33          * => a "#" at line begin starts a preprocessing directive. */
  34         bool     at_line_begin;
  35         /** number of spaces before the first token in a line */
  36         unsigned whitespace_at_line_begin;
  37 } whitespace_info_t;
  38
  /**
   * A preprocessor definition together with the state needed while it is being
   * expanded. Besides macros, macro parameters are represented by this struct
   * as well (is_parameter; see the assertion in expand_next()).
   */
  39 struct pp_definition_t {
  /* presumably the name being defined -- TODO confirm */
  40         symbol_t          *symbol;
  41         source_position_t  source_position;
  /* expansion that was active when start_expanding() was called for this
   * definition; finished_expanding() makes it the current expansion again */
  42         pp_definition_t   *parent_expansion;
  /* index into token_list of the next token expand_next() will deliver */
  43         size_t             expand_pos;
  44         whitespace_info_t  expand_info;
  45         bool               is_variadic    : 1;
  /* set by start_expanding(), cleared by finished_expanding() */
  46         bool               is_expanding   : 1;
  47         bool               has_parameters : 1;
  48         bool               is_parameter   : 1;
  /* NOTE(review): the following three fields look like the link from a
   * parameter to its macro and the macro's parameter array -- confirm */
  49         pp_definition_t   *function_definition;
  50         size_t             n_parameters;
  51         pp_definition_t   *parameters;
  52
  53         /* replacement */
  /* number of entries in token_list */
  54         size_t             list_len;
  55         saved_token_t     *token_list;
  56 };
  57
  /**
   * Entry of a linked stack of conditionals (linked through parent).
   * NOTE(review): presumably one entry per open #if/#ifdef/#ifndef block; the
   * code maintaining the stack is not visible in this part of the file.
   */
  58 typedef struct pp_conditional_t pp_conditional_t;
  59 struct pp_conditional_t {
  60         source_position_t  source_position;
  61         bool               condition;
  62         bool               in_else;
  63         /** conditional in skip mode (then+else gets skipped) */
  64         bool               skip;
  /* next entry further down the stack */
  65         pp_conditional_t  *parent;
  66 };
  67
  /**
   * State of one input being read: the stream, its decoder, a buffer of decoded
   * characters and the current source position.
   */
  68 typedef struct pp_input_t pp_input_t;
  69 struct pp_input_t {
  /* underlying stream; handed back to the caller by close_pp_input() */
  70         FILE               *file;
  /* decoder created by input_from_stream(), released with input_free() */
  71         input_t            *input;
  /* current character; EOF once the input is exhausted */
  72         utf32               c;
  /* decoded characters; the first MAX_PUTBACK slots are reserved for
   * put_back() */
  73         utf32               buf[1024+MAX_PUTBACK];
  /* one past the last valid character in buf (NULL before the first read) */
  74         const utf32        *bufend;
  /* next character to be read from buf (NULL before the first read) */
  75         const utf32        *bufpos;
  76         source_position_t   position;
  /* next older saved input on input_stack (set by push_input()) */
  77         pp_input_t         *parent;
  /* NOTE(review): looks like the line number the output is currently at --
   * confirm against print_line_directive() */
  78         unsigned            output_line;
  /* search path entry passed to switch_pp_input() for this input */
  79         searchpath_entry_t *path;
  80 };
  81
  /** Node of a singly linked list of search path entries. */
  82 struct searchpath_entry_t {
  /* presumably a directory to search for included files -- TODO confirm */
  83         const char         *path;
  /* following list node */
  84         searchpath_entry_t *next;
  85 };
  86
  /* the input currently being read */
  87 static pp_input_t      input;
  88
  /* inputs saved by push_input(), newest first (linked via pp_input_t.parent) */
  89 static pp_input_t     *input_stack;
  /* number of entries on input_stack */
  90 static unsigned        n_inputs;
  /* obstack holding the entries of input_stack */
  91 static struct obstack  input_obstack;
  92
  /* NOTE(review): presumably the innermost open conditional -- confirm */
  93 static pp_conditional_t *conditional_stack;
  94
  /* the token most recently produced; also written by expand_next(),
   * parse_string() and stringify() */
  95 token_t                  pp_token;
  /* when false, the '$' case of SYMBOL_CASES_WITHOUT_E_P jumps to the label
   * dollar_sign instead of being treated as a symbol character */
  96 bool                     allow_dollar_in_symbol   = true;
  /* when true, parse_string() replaces escape sequences by their value;
   * otherwise they are kept as written */
  97 static bool              resolve_escape_sequences = true;
  98 static bool              error_on_unknown_chars   = true;
  99 static bool              skip_mode;
 100 static FILE             *out;
 101 static struct obstack    pp_obstack;
 102 static struct obstack    config_obstack;
 103 static const char       *printed_input_name = NULL;
  /* source position assigned to every token delivered by expand_next() */
 104 static source_position_t expansion_pos;
  /* innermost definition currently being expanded, NULL if none */
 105 static pp_definition_t  *current_expansion  = NULL;
 106 static pp_definition_t  *current_call       = NULL;
 107 static pp_definition_t  *current_argument   = NULL;
  /* reset to NULL by finished_expanding() when this definition finishes */
 108 static pp_definition_t  *argument_expanding = NULL;
 109 static unsigned          argument_brace_count;
  /* set of interned strings used by identify_string() */
 110 static strset_t          stringset;
 111 static token_kind_t      last_token;
 112
 113 static searchpath_entry_t *searchpath;
 114
 115 static whitespace_info_t next_info; /* valid if had_whitespace is true */
  /* whitespace information for the current token */
 116 static whitespace_info_t info;
 117
 /* Forward declarations of functions that are used before their definition. */
 118 static inline void next_char(void);
 119 static void next_input_token(void);
 120 static void print_line_directive(const source_position_t *pos, const char *add);
 121
 /* Symbols for the digraph spellings ":>", "<:", "<%", "%:", "%:%:" and "%>";
  * filled in by init_symbols(). */
 122 static symbol_t *symbol_colongreater;
 123 static symbol_t *symbol_lesscolon;
 124 static symbol_t *symbol_lesspercent;
 125 static symbol_t *symbol_percentcolon;
 126 static symbol_t *symbol_percentcolonpercentcolon;
 127 static symbol_t *symbol_percentgreater;
 128
 129 static void init_symbols(void)
 130 {
 131         symbol_colongreater             = symbol_table_insert(":>");
 132         symbol_lesscolon                = symbol_table_insert("<:");
 133         symbol_lesspercent              = symbol_table_insert("<%");
 134         symbol_percentcolon             = symbol_table_insert("%:");
 135         symbol_percentcolonpercentcolon = symbol_table_insert("%:%:");
 136         symbol_percentgreater           = symbol_table_insert("%>");
 137 }
 138
 139 void switch_pp_input(FILE *const file, char const *const filename, searchpath_entry_t *const path)
 140 {
 141         input.file                = file;
 142         input.input               = input_from_stream(file, NULL);
 143         input.bufend              = NULL;
 144         input.bufpos              = NULL;
 145         input.output_line         = 0;
 146         input.position.input_name = filename;
 147         input.position.lineno     = 1;
 148         input.path                = path;
 149
 150         /* indicate that we're at a new input */
 151         print_line_directive(&input.position, input_stack != NULL ? "1" : NULL);
 152
 153         /* place a virtual '\n' so we realize we're at line begin */
 154         input.position.lineno = 0;
 155         input.c               = '\n';
 156 }
 157
 158 FILE *close_pp_input(void)
 159 {
 160         input_free(input.input);
 161
 162         FILE* const file = input.file;
 163         assert(file);
 164
 165         input.input  = NULL;
 166         input.file   = NULL;
 167         input.bufend = NULL;
 168         input.bufpos = NULL;
 169         input.c      = EOF;
 170
 171         return file;
 172 }
 173
 174 static void push_input(void)
 175 {
 176         pp_input_t *const saved_input = obstack_copy(&input_obstack, &input, sizeof(input));
 177
 178         /* adjust buffer positions */
 179         if (input.bufpos != NULL)
 180                 saved_input->bufpos = saved_input->buf + (input.bufpos - input.buf);
 181         if (input.bufend != NULL)
 182                 saved_input->bufend = saved_input->buf + (input.bufend - input.buf);
 183
 184         saved_input->parent = input_stack;
 185         input_stack         = saved_input;
 186         ++n_inputs;
 187 }
 188
 189 static void pop_restore_input(void)
 190 {
 191         assert(n_inputs > 0);
 192         assert(input_stack != NULL);
 193
 194         pp_input_t *saved_input = input_stack;
 195
 196         memcpy(&input, saved_input, sizeof(input));
 197         input.parent = NULL;
 198
 199         /* adjust buffer positions */
 200         if (saved_input->bufpos != NULL)
 201                 input.bufpos = input.buf + (saved_input->bufpos - saved_input->buf);
 202         if (saved_input->bufend != NULL)
 203                 input.bufend = input.buf + (saved_input->bufend - saved_input->buf);
 204
 205         input_stack = saved_input->parent;
 206         obstack_free(&input_obstack, saved_input);
 207         --n_inputs;
 208 }
 209
 210 /**
 211  * Prints a parse error message at the current token.
 212  *
 213  * @param msg   the error message
 214  */
 215 static void parse_error(const char *msg)
 216 {
 217         errorf(&pp_token.base.source_position,  "%s", msg);
 218 }
 219
 220 static inline void next_real_char(void)
 221 {
 222         assert(input.bufpos <= input.bufend);
 223         if (input.bufpos >= input.bufend) {
 224                 size_t const n = decode(input.input, input.buf + MAX_PUTBACK, lengthof(input.buf) - MAX_PUTBACK);
 225                 if (n == 0) {
 226                         input.c = EOF;
 227                         return;
 228                 }
 229                 input.bufpos = input.buf + MAX_PUTBACK;
 230                 input.bufend = input.bufpos + n;
 231         }
 232         input.c = *input.bufpos++;
 233         ++input.position.colno;
 234 }
 235
 236 /**
 237  * Put a character back into the buffer.
 238  *
 239  * @param pc  the character to put back
 240  */
 241 static inline void put_back(utf32 const pc)
 242 {
 243         assert(input.bufpos > input.buf);
 244         *(--input.bufpos - input.buf + input.buf) = (char) pc;
 245         --input.position.colno;
 246 }
 247
 /*
  * Pseudo case label for line endings, to be used as "case NEWLINE:" inside a
  * switch on input.c. It matches "\r", "\r\n" and "\n", consumes the line
  * ending, increments input.position.lineno and resets input.position.colno
  * to 1; execution then continues with the statements following the label.
  * The expansion defines the label "newline", so the macro can be used only
  * once per function.
  */
 248 #define NEWLINE \
 249         '\r': \
 250                 next_char(); \
 251                 if (input.c == '\n') { \
 252         case '\n': \
 253                         next_char(); \
 254                 } \
 255                 ++input.position.lineno; \
 256                 input.position.colno = 1; \
 257                 goto newline; \
 258                 newline // Let it look like an ordinary case label.
 259
 /* Asserts that the current character is c_type and advances to the next one.
  * The argument is parenthesized so that any expression can be passed. */
 #define eat(c_type) (assert(input.c == (c_type)), next_char())
 261
 /**
  * Handles a backslash in the input (input.c must be '\\', see eat()).
  * If the backslash is directly followed by a line ending, both are consumed,
  * i.e. the two lines are concatenated, and input.c is the first character
  * after the line ending. Otherwise the character read after the backslash is
  * put back and input.c is '\\' again.
  */
 262 static void maybe_concat_lines(void)
 263 {
 264         eat('\\');
 265
 266         switch (input.c) {
 /* NEWLINE already consumed the line ending and updated the position */
 267         case NEWLINE:
 268                 info.whitespace_at_line_begin = 0;
 269                 return;
 270
 271         default:
 272                 break;
 273         }
 274
 /* no line ending followed: undo the lookahead */
 275         put_back(input.c);
 276         input.c = '\\';
 277 }
 278
 279 /**
 280  * Set c to the next input character, ie.
 281  * after expanding trigraphs.
 282  */
 283 static inline void next_char(void)
 284 {
 285         next_real_char();
 286
 287         /* filter trigraphs and concatenated lines */
 288         if (UNLIKELY(input.c == '\\')) {
 289                 maybe_concat_lines();
 290                 goto end_of_next_char;
 291         }
 292
 293         if (LIKELY(input.c != '?'))
 294                 goto end_of_next_char;
 295
 296         next_real_char();
 297         if (LIKELY(input.c != '?')) {
 298                 put_back(input.c);
 299                 input.c = '?';
 300                 goto end_of_next_char;
 301         }
 302
 303         next_real_char();
 304         switch (input.c) {
 305         case '=': input.c = '#'; break;
 306         case '(': input.c = '['; break;
 307         case '/': input.c = '\\'; maybe_concat_lines(); break;
 308         case ')': input.c = ']'; break;
 309         case '\'': input.c = '^'; break;
 310         case '<': input.c = '{'; break;
 311         case '!': input.c = '|'; break;
 312         case '>': input.c = '}'; break;
 313         case '-': input.c = '~'; break;
 314         default:
 315                 put_back(input.c);
 316                 put_back('?');
 317                 input.c = '?';
 318                 break;
 319         }
 320
 321 end_of_next_char:;
 322 #ifdef DEBUG_CHARS
 323         printf("nchar '%c'\n", input.c);
 324 #endif
 325 }
 326
 327
 328
 /**
  * Returns true if the given char is an octal digit ('0' to '7').
  *
  * @param chr  the character to check
  */
 static inline bool is_octal_digit(int chr)
 {
         /* the decimal digits are guaranteed to have consecutive values */
         return chr >= '0' && chr <= '7';
 }
 350
 /**
  * Returns the numeric value (0 to 15) of a hexadecimal digit; both lower and
  * upper case letters are accepted. Calls panic() for any other character.
  * No assumption about the values of the letters is made.
  */
 static int digit_value(int digit)
 {
         static const char lower_digits[] = "0123456789abcdef";
         static const char upper_digits[] = "0123456789ABCDEF";

         for (int value = 0; value < 16; ++value) {
                 if (digit == lower_digits[value] || digit == upper_digits[value])
                         return value;
         }
         panic("wrong character given");
 }
 384
 385 /**
 386  * Parses an octal character sequence.
 387  *
 388  * @param first_digit  the already read first digit
 389  */
 390 static utf32 parse_octal_sequence(const utf32 first_digit)
 391 {
 392         assert(is_octal_digit(first_digit));
 393         utf32 value = digit_value(first_digit);
 394         if (!is_octal_digit(input.c)) return value;
 395         value = 8 * value + digit_value(input.c);
 396         next_char();
 397         if (!is_octal_digit(input.c)) return value;
 398         value = 8 * value + digit_value(input.c);
 399         next_char();
 400         return value;
 401
 402 }
 403
 404 /**
 405  * Parses a hex character sequence.
 406  */
 407 static utf32 parse_hex_sequence(void)
 408 {
 409         utf32 value = 0;
 410         while (isxdigit(input.c)) {
 411                 value = 16 * value + digit_value(input.c);
 412                 next_char();
 413         }
 414         return value;
 415 }
 416
 /**
  * Returns true if @p v may be designated by a universal character name
  * (C11 §6.4.3:2): it must not be below 0xA0, except for 0x24, 0x40 and 0x60,
  * and it must not be in the range 0xD800 to 0xDFFF.
  */
 static bool is_universal_char_valid(utf32 const v)
 {
         bool const is_allowed_low = v == 0x24 || v == 0x40 || v == 0x60;
         bool const is_low         = v < 0xA0U;
         bool const in_d800_dfff   = v >= 0xD800 && v <= 0xDFFF;

         return (!is_low || is_allowed_low) && !in_d800_dfff;
 }
 426
 427 static utf32 parse_universal_char(unsigned const n_digits)
 428 {
 429         utf32 v = 0;
 430         for (unsigned k = n_digits; k != 0; --k) {
 431                 if (isxdigit(input.c)) {
 432                         v = 16 * v + digit_value(input.c);
 433                         if (!resolve_escape_sequences)
 434                                 obstack_1grow(&symbol_obstack, input.c);
 435                         next_char();
 436                 } else {
 437                         errorf(&input.position,
 438                                "short universal character name, expected %u more digits",
 439                                    k);
 440                         break;
 441                 }
 442         }
 443         if (!is_universal_char_valid(v)) {
 444                 errorf(&input.position,
 445                        "\\%c%0*X is not a valid universal character name",
 446                        n_digits == 4 ? 'u' : 'U', (int)n_digits, v);
 447         }
 448         return v;
 449 }
 450
 /**
  * Returns true if the character designated by @p v is allowed in identifiers
  * according to the ranges listed in C11 Annex D.1.
  */
 static bool is_universal_char_valid_identifier(utf32 const v)
 {
         /* inclusive ranges of C11 Annex D.1 */
         static const struct {
                 utf32 first;
                 utf32 last;
         } allowed[] = {
                 { 0x000A8, 0x000A8 }, { 0x000AA, 0x000AA },
                 { 0x000AD, 0x000AD }, { 0x000AF, 0x000AF },
                 { 0x000B2, 0x000B5 }, { 0x000B7, 0x000BA },
                 { 0x000BC, 0x000BE }, { 0x000C0, 0x000D6 },
                 { 0x000D8, 0x000F6 }, { 0x000F8, 0x000FF },
                 { 0x00100, 0x0167F }, { 0x01681, 0x0180D },
                 { 0x0180F, 0x01FFF }, { 0x0200B, 0x0200D },
                 { 0x0202A, 0x0202E }, { 0x0203F, 0x02040 },
                 { 0x02054, 0x02054 }, { 0x02060, 0x0206F },
                 { 0x02070, 0x0218F }, { 0x02460, 0x024FF },
                 { 0x02776, 0x02793 }, { 0x02C00, 0x02DFF },
                 { 0x02E80, 0x02FFF }, { 0x03004, 0x03007 },
                 { 0x03021, 0x0302F }, { 0x03031, 0x0303F },
                 { 0x03040, 0x0D7FF }, { 0x0F900, 0x0FD3D },
                 { 0x0FD40, 0x0FDCF }, { 0x0FDF0, 0x0FE44 },
                 { 0x0FE47, 0x0FFFD }, { 0x10000, 0x1FFFD },
                 { 0x20000, 0x2FFFD }, { 0x30000, 0x3FFFD },
                 { 0x40000, 0x4FFFD }, { 0x50000, 0x5FFFD },
                 { 0x60000, 0x6FFFD }, { 0x70000, 0x7FFFD },
                 { 0x80000, 0x8FFFD }, { 0x90000, 0x9FFFD },
                 { 0xA0000, 0xAFFFD }, { 0xB0000, 0xBFFFD },
                 { 0xC0000, 0xCFFFD }, { 0xD0000, 0xDFFFD },
                 { 0xE0000, 0xEFFFD },
         };

         for (size_t i = 0; i < sizeof(allowed) / sizeof(allowed[0]); ++i) {
                 if (allowed[i].first <= v && v <= allowed[i].last)
                         return true;
         }
         return false;
 }
 501
 /**
  * Returns false if @p v lies in one of the ranges that C11 Annex D.2 excludes
  * from being the first character of an identifier, true otherwise.
  */
 static bool is_universal_char_valid_identifier_start(utf32 const v)
 {
         /* ranges listed in C11 Annex D.2 */
         bool const excluded =
                    (0x0300 <= v && v <= 0x036F)
                 || (0x1DC0 <= v && v <= 0x1DFF)
                 || (0x20D0 <= v && v <= 0x20FF)
                 || (0xFE20 <= v && v <= 0xFE2F);
         return !excluded;
 }
 511
 512 /**
 513  * Parse an escape sequence.
 514  */
 515 static utf32 parse_escape_sequence(void)
 516 {
 517         eat('\\');
 518
 519         utf32 const ec = input.c;
 520         next_char();
 521
 522         switch (ec) {
 523         case '"':  return '"';
 524         case '\'': return '\'';
 525         case '\\': return '\\';
 526         case '?': return '\?';
 527         case 'a': return '\a';
 528         case 'b': return '\b';
 529         case 'f': return '\f';
 530         case 'n': return '\n';
 531         case 'r': return '\r';
 532         case 't': return '\t';
 533         case 'v': return '\v';
 534         case 'x':
 535                 return parse_hex_sequence();
 536         case '0':
 537         case '1':
 538         case '2':
 539         case '3':
 540         case '4':
 541         case '5':
 542         case '6':
 543         case '7':
 544                 return parse_octal_sequence(ec);
 545         case EOF:
 546                 parse_error("reached end of file while parsing escape sequence");
 547                 return EOF;
 548         /* \E is not documented, but handled, by GCC.  It is acceptable according
 549          * to §6.11.4, whereas \e is not. */
 550         case 'E':
 551         case 'e':
 552                 if (c_mode & _GNUC)
 553                         return 27;   /* hopefully 27 is ALWAYS the code for ESCAPE */
 554                 break;
 555
 556         case 'U': return parse_universal_char(8);
 557         case 'u': return parse_universal_char(4);
 558
 559         default:
 560                 break;
 561         }
 562         /* §6.4.4.4:8 footnote 64 */
 563         parse_error("unknown escape sequence");
 564         return EOF;
 565 }
 566
 567 static const char *identify_string(char *string)
 568 {
 569         const char *result = strset_insert(&stringset, string);
 570         if (result != string) {
 571                 obstack_free(&symbol_obstack, string);
 572         }
 573         return result;
 574 }
 575
 576 static string_t sym_make_string(string_encoding_t const enc)
 577 {
 578         obstack_1grow(&symbol_obstack, '\0');
 579         size_t      const len    = obstack_object_size(&symbol_obstack) - 1;
 580         char       *const string = obstack_finish(&symbol_obstack);
 581         char const *const result = identify_string(string);
 582         return (string_t){ result, len, enc };
 583 }
 584
 585 string_t make_string(char const *const string)
 586 {
 587         obstack_grow(&symbol_obstack, string, strlen(string));
 588         return sym_make_string(STRING_ENCODING_CHAR);
 589 }
 590
 591 static void parse_string(utf32 const delimiter, token_kind_t const kind,
 592                          string_encoding_t const enc,
 593                          char const *const context)
 594 {
 595         const unsigned start_linenr = input.position.lineno;
 596
 597         eat(delimiter);
 598
 599         while (true) {
 600                 switch (input.c) {
 601                 case '\\': {
 602                         if (resolve_escape_sequences) {
 603                                 utf32 const tc = parse_escape_sequence();
 604                                 if (enc == STRING_ENCODING_CHAR) {
 605                                         if (tc >= 0x100) {
 606                                                 warningf(WARN_OTHER, &pp_token.base.source_position, "escape sequence out of range");
 607                                         }
 608                                         obstack_1grow(&symbol_obstack, tc);
 609                                 } else {
 610                                         obstack_grow_utf8(&symbol_obstack, tc);
 611                                 }
 612                         } else {
 613                                 obstack_1grow(&symbol_obstack, (char)input.c);
 614                                 next_char();
 615                                 obstack_1grow(&symbol_obstack, (char)input.c);
 616                                 next_char();
 617                         }
 618                         break;
 619                 }
 620
 621                 case NEWLINE:
 622                         errorf(&pp_token.base.source_position, "newline while parsing %s", context);
 623                         break;
 624
 625                 case EOF: {
 626                         source_position_t source_position;
 627                         source_position.input_name = pp_token.base.source_position.input_name;
 628                         source_position.lineno     = start_linenr;
 629                         errorf(&source_position, "EOF while parsing %s", context);
 630                         goto end_of_string;
 631                 }
 632
 633                 default:
 634                         if (input.c == delimiter) {
 635                                 next_char();
 636                                 goto end_of_string;
 637                         } else {
 638                                 obstack_grow_utf8(&symbol_obstack, input.c);
 639                                 next_char();
 640                                 break;
 641                         }
 642                 }
 643         }
 644
 645 end_of_string:
 646         pp_token.kind           = kind;
 647         pp_token.literal.string = sym_make_string(enc);
 648 }
 649
 650 static void parse_string_literal(string_encoding_t const enc)
 651 {
 652         parse_string('"', T_STRING_LITERAL, enc, "string literal");
 653 }
 654
 655 static void parse_character_constant(string_encoding_t const enc)
 656 {
 657         parse_string('\'', T_CHARACTER_CONSTANT, enc, "character constant");
 658         if (pp_token.literal.string.size == 0) {
 659                 parse_error("empty character constant");
 660         }
 661 }
 662
 /*
  * List of case labels, to be used as "case SYMBOL_CASES_WITHOUT_E_P:". It
  * covers '$', '_' and all letters a-z and A-Z except e, E, p and P.
  * For '$' control jumps to the label "dollar_sign", which the using function
  * has to provide, when allow_dollar_in_symbol is false.
  * NOTE(review): e/E/p/P are presumably left out for users that have to treat
  * exponent characters separately -- confirm at the places of use.
  */
 663 #define SYMBOL_CASES_WITHOUT_E_P \
 664              '$': if (!allow_dollar_in_symbol) goto dollar_sign; \
 665         case 'a': \
 666         case 'b': \
 667         case 'c': \
 668         case 'd': \
 669         case 'f': \
 670         case 'g': \
 671         case 'h': \
 672         case 'i': \
 673         case 'j': \
 674         case 'k': \
 675         case 'l': \
 676         case 'm': \
 677         case 'n': \
 678         case 'o': \
 679         case 'q': \
 680         case 'r': \
 681         case 's': \
 682         case 't': \
 683         case 'u': \
 684         case 'v': \
 685         case 'w': \
 686         case 'x': \
 687         case 'y': \
 688         case 'z': \
 689         case 'A': \
 690         case 'B': \
 691         case 'C': \
 692         case 'D': \
 693         case 'F': \
 694         case 'G': \
 695         case 'H': \
 696         case 'I': \
 697         case 'J': \
 698         case 'K': \
 699         case 'L': \
 700         case 'M': \
 701         case 'N': \
 702         case 'O': \
 703         case 'Q': \
 704         case 'R': \
 705         case 'S': \
 706         case 'T': \
 707         case 'U': \
 708         case 'V': \
 709         case 'W': \
 710         case 'X': \
 711         case 'Y': \
 712         case 'Z': \
 713         case '_'
 714
 /*
  * List of case labels, to be used as "case SYMBOL_CASES:": everything covered
  * by SYMBOL_CASES_WITHOUT_E_P plus the letters e, p, E and P.
  */
 715 #define SYMBOL_CASES \
 716              SYMBOL_CASES_WITHOUT_E_P: \
 717         case 'e': \
 718         case 'p': \
 719         case 'E': \
 720         case 'P'
 721
 /*
  * List of case labels for the decimal digits '0' to '9', to be used as
  * "case DIGIT_CASES:".
  */
 722 #define DIGIT_CASES \
 723              '0':  \
 724         case '1':  \
 725         case '2':  \
 726         case '3':  \
 727         case '4':  \
 728         case '5':  \
 729         case '6':  \
 730         case '7':  \
 731         case '8':  \
 732         case '9'
 733
 734 static void start_expanding(pp_definition_t *definition)
 735 {
 736         definition->parent_expansion = current_expansion;
 737         definition->expand_pos       = 0;
 738         definition->is_expanding     = true;
 739         if (definition->list_len > 0) {
 740                 definition->token_list[0].had_whitespace
 741                         = info.had_whitespace;
 742         }
 743         current_expansion = definition;
 744 }
 745
 746 static void finished_expanding(pp_definition_t *definition)
 747 {
 748         assert(definition->is_expanding);
 749         pp_definition_t *parent = definition->parent_expansion;
 750         definition->parent_expansion = NULL;
 751         definition->is_expanding     = false;
 752
 753         /* stop further expanding once we expanded a parameter used in a
 754          * sub macro-call */
 755         if (definition == argument_expanding)
 756                 argument_expanding = NULL;
 757
 758         assert(current_expansion == definition);
 759         current_expansion = parent;
 760 }
 761
 762 static void grow_string_escaped(struct obstack *obst, const string_t *string, char const *delimiter)
 763 {
 764         char const *prefix = get_string_encoding_prefix(string->encoding);
 765         obstack_printf(obst, "%s%s", prefix, delimiter);
 766         size_t      size = string->size;
 767         const char *str  = string->begin;
 768         if (resolve_escape_sequences) {
 769                 obstack_grow(obst, str, size);
 770         } else {
 771                 for (size_t i = 0; i < size; ++i) {
 772                         const char c = str[i];
 773                         if (c == '\\' || c == '"')
 774                                 obstack_1grow(obst, '\\');
 775                         obstack_1grow(obst, c);
 776                 }
 777         }
 778         obstack_printf(obst, "%s", delimiter);
 779 }
 780
 781 static void grow_token(struct obstack *obst, const token_t *token)
 782 {
 783         switch (token->kind) {
 784         case T_NUMBER:
 785                 obstack_grow(obst, token->literal.string.begin, token->literal.string.size);
 786                 break;
 787
 788         case T_STRING_LITERAL: {
 789                 char const *const delimiter = resolve_escape_sequences ? "\"" : "\\\"";
 790                 grow_string_escaped(obst, &token->literal.string, delimiter);
 791                 break;
 792         }
 793
 794         case T_CHARACTER_CONSTANT:
 795                 grow_string_escaped(obst, &token->literal.string, "'");
 796                 break;
 797
 798         case T_IDENTIFIER:
 799         default: {
 800                 const char *str = token->base.symbol->string;
 801                 size_t      len = strlen(str);
 802                 obstack_grow(obst, str, len);
 803                 break;
 804         }
 805         }
 806 }
 807
 808 static void stringify(const pp_definition_t *definition)
 809 {
 810         assert(obstack_object_size(&symbol_obstack) == 0);
 811
 812         size_t list_len = definition->list_len;
 813         for (size_t p = 0; p < list_len; ++p) {
 814                 const saved_token_t *saved = &definition->token_list[p];
 815                 if (p > 0 && saved->had_whitespace)
 816                         obstack_1grow(&symbol_obstack, ' ');
 817                 grow_token(&symbol_obstack, &saved->token);
 818         }
 819         pp_token.kind           = T_STRING_LITERAL;
 820         pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
 821 }
 822
 823 static inline void set_punctuator(token_kind_t const kind)
 824 {
 825         pp_token.kind        = kind;
 826         pp_token.base.symbol = token_symbols[kind];
 827 }
 828
 829 static inline void set_digraph(token_kind_t const kind, symbol_t *const symbol)
 830 {
 831         pp_token.kind        = kind;
 832         pp_token.base.symbol = symbol;
 833 }
 834
 835 /**
 836  * returns next final token from a preprocessor macro expansion
 837  */
 838 static bool expand_next(void)
 839 {
 840         if (current_expansion == NULL)
 841                 return false;
 842
 843 restart:;
 844         size_t pos = current_expansion->expand_pos;
 845         if (pos >= current_expansion->list_len) {
 846                 finished_expanding(current_expansion);
 847                 /* it was the outermost expansion, parse pptoken normally */
 848                 if (current_expansion == NULL) {
 849                         return false;
 850                 }
 851                 goto restart;
 852         }
 853         const saved_token_t *saved = &current_expansion->token_list[pos++];
 854         pp_token = saved->token;
 855         if (pp_token.kind == '#') {
 856                 if (pos < current_expansion->list_len) {
 857                         const saved_token_t *next = &current_expansion->token_list[pos];
 858                         if (next->token.kind == T_MACRO_PARAMETER) {
 859                                 pp_definition_t *def = next->token.macro_parameter.def;
 860                                 assert(def != NULL && def->is_parameter);
 861                                 stringify(def);
 862                                 ++pos;
 863                         }
 864                 }
 865         }
 866
 867         if (current_expansion->expand_pos > 0)
 868                 info.had_whitespace = saved->had_whitespace;
 869         current_expansion->expand_pos = pos;
 870         pp_token.base.source_position = expansion_pos;
 871
 872         return true;
 873 }
 874
 875 /**
 876  * Returns the next token kind found when continuing the current expansions
 877  * without starting new sub-expansions.
 878  */
 879 static token_kind_t peek_expansion(void)
 880 {
 881         for (pp_definition_t *e = current_expansion; e; e = e->parent_expansion) {
 882                 if (e->expand_pos < e->list_len)
 883                         return e->token_list[e->expand_pos].token.kind;
 884         }
 885         return T_EOF;
 886 }
 887
 888 static void skip_line_comment(void)
 889 {
 890         info.had_whitespace = true;
 891         while (true) {
 892                 switch (input.c) {
 893                 case EOF:
 894                         return;
 895
 896                 case '\r':
 897                 case '\n':
 898                         return;
 899
 900                 default:
 901                         next_char();
 902                         break;
 903                 }
 904         }
 905 }
 906
 907 static void skip_multiline_comment(void)
 908 {
 909         info.had_whitespace = true;
 910
 911         unsigned start_linenr = input.position.lineno;
 912         while (true) {
 913                 switch (input.c) {
 914                 case '/':
 915                         next_char();
 916                         if (input.c == '*') {
 917                                 /* TODO: nested comment, warn here */
 918                         }
 919                         break;
 920                 case '*':
 921                         next_char();
 922                         if (input.c == '/') {
 923                                 if (input.position.lineno != input.output_line)
 924                                         info.whitespace_at_line_begin = input.position.colno;
 925                                 next_char();
 926                                 return;
 927                         }
 928                         break;
 929
 930                 case NEWLINE:
 931                         break;
 932
 933                 case EOF: {
 934                         source_position_t source_position;
 935                         source_position.input_name = pp_token.base.source_position.input_name;
 936                         source_position.lineno     = start_linenr;
 937                         errorf(&source_position, "at end of file while looking for comment end");
 938                         return;
 939                 }
 940
 941                 default:
 942                         next_char();
 943                         break;
 944                 }
 945         }
 946 }
 947
 948 static bool skip_till_newline(bool stop_at_non_whitespace)
 949 {
 950         bool res = false;
 951         while (true) {
 952                 switch (input.c) {
 953                 case ' ':
 954                 case '\t':
 955                         next_char();
 956                         continue;
 957
 958                 case '/':
 959                         next_char();
 960                         if (input.c == '/') {
 961                                 next_char();
 962                                 skip_line_comment();
 963                                 continue;
 964                         } else if (input.c == '*') {
 965                                 next_char();
 966                                 skip_multiline_comment();
 967                                 continue;
 968                         } else {
 969                                 put_back(input.c);
 970                                 input.c = '/';
 971                         }
 972                         return true;
 973
 974                 case NEWLINE:
 975                         return res;
 976
 977                 default:
 978                         if (stop_at_non_whitespace)
 979                                 return false;
 980                         res = true;
 981                         next_char();
 982                         continue;
 983                 }
 984         }
 985 }
 986
 987 static void skip_whitespace(void)
 988 {
 989         while (true) {
 990                 switch (input.c) {
 991                 case ' ':
 992                 case '\t':
 993                         ++info.whitespace_at_line_begin;
 994                         info.had_whitespace = true;
 995                         next_char();
 996                         continue;
 997
 998                 case NEWLINE:
 999                         info.at_line_begin  = true;
1000                         info.had_whitespace = true;
1001                         info.whitespace_at_line_begin = 0;
1002                         continue;
1003
1004                 case '/':
1005                         next_char();
1006                         if (input.c == '/') {
1007                                 next_char();
1008                                 skip_line_comment();
1009                                 continue;
1010                         } else if (input.c == '*') {
1011                                 next_char();
1012                                 skip_multiline_comment();
1013                                 continue;
1014                         } else {
1015                                 put_back(input.c);
1016                                 input.c = '/';
1017                         }
1018                         return;
1019
1020                 default:
1021                         return;
1022                 }
1023         }
1024 }
1025
1026 static inline void eat_pp(pp_token_kind_t const kind)
1027 {
1028         assert(pp_token.base.symbol->pp_ID == kind);
1029         (void) kind;
1030         next_input_token();
1031 }
1032
1033 static inline void eat_token(token_kind_t const kind)
1034 {
1035         assert(pp_token.kind == kind);
1036         (void)kind;
1037         next_input_token();
1038 }
1039
1040 static void parse_symbol(void)
1041 {
1042         assert(obstack_object_size(&symbol_obstack) == 0);
1043         while (true) {
1044                 switch (input.c) {
1045                 case DIGIT_CASES:
1046                 case SYMBOL_CASES:
1047                         obstack_1grow(&symbol_obstack, (char) input.c);
1048                         next_char();
1049                         break;
1050
1051                 case '\\':
1052                         next_char();
1053                         switch (input.c) {
1054                         {
1055                                 unsigned n;
1056                         case 'U': n = 8; goto universal;
1057                         case 'u': n = 4; goto universal;
1058 universal:
1059                                 if (!resolve_escape_sequences) {
1060                                         obstack_1grow(&symbol_obstack, '\\');
1061                                         obstack_1grow(&symbol_obstack, input.c);
1062                                 }
1063                                 next_char();
1064                                 utf32 const v = parse_universal_char(n);
1065                                 if (!is_universal_char_valid_identifier(v)) {
1066                                         if (is_universal_char_valid(v)) {
1067                                                 errorf(&input.position,
1068                                                            "universal character \\%c%0*X is not valid in an identifier",
1069                                                            n == 4 ? 'u' : 'U', (int)n, v);
1070                                         }
1071                                 } else if (obstack_object_size(&symbol_obstack) == 0 && !is_universal_char_valid_identifier_start(v)) {
1072                                         errorf(&input.position,
1073                                                    "universal character \\%c%0*X is not valid as start of an identifier",
1074                                                    n == 4 ? 'u' : 'U', (int)n, v);
1075                                 } else if (resolve_escape_sequences) {
1076                                         obstack_grow_utf8(&symbol_obstack, v);
1077                                 }
1078                                 break;
1079                         }
1080
1081                         default:
1082                                 put_back(input.c);
1083                                 input.c = '\\';
1084                                 goto end_symbol;
1085                         }
1086
1087                 default:
1088 dollar_sign:
1089                         goto end_symbol;
1090                 }
1091         }
1092
1093 end_symbol:
1094         obstack_1grow(&symbol_obstack, '\0');
1095         char *string = obstack_finish(&symbol_obstack);
1096
1097         /* might be a wide string or character constant ( L"string"/L'c' ) */
1098         if (input.c == '"' && string[0] == 'L' && string[1] == '\0') {
1099                 obstack_free(&symbol_obstack, string);
1100                 parse_string_literal(STRING_ENCODING_WIDE);
1101                 return;
1102         } else if (input.c == '\'' && string[0] == 'L' && string[1] == '\0') {
1103                 obstack_free(&symbol_obstack, string);
1104                 parse_character_constant(STRING_ENCODING_WIDE);
1105                 return;
1106         }
1107
1108         symbol_t *symbol = symbol_table_insert(string);
1109
1110         pp_token.kind        = symbol->ID;
1111         pp_token.base.symbol = symbol;
1112
1113         /* we can free the memory from symbol obstack if we already had an entry in
1114          * the symbol table */
1115         if (symbol->string != string) {
1116                 obstack_free(&symbol_obstack, string);
1117         }
1118 }
1119
1120 static void parse_number(void)
1121 {
1122         obstack_1grow(&symbol_obstack, (char) input.c);
1123         next_char();
1124
1125         while (true) {
1126                 switch (input.c) {
1127                 case '.':
1128                 case DIGIT_CASES:
1129                 case SYMBOL_CASES_WITHOUT_E_P:
1130                         obstack_1grow(&symbol_obstack, (char) input.c);
1131                         next_char();
1132                         break;
1133
1134                 case 'e':
1135                 case 'p':
1136                 case 'E':
1137                 case 'P':
1138                         obstack_1grow(&symbol_obstack, (char) input.c);
1139                         next_char();
1140                         if (input.c == '+' || input.c == '-') {
1141                                 obstack_1grow(&symbol_obstack, (char) input.c);
1142                                 next_char();
1143                         }
1144                         break;
1145
1146                 default:
1147 dollar_sign:
1148                         goto end_number;
1149                 }
1150         }
1151
1152 end_number:
1153         pp_token.kind           = T_NUMBER;
1154         pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
1155 }
1156
     /* Building blocks for the punctuator cases of next_input_token().
      * MAYBE_PROLOG opens a switch that ELSE_CODE/ELSE close again, so the
      * macros are only balanced when used as PROLOG ... MAYBE* ... ELSE*. */

     /* Consume the current character and open a switch over the next one. */
1157 #define MAYBE_PROLOG \
1158         next_char(); \
1159         switch (input.c) {
1160
     /* If the next character is `ch`: consume it, set punctuator `kind` and
      * return from the enclosing function. */
1161 #define MAYBE(ch, kind) \
1162         case ch: \
1163                 next_char(); \
1164                 set_punctuator(kind); \
1165                 return;
1166
     /* Like MAYBE, but the token is recorded with set_digraph(kind, symbol). */
1167 #define MAYBE_DIGRAPH(ch, kind, symbol) \
1168         case ch: \
1169                 next_char(); \
1170                 set_digraph(kind, symbol); \
1171                 return;
1172
     /* Default branch running `code`; closes the switch opened by
      * MAYBE_PROLOG. */
1173 #define ELSE_CODE(code) \
1174         default: \
1175                 code \
1176         }
1177
     /* Default branch: no longer match, emit punctuator `kind` and return. */
1178 #define ELSE(kind) ELSE_CODE(set_punctuator(kind); return;)
1179
1180 /** identifies and returns the next preprocessing token contained in the
1181  * input stream. No macro expansion is performed. */
     /* The token is stored in the global pp_token; the whitespace and
      * line-begin state seen in front of it is stored in the global info.
      * Comments and whitespace are skipped.  When an included file ends, the
      * including input is resumed transparently. */
1182 static void next_input_token(void)
1183 {
             /* whitespace state stashed in next_info takes precedence and is
              * consumed here; otherwise the token starts with a clean state */
1184         if (next_info.had_whitespace) {
1185                 info = next_info;
1186                 next_info.had_whitespace = false;
1187         } else {
1188                 info.at_line_begin  = false;
1189                 info.had_whitespace = false;
1190         }
             /* every path that skipped input without producing a token comes
              * back here */
1191 restart:
1192         pp_token.base.source_position = input.position;
1193         pp_token.base.symbol          = NULL;
1194
1195         switch (input.c) {
1196         case ' ':
1197         case '\t':
1198                 info.whitespace_at_line_begin++;
1199                 info.had_whitespace = true;
1200                 next_char();
1201                 goto restart;
1202
             /* NOTE(review): NEWLINE is a macro defined outside this chunk;
              * presumably it also consumes the line break -- confirm */
1203         case NEWLINE:
1204                 info.at_line_begin            = true;
1205                 info.had_whitespace           = true;
1206                 info.whitespace_at_line_begin = 0;
1207                 goto restart;
1208
1209         case SYMBOL_CASES:
1210                 parse_symbol();
1211                 return;
1212
1213         case DIGIT_CASES:
1214                 parse_number();
1215                 return;
1216
1217         case '"':
1218                 parse_string_literal(STRING_ENCODING_CHAR);
1219                 return;
1220
1221         case '\'':
1222                 parse_character_constant(STRING_ENCODING_CHAR);
1223                 return;
1224
1225         case '.':
1226                 MAYBE_PROLOG
                             /* '.' followed by a digit starts a number: push
                              * the digit back and restore the '.' so that
                              * parse_number() sees the whole literal */
1227                         case '0':
1228                         case '1':
1229                         case '2':
1230                         case '3':
1231                         case '4':
1232                         case '5':
1233                         case '6':
1234                         case '7':
1235                         case '8':
1236                         case '9':
1237                                 put_back(input.c);
1238                                 input.c = '.';
1239                                 parse_number();
1240                                 return;
1241
                             /* ".." without a third '.': emit a single '.',
                              * the second one stays the current character */
1242                         case '.':
1243                                 MAYBE_PROLOG
1244                                 MAYBE('.', T_DOTDOTDOT)
1245                                 ELSE_CODE(
1246                                         put_back(input.c);
1247                                         input.c = '.';
1248                                         set_punctuator('.');
1249                                         return;
1250                                 )
1251                 ELSE('.')
1252         case '&':
1253                 MAYBE_PROLOG
1254                 MAYBE('&', T_ANDAND)
1255                 MAYBE('=', T_ANDEQUAL)
1256                 ELSE('&')
1257         case '*':
1258                 MAYBE_PROLOG
1259                 MAYBE('=', T_ASTERISKEQUAL)
1260                 ELSE('*')
1261         case '+':
1262                 MAYBE_PROLOG
1263                 MAYBE('+', T_PLUSPLUS)
1264                 MAYBE('=', T_PLUSEQUAL)
1265                 ELSE('+')
1266         case '-':
1267                 MAYBE_PROLOG
1268                 MAYBE('>', T_MINUSGREATER)
1269                 MAYBE('-', T_MINUSMINUS)
1270                 MAYBE('=', T_MINUSEQUAL)
1271                 ELSE('-')
1272         case '!':
1273                 MAYBE_PROLOG
1274                 MAYBE('=', T_EXCLAMATIONMARKEQUAL)
1275                 ELSE('!')
1276         case '/':
1277                 MAYBE_PROLOG
1278                 MAYBE('=', T_SLASHEQUAL)
                     /* comments produce no token: skip them and scan again */
1279                 case '*':
1280                         next_char();
1281                         skip_multiline_comment();
1282                         goto restart;
1283                 case '/':
1284                         next_char();
1285                         skip_line_comment();
1286                         goto restart;
1287                 ELSE('/')
1288         case '%':
1289                 MAYBE_PROLOG
1290                 MAYBE_DIGRAPH('>', '}', symbol_percentgreater)
1291                 MAYBE('=', T_PERCENTEQUAL)
                     /* "%:" is the digraph for '#', "%:%:" the one for "##";
                      * "%:%" followed by anything else yields "%:" and the
                      * second '%' is scanned again */
1292                 case ':':
1293                         MAYBE_PROLOG
1294                         case '%':
1295                                 MAYBE_PROLOG
1296                                 MAYBE_DIGRAPH(':', T_HASHHASH, symbol_percentcolonpercentcolon)
1297                                 ELSE_CODE(
1298                                         put_back(input.c);
1299                                         input.c = '%';
1300                                         goto digraph_percentcolon;
1301                                 )
1302                         ELSE_CODE(
1303 digraph_percentcolon:
1304                                 set_digraph('#', symbol_percentcolon);
1305                                 return;
1306                         )
1307                 ELSE('%')
1308         case '<':
1309                 MAYBE_PROLOG
1310                 MAYBE_DIGRAPH(':', '[', symbol_lesscolon)
1311                 MAYBE_DIGRAPH('%', '{', symbol_lesspercent)
1312                 MAYBE('=', T_LESSEQUAL)
1313                 case '<':
1314                         MAYBE_PROLOG
1315                         MAYBE('=', T_LESSLESSEQUAL)
1316                         ELSE(T_LESSLESS)
1317                 ELSE('<')
1318         case '>':
1319                 MAYBE_PROLOG
1320                 MAYBE('=', T_GREATEREQUAL)
1321                 case '>':
1322                         MAYBE_PROLOG
1323                         MAYBE('=', T_GREATERGREATEREQUAL)
1324                         ELSE(T_GREATERGREATER)
1325                 ELSE('>')
1326         case '^':
1327                 MAYBE_PROLOG
1328                 MAYBE('=', T_CARETEQUAL)
1329                 ELSE('^')
1330         case '|':
1331                 MAYBE_PROLOG
1332                 MAYBE('=', T_PIPEEQUAL)
1333                 MAYBE('|', T_PIPEPIPE)
1334                 ELSE('|')
1335         case ':':
1336                 MAYBE_PROLOG
1337                 MAYBE_DIGRAPH('>', ']', symbol_colongreater)
                     /* "::" is a token only when c_mode has _CXX set;
                      * otherwise a single ':' is emitted and the second ':'
                      * is left for the next call */
1338                 case ':':
1339                         if (c_mode & _CXX) {
1340                                 next_char();
1341                                 set_punctuator(T_COLONCOLON);
1342                                 return;
1343                         }
1344                         /* FALLTHROUGH */
1345                 ELSE(':')
1346         case '=':
1347                 MAYBE_PROLOG
1348                 MAYBE('=', T_EQUALEQUAL)
1349                 ELSE('=')
1350         case '#':
1351                 MAYBE_PROLOG
1352                 MAYBE('#', T_HASHHASH)
1353                 ELSE('#')
1354
             /* punctuators that never combine with a following character */
1355         case '?':
1356         case '[':
1357         case ']':
1358         case '(':
1359         case ')':
1360         case '{':
1361         case '}':
1362         case '~':
1363         case ';':
1364         case ',':
1365                 set_punctuator(input.c);
1366                 next_char();
1367                 return;
1368
             /* With a non-empty input stack the current input is closed, the
              * saved one restored, a line directive with flag "2" printed and
              * scanning continues there.  Otherwise T_EOF is produced. */
1369         case EOF:
1370                 if (input_stack != NULL) {
1371                         fclose(close_pp_input());
1372                         pop_restore_input();
1373                         if (out)
1374                                 fputc('\n', out);
                             /* NOTE(review): the restored input being at EOF
                              * as well lowers the reported line by one --
                              * reason not visible in this chunk */
1375                         if (input.c == (utf32)EOF)
1376                                 --input.position.lineno;
1377                         print_line_directive(&input.position, "2");
1378                         goto restart;
1379                 } else {
1380                         info.at_line_begin = true;
1381                         set_punctuator(T_EOF);
1382                 }
1383                 return;
1384
             /* peek at the character after the backslash, then undo the
              * lookahead: \u and \U start an identifier, any other backslash
              * is handled as an unknown character */
1385         case '\\':
1386                 next_char();
1387                 int next_c = input.c;
1388                 put_back(input.c);
1389                 input.c = '\\';
1390                 if (next_c == 'U' || next_c == 'u') {
1391                         parse_symbol();
1392                         return;
1393                 }
1394                 /* FALLTHROUGH */
1395         default:
1396 dollar_sign:
                     /* either report and skip the character, or hand it out
                      * as a T_UNKNOWN_CHAR token whose symbol is the UTF-8
                      * encoding of the character */
1397                 if (error_on_unknown_chars) {
1398                         errorf(&pp_token.base.source_position,
1399                                "unknown character '%lc' found\n", input.c);
1400                         next_char();
1401                         goto restart;
1402                 } else {
1403                         assert(obstack_object_size(&symbol_obstack) == 0);
1404                         obstack_grow_utf8(&symbol_obstack, input.c);
1405                         obstack_1grow(&symbol_obstack, '\0');
1406                         char     *const string = obstack_finish(&symbol_obstack);
1407                         symbol_t *const symbol = symbol_table_insert(string);
                             /* the symbol table already had this string: the
                              * freshly built copy is not needed */
1408                         if (symbol->string != string)
1409                                 obstack_free(&symbol_obstack, string);
1410
1411                         pp_token.kind        = T_UNKNOWN_CHAR;
1412                         pp_token.base.symbol = symbol;
1413                         next_char();
1414                         return;
1415                 }
1416         }
1417 }
1418
1419 static void print_quoted_string(const char *const string)
1420 {
1421         fputc('"', out);
1422         for (const char *c = string; *c != 0; ++c) {
1423                 switch (*c) {
1424                 case '"': fputs("\\\"", out); break;
1425                 case '\\':  fputs("\\\\", out); break;
1426                 case '\a':  fputs("\\a", out); break;
1427                 case '\b':  fputs("\\b", out); break;
1428                 case '\f':  fputs("\\f", out); break;
1429                 case '\n':  fputs("\\n", out); break;
1430                 case '\r':  fputs("\\r", out); break;
1431                 case '\t':  fputs("\\t", out); break;
1432                 case '\v':  fputs("\\v", out); break;
1433                 case '\?':  fputs("\\?", out); break;
1434                 default:
1435                         if (!isprint(*c)) {
1436                                 fprintf(out, "\\%03o", (unsigned)*c);
1437                                 break;
1438                         }
1439                         fputc(*c, out);
1440                         break;
1441                 }
1442         }
1443         fputc('"', out);
1444 }
1445
1446 static void print_line_directive(const source_position_t *pos, const char *add)
1447 {
1448         if (!out)
1449                 return;
1450
1451         fprintf(out, "# %u ", pos->lineno);
1452         print_quoted_string(pos->input_name);
1453         if (add != NULL) {
1454                 fputc(' ', out);
1455                 fputs(add, out);
1456         }
1457         if (pos->is_system_header) {
1458                 fputs(" 3", out);
1459         }
1460
1461         printed_input_name = pos->input_name;
1462         input.output_line  = pos->lineno-1;
1463 }
1464
1465 static bool emit_newlines(void)
1466 {
1467         if (!out)
1468                 return true;
1469
1470         unsigned delta = pp_token.base.source_position.lineno - input.output_line;
1471         if (delta == 0)
1472                 return false;
1473
1474         if (delta >= 9) {
1475                 fputc('\n', out);
1476                 print_line_directive(&pp_token.base.source_position, NULL);
1477                 fputc('\n', out);
1478         } else {
1479                 for (unsigned i = 0; i < delta; ++i) {
1480                         fputc('\n', out);
1481                 }
1482         }
1483         input.output_line = pp_token.base.source_position.lineno;
1484
1485         unsigned whitespace = info.whitespace_at_line_begin;
1486         /* make sure there is at least 1 whitespace before a (macro-expanded)
1487          * '#' at line begin. I'm not sure why this is good, but gcc does it. */
1488         if (pp_token.kind == '#' && whitespace == 0)
1489                 ++whitespace;
1490         for (unsigned i = 0; i < whitespace; ++i)
1491                 fputc(' ', out);
1492
1493         return true;
1494 }
1495
1496 void set_preprocessor_output(FILE *output)
1497 {
1498         out = output;
1499         if (out != NULL) {
1500                 error_on_unknown_chars   = false;
1501                 resolve_escape_sequences = false;
1502         } else {
1503                 error_on_unknown_chars   = true;
1504                 resolve_escape_sequences = true;
1505         }
1506 }
1507
1508 void emit_pp_token(void)
1509 {
1510         if (!emit_newlines() &&
1511             (info.had_whitespace || tokens_would_paste(last_token, pp_token.kind)))
1512                 fputc(' ', out);
1513
1514         switch (pp_token.kind) {
1515         case T_NUMBER:
1516                 fputs(pp_token.literal.string.begin, out);
1517                 break;
1518
1519         case T_STRING_LITERAL:
1520                 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1521                 fputc('"', out);
1522                 fputs(pp_token.literal.string.begin, out);
1523                 fputc('"', out);
1524                 break;
1525
1526         case T_CHARACTER_CONSTANT:
1527                 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1528                 fputc('\'', out);
1529                 fputs(pp_token.literal.string.begin, out);
1530                 fputc('\'', out);
1531                 break;
1532
1533         case T_MACRO_PARAMETER:
1534                 panic("macro parameter not expanded");
1535
1536         default:
1537                 fputs(pp_token.base.symbol->string, out);
1538                 break;
1539         }
1540         last_token = pp_token.kind;
1541 }
1542
1543 static void eat_pp_directive(void)
1544 {
1545         while (!info.at_line_begin) {
1546                 next_input_token();
1547         }
1548 }
1549
1550 static bool strings_equal(const string_t *string1, const string_t *string2)
1551 {
1552         size_t size = string1->size;
1553         if (size != string2->size)
1554                 return false;
1555
1556         const char *c1 = string1->begin;
1557         const char *c2 = string2->begin;
1558         for (size_t i = 0; i < size; ++i, ++c1, ++c2) {
1559                 if (*c1 != *c2)
1560                         return false;
1561         }
1562         return true;
1563 }
1564
1565 static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
1566 {
1567         if (token1->kind != token2->kind)
1568                 return false;
1569
1570         switch (token1->kind) {
1571         case T_NUMBER:
1572         case T_CHARACTER_CONSTANT:
1573         case T_STRING_LITERAL:
1574                 return strings_equal(&token1->literal.string, &token2->literal.string);
1575
1576         case T_MACRO_PARAMETER:
1577                 return token1->macro_parameter.def->symbol
1578                     == token2->macro_parameter.def->symbol;
1579
1580         default:
1581                 return token1->base.symbol == token2->base.symbol;
1582         }
1583 }
1584
1585 static bool pp_definitions_equal(const pp_definition_t *definition1,
1586                                  const pp_definition_t *definition2)
1587 {
1588         if (definition1->list_len != definition2->list_len)
1589                 return false;
1590
1591         size_t               len = definition1->list_len;
1592         const saved_token_t *t1  = definition1->token_list;
1593         const saved_token_t *t2  = definition2->token_list;
1594         for (size_t i = 0; i < len; ++i, ++t1, ++t2) {
1595                 if (!pp_tokens_equal(&t1->token, &t2->token))
1596                         return false;
1597                 if (t1->had_whitespace != t2->had_whitespace)
1598                         return false;
1599         }
1600         return true;
1601 }
1602
1603 static void missing_macro_param_error(void)
1604 {
1605         errorf(&pp_token.base.source_position,
1606                "'#' is not followed by a macro parameter");
1607 }
1608
1609 static bool is_defineable_token(char const *const context)
1610 {
1611         if (info.at_line_begin) {
1612                 errorf(&pp_token.base.source_position, "unexpected end of line after %s", context);
1613         }
1614
1615         symbol_t *const symbol = pp_token.base.symbol;
1616         if (!symbol)
1617                 goto no_ident;
1618
1619         if (pp_token.kind != T_IDENTIFIER) {
1620                 switch (symbol->string[0]) {
1621                 case SYMBOL_CASES:
1622 dollar_sign:
1623                         break;
1624
1625                 default:
1626 no_ident:
1627                         errorf(&pp_token.base.source_position, "expected identifier after %s, got %K", context, &pp_token);
1628                         return false;
1629                 }
1630         }
1631
1632         /* TODO turn this into a flag in pp_def. */
1633         switch (symbol->pp_ID) {
1634         /* §6.10.8:4 */
1635         case TP_defined:
1636                 errorf(&pp_token.base.source_position, "%K cannot be used as macro name in %s", &pp_token, context);
1637                 return false;
1638
1639         default:
1640                 return true;
1641         }
1642 }
1643
1644 static void parse_define_directive(void)
1645 {
1646         eat_pp(TP_define);
1647         if (skip_mode) {
1648                 eat_pp_directive();
1649                 return;
1650         }
1651
1652         assert(obstack_object_size(&pp_obstack) == 0);
1653
1654         if (!is_defineable_token("#define"))
1655                 goto error_out;
1656         symbol_t *const symbol = pp_token.base.symbol;
1657
1658         pp_definition_t *new_definition
1659                 = obstack_alloc(&pp_obstack, sizeof(new_definition[0]));
1660         memset(new_definition, 0, sizeof(new_definition[0]));
1661         new_definition->symbol          = symbol;
1662         new_definition->source_position = input.position;
1663
1664         /* this is probably the only place where spaces are significant in the
1665          * lexer (except for the fact that they separate tokens). #define b(x)
1666          * is something else than #define b (x) */
1667         if (input.c == '(') {
1668                 next_input_token();
1669                 eat_token('(');
1670
1671                 while (true) {
1672                         switch (pp_token.kind) {
1673                         case T_DOTDOTDOT:
1674                                 new_definition->is_variadic = true;
1675                                 eat_token(T_DOTDOTDOT);
1676                                 if (pp_token.kind != ')') {
1677                                         errorf(&input.position,
1678                                                         "'...' not at end of macro argument list");
1679                                         goto error_out;
1680                                 }
1681                                 break;
1682
1683                         case T_IDENTIFIER: {
1684                                 pp_definition_t parameter;
1685                                 memset(&parameter, 0, sizeof(parameter));
1686                                 parameter.source_position = pp_token.base.source_position;
1687                                 parameter.symbol          = pp_token.base.symbol;
1688                                 parameter.is_parameter    = true;
1689                                 obstack_grow(&pp_obstack, &parameter, sizeof(parameter));
1690                                 eat_token(T_IDENTIFIER);
1691
1692                                 if (pp_token.kind == ',') {
1693                                         eat_token(',');
1694                                         break;
1695                                 }
1696
1697                                 if (pp_token.kind != ')') {
1698                                         errorf(&pp_token.base.source_position,
1699                                                "expected ',' or ')' after identifier, got %K",
1700                                                &pp_token);
1701                                         goto error_out;
1702                                 }
1703                                 break;
1704                         }
1705
1706                         case ')':
1707                                 eat_token(')');
1708                                 goto finish_argument_list;
1709
1710                         default:
1711                                 errorf(&pp_token.base.source_position,
1712                                        "expected identifier, '...' or ')' in #define argument list, got %K",
1713                                        &pp_token);
1714                                 goto error_out;
1715                         }
1716                 }
1717
1718         finish_argument_list:
1719                 new_definition->has_parameters = true;
1720                 size_t size = obstack_object_size(&pp_obstack);
1721                 new_definition->n_parameters
1722                         = size / sizeof(new_definition->parameters[0]);
1723                 new_definition->parameters = obstack_finish(&pp_obstack);
1724                 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1725                         pp_definition_t *param    = &new_definition->parameters[i];
1726                         symbol_t        *symbol   = param->symbol;
1727                         pp_definition_t *previous = symbol->pp_definition;
1728                         if (previous != NULL
1729                             && previous->function_definition == new_definition) {
1730                                 errorf(&param->source_position,
1731                                        "duplicate macro parameter '%Y'", symbol);
1732                                 param->symbol = sym_anonymous;
1733                                 continue;
1734                         }
1735                         param->parent_expansion    = previous;
1736                         param->function_definition = new_definition;
1737                         symbol->pp_definition      = param;
1738                 }
1739         } else {
1740                 next_input_token();
1741         }
1742
1743         /* construct token list */
1744         assert(obstack_object_size(&pp_obstack) == 0);
1745         bool next_must_be_param = false;
1746         while (!info.at_line_begin) {
1747                 if (pp_token.kind == T_IDENTIFIER) {
1748                         const symbol_t  *symbol     = pp_token.base.symbol;
1749                         pp_definition_t *definition = symbol->pp_definition;
1750                         if (definition != NULL
1751                             && definition->function_definition == new_definition) {
1752                             pp_token.kind                = T_MACRO_PARAMETER;
1753                             pp_token.macro_parameter.def = definition;
1754                         }
1755                 }
1756                 if (next_must_be_param && pp_token.kind != T_MACRO_PARAMETER) {
1757                         missing_macro_param_error();
1758                 }
1759                 saved_token_t saved_token;
1760                 saved_token.token = pp_token;
1761                 saved_token.had_whitespace = info.had_whitespace;
1762                 obstack_grow(&pp_obstack, &saved_token, sizeof(saved_token));
1763                 next_must_be_param
1764                         = new_definition->has_parameters && pp_token.kind == '#';
1765                 next_input_token();
1766         }
1767         if (next_must_be_param)
1768                 missing_macro_param_error();
1769
1770         new_definition->list_len   = obstack_object_size(&pp_obstack)
1771                 / sizeof(new_definition->token_list[0]);
1772         new_definition->token_list = obstack_finish(&pp_obstack);
1773
1774         if (new_definition->has_parameters) {
1775                 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1776                         pp_definition_t *param      = &new_definition->parameters[i];
1777                         symbol_t        *symbol     = param->symbol;
1778                         if (symbol == sym_anonymous)
1779                                 continue;
1780                         assert(symbol->pp_definition == param);
1781                         assert(param->function_definition == new_definition);
1782                         symbol->pp_definition   = param->parent_expansion;
1783                         param->parent_expansion = NULL;
1784                 }
1785         }
1786
1787         pp_definition_t *old_definition = symbol->pp_definition;
1788         if (old_definition != NULL) {
1789                 if (!pp_definitions_equal(old_definition, new_definition)) {
1790                         warningf(WARN_OTHER, &input.position, "multiple definition of macro '%Y' (first defined %P)", symbol, &old_definition->source_position);
1791                 } else {
1792                         /* reuse the old definition */
1793                         obstack_free(&pp_obstack, new_definition);
1794                         new_definition = old_definition;
1795                 }
1796         }
1797
1798         symbol->pp_definition = new_definition;
1799         return;
1800
1801 error_out:
1802         if (obstack_object_size(&pp_obstack) > 0) {
1803                 char *ptr = obstack_finish(&pp_obstack);
1804                 obstack_free(&pp_obstack, ptr);
1805         }
1806         eat_pp_directive();
1807 }
1808
1809 static void parse_undef_directive(void)
1810 {
1811         eat_pp(TP_undef);
1812         if (skip_mode) {
1813                 eat_pp_directive();
1814                 return;
1815         }
1816
1817         if (!is_defineable_token("#undef")) {
1818                 eat_pp_directive();
1819                 return;
1820         }
1821
1822         pp_token.base.symbol->pp_definition = NULL;
1823         next_input_token();
1824
1825         if (!info.at_line_begin) {
1826                 warningf(WARN_OTHER, &input.position, "extra tokens at end of #undef directive");
1827         }
1828         eat_pp_directive();
1829 }
1830
1831 /** behind an #include we can have the special headername lexems.
1832  * They're only allowed behind an #include so they're not recognized
1833  * by the normal next_preprocessing_token. We handle them as a special
1834  * exception here */
1835 static const char *parse_headername(bool *system_include)
1836 {
1837         if (info.at_line_begin) {
1838                 parse_error("expected headername after #include");
1839                 return NULL;
1840         }
1841
1842         /* check wether we have a "... or <... headername */
1843         source_position_t position = input.position;
1844         switch (input.c) {
1845         {
1846                 utf32 delimiter;
1847         case '<': delimiter = '>'; *system_include = true;  goto parse_name;
1848         case '"': delimiter = '"'; *system_include = false; goto parse_name;
1849 parse_name:
1850                 assert(obstack_object_size(&symbol_obstack) == 0);
1851                 next_char();
1852                 while (true) {
1853                         switch (input.c) {
1854                         case NEWLINE:
1855                         case EOF:
1856                                 {
1857                                         char *dummy = obstack_finish(&symbol_obstack);
1858                                         obstack_free(&symbol_obstack, dummy);
1859                                 }
1860                                 errorf(&pp_token.base.source_position,
1861                                        "header name without closing '%c'", (char)delimiter);
1862                                 return NULL;
1863
1864                         default:
1865                                 if (input.c == delimiter) {
1866                                         next_char();
1867                                         goto finish_headername;
1868                                 } else {
1869                                         obstack_1grow(&symbol_obstack, (char)input.c);
1870                                         next_char();
1871                                 }
1872                                 break;
1873                         }
1874                 }
1875                 /* we should never be here */
1876         }
1877
1878         default:
1879                 next_preprocessing_token();
1880                 if (info.at_line_begin) {
1881                         /* TODO: if we are already in the new line then we parsed more than
1882                          * wanted. We reuse the token, but could produce following errors
1883                          * misbehaviours... */
1884                         goto error_invalid_input;
1885                 }
1886                 if (pp_token.kind == T_STRING_LITERAL) {
1887                         *system_include = false;
1888                         return pp_token.literal.string.begin;
1889                 } else if (pp_token.kind == '<') {
1890                         *system_include = true;
1891                         assert(obstack_object_size(&pp_obstack) == 0);
1892                         while (true) {
1893                                 next_preprocessing_token();
1894                                 if (info.at_line_begin) {
1895                                         /* TODO: we shouldn't have parsed/expanded something on the
1896                                          * next line yet... */
1897                                         char *dummy = obstack_finish(&pp_obstack);
1898                                         obstack_free(&pp_obstack, dummy);
1899                                         goto error_invalid_input;
1900                                 }
1901                                 if (pp_token.kind == '>')
1902                                         break;
1903
1904                                 saved_token_t saved;
1905                                 saved.token          = pp_token;
1906                                 saved.had_whitespace = info.had_whitespace;
1907                                 obstack_grow(&pp_obstack, &saved, sizeof(saved));
1908                         }
1909                         size_t size = obstack_object_size(&pp_obstack);
1910                         assert(size % sizeof(saved_token_t) == 0);
1911                         size_t n_tokens = size / sizeof(saved_token_t);
1912                         saved_token_t *tokens = obstack_finish(&pp_obstack);
1913                         assert(obstack_object_size(&symbol_obstack) == 0);
1914                         for (size_t i = 0; i < n_tokens; ++i) {
1915                                 const saved_token_t *saved = &tokens[i];
1916                                 if (i > 0 && saved->had_whitespace)
1917                                         obstack_1grow(&symbol_obstack, ' ');
1918                                 grow_token(&symbol_obstack, &saved->token);
1919                         }
1920                         obstack_free(&pp_obstack, tokens);
1921                         goto finish_headername;
1922                 } else {
1923 error_invalid_input:
1924                         {
1925                                 char *dummy = obstack_finish(&symbol_obstack);
1926                                 obstack_free(&symbol_obstack, dummy);
1927                         }
1928
1929                         errorf(&pp_token.base.source_position,
1930                                "expected \"FILENAME\" or <FILENAME> after #include");
1931                         return NULL;
1932                 }
1933         }
1934
1935 finish_headername:
1936         obstack_1grow(&symbol_obstack, '\0');
1937         char *const  headername = obstack_finish(&symbol_obstack);
1938         const char  *identified = identify_string(headername);
1939         pp_token.base.source_position = position;
1940         return identified;
1941 }
1942
1943 static bool do_include(bool const system_include, bool const include_next, char const *const headername)
1944 {
1945         size_t const        headername_len = strlen(headername);
1946         searchpath_entry_t *entry;
1947         if (include_next) {
1948                 entry = input.path ? input.path->next : searchpath;
1949         } else {
1950                 if (!system_include) {
1951                         /* put dirname of current input on obstack */
1952                         const char *filename   = input.position.input_name;
1953                         const char *last_slash = strrchr(filename, '/');
1954                         const char *full_name;
1955                         if (last_slash != NULL) {
1956                                 size_t len = last_slash - filename;
1957                                 obstack_grow(&symbol_obstack, filename, len + 1);
1958                                 obstack_grow0(&symbol_obstack, headername, headername_len);
1959                                 char *complete_path = obstack_finish(&symbol_obstack);
1960                                 full_name = identify_string(complete_path);
1961                         } else {
1962                                 full_name = headername;
1963                         }
1964
1965                         FILE *file = fopen(full_name, "r");
1966                         if (file != NULL) {
1967                                 switch_pp_input(file, full_name, NULL);
1968                                 return true;
1969                         }
1970                 }
1971
1972                 entry = searchpath;
1973         }
1974
1975         assert(obstack_object_size(&symbol_obstack) == 0);
1976         /* check searchpath */
1977         for (; entry; entry = entry->next) {
1978             const char *path = entry->path;
1979             size_t      len  = strlen(path);
1980                 obstack_grow(&symbol_obstack, path, len);
1981                 if (path[len-1] != '/')
1982                         obstack_1grow(&symbol_obstack, '/');
1983                 obstack_grow(&symbol_obstack, headername, headername_len+1);
1984
1985                 char *complete_path = obstack_finish(&symbol_obstack);
1986                 FILE *file          = fopen(complete_path, "r");
1987                 if (file != NULL) {
1988                         const char *filename = identify_string(complete_path);
1989                         switch_pp_input(file, filename, entry);
1990                         return true;
1991                 } else {
1992                         obstack_free(&symbol_obstack, complete_path);
1993                 }
1994         }
1995
1996         return false;
1997 }
1998
1999 static void parse_include_directive(bool const include_next)
2000 {
2001         if (skip_mode) {
2002                 eat_pp_directive();
2003                 return;
2004         }
2005
2006         /* do not eat the TP_include, since it would already parse the next token
2007          * which needs special handling here. */
2008         skip_till_newline(true);
2009         bool system_include;
2010         const char *headername = parse_headername(&system_include);
2011         if (headername == NULL) {
2012                 eat_pp_directive();
2013                 return;
2014         }
2015
2016         bool had_nonwhitespace = skip_till_newline(false);
2017         if (had_nonwhitespace) {
2018                 warningf(WARN_OTHER, &input.position,
2019                          "extra tokens at end of #include directive");
2020         }
2021
2022         if (n_inputs > INCLUDE_LIMIT) {
2023                 errorf(&pp_token.base.source_position, "#include nested too deeply");
2024                 /* eat \n or EOF */
2025                 next_input_token();
2026                 return;
2027         }
2028
2029         /* switch inputs */
2030         info.whitespace_at_line_begin = 0;
2031         info.had_whitespace           = false;
2032         info.at_line_begin            = true;
2033         emit_newlines();
2034         push_input();
2035         bool res = do_include(system_include, include_next, headername);
2036         if (res) {
2037                 next_input_token();
2038         } else {
2039                 errorf(&pp_token.base.source_position, "failed including '%s': %s", headername, strerror(errno));
2040                 pop_restore_input();
2041         }
2042 }
2043
2044 static pp_conditional_t *push_conditional(void)
2045 {
2046         pp_conditional_t *conditional
2047                 = obstack_alloc(&pp_obstack, sizeof(*conditional));
2048         memset(conditional, 0, sizeof(*conditional));
2049
2050         conditional->parent = conditional_stack;
2051         conditional_stack   = conditional;
2052
2053         return conditional;
2054 }
2055
2056 static void pop_conditional(void)
2057 {
2058         assert(conditional_stack != NULL);
2059         conditional_stack = conditional_stack->parent;
2060 }
2061
2062 void check_unclosed_conditionals(void)
2063 {
2064         while (conditional_stack != NULL) {
2065                 pp_conditional_t *conditional = conditional_stack;
2066
2067                 if (conditional->in_else) {
2068                         errorf(&conditional->source_position, "unterminated #else");
2069                 } else {
2070                         errorf(&conditional->source_position, "unterminated condition");
2071                 }
2072                 pop_conditional();
2073         }
2074 }
2075
2076 static void parse_ifdef_ifndef_directive(bool const is_ifdef)
2077 {
2078         bool condition;
2079         eat_pp(is_ifdef ? TP_ifdef : TP_ifndef);
2080
2081         if (skip_mode) {
2082                 eat_pp_directive();
2083                 pp_conditional_t *conditional = push_conditional();
2084                 conditional->source_position  = pp_token.base.source_position;
2085                 conditional->skip             = true;
2086                 return;
2087         }
2088
2089         if (pp_token.kind != T_IDENTIFIER || info.at_line_begin) {
2090                 errorf(&pp_token.base.source_position,
2091                        "expected identifier after #%s, got %K",
2092                        is_ifdef ? "ifdef" : "ifndef", &pp_token);
2093                 eat_pp_directive();
2094
2095                 /* just take the true case in the hope to avoid further errors */
2096                 condition = true;
2097         } else {
2098                 /* evaluate wether we are in true or false case */
2099                 condition = (bool)pp_token.base.symbol->pp_definition == is_ifdef;
2100                 eat_token(T_IDENTIFIER);
2101
2102                 if (!info.at_line_begin) {
2103                         errorf(&pp_token.base.source_position,
2104                                "extra tokens at end of #%s",
2105                                is_ifdef ? "ifdef" : "ifndef");
2106                         eat_pp_directive();
2107                 }
2108         }
2109
2110         pp_conditional_t *conditional = push_conditional();
2111         conditional->source_position  = pp_token.base.source_position;
2112         conditional->condition        = condition;
2113
2114         if (!condition) {
2115                 skip_mode = true;
2116         }
2117 }
2118
2119 static void parse_else_directive(void)
2120 {
2121         eat_pp(TP_else);
2122
2123         if (!info.at_line_begin) {
2124                 if (!skip_mode) {
2125                         warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #else");
2126                 }
2127                 eat_pp_directive();
2128         }
2129
2130         pp_conditional_t *conditional = conditional_stack;
2131         if (conditional == NULL) {
2132                 errorf(&pp_token.base.source_position, "#else without prior #if");
2133                 return;
2134         }
2135
2136         if (conditional->in_else) {
2137                 errorf(&pp_token.base.source_position,
2138                        "#else after #else (condition started %P)",
2139                        &conditional->source_position);
2140                 skip_mode = true;
2141                 return;
2142         }
2143
2144         conditional->in_else = true;
2145         if (!conditional->skip) {
2146                 skip_mode = conditional->condition;
2147         }
2148         conditional->source_position = pp_token.base.source_position;
2149 }
2150
2151 static void parse_endif_directive(void)
2152 {
2153         eat_pp(TP_endif);
2154
2155         if (!info.at_line_begin) {
2156                 if (!skip_mode) {
2157                         warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #endif");
2158                 }
2159                 eat_pp_directive();
2160         }
2161
2162         pp_conditional_t *conditional = conditional_stack;
2163         if (conditional == NULL) {
2164                 errorf(&pp_token.base.source_position, "#endif without prior #if");
2165                 return;
2166         }
2167
2168         if (!conditional->skip) {
2169                 skip_mode = false;
2170         }
2171         pop_conditional();
2172 }
2173
/**
 * The "#pragma STDC" names that parse_pragma_directive() distinguishes.
 * STDC_UNKNOWN stands for everything else.
 */
typedef enum stdc_pragma_kind_t {
        STDC_UNKNOWN          = 0,
        STDC_FP_CONTRACT      = 1,
        STDC_FENV_ACCESS      = 2,
        STDC_CX_LIMITED_RANGE = 3
} stdc_pragma_kind_t;
2180
/**
 * The argument of a "#pragma STDC" directive as classified by
 * parse_pragma_directive(). STDC_VALUE_UNKNOWN marks an argument that is
 * none of ON, OFF or DEFAULT.
 */
typedef enum stdc_pragma_value_kind_t {
        STDC_VALUE_UNKNOWN = 0,
        STDC_VALUE_ON      = 1,
        STDC_VALUE_OFF     = 2,
        STDC_VALUE_DEFAULT = 3
} stdc_pragma_value_kind_t;
2187
2188 static void parse_pragma_directive(void)
2189 {
2190         eat_pp(TP_pragma);
2191         if (skip_mode) {
2192                 eat_pp_directive();
2193                 return;
2194         }
2195
2196         if (pp_token.kind != T_IDENTIFIER) {
2197                 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
2198                          "expected identifier after #pragma");
2199                 eat_pp_directive();
2200                 return;
2201         }
2202
2203         stdc_pragma_kind_t kind = STDC_UNKNOWN;
2204         if (pp_token.base.symbol->pp_ID == TP_STDC && c_mode & _C99) {
2205                 /* a STDC pragma */
2206                 next_input_token();
2207
2208                 switch (pp_token.base.symbol->pp_ID) {
2209                 case TP_FP_CONTRACT:      kind = STDC_FP_CONTRACT;      break;
2210                 case TP_FENV_ACCESS:      kind = STDC_FENV_ACCESS;      break;
2211                 case TP_CX_LIMITED_RANGE: kind = STDC_CX_LIMITED_RANGE; break;
2212                 default:                  break;
2213                 }
2214                 if (kind != STDC_UNKNOWN) {
2215                         next_input_token();
2216                         stdc_pragma_value_kind_t value;
2217                         switch (pp_token.base.symbol->pp_ID) {
2218                         case TP_ON:      value = STDC_VALUE_ON;      break;
2219                         case TP_OFF:     value = STDC_VALUE_OFF;     break;
2220                         case TP_DEFAULT: value = STDC_VALUE_DEFAULT; break;
2221                         default:         value = STDC_VALUE_UNKNOWN; break;
2222                         }
2223                         if (value == STDC_VALUE_UNKNOWN) {
2224                                 kind = STDC_UNKNOWN;
2225                                 errorf(&pp_token.base.source_position, "bad STDC pragma argument");
2226                         }
2227                 }
2228         }
2229         eat_pp_directive();
2230         if (kind == STDC_UNKNOWN) {
2231                 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
2232                          "encountered unknown #pragma");
2233         }
2234 }
2235
2236 static void parse_line_directive(void)
2237 {
2238         if (pp_token.kind != T_NUMBER) {
2239                 if (!skip_mode)
2240                         parse_error("expected integer");
2241         } else {
2242                 char      *end;
2243                 long const line = strtol(pp_token.literal.string.begin, &end, 0);
2244                 if (*end == '\0') {
2245                         /* use offset -1 as this is about the next line */
2246                         input.position.lineno = line - 1;
2247                         /* force output of line */
2248                         input.output_line = input.position.lineno - 20;
2249                 } else {
2250                         if (!skip_mode) {
2251                                 errorf(&input.position, "'%S' is not a valid line number",
2252                                            &pp_token.literal.string);
2253                         }
2254                 }
2255                 next_input_token();
2256                 if (info.at_line_begin)
2257                         return;
2258         }
2259         if (pp_token.kind == T_STRING_LITERAL
2260             && pp_token.literal.string.encoding == STRING_ENCODING_CHAR) {
2261                 input.position.input_name       = pp_token.literal.string.begin;
2262                 input.position.is_system_header = false;
2263                 next_input_token();
2264
2265                 /* attempt to parse numeric flags as outputted by gcc preprocessor */
2266                 while (!info.at_line_begin && pp_token.kind == T_NUMBER) {
2267                         /* flags:
2268                          * 1 - indicates start of a new file
2269                          * 2 - indicates return from a file
2270                          * 3 - indicates system header
2271                          * 4 - indicates implicit extern "C" in C++ mode
2272                          *
2273                          * currently we're only interested in "3"
2274                          */
2275                         if (streq(pp_token.literal.string.begin, "3")) {
2276                                 input.position.is_system_header = true;
2277                         }
2278                         next_input_token();
2279                 }
2280         }
2281
2282         eat_pp_directive();
2283 }
2284
2285 static void parse_error_directive(void)
2286 {
2287         if (skip_mode) {
2288                 eat_pp_directive();
2289                 return;
2290         }
2291
2292         bool const old_resolve_escape_sequences = resolve_escape_sequences;
2293         resolve_escape_sequences = false;
2294
2295         source_position_t const pos = pp_token.base.source_position;
2296         do {
2297                 if (info.had_whitespace && obstack_object_size(&pp_obstack) != 0)
2298                         obstack_1grow(&pp_obstack, ' ');
2299
2300                 switch (pp_token.kind) {
2301                 case T_NUMBER: {
2302                         string_t const *const str = &pp_token.literal.string;
2303                         obstack_grow(&pp_obstack, str->begin, str->size);
2304                         break;
2305                 }
2306
2307                 {
2308                         char delim;
2309                 case T_STRING_LITERAL:     delim =  '"'; goto string;
2310                 case T_CHARACTER_CONSTANT: delim = '\''; goto string;
2311 string:;
2312                         string_t const *const str = &pp_token.literal.string;
2313                         char     const *const enc = get_string_encoding_prefix(str->encoding);
2314                         obstack_printf(&pp_obstack, "%s%c%s%c", enc, delim, str->begin, delim);
2315                         break;
2316                 }
2317
2318                 default: {
2319                         char const *const str = pp_token.base.symbol->string;
2320                         obstack_grow(&pp_obstack, str, strlen(str));
2321                         break;
2322                 }
2323                 }
2324
2325                 next_input_token();
2326         } while (!info.at_line_begin);
2327
2328         resolve_escape_sequences = old_resolve_escape_sequences;
2329
2330         obstack_1grow(&pp_obstack, '\0');
2331         char *const str = obstack_finish(&pp_obstack);
2332         errorf(&pos, "#%s", str);
2333         obstack_free(&pp_obstack, str);
2334 }
2335
2336 static void parse_preprocessing_directive(void)
2337 {
2338         eat_token('#');
2339
2340         if (info.at_line_begin) {
2341                 /* empty directive */
2342                 return;
2343         }
2344
2345         if (pp_token.base.symbol) {
2346                 switch (pp_token.base.symbol->pp_ID) {
2347                 case TP_define:       parse_define_directive();            break;
2348                 case TP_else:         parse_else_directive();              break;
2349                 case TP_endif:        parse_endif_directive();             break;
2350                 case TP_error:        parse_error_directive();             break;
2351                 case TP_ifdef:        parse_ifdef_ifndef_directive(true);  break;
2352                 case TP_ifndef:       parse_ifdef_ifndef_directive(false); break;
2353                 case TP_include:      parse_include_directive(false);      break;
2354                 case TP_include_next: parse_include_directive(true);       break;
2355                 case TP_line:         next_input_token(); goto line_directive;
2356                 case TP_pragma:       parse_pragma_directive();            break;
2357                 case TP_undef:        parse_undef_directive();             break;
2358                 default:              goto skip;
2359                 }
2360         } else if (pp_token.kind == T_NUMBER) {
2361 line_directive:
2362                 parse_line_directive();
2363         } else {
2364 skip:
2365                 if (!skip_mode) {
2366                         errorf(&pp_token.base.source_position, "invalid preprocessing directive #%K", &pp_token);
2367                 }
2368                 eat_pp_directive();
2369         }
2370
2371         assert(info.at_line_begin);
2372 }
2373
2374 static void finish_current_argument(void)
2375 {
2376         if (current_argument == NULL)
2377                 return;
2378         size_t size = obstack_object_size(&pp_obstack);
2379         current_argument->list_len   = size/sizeof(current_argument->token_list[0]);
2380         current_argument->token_list = obstack_finish(&pp_obstack);
2381 }
2382
2383 void next_preprocessing_token(void)
2384 {
2385 restart:
2386         if (!expand_next()) {
2387                 do {
2388                         next_input_token();
2389                         while (pp_token.kind == '#' && info.at_line_begin) {
2390                                 parse_preprocessing_directive();
2391                         }
2392                 } while (skip_mode && pp_token.kind != T_EOF);
2393         }
2394
2395         const token_kind_t kind = pp_token.kind;
2396         if (current_call == NULL || argument_expanding != NULL) {
2397                 symbol_t *const symbol = pp_token.base.symbol;
2398                 if (symbol) {
2399                         if (kind == T_MACRO_PARAMETER) {
2400                                 assert(current_expansion != NULL);
2401                                 start_expanding(pp_token.macro_parameter.def);
2402                                 goto restart;
2403                         }
2404
2405                         pp_definition_t *const pp_definition = symbol->pp_definition;
2406                         if (pp_definition != NULL && !pp_definition->is_expanding) {
2407                                 if (pp_definition->has_parameters) {
2408
2409                                         /* check if next token is a '(' */
2410                                         whitespace_info_t old_info   = info;
2411                                         token_kind_t      next_token = peek_expansion();
2412                                         if (next_token == T_EOF) {
2413                                                 info.at_line_begin  = false;
2414                                                 info.had_whitespace = false;
2415                                                 skip_whitespace();
2416                                                 if (input.c == '(') {
2417                                                         next_token = '(';
2418                                                 }
2419                                         }
2420
2421                                         if (next_token == '(') {
2422                                                 if (current_expansion == NULL)
2423                                                         expansion_pos = pp_token.base.source_position;
2424                                                 next_preprocessing_token();
2425                                                 assert(pp_token.kind == '(');
2426
2427                                                 pp_definition->parent_expansion = current_expansion;
2428                                                 current_call              = pp_definition;
2429                                                 current_call->expand_pos  = 0;
2430                                                 current_call->expand_info = old_info;
2431                                                 if (current_call->n_parameters > 0) {
2432                                                         current_argument = &current_call->parameters[0];
2433                                                         assert(argument_brace_count == 0);
2434                                                 }
2435                                                 goto restart;
2436                                         } else {
2437                                                 /* skip_whitespaces() skipped newlines and whitespace,
2438                                                  * remember results for next token */
2439                                                 next_info = info;
2440                                                 info      = old_info;
2441                                                 return;
2442                                         }
2443                                 } else {
2444                                         if (current_expansion == NULL)
2445                                                 expansion_pos = pp_token.base.source_position;
2446                                         start_expanding(pp_definition);
2447                                         goto restart;
2448                                 }
2449                         }
2450                 }
2451         }
2452
2453         if (current_call != NULL) {
2454                 /* current_call != NULL */
2455                 if (kind == '(') {
2456                         ++argument_brace_count;
2457                 } else if (kind == ')') {
2458                         if (argument_brace_count > 0) {
2459                                 --argument_brace_count;
2460                         } else {
2461                                 finish_current_argument();
2462                                 assert(kind == ')');
2463                                 start_expanding(current_call);
2464                                 info = current_call->expand_info;
2465                                 current_call     = NULL;
2466                                 current_argument = NULL;
2467                                 goto restart;
2468                         }
2469                 } else if (kind == ',' && argument_brace_count == 0) {
2470                         finish_current_argument();
2471                         current_call->expand_pos++;
2472                         if (current_call->expand_pos >= current_call->n_parameters) {
2473                                 errorf(&pp_token.base.source_position,
2474                                            "too many arguments passed for macro '%Y'",
2475                                            current_call->symbol);
2476                                 current_argument = NULL;
2477                         } else {
2478                                 current_argument
2479                                         = &current_call->parameters[current_call->expand_pos];
2480                         }
2481                         goto restart;
2482                 } else if (kind == T_MACRO_PARAMETER) {
2483                         /* parameters have to be fully expanded before being used as
2484                          * parameters for another macro-call */
2485                         assert(current_expansion != NULL);
2486                         pp_definition_t *argument = pp_token.macro_parameter.def;
2487                         argument_expanding = argument;
2488                         start_expanding(argument);
2489                         goto restart;
2490                 } else if (kind == T_EOF) {
2491                         errorf(&expansion_pos,
2492                                "reached end of file while parsing arguments for '%Y'",
2493                                current_call->symbol);
2494                         return;
2495                 }
2496                 if (current_argument != NULL) {
2497                         saved_token_t saved;
2498                         saved.token = pp_token;
2499                         saved.had_whitespace = info.had_whitespace;
2500                         obstack_grow(&pp_obstack, &saved, sizeof(saved));
2501                 }
2502                 goto restart;
2503         }
2504 }
2505
2506
2507 static void prepend_include_path(const char *path)
2508 {
2509         searchpath_entry_t *entry = OALLOCZ(&config_obstack, searchpath_entry_t);
2510         entry->path = path;
2511         entry->next = searchpath;
2512         searchpath  = entry;
2513 }
2514
2515 static void setup_include_path(void)
2516 {
2517         /* built-in paths */
2518         prepend_include_path("/usr/include");
2519
2520         /* parse environment variable */
2521         const char *cpath = getenv("CPATH");
2522         if (cpath != NULL && *cpath != '\0') {
2523                 const char *begin = cpath;
2524                 const char *c;
2525                 do {
2526                         c = begin;
2527                         while (*c != '\0' && *c != ':')
2528                                 ++c;
2529
2530                         size_t len = c-begin;
2531                         if (len == 0) {
2532                                 /* for gcc compatibility (Matze: I would expect that
2533                                  * nothing happens for an empty entry...) */
2534                                 prepend_include_path(".");
2535                         } else {
2536                                 char *const string = obstack_copy0(&config_obstack, begin, len);
2537                                 prepend_include_path(string);
2538                         }
2539
2540                         begin = c+1;
2541                         /* skip : */
2542                         if (*begin == ':')
2543                                 ++begin;
2544                 } while(*c != '\0');
2545         }
2546 }
2547
2548 static void input_error(unsigned const delta_lines, unsigned const delta_cols, char const *const message)
2549 {
2550         source_position_t pos = pp_token.base.source_position;
2551         pos.lineno += delta_lines;
2552         pos.colno  += delta_cols;
2553         errorf(&pos, "%s", message);
2554 }
2555
2556 void init_preprocessor(void)
2557 {
2558         init_symbols();
2559
2560         obstack_init(&config_obstack);
2561         obstack_init(&pp_obstack);
2562         obstack_init(&input_obstack);
2563         strset_init(&stringset);
2564
2565         setup_include_path();
2566
2567         set_input_error_callback(input_error);
2568 }
2569
2570 void exit_preprocessor(void)
2571 {
2572         obstack_free(&input_obstack, NULL);
2573         obstack_free(&pp_obstack, NULL);
2574         obstack_free(&config_obstack, NULL);
2575
2576         strset_destroy(&stringset);
2577 }
2578
2579 int pptest_main(int argc, char **argv);
2580 int pptest_main(int argc, char **argv)
2581 {
2582         init_symbol_table();
2583         init_preprocessor();
2584         init_tokens();
2585
2586         error_on_unknown_chars   = false;
2587         resolve_escape_sequences = false;
2588
2589         /* simplistic commandline parser */
2590         const char *filename = NULL;
2591         const char *output = NULL;
2592         for (int i = 1; i < argc; ++i) {
2593                 const char *opt = argv[i];
2594                 if (streq(opt, "-I")) {
2595                         prepend_include_path(argv[++i]);
2596                         continue;
2597                 } else if (streq(opt, "-E")) {
2598                         /* ignore */
2599                 } else if (streq(opt, "-o")) {
2600                         output = argv[++i];
2601                         continue;
2602                 } else if (opt[0] == '-') {
2603                         fprintf(stderr, "Unknown option '%s'\n", opt);
2604                 } else {
2605                         if (filename != NULL)
2606                                 fprintf(stderr, "Multiple inputs not supported\n");
2607                         filename = argv[i];
2608                 }
2609         }
2610         if (filename == NULL) {
2611                 fprintf(stderr, "No input specified\n");
2612                 return 1;
2613         }
2614
2615         if (output == NULL) {
2616                 out = stdout;
2617         } else {
2618                 out = fopen(output, "w");
2619                 if (out == NULL) {
2620                         fprintf(stderr, "Couldn't open output '%s'\n", output);
2621                         return 1;
2622                 }
2623         }
2624
2625         /* just here for gcc compatibility */
2626         fprintf(out, "# 1 \"%s\"\n", filename);
2627         fprintf(out, "# 1 \"<built-in>\"\n");
2628         fprintf(out, "# 1 \"<command-line>\"\n");
2629
2630         FILE *file = fopen(filename, "r");
2631         if (file == NULL) {
2632                 fprintf(stderr, "Couldn't open input '%s'\n", filename);
2633                 return 1;
2634         }
2635         switch_pp_input(file, filename, NULL);
2636
2637         for (;;) {
2638                 next_preprocessing_token();
2639                 if (pp_token.kind == T_EOF)
2640                         break;
2641                 emit_pp_token();
2642         }
2643
2644         fputc('\n', out);
2645         check_unclosed_conditionals();
2646         fclose(close_pp_input());
2647         if (out != stdout)
2648                 fclose(out);
2649
2650         exit_tokens();
2651         exit_preprocessor();
2652         exit_symbol_table();
2653
2654         return 0;
2655 }