/* Source: nsz Git - cparser/blob - preprocessor.c */

   1 #include <config.h>
   2
   3 #include <assert.h>
   4 #include <errno.h>
   5 #include <string.h>
   6 #include <stdbool.h>
   7 #include <ctype.h>
   8
   9 #include "preprocessor.h"
  10 #include "token_t.h"
  11 #include "symbol_t.h"
  12 #include "adt/util.h"
  13 #include "adt/error.h"
  14 #include "adt/strutil.h"
  15 #include "adt/strset.h"
  16 #include "lang_features.h"
  17 #include "diagnostic.h"
  18 #include "string_rep.h"
  19 #include "input.h"
  20
/** Number of slots kept free at the front of pp_input_t::buf: freshly decoded
 * characters are stored starting at buf + MAX_PUTBACK, so put_back() has room
 * to push characters back in front of them. */
#define MAX_PUTBACK 3
/* NOTE(review): presumably the maximum #include nesting depth; its use is
 * outside this part of the file. */
#define INCLUDE_LIMIT 199  /* 199 is for gcc "compatibility" */
  23
/**
 * One entry of a stored token list (see pp_definition_t::token_list):
 * the token itself plus its whitespace flag.
 */
typedef struct saved_token_t {
        token_t token;
        /** the token had whitespace in front of it; stringify() emits a
         * single space for it (except before the first token) */
        bool    had_whitespace;
} saved_token_t;
  28
/**
 * Whitespace/line-start state that the lexer tracks for a token.
 */
typedef struct whitespace_info_t {
        /** current token had whitespace in front of it */
        bool     had_whitespace;
        /** current token is at the beginning of a line.
         * => a "#" at line begin starts a preprocessing directive. */
        bool     at_line_begin;
        /** number of spaces before the first token in a line */
        unsigned whitespace_at_line_begin;
} whitespace_info_t;
  38
/**
 * A stored token list that can be expanded: used for macro definitions and,
 * when is_parameter is set, for macro parameters/arguments.
 */
struct pp_definition_t {
        /* NOTE(review): presumably the name of the macro/parameter -- confirm */
        symbol_t          *symbol;
        /* NOTE(review): presumably the place of the definition -- confirm */
        source_position_t  source_position;
        /** expansion that was active when this one was started; set by
         * start_expanding(), reset to NULL by finished_expanding() */
        pp_definition_t   *parent_expansion;
        /** index into token_list of the next token expand_next() delivers */
        size_t             expand_pos;
        /* NOTE(review): not used in this part of the file */
        whitespace_info_t  expand_info;
        bool               is_variadic    : 1;
        /** true between start_expanding() and finished_expanding() */
        bool               is_expanding   : 1;
        bool               has_parameters : 1;
        /** set for definitions referenced by T_MACRO_PARAMETER tokens
         * (asserted in expand_next()) */
        bool               is_parameter   : 1;
        /* NOTE(review): presumably the macro a parameter belongs to -- confirm */
        pp_definition_t   *function_definition;
        size_t             n_parameters;
        pp_definition_t   *parameters;

        /* replacement */
        /** number of entries in token_list */
        size_t             list_len;
        saved_token_t     *token_list;
};
  57
typedef struct pp_conditional_t pp_conditional_t;
/**
 * One entry of a linked stack of conditionals (see conditional_stack).
 * NOTE(review): presumably one entry per open #if/#ifdef/#ifndef; the code
 * that maintains it is outside this part of the file.
 */
struct pp_conditional_t {
        source_position_t  source_position;
        bool               condition;
        bool               in_else;
        /** conditional in skip mode (then+else gets skipped) */
        bool               skip;
        /** enclosing entry on the stack */
        pp_conditional_t  *parent;
};
  67
typedef struct pp_input_t pp_input_t;
/**
 * State of one input being read: the stream, its decoder, a buffer of
 * decoded characters and the current source position.
 */
struct pp_input_t {
        /** stream given to switch_pp_input(); handed back by close_pp_input() */
        FILE               *file;
        /** decoder created with input_from_stream(), released with input_free() */
        input_t            *input;
        /** current character; EOF once the input is exhausted */
        utf32               c;
        /** decoded characters; the first MAX_PUTBACK slots are left free so
         * put_back() can step bufpos backwards */
        utf32               buf[1024+MAX_PUTBACK];
        /** one past the last decoded character in buf (NULL before first read) */
        const utf32        *bufend;
        /** next character to read from buf (NULL before first read) */
        const utf32        *bufpos;
        /** position (input name, line, column) of the current character */
        source_position_t   position;
        /** next entry on the stack of saved inputs (see push_input()) */
        pp_input_t         *parent;
        /* NOTE(review): presumably the line number last written to the
         * output; it is only reset and compared in this part of the file */
        unsigned            output_line;
        /** search path entry given to switch_pp_input() */
        searchpath_entry_t *path;
};
  81
/**
 * Node of a singly linked list of search path entries.
 */
struct searchpath_entry_t {
        /* NOTE(review): presumably a directory name -- confirm against users */
        const char         *path;
        /** following entry in the list */
        searchpath_entry_t *next;
        bool                is_system_path;
};
  87
/** the input currently being read */
static pp_input_t      input;

/** stack of inputs saved by push_input(), newest first */
static pp_input_t     *input_stack;
/** number of entries on input_stack */
static unsigned        n_inputs;
/** storage for the entries of input_stack */
static struct obstack  input_obstack;

static pp_conditional_t *conditional_stack;

/** the token most recently produced by the preprocessor */
token_t                  pp_token;
/** if false, '$' is not accepted as a symbol character
 * (see SYMBOL_CASES_WITHOUT_E_P) */
bool                     allow_dollar_in_symbol   = true;
/** if true, escape sequences in string/character literals are replaced by
 * the characters they denote; if false they are kept verbatim */
static bool              resolve_escape_sequences = true;
static bool              error_on_unknown_chars   = true;
static bool              skip_mode;
static FILE             *out;
static struct obstack    pp_obstack;
static struct obstack    config_obstack;
static const char       *printed_input_name = NULL;
/** source position assigned to every token produced by expand_next() */
static source_position_t expansion_pos;
/** innermost expansion in progress, or NULL when none is active */
static pp_definition_t  *current_expansion  = NULL;
static pp_definition_t  *current_call       = NULL;
static pp_definition_t  *current_argument   = NULL;
/** reset to NULL by finished_expanding() when that definition finishes */
static pp_definition_t  *argument_expanding = NULL;
static unsigned          argument_brace_count;
/** set of unique strings used by identify_string() */
static strset_t          stringset;
static token_kind_t      last_token;
 113
/**
 * A list of search path entries.
 */
struct searchpath_t {
        /** head of the list, NULL if empty */
        searchpath_entry_t  *first;
        /** initialised to the address of 'first'.
         * NOTE(review): presumably always points at the link where the next
         * entry gets appended -- confirm against the append code */
        searchpath_entry_t **anchor;
        bool                 is_system_path;
};
 119
/* The search path lists, all initially empty (anchor refers to the list's own
 * 'first' member). The last initializer is the is_system_path flag.
 * NOTE(review): which #include form consults which list is decided outside
 * this part of the file. */
searchpath_t bracket_searchpath = { NULL, &bracket_searchpath.first, false };
searchpath_t quote_searchpath   = { NULL, &quote_searchpath.first,   false };
searchpath_t system_searchpath  = { NULL, &system_searchpath.first,  true  };
searchpath_t after_searchpath   = { NULL, &after_searchpath.first,   true  };
 124
static whitespace_info_t next_info; /* valid if had_whitespace is true */
/** whitespace state belonging to the current token */
static whitespace_info_t info;

/* forward declarations for functions used before their definition */
static inline void next_char(void);
static void next_input_token(void);
static void print_line_directive(const source_position_t *pos, const char *add);

/* symbols for the digraph spellings ":>", "<:", "<%", "%:", "%:%:" and "%>";
 * filled in by init_symbols() */
static symbol_t *symbol_colongreater;
static symbol_t *symbol_lesscolon;
static symbol_t *symbol_lesspercent;
static symbol_t *symbol_percentcolon;
static symbol_t *symbol_percentcolonpercentcolon;
static symbol_t *symbol_percentgreater;
 138
 139 static void init_symbols(void)
 140 {
 141         symbol_colongreater             = symbol_table_insert(":>");
 142         symbol_lesscolon                = symbol_table_insert("<:");
 143         symbol_lesspercent              = symbol_table_insert("<%");
 144         symbol_percentcolon             = symbol_table_insert("%:");
 145         symbol_percentcolonpercentcolon = symbol_table_insert("%:%:");
 146         symbol_percentgreater           = symbol_table_insert("%>");
 147 }
 148
 149 void switch_pp_input(FILE *const file, char const *const filename, searchpath_entry_t *const path, bool const is_system_header)
 150 {
 151         input.file                      = file;
 152         input.input                     = input_from_stream(file, NULL);
 153         input.bufend                    = NULL;
 154         input.bufpos                    = NULL;
 155         input.output_line               = 0;
 156         input.position.input_name       = filename;
 157         input.position.lineno           = 1;
 158         input.position.is_system_header = is_system_header;
 159         input.path                      = path;
 160
 161         /* indicate that we're at a new input */
 162         print_line_directive(&input.position, input_stack != NULL ? "1" : NULL);
 163
 164         /* place a virtual '\n' so we realize we're at line begin */
 165         input.position.lineno = 0;
 166         input.c               = '\n';
 167 }
 168
 169 FILE *close_pp_input(void)
 170 {
 171         input_free(input.input);
 172
 173         FILE* const file = input.file;
 174         assert(file);
 175
 176         input.input  = NULL;
 177         input.file   = NULL;
 178         input.bufend = NULL;
 179         input.bufpos = NULL;
 180         input.c      = EOF;
 181
 182         return file;
 183 }
 184
 185 static void push_input(void)
 186 {
 187         pp_input_t *const saved_input = obstack_copy(&input_obstack, &input, sizeof(input));
 188
 189         /* adjust buffer positions */
 190         if (input.bufpos != NULL)
 191                 saved_input->bufpos = saved_input->buf + (input.bufpos - input.buf);
 192         if (input.bufend != NULL)
 193                 saved_input->bufend = saved_input->buf + (input.bufend - input.buf);
 194
 195         saved_input->parent = input_stack;
 196         input_stack         = saved_input;
 197         ++n_inputs;
 198 }
 199
 200 static void pop_restore_input(void)
 201 {
 202         assert(n_inputs > 0);
 203         assert(input_stack != NULL);
 204
 205         pp_input_t *saved_input = input_stack;
 206
 207         memcpy(&input, saved_input, sizeof(input));
 208         input.parent = NULL;
 209
 210         /* adjust buffer positions */
 211         if (saved_input->bufpos != NULL)
 212                 input.bufpos = input.buf + (saved_input->bufpos - saved_input->buf);
 213         if (saved_input->bufend != NULL)
 214                 input.bufend = input.buf + (saved_input->bufend - saved_input->buf);
 215
 216         input_stack = saved_input->parent;
 217         obstack_free(&input_obstack, saved_input);
 218         --n_inputs;
 219 }
 220
 221 /**
 222  * Prints a parse error message at the current token.
 223  *
 224  * @param msg   the error message
 225  */
 226 static void parse_error(const char *msg)
 227 {
 228         errorf(&pp_token.base.source_position,  "%s", msg);
 229 }
 230
 231 static inline void next_real_char(void)
 232 {
 233         assert(input.bufpos <= input.bufend);
 234         if (input.bufpos >= input.bufend) {
 235                 size_t const n = decode(input.input, input.buf + MAX_PUTBACK, lengthof(input.buf) - MAX_PUTBACK);
 236                 if (n == 0) {
 237                         input.c = EOF;
 238                         return;
 239                 }
 240                 input.bufpos = input.buf + MAX_PUTBACK;
 241                 input.bufend = input.bufpos + n;
 242         }
 243         input.c = *input.bufpos++;
 244         ++input.position.colno;
 245 }
 246
 247 /**
 248  * Put a character back into the buffer.
 249  *
 250  * @param pc  the character to put back
 251  */
 252 static inline void put_back(utf32 const pc)
 253 {
 254         assert(input.bufpos > input.buf);
 255         *(--input.bufpos - input.buf + input.buf) = (char) pc;
 256         --input.position.colno;
 257 }
 258
/**
 * Case-label helper for a switch on input.c; use it as "case NEWLINE:".
 * It matches '\r', '\r' followed by '\n', and '\n' as one line break,
 * consumes the line break with next_char(), increments
 * input.position.lineno, resets input.position.colno to 1 and then continues
 * with the statements written after "case NEWLINE:".
 * The expansion defines a local label called "newline", so it can be used at
 * most once per function.
 */
#define NEWLINE \
        '\r': \
                next_char(); \
                if (input.c == '\n') { \
        case '\n': \
                        next_char(); \
                } \
                ++input.position.lineno; \
                input.position.colno = 1; \
                goto newline; \
                newline // Let it look like an ordinary case label.
 270
/** Asserts that the current character is c_type, then advances to the next
 * character with next_char(). */
#define eat(c_type) (assert(input.c == c_type), next_char())
 272
 273 static void maybe_concat_lines(void)
 274 {
 275         eat('\\');
 276
 277         switch (input.c) {
 278         case NEWLINE:
 279                 info.whitespace_at_line_begin = 0;
 280                 return;
 281
 282         default:
 283                 break;
 284         }
 285
 286         put_back(input.c);
 287         input.c = '\\';
 288 }
 289
 290 /**
 291  * Set c to the next input character, ie.
 292  * after expanding trigraphs.
 293  */
 294 static inline void next_char(void)
 295 {
 296         next_real_char();
 297
 298         /* filter trigraphs and concatenated lines */
 299         if (UNLIKELY(input.c == '\\')) {
 300                 maybe_concat_lines();
 301                 goto end_of_next_char;
 302         }
 303
 304         if (LIKELY(input.c != '?'))
 305                 goto end_of_next_char;
 306
 307         next_real_char();
 308         if (LIKELY(input.c != '?')) {
 309                 put_back(input.c);
 310                 input.c = '?';
 311                 goto end_of_next_char;
 312         }
 313
 314         next_real_char();
 315         switch (input.c) {
 316         case '=': input.c = '#'; break;
 317         case '(': input.c = '['; break;
 318         case '/': input.c = '\\'; maybe_concat_lines(); break;
 319         case ')': input.c = ']'; break;
 320         case '\'': input.c = '^'; break;
 321         case '<': input.c = '{'; break;
 322         case '!': input.c = '|'; break;
 323         case '>': input.c = '}'; break;
 324         case '-': input.c = '~'; break;
 325         default:
 326                 put_back(input.c);
 327                 put_back('?');
 328                 input.c = '?';
 329                 break;
 330         }
 331
 332 end_of_next_char:;
 333 #ifdef DEBUG_CHARS
 334         printf("nchar '%c'\n", input.c);
 335 #endif
 336 }
 337
 338
 339
 340 /**
 341  * Returns true if the given char is a octal digit.
 342  *
 343  * @param char  the character to check
 344  */
 345 static inline bool is_octal_digit(int chr)
 346 {
 347         switch (chr) {
 348         case '0':
 349         case '1':
 350         case '2':
 351         case '3':
 352         case '4':
 353         case '5':
 354         case '6':
 355         case '7':
 356                 return true;
 357         default:
 358                 return false;
 359         }
 360 }
 361
 362 /**
 363  * Returns the value of a digit.
 364  * The only portable way to do it ...
 365  */
 366 static int digit_value(int digit)
 367 {
 368         switch (digit) {
 369         case '0': return 0;
 370         case '1': return 1;
 371         case '2': return 2;
 372         case '3': return 3;
 373         case '4': return 4;
 374         case '5': return 5;
 375         case '6': return 6;
 376         case '7': return 7;
 377         case '8': return 8;
 378         case '9': return 9;
 379         case 'a':
 380         case 'A': return 10;
 381         case 'b':
 382         case 'B': return 11;
 383         case 'c':
 384         case 'C': return 12;
 385         case 'd':
 386         case 'D': return 13;
 387         case 'e':
 388         case 'E': return 14;
 389         case 'f':
 390         case 'F': return 15;
 391         default:
 392                 panic("wrong character given");
 393         }
 394 }
 395
 396 /**
 397  * Parses an octal character sequence.
 398  *
 399  * @param first_digit  the already read first digit
 400  */
 401 static utf32 parse_octal_sequence(const utf32 first_digit)
 402 {
 403         assert(is_octal_digit(first_digit));
 404         utf32 value = digit_value(first_digit);
 405         if (!is_octal_digit(input.c)) return value;
 406         value = 8 * value + digit_value(input.c);
 407         next_char();
 408         if (!is_octal_digit(input.c)) return value;
 409         value = 8 * value + digit_value(input.c);
 410         next_char();
 411         return value;
 412
 413 }
 414
 415 /**
 416  * Parses a hex character sequence.
 417  */
 418 static utf32 parse_hex_sequence(void)
 419 {
 420         utf32 value = 0;
 421         while (isxdigit(input.c)) {
 422                 value = 16 * value + digit_value(input.c);
 423                 next_char();
 424         }
 425         return value;
 426 }
 427
 428 static bool is_universal_char_valid(utf32 const v)
 429 {
 430         /* C11 §6.4.3:2 */
 431         if (v < 0xA0U && v != 0x24 && v != 0x40 && v != 0x60)
 432                 return false;
 433         if (0xD800 <= v && v <= 0xDFFF)
 434                 return false;
 435         return true;
 436 }
 437
 438 static utf32 parse_universal_char(unsigned const n_digits)
 439 {
 440         utf32 v = 0;
 441         for (unsigned k = n_digits; k != 0; --k) {
 442                 if (isxdigit(input.c)) {
 443                         v = 16 * v + digit_value(input.c);
 444                         if (!resolve_escape_sequences)
 445                                 obstack_1grow(&symbol_obstack, input.c);
 446                         next_char();
 447                 } else {
 448                         errorf(&input.position,
 449                                "short universal character name, expected %u more digits",
 450                                    k);
 451                         break;
 452                 }
 453         }
 454         if (!is_universal_char_valid(v)) {
 455                 errorf(&input.position,
 456                        "\\%c%0*X is not a valid universal character name",
 457                        n_digits == 4 ? 'u' : 'U', (int)n_digits, v);
 458         }
 459         return v;
 460 }
 461
 462 static bool is_universal_char_valid_identifier(utf32 const v)
 463 {
 464         /* C11 Annex D.1 */
 465         if (                v == 0x000A8) return true;
 466         if (                v == 0x000AA) return true;
 467         if (                v == 0x000AD) return true;
 468         if (                v == 0x000AF) return true;
 469         if (0x000B2 <= v && v <= 0x000B5) return true;
 470         if (0x000B7 <= v && v <= 0x000BA) return true;
 471         if (0x000BC <= v && v <= 0x000BE) return true;
 472         if (0x000C0 <= v && v <= 0x000D6) return true;
 473         if (0x000D8 <= v && v <= 0x000F6) return true;
 474         if (0x000F8 <= v && v <= 0x000FF) return true;
 475         if (0x00100 <= v && v <= 0x0167F) return true;
 476         if (0x01681 <= v && v <= 0x0180D) return true;
 477         if (0x0180F <= v && v <= 0x01FFF) return true;
 478         if (0x0200B <= v && v <= 0x0200D) return true;
 479         if (0x0202A <= v && v <= 0x0202E) return true;
 480         if (0x0203F <= v && v <= 0x02040) return true;
 481         if (                v == 0x02054) return true;
 482         if (0x02060 <= v && v <= 0x0206F) return true;
 483         if (0x02070 <= v && v <= 0x0218F) return true;
 484         if (0x02460 <= v && v <= 0x024FF) return true;
 485         if (0x02776 <= v && v <= 0x02793) return true;
 486         if (0x02C00 <= v && v <= 0x02DFF) return true;
 487         if (0x02E80 <= v && v <= 0x02FFF) return true;
 488         if (0x03004 <= v && v <= 0x03007) return true;
 489         if (0x03021 <= v && v <= 0x0302F) return true;
 490         if (0x03031 <= v && v <= 0x0303F) return true;
 491         if (0x03040 <= v && v <= 0x0D7FF) return true;
 492         if (0x0F900 <= v && v <= 0x0FD3D) return true;
 493         if (0x0FD40 <= v && v <= 0x0FDCF) return true;
 494         if (0x0FDF0 <= v && v <= 0x0FE44) return true;
 495         if (0x0FE47 <= v && v <= 0x0FFFD) return true;
 496         if (0x10000 <= v && v <= 0x1FFFD) return true;
 497         if (0x20000 <= v && v <= 0x2FFFD) return true;
 498         if (0x30000 <= v && v <= 0x3FFFD) return true;
 499         if (0x40000 <= v && v <= 0x4FFFD) return true;
 500         if (0x50000 <= v && v <= 0x5FFFD) return true;
 501         if (0x60000 <= v && v <= 0x6FFFD) return true;
 502         if (0x70000 <= v && v <= 0x7FFFD) return true;
 503         if (0x80000 <= v && v <= 0x8FFFD) return true;
 504         if (0x90000 <= v && v <= 0x9FFFD) return true;
 505         if (0xA0000 <= v && v <= 0xAFFFD) return true;
 506         if (0xB0000 <= v && v <= 0xBFFFD) return true;
 507         if (0xC0000 <= v && v <= 0xCFFFD) return true;
 508         if (0xD0000 <= v && v <= 0xDFFFD) return true;
 509         if (0xE0000 <= v && v <= 0xEFFFD) return true;
 510         return false;
 511 }
 512
 513 static bool is_universal_char_valid_identifier_start(utf32 const v)
 514 {
 515         /* C11 Annex D.2 */
 516         if (0x0300 <= v && v <= 0x036F) return false;
 517         if (0x1DC0 <= v && v <= 0x1DFF) return false;
 518         if (0x20D0 <= v && v <= 0x20FF) return false;
 519         if (0xFE20 <= v && v <= 0xFE2F) return false;
 520         return true;
 521 }
 522
 523 /**
 524  * Parse an escape sequence.
 525  */
 526 static utf32 parse_escape_sequence(void)
 527 {
 528         eat('\\');
 529
 530         utf32 const ec = input.c;
 531         next_char();
 532
 533         switch (ec) {
 534         case '"':  return '"';
 535         case '\'': return '\'';
 536         case '\\': return '\\';
 537         case '?': return '\?';
 538         case 'a': return '\a';
 539         case 'b': return '\b';
 540         case 'f': return '\f';
 541         case 'n': return '\n';
 542         case 'r': return '\r';
 543         case 't': return '\t';
 544         case 'v': return '\v';
 545         case 'x':
 546                 return parse_hex_sequence();
 547         case '0':
 548         case '1':
 549         case '2':
 550         case '3':
 551         case '4':
 552         case '5':
 553         case '6':
 554         case '7':
 555                 return parse_octal_sequence(ec);
 556         case EOF:
 557                 parse_error("reached end of file while parsing escape sequence");
 558                 return EOF;
 559         /* \E is not documented, but handled, by GCC.  It is acceptable according
 560          * to §6.11.4, whereas \e is not. */
 561         case 'E':
 562         case 'e':
 563                 if (c_mode & _GNUC)
 564                         return 27;   /* hopefully 27 is ALWAYS the code for ESCAPE */
 565                 break;
 566
 567         case 'U': return parse_universal_char(8);
 568         case 'u': return parse_universal_char(4);
 569
 570         default:
 571                 break;
 572         }
 573         /* §6.4.4.4:8 footnote 64 */
 574         parse_error("unknown escape sequence");
 575         return EOF;
 576 }
 577
 578 static const char *identify_string(char *string)
 579 {
 580         const char *result = strset_insert(&stringset, string);
 581         if (result != string) {
 582                 obstack_free(&symbol_obstack, string);
 583         }
 584         return result;
 585 }
 586
 587 static string_t sym_make_string(string_encoding_t const enc)
 588 {
 589         obstack_1grow(&symbol_obstack, '\0');
 590         size_t      const len    = obstack_object_size(&symbol_obstack) - 1;
 591         char       *const string = obstack_finish(&symbol_obstack);
 592         char const *const result = identify_string(string);
 593         return (string_t){ result, len, enc };
 594 }
 595
 596 string_t make_string(char const *const string)
 597 {
 598         obstack_grow(&symbol_obstack, string, strlen(string));
 599         return sym_make_string(STRING_ENCODING_CHAR);
 600 }
 601
 602 static void parse_string(utf32 const delimiter, token_kind_t const kind,
 603                          string_encoding_t const enc,
 604                          char const *const context)
 605 {
 606         const unsigned start_linenr = input.position.lineno;
 607
 608         eat(delimiter);
 609
 610         while (true) {
 611                 switch (input.c) {
 612                 case '\\': {
 613                         if (resolve_escape_sequences) {
 614                                 utf32 const tc = parse_escape_sequence();
 615                                 if (enc == STRING_ENCODING_CHAR) {
 616                                         if (tc >= 0x100) {
 617                                                 warningf(WARN_OTHER, &pp_token.base.source_position, "escape sequence out of range");
 618                                         }
 619                                         obstack_1grow(&symbol_obstack, tc);
 620                                 } else {
 621                                         obstack_grow_utf8(&symbol_obstack, tc);
 622                                 }
 623                         } else {
 624                                 obstack_1grow(&symbol_obstack, (char)input.c);
 625                                 next_char();
 626                                 obstack_1grow(&symbol_obstack, (char)input.c);
 627                                 next_char();
 628                         }
 629                         break;
 630                 }
 631
 632                 case NEWLINE:
 633                         errorf(&pp_token.base.source_position, "newline while parsing %s", context);
 634                         break;
 635
 636                 case EOF: {
 637                         source_position_t source_position;
 638                         source_position.input_name = pp_token.base.source_position.input_name;
 639                         source_position.lineno     = start_linenr;
 640                         errorf(&source_position, "EOF while parsing %s", context);
 641                         goto end_of_string;
 642                 }
 643
 644                 default:
 645                         if (input.c == delimiter) {
 646                                 next_char();
 647                                 goto end_of_string;
 648                         } else {
 649                                 obstack_grow_utf8(&symbol_obstack, input.c);
 650                                 next_char();
 651                                 break;
 652                         }
 653                 }
 654         }
 655
 656 end_of_string:
 657         pp_token.kind           = kind;
 658         pp_token.literal.string = sym_make_string(enc);
 659 }
 660
 661 static void parse_string_literal(string_encoding_t const enc)
 662 {
 663         parse_string('"', T_STRING_LITERAL, enc, "string literal");
 664 }
 665
 666 static void parse_character_constant(string_encoding_t const enc)
 667 {
 668         parse_string('\'', T_CHARACTER_CONSTANT, enc, "character constant");
 669         if (pp_token.literal.string.size == 0) {
 670                 parse_error("empty character constant");
 671         }
 672 }
 673
/**
 * Case labels for '$', '_' and all letters except 'e', 'E', 'p' and 'P';
 * use it as "case SYMBOL_CASES_WITHOUT_E_P:".
 * If allow_dollar_in_symbol is false, a '$' jumps to the label
 * "dollar_sign", which the function using the macro has to provide.
 * NOTE(review): e/E/p/P are presumably left out so that a user can treat
 * exponent characters separately -- the users are outside this part of the
 * file.
 */
#define SYMBOL_CASES_WITHOUT_E_P \
             '$': if (!allow_dollar_in_symbol) goto dollar_sign; \
        case 'a': \
        case 'b': \
        case 'c': \
        case 'd': \
        case 'f': \
        case 'g': \
        case 'h': \
        case 'i': \
        case 'j': \
        case 'k': \
        case 'l': \
        case 'm': \
        case 'n': \
        case 'o': \
        case 'q': \
        case 'r': \
        case 's': \
        case 't': \
        case 'u': \
        case 'v': \
        case 'w': \
        case 'x': \
        case 'y': \
        case 'z': \
        case 'A': \
        case 'B': \
        case 'C': \
        case 'D': \
        case 'F': \
        case 'G': \
        case 'H': \
        case 'I': \
        case 'J': \
        case 'K': \
        case 'L': \
        case 'M': \
        case 'N': \
        case 'O': \
        case 'Q': \
        case 'R': \
        case 'S': \
        case 'T': \
        case 'U': \
        case 'V': \
        case 'W': \
        case 'X': \
        case 'Y': \
        case 'Z': \
        case '_'
 725
/**
 * Case labels for every character handled by SYMBOL_CASES_WITHOUT_E_P plus
 * 'e', 'p', 'E' and 'P'; use it as "case SYMBOL_CASES:". The "dollar_sign"
 * label requirement of SYMBOL_CASES_WITHOUT_E_P applies here as well.
 */
#define SYMBOL_CASES \
             SYMBOL_CASES_WITHOUT_E_P: \
        case 'e': \
        case 'p': \
        case 'E': \
        case 'P'
 732
/** Case labels for the decimal digits '0' to '9'; use it as
 * "case DIGIT_CASES:". */
#define DIGIT_CASES \
             '0':  \
        case '1':  \
        case '2':  \
        case '3':  \
        case '4':  \
        case '5':  \
        case '6':  \
        case '7':  \
        case '8':  \
        case '9'
 744
 745 static void start_expanding(pp_definition_t *definition)
 746 {
 747         definition->parent_expansion = current_expansion;
 748         definition->expand_pos       = 0;
 749         definition->is_expanding     = true;
 750         if (definition->list_len > 0) {
 751                 definition->token_list[0].had_whitespace
 752                         = info.had_whitespace;
 753         }
 754         current_expansion = definition;
 755 }
 756
 757 static void finished_expanding(pp_definition_t *definition)
 758 {
 759         assert(definition->is_expanding);
 760         pp_definition_t *parent = definition->parent_expansion;
 761         definition->parent_expansion = NULL;
 762         definition->is_expanding     = false;
 763
 764         /* stop further expanding once we expanded a parameter used in a
 765          * sub macro-call */
 766         if (definition == argument_expanding)
 767                 argument_expanding = NULL;
 768
 769         assert(current_expansion == definition);
 770         current_expansion = parent;
 771 }
 772
 773 static void grow_string_escaped(struct obstack *obst, const string_t *string, char const *delimiter)
 774 {
 775         char const *prefix = get_string_encoding_prefix(string->encoding);
 776         obstack_printf(obst, "%s%s", prefix, delimiter);
 777         size_t      size = string->size;
 778         const char *str  = string->begin;
 779         if (resolve_escape_sequences) {
 780                 obstack_grow(obst, str, size);
 781         } else {
 782                 for (size_t i = 0; i < size; ++i) {
 783                         const char c = str[i];
 784                         if (c == '\\' || c == '"')
 785                                 obstack_1grow(obst, '\\');
 786                         obstack_1grow(obst, c);
 787                 }
 788         }
 789         obstack_printf(obst, "%s", delimiter);
 790 }
 791
 792 static void grow_token(struct obstack *obst, const token_t *token)
 793 {
 794         switch (token->kind) {
 795         case T_NUMBER:
 796                 obstack_grow(obst, token->literal.string.begin, token->literal.string.size);
 797                 break;
 798
 799         case T_STRING_LITERAL: {
 800                 char const *const delimiter = resolve_escape_sequences ? "\"" : "\\\"";
 801                 grow_string_escaped(obst, &token->literal.string, delimiter);
 802                 break;
 803         }
 804
 805         case T_CHARACTER_CONSTANT:
 806                 grow_string_escaped(obst, &token->literal.string, "'");
 807                 break;
 808
 809         case T_IDENTIFIER:
 810         default: {
 811                 const char *str = token->base.symbol->string;
 812                 size_t      len = strlen(str);
 813                 obstack_grow(obst, str, len);
 814                 break;
 815         }
 816         }
 817 }
 818
 819 static void stringify(const pp_definition_t *definition)
 820 {
 821         assert(obstack_object_size(&symbol_obstack) == 0);
 822
 823         size_t list_len = definition->list_len;
 824         for (size_t p = 0; p < list_len; ++p) {
 825                 const saved_token_t *saved = &definition->token_list[p];
 826                 if (p > 0 && saved->had_whitespace)
 827                         obstack_1grow(&symbol_obstack, ' ');
 828                 grow_token(&symbol_obstack, &saved->token);
 829         }
 830         pp_token.kind           = T_STRING_LITERAL;
 831         pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
 832 }
 833
 834 static inline void set_punctuator(token_kind_t const kind)
 835 {
 836         pp_token.kind        = kind;
 837         pp_token.base.symbol = token_symbols[kind];
 838 }
 839
 840 static inline void set_digraph(token_kind_t const kind, symbol_t *const symbol)
 841 {
 842         pp_token.kind        = kind;
 843         pp_token.base.symbol = symbol;
 844 }
 845
 846 /**
 847  * returns next final token from a preprocessor macro expansion
 848  */
 849 static bool expand_next(void)
 850 {
 851         if (current_expansion == NULL)
 852                 return false;
 853
 854 restart:;
 855         size_t pos = current_expansion->expand_pos;
 856         if (pos >= current_expansion->list_len) {
 857                 finished_expanding(current_expansion);
 858                 /* it was the outermost expansion, parse pptoken normally */
 859                 if (current_expansion == NULL) {
 860                         return false;
 861                 }
 862                 goto restart;
 863         }
 864         const saved_token_t *saved = &current_expansion->token_list[pos++];
 865         pp_token = saved->token;
 866         if (pp_token.kind == '#') {
 867                 if (pos < current_expansion->list_len) {
 868                         const saved_token_t *next = &current_expansion->token_list[pos];
 869                         if (next->token.kind == T_MACRO_PARAMETER) {
 870                                 pp_definition_t *def = next->token.macro_parameter.def;
 871                                 assert(def != NULL && def->is_parameter);
 872                                 stringify(def);
 873                                 ++pos;
 874                         }
 875                 }
 876         }
 877
 878         if (current_expansion->expand_pos > 0)
 879                 info.had_whitespace = saved->had_whitespace;
 880         current_expansion->expand_pos = pos;
 881         pp_token.base.source_position = expansion_pos;
 882
 883         return true;
 884 }
 885
 886 /**
 887  * Returns the next token kind found when continuing the current expansions
 888  * without starting new sub-expansions.
 889  */
 890 static token_kind_t peek_expansion(void)
 891 {
 892         for (pp_definition_t *e = current_expansion; e; e = e->parent_expansion) {
 893                 if (e->expand_pos < e->list_len)
 894                         return e->token_list[e->expand_pos].token.kind;
 895         }
 896         return T_EOF;
 897 }
 898
 899 static void skip_line_comment(void)
 900 {
 901         info.had_whitespace = true;
 902         while (true) {
 903                 switch (input.c) {
 904                 case EOF:
 905                         return;
 906
 907                 case '\r':
 908                 case '\n':
 909                         return;
 910
 911                 default:
 912                         next_char();
 913                         break;
 914                 }
 915         }
 916 }
 917
 918 static void skip_multiline_comment(void)
 919 {
 920         info.had_whitespace = true;
 921
 922         unsigned start_linenr = input.position.lineno;
 923         while (true) {
 924                 switch (input.c) {
 925                 case '/':
 926                         next_char();
 927                         if (input.c == '*') {
 928                                 /* TODO: nested comment, warn here */
 929                         }
 930                         break;
 931                 case '*':
 932                         next_char();
 933                         if (input.c == '/') {
 934                                 if (input.position.lineno != input.output_line)
 935                                         info.whitespace_at_line_begin = input.position.colno;
 936                                 next_char();
 937                                 return;
 938                         }
 939                         break;
 940
 941                 case NEWLINE:
 942                         break;
 943
 944                 case EOF: {
 945                         source_position_t source_position;
 946                         source_position.input_name = pp_token.base.source_position.input_name;
 947                         source_position.lineno     = start_linenr;
 948                         errorf(&source_position, "at end of file while looking for comment end");
 949                         return;
 950                 }
 951
 952                 default:
 953                         next_char();
 954                         break;
 955                 }
 956         }
 957 }
 958
/**
 * Skips spaces, tabs and comments on the current line.
 *
 * @param stop_at_non_whitespace  if true, stop (returning false) at the first
 *                                character handled by the default branch
 *                                instead of consuming it
 * @return when the newline case is reached: true if at least one character
 *         was consumed by the default branch, false otherwise.
 *         A '/' that does not start a comment ends the scan immediately with
 *         result true; the '/' is restored as the current character.
 *
 * NOTE(review): EOF has no case of its own and is handled by the default
 * branch -- confirm that this cannot loop when the input ends without a
 * line terminator.
 */
static bool skip_till_newline(bool stop_at_non_whitespace)
{
        bool res = false;
        while (true) {
                switch (input.c) {
                case ' ':
                case '\t':
                        next_char();
                        continue;

                case '/':
                        next_char();
                        if (input.c == '/') {
                                next_char();
                                skip_line_comment();
                                continue;
                        } else if (input.c == '*') {
                                next_char();
                                skip_multiline_comment();
                                continue;
                        } else {
                                /* not a comment: undo the lookahead so that
                                 * '/' is the current character again */
                                put_back(input.c);
                                input.c = '/';
                        }
                        return true;

                /* NEWLINE is a case-label macro defined elsewhere in this file
                 * (presumably matching and consuming a line terminator --
                 * confirm against its definition). */
                case NEWLINE:
                        return res;

                default:
                        if (stop_at_non_whitespace)
                                return false;
                        /* remember that something other than whitespace was
                         * skipped on this line */
                        res = true;
                        next_char();
                        continue;
                }
        }
}
 997
/**
 * Skips spaces, tabs, newlines and comments in the input and keeps the
 * whitespace bookkeeping in info up to date.
 *
 * Returns as soon as the current character is anything else. A '/' that does
 * not start a comment is restored as the current character before returning.
 */
static void skip_whitespace(void)
{
        while (true) {
                switch (input.c) {
                case ' ':
                case '\t':
                        ++info.whitespace_at_line_begin;
                        info.had_whitespace = true;
                        next_char();
                        continue;

                /* NEWLINE is a case-label macro defined elsewhere in this file
                 * (presumably matching and consuming a line terminator --
                 * confirm against its definition). */
                case NEWLINE:
                        info.at_line_begin  = true;
                        info.had_whitespace = true;
                        info.whitespace_at_line_begin = 0;
                        continue;

                case '/':
                        next_char();
                        if (input.c == '/') {
                                next_char();
                                skip_line_comment();
                                continue;
                        } else if (input.c == '*') {
                                next_char();
                                skip_multiline_comment();
                                continue;
                        } else {
                                /* not a comment: undo the lookahead so that
                                 * '/' is the current character again */
                                put_back(input.c);
                                input.c = '/';
                        }
                        return;

                default:
                        return;
                }
        }
}
1036
1037 static inline void eat_pp(pp_token_kind_t const kind)
1038 {
1039         assert(pp_token.base.symbol->pp_ID == kind);
1040         (void) kind;
1041         next_input_token();
1042 }
1043
1044 static inline void eat_token(token_kind_t const kind)
1045 {
1046         assert(pp_token.kind == kind);
1047         (void)kind;
1048         next_input_token();
1049 }
1050
1051 static void parse_symbol(void)
1052 {
1053         assert(obstack_object_size(&symbol_obstack) == 0);
1054         while (true) {
1055                 switch (input.c) {
1056                 case DIGIT_CASES:
1057                 case SYMBOL_CASES:
1058                         obstack_1grow(&symbol_obstack, (char) input.c);
1059                         next_char();
1060                         break;
1061
1062                 case '\\':
1063                         next_char();
1064                         switch (input.c) {
1065                         {
1066                                 unsigned n;
1067                         case 'U': n = 8; goto universal;
1068                         case 'u': n = 4; goto universal;
1069 universal:
1070                                 if (!resolve_escape_sequences) {
1071                                         obstack_1grow(&symbol_obstack, '\\');
1072                                         obstack_1grow(&symbol_obstack, input.c);
1073                                 }
1074                                 next_char();
1075                                 utf32 const v = parse_universal_char(n);
1076                                 if (!is_universal_char_valid_identifier(v)) {
1077                                         if (is_universal_char_valid(v)) {
1078                                                 errorf(&input.position,
1079                                                            "universal character \\%c%0*X is not valid in an identifier",
1080                                                            n == 4 ? 'u' : 'U', (int)n, v);
1081                                         }
1082                                 } else if (obstack_object_size(&symbol_obstack) == 0 && !is_universal_char_valid_identifier_start(v)) {
1083                                         errorf(&input.position,
1084                                                    "universal character \\%c%0*X is not valid as start of an identifier",
1085                                                    n == 4 ? 'u' : 'U', (int)n, v);
1086                                 } else if (resolve_escape_sequences) {
1087                                         obstack_grow_utf8(&symbol_obstack, v);
1088                                 }
1089                                 break;
1090                         }
1091
1092                         default:
1093                                 put_back(input.c);
1094                                 input.c = '\\';
1095                                 goto end_symbol;
1096                         }
1097
1098                 default:
1099 dollar_sign:
1100                         goto end_symbol;
1101                 }
1102         }
1103
1104 end_symbol:
1105         obstack_1grow(&symbol_obstack, '\0');
1106         char *string = obstack_finish(&symbol_obstack);
1107
1108         /* might be a wide string or character constant ( L"string"/L'c' ) */
1109         if (input.c == '"' && string[0] == 'L' && string[1] == '\0') {
1110                 obstack_free(&symbol_obstack, string);
1111                 parse_string_literal(STRING_ENCODING_WIDE);
1112                 return;
1113         } else if (input.c == '\'' && string[0] == 'L' && string[1] == '\0') {
1114                 obstack_free(&symbol_obstack, string);
1115                 parse_character_constant(STRING_ENCODING_WIDE);
1116                 return;
1117         }
1118
1119         symbol_t *symbol = symbol_table_insert(string);
1120
1121         pp_token.kind        = symbol->ID;
1122         pp_token.base.symbol = symbol;
1123
1124         /* we can free the memory from symbol obstack if we already had an entry in
1125          * the symbol table */
1126         if (symbol->string != string) {
1127                 obstack_free(&symbol_obstack, string);
1128         }
1129 }
1130
/**
 * Lexes a preprocessing number starting at the current input character and
 * stores it in pp_token as T_NUMBER.
 *
 * The first character is taken unconditionally (the caller has already
 * decided that a number starts here). After that, digits, '.', and
 * identifier characters are appended; an 'e', 'E', 'p' or 'P' may
 * additionally be followed by one sign character, which then belongs to the
 * number as well.
 */
static void parse_number(void)
{
        obstack_1grow(&symbol_obstack, (char) input.c);
        next_char();

        while (true) {
                switch (input.c) {
                case '.':
                case DIGIT_CASES:
                case SYMBOL_CASES_WITHOUT_E_P:
                        obstack_1grow(&symbol_obstack, (char) input.c);
                        next_char();
                        break;

                /* exponent markers: a directly following sign is part of the
                 * number */
                case 'e':
                case 'p':
                case 'E':
                case 'P':
                        obstack_1grow(&symbol_obstack, (char) input.c);
                        next_char();
                        if (input.c == '+' || input.c == '-') {
                                obstack_1grow(&symbol_obstack, (char) input.c);
                                next_char();
                        }
                        break;

                default:
                /* dollar_sign is presumably the jump target used by
                 * SYMBOL_CASES_WITHOUT_E_P when '$' is not allowed -- confirm
                 * against the macro definition. */
dollar_sign:
                        goto end_number;
                }
        }

end_number:
        pp_token.kind           = T_NUMBER;
        /* presumably turns the characters collected on symbol_obstack into
         * the literal's string -- confirm in sym_make_string() */
        pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
}
1167
/*
 * Helper macros for lexing multi-character punctuators in
 * next_input_token(). They are used as a bracketed sequence:
 *
 *     case 'x':
 *             MAYBE_PROLOG          advance and open a switch on the next char
 *             MAYBE(ch, kind)       next char is ch: consume it, emit kind
 *             ...
 *             ELSE(kind)            anything else: emit kind, close the switch
 *
 * Every MAYBE_PROLOG must be closed by exactly one ELSE / ELSE_CODE, which
 * supplies the default label and the closing brace of the switch.
 */

/* Consumes the current character and opens a switch on its successor. */
#define MAYBE_PROLOG \
        next_char(); \
        switch (input.c) {

/* If the lookahead is ch: consume it, set the punctuator kind and return. */
#define MAYBE(ch, kind) \
        case ch: \
                next_char(); \
                set_punctuator(kind); \
                return;

/* Like MAYBE, but records the given digraph symbol as the token spelling. */
#define MAYBE_DIGRAPH(ch, kind, symbol) \
        case ch: \
                next_char(); \
                set_digraph(kind, symbol); \
                return;

/* Default branch running arbitrary code; closes the switch opened by
 * MAYBE_PROLOG. The lookahead character is not consumed. */
#define ELSE_CODE(code) \
        default: \
                code \
        }

/* Default branch: emit the punctuator kind without consuming the lookahead. */
#define ELSE(kind) ELSE_CODE(set_punctuator(kind); return;)
1190
/**
 * Identifies the next preprocessing token contained in the input stream and
 * stores it in pp_token. No macro expansion is performed.
 *
 * Whitespace and comments in front of the token are skipped and recorded in
 * info. When the end of an included file is reached, the previous input is
 * restored and lexing continues there; at the end of the outermost input a
 * T_EOF token is produced.
 */
static void next_input_token(void)
{
        /* take over whitespace state that was saved for this token, otherwise
         * start with a clean state */
        if (next_info.had_whitespace) {
                info = next_info;
                next_info.had_whitespace = false;
        } else {
                info.at_line_begin  = false;
                info.had_whitespace = false;
        }
restart:
        pp_token.base.source_position = input.position;
        pp_token.base.symbol          = NULL;

        switch (input.c) {
        case ' ':
        case '\t':
                info.whitespace_at_line_begin++;
                info.had_whitespace = true;
                next_char();
                goto restart;

        /* NEWLINE is a case-label macro defined elsewhere in this file
         * (presumably matching and consuming a line terminator -- confirm
         * against its definition). */
        case NEWLINE:
                info.at_line_begin            = true;
                info.had_whitespace           = true;
                info.whitespace_at_line_begin = 0;
                goto restart;

        case SYMBOL_CASES:
                parse_symbol();
                return;

        case DIGIT_CASES:
                parse_number();
                return;

        case '"':
                parse_string_literal(STRING_ENCODING_CHAR);
                return;

        case '\'':
                parse_character_constant(STRING_ENCODING_CHAR);
                return;

        /* '.' may start a number (".5"), the ellipsis "..." or be a plain
         * dot; two dots followed by something else yield a single '.' with
         * the second dot restored as current character */
        case '.':
                MAYBE_PROLOG
                        case '0':
                        case '1':
                        case '2':
                        case '3':
                        case '4':
                        case '5':
                        case '6':
                        case '7':
                        case '8':
                        case '9':
                                put_back(input.c);
                                input.c = '.';
                                parse_number();
                                return;

                        case '.':
                                MAYBE_PROLOG
                                MAYBE('.', T_DOTDOTDOT)
                                ELSE_CODE(
                                        put_back(input.c);
                                        input.c = '.';
                                        set_punctuator('.');
                                        return;
                                )
                ELSE('.')
        case '&':
                MAYBE_PROLOG
                MAYBE('&', T_ANDAND)
                MAYBE('=', T_ANDEQUAL)
                ELSE('&')
        case '*':
                MAYBE_PROLOG
                MAYBE('=', T_ASTERISKEQUAL)
                ELSE('*')
        case '+':
                MAYBE_PROLOG
                MAYBE('+', T_PLUSPLUS)
                MAYBE('=', T_PLUSEQUAL)
                ELSE('+')
        case '-':
                MAYBE_PROLOG
                MAYBE('>', T_MINUSGREATER)
                MAYBE('-', T_MINUSMINUS)
                MAYBE('=', T_MINUSEQUAL)
                ELSE('-')
        case '!':
                MAYBE_PROLOG
                MAYBE('=', T_EXCLAMATIONMARKEQUAL)
                ELSE('!')
        /* '/' may start a comment; comments are skipped and lexing restarts */
        case '/':
                MAYBE_PROLOG
                MAYBE('=', T_SLASHEQUAL)
                case '*':
                        next_char();
                        skip_multiline_comment();
                        goto restart;
                case '/':
                        next_char();
                        skip_line_comment();
                        goto restart;
                ELSE('/')
        /* '%' starts the digraphs "%>", "%:" and "%:%:"; "%:%" followed by
         * something other than ':' is "%:" with the '%' restored */
        case '%':
                MAYBE_PROLOG
                MAYBE_DIGRAPH('>', '}', symbol_percentgreater)
                MAYBE('=', T_PERCENTEQUAL)
                case ':':
                        MAYBE_PROLOG
                        case '%':
                                MAYBE_PROLOG
                                MAYBE_DIGRAPH(':', T_HASHHASH, symbol_percentcolonpercentcolon)
                                ELSE_CODE(
                                        put_back(input.c);
                                        input.c = '%';
                                        goto digraph_percentcolon;
                                )
                        ELSE_CODE(
digraph_percentcolon:
                                set_digraph('#', symbol_percentcolon);
                                return;
                        )
                ELSE('%')
        case '<':
                MAYBE_PROLOG
                MAYBE_DIGRAPH(':', '[', symbol_lesscolon)
                MAYBE_DIGRAPH('%', '{', symbol_lesspercent)
                MAYBE('=', T_LESSEQUAL)
                case '<':
                        MAYBE_PROLOG
                        MAYBE('=', T_LESSLESSEQUAL)
                        ELSE(T_LESSLESS)
                ELSE('<')
        case '>':
                MAYBE_PROLOG
                MAYBE('=', T_GREATEREQUAL)
                case '>':
                        MAYBE_PROLOG
                        MAYBE('=', T_GREATERGREATEREQUAL)
                        ELSE(T_GREATERGREATER)
                ELSE('>')
        case '^':
                MAYBE_PROLOG
                MAYBE('=', T_CARETEQUAL)
                ELSE('^')
        case '|':
                MAYBE_PROLOG
                MAYBE('=', T_PIPEEQUAL)
                MAYBE('|', T_PIPEPIPE)
                ELSE('|')
        /* "::" is a single token only when c_mode has _CXX set; otherwise
         * the first ':' is emitted and the second one stays in the input */
        case ':':
                MAYBE_PROLOG
                MAYBE_DIGRAPH('>', ']', symbol_colongreater)
                case ':':
                        if (c_mode & _CXX) {
                                next_char();
                                set_punctuator(T_COLONCOLON);
                                return;
                        }
                        /* FALLTHROUGH */
                ELSE(':')
        case '=':
                MAYBE_PROLOG
                MAYBE('=', T_EQUALEQUAL)
                ELSE('=')
        case '#':
                MAYBE_PROLOG
                MAYBE('#', T_HASHHASH)
                ELSE('#')

        /* single-character punctuators */
        case '?':
        case '[':
        case ']':
        case '(':
        case ')':
        case '{':
        case '}':
        case '~':
        case ';':
        case ',':
                set_punctuator(input.c);
                next_char();
                return;

        case EOF:
                if (input_stack != NULL) {
                        /* end of an included input: close it, continue with
                         * the input that is restored from the stack and
                         * announce the return with a line directive */
                        fclose(close_pp_input());
                        pop_restore_input();
                        if (out)
                                fputc('\n', out);
                        if (input.c == (utf32)EOF)
                                --input.position.lineno;
                        print_line_directive(&input.position, "2");
                        goto restart;
                } else {
                        info.at_line_begin = true;
                        set_punctuator(T_EOF);
                }
                return;

        /* a backslash starts an identifier only if it introduces a universal
         * character name; the lookahead is undone either way */
        case '\\':
                next_char();
                int next_c = input.c;
                put_back(input.c);
                input.c = '\\';
                if (next_c == 'U' || next_c == 'u') {
                        parse_symbol();
                        return;
                }
                /* FALLTHROUGH */
        default:
        /* dollar_sign is presumably the jump target used by SYMBOL_CASES when
         * '$' is not allowed in identifiers -- confirm against the macro
         * definition. */
dollar_sign:
                if (error_on_unknown_chars) {
                        errorf(&pp_token.base.source_position,
                               "unknown character '%lc' found\n", input.c);
                        next_char();
                        goto restart;
                } else {
                        /* pass the character through as its own token, using
                         * its UTF-8 encoding as the symbol */
                        assert(obstack_object_size(&symbol_obstack) == 0);
                        obstack_grow_utf8(&symbol_obstack, input.c);
                        obstack_1grow(&symbol_obstack, '\0');
                        char     *const string = obstack_finish(&symbol_obstack);
                        symbol_t *const symbol = symbol_table_insert(string);
                        if (symbol->string != string)
                                obstack_free(&symbol_obstack, string);

                        pp_token.kind        = T_UNKNOWN_CHAR;
                        pp_token.base.symbol = symbol;
                        next_char();
                        return;
                }
        }
}
1429
1430 static void print_quoted_string(const char *const string)
1431 {
1432         fputc('"', out);
1433         for (const char *c = string; *c != 0; ++c) {
1434                 switch (*c) {
1435                 case '"': fputs("\\\"", out); break;
1436                 case '\\':  fputs("\\\\", out); break;
1437                 case '\a':  fputs("\\a", out); break;
1438                 case '\b':  fputs("\\b", out); break;
1439                 case '\f':  fputs("\\f", out); break;
1440                 case '\n':  fputs("\\n", out); break;
1441                 case '\r':  fputs("\\r", out); break;
1442                 case '\t':  fputs("\\t", out); break;
1443                 case '\v':  fputs("\\v", out); break;
1444                 case '\?':  fputs("\\?", out); break;
1445                 default:
1446                         if (!isprint(*c)) {
1447                                 fprintf(out, "\\%03o", (unsigned)*c);
1448                                 break;
1449                         }
1450                         fputc(*c, out);
1451                         break;
1452                 }
1453         }
1454         fputc('"', out);
1455 }
1456
1457 static void print_line_directive(const source_position_t *pos, const char *add)
1458 {
1459         if (!out)
1460                 return;
1461
1462         fprintf(out, "# %u ", pos->lineno);
1463         print_quoted_string(pos->input_name);
1464         if (add != NULL) {
1465                 fputc(' ', out);
1466                 fputs(add, out);
1467         }
1468         if (pos->is_system_header) {
1469                 fputs(" 3", out);
1470         }
1471
1472         printed_input_name = pos->input_name;
1473         input.output_line  = pos->lineno-1;
1474 }
1475
1476 static bool emit_newlines(void)
1477 {
1478         if (!out)
1479                 return true;
1480
1481         unsigned delta = pp_token.base.source_position.lineno - input.output_line;
1482         if (delta == 0)
1483                 return false;
1484
1485         if (delta >= 9) {
1486                 fputc('\n', out);
1487                 print_line_directive(&pp_token.base.source_position, NULL);
1488                 fputc('\n', out);
1489         } else {
1490                 for (unsigned i = 0; i < delta; ++i) {
1491                         fputc('\n', out);
1492                 }
1493         }
1494         input.output_line = pp_token.base.source_position.lineno;
1495
1496         unsigned whitespace = info.whitespace_at_line_begin;
1497         /* make sure there is at least 1 whitespace before a (macro-expanded)
1498          * '#' at line begin. I'm not sure why this is good, but gcc does it. */
1499         if (pp_token.kind == '#' && whitespace == 0)
1500                 ++whitespace;
1501         for (unsigned i = 0; i < whitespace; ++i)
1502                 fputc(' ', out);
1503
1504         return true;
1505 }
1506
1507 void set_preprocessor_output(FILE *output)
1508 {
1509         out = output;
1510         if (out != NULL) {
1511                 error_on_unknown_chars   = false;
1512                 resolve_escape_sequences = false;
1513         } else {
1514                 error_on_unknown_chars   = true;
1515                 resolve_escape_sequences = true;
1516         }
1517 }
1518
1519 void emit_pp_token(void)
1520 {
1521         if (!emit_newlines() &&
1522             (info.had_whitespace || tokens_would_paste(last_token, pp_token.kind)))
1523                 fputc(' ', out);
1524
1525         switch (pp_token.kind) {
1526         case T_NUMBER:
1527                 fputs(pp_token.literal.string.begin, out);
1528                 break;
1529
1530         case T_STRING_LITERAL:
1531                 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1532                 fputc('"', out);
1533                 fputs(pp_token.literal.string.begin, out);
1534                 fputc('"', out);
1535                 break;
1536
1537         case T_CHARACTER_CONSTANT:
1538                 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1539                 fputc('\'', out);
1540                 fputs(pp_token.literal.string.begin, out);
1541                 fputc('\'', out);
1542                 break;
1543
1544         case T_MACRO_PARAMETER:
1545                 panic("macro parameter not expanded");
1546
1547         default:
1548                 fputs(pp_token.base.symbol->string, out);
1549                 break;
1550         }
1551         last_token = pp_token.kind;
1552 }
1553
1554 static void eat_pp_directive(void)
1555 {
1556         while (!info.at_line_begin) {
1557                 next_input_token();
1558         }
1559 }
1560
1561 static bool strings_equal(const string_t *string1, const string_t *string2)
1562 {
1563         size_t size = string1->size;
1564         if (size != string2->size)
1565                 return false;
1566
1567         const char *c1 = string1->begin;
1568         const char *c2 = string2->begin;
1569         for (size_t i = 0; i < size; ++i, ++c1, ++c2) {
1570                 if (*c1 != *c2)
1571                         return false;
1572         }
1573         return true;
1574 }
1575
1576 static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
1577 {
1578         if (token1->kind != token2->kind)
1579                 return false;
1580
1581         switch (token1->kind) {
1582         case T_NUMBER:
1583         case T_CHARACTER_CONSTANT:
1584         case T_STRING_LITERAL:
1585                 return strings_equal(&token1->literal.string, &token2->literal.string);
1586
1587         case T_MACRO_PARAMETER:
1588                 return token1->macro_parameter.def->symbol
1589                     == token2->macro_parameter.def->symbol;
1590
1591         default:
1592                 return token1->base.symbol == token2->base.symbol;
1593         }
1594 }
1595
1596 static bool pp_definitions_equal(const pp_definition_t *definition1,
1597                                  const pp_definition_t *definition2)
1598 {
1599         if (definition1->list_len != definition2->list_len)
1600                 return false;
1601
1602         size_t               len = definition1->list_len;
1603         const saved_token_t *t1  = definition1->token_list;
1604         const saved_token_t *t2  = definition2->token_list;
1605         for (size_t i = 0; i < len; ++i, ++t1, ++t2) {
1606                 if (!pp_tokens_equal(&t1->token, &t2->token))
1607                         return false;
1608                 if (t1->had_whitespace != t2->had_whitespace)
1609                         return false;
1610         }
1611         return true;
1612 }
1613
1614 static void missing_macro_param_error(void)
1615 {
1616         errorf(&pp_token.base.source_position,
1617                "'#' is not followed by a macro parameter");
1618 }
1619
1620 static bool is_defineable_token(char const *const context)
1621 {
1622         if (info.at_line_begin) {
1623                 errorf(&pp_token.base.source_position, "unexpected end of line after %s", context);
1624         }
1625
1626         symbol_t *const symbol = pp_token.base.symbol;
1627         if (!symbol)
1628                 goto no_ident;
1629
1630         if (pp_token.kind != T_IDENTIFIER) {
1631                 switch (symbol->string[0]) {
1632                 case SYMBOL_CASES:
1633 dollar_sign:
1634                         break;
1635
1636                 default:
1637 no_ident:
1638                         errorf(&pp_token.base.source_position, "expected identifier after %s, got %K", context, &pp_token);
1639                         return false;
1640                 }
1641         }
1642
1643         /* TODO turn this into a flag in pp_def. */
1644         switch (symbol->pp_ID) {
1645         /* §6.10.8:4 */
1646         case TP_defined:
1647                 errorf(&pp_token.base.source_position, "%K cannot be used as macro name in %s", &pp_token, context);
1648                 return false;
1649
1650         default:
1651                 return true;
1652         }
1653 }
1654
1655 static void parse_define_directive(void)
1656 {
1657         eat_pp(TP_define);
1658         if (skip_mode) {
1659                 eat_pp_directive();
1660                 return;
1661         }
1662
1663         assert(obstack_object_size(&pp_obstack) == 0);
1664
1665         if (!is_defineable_token("#define"))
1666                 goto error_out;
1667         symbol_t *const symbol = pp_token.base.symbol;
1668
1669         pp_definition_t *new_definition
1670                 = obstack_alloc(&pp_obstack, sizeof(new_definition[0]));
1671         memset(new_definition, 0, sizeof(new_definition[0]));
1672         new_definition->symbol          = symbol;
1673         new_definition->source_position = input.position;
1674
1675         /* this is probably the only place where spaces are significant in the
1676          * lexer (except for the fact that they separate tokens). #define b(x)
1677          * is something else than #define b (x) */
1678         if (input.c == '(') {
1679                 next_input_token();
1680                 eat_token('(');
1681
1682                 while (true) {
1683                         switch (pp_token.kind) {
1684                         case T_DOTDOTDOT:
1685                                 new_definition->is_variadic = true;
1686                                 eat_token(T_DOTDOTDOT);
1687                                 if (pp_token.kind != ')') {
1688                                         errorf(&input.position,
1689                                                         "'...' not at end of macro argument list");
1690                                         goto error_out;
1691                                 }
1692                                 break;
1693
1694                         case T_IDENTIFIER: {
1695                                 pp_definition_t parameter;
1696                                 memset(&parameter, 0, sizeof(parameter));
1697                                 parameter.source_position = pp_token.base.source_position;
1698                                 parameter.symbol          = pp_token.base.symbol;
1699                                 parameter.is_parameter    = true;
1700                                 obstack_grow(&pp_obstack, &parameter, sizeof(parameter));
1701                                 eat_token(T_IDENTIFIER);
1702
1703                                 if (pp_token.kind == ',') {
1704                                         eat_token(',');
1705                                         break;
1706                                 }
1707
1708                                 if (pp_token.kind != ')') {
1709                                         errorf(&pp_token.base.source_position,
1710                                                "expected ',' or ')' after identifier, got %K",
1711                                                &pp_token);
1712                                         goto error_out;
1713                                 }
1714                                 break;
1715                         }
1716
1717                         case ')':
1718                                 eat_token(')');
1719                                 goto finish_argument_list;
1720
1721                         default:
1722                                 errorf(&pp_token.base.source_position,
1723                                        "expected identifier, '...' or ')' in #define argument list, got %K",
1724                                        &pp_token);
1725                                 goto error_out;
1726                         }
1727                 }
1728
1729         finish_argument_list:
1730                 new_definition->has_parameters = true;
1731                 size_t size = obstack_object_size(&pp_obstack);
1732                 new_definition->n_parameters
1733                         = size / sizeof(new_definition->parameters[0]);
1734                 new_definition->parameters = obstack_finish(&pp_obstack);
1735                 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1736                         pp_definition_t *param    = &new_definition->parameters[i];
1737                         symbol_t        *symbol   = param->symbol;
1738                         pp_definition_t *previous = symbol->pp_definition;
1739                         if (previous != NULL
1740                             && previous->function_definition == new_definition) {
1741                                 errorf(&param->source_position,
1742                                        "duplicate macro parameter '%Y'", symbol);
1743                                 param->symbol = sym_anonymous;
1744                                 continue;
1745                         }
1746                         param->parent_expansion    = previous;
1747                         param->function_definition = new_definition;
1748                         symbol->pp_definition      = param;
1749                 }
1750         } else {
1751                 next_input_token();
1752         }
1753
1754         /* construct token list */
1755         assert(obstack_object_size(&pp_obstack) == 0);
1756         bool next_must_be_param = false;
1757         while (!info.at_line_begin) {
1758                 if (pp_token.kind == T_IDENTIFIER) {
1759                         const symbol_t  *symbol     = pp_token.base.symbol;
1760                         pp_definition_t *definition = symbol->pp_definition;
1761                         if (definition != NULL
1762                             && definition->function_definition == new_definition) {
1763                             pp_token.kind                = T_MACRO_PARAMETER;
1764                             pp_token.macro_parameter.def = definition;
1765                         }
1766                 }
1767                 if (next_must_be_param && pp_token.kind != T_MACRO_PARAMETER) {
1768                         missing_macro_param_error();
1769                 }
1770                 saved_token_t saved_token;
1771                 saved_token.token = pp_token;
1772                 saved_token.had_whitespace = info.had_whitespace;
1773                 obstack_grow(&pp_obstack, &saved_token, sizeof(saved_token));
1774                 next_must_be_param
1775                         = new_definition->has_parameters && pp_token.kind == '#';
1776                 next_input_token();
1777         }
1778         if (next_must_be_param)
1779                 missing_macro_param_error();
1780
1781         new_definition->list_len   = obstack_object_size(&pp_obstack)
1782                 / sizeof(new_definition->token_list[0]);
1783         new_definition->token_list = obstack_finish(&pp_obstack);
1784
1785         if (new_definition->has_parameters) {
1786                 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1787                         pp_definition_t *param      = &new_definition->parameters[i];
1788                         symbol_t        *symbol     = param->symbol;
1789                         if (symbol == sym_anonymous)
1790                                 continue;
1791                         assert(symbol->pp_definition == param);
1792                         assert(param->function_definition == new_definition);
1793                         symbol->pp_definition   = param->parent_expansion;
1794                         param->parent_expansion = NULL;
1795                 }
1796         }
1797
1798         pp_definition_t *old_definition = symbol->pp_definition;
1799         if (old_definition != NULL) {
1800                 if (!pp_definitions_equal(old_definition, new_definition)) {
1801                         warningf(WARN_OTHER, &input.position, "multiple definition of macro '%Y' (first defined %P)", symbol, &old_definition->source_position);
1802                 } else {
1803                         /* reuse the old definition */
1804                         obstack_free(&pp_obstack, new_definition);
1805                         new_definition = old_definition;
1806                 }
1807         }
1808
1809         symbol->pp_definition = new_definition;
1810         return;
1811
1812 error_out:
1813         if (obstack_object_size(&pp_obstack) > 0) {
1814                 char *ptr = obstack_finish(&pp_obstack);
1815                 obstack_free(&pp_obstack, ptr);
1816         }
1817         eat_pp_directive();
1818 }
1819
1820 static void parse_undef_directive(void)
1821 {
1822         eat_pp(TP_undef);
1823         if (skip_mode) {
1824                 eat_pp_directive();
1825                 return;
1826         }
1827
1828         if (!is_defineable_token("#undef")) {
1829                 eat_pp_directive();
1830                 return;
1831         }
1832
1833         pp_token.base.symbol->pp_definition = NULL;
1834         next_input_token();
1835
1836         if (!info.at_line_begin) {
1837                 warningf(WARN_OTHER, &input.position, "extra tokens at end of #undef directive");
1838         }
1839         eat_pp_directive();
1840 }
1841
1842 /** behind an #include we can have the special headername lexems.
1843  * They're only allowed behind an #include so they're not recognized
1844  * by the normal next_preprocessing_token. We handle them as a special
1845  * exception here */
1846 static const char *parse_headername(bool *system_include)
1847 {
1848         if (info.at_line_begin) {
1849                 parse_error("expected headername after #include");
1850                 return NULL;
1851         }
1852
1853         /* check wether we have a "... or <... headername */
1854         source_position_t position = input.position;
1855         switch (input.c) {
1856         {
1857                 utf32 delimiter;
1858         case '<': delimiter = '>'; *system_include = true;  goto parse_name;
1859         case '"': delimiter = '"'; *system_include = false; goto parse_name;
1860 parse_name:
1861                 assert(obstack_object_size(&symbol_obstack) == 0);
1862                 next_char();
1863                 while (true) {
1864                         switch (input.c) {
1865                         case NEWLINE:
1866                         case EOF:
1867                                 {
1868                                         char *dummy = obstack_finish(&symbol_obstack);
1869                                         obstack_free(&symbol_obstack, dummy);
1870                                 }
1871                                 errorf(&pp_token.base.source_position,
1872                                        "header name without closing '%c'", (char)delimiter);
1873                                 return NULL;
1874
1875                         default:
1876                                 if (input.c == delimiter) {
1877                                         next_char();
1878                                         goto finish_headername;
1879                                 } else {
1880                                         obstack_1grow(&symbol_obstack, (char)input.c);
1881                                         next_char();
1882                                 }
1883                                 break;
1884                         }
1885                 }
1886                 /* we should never be here */
1887         }
1888
1889         default:
1890                 next_preprocessing_token();
1891                 if (info.at_line_begin) {
1892                         /* TODO: if we are already in the new line then we parsed more than
1893                          * wanted. We reuse the token, but could produce following errors
1894                          * misbehaviours... */
1895                         goto error_invalid_input;
1896                 }
1897                 if (pp_token.kind == T_STRING_LITERAL) {
1898                         *system_include = false;
1899                         return pp_token.literal.string.begin;
1900                 } else if (pp_token.kind == '<') {
1901                         *system_include = true;
1902                         assert(obstack_object_size(&pp_obstack) == 0);
1903                         while (true) {
1904                                 next_preprocessing_token();
1905                                 if (info.at_line_begin) {
1906                                         /* TODO: we shouldn't have parsed/expanded something on the
1907                                          * next line yet... */
1908                                         char *dummy = obstack_finish(&pp_obstack);
1909                                         obstack_free(&pp_obstack, dummy);
1910                                         goto error_invalid_input;
1911                                 }
1912                                 if (pp_token.kind == '>')
1913                                         break;
1914
1915                                 saved_token_t saved;
1916                                 saved.token          = pp_token;
1917                                 saved.had_whitespace = info.had_whitespace;
1918                                 obstack_grow(&pp_obstack, &saved, sizeof(saved));
1919                         }
1920                         size_t size = obstack_object_size(&pp_obstack);
1921                         assert(size % sizeof(saved_token_t) == 0);
1922                         size_t n_tokens = size / sizeof(saved_token_t);
1923                         saved_token_t *tokens = obstack_finish(&pp_obstack);
1924                         assert(obstack_object_size(&symbol_obstack) == 0);
1925                         for (size_t i = 0; i < n_tokens; ++i) {
1926                                 const saved_token_t *saved = &tokens[i];
1927                                 if (i > 0 && saved->had_whitespace)
1928                                         obstack_1grow(&symbol_obstack, ' ');
1929                                 grow_token(&symbol_obstack, &saved->token);
1930                         }
1931                         obstack_free(&pp_obstack, tokens);
1932                         goto finish_headername;
1933                 } else {
1934 error_invalid_input:
1935                         {
1936                                 char *dummy = obstack_finish(&symbol_obstack);
1937                                 obstack_free(&symbol_obstack, dummy);
1938                         }
1939
1940                         errorf(&pp_token.base.source_position,
1941                                "expected \"FILENAME\" or <FILENAME> after #include");
1942                         return NULL;
1943                 }
1944         }
1945
1946 finish_headername:
1947         obstack_1grow(&symbol_obstack, '\0');
1948         char *const  headername = obstack_finish(&symbol_obstack);
1949         const char  *identified = identify_string(headername);
1950         pp_token.base.source_position = position;
1951         return identified;
1952 }
1953
1954 static bool do_include(bool const bracket_include, bool const include_next, char const *const headername)
1955 {
1956         size_t const        headername_len = strlen(headername);
1957         searchpath_entry_t *entry;
1958         if (include_next) {
1959                 entry = input.path      ? input.path->next
1960                       : bracket_include ? bracket_searchpath.first
1961                       : quote_searchpath.first;
1962         } else {
1963                 if (!bracket_include) {
1964                         /* put dirname of current input on obstack */
1965                         const char *filename   = input.position.input_name;
1966                         const char *last_slash = strrchr(filename, '/');
1967                         const char *full_name;
1968                         if (last_slash != NULL) {
1969                                 size_t len = last_slash - filename;
1970                                 obstack_grow(&symbol_obstack, filename, len + 1);
1971                                 obstack_grow0(&symbol_obstack, headername, headername_len);
1972                                 char *complete_path = obstack_finish(&symbol_obstack);
1973                                 full_name = identify_string(complete_path);
1974                         } else {
1975                                 full_name = headername;
1976                         }
1977
1978                         FILE *file = fopen(full_name, "r");
1979                         if (file != NULL) {
1980                                 switch_pp_input(file, full_name, NULL, false);
1981                                 return true;
1982                         }
1983                         entry = quote_searchpath.first;
1984                 } else {
1985                         entry = bracket_searchpath.first;
1986                 }
1987         }
1988
1989         assert(obstack_object_size(&symbol_obstack) == 0);
1990         /* check searchpath */
1991         for (; entry; entry = entry->next) {
1992             const char *path = entry->path;
1993             size_t      len  = strlen(path);
1994                 obstack_grow(&symbol_obstack, path, len);
1995                 if (path[len-1] != '/')
1996                         obstack_1grow(&symbol_obstack, '/');
1997                 obstack_grow(&symbol_obstack, headername, headername_len+1);
1998
1999                 char *complete_path = obstack_finish(&symbol_obstack);
2000                 FILE *file          = fopen(complete_path, "r");
2001                 if (file != NULL) {
2002                         const char *filename = identify_string(complete_path);
2003                         switch_pp_input(file, filename, entry, entry->is_system_path);
2004                         return true;
2005                 } else {
2006                         obstack_free(&symbol_obstack, complete_path);
2007                 }
2008         }
2009
2010         return false;
2011 }
2012
2013 static void parse_include_directive(bool const include_next)
2014 {
2015         if (skip_mode) {
2016                 eat_pp_directive();
2017                 return;
2018         }
2019
2020         /* do not eat the TP_include, since it would already parse the next token
2021          * which needs special handling here. */
2022         skip_till_newline(true);
2023         bool system_include;
2024         const char *headername = parse_headername(&system_include);
2025         if (headername == NULL) {
2026                 eat_pp_directive();
2027                 return;
2028         }
2029
2030         bool had_nonwhitespace = skip_till_newline(false);
2031         if (had_nonwhitespace) {
2032                 warningf(WARN_OTHER, &input.position,
2033                          "extra tokens at end of #include directive");
2034         }
2035
2036         if (n_inputs > INCLUDE_LIMIT) {
2037                 errorf(&pp_token.base.source_position, "#include nested too deeply");
2038                 /* eat \n or EOF */
2039                 next_input_token();
2040                 return;
2041         }
2042
2043         /* switch inputs */
2044         info.whitespace_at_line_begin = 0;
2045         info.had_whitespace           = false;
2046         info.at_line_begin            = true;
2047         emit_newlines();
2048         push_input();
2049         bool res = do_include(system_include, include_next, headername);
2050         if (res) {
2051                 next_input_token();
2052         } else {
2053                 errorf(&pp_token.base.source_position, "failed including '%s': %s", headername, strerror(errno));
2054                 pop_restore_input();
2055         }
2056 }
2057
2058 static pp_conditional_t *push_conditional(void)
2059 {
2060         pp_conditional_t *conditional
2061                 = obstack_alloc(&pp_obstack, sizeof(*conditional));
2062         memset(conditional, 0, sizeof(*conditional));
2063
2064         conditional->parent = conditional_stack;
2065         conditional_stack   = conditional;
2066
2067         return conditional;
2068 }
2069
2070 static void pop_conditional(void)
2071 {
2072         assert(conditional_stack != NULL);
2073         conditional_stack = conditional_stack->parent;
2074 }
2075
2076 void check_unclosed_conditionals(void)
2077 {
2078         while (conditional_stack != NULL) {
2079                 pp_conditional_t *conditional = conditional_stack;
2080
2081                 if (conditional->in_else) {
2082                         errorf(&conditional->source_position, "unterminated #else");
2083                 } else {
2084                         errorf(&conditional->source_position, "unterminated condition");
2085                 }
2086                 pop_conditional();
2087         }
2088 }
2089
2090 static void parse_ifdef_ifndef_directive(bool const is_ifdef)
2091 {
2092         bool condition;
2093         eat_pp(is_ifdef ? TP_ifdef : TP_ifndef);
2094
2095         if (skip_mode) {
2096                 eat_pp_directive();
2097                 pp_conditional_t *conditional = push_conditional();
2098                 conditional->source_position  = pp_token.base.source_position;
2099                 conditional->skip             = true;
2100                 return;
2101         }
2102
2103         if (pp_token.kind != T_IDENTIFIER || info.at_line_begin) {
2104                 errorf(&pp_token.base.source_position,
2105                        "expected identifier after #%s, got %K",
2106                        is_ifdef ? "ifdef" : "ifndef", &pp_token);
2107                 eat_pp_directive();
2108
2109                 /* just take the true case in the hope to avoid further errors */
2110                 condition = true;
2111         } else {
2112                 /* evaluate wether we are in true or false case */
2113                 condition = (bool)pp_token.base.symbol->pp_definition == is_ifdef;
2114                 eat_token(T_IDENTIFIER);
2115
2116                 if (!info.at_line_begin) {
2117                         errorf(&pp_token.base.source_position,
2118                                "extra tokens at end of #%s",
2119                                is_ifdef ? "ifdef" : "ifndef");
2120                         eat_pp_directive();
2121                 }
2122         }
2123
2124         pp_conditional_t *conditional = push_conditional();
2125         conditional->source_position  = pp_token.base.source_position;
2126         conditional->condition        = condition;
2127
2128         if (!condition) {
2129                 skip_mode = true;
2130         }
2131 }
2132
2133 static void parse_else_directive(void)
2134 {
2135         eat_pp(TP_else);
2136
2137         if (!info.at_line_begin) {
2138                 if (!skip_mode) {
2139                         warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #else");
2140                 }
2141                 eat_pp_directive();
2142         }
2143
2144         pp_conditional_t *conditional = conditional_stack;
2145         if (conditional == NULL) {
2146                 errorf(&pp_token.base.source_position, "#else without prior #if");
2147                 return;
2148         }
2149
2150         if (conditional->in_else) {
2151                 errorf(&pp_token.base.source_position,
2152                        "#else after #else (condition started %P)",
2153                        &conditional->source_position);
2154                 skip_mode = true;
2155                 return;
2156         }
2157
2158         conditional->in_else = true;
2159         if (!conditional->skip) {
2160                 skip_mode = conditional->condition;
2161         }
2162         conditional->source_position = pp_token.base.source_position;
2163 }
2164
2165 static void parse_endif_directive(void)
2166 {
2167         eat_pp(TP_endif);
2168
2169         if (!info.at_line_begin) {
2170                 if (!skip_mode) {
2171                         warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #endif");
2172                 }
2173                 eat_pp_directive();
2174         }
2175
2176         pp_conditional_t *conditional = conditional_stack;
2177         if (conditional == NULL) {
2178                 errorf(&pp_token.base.source_position, "#endif without prior #if");
2179                 return;
2180         }
2181
2182         if (!conditional->skip) {
2183                 skip_mode = false;
2184         }
2185         pop_conditional();
2186 }
2187
/** Kinds of "#pragma STDC" recognized by parse_pragma_directive(). */
typedef enum stdc_pragma_kind_t {
        STDC_UNKNOWN = 0,      /**< not a recognized STDC pragma */
        STDC_FP_CONTRACT,      /**< #pragma STDC FP_CONTRACT */
        STDC_FENV_ACCESS,      /**< #pragma STDC FENV_ACCESS */
        STDC_CX_LIMITED_RANGE  /**< #pragma STDC CX_LIMITED_RANGE */
} stdc_pragma_kind_t;
2194
/** Argument of a "#pragma STDC" directive. */
typedef enum stdc_pragma_value_kind_t {
        STDC_VALUE_UNKNOWN = 0,  /**< missing or unrecognized argument */
        STDC_VALUE_ON,           /**< ON */
        STDC_VALUE_OFF,          /**< OFF */
        STDC_VALUE_DEFAULT       /**< DEFAULT */
} stdc_pragma_value_kind_t;
2201
2202 static void parse_pragma_directive(void)
2203 {
2204         eat_pp(TP_pragma);
2205         if (skip_mode) {
2206                 eat_pp_directive();
2207                 return;
2208         }
2209
2210         if (pp_token.kind != T_IDENTIFIER) {
2211                 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
2212                          "expected identifier after #pragma");
2213                 eat_pp_directive();
2214                 return;
2215         }
2216
2217         stdc_pragma_kind_t kind = STDC_UNKNOWN;
2218         if (pp_token.base.symbol->pp_ID == TP_STDC && c_mode & _C99) {
2219                 /* a STDC pragma */
2220                 next_input_token();
2221
2222                 switch (pp_token.base.symbol->pp_ID) {
2223                 case TP_FP_CONTRACT:      kind = STDC_FP_CONTRACT;      break;
2224                 case TP_FENV_ACCESS:      kind = STDC_FENV_ACCESS;      break;
2225                 case TP_CX_LIMITED_RANGE: kind = STDC_CX_LIMITED_RANGE; break;
2226                 default:                  break;
2227                 }
2228                 if (kind != STDC_UNKNOWN) {
2229                         next_input_token();
2230                         stdc_pragma_value_kind_t value;
2231                         switch (pp_token.base.symbol->pp_ID) {
2232                         case TP_ON:      value = STDC_VALUE_ON;      break;
2233                         case TP_OFF:     value = STDC_VALUE_OFF;     break;
2234                         case TP_DEFAULT: value = STDC_VALUE_DEFAULT; break;
2235                         default:         value = STDC_VALUE_UNKNOWN; break;
2236                         }
2237                         if (value == STDC_VALUE_UNKNOWN) {
2238                                 kind = STDC_UNKNOWN;
2239                                 errorf(&pp_token.base.source_position, "bad STDC pragma argument");
2240                         }
2241                 }
2242         }
2243         eat_pp_directive();
2244         if (kind == STDC_UNKNOWN) {
2245                 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
2246                          "encountered unknown #pragma");
2247         }
2248 }
2249
2250 static void parse_line_directive(void)
2251 {
2252         if (pp_token.kind != T_NUMBER) {
2253                 if (!skip_mode)
2254                         parse_error("expected integer");
2255         } else {
2256                 char      *end;
2257                 long const line = strtol(pp_token.literal.string.begin, &end, 0);
2258                 if (*end == '\0') {
2259                         /* use offset -1 as this is about the next line */
2260                         input.position.lineno = line - 1;
2261                         /* force output of line */
2262                         input.output_line = input.position.lineno - 20;
2263                 } else {
2264                         if (!skip_mode) {
2265                                 errorf(&input.position, "'%S' is not a valid line number",
2266                                            &pp_token.literal.string);
2267                         }
2268                 }
2269                 next_input_token();
2270                 if (info.at_line_begin)
2271                         return;
2272         }
2273         if (pp_token.kind == T_STRING_LITERAL
2274             && pp_token.literal.string.encoding == STRING_ENCODING_CHAR) {
2275                 input.position.input_name       = pp_token.literal.string.begin;
2276                 input.position.is_system_header = false;
2277                 next_input_token();
2278
2279                 /* attempt to parse numeric flags as outputted by gcc preprocessor */
2280                 while (!info.at_line_begin && pp_token.kind == T_NUMBER) {
2281                         /* flags:
2282                          * 1 - indicates start of a new file
2283                          * 2 - indicates return from a file
2284                          * 3 - indicates system header
2285                          * 4 - indicates implicit extern "C" in C++ mode
2286                          *
2287                          * currently we're only interested in "3"
2288                          */
2289                         if (streq(pp_token.literal.string.begin, "3")) {
2290                                 input.position.is_system_header = true;
2291                         }
2292                         next_input_token();
2293                 }
2294         }
2295
2296         eat_pp_directive();
2297 }
2298
2299 static void parse_error_directive(void)
2300 {
2301         if (skip_mode) {
2302                 eat_pp_directive();
2303                 return;
2304         }
2305
2306         bool const old_resolve_escape_sequences = resolve_escape_sequences;
2307         resolve_escape_sequences = false;
2308
2309         source_position_t const pos = pp_token.base.source_position;
2310         do {
2311                 if (info.had_whitespace && obstack_object_size(&pp_obstack) != 0)
2312                         obstack_1grow(&pp_obstack, ' ');
2313
2314                 switch (pp_token.kind) {
2315                 case T_NUMBER: {
2316                         string_t const *const str = &pp_token.literal.string;
2317                         obstack_grow(&pp_obstack, str->begin, str->size);
2318                         break;
2319                 }
2320
2321                 {
2322                         char delim;
2323                 case T_STRING_LITERAL:     delim =  '"'; goto string;
2324                 case T_CHARACTER_CONSTANT: delim = '\''; goto string;
2325 string:;
2326                         string_t const *const str = &pp_token.literal.string;
2327                         char     const *const enc = get_string_encoding_prefix(str->encoding);
2328                         obstack_printf(&pp_obstack, "%s%c%s%c", enc, delim, str->begin, delim);
2329                         break;
2330                 }
2331
2332                 default: {
2333                         char const *const str = pp_token.base.symbol->string;
2334                         obstack_grow(&pp_obstack, str, strlen(str));
2335                         break;
2336                 }
2337                 }
2338
2339                 next_input_token();
2340         } while (!info.at_line_begin);
2341
2342         resolve_escape_sequences = old_resolve_escape_sequences;
2343
2344         obstack_1grow(&pp_obstack, '\0');
2345         char *const str = obstack_finish(&pp_obstack);
2346         errorf(&pos, "#%s", str);
2347         obstack_free(&pp_obstack, str);
2348 }
2349
2350 static void parse_preprocessing_directive(void)
2351 {
2352         eat_token('#');
2353
2354         if (info.at_line_begin) {
2355                 /* empty directive */
2356                 return;
2357         }
2358
2359         if (pp_token.base.symbol) {
2360                 switch (pp_token.base.symbol->pp_ID) {
2361                 case TP_define:       parse_define_directive();            break;
2362                 case TP_else:         parse_else_directive();              break;
2363                 case TP_endif:        parse_endif_directive();             break;
2364                 case TP_error:        parse_error_directive();             break;
2365                 case TP_ifdef:        parse_ifdef_ifndef_directive(true);  break;
2366                 case TP_ifndef:       parse_ifdef_ifndef_directive(false); break;
2367                 case TP_include:      parse_include_directive(false);      break;
2368                 case TP_include_next: parse_include_directive(true);       break;
2369                 case TP_line:         next_input_token(); goto line_directive;
2370                 case TP_pragma:       parse_pragma_directive();            break;
2371                 case TP_undef:        parse_undef_directive();             break;
2372                 default:              goto skip;
2373                 }
2374         } else if (pp_token.kind == T_NUMBER) {
2375 line_directive:
2376                 parse_line_directive();
2377         } else {
2378 skip:
2379                 if (!skip_mode) {
2380                         errorf(&pp_token.base.source_position, "invalid preprocessing directive #%K", &pp_token);
2381                 }
2382                 eat_pp_directive();
2383         }
2384
2385         assert(info.at_line_begin);
2386 }
2387
2388 static void finish_current_argument(void)
2389 {
2390         if (current_argument == NULL)
2391                 return;
2392         size_t size = obstack_object_size(&pp_obstack);
2393         current_argument->list_len   = size/sizeof(current_argument->token_list[0]);
2394         current_argument->token_list = obstack_finish(&pp_obstack);
2395 }
2396
2397 void next_preprocessing_token(void)
2398 {
2399 restart:
2400         if (!expand_next()) {
2401                 do {
2402                         next_input_token();
2403                         while (pp_token.kind == '#' && info.at_line_begin) {
2404                                 parse_preprocessing_directive();
2405                         }
2406                 } while (skip_mode && pp_token.kind != T_EOF);
2407         }
2408
2409         const token_kind_t kind = pp_token.kind;
2410         if (current_call == NULL || argument_expanding != NULL) {
2411                 symbol_t *const symbol = pp_token.base.symbol;
2412                 if (symbol) {
2413                         if (kind == T_MACRO_PARAMETER) {
2414                                 assert(current_expansion != NULL);
2415                                 start_expanding(pp_token.macro_parameter.def);
2416                                 goto restart;
2417                         }
2418
2419                         pp_definition_t *const pp_definition = symbol->pp_definition;
2420                         if (pp_definition != NULL && !pp_definition->is_expanding) {
2421                                 if (pp_definition->has_parameters) {
2422
2423                                         /* check if next token is a '(' */
2424                                         whitespace_info_t old_info   = info;
2425                                         token_kind_t      next_token = peek_expansion();
2426                                         if (next_token == T_EOF) {
2427                                                 info.at_line_begin  = false;
2428                                                 info.had_whitespace = false;
2429                                                 skip_whitespace();
2430                                                 if (input.c == '(') {
2431                                                         next_token = '(';
2432                                                 }
2433                                         }
2434
2435                                         if (next_token == '(') {
2436                                                 if (current_expansion == NULL)
2437                                                         expansion_pos = pp_token.base.source_position;
2438                                                 next_preprocessing_token();
2439                                                 assert(pp_token.kind == '(');
2440
2441                                                 pp_definition->parent_expansion = current_expansion;
2442                                                 current_call              = pp_definition;
2443                                                 current_call->expand_pos  = 0;
2444                                                 current_call->expand_info = old_info;
2445                                                 if (current_call->n_parameters > 0) {
2446                                                         current_argument = &current_call->parameters[0];
2447                                                         assert(argument_brace_count == 0);
2448                                                 }
2449                                                 goto restart;
2450                                         } else {
2451                                                 /* skip_whitespaces() skipped newlines and whitespace,
2452                                                  * remember results for next token */
2453                                                 next_info = info;
2454                                                 info      = old_info;
2455                                                 return;
2456                                         }
2457                                 } else {
2458                                         if (current_expansion == NULL)
2459                                                 expansion_pos = pp_token.base.source_position;
2460                                         start_expanding(pp_definition);
2461                                         goto restart;
2462                                 }
2463                         }
2464                 }
2465         }
2466
2467         if (current_call != NULL) {
2468                 /* current_call != NULL */
2469                 if (kind == '(') {
2470                         ++argument_brace_count;
2471                 } else if (kind == ')') {
2472                         if (argument_brace_count > 0) {
2473                                 --argument_brace_count;
2474                         } else {
2475                                 finish_current_argument();
2476                                 assert(kind == ')');
2477                                 start_expanding(current_call);
2478                                 info = current_call->expand_info;
2479                                 current_call     = NULL;
2480                                 current_argument = NULL;
2481                                 goto restart;
2482                         }
2483                 } else if (kind == ',' && argument_brace_count == 0) {
2484                         finish_current_argument();
2485                         current_call->expand_pos++;
2486                         if (current_call->expand_pos >= current_call->n_parameters) {
2487                                 errorf(&pp_token.base.source_position,
2488                                            "too many arguments passed for macro '%Y'",
2489                                            current_call->symbol);
2490                                 current_argument = NULL;
2491                         } else {
2492                                 current_argument
2493                                         = &current_call->parameters[current_call->expand_pos];
2494                         }
2495                         goto restart;
2496                 } else if (kind == T_MACRO_PARAMETER) {
2497                         /* parameters have to be fully expanded before being used as
2498                          * parameters for another macro-call */
2499                         assert(current_expansion != NULL);
2500                         pp_definition_t *argument = pp_token.macro_parameter.def;
2501                         argument_expanding = argument;
2502                         start_expanding(argument);
2503                         goto restart;
2504                 } else if (kind == T_EOF) {
2505                         errorf(&expansion_pos,
2506                                "reached end of file while parsing arguments for '%Y'",
2507                                current_call->symbol);
2508                         return;
2509                 }
2510                 if (current_argument != NULL) {
2511                         saved_token_t saved;
2512                         saved.token = pp_token;
2513                         saved.had_whitespace = info.had_whitespace;
2514                         obstack_grow(&pp_obstack, &saved, sizeof(saved));
2515                 }
2516                 goto restart;
2517         }
2518 }
2519
2520 void append_include_path(searchpath_t *paths, const char *path)
2521 {
2522         searchpath_entry_t *entry = OALLOCZ(&config_obstack, searchpath_entry_t);
2523         entry->path           = path;
2524         entry->is_system_path = paths->is_system_path;
2525
2526         *paths->anchor = entry;
2527         paths->anchor  = &entry->next;
2528 }
2529
2530 static void append_env_paths(searchpath_t *paths, const char *envvar)
2531 {
2532         const char *val = getenv(envvar);
2533         if (val != NULL && *val != '\0') {
2534                 const char *begin = val;
2535                 const char *c;
2536                 do {
2537                         c = begin;
2538                         while (*c != '\0' && *c != ':')
2539                                 ++c;
2540
2541                         size_t len = c-begin;
2542                         if (len == 0) {
2543                                 /* use "." for gcc compatibility (Matze: I would expect that
2544                                  * nothing happens for an empty entry...) */
2545                                 append_include_path(paths, ".");
2546                         } else {
2547                                 char *const string = obstack_copy0(&config_obstack, begin, len);
2548                                 append_include_path(paths, string);
2549                         }
2550
2551                         begin = c+1;
2552                         /* skip : */
2553                         if (*begin == ':')
2554                                 ++begin;
2555                 } while(*c != '\0');
2556         }
2557 }
2558
2559 static void append_searchpath(searchpath_t *path, const searchpath_t *append)
2560 {
2561         *path->anchor = append->first;
2562 }
2563
2564 static void setup_include_path(void)
2565 {
2566         /* built-in paths */
2567         append_include_path(&system_searchpath, "/usr/include");
2568
2569         /* parse environment variable */
2570         append_env_paths(&bracket_searchpath, "CPATH");
2571         append_env_paths(&system_searchpath,
2572                          c_mode & _CXX ? "CPLUS_INCLUDE_PATH" : "C_INCLUDE_PATH");
2573
2574         /* append system search path to bracket searchpath */
2575         append_searchpath(&system_searchpath,  &after_searchpath);
2576         append_searchpath(&bracket_searchpath, &system_searchpath);
2577         append_searchpath(&quote_searchpath, &bracket_searchpath);
2578 }
2579
2580 static void input_error(unsigned const delta_lines, unsigned const delta_cols, char const *const message)
2581 {
2582         source_position_t pos = pp_token.base.source_position;
2583         pos.lineno += delta_lines;
2584         pos.colno  += delta_cols;
2585         errorf(&pos, "%s", message);
2586 }
2587
2588 void init_include_paths(void)
2589 {
2590         obstack_init(&config_obstack);
2591 }
2592
2593 void init_preprocessor(void)
2594 {
2595         init_symbols();
2596
2597         obstack_init(&pp_obstack);
2598         obstack_init(&input_obstack);
2599         strset_init(&stringset);
2600
2601         setup_include_path();
2602
2603         set_input_error_callback(input_error);
2604 }
2605
2606 void exit_preprocessor(void)
2607 {
2608         obstack_free(&input_obstack, NULL);
2609         obstack_free(&pp_obstack, NULL);
2610         obstack_free(&config_obstack, NULL);
2611
2612         strset_destroy(&stringset);
2613 }
2614
2615 int pptest_main(int argc, char **argv);
2616 int pptest_main(int argc, char **argv)
2617 {
2618         init_symbol_table();
2619         init_include_paths();
2620         init_preprocessor();
2621         init_tokens();
2622
2623         error_on_unknown_chars   = false;
2624         resolve_escape_sequences = false;
2625
2626         /* simplistic commandline parser */
2627         const char *filename = NULL;
2628         const char *output = NULL;
2629         for (int i = 1; i < argc; ++i) {
2630                 const char *opt = argv[i];
2631                 if (streq(opt, "-I")) {
2632                         append_include_path(&bracket_searchpath, argv[++i]);
2633                         continue;
2634                 } else if (streq(opt, "-E")) {
2635                         /* ignore */
2636                 } else if (streq(opt, "-o")) {
2637                         output = argv[++i];
2638                         continue;
2639                 } else if (opt[0] == '-') {
2640                         fprintf(stderr, "Unknown option '%s'\n", opt);
2641                 } else {
2642                         if (filename != NULL)
2643                                 fprintf(stderr, "Multiple inputs not supported\n");
2644                         filename = argv[i];
2645                 }
2646         }
2647         if (filename == NULL) {
2648                 fprintf(stderr, "No input specified\n");
2649                 return 1;
2650         }
2651
2652         if (output == NULL) {
2653                 out = stdout;
2654         } else {
2655                 out = fopen(output, "w");
2656                 if (out == NULL) {
2657                         fprintf(stderr, "Couldn't open output '%s'\n", output);
2658                         return 1;
2659                 }
2660         }
2661
2662         /* just here for gcc compatibility */
2663         fprintf(out, "# 1 \"%s\"\n", filename);
2664         fprintf(out, "# 1 \"<built-in>\"\n");
2665         fprintf(out, "# 1 \"<command-line>\"\n");
2666
2667         FILE *file = fopen(filename, "r");
2668         if (file == NULL) {
2669                 fprintf(stderr, "Couldn't open input '%s'\n", filename);
2670                 return 1;
2671         }
2672         switch_pp_input(file, filename, NULL, false);
2673
2674         for (;;) {
2675                 next_preprocessing_token();
2676                 if (pp_token.kind == T_EOF)
2677                         break;
2678                 emit_pp_token();
2679         }
2680
2681         fputc('\n', out);
2682         check_unclosed_conditionals();
2683         fclose(close_pp_input());
2684         if (out != stdout)
2685                 fclose(out);
2686
2687         exit_tokens();
2688         exit_preprocessor();
2689         exit_symbol_table();
2690
2691         return 0;
2692 }