nsz Git - cparser/blob - preprocessor.c

   1 #include <config.h>
   2
   3 #include <assert.h>
   4 #include <errno.h>
   5 #include <string.h>
   6 #include <stdbool.h>
   7 #include <ctype.h>
   8
   9 #include "preprocessor.h"
  10 #include "token_t.h"
  11 #include "symbol_t.h"
  12 #include "adt/util.h"
  13 #include "adt/error.h"
  14 #include "adt/strutil.h"
  15 #include "adt/strset.h"
  16 #include "lang_features.h"
  17 #include "diagnostic.h"
  18 #include "string_rep.h"
  19 #include "input.h"
  20
/* Number of slots kept free at the front of pp_input_t.buf: freshly decoded
 * input is stored behind them, so put_back() always has room to push
 * already-read characters back in front of the read position. */
#define MAX_PUTBACK 3
/* NOTE(review): presumably the maximum nesting depth of #include; its use is
 * not visible in this part of the file. */
#define INCLUDE_LIMIT 199  /* 199 is for gcc "compatibility" */
  23
/** A token as stored in the replacement list of a pp_definition_t. */
typedef struct saved_token_t {
        token_t token;
        /** whitespace flag stored together with the token; start_expanding()
         * overwrites it for the first token of a replacement list */
        bool    had_whitespace;
} saved_token_t;
  28
/** Whitespace/line-start information tracked for a token. */
typedef struct whitespace_info_t {
        /** current token had whitespace in front of it */
        bool     had_whitespace;
        /** current token is at the beginning of a line.
         * => a "#" at line begin starts a preprocessing directive. */
        bool     at_line_begin;
        /** number of spaces before the first token in a line */
        unsigned whitespace_at_line_begin;
} whitespace_info_t;
  38
/**
 * A preprocessor definition together with the state of its current
 * expansion.
 * NOTE(review): judging by the is_parameter/function_definition fields the
 * same structure also describes macro parameters -- confirm against the
 * code that fills it in.
 */
struct pp_definition_t {
        symbol_t          *symbol;
        source_position_t  source_position;
        /** expansion that was active when start_expanding() was called for
         * this definition; finished_expanding() makes it current again */
        pp_definition_t   *parent_expansion;
        /** reset to 0 by start_expanding(); presumably the index of the next
         * token_list entry to deliver */
        size_t             expand_pos;
        whitespace_info_t  expand_info;
        bool               is_variadic    : 1;
        /** set by start_expanding(), cleared by finished_expanding() */
        bool               is_expanding   : 1;
        bool               has_parameters : 1;
        bool               is_parameter   : 1;
        pp_definition_t   *function_definition;
        size_t             n_parameters;
        pp_definition_t   *parameters;

        /* replacement */
        /** number of entries in token_list */
        size_t             list_len;
        saved_token_t     *token_list;
};
  57
/**
 * One entry of the stack of open conditionals (see conditional_stack);
 * entries are chained through their parent pointer.
 */
typedef struct pp_conditional_t pp_conditional_t;
struct pp_conditional_t {
        source_position_t  source_position;
        bool               condition;
        bool               in_else;
        /** conditional in skip mode (then+else gets skipped) */
        bool               skip;
        pp_conditional_t  *parent;
};
  67
/**
 * State of one input being read: the underlying stream, its decoder, a
 * buffer of decoded characters and the current source position.
 */
typedef struct pp_input_t pp_input_t;
struct pp_input_t {
        FILE               *file;
        /** decoder created by input_from_stream() in switch_pp_input() */
        input_t            *input;
        /** current character; EOF once the input is exhausted */
        utf32               c;
        /** decoded characters; the first MAX_PUTBACK slots are left free for
         * put_back() when the buffer is refilled */
        utf32               buf[1024+MAX_PUTBACK];
        /** one past the last valid character in buf (NULL before the first
         * read) */
        const utf32        *bufend;
        /** next character to read from buf (NULL before the first read) */
        const utf32        *bufpos;
        source_position_t   position;
        /** next entry on input_stack; only meaningful for saved copies */
        pp_input_t         *parent;
        unsigned            output_line;
        searchpath_entry_t *path;
};
  81
/** One directory of a search path; entries form a singly linked list. */
struct searchpath_entry_t {
        const char         *path;
        searchpath_entry_t *next;
        bool                is_system_path;
};
  87
/** the input currently being read */
static pp_input_t      input;

/** inputs saved by push_input(), most recent first (linked via parent) */
static pp_input_t     *input_stack;
/** number of entries on input_stack */
static unsigned        n_inputs;
/** storage for the entries of input_stack */
static struct obstack  input_obstack;

/** NOTE(review): presumably the innermost open conditional; its use is not
 * visible in this part of the file */
static pp_conditional_t *conditional_stack;
  95
/** token most recently produced (parse_string() stores its result here) */
token_t                  pp_token;
/** when false, the SYMBOL_CASES macros divert '$' to a dollar_sign label
 * instead of treating it as a symbol character */
bool                     allow_dollar_in_symbol   = true;
/** when true, parse_string() replaces escape sequences by their value;
 * otherwise the backslash and the character after it are copied as they
 * are */
static bool              resolve_escape_sequences = true;
static bool              error_on_unknown_chars   = true;
static bool              skip_mode;
static FILE             *out;
static struct obstack    pp_obstack;
static struct obstack    config_obstack;
static const char       *printed_input_name = NULL;
static source_position_t expansion_pos;
/** definition currently being expanded; maintained by start_expanding() and
 * finished_expanding() */
static pp_definition_t  *current_expansion  = NULL;
static pp_definition_t  *current_call       = NULL;
static pp_definition_t  *current_argument   = NULL;
/** reset to NULL by finished_expanding() once this definition is done */
static pp_definition_t  *argument_expanding = NULL;
static unsigned          argument_brace_count;
/** set of unique strings used by identify_string() */
static strset_t          stringset;
static token_kind_t      last_token;
 113
/** A list of search path entries. */
struct searchpath_t {
        /** first entry of the list, NULL while the list is empty */
        searchpath_entry_t  *first;
        /** initially points at first; NOTE(review): presumably always the
         * link where the next entry gets appended -- confirm in the append
         * code */
        searchpath_entry_t **anchor;
        bool                 is_system_path;
};

/* The four search paths start out empty; the initializers are positional:
 * { first, anchor, is_system_path }. */
searchpath_t bracket_searchpath = { NULL, &bracket_searchpath.first, false };
searchpath_t quote_searchpath   = { NULL, &quote_searchpath.first,   false };
searchpath_t system_searchpath  = { NULL, &system_searchpath.first,  true  };
searchpath_t after_searchpath   = { NULL, &after_searchpath.first,   true  };
 124
static whitespace_info_t next_info; /* valid if had_whitespace is true */
/** whitespace information of the current token */
static whitespace_info_t info;

/* forward declarations for functions used before their definition */
static inline void next_char(void);
static void next_input_token(void);
static void print_line_directive(const source_position_t *pos, const char *add);

/* symbols for the digraph spellings, filled in by init_symbols() */
static symbol_t *symbol_colongreater;
static symbol_t *symbol_lesscolon;
static symbol_t *symbol_lesspercent;
static symbol_t *symbol_percentcolon;
static symbol_t *symbol_percentcolonpercentcolon;
static symbol_t *symbol_percentgreater;
 138
 139 static void init_symbols(void)
 140 {
 141         symbol_colongreater             = symbol_table_insert(":>");
 142         symbol_lesscolon                = symbol_table_insert("<:");
 143         symbol_lesspercent              = symbol_table_insert("<%");
 144         symbol_percentcolon             = symbol_table_insert("%:");
 145         symbol_percentcolonpercentcolon = symbol_table_insert("%:%:");
 146         symbol_percentgreater           = symbol_table_insert("%>");
 147 }
 148
 149 void switch_pp_input(FILE *const file, char const *const filename, searchpath_entry_t *const path, bool const is_system_header)
 150 {
 151         input.file                      = file;
 152         input.input                     = input_from_stream(file, NULL);
 153         input.bufend                    = NULL;
 154         input.bufpos                    = NULL;
 155         input.output_line               = 0;
 156         input.position.input_name       = filename;
 157         input.position.lineno           = 1;
 158         input.position.is_system_header = is_system_header;
 159         input.path                      = path;
 160
 161         /* indicate that we're at a new input */
 162         print_line_directive(&input.position, input_stack != NULL ? "1" : NULL);
 163
 164         /* place a virtual '\n' so we realize we're at line begin */
 165         input.position.lineno = 0;
 166         input.c               = '\n';
 167 }
 168
 169 FILE *close_pp_input(void)
 170 {
 171         input_free(input.input);
 172
 173         FILE* const file = input.file;
 174         assert(file);
 175
 176         input.input  = NULL;
 177         input.file   = NULL;
 178         input.bufend = NULL;
 179         input.bufpos = NULL;
 180         input.c      = EOF;
 181
 182         return file;
 183 }
 184
 185 static void push_input(void)
 186 {
 187         pp_input_t *const saved_input = obstack_copy(&input_obstack, &input, sizeof(input));
 188
 189         /* adjust buffer positions */
 190         if (input.bufpos != NULL)
 191                 saved_input->bufpos = saved_input->buf + (input.bufpos - input.buf);
 192         if (input.bufend != NULL)
 193                 saved_input->bufend = saved_input->buf + (input.bufend - input.buf);
 194
 195         saved_input->parent = input_stack;
 196         input_stack         = saved_input;
 197         ++n_inputs;
 198 }
 199
 200 static void pop_restore_input(void)
 201 {
 202         assert(n_inputs > 0);
 203         assert(input_stack != NULL);
 204
 205         pp_input_t *saved_input = input_stack;
 206
 207         memcpy(&input, saved_input, sizeof(input));
 208         input.parent = NULL;
 209
 210         /* adjust buffer positions */
 211         if (saved_input->bufpos != NULL)
 212                 input.bufpos = input.buf + (saved_input->bufpos - saved_input->buf);
 213         if (saved_input->bufend != NULL)
 214                 input.bufend = input.buf + (saved_input->bufend - saved_input->buf);
 215
 216         input_stack = saved_input->parent;
 217         obstack_free(&input_obstack, saved_input);
 218         --n_inputs;
 219 }
 220
 221 /**
 222  * Prints a parse error message at the current token.
 223  *
 224  * @param msg   the error message
 225  */
 226 static void parse_error(const char *msg)
 227 {
 228         errorf(&pp_token.base.source_position,  "%s", msg);
 229 }
 230
 231 static inline void next_real_char(void)
 232 {
 233         assert(input.bufpos <= input.bufend);
 234         if (input.bufpos >= input.bufend) {
 235                 size_t const n = decode(input.input, input.buf + MAX_PUTBACK, lengthof(input.buf) - MAX_PUTBACK);
 236                 if (n == 0) {
 237                         input.c = EOF;
 238                         return;
 239                 }
 240                 input.bufpos = input.buf + MAX_PUTBACK;
 241                 input.bufend = input.bufpos + n;
 242         }
 243         input.c = *input.bufpos++;
 244         ++input.position.colno;
 245 }
 246
 247 /**
 248  * Put a character back into the buffer.
 249  *
 250  * @param pc  the character to put back
 251  */
 252 static inline void put_back(utf32 const pc)
 253 {
 254         assert(input.bufpos > input.buf);
 255         *(--input.bufpos - input.buf + input.buf) = (char) pc;
 256         --input.position.colno;
 257 }
 258
/*
 * Case label matching a line ending; use it as "case NEWLINE:" inside a
 * switch over input.c. It expands to the labels for '\r' and '\n' plus the
 * code that consumes the line ending ("\r", "\n" or "\r\n"), increments the
 * line number and resets the column to 1. The trailing "goto newline;
 * newline" makes the statements written after "case NEWLINE:" the
 * continuation of that code. Because of the label it can be used only once
 * per function.
 */
#define NEWLINE \
        '\r': \
                next_char(); \
                if (input.c == '\n') { \
        case '\n': \
                        next_char(); \
                } \
                ++input.position.lineno; \
                input.position.colno = 1; \
                goto newline; \
                newline // Let it look like an ordinary case label.

/* Consumes the current character, asserting that it is c_type. */
#define eat(c_type) (assert(input.c == c_type), next_char())
 272
 273 static void maybe_concat_lines(void)
 274 {
 275         eat('\\');
 276
 277         switch (input.c) {
 278         case NEWLINE:
 279                 info.whitespace_at_line_begin = 0;
 280                 return;
 281
 282         default:
 283                 break;
 284         }
 285
 286         put_back(input.c);
 287         input.c = '\\';
 288 }
 289
/**
 * Set input.c to the next input character, i.e. the character obtained
 * after joining lines that end in a backslash and after replacing
 * trigraphs.
 */
static inline void next_char(void)
{
        next_real_char();

        /* filter trigraphs and concatenated lines */
        if (UNLIKELY(input.c == '\\')) {
                maybe_concat_lines();
                goto end_of_next_char;
        }

        if (LIKELY(input.c != '?'))
                goto end_of_next_char;

        /* one '?' seen: a trigraph needs a second one */
        next_real_char();
        if (LIKELY(input.c != '?')) {
                put_back(input.c);
                input.c = '?';
                goto end_of_next_char;
        }

        /* "??" seen: the third character selects the replacement */
        next_real_char();
        switch (input.c) {
        case '=': input.c = '#'; break;
        case '(': input.c = '['; break;
        /* "??/" is a backslash and may therefore join two lines */
        case '/': input.c = '\\'; maybe_concat_lines(); break;
        case ')': input.c = ']'; break;
        case '\'': input.c = '^'; break;
        case '<': input.c = '{'; break;
        case '!': input.c = '|'; break;
        case '>': input.c = '}'; break;
        case '-': input.c = '~'; break;
        default:
                /* not a trigraph: push back the third character and the
                 * second '?', and deliver the first '?' */
                put_back(input.c);
                put_back('?');
                input.c = '?';
                break;
        }

end_of_next_char:;
#ifdef DEBUG_CHARS
        printf("nchar '%c'\n", input.c);
#endif
}
 337
 338
 339
 340 /**
 341  * Returns true if the given char is a octal digit.
 342  *
 343  * @param char  the character to check
 344  */
 345 static inline bool is_octal_digit(int chr)
 346 {
 347         switch (chr) {
 348         case '0':
 349         case '1':
 350         case '2':
 351         case '3':
 352         case '4':
 353         case '5':
 354         case '6':
 355         case '7':
 356                 return true;
 357         default:
 358                 return false;
 359         }
 360 }
 361
 362 /**
 363  * Returns the value of a digit.
 364  * The only portable way to do it ...
 365  */
 366 static int digit_value(int digit)
 367 {
 368         switch (digit) {
 369         case '0': return 0;
 370         case '1': return 1;
 371         case '2': return 2;
 372         case '3': return 3;
 373         case '4': return 4;
 374         case '5': return 5;
 375         case '6': return 6;
 376         case '7': return 7;
 377         case '8': return 8;
 378         case '9': return 9;
 379         case 'a':
 380         case 'A': return 10;
 381         case 'b':
 382         case 'B': return 11;
 383         case 'c':
 384         case 'C': return 12;
 385         case 'd':
 386         case 'D': return 13;
 387         case 'e':
 388         case 'E': return 14;
 389         case 'f':
 390         case 'F': return 15;
 391         default:
 392                 panic("wrong character given");
 393         }
 394 }
 395
 396 /**
 397  * Parses an octal character sequence.
 398  *
 399  * @param first_digit  the already read first digit
 400  */
 401 static utf32 parse_octal_sequence(const utf32 first_digit)
 402 {
 403         assert(is_octal_digit(first_digit));
 404         utf32 value = digit_value(first_digit);
 405         if (!is_octal_digit(input.c)) return value;
 406         value = 8 * value + digit_value(input.c);
 407         next_char();
 408         if (!is_octal_digit(input.c)) return value;
 409         value = 8 * value + digit_value(input.c);
 410         next_char();
 411         return value;
 412
 413 }
 414
 415 /**
 416  * Parses a hex character sequence.
 417  */
 418 static utf32 parse_hex_sequence(void)
 419 {
 420         utf32 value = 0;
 421         while (isxdigit(input.c)) {
 422                 value = 16 * value + digit_value(input.c);
 423                 next_char();
 424         }
 425         return value;
 426 }
 427
 428 static bool is_universal_char_valid(utf32 const v)
 429 {
 430         /* C11 §6.4.3:2 */
 431         if (v < 0xA0U && v != 0x24 && v != 0x40 && v != 0x60)
 432                 return false;
 433         if (0xD800 <= v && v <= 0xDFFF)
 434                 return false;
 435         return true;
 436 }
 437
 438 static utf32 parse_universal_char(unsigned const n_digits)
 439 {
 440         utf32 v = 0;
 441         for (unsigned k = n_digits; k != 0; --k) {
 442                 if (isxdigit(input.c)) {
 443                         v = 16 * v + digit_value(input.c);
 444                         if (!resolve_escape_sequences)
 445                                 obstack_1grow(&symbol_obstack, input.c);
 446                         next_char();
 447                 } else {
 448                         errorf(&input.position,
 449                                "short universal character name, expected %u more digits",
 450                                    k);
 451                         break;
 452                 }
 453         }
 454         if (!is_universal_char_valid(v)) {
 455                 errorf(&input.position,
 456                        "\\%c%0*X is not a valid universal character name",
 457                        n_digits == 4 ? 'u' : 'U', (int)n_digits, v);
 458         }
 459         return v;
 460 }
 461
 462 static bool is_universal_char_valid_identifier_c99(utf32 const v)
 463 {
 464         static const utf32 single_chars[] = {
 465                 0x00AA, 0x00BA, 0x0386, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0,
 466                 0x1F59, 0x1F5B, 0x1F5D, 0x05BF, 0x09B2, 0x0A02, 0x0A5E, 0x0A74,
 467                 0x0A8D, 0x0AD0, 0x0AE0, 0x0B9C, 0x0CDE, 0x0E84, 0x0E8A, 0x0E8D,
 468                 0x0EA5, 0x0EA7, 0x0EC6, 0x0F00, 0x0F35, 0x0F37, 0x0F39, 0x0F97,
 469                 0x0FB9, 0x00B5, 0x00B7, 0x02BB, 0x037A, 0x0559, 0x093D, 0x0B3D,
 470                 0x1FBE, 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128
 471         };
 472
 473         static const utf32 ranges[][2] = {
 474                 {0x00C0, 0x00D6}, {0x00D8, 0x00F6}, {0x00F8, 0x01F5}, {0x01FA, 0x0217},
 475                 {0x0250, 0x02A8}, {0x1E00, 0x1E9B}, {0x1EA0, 0x1EF9}, {0x0388, 0x038A},
 476                 {0x038E, 0x03A1}, {0x03A3, 0x03CE}, {0x03D0, 0x03D6}, {0x03E2, 0x03F3},
 477                 {0x1F00, 0x1F15}, {0x1F18, 0x1F1D}, {0x1F20, 0x1F45}, {0x1F48, 0x1F4D},
 478                 {0x1F50, 0x1F57}, {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4}, {0x1FB6, 0x1FBC},
 479                 {0x1FC2, 0x1FC4}, {0x1FC6, 0x1FCC}, {0x1FD0, 0x1FD3}, {0x1FD6, 0x1FDB},
 480                 {0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4}, {0x1FF6, 0x1FFC}, {0x0401, 0x040C},
 481                 {0x040E, 0x044F}, {0x0451, 0x045C}, {0x045E, 0x0481}, {0x0490, 0x04C4},
 482                 {0x04C7, 0x04C8}, {0x04CB, 0x04CC}, {0x04D0, 0x04EB}, {0x04EE, 0x04F5},
 483                 {0x04F8, 0x04F9}, {0x0531, 0x0556}, {0x0561, 0x0587}, {0x05B0, 0x05B9},
 484                 {0x05BB, 0x05BD}, {0x05C1, 0x05C2}, {0x05D0, 0x05EA}, {0x05F0, 0x05F2},
 485                 {0x0621, 0x063A}, {0x0640, 0x0652}, {0x0670, 0x06B7}, {0x06BA, 0x06BE},
 486                 {0x06C0, 0x06CE}, {0x06D0, 0x06DC}, {0x06E5, 0x06E8}, {0x06EA, 0x06ED},
 487                 {0x0901, 0x0903}, {0x0905, 0x0939}, {0x093E, 0x094D}, {0x0950, 0x0952},
 488                 {0x0958, 0x0963}, {0x0981, 0x0983}, {0x0985, 0x098C}, {0x098F, 0x0990},
 489                 {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B6, 0x09B9}, {0x09BE, 0x09C4},
 490                 {0x09C7, 0x09C8}, {0x09CB, 0x09CD}, {0x09DC, 0x09DD}, {0x09DF, 0x09E3},
 491                 {0x09F0, 0x09F1}, {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28},
 492                 {0x0A2A, 0x0A30}, {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, {0x0A38, 0x0A39},
 493                 {0x0A3E, 0x0A42}, {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A59, 0x0A5C},
 494                 {0x0A81, 0x0A83}, {0x0A85, 0x0A8B}, {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8},
 495                 {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3}, {0x0AB5, 0x0AB9}, {0x0ABD, 0x0AC5},
 496                 {0x0AC7, 0x0AC9}, {0x0ACB, 0x0ACD}, {0x0B01, 0x0B03}, {0x0B05, 0x0B0C},
 497                 {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, {0x0B32, 0x0B33},
 498                 {0x0B36, 0x0B39}, {0x0B3E, 0x0B43}, {0x0B47, 0x0B48}, {0x0B4B, 0x0B4D},
 499                 {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B61}, {0x0B82, 0x0B83}, {0x0B85, 0x0B8A},
 500                 {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A}, {0x0B9E, 0x0B9F},
 501                 {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA}, {0x0BAE, 0x0BB5}, {0x0BB7, 0x0BB9},
 502                 {0x0BBE, 0x0BC2}, {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD}, {0x0C01, 0x0C03},
 503                 {0x0C05, 0x0C0C}, {0x0C0E, 0x0C10}, {0x0C12, 0x0C28}, {0x0C2A, 0x0C33},
 504                 {0x0C35, 0x0C39}, {0x0C3E, 0x0C44}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D},
 505                 {0x0C60, 0x0C61}, {0x0C82, 0x0C83}, {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90},
 506                 {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3}, {0x0CB5, 0x0CB9}, {0x0CBE, 0x0CC4},
 507                 {0x0CC6, 0x0CC8}, {0x0CCA, 0x0CCD}, {0x0CE0, 0x0CE1}, {0x0D02, 0x0D03},
 508                 {0x0D05, 0x0D0C}, {0x0D0E, 0x0D10}, {0x0D12, 0x0D28}, {0x0D2A, 0x0D39},
 509                 {0x0D3E, 0x0D43}, {0x0D46, 0x0D48}, {0x0D4A, 0x0D4D}, {0x0D60, 0x0D61},
 510                 {0x0E01, 0x0E3A}, {0x0E40, 0x0E5B}, {0x0E81, 0x0E82}, {0x0E87, 0x0E88},
 511                 {0x0E94, 0x0E97}, {0x0E99, 0x0E9F}, {0x0EA1, 0x0EA3}, {0x0EAA, 0x0EAB},
 512                 {0x0EAD, 0x0EAE}, {0x0EB0, 0x0EB9}, {0x0EBB, 0x0EBD}, {0x0EC0, 0x0EC4},
 513                 {0x0EC8, 0x0ECD}, {0x0EDC, 0x0EDD}, {0x0F18, 0x0F19}, {0x0F3E, 0x0F47},
 514                 {0x0F49, 0x0F69}, {0x0F71, 0x0F84}, {0x0F86, 0x0F8B}, {0x0F90, 0x0F95},
 515                 {0x0F99, 0x0FAD}, {0x0FB1, 0x0FB7}, {0x10A0, 0x10C5}, {0x10D0, 0x10F6},
 516                 {0x3041, 0x3093}, {0x309B, 0x309C}, {0x30A1, 0x30F6}, {0x30FB, 0x30FC},
 517                 {0x3105, 0x312C}, {0x4E00, 0x9FA5}, {0xAC00, 0xD7A3}, {0x0660, 0x0669},
 518                 {0x06F0, 0x06F9}, {0x0966, 0x096F}, {0x09E6, 0x09EF}, {0x0A66, 0x0A6F},
 519                 {0x0AE6, 0x0AEF}, {0x0B66, 0x0B6F}, {0x0BE7, 0x0BEF}, {0x0C66, 0x0C6F},
 520                 {0x0CE6, 0x0CEF}, {0x0D66, 0x0D6F}, {0x0E50, 0x0E59}, {0x0ED0, 0x0ED9},
 521                 {0x0F20, 0x0F33}, {0x02B0, 0x02B8}, {0x02BD, 0x02C1}, {0x02D0, 0x02D1},
 522                 {0x02E0, 0x02E4}, {0x203F, 0x2040}, {0x210A, 0x2113}, {0x2118, 0x211D},
 523                 {0x212A, 0x2131}, {0x2133, 0x2138}, {0x2160, 0x2182}, {0x3005, 0x3007},
 524                 {0x3021, 0x3029},
 525         };
 526         for (size_t i = 0; i < sizeof(ranges)/sizeof(ranges[0]); ++i) {
 527                 if (ranges[i][0] <= v && v <= ranges[i][1])
 528                         return true;
 529         }
 530         for (size_t i = 0; i < sizeof(single_chars)/sizeof(single_chars[0]); ++i) {
 531                 if (v == single_chars[i])
 532                         return true;
 533         }
 534         return false;
 535 }
 536
 537 static bool is_universal_char_valid_identifier_c11(utf32 const v)
 538 {
 539         /* C11 Annex D.1 */
 540         if (                v == 0x000A8) return true;
 541         if (                v == 0x000AA) return true;
 542         if (                v == 0x000AD) return true;
 543         if (                v == 0x000AF) return true;
 544         if (0x000B2 <= v && v <= 0x000B5) return true;
 545         if (0x000B7 <= v && v <= 0x000BA) return true;
 546         if (0x000BC <= v && v <= 0x000BE) return true;
 547         if (0x000C0 <= v && v <= 0x000D6) return true;
 548         if (0x000D8 <= v && v <= 0x000F6) return true;
 549         if (0x000F8 <= v && v <= 0x000FF) return true;
 550         if (0x00100 <= v && v <= 0x0167F) return true;
 551         if (0x01681 <= v && v <= 0x0180D) return true;
 552         if (0x0180F <= v && v <= 0x01FFF) return true;
 553         if (0x0200B <= v && v <= 0x0200D) return true;
 554         if (0x0202A <= v && v <= 0x0202E) return true;
 555         if (0x0203F <= v && v <= 0x02040) return true;
 556         if (                v == 0x02054) return true;
 557         if (0x02060 <= v && v <= 0x0206F) return true;
 558         if (0x02070 <= v && v <= 0x0218F) return true;
 559         if (0x02460 <= v && v <= 0x024FF) return true;
 560         if (0x02776 <= v && v <= 0x02793) return true;
 561         if (0x02C00 <= v && v <= 0x02DFF) return true;
 562         if (0x02E80 <= v && v <= 0x02FFF) return true;
 563         if (0x03004 <= v && v <= 0x03007) return true;
 564         if (0x03021 <= v && v <= 0x0302F) return true;
 565         if (0x03031 <= v && v <= 0x0303F) return true;
 566         if (0x03040 <= v && v <= 0x0D7FF) return true;
 567         if (0x0F900 <= v && v <= 0x0FD3D) return true;
 568         if (0x0FD40 <= v && v <= 0x0FDCF) return true;
 569         if (0x0FDF0 <= v && v <= 0x0FE44) return true;
 570         if (0x0FE47 <= v && v <= 0x0FFFD) return true;
 571         if (0x10000 <= v && v <= 0x1FFFD) return true;
 572         if (0x20000 <= v && v <= 0x2FFFD) return true;
 573         if (0x30000 <= v && v <= 0x3FFFD) return true;
 574         if (0x40000 <= v && v <= 0x4FFFD) return true;
 575         if (0x50000 <= v && v <= 0x5FFFD) return true;
 576         if (0x60000 <= v && v <= 0x6FFFD) return true;
 577         if (0x70000 <= v && v <= 0x7FFFD) return true;
 578         if (0x80000 <= v && v <= 0x8FFFD) return true;
 579         if (0x90000 <= v && v <= 0x9FFFD) return true;
 580         if (0xA0000 <= v && v <= 0xAFFFD) return true;
 581         if (0xB0000 <= v && v <= 0xBFFFD) return true;
 582         if (0xC0000 <= v && v <= 0xCFFFD) return true;
 583         if (0xD0000 <= v && v <= 0xDFFFD) return true;
 584         if (0xE0000 <= v && v <= 0xEFFFD) return true;
 585         return false;
 586 }
 587
 588 static bool is_universal_char_valid_identifier(utf32 const v)
 589 {
 590         if (c_mode & _C11)
 591                 return is_universal_char_valid_identifier_c11(v);
 592         return is_universal_char_valid_identifier_c99(v);
 593 }
 594
 595 static bool is_universal_char_invalid_identifier_start(utf32 const v)
 596 {
 597         if (! (c_mode & _C11))
 598                 return false;
 599
 600         /* C11 Annex D.2 */
 601         if (0x0300 <= v && v <= 0x036F) return true;
 602         if (0x1DC0 <= v && v <= 0x1DFF) return true;
 603         if (0x20D0 <= v && v <= 0x20FF) return true;
 604         if (0xFE20 <= v && v <= 0xFE2F) return true;
 605         return false;
 606 }
 607
 608 /**
 609  * Parse an escape sequence.
 610  */
 611 static utf32 parse_escape_sequence(void)
 612 {
 613         eat('\\');
 614
 615         utf32 const ec = input.c;
 616         next_char();
 617
 618         switch (ec) {
 619         case '"':  return '"';
 620         case '\'': return '\'';
 621         case '\\': return '\\';
 622         case '?': return '\?';
 623         case 'a': return '\a';
 624         case 'b': return '\b';
 625         case 'f': return '\f';
 626         case 'n': return '\n';
 627         case 'r': return '\r';
 628         case 't': return '\t';
 629         case 'v': return '\v';
 630         case 'x':
 631                 return parse_hex_sequence();
 632         case '0':
 633         case '1':
 634         case '2':
 635         case '3':
 636         case '4':
 637         case '5':
 638         case '6':
 639         case '7':
 640                 return parse_octal_sequence(ec);
 641         case EOF:
 642                 parse_error("reached end of file while parsing escape sequence");
 643                 return EOF;
 644         /* \E is not documented, but handled, by GCC.  It is acceptable according
 645          * to §6.11.4, whereas \e is not. */
 646         case 'E':
 647         case 'e':
 648                 if (c_mode & _GNUC)
 649                         return 27;   /* hopefully 27 is ALWAYS the code for ESCAPE */
 650                 break;
 651
 652         case 'U': return parse_universal_char(8);
 653         case 'u': return parse_universal_char(4);
 654
 655         default:
 656                 break;
 657         }
 658         /* §6.4.4.4:8 footnote 64 */
 659         parse_error("unknown escape sequence");
 660         return EOF;
 661 }
 662
 663 static const char *identify_string(char *string)
 664 {
 665         const char *result = strset_insert(&stringset, string);
 666         if (result != string) {
 667                 obstack_free(&symbol_obstack, string);
 668         }
 669         return result;
 670 }
 671
 672 static string_t sym_make_string(string_encoding_t const enc)
 673 {
 674         obstack_1grow(&symbol_obstack, '\0');
 675         size_t      const len    = obstack_object_size(&symbol_obstack) - 1;
 676         char       *const string = obstack_finish(&symbol_obstack);
 677         char const *const result = identify_string(string);
 678         return (string_t){ result, len, enc };
 679 }
 680
 681 string_t make_string(char const *const string)
 682 {
 683         obstack_grow(&symbol_obstack, string, strlen(string));
 684         return sym_make_string(STRING_ENCODING_CHAR);
 685 }
 686
 687 static void parse_string(utf32 const delimiter, token_kind_t const kind,
 688                          string_encoding_t const enc,
 689                          char const *const context)
 690 {
 691         const unsigned start_linenr = input.position.lineno;
 692
 693         eat(delimiter);
 694
 695         while (true) {
 696                 switch (input.c) {
 697                 case '\\': {
 698                         if (resolve_escape_sequences) {
 699                                 utf32 const tc = parse_escape_sequence();
 700                                 if (enc == STRING_ENCODING_CHAR) {
 701                                         if (tc >= 0x100) {
 702                                                 warningf(WARN_OTHER, &pp_token.base.source_position, "escape sequence out of range");
 703                                         }
 704                                         obstack_1grow(&symbol_obstack, tc);
 705                                 } else {
 706                                         obstack_grow_utf8(&symbol_obstack, tc);
 707                                 }
 708                         } else {
 709                                 obstack_1grow(&symbol_obstack, (char)input.c);
 710                                 next_char();
 711                                 obstack_1grow(&symbol_obstack, (char)input.c);
 712                                 next_char();
 713                         }
 714                         break;
 715                 }
 716
 717                 case NEWLINE:
 718                         errorf(&pp_token.base.source_position, "newline while parsing %s", context);
 719                         break;
 720
 721                 case EOF: {
 722                         source_position_t source_position;
 723                         source_position.input_name = pp_token.base.source_position.input_name;
 724                         source_position.lineno     = start_linenr;
 725                         errorf(&source_position, "EOF while parsing %s", context);
 726                         goto end_of_string;
 727                 }
 728
 729                 default:
 730                         if (input.c == delimiter) {
 731                                 next_char();
 732                                 goto end_of_string;
 733                         } else {
 734                                 obstack_grow_utf8(&symbol_obstack, input.c);
 735                                 next_char();
 736                                 break;
 737                         }
 738                 }
 739         }
 740
 741 end_of_string:
 742         pp_token.kind           = kind;
 743         pp_token.literal.string = sym_make_string(enc);
 744 }
 745
 746 static void parse_string_literal(string_encoding_t const enc)
 747 {
 748         parse_string('"', T_STRING_LITERAL, enc, "string literal");
 749 }
 750
 751 static void parse_character_constant(string_encoding_t const enc)
 752 {
 753         parse_string('\'', T_CHARACTER_CONSTANT, enc, "character constant");
 754         if (pp_token.literal.string.size == 0) {
 755                 parse_error("empty character constant");
 756         }
 757 }
 758
/*
 * List of case labels for '$', '_' and all ASCII letters except 'e', 'E',
 * 'p' and 'P'.  Intended to be written as `case SYMBOL_CASES_WITHOUT_E_P:`
 * inside a switch.
 *
 * The '$' label carries code: when allow_dollar_in_symbol is false it jumps
 * to a label named `dollar_sign`, otherwise it falls through to the letter
 * labels.  Every function using this macro therefore has to provide a
 * `dollar_sign` label.
 */
#define SYMBOL_CASES_WITHOUT_E_P \
             '$': if (!allow_dollar_in_symbol) goto dollar_sign; \
        case 'a': \
        case 'b': \
        case 'c': \
        case 'd': \
        case 'f': \
        case 'g': \
        case 'h': \
        case 'i': \
        case 'j': \
        case 'k': \
        case 'l': \
        case 'm': \
        case 'n': \
        case 'o': \
        case 'q': \
        case 'r': \
        case 's': \
        case 't': \
        case 'u': \
        case 'v': \
        case 'w': \
        case 'x': \
        case 'y': \
        case 'z': \
        case 'A': \
        case 'B': \
        case 'C': \
        case 'D': \
        case 'F': \
        case 'G': \
        case 'H': \
        case 'I': \
        case 'J': \
        case 'K': \
        case 'L': \
        case 'M': \
        case 'N': \
        case 'O': \
        case 'Q': \
        case 'R': \
        case 'S': \
        case 'T': \
        case 'U': \
        case 'V': \
        case 'W': \
        case 'X': \
        case 'Y': \
        case 'Z': \
        case '_'
 810
/*
 * SYMBOL_CASES_WITHOUT_E_P extended by the labels for 'e', 'p', 'E' and 'P'.
 * Written as `case SYMBOL_CASES:`; the enclosing function must still provide
 * the `dollar_sign` label required by SYMBOL_CASES_WITHOUT_E_P.
 */
#define SYMBOL_CASES \
             SYMBOL_CASES_WITHOUT_E_P: \
        case 'e': \
        case 'p': \
        case 'E': \
        case 'P'
 817
/*
 * List of case labels for the decimal digits '0' to '9'.
 * Written as `case DIGIT_CASES:` inside a switch.
 */
#define DIGIT_CASES \
             '0':  \
        case '1':  \
        case '2':  \
        case '3':  \
        case '4':  \
        case '5':  \
        case '6':  \
        case '7':  \
        case '8':  \
        case '9'
 829
 830 static void start_expanding(pp_definition_t *definition)
 831 {
 832         definition->parent_expansion = current_expansion;
 833         definition->expand_pos       = 0;
 834         definition->is_expanding     = true;
 835         if (definition->list_len > 0) {
 836                 definition->token_list[0].had_whitespace
 837                         = info.had_whitespace;
 838         }
 839         current_expansion = definition;
 840 }
 841
 842 static void finished_expanding(pp_definition_t *definition)
 843 {
 844         assert(definition->is_expanding);
 845         pp_definition_t *parent = definition->parent_expansion;
 846         definition->parent_expansion = NULL;
 847         definition->is_expanding     = false;
 848
 849         /* stop further expanding once we expanded a parameter used in a
 850          * sub macro-call */
 851         if (definition == argument_expanding)
 852                 argument_expanding = NULL;
 853
 854         assert(current_expansion == definition);
 855         current_expansion = parent;
 856 }
 857
 858 static void grow_string_escaped(struct obstack *obst, const string_t *string, char const *delimiter)
 859 {
 860         char const *prefix = get_string_encoding_prefix(string->encoding);
 861         obstack_printf(obst, "%s%s", prefix, delimiter);
 862         size_t      size = string->size;
 863         const char *str  = string->begin;
 864         if (resolve_escape_sequences) {
 865                 obstack_grow(obst, str, size);
 866         } else {
 867                 for (size_t i = 0; i < size; ++i) {
 868                         const char c = str[i];
 869                         if (c == '\\' || c == '"')
 870                                 obstack_1grow(obst, '\\');
 871                         obstack_1grow(obst, c);
 872                 }
 873         }
 874         obstack_printf(obst, "%s", delimiter);
 875 }
 876
 877 static void grow_token(struct obstack *obst, const token_t *token)
 878 {
 879         switch (token->kind) {
 880         case T_NUMBER:
 881                 obstack_grow(obst, token->literal.string.begin, token->literal.string.size);
 882                 break;
 883
 884         case T_STRING_LITERAL: {
 885                 char const *const delimiter = resolve_escape_sequences ? "\"" : "\\\"";
 886                 grow_string_escaped(obst, &token->literal.string, delimiter);
 887                 break;
 888         }
 889
 890         case T_CHARACTER_CONSTANT:
 891                 grow_string_escaped(obst, &token->literal.string, "'");
 892                 break;
 893
 894         case T_IDENTIFIER:
 895         default: {
 896                 const char *str = token->base.symbol->string;
 897                 size_t      len = strlen(str);
 898                 obstack_grow(obst, str, len);
 899                 break;
 900         }
 901         }
 902 }
 903
 904 static void stringify(const pp_definition_t *definition)
 905 {
 906         assert(obstack_object_size(&symbol_obstack) == 0);
 907
 908         size_t list_len = definition->list_len;
 909         for (size_t p = 0; p < list_len; ++p) {
 910                 const saved_token_t *saved = &definition->token_list[p];
 911                 if (p > 0 && saved->had_whitespace)
 912                         obstack_1grow(&symbol_obstack, ' ');
 913                 grow_token(&symbol_obstack, &saved->token);
 914         }
 915         pp_token.kind           = T_STRING_LITERAL;
 916         pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
 917 }
 918
/**
 * Makes pp_token a token of the given kind and sets its symbol to the entry
 * of token_symbols for that kind.
 */
static inline void set_punctuator(token_kind_t const kind)
{
        pp_token.kind        = kind;
        pp_token.base.symbol = token_symbols[kind];
}
 924
/**
 * Makes pp_token a token of the given kind with an explicitly supplied
 * symbol instead of the token_symbols entry used by set_punctuator().
 */
static inline void set_digraph(token_kind_t const kind, symbol_t *const symbol)
{
        pp_token.kind        = kind;
        pp_token.base.symbol = symbol;
}
 930
 931 /**
 932  * returns next final token from a preprocessor macro expansion
 933  */
 934 static bool expand_next(void)
 935 {
 936         if (current_expansion == NULL)
 937                 return false;
 938
 939 restart:;
 940         size_t pos = current_expansion->expand_pos;
 941         if (pos >= current_expansion->list_len) {
 942                 finished_expanding(current_expansion);
 943                 /* it was the outermost expansion, parse pptoken normally */
 944                 if (current_expansion == NULL) {
 945                         return false;
 946                 }
 947                 goto restart;
 948         }
 949         const saved_token_t *saved = &current_expansion->token_list[pos++];
 950         pp_token = saved->token;
 951         if (pp_token.kind == '#') {
 952                 if (pos < current_expansion->list_len) {
 953                         const saved_token_t *next = &current_expansion->token_list[pos];
 954                         if (next->token.kind == T_MACRO_PARAMETER) {
 955                                 pp_definition_t *def = next->token.macro_parameter.def;
 956                                 assert(def != NULL && def->is_parameter);
 957                                 stringify(def);
 958                                 ++pos;
 959                         }
 960                 }
 961         }
 962
 963         if (current_expansion->expand_pos > 0)
 964                 info.had_whitespace = saved->had_whitespace;
 965         current_expansion->expand_pos = pos;
 966         pp_token.base.source_position = expansion_pos;
 967
 968         return true;
 969 }
 970
 971 /**
 972  * Returns the next token kind found when continuing the current expansions
 973  * without starting new sub-expansions.
 974  */
 975 static token_kind_t peek_expansion(void)
 976 {
 977         for (pp_definition_t *e = current_expansion; e; e = e->parent_expansion) {
 978                 if (e->expand_pos < e->list_len)
 979                         return e->token_list[e->expand_pos].token.kind;
 980         }
 981         return T_EOF;
 982 }
 983
 984 static void skip_line_comment(void)
 985 {
 986         info.had_whitespace = true;
 987         while (true) {
 988                 switch (input.c) {
 989                 case EOF:
 990                         return;
 991
 992                 case '\r':
 993                 case '\n':
 994                         return;
 995
 996                 default:
 997                         next_char();
 998                         break;
 999                 }
1000         }
1001 }
1002
1003 static void skip_multiline_comment(void)
1004 {
1005         info.had_whitespace = true;
1006
1007         unsigned start_linenr = input.position.lineno;
1008         while (true) {
1009                 switch (input.c) {
1010                 case '/':
1011                         next_char();
1012                         if (input.c == '*') {
1013                                 /* TODO: nested comment, warn here */
1014                         }
1015                         break;
1016                 case '*':
1017                         next_char();
1018                         if (input.c == '/') {
1019                                 if (input.position.lineno != input.output_line)
1020                                         info.whitespace_at_line_begin = input.position.colno;
1021                                 next_char();
1022                                 return;
1023                         }
1024                         break;
1025
1026                 case NEWLINE:
1027                         break;
1028
1029                 case EOF: {
1030                         source_position_t source_position;
1031                         source_position.input_name = pp_token.base.source_position.input_name;
1032                         source_position.lineno     = start_linenr;
1033                         errorf(&source_position, "at end of file while looking for comment end");
1034                         return;
1035                 }
1036
1037                 default:
1038                         next_char();
1039                         break;
1040                 }
1041         }
1042 }
1043
/**
 * Skips spaces, tabs and comments on the current line.
 *
 * @param stop_at_non_whitespace  if true, stop at the first character that
 *                                is neither whitespace nor a comment instead
 *                                of skipping it
 * @return at a line ending: true if any other character was skipped, false
 *         otherwise;
 *         false when stopped because of @p stop_at_non_whitespace;
 *         true when a '/' that does not start a comment was found (the '/'
 *         is then the current input character again)
 */
static bool skip_till_newline(bool stop_at_non_whitespace)
{
        bool res = false;
        while (true) {
                switch (input.c) {
                case ' ':
                case '\t':
                        next_char();
                        continue;

                case '/':
                        next_char();
                        if (input.c == '/') {
                                next_char();
                                skip_line_comment();
                                continue;
                        } else if (input.c == '*') {
                                next_char();
                                skip_multiline_comment();
                                continue;
                        } else {
                                /* not a comment: undo the lookahead */
                                put_back(input.c);
                                input.c = '/';
                        }
                        return true;

                /* NEWLINE is a macro defined outside this section;
                 * presumably it matches and consumes a line ending */
                case NEWLINE:
                        return res;

                default:
                        if (stop_at_non_whitespace)
                                return false;
                        res = true;
                        next_char();
                        continue;
                }
        }
}
1082
/**
 * Skips spaces, tabs, line endings and comments and updates the whitespace
 * information in info accordingly.  Returns in front of the first character
 * that is none of these; a '/' that does not start a comment is restored as
 * the current input character.
 */
static void skip_whitespace(void)
{
        while (true) {
                switch (input.c) {
                case ' ':
                case '\t':
                        ++info.whitespace_at_line_begin;
                        info.had_whitespace = true;
                        next_char();
                        continue;

                /* NEWLINE is a macro defined outside this section;
                 * presumably it matches and consumes a line ending */
                case NEWLINE:
                        info.at_line_begin  = true;
                        info.had_whitespace = true;
                        info.whitespace_at_line_begin = 0;
                        continue;

                case '/':
                        next_char();
                        if (input.c == '/') {
                                next_char();
                                skip_line_comment();
                                continue;
                        } else if (input.c == '*') {
                                next_char();
                                skip_multiline_comment();
                                continue;
                        } else {
                                /* not a comment: undo the lookahead */
                                put_back(input.c);
                                input.c = '/';
                        }
                        return;

                default:
                        return;
                }
        }
}
1121
/**
 * Advances to the next input token.  Asserts that the symbol of the current
 * token has the preprocessor ID @p kind.
 */
static inline void eat_pp(pp_token_kind_t const kind)
{
        assert(pp_token.base.symbol->pp_ID == kind);
        /* kind is otherwise unused when assertions are compiled out */
        (void) kind;
        next_input_token();
}
1128
/**
 * Advances to the next input token.  Asserts that the current token is of
 * kind @p kind.
 */
static inline void eat_token(token_kind_t const kind)
{
        assert(pp_token.kind == kind);
        /* kind is otherwise unused when assertions are compiled out */
        (void)kind;
        next_input_token();
}
1135
1136 static void parse_symbol(void)
1137 {
1138         assert(obstack_object_size(&symbol_obstack) == 0);
1139         while (true) {
1140                 switch (input.c) {
1141                 case DIGIT_CASES:
1142                 case SYMBOL_CASES:
1143                         obstack_1grow(&symbol_obstack, (char) input.c);
1144                         next_char();
1145                         break;
1146
1147                 case '\\':
1148                         next_char();
1149                         switch (input.c) {
1150                         {
1151                                 unsigned n;
1152                         case 'U': n = 8; goto universal;
1153                         case 'u': n = 4; goto universal;
1154 universal:
1155                                 if (!resolve_escape_sequences) {
1156                                         obstack_1grow(&symbol_obstack, '\\');
1157                                         obstack_1grow(&symbol_obstack, input.c);
1158                                 }
1159                                 next_char();
1160                                 utf32 const v = parse_universal_char(n);
1161                                 if (!is_universal_char_valid_identifier(v)) {
1162                                         if (is_universal_char_valid(v)) {
1163                                                 errorf(&input.position,
1164                                                            "universal character \\%c%0*X is not valid in an identifier",
1165                                                            n == 4 ? 'u' : 'U', (int)n, v);
1166                                         }
1167                                 } else if (obstack_object_size(&symbol_obstack) == 0 && is_universal_char_invalid_identifier_start(v)) {
1168                                         errorf(&input.position,
1169                                                    "universal character \\%c%0*X is not valid as start of an identifier",
1170                                                    n == 4 ? 'u' : 'U', (int)n, v);
1171                                 } else if (resolve_escape_sequences) {
1172                                         obstack_grow_utf8(&symbol_obstack, v);
1173                                 }
1174                                 break;
1175                         }
1176
1177                         default:
1178                                 put_back(input.c);
1179                                 input.c = '\\';
1180                                 goto end_symbol;
1181                         }
1182
1183                 default:
1184 dollar_sign:
1185                         goto end_symbol;
1186                 }
1187         }
1188
1189 end_symbol:
1190         obstack_1grow(&symbol_obstack, '\0');
1191         char *string = obstack_finish(&symbol_obstack);
1192
1193         /* might be a wide string or character constant ( L"string"/L'c' ) */
1194         if (input.c == '"' && string[0] == 'L' && string[1] == '\0') {
1195                 obstack_free(&symbol_obstack, string);
1196                 parse_string_literal(STRING_ENCODING_WIDE);
1197                 return;
1198         } else if (input.c == '\'' && string[0] == 'L' && string[1] == '\0') {
1199                 obstack_free(&symbol_obstack, string);
1200                 parse_character_constant(STRING_ENCODING_WIDE);
1201                 return;
1202         }
1203
1204         symbol_t *symbol = symbol_table_insert(string);
1205
1206         pp_token.kind        = symbol->ID;
1207         pp_token.base.symbol = symbol;
1208
1209         /* we can free the memory from symbol obstack if we already had an entry in
1210          * the symbol table */
1211         if (symbol->string != string) {
1212                 obstack_free(&symbol_obstack, string);
1213         }
1214 }
1215
/**
 * Reads a preprocessing number starting at the current input character and
 * stores it in pp_token as T_NUMBER.
 *
 * The first character is taken unconditionally.  After that digits, '.',
 * and identifier characters are collected; an 'e', 'E', 'p' or 'P' may
 * additionally be followed by one '+' or '-' (exponent sign).
 */
static void parse_number(void)
{
        obstack_1grow(&symbol_obstack, (char) input.c);
        next_char();

        while (true) {
                switch (input.c) {
                case '.':
                case DIGIT_CASES:
                case SYMBOL_CASES_WITHOUT_E_P:
                        obstack_1grow(&symbol_obstack, (char) input.c);
                        next_char();
                        break;

                case 'e':
                case 'p':
                case 'E':
                case 'P':
                        obstack_1grow(&symbol_obstack, (char) input.c);
                        next_char();
                        /* a sign directly after the exponent marker belongs
                         * to the number */
                        if (input.c == '+' || input.c == '-') {
                                obstack_1grow(&symbol_obstack, (char) input.c);
                                next_char();
                        }
                        break;

                default:
/* jump target required by SYMBOL_CASES_WITHOUT_E_P for a disallowed '$' */
dollar_sign:
                        goto end_number;
                }
        }

end_number:
        pp_token.kind           = T_NUMBER;
        pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
}
1252
/*
 * Helper macros for recognising punctuators that consist of several
 * characters.  They are meant to be used together inside a function:
 * MAYBE_PROLOG opens a switch, MAYBE/MAYBE_DIGRAPH add cases to it and
 * ELSE/ELSE_CODE supply the default case and close it.
 */

/* Consumes the current character and opens a switch over the following one. */
#define MAYBE_PROLOG \
        next_char(); \
        switch (input.c) {

/* If the character is ch: consume it, produce punctuator `kind`, return. */
#define MAYBE(ch, kind) \
        case ch: \
                next_char(); \
                set_punctuator(kind); \
                return;

/* Like MAYBE, but the token gets the explicitly given symbol. */
#define MAYBE_DIGRAPH(ch, kind, symbol) \
        case ch: \
                next_char(); \
                set_digraph(kind, symbol); \
                return;

/* Default case running `code`; closes the switch opened by MAYBE_PROLOG. */
#define ELSE_CODE(code) \
        default: \
                code \
        }

/* Default case that produces punctuator `kind` and returns. */
#define ELSE(kind) ELSE_CODE(set_punctuator(kind); return;)
1275
/**
 * Identifies the next preprocessing token in the input stream and stores it
 * in pp_token.  No macro expansion is performed.
 *
 * Whitespace and comments in front of the token are skipped and recorded in
 * info.  When the end of an input is reached while input_stack is not empty,
 * that input is closed and reading continues with the restored one; otherwise
 * a T_EOF token is produced.
 */
static void next_input_token(void)
{
        /* take over whitespace information saved for this token, if any;
         * otherwise start with a clean state */
        if (next_info.had_whitespace) {
                info = next_info;
                next_info.had_whitespace = false;
        } else {
                info.at_line_begin  = false;
                info.had_whitespace = false;
        }
restart:
        pp_token.base.source_position = input.position;
        pp_token.base.symbol          = NULL;

        switch (input.c) {
        case ' ':
        case '\t':
                info.whitespace_at_line_begin++;
                info.had_whitespace = true;
                next_char();
                goto restart;

        case NEWLINE:
                info.at_line_begin            = true;
                info.had_whitespace           = true;
                info.whitespace_at_line_begin = 0;
                goto restart;

        case SYMBOL_CASES:
                parse_symbol();
                return;

        case DIGIT_CASES:
                parse_number();
                return;

        case '"':
                parse_string_literal(STRING_ENCODING_CHAR);
                return;

        case '\'':
                parse_character_constant(STRING_ENCODING_CHAR);
                return;

        case '.':
                MAYBE_PROLOG
                        /* '.' followed by a digit starts a number: restore
                         * the '.' as current character and parse it */
                        case '0':
                        case '1':
                        case '2':
                        case '3':
                        case '4':
                        case '5':
                        case '6':
                        case '7':
                        case '8':
                        case '9':
                                put_back(input.c);
                                input.c = '.';
                                parse_number();
                                return;

                        /* ".." is only a token when a third '.' follows;
                         * otherwise hand back the second '.' */
                        case '.':
                                MAYBE_PROLOG
                                MAYBE('.', T_DOTDOTDOT)
                                ELSE_CODE(
                                        put_back(input.c);
                                        input.c = '.';
                                        set_punctuator('.');
                                        return;
                                )
                ELSE('.')
        case '&':
                MAYBE_PROLOG
                MAYBE('&', T_ANDAND)
                MAYBE('=', T_ANDEQUAL)
                ELSE('&')
        case '*':
                MAYBE_PROLOG
                MAYBE('=', T_ASTERISKEQUAL)
                ELSE('*')
        case '+':
                MAYBE_PROLOG
                MAYBE('+', T_PLUSPLUS)
                MAYBE('=', T_PLUSEQUAL)
                ELSE('+')
        case '-':
                MAYBE_PROLOG
                MAYBE('>', T_MINUSGREATER)
                MAYBE('-', T_MINUSMINUS)
                MAYBE('=', T_MINUSEQUAL)
                ELSE('-')
        case '!':
                MAYBE_PROLOG
                MAYBE('=', T_EXCLAMATIONMARKEQUAL)
                ELSE('!')
        case '/':
                MAYBE_PROLOG
                MAYBE('=', T_SLASHEQUAL)
                /* comments count as whitespace: skip them and try again */
                case '*':
                        next_char();
                        skip_multiline_comment();
                        goto restart;
                case '/':
                        next_char();
                        skip_line_comment();
                        goto restart;
                ELSE('/')
        case '%':
                MAYBE_PROLOG
                MAYBE_DIGRAPH('>', '}', symbol_percentgreater)
                MAYBE('=', T_PERCENTEQUAL)
                /* "%:" is the digraph for '#', "%:%:" the one for "##" */
                case ':':
                        MAYBE_PROLOG
                        case '%':
                                MAYBE_PROLOG
                                MAYBE_DIGRAPH(':', T_HASHHASH, symbol_percentcolonpercentcolon)
                                ELSE_CODE(
                                        put_back(input.c);
                                        input.c = '%';
                                        goto digraph_percentcolon;
                                )
                        ELSE_CODE(
digraph_percentcolon:
                                set_digraph('#', symbol_percentcolon);
                                return;
                        )
                ELSE('%')
        case '<':
                MAYBE_PROLOG
                MAYBE_DIGRAPH(':', '[', symbol_lesscolon)
                MAYBE_DIGRAPH('%', '{', symbol_lesspercent)
                MAYBE('=', T_LESSEQUAL)
                case '<':
                        MAYBE_PROLOG
                        MAYBE('=', T_LESSLESSEQUAL)
                        ELSE(T_LESSLESS)
                ELSE('<')
        case '>':
                MAYBE_PROLOG
                MAYBE('=', T_GREATEREQUAL)
                case '>':
                        MAYBE_PROLOG
                        MAYBE('=', T_GREATERGREATEREQUAL)
                        ELSE(T_GREATERGREATER)
                ELSE('>')
        case '^':
                MAYBE_PROLOG
                MAYBE('=', T_CARETEQUAL)
                ELSE('^')
        case '|':
                MAYBE_PROLOG
                MAYBE('=', T_PIPEEQUAL)
                MAYBE('|', T_PIPEPIPE)
                ELSE('|')
        case ':':
                MAYBE_PROLOG
                MAYBE_DIGRAPH('>', ']', symbol_colongreater)
                /* "::" is a single token only if the _CXX bit is set in
                 * c_mode; otherwise the second ':' stays in the input */
                case ':':
                        if (c_mode & _CXX) {
                                next_char();
                                set_punctuator(T_COLONCOLON);
                                return;
                        }
                        /* FALLTHROUGH */
                ELSE(':')
        case '=':
                MAYBE_PROLOG
                MAYBE('=', T_EQUALEQUAL)
                ELSE('=')
        case '#':
                MAYBE_PROLOG
                MAYBE('#', T_HASHHASH)
                ELSE('#')

        case '?':
        case '[':
        case ']':
        case '(':
        case ')':
        case '{':
        case '}':
        case '~':
        case ';':
        case ',':
                set_punctuator(input.c);
                next_char();
                return;

        case EOF:
                if (input_stack != NULL) {
                        /* a saved input exists: close the finished one,
                         * restore the saved one and continue there */
                        fclose(close_pp_input());
                        pop_restore_input();
                        if (out)
                                fputc('\n', out);
                        if (input.c == (utf32)EOF)
                                --input.position.lineno;
                        print_line_directive(&input.position, "2");
                        goto restart;
                } else {
                        info.at_line_begin = true;
                        set_punctuator(T_EOF);
                }
                return;

        case '\\':
                /* look one character ahead, then restore the backslash as
                 * the current character; \u and \U start an identifier */
                next_char();
                int next_c = input.c;
                put_back(input.c);
                input.c = '\\';
                if (next_c == 'U' || next_c == 'u') {
                        parse_symbol();
                        return;
                }
                /* FALLTHROUGH */
        default:
/* jump target required by SYMBOL_CASES for a disallowed '$' */
dollar_sign:
                if (error_on_unknown_chars) {
                        errorf(&pp_token.base.source_position,
                               "unknown character '%lc' found\n", input.c);
                        next_char();
                        goto restart;
                } else {
                        /* pass the character on as a token of its own */
                        assert(obstack_object_size(&symbol_obstack) == 0);
                        obstack_grow_utf8(&symbol_obstack, input.c);
                        obstack_1grow(&symbol_obstack, '\0');
                        char     *const string = obstack_finish(&symbol_obstack);
                        symbol_t *const symbol = symbol_table_insert(string);
                        if (symbol->string != string)
                                obstack_free(&symbol_obstack, string);

                        pp_token.kind        = T_UNKNOWN_CHAR;
                        pp_token.base.symbol = symbol;
                        next_char();
                        return;
                }
        }
}
1514
1515 static void print_quoted_string(const char *const string)
1516 {
1517         fputc('"', out);
1518         for (const char *c = string; *c != 0; ++c) {
1519                 switch (*c) {
1520                 case '"': fputs("\\\"", out); break;
1521                 case '\\':  fputs("\\\\", out); break;
1522                 case '\a':  fputs("\\a", out); break;
1523                 case '\b':  fputs("\\b", out); break;
1524                 case '\f':  fputs("\\f", out); break;
1525                 case '\n':  fputs("\\n", out); break;
1526                 case '\r':  fputs("\\r", out); break;
1527                 case '\t':  fputs("\\t", out); break;
1528                 case '\v':  fputs("\\v", out); break;
1529                 case '\?':  fputs("\\?", out); break;
1530                 default:
1531                         if (!isprint(*c)) {
1532                                 fprintf(out, "\\%03o", (unsigned)*c);
1533                                 break;
1534                         }
1535                         fputc(*c, out);
1536                         break;
1537                 }
1538         }
1539         fputc('"', out);
1540 }
1541
1542 static void print_line_directive(const source_position_t *pos, const char *add)
1543 {
1544         if (!out)
1545                 return;
1546
1547         fprintf(out, "# %u ", pos->lineno);
1548         print_quoted_string(pos->input_name);
1549         if (add != NULL) {
1550                 fputc(' ', out);
1551                 fputs(add, out);
1552         }
1553         if (pos->is_system_header) {
1554                 fputs(" 3", out);
1555         }
1556
1557         printed_input_name = pos->input_name;
1558         input.output_line  = pos->lineno-1;
1559 }
1560
1561 static bool emit_newlines(void)
1562 {
1563         if (!out)
1564                 return true;
1565
1566         unsigned delta = pp_token.base.source_position.lineno - input.output_line;
1567         if (delta == 0)
1568                 return false;
1569
1570         if (delta >= 9) {
1571                 fputc('\n', out);
1572                 print_line_directive(&pp_token.base.source_position, NULL);
1573                 fputc('\n', out);
1574         } else {
1575                 for (unsigned i = 0; i < delta; ++i) {
1576                         fputc('\n', out);
1577                 }
1578         }
1579         input.output_line = pp_token.base.source_position.lineno;
1580
1581         unsigned whitespace = info.whitespace_at_line_begin;
1582         /* make sure there is at least 1 whitespace before a (macro-expanded)
1583          * '#' at line begin. I'm not sure why this is good, but gcc does it. */
1584         if (pp_token.kind == '#' && whitespace == 0)
1585                 ++whitespace;
1586         for (unsigned i = 0; i < whitespace; ++i)
1587                 fputc(' ', out);
1588
1589         return true;
1590 }
1591
1592 void set_preprocessor_output(FILE *output)
1593 {
1594         out = output;
1595         if (out != NULL) {
1596                 error_on_unknown_chars   = false;
1597                 resolve_escape_sequences = false;
1598         } else {
1599                 error_on_unknown_chars   = true;
1600                 resolve_escape_sequences = true;
1601         }
1602 }
1603
1604 void emit_pp_token(void)
1605 {
1606         if (!emit_newlines() &&
1607             (info.had_whitespace || tokens_would_paste(last_token, pp_token.kind)))
1608                 fputc(' ', out);
1609
1610         switch (pp_token.kind) {
1611         case T_NUMBER:
1612                 fputs(pp_token.literal.string.begin, out);
1613                 break;
1614
1615         case T_STRING_LITERAL:
1616                 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1617                 fputc('"', out);
1618                 fputs(pp_token.literal.string.begin, out);
1619                 fputc('"', out);
1620                 break;
1621
1622         case T_CHARACTER_CONSTANT:
1623                 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1624                 fputc('\'', out);
1625                 fputs(pp_token.literal.string.begin, out);
1626                 fputc('\'', out);
1627                 break;
1628
1629         case T_MACRO_PARAMETER:
1630                 panic("macro parameter not expanded");
1631
1632         default:
1633                 fputs(pp_token.base.symbol->string, out);
1634                 break;
1635         }
1636         last_token = pp_token.kind;
1637 }
1638
/**
 * Skips the remaining tokens of the current preprocessing directive:
 * reads input tokens until one is flagged as being at the begin of a line.
 */
static void eat_pp_directive(void)
{
        while (!info.at_line_begin) {
                next_input_token();
        }
}
1645
1646 static bool strings_equal(const string_t *string1, const string_t *string2)
1647 {
1648         size_t size = string1->size;
1649         if (size != string2->size)
1650                 return false;
1651
1652         const char *c1 = string1->begin;
1653         const char *c2 = string2->begin;
1654         for (size_t i = 0; i < size; ++i, ++c1, ++c2) {
1655                 if (*c1 != *c2)
1656                         return false;
1657         }
1658         return true;
1659 }
1660
1661 static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
1662 {
1663         if (token1->kind != token2->kind)
1664                 return false;
1665
1666         switch (token1->kind) {
1667         case T_NUMBER:
1668         case T_CHARACTER_CONSTANT:
1669         case T_STRING_LITERAL:
1670                 return strings_equal(&token1->literal.string, &token2->literal.string);
1671
1672         case T_MACRO_PARAMETER:
1673                 return token1->macro_parameter.def->symbol
1674                     == token2->macro_parameter.def->symbol;
1675
1676         default:
1677                 return token1->base.symbol == token2->base.symbol;
1678         }
1679 }
1680
1681 static bool pp_definitions_equal(const pp_definition_t *definition1,
1682                                  const pp_definition_t *definition2)
1683 {
1684         if (definition1->list_len != definition2->list_len)
1685                 return false;
1686
1687         size_t               len = definition1->list_len;
1688         const saved_token_t *t1  = definition1->token_list;
1689         const saved_token_t *t2  = definition2->token_list;
1690         for (size_t i = 0; i < len; ++i, ++t1, ++t2) {
1691                 if (!pp_tokens_equal(&t1->token, &t2->token))
1692                         return false;
1693                 if (t1->had_whitespace != t2->had_whitespace)
1694                         return false;
1695         }
1696         return true;
1697 }
1698
1699 static void missing_macro_param_error(void)
1700 {
1701         errorf(&pp_token.base.source_position,
1702                "'#' is not followed by a macro parameter");
1703 }
1704
1705 static bool is_defineable_token(char const *const context)
1706 {
1707         if (info.at_line_begin) {
1708                 errorf(&pp_token.base.source_position, "unexpected end of line after %s", context);
1709         }
1710
1711         symbol_t *const symbol = pp_token.base.symbol;
1712         if (!symbol)
1713                 goto no_ident;
1714
1715         if (pp_token.kind != T_IDENTIFIER) {
1716                 switch (symbol->string[0]) {
1717                 case SYMBOL_CASES:
1718 dollar_sign:
1719                         break;
1720
1721                 default:
1722 no_ident:
1723                         errorf(&pp_token.base.source_position, "expected identifier after %s, got %K", context, &pp_token);
1724                         return false;
1725                 }
1726         }
1727
1728         /* TODO turn this into a flag in pp_def. */
1729         switch (symbol->pp_ID) {
1730         /* §6.10.8:4 */
1731         case TP_defined:
1732                 errorf(&pp_token.base.source_position, "%K cannot be used as macro name in %s", &pp_token, context);
1733                 return false;
1734
1735         default:
1736                 return true;
1737         }
1738 }
1739
1740 static void parse_define_directive(void)
1741 {
1742         eat_pp(TP_define);
1743         if (skip_mode) {
1744                 eat_pp_directive();
1745                 return;
1746         }
1747
1748         assert(obstack_object_size(&pp_obstack) == 0);
1749
1750         if (!is_defineable_token("#define"))
1751                 goto error_out;
1752         symbol_t *const symbol = pp_token.base.symbol;
1753
1754         pp_definition_t *new_definition
1755                 = obstack_alloc(&pp_obstack, sizeof(new_definition[0]));
1756         memset(new_definition, 0, sizeof(new_definition[0]));
1757         new_definition->symbol          = symbol;
1758         new_definition->source_position = input.position;
1759
1760         /* this is probably the only place where spaces are significant in the
1761          * lexer (except for the fact that they separate tokens). #define b(x)
1762          * is something else than #define b (x) */
1763         if (input.c == '(') {
1764                 next_input_token();
1765                 eat_token('(');
1766
1767                 while (true) {
1768                         switch (pp_token.kind) {
1769                         case T_DOTDOTDOT:
1770                                 new_definition->is_variadic = true;
1771                                 eat_token(T_DOTDOTDOT);
1772                                 if (pp_token.kind != ')') {
1773                                         errorf(&input.position,
1774                                                         "'...' not at end of macro argument list");
1775                                         goto error_out;
1776                                 }
1777                                 break;
1778
1779                         case T_IDENTIFIER: {
1780                                 pp_definition_t parameter;
1781                                 memset(&parameter, 0, sizeof(parameter));
1782                                 parameter.source_position = pp_token.base.source_position;
1783                                 parameter.symbol          = pp_token.base.symbol;
1784                                 parameter.is_parameter    = true;
1785                                 obstack_grow(&pp_obstack, &parameter, sizeof(parameter));
1786                                 eat_token(T_IDENTIFIER);
1787
1788                                 if (pp_token.kind == ',') {
1789                                         eat_token(',');
1790                                         break;
1791                                 }
1792
1793                                 if (pp_token.kind != ')') {
1794                                         errorf(&pp_token.base.source_position,
1795                                                "expected ',' or ')' after identifier, got %K",
1796                                                &pp_token);
1797                                         goto error_out;
1798                                 }
1799                                 break;
1800                         }
1801
1802                         case ')':
1803                                 eat_token(')');
1804                                 goto finish_argument_list;
1805
1806                         default:
1807                                 errorf(&pp_token.base.source_position,
1808                                        "expected identifier, '...' or ')' in #define argument list, got %K",
1809                                        &pp_token);
1810                                 goto error_out;
1811                         }
1812                 }
1813
1814         finish_argument_list:
1815                 new_definition->has_parameters = true;
1816                 size_t size = obstack_object_size(&pp_obstack);
1817                 new_definition->n_parameters
1818                         = size / sizeof(new_definition->parameters[0]);
1819                 new_definition->parameters = obstack_finish(&pp_obstack);
1820                 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1821                         pp_definition_t *param    = &new_definition->parameters[i];
1822                         symbol_t        *symbol   = param->symbol;
1823                         pp_definition_t *previous = symbol->pp_definition;
1824                         if (previous != NULL
1825                             && previous->function_definition == new_definition) {
1826                                 errorf(&param->source_position,
1827                                        "duplicate macro parameter '%Y'", symbol);
1828                                 param->symbol = sym_anonymous;
1829                                 continue;
1830                         }
1831                         param->parent_expansion    = previous;
1832                         param->function_definition = new_definition;
1833                         symbol->pp_definition      = param;
1834                 }
1835         } else {
1836                 next_input_token();
1837         }
1838
1839         /* construct token list */
1840         assert(obstack_object_size(&pp_obstack) == 0);
1841         bool next_must_be_param = false;
1842         while (!info.at_line_begin) {
1843                 if (pp_token.kind == T_IDENTIFIER) {
1844                         const symbol_t  *symbol     = pp_token.base.symbol;
1845                         pp_definition_t *definition = symbol->pp_definition;
1846                         if (definition != NULL
1847                             && definition->function_definition == new_definition) {
1848                             pp_token.kind                = T_MACRO_PARAMETER;
1849                             pp_token.macro_parameter.def = definition;
1850                         }
1851                 }
1852                 if (next_must_be_param && pp_token.kind != T_MACRO_PARAMETER) {
1853                         missing_macro_param_error();
1854                 }
1855                 saved_token_t saved_token;
1856                 saved_token.token = pp_token;
1857                 saved_token.had_whitespace = info.had_whitespace;
1858                 obstack_grow(&pp_obstack, &saved_token, sizeof(saved_token));
1859                 next_must_be_param
1860                         = new_definition->has_parameters && pp_token.kind == '#';
1861                 next_input_token();
1862         }
1863         if (next_must_be_param)
1864                 missing_macro_param_error();
1865
1866         new_definition->list_len   = obstack_object_size(&pp_obstack)
1867                 / sizeof(new_definition->token_list[0]);
1868         new_definition->token_list = obstack_finish(&pp_obstack);
1869
1870         if (new_definition->has_parameters) {
1871                 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1872                         pp_definition_t *param      = &new_definition->parameters[i];
1873                         symbol_t        *symbol     = param->symbol;
1874                         if (symbol == sym_anonymous)
1875                                 continue;
1876                         assert(symbol->pp_definition == param);
1877                         assert(param->function_definition == new_definition);
1878                         symbol->pp_definition   = param->parent_expansion;
1879                         param->parent_expansion = NULL;
1880                 }
1881         }
1882
1883         pp_definition_t *old_definition = symbol->pp_definition;
1884         if (old_definition != NULL) {
1885                 if (!pp_definitions_equal(old_definition, new_definition)) {
1886                         warningf(WARN_OTHER, &input.position, "multiple definition of macro '%Y' (first defined %P)", symbol, &old_definition->source_position);
1887                 } else {
1888                         /* reuse the old definition */
1889                         obstack_free(&pp_obstack, new_definition);
1890                         new_definition = old_definition;
1891                 }
1892         }
1893
1894         symbol->pp_definition = new_definition;
1895         return;
1896
1897 error_out:
1898         if (obstack_object_size(&pp_obstack) > 0) {
1899                 char *ptr = obstack_finish(&pp_obstack);
1900                 obstack_free(&pp_obstack, ptr);
1901         }
1902         eat_pp_directive();
1903 }
1904
1905 static void parse_undef_directive(void)
1906 {
1907         eat_pp(TP_undef);
1908         if (skip_mode) {
1909                 eat_pp_directive();
1910                 return;
1911         }
1912
1913         if (!is_defineable_token("#undef")) {
1914                 eat_pp_directive();
1915                 return;
1916         }
1917
1918         pp_token.base.symbol->pp_definition = NULL;
1919         next_input_token();
1920
1921         if (!info.at_line_begin) {
1922                 warningf(WARN_OTHER, &input.position, "extra tokens at end of #undef directive");
1923         }
1924         eat_pp_directive();
1925 }
1926
1927 /** behind an #include we can have the special headername lexems.
1928  * They're only allowed behind an #include so they're not recognized
1929  * by the normal next_preprocessing_token. We handle them as a special
1930  * exception here */
1931 static const char *parse_headername(bool *system_include)
1932 {
1933         if (info.at_line_begin) {
1934                 parse_error("expected headername after #include");
1935                 return NULL;
1936         }
1937
1938         /* check wether we have a "... or <... headername */
1939         source_position_t position = input.position;
1940         switch (input.c) {
1941         {
1942                 utf32 delimiter;
1943         case '<': delimiter = '>'; *system_include = true;  goto parse_name;
1944         case '"': delimiter = '"'; *system_include = false; goto parse_name;
1945 parse_name:
1946                 assert(obstack_object_size(&symbol_obstack) == 0);
1947                 next_char();
1948                 while (true) {
1949                         switch (input.c) {
1950                         case NEWLINE:
1951                         case EOF:
1952                                 {
1953                                         char *dummy = obstack_finish(&symbol_obstack);
1954                                         obstack_free(&symbol_obstack, dummy);
1955                                 }
1956                                 errorf(&pp_token.base.source_position,
1957                                        "header name without closing '%c'", (char)delimiter);
1958                                 return NULL;
1959
1960                         default:
1961                                 if (input.c == delimiter) {
1962                                         next_char();
1963                                         goto finish_headername;
1964                                 } else {
1965                                         obstack_1grow(&symbol_obstack, (char)input.c);
1966                                         next_char();
1967                                 }
1968                                 break;
1969                         }
1970                 }
1971                 /* we should never be here */
1972         }
1973
1974         default:
1975                 next_preprocessing_token();
1976                 if (info.at_line_begin) {
1977                         /* TODO: if we are already in the new line then we parsed more than
1978                          * wanted. We reuse the token, but could produce following errors
1979                          * misbehaviours... */
1980                         goto error_invalid_input;
1981                 }
1982                 if (pp_token.kind == T_STRING_LITERAL) {
1983                         *system_include = false;
1984                         return pp_token.literal.string.begin;
1985                 } else if (pp_token.kind == '<') {
1986                         *system_include = true;
1987                         assert(obstack_object_size(&pp_obstack) == 0);
1988                         while (true) {
1989                                 next_preprocessing_token();
1990                                 if (info.at_line_begin) {
1991                                         /* TODO: we shouldn't have parsed/expanded something on the
1992                                          * next line yet... */
1993                                         char *dummy = obstack_finish(&pp_obstack);
1994                                         obstack_free(&pp_obstack, dummy);
1995                                         goto error_invalid_input;
1996                                 }
1997                                 if (pp_token.kind == '>')
1998                                         break;
1999
2000                                 saved_token_t saved;
2001                                 saved.token          = pp_token;
2002                                 saved.had_whitespace = info.had_whitespace;
2003                                 obstack_grow(&pp_obstack, &saved, sizeof(saved));
2004                         }
2005                         size_t size = obstack_object_size(&pp_obstack);
2006                         assert(size % sizeof(saved_token_t) == 0);
2007                         size_t n_tokens = size / sizeof(saved_token_t);
2008                         saved_token_t *tokens = obstack_finish(&pp_obstack);
2009                         assert(obstack_object_size(&symbol_obstack) == 0);
2010                         for (size_t i = 0; i < n_tokens; ++i) {
2011                                 const saved_token_t *saved = &tokens[i];
2012                                 if (i > 0 && saved->had_whitespace)
2013                                         obstack_1grow(&symbol_obstack, ' ');
2014                                 grow_token(&symbol_obstack, &saved->token);
2015                         }
2016                         obstack_free(&pp_obstack, tokens);
2017                         goto finish_headername;
2018                 } else {
2019 error_invalid_input:
2020                         {
2021                                 char *dummy = obstack_finish(&symbol_obstack);
2022                                 obstack_free(&symbol_obstack, dummy);
2023                         }
2024
2025                         errorf(&pp_token.base.source_position,
2026                                "expected \"FILENAME\" or <FILENAME> after #include");
2027                         return NULL;
2028                 }
2029         }
2030
2031 finish_headername:
2032         obstack_1grow(&symbol_obstack, '\0');
2033         char *const  headername = obstack_finish(&symbol_obstack);
2034         const char  *identified = identify_string(headername);
2035         pp_token.base.source_position = position;
2036         return identified;
2037 }
2038
2039 static bool do_include(bool const bracket_include, bool const include_next, char const *const headername)
2040 {
2041         size_t const        headername_len = strlen(headername);
2042         searchpath_entry_t *entry;
2043         if (include_next) {
2044                 entry = input.path      ? input.path->next
2045                       : bracket_include ? bracket_searchpath.first
2046                       : quote_searchpath.first;
2047         } else {
2048                 if (!bracket_include) {
2049                         /* put dirname of current input on obstack */
2050                         const char *filename   = input.position.input_name;
2051                         const char *last_slash = strrchr(filename, '/');
2052                         const char *full_name;
2053                         if (last_slash != NULL) {
2054                                 size_t len = last_slash - filename;
2055                                 obstack_grow(&symbol_obstack, filename, len + 1);
2056                                 obstack_grow0(&symbol_obstack, headername, headername_len);
2057                                 char *complete_path = obstack_finish(&symbol_obstack);
2058                                 full_name = identify_string(complete_path);
2059                         } else {
2060                                 full_name = headername;
2061                         }
2062
2063                         FILE *file = fopen(full_name, "r");
2064                         if (file != NULL) {
2065                                 switch_pp_input(file, full_name, NULL, false);
2066                                 return true;
2067                         }
2068                         entry = quote_searchpath.first;
2069                 } else {
2070                         entry = bracket_searchpath.first;
2071                 }
2072         }
2073
2074         assert(obstack_object_size(&symbol_obstack) == 0);
2075         /* check searchpath */
2076         for (; entry; entry = entry->next) {
2077             const char *path = entry->path;
2078             size_t      len  = strlen(path);
2079                 obstack_grow(&symbol_obstack, path, len);
2080                 if (path[len-1] != '/')
2081                         obstack_1grow(&symbol_obstack, '/');
2082                 obstack_grow(&symbol_obstack, headername, headername_len+1);
2083
2084                 char *complete_path = obstack_finish(&symbol_obstack);
2085                 FILE *file          = fopen(complete_path, "r");
2086                 if (file != NULL) {
2087                         const char *filename = identify_string(complete_path);
2088                         switch_pp_input(file, filename, entry, entry->is_system_path);
2089                         return true;
2090                 } else {
2091                         obstack_free(&symbol_obstack, complete_path);
2092                 }
2093         }
2094
2095         return false;
2096 }
2097
2098 static void parse_include_directive(bool const include_next)
2099 {
2100         if (skip_mode) {
2101                 eat_pp_directive();
2102                 return;
2103         }
2104
2105         /* do not eat the TP_include, since it would already parse the next token
2106          * which needs special handling here. */
2107         skip_till_newline(true);
2108         bool system_include;
2109         const char *headername = parse_headername(&system_include);
2110         if (headername == NULL) {
2111                 eat_pp_directive();
2112                 return;
2113         }
2114
2115         bool had_nonwhitespace = skip_till_newline(false);
2116         if (had_nonwhitespace) {
2117                 warningf(WARN_OTHER, &input.position,
2118                          "extra tokens at end of #include directive");
2119         }
2120
2121         if (n_inputs > INCLUDE_LIMIT) {
2122                 errorf(&pp_token.base.source_position, "#include nested too deeply");
2123                 /* eat \n or EOF */
2124                 next_input_token();
2125                 return;
2126         }
2127
2128         /* switch inputs */
2129         info.whitespace_at_line_begin = 0;
2130         info.had_whitespace           = false;
2131         info.at_line_begin            = true;
2132         emit_newlines();
2133         push_input();
2134         bool res = do_include(system_include, include_next, headername);
2135         if (res) {
2136                 next_input_token();
2137         } else {
2138                 errorf(&pp_token.base.source_position, "failed including '%s': %s", headername, strerror(errno));
2139                 pop_restore_input();
2140         }
2141 }
2142
2143 static pp_conditional_t *push_conditional(void)
2144 {
2145         pp_conditional_t *conditional
2146                 = obstack_alloc(&pp_obstack, sizeof(*conditional));
2147         memset(conditional, 0, sizeof(*conditional));
2148
2149         conditional->parent = conditional_stack;
2150         conditional_stack   = conditional;
2151
2152         return conditional;
2153 }
2154
2155 static void pop_conditional(void)
2156 {
2157         assert(conditional_stack != NULL);
2158         conditional_stack = conditional_stack->parent;
2159 }
2160
2161 void check_unclosed_conditionals(void)
2162 {
2163         while (conditional_stack != NULL) {
2164                 pp_conditional_t *conditional = conditional_stack;
2165
2166                 if (conditional->in_else) {
2167                         errorf(&conditional->source_position, "unterminated #else");
2168                 } else {
2169                         errorf(&conditional->source_position, "unterminated condition");
2170                 }
2171                 pop_conditional();
2172         }
2173 }
2174
2175 static void parse_ifdef_ifndef_directive(bool const is_ifdef)
2176 {
2177         bool condition;
2178         eat_pp(is_ifdef ? TP_ifdef : TP_ifndef);
2179
2180         if (skip_mode) {
2181                 eat_pp_directive();
2182                 pp_conditional_t *conditional = push_conditional();
2183                 conditional->source_position  = pp_token.base.source_position;
2184                 conditional->skip             = true;
2185                 return;
2186         }
2187
2188         if (pp_token.kind != T_IDENTIFIER || info.at_line_begin) {
2189                 errorf(&pp_token.base.source_position,
2190                        "expected identifier after #%s, got %K",
2191                        is_ifdef ? "ifdef" : "ifndef", &pp_token);
2192                 eat_pp_directive();
2193
2194                 /* just take the true case in the hope to avoid further errors */
2195                 condition = true;
2196         } else {
2197                 /* evaluate wether we are in true or false case */
2198                 condition = (bool)pp_token.base.symbol->pp_definition == is_ifdef;
2199                 eat_token(T_IDENTIFIER);
2200
2201                 if (!info.at_line_begin) {
2202                         errorf(&pp_token.base.source_position,
2203                                "extra tokens at end of #%s",
2204                                is_ifdef ? "ifdef" : "ifndef");
2205                         eat_pp_directive();
2206                 }
2207         }
2208
2209         pp_conditional_t *conditional = push_conditional();
2210         conditional->source_position  = pp_token.base.source_position;
2211         conditional->condition        = condition;
2212
2213         if (!condition) {
2214                 skip_mode = true;
2215         }
2216 }
2217
2218 static void parse_else_directive(void)
2219 {
2220         eat_pp(TP_else);
2221
2222         if (!info.at_line_begin) {
2223                 if (!skip_mode) {
2224                         warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #else");
2225                 }
2226                 eat_pp_directive();
2227         }
2228
2229         pp_conditional_t *conditional = conditional_stack;
2230         if (conditional == NULL) {
2231                 errorf(&pp_token.base.source_position, "#else without prior #if");
2232                 return;
2233         }
2234
2235         if (conditional->in_else) {
2236                 errorf(&pp_token.base.source_position,
2237                        "#else after #else (condition started %P)",
2238                        &conditional->source_position);
2239                 skip_mode = true;
2240                 return;
2241         }
2242
2243         conditional->in_else = true;
2244         if (!conditional->skip) {
2245                 skip_mode = conditional->condition;
2246         }
2247         conditional->source_position = pp_token.base.source_position;
2248 }
2249
2250 static void parse_endif_directive(void)
2251 {
2252         eat_pp(TP_endif);
2253
2254         if (!info.at_line_begin) {
2255                 if (!skip_mode) {
2256                         warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #endif");
2257                 }
2258                 eat_pp_directive();
2259         }
2260
2261         pp_conditional_t *conditional = conditional_stack;
2262         if (conditional == NULL) {
2263                 errorf(&pp_token.base.source_position, "#endif without prior #if");
2264                 return;
2265         }
2266
2267         if (!conditional->skip) {
2268                 skip_mode = false;
2269         }
2270         pop_conditional();
2271 }
2272
/** The pragmas that may follow "#pragma STDC". */
typedef enum stdc_pragma_kind_t {
	STDC_UNKNOWN,          /**< no (valid) STDC pragma */
	STDC_FP_CONTRACT,      /**< FP_CONTRACT */
	STDC_FENV_ACCESS,      /**< FENV_ACCESS */
	STDC_CX_LIMITED_RANGE  /**< CX_LIMITED_RANGE */
} stdc_pragma_kind_t;
2279
/** The argument given to a "#pragma STDC" pragma. */
typedef enum stdc_pragma_value_kind_t {
	STDC_VALUE_UNKNOWN,  /**< anything but the three arguments below */
	STDC_VALUE_ON,       /**< ON */
	STDC_VALUE_OFF,      /**< OFF */
	STDC_VALUE_DEFAULT   /**< DEFAULT */
} stdc_pragma_value_kind_t;
2286
2287 static void parse_pragma_directive(void)
2288 {
2289         eat_pp(TP_pragma);
2290         if (skip_mode) {
2291                 eat_pp_directive();
2292                 return;
2293         }
2294
2295         if (pp_token.kind != T_IDENTIFIER) {
2296                 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
2297                          "expected identifier after #pragma");
2298                 eat_pp_directive();
2299                 return;
2300         }
2301
2302         stdc_pragma_kind_t kind = STDC_UNKNOWN;
2303         if (pp_token.base.symbol->pp_ID == TP_STDC && c_mode & _C99) {
2304                 /* a STDC pragma */
2305                 next_input_token();
2306
2307                 switch (pp_token.base.symbol->pp_ID) {
2308                 case TP_FP_CONTRACT:      kind = STDC_FP_CONTRACT;      break;
2309                 case TP_FENV_ACCESS:      kind = STDC_FENV_ACCESS;      break;
2310                 case TP_CX_LIMITED_RANGE: kind = STDC_CX_LIMITED_RANGE; break;
2311                 default:                  break;
2312                 }
2313                 if (kind != STDC_UNKNOWN) {
2314                         next_input_token();
2315                         stdc_pragma_value_kind_t value;
2316                         switch (pp_token.base.symbol->pp_ID) {
2317                         case TP_ON:      value = STDC_VALUE_ON;      break;
2318                         case TP_OFF:     value = STDC_VALUE_OFF;     break;
2319                         case TP_DEFAULT: value = STDC_VALUE_DEFAULT; break;
2320                         default:         value = STDC_VALUE_UNKNOWN; break;
2321                         }
2322                         if (value == STDC_VALUE_UNKNOWN) {
2323                                 kind = STDC_UNKNOWN;
2324                                 errorf(&pp_token.base.source_position, "bad STDC pragma argument");
2325                         }
2326                 }
2327         }
2328         eat_pp_directive();
2329         if (kind == STDC_UNKNOWN) {
2330                 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
2331                          "encountered unknown #pragma");
2332         }
2333 }
2334
2335 static void parse_line_directive(void)
2336 {
2337         if (pp_token.kind != T_NUMBER) {
2338                 if (!skip_mode)
2339                         parse_error("expected integer");
2340         } else {
2341                 char      *end;
2342                 long const line = strtol(pp_token.literal.string.begin, &end, 0);
2343                 if (*end == '\0') {
2344                         /* use offset -1 as this is about the next line */
2345                         input.position.lineno = line - 1;
2346                         /* force output of line */
2347                         input.output_line = input.position.lineno - 20;
2348                 } else {
2349                         if (!skip_mode) {
2350                                 errorf(&input.position, "'%S' is not a valid line number",
2351                                            &pp_token.literal.string);
2352                         }
2353                 }
2354                 next_input_token();
2355                 if (info.at_line_begin)
2356                         return;
2357         }
2358         if (pp_token.kind == T_STRING_LITERAL
2359             && pp_token.literal.string.encoding == STRING_ENCODING_CHAR) {
2360                 input.position.input_name       = pp_token.literal.string.begin;
2361                 input.position.is_system_header = false;
2362                 next_input_token();
2363
2364                 /* attempt to parse numeric flags as outputted by gcc preprocessor */
2365                 while (!info.at_line_begin && pp_token.kind == T_NUMBER) {
2366                         /* flags:
2367                          * 1 - indicates start of a new file
2368                          * 2 - indicates return from a file
2369                          * 3 - indicates system header
2370                          * 4 - indicates implicit extern "C" in C++ mode
2371                          *
2372                          * currently we're only interested in "3"
2373                          */
2374                         if (streq(pp_token.literal.string.begin, "3")) {
2375                                 input.position.is_system_header = true;
2376                         }
2377                         next_input_token();
2378                 }
2379         }
2380
2381         eat_pp_directive();
2382 }
2383
2384 static void parse_error_directive(void)
2385 {
2386         if (skip_mode) {
2387                 eat_pp_directive();
2388                 return;
2389         }
2390
2391         bool const old_resolve_escape_sequences = resolve_escape_sequences;
2392         resolve_escape_sequences = false;
2393
2394         source_position_t const pos = pp_token.base.source_position;
2395         do {
2396                 if (info.had_whitespace && obstack_object_size(&pp_obstack) != 0)
2397                         obstack_1grow(&pp_obstack, ' ');
2398
2399                 switch (pp_token.kind) {
2400                 case T_NUMBER: {
2401                         string_t const *const str = &pp_token.literal.string;
2402                         obstack_grow(&pp_obstack, str->begin, str->size);
2403                         break;
2404                 }
2405
2406                 {
2407                         char delim;
2408                 case T_STRING_LITERAL:     delim =  '"'; goto string;
2409                 case T_CHARACTER_CONSTANT: delim = '\''; goto string;
2410 string:;
2411                         string_t const *const str = &pp_token.literal.string;
2412                         char     const *const enc = get_string_encoding_prefix(str->encoding);
2413                         obstack_printf(&pp_obstack, "%s%c%s%c", enc, delim, str->begin, delim);
2414                         break;
2415                 }
2416
2417                 default: {
2418                         char const *const str = pp_token.base.symbol->string;
2419                         obstack_grow(&pp_obstack, str, strlen(str));
2420                         break;
2421                 }
2422                 }
2423
2424                 next_input_token();
2425         } while (!info.at_line_begin);
2426
2427         resolve_escape_sequences = old_resolve_escape_sequences;
2428
2429         obstack_1grow(&pp_obstack, '\0');
2430         char *const str = obstack_finish(&pp_obstack);
2431         errorf(&pos, "#%s", str);
2432         obstack_free(&pp_obstack, str);
2433 }
2434
2435 static void parse_preprocessing_directive(void)
2436 {
2437         eat_token('#');
2438
2439         if (info.at_line_begin) {
2440                 /* empty directive */
2441                 return;
2442         }
2443
2444         if (pp_token.base.symbol) {
2445                 switch (pp_token.base.symbol->pp_ID) {
2446                 case TP_define:       parse_define_directive();            break;
2447                 case TP_else:         parse_else_directive();              break;
2448                 case TP_endif:        parse_endif_directive();             break;
2449                 case TP_error:        parse_error_directive();             break;
2450                 case TP_ifdef:        parse_ifdef_ifndef_directive(true);  break;
2451                 case TP_ifndef:       parse_ifdef_ifndef_directive(false); break;
2452                 case TP_include:      parse_include_directive(false);      break;
2453                 case TP_include_next: parse_include_directive(true);       break;
2454                 case TP_line:         next_input_token(); goto line_directive;
2455                 case TP_pragma:       parse_pragma_directive();            break;
2456                 case TP_undef:        parse_undef_directive();             break;
2457                 default:              goto skip;
2458                 }
2459         } else if (pp_token.kind == T_NUMBER) {
2460 line_directive:
2461                 parse_line_directive();
2462         } else {
2463 skip:
2464                 if (!skip_mode) {
2465                         errorf(&pp_token.base.source_position, "invalid preprocessing directive #%K", &pp_token);
2466                 }
2467                 eat_pp_directive();
2468         }
2469
2470         assert(info.at_line_begin);
2471 }
2472
2473 static void finish_current_argument(void)
2474 {
2475         if (current_argument == NULL)
2476                 return;
2477         size_t size = obstack_object_size(&pp_obstack);
2478         current_argument->list_len   = size/sizeof(current_argument->token_list[0]);
2479         current_argument->token_list = obstack_finish(&pp_obstack);
2480 }
2481
/**
 * Advances pp_token to the next preprocessing token.
 *
 * Tokens come either from a running expansion (expand_next()) or from the
 * input.  Directives at the beginning of a line are processed here, tokens
 * in skipped conditional blocks are dropped, macros are expanded and the
 * arguments of function-like macro invocations are collected.  Whenever one
 * of these steps consumed the current token, control jumps back to
 * "restart" to fetch another one.
 */
void next_preprocessing_token(void)
{
restart:
        /* expand_next() returned false: take the token from the input */
        if (!expand_next()) {
                do {
                        next_input_token();
                        /* handle (possibly several consecutive) directives */
                        while (pp_token.kind == '#' && info.at_line_begin) {
                                parse_preprocessing_directive();
                        }
                /* in skip mode all tokens up to the end of file are dropped */
                } while (skip_mode && pp_token.kind != T_EOF);
        }

        const token_kind_t kind = pp_token.kind;
        /* Macro names are only looked at when we are not collecting arguments
         * for a macro call, or while an argument itself is being expanded. */
        if (current_call == NULL || argument_expanding != NULL) {
                symbol_t *const symbol = pp_token.base.symbol;
                if (symbol) {
                        if (kind == T_MACRO_PARAMETER) {
                                /* replace the parameter by the tokens of its definition */
                                assert(current_expansion != NULL);
                                start_expanding(pp_token.macro_parameter.def);
                                goto restart;
                        }

                        pp_definition_t *const pp_definition = symbol->pp_definition;
                        /* a macro that is already being expanded is not expanded again */
                        if (pp_definition != NULL && !pp_definition->is_expanding) {
                                if (pp_definition->has_parameters) {

                                        /* check if next token is a '(' */
                                        whitespace_info_t old_info   = info;
                                        token_kind_t      next_token = peek_expansion();
                                        /* peek_expansion() gave T_EOF (presumably: nothing left in
                                         * the running expansion), so look at the raw input */
                                        if (next_token == T_EOF) {
                                                info.at_line_begin  = false;
                                                info.had_whitespace = false;
                                                skip_whitespace();
                                                if (input.c == '(') {
                                                        next_token = '(';
                                                }
                                        }

                                        if (next_token == '(') {
                                                /* remember where the outermost expansion started */
                                                if (current_expansion == NULL)
                                                        expansion_pos = pp_token.base.source_position;
                                                /* fetch the '(' through a recursive call */
                                                next_preprocessing_token();
                                                assert(pp_token.kind == '(');

                                                /* begin collecting arguments for this call */
                                                pp_definition->parent_expansion = current_expansion;
                                                current_call              = pp_definition;
                                                current_call->expand_pos  = 0;
                                                current_call->expand_info = old_info;
                                                if (current_call->n_parameters > 0) {
                                                        current_argument = &current_call->parameters[0];
                                                        assert(argument_brace_count == 0);
                                                }
                                                goto restart;
                                        } else {
                                                /* Not a call: the macro name stays an ordinary token.
                                                 * skip_whitespace() may have skipped newlines and
                                                 * whitespace, remember the results for the next token
                                                 * and restore the info of the current one. */
                                                next_info = info;
                                                info      = old_info;
                                                return;
                                        }
                                } else {
                                        /* object-like macro: expand right away */
                                        if (current_expansion == NULL)
                                                expansion_pos = pp_token.base.source_position;
                                        start_expanding(pp_definition);
                                        goto restart;
                                }
                        }
                }
        }

        if (current_call != NULL) {
                /* We are inside the argument list of a macro call: no token is
                 * returned to the caller until the closing ')' (or end of file). */
                if (kind == '(') {
                        /* nested parenthesis inside an argument */
                        ++argument_brace_count;
                } else if (kind == ')') {
                        if (argument_brace_count > 0) {
                                --argument_brace_count;
                        } else {
                                /* closing ')' of the call: finish the last argument and
                                 * start expanding the macro body */
                                finish_current_argument();
                                assert(kind == ')');
                                start_expanding(current_call);
                                /* restore the whitespace info saved at the macro name */
                                info = current_call->expand_info;
                                current_call     = NULL;
                                current_argument = NULL;
                                goto restart;
                        }
                } else if (kind == ',' && argument_brace_count == 0) {
                        /* a top-level ',' separates arguments: switch to next parameter */
                        finish_current_argument();
                        current_call->expand_pos++;
                        if (current_call->expand_pos >= current_call->n_parameters) {
                                errorf(&pp_token.base.source_position,
                                           "too many arguments passed for macro '%Y'",
                                           current_call->symbol);
                                /* tokens of surplus arguments are not stored */
                                current_argument = NULL;
                        } else {
                                current_argument
                                        = &current_call->parameters[current_call->expand_pos];
                        }
                        goto restart;
                } else if (kind == T_MACRO_PARAMETER) {
                        /* parameters have to be fully expanded before being used as
                         * parameters for another macro-call */
                        assert(current_expansion != NULL);
                        pp_definition_t *argument = pp_token.macro_parameter.def;
                        argument_expanding = argument;
                        start_expanding(argument);
                        goto restart;
                } else if (kind == T_EOF) {
                        /* unterminated argument list: report and hand T_EOF to caller */
                        errorf(&expansion_pos,
                               "reached end of file while parsing arguments for '%Y'",
                               current_call->symbol);
                        return;
                }
                /* Append the token (with its whitespace flag) to the argument being
                 * collected; finish_current_argument() later turns the grown object
                 * into the argument's token list. */
                if (current_argument != NULL) {
                        saved_token_t saved;
                        saved.token = pp_token;
                        saved.had_whitespace = info.had_whitespace;
                        obstack_grow(&pp_obstack, &saved, sizeof(saved));
                }
                goto restart;
        }
}
2604
2605 void append_include_path(searchpath_t *paths, const char *path)
2606 {
2607         searchpath_entry_t *entry = OALLOCZ(&config_obstack, searchpath_entry_t);
2608         entry->path           = path;
2609         entry->is_system_path = paths->is_system_path;
2610
2611         *paths->anchor = entry;
2612         paths->anchor  = &entry->next;
2613 }
2614
2615 static void append_env_paths(searchpath_t *paths, const char *envvar)
2616 {
2617         const char *val = getenv(envvar);
2618         if (val != NULL && *val != '\0') {
2619                 const char *begin = val;
2620                 const char *c;
2621                 do {
2622                         c = begin;
2623                         while (*c != '\0' && *c != ':')
2624                                 ++c;
2625
2626                         size_t len = c-begin;
2627                         if (len == 0) {
2628                                 /* use "." for gcc compatibility (Matze: I would expect that
2629                                  * nothing happens for an empty entry...) */
2630                                 append_include_path(paths, ".");
2631                         } else {
2632                                 char *const string = obstack_copy0(&config_obstack, begin, len);
2633                                 append_include_path(paths, string);
2634                         }
2635
2636                         begin = c+1;
2637                         /* skip : */
2638                         if (*begin == ':')
2639                                 ++begin;
2640                 } while(*c != '\0');
2641         }
2642 }
2643
2644 static void append_searchpath(searchpath_t *path, const searchpath_t *append)
2645 {
2646         *path->anchor = append->first;
2647 }
2648
2649 static void setup_include_path(void)
2650 {
2651         /* built-in paths */
2652         append_include_path(&system_searchpath, "/usr/include");
2653
2654         /* parse environment variable */
2655         append_env_paths(&bracket_searchpath, "CPATH");
2656         append_env_paths(&system_searchpath,
2657                          c_mode & _CXX ? "CPLUS_INCLUDE_PATH" : "C_INCLUDE_PATH");
2658
2659         /* append system search path to bracket searchpath */
2660         append_searchpath(&system_searchpath,  &after_searchpath);
2661         append_searchpath(&bracket_searchpath, &system_searchpath);
2662         append_searchpath(&quote_searchpath, &bracket_searchpath);
2663 }
2664
2665 static void input_error(unsigned const delta_lines, unsigned const delta_cols, char const *const message)
2666 {
2667         source_position_t pos = pp_token.base.source_position;
2668         pos.lineno += delta_lines;
2669         pos.colno  += delta_cols;
2670         errorf(&pos, "%s", message);
2671 }
2672
2673 void init_include_paths(void)
2674 {
2675         obstack_init(&config_obstack);
2676 }
2677
2678 void init_preprocessor(void)
2679 {
2680         init_symbols();
2681
2682         obstack_init(&pp_obstack);
2683         obstack_init(&input_obstack);
2684         strset_init(&stringset);
2685
2686         setup_include_path();
2687
2688         set_input_error_callback(input_error);
2689 }
2690
2691 void exit_preprocessor(void)
2692 {
2693         obstack_free(&input_obstack, NULL);
2694         obstack_free(&pp_obstack, NULL);
2695         obstack_free(&config_obstack, NULL);
2696
2697         strset_destroy(&stringset);
2698 }
2699
2700 int pptest_main(int argc, char **argv);
2701 int pptest_main(int argc, char **argv)
2702 {
2703         init_symbol_table();
2704         init_include_paths();
2705         init_preprocessor();
2706         init_tokens();
2707
2708         error_on_unknown_chars   = false;
2709         resolve_escape_sequences = false;
2710
2711         /* simplistic commandline parser */
2712         const char *filename = NULL;
2713         const char *output = NULL;
2714         for (int i = 1; i < argc; ++i) {
2715                 const char *opt = argv[i];
2716                 if (streq(opt, "-I")) {
2717                         append_include_path(&bracket_searchpath, argv[++i]);
2718                         continue;
2719                 } else if (streq(opt, "-E")) {
2720                         /* ignore */
2721                 } else if (streq(opt, "-o")) {
2722                         output = argv[++i];
2723                         continue;
2724                 } else if (opt[0] == '-') {
2725                         fprintf(stderr, "Unknown option '%s'\n", opt);
2726                 } else {
2727                         if (filename != NULL)
2728                                 fprintf(stderr, "Multiple inputs not supported\n");
2729                         filename = argv[i];
2730                 }
2731         }
2732         if (filename == NULL) {
2733                 fprintf(stderr, "No input specified\n");
2734                 return 1;
2735         }
2736
2737         if (output == NULL) {
2738                 out = stdout;
2739         } else {
2740                 out = fopen(output, "w");
2741                 if (out == NULL) {
2742                         fprintf(stderr, "Couldn't open output '%s'\n", output);
2743                         return 1;
2744                 }
2745         }
2746
2747         /* just here for gcc compatibility */
2748         fprintf(out, "# 1 \"%s\"\n", filename);
2749         fprintf(out, "# 1 \"<built-in>\"\n");
2750         fprintf(out, "# 1 \"<command-line>\"\n");
2751
2752         FILE *file = fopen(filename, "r");
2753         if (file == NULL) {
2754                 fprintf(stderr, "Couldn't open input '%s'\n", filename);
2755                 return 1;
2756         }
2757         switch_pp_input(file, filename, NULL, false);
2758
2759         for (;;) {
2760                 next_preprocessing_token();
2761                 if (pp_token.kind == T_EOF)
2762                         break;
2763                 emit_pp_token();
2764         }
2765
2766         fputc('\n', out);
2767         check_unclosed_conditionals();
2768         fclose(close_pp_input());
2769         if (out != stdout)
2770                 fclose(out);
2771
2772         exit_tokens();
2773         exit_preprocessor();
2774         exit_symbol_table();
2775
2776         return 0;
2777 }