nsz Git - cparser/blob - preprocessor.c

   1 /*
   2  * This file is part of cparser.
   3  * Copyright (C) 2012 Matthias Braun <matze@braunis.de>
   4  */
   5 #include <config.h>
   6
   7 #include <assert.h>
   8 #include <errno.h>
   9 #include <string.h>
  10 #include <stdbool.h>
  11 #include <ctype.h>
  12
  13 #include "preprocessor.h"
  14 #include "symbol_t.h"
  15 #include "adt/util.h"
  16 #include "adt/error.h"
  17 #include "adt/strutil.h"
  18 #include "adt/strset.h"
  19 #include "lang_features.h"
  20 #include "diagnostic.h"
  21 #include "string_rep.h"
  22 #include "input.h"
  23 #include "symbol_table.h"
  24
     /* Number of slots kept free at the front of pp_input_t::buf: buffer
      * refills start writing at buf + MAX_PUTBACK, so put_back() always has
      * room to un-read characters, even right after a refill. */
  25 #define MAX_PUTBACK 3
     /* NOTE(review): presumably the maximum #include nesting depth; confirm
      * against the include handling code. */
  26 #define INCLUDE_LIMIT 199  /* 199 is for gcc "compatibility" */
  27
     /** Element of a definition's replacement list (pp_definition_t::token_list):
      * a token together with a whitespace flag.  start_expanding() copies
      * info.had_whitespace into the flag of the first list element. */
  28 typedef struct saved_token_t {
  29         token_t token;
  30         bool    had_whitespace;
  31 } saved_token_t;
  32
     /** Whitespace and line-begin state of a token.  Two file-level instances
      * exist: `info` and `next_info`. */
  33 typedef struct whitespace_info_t {
  34         /** current token had whitespace in front of it */
  35         bool     had_whitespace;
  36         /** current token is at the beginning of a line.
  37          * => a "#" at line begin starts a preprocessing directive. */
  38         bool     at_line_begin;
  39         /** number of spaces before the first token in a line */
  40         unsigned whitespace_at_line_begin;
  41 } whitespace_info_t;
  42
     /** A preprocessor definition together with the state used while it is
      * being expanded.  The replacement is kept as an array of saved tokens. */
  43 struct pp_definition_t {
  44         symbol_t          *symbol;
  45         position_t         pos;
             /** value of current_expansion at the time start_expanding() was
              * called for this definition; cleared by finished_expanding() */
  46         pp_definition_t   *parent_expansion;
             /** reset to 0 by start_expanding(); presumably the index of the
              * next token in token_list to deliver -- TODO confirm */
  47         size_t             expand_pos;
  48         whitespace_info_t  expand_info;
  49         bool               is_variadic    : 1;
             /** set by start_expanding(); finished_expanding() asserts it */
  50         bool               is_expanding   : 1;
  51         bool               has_parameters : 1;
  52         bool               is_parameter   : 1;
  53         pp_definition_t   *function_definition;
  54         size_t             n_parameters;
  55         pp_definition_t   *parameters;
  56
  57         /* replacement */
             /** number of entries in token_list */
  58         size_t             list_len;
  59         saved_token_t     *token_list;
  60 };
  61
     /** Entry of the conditional stack (see conditional_stack); entries are
      * chained through `parent`. */
  62 typedef struct pp_conditional_t pp_conditional_t;
  63 struct pp_conditional_t {
  64         position_t         pos;
  65         bool               condition;
  66         bool               in_else;
  67         /** conditional in skip mode (then+else gets skipped) */
  68         bool               skip;
  69         pp_conditional_t  *parent;
  70 };
  71
     /** State of one input being read: the stream, its decoder, a buffer of
      * decoded characters and the current source position. */
  72 typedef struct pp_input_t pp_input_t;
  73 struct pp_input_t {
             /** stream stored by switch_pp_input(), handed back by close_pp_input() */
  74         FILE               *file;
             /** decoder created by input_from_stream(), read through decode() */
  75         input_t            *input;
             /** current character; set to EOF when the input is exhausted */
  76         utf32               c;
             /** decoded characters; refills start at buf + MAX_PUTBACK so the
              * leading slots stay available for put_back() */
  77         utf32               buf[1024+MAX_PUTBACK];
             /** one past the last decoded character in buf (NULL before first read) */
  78         const utf32        *bufend;
             /** next character to be read from buf (NULL before first read) */
  79         const utf32        *bufpos;
  80         position_t          pos;
             /** next older saved input on input_stack; set by push_input() */
  81         pp_input_t         *parent;
             /** reset to 0 by switch_pp_input(); NOTE(review): presumably the
              * line number last written to the output -- confirm */
  82         unsigned            output_line;
             /** search path entry passed to switch_pp_input() */
  83         searchpath_entry_t *path;
  84 };
  85
     /** One entry of a singly linked search path list (see searchpath_t). */
  86 struct searchpath_entry_t {
  87         const char         *path;
  88         searchpath_entry_t *next;
  89         bool                is_system_path;
  90 };
  91
     /** the input currently being read */
  92 static pp_input_t      input;
  93
     /** saved inputs, newest first; maintained by push_input() and
      * pop_restore_input(), storage lives on input_obstack */
  94 static pp_input_t     *input_stack;
     /** number of entries on input_stack */
  95 static unsigned        n_inputs;
  96 static struct obstack  input_obstack;
  97
  98 static pp_conditional_t *conditional_stack;
  99
 100 token_t                  pp_token;
     /** when false, the '$' case of SYMBOL_CASES_WITHOUT_E_P jumps to the
      * dollar_sign label instead of treating '$' as a symbol character */
 101 bool                     allow_dollar_in_symbol   = true;
     /** when false, string parsing copies escape sequences verbatim instead
      * of replacing them by the character they denote */
 102 static bool              resolve_escape_sequences = true;
 103 static bool              error_on_unknown_chars   = true;
 104 static bool              skip_mode;
 105 static FILE             *out;
 106 static struct obstack    pp_obstack;
 107 static struct obstack    config_obstack;
 108 static const char       *printed_input_name = NULL;
 109 static position_t        expansion_pos;
     /** definition whose expansion was started most recently; see
      * start_expanding() */
 110 static pp_definition_t  *current_expansion  = NULL;
 111 static pp_definition_t  *current_call       = NULL;
 112 static pp_definition_t  *current_argument   = NULL;
 113 static pp_definition_t  *argument_expanding = NULL;
 114 static unsigned          argument_brace_count;
     /** set used by identify_string() to keep one copy of each string */
 115 static strset_t          stringset;
 116 static token_kind_t      last_token;
 117
     /** A list of search path entries.  `anchor` initially points at `first`;
      * NOTE(review): presumably it always points at the link where the next
      * entry gets appended -- confirm against the code that adds entries. */
 118 struct searchpath_t {
 119         searchpath_entry_t  *first;
 120         searchpath_entry_t **anchor;
 121         bool                 is_system_path;
 122 };
 123
     /* The four search paths start out empty; only the system and "after"
      * paths are flagged as system paths. */
 124 searchpath_t bracket_searchpath = { NULL, &bracket_searchpath.first, false };
 125 searchpath_t quote_searchpath   = { NULL, &quote_searchpath.first,   false };
 126 searchpath_t system_searchpath  = { NULL, &system_searchpath.first,  true  };
 127 searchpath_t after_searchpath   = { NULL, &after_searchpath.first,   true  };
 128
 129 static whitespace_info_t next_info; /* valid if had_whitespace is true */
 130 static whitespace_info_t info;
 131
     /* forward declarations of functions used before their definition */
 132 static inline void next_char(void);
 133 static void next_input_token(void);
 134 static void print_line_directive(const position_t *pos, const char *add);
 135
     /* Symbols for the digraph spellings ":>", "<:", "<%", "%:", "%:%:" and
      * "%>"; filled in by init_symbols(). */
 136 static symbol_t *symbol_colongreater;
 137 static symbol_t *symbol_lesscolon;
 138 static symbol_t *symbol_lesspercent;
 139 static symbol_t *symbol_percentcolon;
 140 static symbol_t *symbol_percentcolonpercentcolon;
 141 static symbol_t *symbol_percentgreater;
 142
     /* Symbols for the identifiers "L", "U", "u" and "u8"; filled in by
      * init_symbols().  NOTE(review): presumably used to recognise string and
      * character literal prefixes -- confirm at the use sites. */
 143 static symbol_t *symbol_L;
 144 static symbol_t *symbol_U;
 145 static symbol_t *symbol_u;
 146 static symbol_t *symbol_u8;
 147
 148 static void init_symbols(void)
 149 {
 150         symbol_colongreater             = symbol_table_insert(":>");
 151         symbol_lesscolon                = symbol_table_insert("<:");
 152         symbol_lesspercent              = symbol_table_insert("<%");
 153         symbol_percentcolon             = symbol_table_insert("%:");
 154         symbol_percentcolonpercentcolon = symbol_table_insert("%:%:");
 155         symbol_percentgreater           = symbol_table_insert("%>");
 156
 157         symbol_L  = symbol_table_insert("L");
 158         symbol_U  = symbol_table_insert("U");
 159         symbol_u  = symbol_table_insert("u");
 160         symbol_u8 = symbol_table_insert("u8");
 161 }
 162
 163 void switch_pp_input(FILE *const file, char const *const filename, searchpath_entry_t *const path, bool const is_system_header)
 164 {
 165         input.file                 = file;
 166         input.input                = input_from_stream(file, NULL);
 167         input.bufend               = NULL;
 168         input.bufpos               = NULL;
 169         input.output_line          = 0;
 170         input.pos.input_name       = filename;
 171         input.pos.lineno           = 1;
 172         input.pos.is_system_header = is_system_header;
 173         input.path                 = path;
 174
 175         /* indicate that we're at a new input */
 176         print_line_directive(&input.pos, input_stack != NULL ? "1" : NULL);
 177
 178         /* place a virtual '\n' so we realize we're at line begin */
 179         input.pos.lineno = 0;
 180         input.c          = '\n';
 181 }
 182
 183 FILE *close_pp_input(void)
 184 {
 185         input_free(input.input);
 186
 187         FILE* const file = input.file;
 188         assert(file);
 189
 190         input.input  = NULL;
 191         input.file   = NULL;
 192         input.bufend = NULL;
 193         input.bufpos = NULL;
 194         input.c      = EOF;
 195
 196         return file;
 197 }
 198
 199 static void push_input(void)
 200 {
 201         pp_input_t *const saved_input = obstack_copy(&input_obstack, &input, sizeof(input));
 202
 203         /* adjust buffer positions */
 204         if (input.bufpos != NULL)
 205                 saved_input->bufpos = saved_input->buf + (input.bufpos - input.buf);
 206         if (input.bufend != NULL)
 207                 saved_input->bufend = saved_input->buf + (input.bufend - input.buf);
 208
 209         saved_input->parent = input_stack;
 210         input_stack         = saved_input;
 211         ++n_inputs;
 212 }
 213
 214 static void pop_restore_input(void)
 215 {
 216         assert(n_inputs > 0);
 217         assert(input_stack != NULL);
 218
 219         pp_input_t *saved_input = input_stack;
 220
 221         memcpy(&input, saved_input, sizeof(input));
 222         input.parent = NULL;
 223
 224         /* adjust buffer positions */
 225         if (saved_input->bufpos != NULL)
 226                 input.bufpos = input.buf + (saved_input->bufpos - saved_input->buf);
 227         if (saved_input->bufend != NULL)
 228                 input.bufend = input.buf + (saved_input->bufend - saved_input->buf);
 229
 230         input_stack = saved_input->parent;
 231         obstack_free(&input_obstack, saved_input);
 232         --n_inputs;
 233 }
 234
 235 /**
 236  * Prints a parse error message at the current token.
 237  *
 238  * @param msg   the error message
 239  */
 240 static void parse_error(const char *msg)
 241 {
 242         errorf(&pp_token.base.pos,  "%s", msg);
 243 }
 244
 245 static inline void next_real_char(void)
 246 {
 247         assert(input.bufpos <= input.bufend);
 248         if (input.bufpos >= input.bufend) {
 249                 size_t const n = decode(input.input, input.buf + MAX_PUTBACK, lengthof(input.buf) - MAX_PUTBACK);
 250                 if (n == 0) {
 251                         input.c = EOF;
 252                         return;
 253                 }
 254                 input.bufpos = input.buf + MAX_PUTBACK;
 255                 input.bufend = input.bufpos + n;
 256         }
 257         input.c = *input.bufpos++;
 258         ++input.pos.colno;
 259 }
 260
 261 /**
 262  * Put a character back into the buffer.
 263  *
 264  * @param pc  the character to put back
 265  */
 266 static inline void put_back(utf32 const pc)
 267 {
 268         assert(input.bufpos > input.buf);
 269         *(--input.bufpos - input.buf + input.buf) = (char) pc;
 270         --input.pos.colno;
 271 }
 272
     /* Case-label helper: write `case NEWLINE:` inside a switch on input.c.
      * It matches '\r', '\n' and the pair "\r\n", consumes the line ending,
      * increments input.pos.lineno and resets input.pos.colno to 1.  Execution
      * then continues with the statements following the `case NEWLINE:` label.
      * The expansion defines a label named `newline`. */
 273 #define NEWLINE \
 274         '\r': \
 275                 next_char(); \
 276                 if (input.c == '\n') { \
 277         case '\n': \
 278                         next_char(); \
 279                 } \
 280                 ++input.pos.lineno; \
 281                 input.pos.colno = 1; \
 282                 goto newline; \
 283                 newline // Let it look like an ordinary case label.
 284
     /* Asserts that the current character is c_type, then advances to the
      * next character. */
 285 #define eat(c_type) (assert(input.c == c_type), next_char())
 286
 287 static void maybe_concat_lines(void)
 288 {
 289         eat('\\');
 290
 291         switch (input.c) {
 292         case NEWLINE:
 293                 info.whitespace_at_line_begin = 0;
 294                 return;
 295
 296         default:
 297                 break;
 298         }
 299
 300         put_back(input.c);
 301         input.c = '\\';
 302 }
 303
 304 /**
 305  * Set c to the next input character, ie.
 306  * after expanding trigraphs.
 307  */
 308 static inline void next_char(void)
 309 {
 310         next_real_char();
 311
 312         /* filter trigraphs and concatenated lines */
 313         if (UNLIKELY(input.c == '\\')) {
 314                 maybe_concat_lines();
 315                 goto end_of_next_char;
 316         }
 317
 318         if (LIKELY(input.c != '?'))
 319                 goto end_of_next_char;
 320
 321         next_real_char();
 322         if (LIKELY(input.c != '?')) {
 323                 put_back(input.c);
 324                 input.c = '?';
 325                 goto end_of_next_char;
 326         }
 327
 328         next_real_char();
 329         switch (input.c) {
 330         case '=': input.c = '#'; break;
 331         case '(': input.c = '['; break;
 332         case '/': input.c = '\\'; maybe_concat_lines(); break;
 333         case ')': input.c = ']'; break;
 334         case '\'': input.c = '^'; break;
 335         case '<': input.c = '{'; break;
 336         case '!': input.c = '|'; break;
 337         case '>': input.c = '}'; break;
 338         case '-': input.c = '~'; break;
 339         default:
 340                 put_back(input.c);
 341                 put_back('?');
 342                 input.c = '?';
 343                 break;
 344         }
 345
 346 end_of_next_char:;
 347 #ifdef DEBUG_CHARS
 348         printf("nchar '%c'\n", input.c);
 349 #endif
 350 }
 351
 352
 353
 354 /**
 355  * Returns true if the given char is a octal digit.
 356  *
 357  * @param char  the character to check
 358  */
 359 static inline bool is_octal_digit(int chr)
 360 {
 361         switch (chr) {
 362         case '0':
 363         case '1':
 364         case '2':
 365         case '3':
 366         case '4':
 367         case '5':
 368         case '6':
 369         case '7':
 370                 return true;
 371         default:
 372                 return false;
 373         }
 374 }
 375
 376 /**
 377  * Returns the value of a digit.
 378  * The only portable way to do it ...
 379  */
 380 static int digit_value(int digit)
 381 {
 382         switch (digit) {
 383         case '0': return 0;
 384         case '1': return 1;
 385         case '2': return 2;
 386         case '3': return 3;
 387         case '4': return 4;
 388         case '5': return 5;
 389         case '6': return 6;
 390         case '7': return 7;
 391         case '8': return 8;
 392         case '9': return 9;
 393         case 'a':
 394         case 'A': return 10;
 395         case 'b':
 396         case 'B': return 11;
 397         case 'c':
 398         case 'C': return 12;
 399         case 'd':
 400         case 'D': return 13;
 401         case 'e':
 402         case 'E': return 14;
 403         case 'f':
 404         case 'F': return 15;
 405         default:
 406                 panic("wrong character given");
 407         }
 408 }
 409
 410 /**
 411  * Parses an octal character sequence.
 412  *
 413  * @param first_digit  the already read first digit
 414  */
 415 static utf32 parse_octal_sequence(const utf32 first_digit)
 416 {
 417         assert(is_octal_digit(first_digit));
 418         utf32 value = digit_value(first_digit);
 419         if (!is_octal_digit(input.c)) return value;
 420         value = 8 * value + digit_value(input.c);
 421         next_char();
 422         if (!is_octal_digit(input.c)) return value;
 423         value = 8 * value + digit_value(input.c);
 424         next_char();
 425         return value;
 426
 427 }
 428
 429 /**
 430  * Parses a hex character sequence.
 431  */
 432 static utf32 parse_hex_sequence(void)
 433 {
 434         utf32 value = 0;
 435         while (isxdigit(input.c)) {
 436                 value = 16 * value + digit_value(input.c);
 437                 next_char();
 438         }
 439         return value;
 440 }
 441
 442 static bool is_universal_char_valid(utf32 const v)
 443 {
 444         /* C11 §6.4.3:2 */
 445         if (v < 0xA0U && v != 0x24 && v != 0x40 && v != 0x60)
 446                 return false;
 447         if (0xD800 <= v && v <= 0xDFFF)
 448                 return false;
 449         return true;
 450 }
 451
 452 static utf32 parse_universal_char(unsigned const n_digits)
 453 {
 454         utf32 v = 0;
 455         for (unsigned k = n_digits; k != 0; --k) {
 456                 if (isxdigit(input.c)) {
 457                         v = 16 * v + digit_value(input.c);
 458                         if (!resolve_escape_sequences)
 459                                 obstack_1grow(&symbol_obstack, input.c);
 460                         next_char();
 461                 } else {
 462                         errorf(&input.pos,
 463                                "short universal character name, expected %u more digits",
 464                                    k);
 465                         break;
 466                 }
 467         }
 468         if (!is_universal_char_valid(v)) {
 469                 errorf(&input.pos,
 470                        "\\%c%0*X is not a valid universal character name",
 471                        n_digits == 4 ? 'u' : 'U', (int)n_digits, v);
 472         }
 473         return v;
 474 }
 475
 476 static bool is_universal_char_valid_identifier_c99(utf32 const v)
 477 {
 478         static const utf32 single_chars[] = {
 479                 0x00AA, 0x00BA, 0x0386, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0,
 480                 0x1F59, 0x1F5B, 0x1F5D, 0x05BF, 0x09B2, 0x0A02, 0x0A5E, 0x0A74,
 481                 0x0A8D, 0x0AD0, 0x0AE0, 0x0B9C, 0x0CDE, 0x0E84, 0x0E8A, 0x0E8D,
 482                 0x0EA5, 0x0EA7, 0x0EC6, 0x0F00, 0x0F35, 0x0F37, 0x0F39, 0x0F97,
 483                 0x0FB9, 0x00B5, 0x00B7, 0x02BB, 0x037A, 0x0559, 0x093D, 0x0B3D,
 484                 0x1FBE, 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128
 485         };
 486
 487         static const utf32 ranges[][2] = {
 488                 {0x00C0, 0x00D6}, {0x00D8, 0x00F6}, {0x00F8, 0x01F5}, {0x01FA, 0x0217},
 489                 {0x0250, 0x02A8}, {0x1E00, 0x1E9B}, {0x1EA0, 0x1EF9}, {0x0388, 0x038A},
 490                 {0x038E, 0x03A1}, {0x03A3, 0x03CE}, {0x03D0, 0x03D6}, {0x03E2, 0x03F3},
 491                 {0x1F00, 0x1F15}, {0x1F18, 0x1F1D}, {0x1F20, 0x1F45}, {0x1F48, 0x1F4D},
 492                 {0x1F50, 0x1F57}, {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4}, {0x1FB6, 0x1FBC},
 493                 {0x1FC2, 0x1FC4}, {0x1FC6, 0x1FCC}, {0x1FD0, 0x1FD3}, {0x1FD6, 0x1FDB},
 494                 {0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4}, {0x1FF6, 0x1FFC}, {0x0401, 0x040C},
 495                 {0x040E, 0x044F}, {0x0451, 0x045C}, {0x045E, 0x0481}, {0x0490, 0x04C4},
 496                 {0x04C7, 0x04C8}, {0x04CB, 0x04CC}, {0x04D0, 0x04EB}, {0x04EE, 0x04F5},
 497                 {0x04F8, 0x04F9}, {0x0531, 0x0556}, {0x0561, 0x0587}, {0x05B0, 0x05B9},
 498                 {0x05BB, 0x05BD}, {0x05C1, 0x05C2}, {0x05D0, 0x05EA}, {0x05F0, 0x05F2},
 499                 {0x0621, 0x063A}, {0x0640, 0x0652}, {0x0670, 0x06B7}, {0x06BA, 0x06BE},
 500                 {0x06C0, 0x06CE}, {0x06D0, 0x06DC}, {0x06E5, 0x06E8}, {0x06EA, 0x06ED},
 501                 {0x0901, 0x0903}, {0x0905, 0x0939}, {0x093E, 0x094D}, {0x0950, 0x0952},
 502                 {0x0958, 0x0963}, {0x0981, 0x0983}, {0x0985, 0x098C}, {0x098F, 0x0990},
 503                 {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B6, 0x09B9}, {0x09BE, 0x09C4},
 504                 {0x09C7, 0x09C8}, {0x09CB, 0x09CD}, {0x09DC, 0x09DD}, {0x09DF, 0x09E3},
 505                 {0x09F0, 0x09F1}, {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28},
 506                 {0x0A2A, 0x0A30}, {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, {0x0A38, 0x0A39},
 507                 {0x0A3E, 0x0A42}, {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A59, 0x0A5C},
 508                 {0x0A81, 0x0A83}, {0x0A85, 0x0A8B}, {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8},
 509                 {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3}, {0x0AB5, 0x0AB9}, {0x0ABD, 0x0AC5},
 510                 {0x0AC7, 0x0AC9}, {0x0ACB, 0x0ACD}, {0x0B01, 0x0B03}, {0x0B05, 0x0B0C},
 511                 {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, {0x0B32, 0x0B33},
 512                 {0x0B36, 0x0B39}, {0x0B3E, 0x0B43}, {0x0B47, 0x0B48}, {0x0B4B, 0x0B4D},
 513                 {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B61}, {0x0B82, 0x0B83}, {0x0B85, 0x0B8A},
 514                 {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A}, {0x0B9E, 0x0B9F},
 515                 {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA}, {0x0BAE, 0x0BB5}, {0x0BB7, 0x0BB9},
 516                 {0x0BBE, 0x0BC2}, {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD}, {0x0C01, 0x0C03},
 517                 {0x0C05, 0x0C0C}, {0x0C0E, 0x0C10}, {0x0C12, 0x0C28}, {0x0C2A, 0x0C33},
 518                 {0x0C35, 0x0C39}, {0x0C3E, 0x0C44}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D},
 519                 {0x0C60, 0x0C61}, {0x0C82, 0x0C83}, {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90},
 520                 {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3}, {0x0CB5, 0x0CB9}, {0x0CBE, 0x0CC4},
 521                 {0x0CC6, 0x0CC8}, {0x0CCA, 0x0CCD}, {0x0CE0, 0x0CE1}, {0x0D02, 0x0D03},
 522                 {0x0D05, 0x0D0C}, {0x0D0E, 0x0D10}, {0x0D12, 0x0D28}, {0x0D2A, 0x0D39},
 523                 {0x0D3E, 0x0D43}, {0x0D46, 0x0D48}, {0x0D4A, 0x0D4D}, {0x0D60, 0x0D61},
 524                 {0x0E01, 0x0E3A}, {0x0E40, 0x0E5B}, {0x0E81, 0x0E82}, {0x0E87, 0x0E88},
 525                 {0x0E94, 0x0E97}, {0x0E99, 0x0E9F}, {0x0EA1, 0x0EA3}, {0x0EAA, 0x0EAB},
 526                 {0x0EAD, 0x0EAE}, {0x0EB0, 0x0EB9}, {0x0EBB, 0x0EBD}, {0x0EC0, 0x0EC4},
 527                 {0x0EC8, 0x0ECD}, {0x0EDC, 0x0EDD}, {0x0F18, 0x0F19}, {0x0F3E, 0x0F47},
 528                 {0x0F49, 0x0F69}, {0x0F71, 0x0F84}, {0x0F86, 0x0F8B}, {0x0F90, 0x0F95},
 529                 {0x0F99, 0x0FAD}, {0x0FB1, 0x0FB7}, {0x10A0, 0x10C5}, {0x10D0, 0x10F6},
 530                 {0x3041, 0x3093}, {0x309B, 0x309C}, {0x30A1, 0x30F6}, {0x30FB, 0x30FC},
 531                 {0x3105, 0x312C}, {0x4E00, 0x9FA5}, {0xAC00, 0xD7A3}, {0x0660, 0x0669},
 532                 {0x06F0, 0x06F9}, {0x0966, 0x096F}, {0x09E6, 0x09EF}, {0x0A66, 0x0A6F},
 533                 {0x0AE6, 0x0AEF}, {0x0B66, 0x0B6F}, {0x0BE7, 0x0BEF}, {0x0C66, 0x0C6F},
 534                 {0x0CE6, 0x0CEF}, {0x0D66, 0x0D6F}, {0x0E50, 0x0E59}, {0x0ED0, 0x0ED9},
 535                 {0x0F20, 0x0F33}, {0x02B0, 0x02B8}, {0x02BD, 0x02C1}, {0x02D0, 0x02D1},
 536                 {0x02E0, 0x02E4}, {0x203F, 0x2040}, {0x210A, 0x2113}, {0x2118, 0x211D},
 537                 {0x212A, 0x2131}, {0x2133, 0x2138}, {0x2160, 0x2182}, {0x3005, 0x3007},
 538                 {0x3021, 0x3029},
 539         };
 540         for (size_t i = 0; i < sizeof(ranges)/sizeof(ranges[0]); ++i) {
 541                 if (ranges[i][0] <= v && v <= ranges[i][1])
 542                         return true;
 543         }
 544         for (size_t i = 0; i < sizeof(single_chars)/sizeof(single_chars[0]); ++i) {
 545                 if (v == single_chars[i])
 546                         return true;
 547         }
 548         return false;
 549 }
 550
 551 static bool is_universal_char_valid_identifier_c11(utf32 const v)
 552 {
 553         /* C11 Annex D.1 */
 554         if (                v == 0x000A8) return true;
 555         if (                v == 0x000AA) return true;
 556         if (                v == 0x000AD) return true;
 557         if (                v == 0x000AF) return true;
 558         if (0x000B2 <= v && v <= 0x000B5) return true;
 559         if (0x000B7 <= v && v <= 0x000BA) return true;
 560         if (0x000BC <= v && v <= 0x000BE) return true;
 561         if (0x000C0 <= v && v <= 0x000D6) return true;
 562         if (0x000D8 <= v && v <= 0x000F6) return true;
 563         if (0x000F8 <= v && v <= 0x000FF) return true;
 564         if (0x00100 <= v && v <= 0x0167F) return true;
 565         if (0x01681 <= v && v <= 0x0180D) return true;
 566         if (0x0180F <= v && v <= 0x01FFF) return true;
 567         if (0x0200B <= v && v <= 0x0200D) return true;
 568         if (0x0202A <= v && v <= 0x0202E) return true;
 569         if (0x0203F <= v && v <= 0x02040) return true;
 570         if (                v == 0x02054) return true;
 571         if (0x02060 <= v && v <= 0x0206F) return true;
 572         if (0x02070 <= v && v <= 0x0218F) return true;
 573         if (0x02460 <= v && v <= 0x024FF) return true;
 574         if (0x02776 <= v && v <= 0x02793) return true;
 575         if (0x02C00 <= v && v <= 0x02DFF) return true;
 576         if (0x02E80 <= v && v <= 0x02FFF) return true;
 577         if (0x03004 <= v && v <= 0x03007) return true;
 578         if (0x03021 <= v && v <= 0x0302F) return true;
 579         if (0x03031 <= v && v <= 0x0303F) return true;
 580         if (0x03040 <= v && v <= 0x0D7FF) return true;
 581         if (0x0F900 <= v && v <= 0x0FD3D) return true;
 582         if (0x0FD40 <= v && v <= 0x0FDCF) return true;
 583         if (0x0FDF0 <= v && v <= 0x0FE44) return true;
 584         if (0x0FE47 <= v && v <= 0x0FFFD) return true;
 585         if (0x10000 <= v && v <= 0x1FFFD) return true;
 586         if (0x20000 <= v && v <= 0x2FFFD) return true;
 587         if (0x30000 <= v && v <= 0x3FFFD) return true;
 588         if (0x40000 <= v && v <= 0x4FFFD) return true;
 589         if (0x50000 <= v && v <= 0x5FFFD) return true;
 590         if (0x60000 <= v && v <= 0x6FFFD) return true;
 591         if (0x70000 <= v && v <= 0x7FFFD) return true;
 592         if (0x80000 <= v && v <= 0x8FFFD) return true;
 593         if (0x90000 <= v && v <= 0x9FFFD) return true;
 594         if (0xA0000 <= v && v <= 0xAFFFD) return true;
 595         if (0xB0000 <= v && v <= 0xBFFFD) return true;
 596         if (0xC0000 <= v && v <= 0xCFFFD) return true;
 597         if (0xD0000 <= v && v <= 0xDFFFD) return true;
 598         if (0xE0000 <= v && v <= 0xEFFFD) return true;
 599         return false;
 600 }
 601
 602 static bool is_universal_char_valid_identifier(utf32 const v)
 603 {
 604         if (c_mode & _C11)
 605                 return is_universal_char_valid_identifier_c11(v);
 606         return is_universal_char_valid_identifier_c99(v);
 607 }
 608
 609 static bool is_universal_char_invalid_identifier_start(utf32 const v)
 610 {
 611         if (! (c_mode & _C11))
 612                 return false;
 613
 614         /* C11 Annex D.2 */
 615         if (0x0300 <= v && v <= 0x036F) return true;
 616         if (0x1DC0 <= v && v <= 0x1DFF) return true;
 617         if (0x20D0 <= v && v <= 0x20FF) return true;
 618         if (0xFE20 <= v && v <= 0xFE2F) return true;
 619         return false;
 620 }
 621
 622 /**
 623  * Parse an escape sequence.
 624  */
 625 static utf32 parse_escape_sequence(void)
 626 {
 627         eat('\\');
 628
 629         utf32 const ec = input.c;
 630         next_char();
 631
 632         switch (ec) {
 633         case '"':  return '"';
 634         case '\'': return '\'';
 635         case '\\': return '\\';
 636         case '?': return '\?';
 637         case 'a': return '\a';
 638         case 'b': return '\b';
 639         case 'f': return '\f';
 640         case 'n': return '\n';
 641         case 'r': return '\r';
 642         case 't': return '\t';
 643         case 'v': return '\v';
 644         case 'x':
 645                 return parse_hex_sequence();
 646         case '0':
 647         case '1':
 648         case '2':
 649         case '3':
 650         case '4':
 651         case '5':
 652         case '6':
 653         case '7':
 654                 return parse_octal_sequence(ec);
 655         case EOF:
 656                 parse_error("reached end of file while parsing escape sequence");
 657                 return EOF;
 658         /* \E is not documented, but handled, by GCC.  It is acceptable according
 659          * to §6.11.4, whereas \e is not. */
 660         case 'E':
 661         case 'e':
 662                 if (c_mode & _GNUC)
 663                         return 27;   /* hopefully 27 is ALWAYS the code for ESCAPE */
 664                 break;
 665
 666         case 'U': return parse_universal_char(8);
 667         case 'u': return parse_universal_char(4);
 668
 669         default:
 670                 break;
 671         }
 672         /* §6.4.4.4:8 footnote 64 */
 673         parse_error("unknown escape sequence");
 674         return EOF;
 675 }
 676
 677 static const char *identify_string(char *string)
 678 {
 679         const char *result = strset_insert(&stringset, string);
 680         if (result != string) {
 681                 obstack_free(&symbol_obstack, string);
 682         }
 683         return result;
 684 }
 685
 686 static string_t sym_make_string(string_encoding_t const enc)
 687 {
 688         obstack_1grow(&symbol_obstack, '\0');
 689         size_t      const len    = obstack_object_size(&symbol_obstack) - 1;
 690         char       *const string = obstack_finish(&symbol_obstack);
 691         char const *const result = identify_string(string);
 692         return (string_t){ result, len, enc };
 693 }
 694
 695 string_t make_string(char const *const string)
 696 {
 697         obstack_grow(&symbol_obstack, string, strlen(string));
 698         return sym_make_string(STRING_ENCODING_CHAR);
 699 }
 700
 701 static utf32 get_string_encoding_limit(string_encoding_t const enc)
 702 {
 703         switch (enc) {
 704         case STRING_ENCODING_CHAR:   return 0xFF;
 705         case STRING_ENCODING_CHAR16: return 0xFFFF;
 706         case STRING_ENCODING_CHAR32: return 0xFFFFFFFF;
 707         case STRING_ENCODING_UTF8:   return 0xFFFFFFFF;
 708         case STRING_ENCODING_WIDE:   return 0xFFFFFFFF; // FIXME depends on settings
 709         }
 710         panic("invalid string encoding");
 711 }
 712
 713 static void parse_string(utf32 const delimiter, token_kind_t const kind,
 714                          string_encoding_t const enc,
 715                          char const *const context)
 716 {
 717         eat(delimiter);
 718
 719         utf32 const limit = get_string_encoding_limit(enc);
 720         while (true) {
 721                 switch (input.c) {
 722                 case '\\': {
 723                         if (resolve_escape_sequences) {
 724                                 utf32 const tc = parse_escape_sequence();
 725                                 if (tc > limit) {
 726                                         warningf(WARN_OTHER, &pp_token.base.pos,
 727                                                  "escape sequence out of range");
 728                                 }
 729                                 if (enc == STRING_ENCODING_CHAR) {
 730                                         obstack_1grow(&symbol_obstack, tc);
 731                                 } else {
 732                                         obstack_grow_utf8(&symbol_obstack, tc);
 733                                 }
 734                         } else {
 735                                 obstack_1grow(&symbol_obstack, (char)input.c);
 736                                 next_char();
 737                                 obstack_1grow(&symbol_obstack, (char)input.c);
 738                                 next_char();
 739                         }
 740                         break;
 741                 }
 742
 743                 case NEWLINE:
 744                         errorf(&pp_token.base.pos, "newline while parsing %s", context);
 745                         break;
 746
 747                 case EOF:
 748                         errorf(&pp_token.base.pos, "EOF while parsing %s", context);
 749                         goto end_of_string;
 750
 751                 default:
 752                         if (input.c == delimiter) {
 753                                 next_char();
 754                                 goto end_of_string;
 755                         } else {
 756                                 obstack_grow_utf8(&symbol_obstack, input.c);
 757                                 next_char();
 758                                 break;
 759                         }
 760                 }
 761         }
 762
 763 end_of_string:
 764         pp_token.kind           = kind;
 765         pp_token.literal.string = sym_make_string(enc);
 766 }
 767
 768 static void parse_string_literal(string_encoding_t const enc)
 769 {
 770         parse_string('"', T_STRING_LITERAL, enc, "string literal");
 771 }
 772
 773 static void parse_character_constant(string_encoding_t const enc)
 774 {
 775         parse_string('\'', T_CHARACTER_CONSTANT, enc, "character constant");
 776         if (pp_token.literal.string.size == 0) {
 777                 parse_error("empty character constant");
 778         }
 779 }
 780
     /* Case-label list for a switch on a character; the leading `case` and the
      * trailing ':' are written at the use site (`case SYMBOL_CASES_WITHOUT_E_P:`).
      * It covers '$', '_' and all ASCII letters except e, E, p and P.  For '$'
      * the expansion jumps to a label `dollar_sign`, which the enclosing
      * function must provide, when allow_dollar_in_symbol is false. */
 781 #define SYMBOL_CASES_WITHOUT_E_P \
 782              '$': if (!allow_dollar_in_symbol) goto dollar_sign; \
 783         case 'a': \
 784         case 'b': \
 785         case 'c': \
 786         case 'd': \
 787         case 'f': \
 788         case 'g': \
 789         case 'h': \
 790         case 'i': \
 791         case 'j': \
 792         case 'k': \
 793         case 'l': \
 794         case 'm': \
 795         case 'n': \
 796         case 'o': \
 797         case 'q': \
 798         case 'r': \
 799         case 's': \
 800         case 't': \
 801         case 'u': \
 802         case 'v': \
 803         case 'w': \
 804         case 'x': \
 805         case 'y': \
 806         case 'z': \
 807         case 'A': \
 808         case 'B': \
 809         case 'C': \
 810         case 'D': \
 811         case 'F': \
 812         case 'G': \
 813         case 'H': \
 814         case 'I': \
 815         case 'J': \
 816         case 'K': \
 817         case 'L': \
 818         case 'M': \
 819         case 'N': \
 820         case 'O': \
 821         case 'Q': \
 822         case 'R': \
 823         case 'S': \
 824         case 'T': \
 825         case 'U': \
 826         case 'V': \
 827         case 'W': \
 828         case 'X': \
 829         case 'Y': \
 830         case 'Z': \
 831         case '_'
 832
     /**
      * Case-label list for every character handled by
      * SYMBOL_CASES_WITHOUT_E_P plus the letters 'e', 'p', 'E' and 'P'.
      * Use it as `case SYMBOL_CASES:`. It inherits the requirements of
      * SYMBOL_CASES_WITHOUT_E_P (a `dollar_sign` label in the enclosing
      * function, and the statement attached to the '$' label).
      */
 833 #define SYMBOL_CASES \
 834              SYMBOL_CASES_WITHOUT_E_P: \
 835         case 'e': \
 836         case 'p': \
 837         case 'E': \
 838         case 'P'
 839
     /**
      * Case-label list for the decimal digits '0' to '9'.
      * Use it as `case DIGIT_CASES:`; the leading `case` and the trailing `:`
      * are written by the user.
      */
 840 #define DIGIT_CASES \
 841              '0':  \
 842         case '1':  \
 843         case '2':  \
 844         case '3':  \
 845         case '4':  \
 846         case '5':  \
 847         case '6':  \
 848         case '7':  \
 849         case '8':  \
 850         case '9'
 851
 852 static void start_expanding(pp_definition_t *definition)
 853 {
 854         definition->parent_expansion = current_expansion;
 855         definition->expand_pos       = 0;
 856         definition->is_expanding     = true;
 857         if (definition->list_len > 0) {
 858                 definition->token_list[0].had_whitespace
 859                         = info.had_whitespace;
 860         }
 861         current_expansion = definition;
 862 }
 863
 864 static void finished_expanding(pp_definition_t *definition)
 865 {
 866         assert(definition->is_expanding);
 867         pp_definition_t *parent = definition->parent_expansion;
 868         definition->parent_expansion = NULL;
 869         definition->is_expanding     = false;
 870
 871         /* stop further expanding once we expanded a parameter used in a
 872          * sub macro-call */
 873         if (definition == argument_expanding)
 874                 argument_expanding = NULL;
 875
 876         assert(current_expansion == definition);
 877         current_expansion = parent;
 878 }
 879
 880 static void grow_string_escaped(struct obstack *obst, const string_t *string, char const *delimiter)
 881 {
 882         char const *prefix = get_string_encoding_prefix(string->encoding);
 883         obstack_printf(obst, "%s%s", prefix, delimiter);
 884         size_t      size = string->size;
 885         const char *str  = string->begin;
 886         if (resolve_escape_sequences) {
 887                 obstack_grow(obst, str, size);
 888         } else {
 889                 for (size_t i = 0; i < size; ++i) {
 890                         const char c = str[i];
 891                         if (c == '\\' || c == '"')
 892                                 obstack_1grow(obst, '\\');
 893                         obstack_1grow(obst, c);
 894                 }
 895         }
 896         obstack_printf(obst, "%s", delimiter);
 897 }
 898
 899 static void grow_token(struct obstack *obst, const token_t *token)
 900 {
 901         switch (token->kind) {
 902         case T_NUMBER:
 903                 obstack_grow(obst, token->literal.string.begin, token->literal.string.size);
 904                 break;
 905
 906         case T_STRING_LITERAL: {
 907                 char const *const delimiter = resolve_escape_sequences ? "\"" : "\\\"";
 908                 grow_string_escaped(obst, &token->literal.string, delimiter);
 909                 break;
 910         }
 911
 912         case T_CHARACTER_CONSTANT:
 913                 grow_string_escaped(obst, &token->literal.string, "'");
 914                 break;
 915
 916         case T_IDENTIFIER:
 917         default: {
 918                 const char *str = token->base.symbol->string;
 919                 size_t      len = strlen(str);
 920                 obstack_grow(obst, str, len);
 921                 break;
 922         }
 923         }
 924 }
 925
 926 static void stringify(const pp_definition_t *definition)
 927 {
 928         assert(obstack_object_size(&symbol_obstack) == 0);
 929
 930         size_t list_len = definition->list_len;
 931         for (size_t p = 0; p < list_len; ++p) {
 932                 const saved_token_t *saved = &definition->token_list[p];
 933                 if (p > 0 && saved->had_whitespace)
 934                         obstack_1grow(&symbol_obstack, ' ');
 935                 grow_token(&symbol_obstack, &saved->token);
 936         }
 937         pp_token.kind           = T_STRING_LITERAL;
 938         pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
 939 }
 940
 941 static inline void set_punctuator(token_kind_t const kind)
 942 {
 943         pp_token.kind        = kind;
 944         pp_token.base.symbol = token_symbols[kind];
 945 }
 946
 947 static inline void set_digraph(token_kind_t const kind, symbol_t *const symbol)
 948 {
 949         pp_token.kind        = kind;
 950         pp_token.base.symbol = symbol;
 951 }
 952
 953 /**
 954  * returns next final token from a preprocessor macro expansion
 955  */
 956 static bool expand_next(void)
 957 {
 958         if (current_expansion == NULL)
 959                 return false;
 960
 961 restart:;
 962         size_t pos = current_expansion->expand_pos;
 963         if (pos >= current_expansion->list_len) {
 964                 finished_expanding(current_expansion);
 965                 /* it was the outermost expansion, parse pptoken normally */
 966                 if (current_expansion == NULL) {
 967                         return false;
 968                 }
 969                 goto restart;
 970         }
 971         const saved_token_t *saved = &current_expansion->token_list[pos++];
 972         pp_token = saved->token;
 973         if (pp_token.kind == '#') {
 974                 if (pos < current_expansion->list_len) {
 975                         const saved_token_t *next = &current_expansion->token_list[pos];
 976                         if (next->token.kind == T_MACRO_PARAMETER) {
 977                                 pp_definition_t *def = next->token.macro_parameter.def;
 978                                 assert(def != NULL && def->is_parameter);
 979                                 stringify(def);
 980                                 ++pos;
 981                         }
 982                 }
 983         }
 984
 985         if (current_expansion->expand_pos > 0)
 986                 info.had_whitespace = saved->had_whitespace;
 987         current_expansion->expand_pos = pos;
 988         pp_token.base.pos             = expansion_pos;
 989
 990         return true;
 991 }
 992
 993 /**
 994  * Returns the next token kind found when continuing the current expansions
 995  * without starting new sub-expansions.
 996  */
 997 static token_kind_t peek_expansion(void)
 998 {
 999         for (pp_definition_t *e = current_expansion; e; e = e->parent_expansion) {
1000                 if (e->expand_pos < e->list_len)
1001                         return e->token_list[e->expand_pos].token.kind;
1002         }
1003         return T_EOF;
1004 }
1005
1006 static void skip_line_comment(void)
1007 {
1008         info.had_whitespace = true;
1009         while (true) {
1010                 switch (input.c) {
1011                 case EOF:
1012                         return;
1013
1014                 case '\r':
1015                 case '\n':
1016                         return;
1017
1018                 default:
1019                         next_char();
1020                         break;
1021                 }
1022         }
1023 }
1024
1025 static void skip_multiline_comment(void)
1026 {
1027         info.had_whitespace = true;
1028
1029         position_t const start_pos = input.pos;
1030         while (true) {
1031                 switch (input.c) {
1032                 case '/':
1033                         next_char();
1034                         if (input.c == '*') {
1035                                 /* TODO: nested comment, warn here */
1036                         }
1037                         break;
1038                 case '*':
1039                         next_char();
1040                         if (input.c == '/') {
1041                                 if (input.pos.lineno != input.output_line)
1042                                         info.whitespace_at_line_begin = input.pos.colno;
1043                                 next_char();
1044                                 return;
1045                         }
1046                         break;
1047
1048                 case NEWLINE:
1049                         break;
1050
1051                 case EOF:
1052                         errorf(&start_pos, "at end of file while looking for comment end");
1053                         return;
1054
1055                 default:
1056                         next_char();
1057                         break;
1058                 }
1059         }
1060 }
1061
1062 static bool skip_till_newline(bool stop_at_non_whitespace)
1063 {
1064         bool res = false;
1065         while (true) {
1066                 switch (input.c) {
1067                 case ' ':
1068                 case '\t':
1069                         next_char();
1070                         continue;
1071
1072                 case '/':
1073                         next_char();
1074                         if (input.c == '/') {
1075                                 next_char();
1076                                 skip_line_comment();
1077                                 continue;
1078                         } else if (input.c == '*') {
1079                                 next_char();
1080                                 skip_multiline_comment();
1081                                 continue;
1082                         } else {
1083                                 put_back(input.c);
1084                                 input.c = '/';
1085                         }
1086                         return true;
1087
1088                 case NEWLINE:
1089                         return res;
1090
1091                 default:
1092                         if (stop_at_non_whitespace)
1093                                 return false;
1094                         res = true;
1095                         next_char();
1096                         continue;
1097                 }
1098         }
1099 }
1100
1101 static void skip_whitespace(void)
1102 {
1103         while (true) {
1104                 switch (input.c) {
1105                 case ' ':
1106                 case '\t':
1107                         ++info.whitespace_at_line_begin;
1108                         info.had_whitespace = true;
1109                         next_char();
1110                         continue;
1111
1112                 case NEWLINE:
1113                         info.at_line_begin  = true;
1114                         info.had_whitespace = true;
1115                         info.whitespace_at_line_begin = 0;
1116                         continue;
1117
1118                 case '/':
1119                         next_char();
1120                         if (input.c == '/') {
1121                                 next_char();
1122                                 skip_line_comment();
1123                                 continue;
1124                         } else if (input.c == '*') {
1125                                 next_char();
1126                                 skip_multiline_comment();
1127                                 continue;
1128                         } else {
1129                                 put_back(input.c);
1130                                 input.c = '/';
1131                         }
1132                         return;
1133
1134                 default:
1135                         return;
1136                 }
1137         }
1138 }
1139
1140 static inline void eat_pp(pp_token_kind_t const kind)
1141 {
1142         assert(pp_token.base.symbol->pp_ID == kind);
1143         (void) kind;
1144         next_input_token();
1145 }
1146
1147 static inline void eat_token(token_kind_t const kind)
1148 {
1149         assert(pp_token.kind == kind);
1150         (void)kind;
1151         next_input_token();
1152 }
1153
1154 static string_encoding_t identify_encoding_prefix(symbol_t *const sym)
1155 {
1156         if (sym == symbol_L) return STRING_ENCODING_WIDE;
1157         if (c_mode & _C11) {
1158                 if (sym == symbol_U)  return STRING_ENCODING_CHAR32;
1159                 if (sym == symbol_u)  return STRING_ENCODING_CHAR16;
1160                 if (sym == symbol_u8) return STRING_ENCODING_UTF8;
1161         }
1162         return STRING_ENCODING_CHAR;
1163 }
1164
1165 static void parse_symbol(void)
1166 {
1167         assert(obstack_object_size(&symbol_obstack) == 0);
1168         while (true) {
1169                 switch (input.c) {
1170                 case DIGIT_CASES:
1171                 case SYMBOL_CASES:
1172                         obstack_1grow(&symbol_obstack, (char) input.c);
1173                         next_char();
1174                         break;
1175
1176                 case '\\':
1177                         next_char();
1178                         switch (input.c) {
1179                         {
1180                                 unsigned n;
1181                         case 'U': n = 8; goto universal;
1182                         case 'u': n = 4; goto universal;
1183 universal:
1184                                 if (!resolve_escape_sequences) {
1185                                         obstack_1grow(&symbol_obstack, '\\');
1186                                         obstack_1grow(&symbol_obstack, input.c);
1187                                 }
1188                                 next_char();
1189                                 utf32 const v = parse_universal_char(n);
1190                                 if (!is_universal_char_valid_identifier(v)) {
1191                                         if (is_universal_char_valid(v)) {
1192                                                 errorf(&input.pos,
1193                                                            "universal character \\%c%0*X is not valid in an identifier",
1194                                                            n == 4 ? 'u' : 'U', (int)n, v);
1195                                         }
1196                                 } else if (obstack_object_size(&symbol_obstack) == 0 && is_universal_char_invalid_identifier_start(v)) {
1197                                         errorf(&input.pos,
1198                                                    "universal character \\%c%0*X is not valid as start of an identifier",
1199                                                    n == 4 ? 'u' : 'U', (int)n, v);
1200                                 } else if (resolve_escape_sequences) {
1201                                         obstack_grow_utf8(&symbol_obstack, v);
1202                                 }
1203                                 break;
1204                         }
1205
1206                         default:
1207                                 put_back(input.c);
1208                                 input.c = '\\';
1209                                 goto end_symbol;
1210                         }
1211
1212                 default:
1213 dollar_sign:
1214                         goto end_symbol;
1215                 }
1216         }
1217
1218 end_symbol:
1219         obstack_1grow(&symbol_obstack, '\0');
1220         char *string = obstack_finish(&symbol_obstack);
1221
1222         symbol_t *symbol = symbol_table_insert(string);
1223
1224         /* Might be a prefixed string or character constant: L/U/u/u8"string". */
1225         if (input.c == '"') {
1226                 string_encoding_t const enc = identify_encoding_prefix(symbol);
1227                 if (enc != STRING_ENCODING_CHAR) {
1228                         parse_string_literal(enc);
1229                         return;
1230                 }
1231         } else if (input.c == '\'') {
1232                 string_encoding_t const enc = identify_encoding_prefix(symbol);
1233                 if (enc != STRING_ENCODING_CHAR) {
1234                         if (enc == STRING_ENCODING_UTF8) {
1235                                 errorf(&pp_token.base.pos,
1236                                        "'u8' is not a valid encoding for a chracter constant");
1237                         }
1238                         parse_character_constant(enc);
1239                         return;
1240                 }
1241         }
1242
1243         pp_token.kind        = symbol->ID;
1244         pp_token.base.symbol = symbol;
1245
1246         /* we can free the memory from symbol obstack if we already had an entry in
1247          * the symbol table */
1248         if (symbol->string != string) {
1249                 obstack_free(&symbol_obstack, string);
1250         }
1251 }
1252
1253 static void parse_number(void)
1254 {
1255         obstack_1grow(&symbol_obstack, (char) input.c);
1256         next_char();
1257
1258         while (true) {
1259                 switch (input.c) {
1260                 case '.':
1261                 case DIGIT_CASES:
1262                 case SYMBOL_CASES_WITHOUT_E_P:
1263                         obstack_1grow(&symbol_obstack, (char) input.c);
1264                         next_char();
1265                         break;
1266
1267                 case 'e':
1268                 case 'p':
1269                 case 'E':
1270                 case 'P':
1271                         obstack_1grow(&symbol_obstack, (char) input.c);
1272                         next_char();
1273                         if (input.c == '+' || input.c == '-') {
1274                                 obstack_1grow(&symbol_obstack, (char) input.c);
1275                                 next_char();
1276                         }
1277                         break;
1278
1279                 default:
1280 dollar_sign:
1281                         goto end_number;
1282                 }
1283         }
1284
1285 end_number:
1286         pp_token.kind           = T_NUMBER;
1287         pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
1288 }
1289
     /* Starts the lookahead for a multi-character punctuator: consumes the
      * current character and opens a switch on the following one. The switch
      * is left open; ELSE or ELSE_CODE supplies the closing brace. */
1290 #define MAYBE_PROLOG \
1291         next_char(); \
1292         switch (input.c) {
1293
     /* Case for use inside a MAYBE_PROLOG switch: if the lookahead character
      * is `ch`, consume it, make the token the punctuator `kind` and return
      * from the enclosing function. */
1294 #define MAYBE(ch, kind) \
1295         case ch: \
1296                 next_char(); \
1297                 set_punctuator(kind); \
1298                 return;
1299
     /* Like MAYBE, but the token is set through set_digraph() so that it gets
      * the kind `kind` together with the explicitly given `symbol`. */
1300 #define MAYBE_DIGRAPH(ch, kind, symbol) \
1301         case ch: \
1302                 next_char(); \
1303                 set_digraph(kind, symbol); \
1304                 return;
1305
     /* Closes a switch opened by MAYBE_PROLOG: `code` becomes the body of the
      * default case, followed by the closing brace of the switch. */
1306 #define ELSE_CODE(code) \
1307         default: \
1308                 code \
1309         }
1310
     /* Closes a MAYBE_PROLOG switch; in the default case the token becomes
      * the punctuator `kind` (the lookahead character is not consumed) and the
      * enclosing function returns. */
1311 #define ELSE(kind) ELSE_CODE(set_punctuator(kind); return;)
1312
1313 /** Identifies the next preprocessing token contained in the input stream
      * and stores it in pp_token. No macro expansion is performed.
      *
      * Spaces, tabs, newlines and comments in front of the token are skipped
      * and recorded in the global whitespace info (had_whitespace,
      * at_line_begin, whitespace_at_line_begin). When EOF is reached while
      * input_stack is non-NULL, the current input is closed, the saved input
      * is restored and scanning continues there; otherwise a T_EOF token is
      * produced. */
1315 static void next_input_token(void)
1316 {
             /* take over whitespace info that was recorded ahead of time in
              * next_info, otherwise start with cleared flags */
1317         if (next_info.had_whitespace) {
1318                 info = next_info;
1319                 next_info.had_whitespace = false;
1320         } else {
1321                 info.at_line_begin  = false;
1322                 info.had_whitespace = false;
1323         }
1324 restart:
             /* the token position is re-recorded each time whitespace or a
              * comment has been skipped */
1325         pp_token.base.pos    = input.pos;
1326         pp_token.base.symbol = NULL;
1327
1328         switch (input.c) {
1329         case ' ':
1330         case '\t':
1331                 info.whitespace_at_line_begin++;
1332                 info.had_whitespace = true;
1333                 next_char();
1334                 goto restart;
1335
1336         case NEWLINE:
1337                 info.at_line_begin            = true;
1338                 info.had_whitespace           = true;
1339                 info.whitespace_at_line_begin = 0;
1340                 goto restart;
1341
1342         case SYMBOL_CASES:
1343                 parse_symbol();
1344                 return;
1345
1346         case DIGIT_CASES:
1347                 parse_number();
1348                 return;
1349
1350         case '"':
1351                 parse_string_literal(STRING_ENCODING_CHAR);
1352                 return;
1353
1354         case '\'':
1355                 parse_character_constant(STRING_ENCODING_CHAR);
1356                 return;
1357
             /* '.' may start a number (".5"), the ellipsis "..." or be a plain
              * dot; ".." that is not followed by a third dot yields a single
              * '.' with the second dot left in the input */
1358         case '.':
1359                 MAYBE_PROLOG
1360                         case '0':
1361                         case '1':
1362                         case '2':
1363                         case '3':
1364                         case '4':
1365                         case '5':
1366                         case '6':
1367                         case '7':
1368                         case '8':
1369                         case '9':
1370                                 put_back(input.c);
1371                                 input.c = '.';
1372                                 parse_number();
1373                                 return;
1374
1375                         case '.':
1376                                 MAYBE_PROLOG
1377                                 MAYBE('.', T_DOTDOTDOT)
1378                                 ELSE_CODE(
1379                                         put_back(input.c);
1380                                         input.c = '.';
1381                                         set_punctuator('.');
1382                                         return;
1383                                 )
1384                 ELSE('.')
1385         case '&':
1386                 MAYBE_PROLOG
1387                 MAYBE('&', T_ANDAND)
1388                 MAYBE('=', T_ANDEQUAL)
1389                 ELSE('&')
1390         case '*':
1391                 MAYBE_PROLOG
1392                 MAYBE('=', T_ASTERISKEQUAL)
1393                 ELSE('*')
1394         case '+':
1395                 MAYBE_PROLOG
1396                 MAYBE('+', T_PLUSPLUS)
1397                 MAYBE('=', T_PLUSEQUAL)
1398                 ELSE('+')
1399         case '-':
1400                 MAYBE_PROLOG
1401                 MAYBE('>', T_MINUSGREATER)
1402                 MAYBE('-', T_MINUSMINUS)
1403                 MAYBE('=', T_MINUSEQUAL)
1404                 ELSE('-')
1405         case '!':
1406                 MAYBE_PROLOG
1407                 MAYBE('=', T_EXCLAMATIONMARKEQUAL)
1408                 ELSE('!')
             /* '/' is either "/=", the start of a comment (which is skipped
              * before scanning restarts) or a plain slash */
1409         case '/':
1410                 MAYBE_PROLOG
1411                 MAYBE('=', T_SLASHEQUAL)
1412                 case '*':
1413                         next_char();
1414                         skip_multiline_comment();
1415                         goto restart;
1416                 case '/':
1417                         next_char();
1418                         skip_line_comment();
1419                         goto restart;
1420                 ELSE('/')
             /* '%' covers "%=", and the digraphs "%>" for '}', "%:" for '#' and
              * "%:%:" for '##'; "%:%" without a final ':' gives '#' and leaves
              * the '%' in the input */
1421         case '%':
1422                 MAYBE_PROLOG
1423                 MAYBE_DIGRAPH('>', '}', symbol_percentgreater)
1424                 MAYBE('=', T_PERCENTEQUAL)
1425                 case ':':
1426                         MAYBE_PROLOG
1427                         case '%':
1428                                 MAYBE_PROLOG
1429                                 MAYBE_DIGRAPH(':', T_HASHHASH, symbol_percentcolonpercentcolon)
1430                                 ELSE_CODE(
1431                                         put_back(input.c);
1432                                         input.c = '%';
1433                                         goto digraph_percentcolon;
1434                                 )
1435                         ELSE_CODE(
1436 digraph_percentcolon:
1437                                 set_digraph('#', symbol_percentcolon);
1438                                 return;
1439                         )
1440                 ELSE('%')
1441         case '<':
1442                 MAYBE_PROLOG
1443                 MAYBE_DIGRAPH(':', '[', symbol_lesscolon)
1444                 MAYBE_DIGRAPH('%', '{', symbol_lesspercent)
1445                 MAYBE('=', T_LESSEQUAL)
1446                 case '<':
1447                         MAYBE_PROLOG
1448                         MAYBE('=', T_LESSLESSEQUAL)
1449                         ELSE(T_LESSLESS)
1450                 ELSE('<')
1451         case '>':
1452                 MAYBE_PROLOG
1453                 MAYBE('=', T_GREATEREQUAL)
1454                 case '>':
1455                         MAYBE_PROLOG
1456                         MAYBE('=', T_GREATERGREATEREQUAL)
1457                         ELSE(T_GREATERGREATER)
1458                 ELSE('>')
1459         case '^':
1460                 MAYBE_PROLOG
1461                 MAYBE('=', T_CARETEQUAL)
1462                 ELSE('^')
1463         case '|':
1464                 MAYBE_PROLOG
1465                 MAYBE('=', T_PIPEEQUAL)
1466                 MAYBE('|', T_PIPEPIPE)
1467                 ELSE('|')
             /* "::" is only a token of its own in C++ mode; otherwise the
              * second ':' stays in the input and a single ':' is produced */
1468         case ':':
1469                 MAYBE_PROLOG
1470                 MAYBE_DIGRAPH('>', ']', symbol_colongreater)
1471                 case ':':
1472                         if (c_mode & _CXX) {
1473                                 next_char();
1474                                 set_punctuator(T_COLONCOLON);
1475                                 return;
1476                         }
1477                         /* FALLTHROUGH */
1478                 ELSE(':')
1479         case '=':
1480                 MAYBE_PROLOG
1481                 MAYBE('=', T_EQUALEQUAL)
1482                 ELSE('=')
1483         case '#':
1484                 MAYBE_PROLOG
1485                 MAYBE('#', T_HASHHASH)
1486                 ELSE('#')
1487
             /* punctuators that are never part of a longer token */
1488         case '?':
1489         case '[':
1490         case ']':
1491         case '(':
1492         case ')':
1493         case '{':
1494         case '}':
1495         case '~':
1496         case ';':
1497         case ',':
1498                 set_punctuator(input.c);
1499                 next_char();
1500                 return;
1501
1502         case EOF:
                     /* a non-empty input stack presumably means we are inside an
                      * included file: close it, continue with the saved input
                      * and announce that with a line directive carrying the
                      * flag "2" */
1503                 if (input_stack != NULL) {
1504                         fclose(close_pp_input());
1505                         pop_restore_input();
1506                         if (out)
1507                                 fputc('\n', out);
1508                         if (input.c == (utf32)EOF)
1509                                 --input.pos.lineno;
1510                         print_line_directive(&input.pos, "2");
1511                         goto restart;
1512                 } else {
1513                         info.at_line_begin = true;
1514                         set_punctuator(T_EOF);
1515                 }
1516                 return;
1517
             /* a backslash followed by 'u' or 'U' starts an identifier that
              * begins with a universal character name; the lookahead is undone
              * in any case so that input.c is the backslash again */
1518         case '\\':
1519                 next_char();
1520                 int next_c = input.c;
1521                 put_back(input.c);
1522                 input.c = '\\';
1523                 if (next_c == 'U' || next_c == 'u') {
1524                         parse_symbol();
1525                         return;
1526                 }
1527                 /* FALLTHROUGH */
1528         default:
1529 dollar_sign:
                     /* unknown character: either report it and skip it, or hand
                      * it on as a T_UNKNOWN_CHAR token whose symbol is the
                      * UTF-8 encoded character */
1530                 if (error_on_unknown_chars) {
1531                         errorf(&pp_token.base.pos, "unknown character '%lc' found", input.c);
1532                         next_char();
1533                         goto restart;
1534                 } else {
1535                         assert(obstack_object_size(&symbol_obstack) == 0);
1536                         obstack_grow_utf8(&symbol_obstack, input.c);
1537                         obstack_1grow(&symbol_obstack, '\0');
1538                         char     *const string = obstack_finish(&symbol_obstack);
1539                         symbol_t *const symbol = symbol_table_insert(string);
1540                         if (symbol->string != string)
1541                                 obstack_free(&symbol_obstack, string);
1542
1543                         pp_token.kind        = T_UNKNOWN_CHAR;
1544                         pp_token.base.symbol = symbol;
1545                         next_char();
1546                         return;
1547                 }
1548         }
1549 }
1550
1551 static void print_quoted_string(const char *const string)
1552 {
1553         fputc('"', out);
1554         for (const char *c = string; *c != 0; ++c) {
1555                 switch (*c) {
1556                 case '"': fputs("\\\"", out); break;
1557                 case '\\':  fputs("\\\\", out); break;
1558                 case '\a':  fputs("\\a", out); break;
1559                 case '\b':  fputs("\\b", out); break;
1560                 case '\f':  fputs("\\f", out); break;
1561                 case '\n':  fputs("\\n", out); break;
1562                 case '\r':  fputs("\\r", out); break;
1563                 case '\t':  fputs("\\t", out); break;
1564                 case '\v':  fputs("\\v", out); break;
1565                 case '\?':  fputs("\\?", out); break;
1566                 default:
1567                         if (!isprint(*c)) {
1568                                 fprintf(out, "\\%03o", (unsigned)*c);
1569                                 break;
1570                         }
1571                         fputc(*c, out);
1572                         break;
1573                 }
1574         }
1575         fputc('"', out);
1576 }
1577
1578 static void print_line_directive(const position_t *pos, const char *add)
1579 {
1580         if (!out)
1581                 return;
1582
1583         fprintf(out, "# %u ", pos->lineno);
1584         print_quoted_string(pos->input_name);
1585         if (add != NULL) {
1586                 fputc(' ', out);
1587                 fputs(add, out);
1588         }
1589         if (pos->is_system_header) {
1590                 fputs(" 3", out);
1591         }
1592
1593         printed_input_name = pos->input_name;
1594         input.output_line  = pos->lineno-1;
1595 }
1596
1597 static bool emit_newlines(void)
1598 {
1599         if (!out)
1600                 return true;
1601
1602         unsigned delta = pp_token.base.pos.lineno - input.output_line;
1603         if (delta == 0)
1604                 return false;
1605
1606         if (delta >= 9) {
1607                 fputc('\n', out);
1608                 print_line_directive(&pp_token.base.pos, NULL);
1609                 fputc('\n', out);
1610         } else {
1611                 for (unsigned i = 0; i < delta; ++i) {
1612                         fputc('\n', out);
1613                 }
1614         }
1615         input.output_line = pp_token.base.pos.lineno;
1616
1617         unsigned whitespace = info.whitespace_at_line_begin;
1618         /* make sure there is at least 1 whitespace before a (macro-expanded)
1619          * '#' at line begin. I'm not sure why this is good, but gcc does it. */
1620         if (pp_token.kind == '#' && whitespace == 0)
1621                 ++whitespace;
1622         for (unsigned i = 0; i < whitespace; ++i)
1623                 fputc(' ', out);
1624
1625         return true;
1626 }
1627
1628 void set_preprocessor_output(FILE *output)
1629 {
1630         out = output;
1631         if (out != NULL) {
1632                 error_on_unknown_chars   = false;
1633                 resolve_escape_sequences = false;
1634         } else {
1635                 error_on_unknown_chars   = true;
1636                 resolve_escape_sequences = true;
1637         }
1638 }
1639
1640 void emit_pp_token(void)
1641 {
1642         if (!emit_newlines() &&
1643             (info.had_whitespace || tokens_would_paste(last_token, pp_token.kind)))
1644                 fputc(' ', out);
1645
1646         switch (pp_token.kind) {
1647         case T_NUMBER:
1648                 fputs(pp_token.literal.string.begin, out);
1649                 break;
1650
1651         case T_STRING_LITERAL:
1652                 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1653                 fputc('"', out);
1654                 fputs(pp_token.literal.string.begin, out);
1655                 fputc('"', out);
1656                 break;
1657
1658         case T_CHARACTER_CONSTANT:
1659                 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1660                 fputc('\'', out);
1661                 fputs(pp_token.literal.string.begin, out);
1662                 fputc('\'', out);
1663                 break;
1664
1665         case T_MACRO_PARAMETER:
1666                 panic("macro parameter not expanded");
1667
1668         default:
1669                 fputs(pp_token.base.symbol->string, out);
1670                 break;
1671         }
1672         last_token = pp_token.kind;
1673 }
1674
1675 static void eat_pp_directive(void)
1676 {
1677         while (!info.at_line_begin) {
1678                 next_input_token();
1679         }
1680 }
1681
1682 static bool strings_equal(const string_t *string1, const string_t *string2)
1683 {
1684         size_t size = string1->size;
1685         if (size != string2->size)
1686                 return false;
1687
1688         const char *c1 = string1->begin;
1689         const char *c2 = string2->begin;
1690         for (size_t i = 0; i < size; ++i, ++c1, ++c2) {
1691                 if (*c1 != *c2)
1692                         return false;
1693         }
1694         return true;
1695 }
1696
1697 static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
1698 {
1699         if (token1->kind != token2->kind)
1700                 return false;
1701
1702         switch (token1->kind) {
1703         case T_NUMBER:
1704         case T_CHARACTER_CONSTANT:
1705         case T_STRING_LITERAL:
1706                 return strings_equal(&token1->literal.string, &token2->literal.string);
1707
1708         case T_MACRO_PARAMETER:
1709                 return token1->macro_parameter.def->symbol
1710                     == token2->macro_parameter.def->symbol;
1711
1712         default:
1713                 return token1->base.symbol == token2->base.symbol;
1714         }
1715 }
1716
1717 static bool pp_definitions_equal(const pp_definition_t *definition1,
1718                                  const pp_definition_t *definition2)
1719 {
1720         if (definition1->list_len != definition2->list_len)
1721                 return false;
1722
1723         size_t               len = definition1->list_len;
1724         const saved_token_t *t1  = definition1->token_list;
1725         const saved_token_t *t2  = definition2->token_list;
1726         for (size_t i = 0; i < len; ++i, ++t1, ++t2) {
1727                 if (!pp_tokens_equal(&t1->token, &t2->token))
1728                         return false;
1729                 if (t1->had_whitespace != t2->had_whitespace)
1730                         return false;
1731         }
1732         return true;
1733 }
1734
1735 static void missing_macro_param_error(void)
1736 {
1737         errorf(&pp_token.base.pos, "'#' is not followed by a macro parameter");
1738 }
1739
1740 static bool is_defineable_token(char const *const context)
1741 {
1742         if (info.at_line_begin) {
1743                 errorf(&pp_token.base.pos, "unexpected end of line after %s", context);
1744         }
1745
1746         symbol_t *const symbol = pp_token.base.symbol;
1747         if (!symbol)
1748                 goto no_ident;
1749
1750         if (pp_token.kind != T_IDENTIFIER) {
1751                 switch (symbol->string[0]) {
1752                 case SYMBOL_CASES:
1753 dollar_sign:
1754                         break;
1755
1756                 default:
1757 no_ident:
1758                         errorf(&pp_token.base.pos, "expected identifier after %s, got %K",
1759                                context, &pp_token);
1760                         return false;
1761                 }
1762         }
1763
1764         /* TODO turn this into a flag in pp_def. */
1765         switch (symbol->pp_ID) {
1766         /* §6.10.8:4 */
1767         case TP_defined:
1768                 errorf(&pp_token.base.pos, "%K cannot be used as macro name in %s",
1769                        &pp_token, context);
1770                 return false;
1771
1772         default:
1773                 return true;
1774         }
1775 }
1776
/**
 * Parse a "#define" directive.
 *
 * Reads the macro name, an optional parameter list (only if a '(' follows
 * the name without whitespace) and the replacement token list up to the end
 * of the line, then installs the result as the definition of the name's
 * symbol. A redefinition that differs from the previous definition produces
 * a warning; an identical one reuses the old definition.
 *
 * In skip mode the directive is consumed without any effect.
 */
static void parse_define_directive(void)
{
        eat_pp(TP_define);
        if (skip_mode) {
                eat_pp_directive();
                return;
        }

        assert(obstack_object_size(&pp_obstack) == 0);

        if (!is_defineable_token("#define"))
                goto error_out;
        symbol_t *const symbol = pp_token.base.symbol;

        pp_definition_t *new_definition
                = obstack_alloc(&pp_obstack, sizeof(new_definition[0]));
        memset(new_definition, 0, sizeof(new_definition[0]));
        new_definition->symbol = symbol;
        new_definition->pos    = input.pos;

        /* this is probably the only place where spaces are significant in the
         * lexer (except for the fact that they separate tokens). #define b(x)
         * is something else than #define b (x) */
        if (input.c == '(') {
                next_input_token();
                eat_token('(');

                /* collect the parameters as an array grown on pp_obstack */
                while (true) {
                        switch (pp_token.kind) {
                        case T_DOTDOTDOT:
                                new_definition->is_variadic = true;
                                eat_token(T_DOTDOTDOT);
                                if (pp_token.kind != ')') {
                                        errorf(&input.pos,
                                                        "'...' not at end of macro argument list");
                                        goto error_out;
                                }
                                break;

                        case T_IDENTIFIER: {
                                pp_definition_t parameter;
                                memset(&parameter, 0, sizeof(parameter));
                                parameter.pos          = pp_token.base.pos;
                                parameter.symbol       = pp_token.base.symbol;
                                parameter.is_parameter = true;
                                obstack_grow(&pp_obstack, &parameter, sizeof(parameter));
                                eat_token(T_IDENTIFIER);

                                if (pp_token.kind == ',') {
                                        eat_token(',');
                                        break;
                                }

                                if (pp_token.kind != ')') {
                                        errorf(&pp_token.base.pos,
                                               "expected ',' or ')' after identifier, got %K",
                                               &pp_token);
                                        goto error_out;
                                }
                                break;
                        }

                        case ')':
                                eat_token(')');
                                goto finish_argument_list;

                        default:
                                errorf(&pp_token.base.pos,
                                       "expected identifier, '...' or ')' in #define argument list, got %K",
                                       &pp_token);
                                goto error_out;
                        }
                }

        finish_argument_list:
                new_definition->has_parameters = true;
                size_t size = obstack_object_size(&pp_obstack);
                new_definition->n_parameters
                        = size / sizeof(new_definition->parameters[0]);
                new_definition->parameters = obstack_finish(&pp_obstack);
                /* temporarily install each parameter as the definition of its
                 * symbol (remembering the previous one in parent_expansion), so
                 * the replacement list loop below can recognize parameters */
                for (size_t i = 0; i < new_definition->n_parameters; ++i) {
                        pp_definition_t *const param     = &new_definition->parameters[i];
                        symbol_t        *const param_sym = param->symbol;
                        pp_definition_t *const previous  = param_sym->pp_definition;
                        /* symbol already maps to a parameter of this macro:
                         * the name occurred twice in the parameter list */
                        if (previous != NULL
                            && previous->function_definition == new_definition) {
                                errorf(&param->pos, "duplicate macro parameter '%Y'", param_sym);
                                param->symbol = sym_anonymous;
                                continue;
                        }
                        param->parent_expansion    = previous;
                        param->function_definition = new_definition;
                        param_sym->pp_definition   = param;
                }
        } else {
                next_input_token();
        }

        /* construct token list */
        assert(obstack_object_size(&pp_obstack) == 0);
        /* set after a '#' in a function-like macro: the following token has to
         * be a macro parameter */
        bool next_must_be_param = false;
        while (!info.at_line_begin) {
                if (pp_token.kind == T_IDENTIFIER) {
                        /* turn references to the macro's own parameters into
                         * T_MACRO_PARAMETER tokens */
                        pp_definition_t *const definition = pp_token.base.symbol->pp_definition;
                        if (definition != NULL
                            && definition->function_definition == new_definition) {
                            pp_token.kind                = T_MACRO_PARAMETER;
                            pp_token.macro_parameter.def = definition;
                        }
                }
                if (next_must_be_param && pp_token.kind != T_MACRO_PARAMETER) {
                        missing_macro_param_error();
                }
                saved_token_t saved_token;
                saved_token.token = pp_token;
                saved_token.had_whitespace = info.had_whitespace;
                obstack_grow(&pp_obstack, &saved_token, sizeof(saved_token));
                next_must_be_param
                        = new_definition->has_parameters && pp_token.kind == '#';
                next_input_token();
        }
        /* a '#' was the last token of the replacement list */
        if (next_must_be_param)
                missing_macro_param_error();

        new_definition->list_len   = obstack_object_size(&pp_obstack)
                / sizeof(new_definition->token_list[0]);
        new_definition->token_list = obstack_finish(&pp_obstack);

        if (new_definition->has_parameters) {
                /* undo the temporary parameter registration from above;
                 * duplicates (marked with sym_anonymous) were never installed */
                for (size_t i = 0; i < new_definition->n_parameters; ++i) {
                        pp_definition_t *const param     = &new_definition->parameters[i];
                        symbol_t        *const param_sym = param->symbol;
                        if (param_sym == sym_anonymous)
                                continue;
                        assert(param_sym->pp_definition == param);
                        assert(param->function_definition == new_definition);
                        param_sym->pp_definition = param->parent_expansion;
                        param->parent_expansion  = NULL;
                }
        }

        pp_definition_t *old_definition = symbol->pp_definition;
        if (old_definition != NULL) {
                if (!pp_definitions_equal(old_definition, new_definition)) {
                        warningf(WARN_OTHER, &input.pos,
                                 "multiple definition of macro '%Y' (first defined %P)",
                                 symbol, &old_definition->pos);
                } else {
                        /* reuse the old definition */
                        obstack_free(&pp_obstack, new_definition);
                        new_definition = old_definition;
                }
        }

        symbol->pp_definition = new_definition;
        return;

error_out:
        /* drop a partially grown object (e.g. an unfinished parameter array) and
         * skip the rest of the directive.
         * NOTE(review): an already allocated new_definition is not released on
         * this path -- confirm whether that is intended. */
        if (obstack_object_size(&pp_obstack) > 0) {
                char *ptr = obstack_finish(&pp_obstack);
                obstack_free(&pp_obstack, ptr);
        }
        eat_pp_directive();
}
1941
1942 static void parse_undef_directive(void)
1943 {
1944         eat_pp(TP_undef);
1945         if (skip_mode) {
1946                 eat_pp_directive();
1947                 return;
1948         }
1949
1950         if (!is_defineable_token("#undef")) {
1951                 eat_pp_directive();
1952                 return;
1953         }
1954
1955         pp_token.base.symbol->pp_definition = NULL;
1956         next_input_token();
1957
1958         if (!info.at_line_begin) {
1959                 warningf(WARN_OTHER, &input.pos, "extra tokens at end of #undef directive");
1960         }
1961         eat_pp_directive();
1962 }
1963
/**
 * Parse the header name following an #include.
 *
 * Behind an #include we can have the special header name lexemes. They are
 * only allowed behind an #include, so they are not recognized by the normal
 * next_preprocessing_token(). We handle them as a special exception here.
 *
 * If the next character is neither '<' nor '"', the following tokens are read
 * with next_preprocessing_token() and have to form either a string literal or
 * a token sequence enclosed in '<' and '>'.
 *
 * @param system_include  set to true for the <...> form and to false for the
 *                        "..." form; not written on all error paths
 * @return the header name, or NULL after an error has been reported
 */
static const char *parse_headername(bool *system_include)
{
        if (info.at_line_begin) {
                parse_error("expected headername after #include");
                return NULL;
        }

        /* check whether we have a "... or <... headername */
        position_t pos = input.pos;
        switch (input.c) {
        {
                /* the block only gives 'delimiter' a scope; control enters it
                 * through the case labels below */
                utf32 delimiter;
        case '<': delimiter = '>'; *system_include = true;  goto parse_name;
        case '"': delimiter = '"'; *system_include = false; goto parse_name;
parse_name:
                assert(obstack_object_size(&symbol_obstack) == 0);
                next_char();
                /* copy characters verbatim until the closing delimiter */
                while (true) {
                        switch (input.c) {
                        case NEWLINE:
                        case EOF:
                                {
                                        /* discard what was collected so far */
                                        char *dummy = obstack_finish(&symbol_obstack);
                                        obstack_free(&symbol_obstack, dummy);
                                }
                                errorf(&pp_token.base.pos,
                                       "header name without closing '%c'", (char)delimiter);
                                return NULL;

                        default:
                                if (input.c == delimiter) {
                                        next_char();
                                        goto finish_headername;
                                } else {
                                        obstack_1grow(&symbol_obstack, (char)input.c);
                                        next_char();
                                }
                                break;
                        }
                }
                /* we should never be here */
        }

        default:
                next_preprocessing_token();
                if (info.at_line_begin) {
                        /* TODO: if we are already in the new line then we parsed more than
                         * wanted. We reuse the token, but could produce following errors
                         * misbehaviours... */
                        goto error_invalid_input;
                }
                if (pp_token.kind == T_STRING_LITERAL) {
                        *system_include = false;
                        return pp_token.literal.string.begin;
                } else if (pp_token.kind == '<') {
                        *system_include = true;
                        assert(obstack_object_size(&pp_obstack) == 0);
                        /* gather all tokens up to the closing '>' on pp_obstack */
                        while (true) {
                                next_preprocessing_token();
                                if (info.at_line_begin) {
                                        /* TODO: we shouldn't have parsed/expanded something on the
                                         * next line yet... */
                                        char *dummy = obstack_finish(&pp_obstack);
                                        obstack_free(&pp_obstack, dummy);
                                        goto error_invalid_input;
                                }
                                if (pp_token.kind == '>')
                                        break;

                                saved_token_t saved;
                                saved.token          = pp_token;
                                saved.had_whitespace = info.had_whitespace;
                                obstack_grow(&pp_obstack, &saved, sizeof(saved));
                        }
                        size_t size = obstack_object_size(&pp_obstack);
                        assert(size % sizeof(saved_token_t) == 0);
                        size_t n_tokens = size / sizeof(saved_token_t);
                        saved_token_t *tokens = obstack_finish(&pp_obstack);
                        assert(obstack_object_size(&symbol_obstack) == 0);
                        /* spell the gathered tokens into the name; whitespace between
                         * tokens becomes a single space */
                        for (size_t i = 0; i < n_tokens; ++i) {
                                const saved_token_t *saved = &tokens[i];
                                if (i > 0 && saved->had_whitespace)
                                        obstack_1grow(&symbol_obstack, ' ');
                                grow_token(&symbol_obstack, &saved->token);
                        }
                        obstack_free(&pp_obstack, tokens);
                        goto finish_headername;
                } else {
error_invalid_input:
                        {
                                char *dummy = obstack_finish(&symbol_obstack);
                                obstack_free(&symbol_obstack, dummy);
                        }

                        errorf(&pp_token.base.pos,
                               "expected \"FILENAME\" or <FILENAME> after #include");
                        return NULL;
                }
        }

finish_headername:
        /* terminate the collected name and let the current token carry the
         * position at which the header name started */
        obstack_1grow(&symbol_obstack, '\0');
        char *const  headername = obstack_finish(&symbol_obstack);
        const char  *identified = identify_string(headername);
        pp_token.base.pos = pos;
        return identified;
}
2075
2076 static bool do_include(bool const bracket_include, bool const include_next, char const *const headername)
2077 {
2078         size_t const        headername_len = strlen(headername);
2079         searchpath_entry_t *entry;
2080         if (include_next) {
2081                 entry = input.path      ? input.path->next
2082                       : bracket_include ? bracket_searchpath.first
2083                       : quote_searchpath.first;
2084         } else {
2085                 if (!bracket_include) {
2086                         /* put dirname of current input on obstack */
2087                         const char *filename   = input.pos.input_name;
2088                         const char *last_slash = strrchr(filename, '/');
2089                         const char *full_name;
2090                         if (last_slash != NULL) {
2091                                 size_t len = last_slash - filename;
2092                                 obstack_grow(&symbol_obstack, filename, len + 1);
2093                                 obstack_grow0(&symbol_obstack, headername, headername_len);
2094                                 char *complete_path = obstack_finish(&symbol_obstack);
2095                                 full_name = identify_string(complete_path);
2096                         } else {
2097                                 full_name = headername;
2098                         }
2099
2100                         FILE *file = fopen(full_name, "r");
2101                         if (file != NULL) {
2102                                 switch_pp_input(file, full_name, NULL, false);
2103                                 return true;
2104                         }
2105                         entry = quote_searchpath.first;
2106                 } else {
2107                         entry = bracket_searchpath.first;
2108                 }
2109         }
2110
2111         assert(obstack_object_size(&symbol_obstack) == 0);
2112         /* check searchpath */
2113         for (; entry; entry = entry->next) {
2114             const char *path = entry->path;
2115             size_t      len  = strlen(path);
2116                 obstack_grow(&symbol_obstack, path, len);
2117                 if (path[len-1] != '/')
2118                         obstack_1grow(&symbol_obstack, '/');
2119                 obstack_grow(&symbol_obstack, headername, headername_len+1);
2120
2121                 char *complete_path = obstack_finish(&symbol_obstack);
2122                 FILE *file          = fopen(complete_path, "r");
2123                 if (file != NULL) {
2124                         const char *filename = identify_string(complete_path);
2125                         switch_pp_input(file, filename, entry, entry->is_system_path);
2126                         return true;
2127                 } else {
2128                         obstack_free(&symbol_obstack, complete_path);
2129                 }
2130         }
2131
2132         return false;
2133 }
2134
2135 static void parse_include_directive(bool const include_next)
2136 {
2137         if (skip_mode) {
2138                 eat_pp_directive();
2139                 return;
2140         }
2141
2142         /* do not eat the TP_include, since it would already parse the next token
2143          * which needs special handling here. */
2144         skip_till_newline(true);
2145         bool system_include;
2146         const char *headername = parse_headername(&system_include);
2147         if (headername == NULL) {
2148                 eat_pp_directive();
2149                 return;
2150         }
2151
2152         bool had_nonwhitespace = skip_till_newline(false);
2153         if (had_nonwhitespace) {
2154                 warningf(WARN_OTHER, &input.pos,
2155                          "extra tokens at end of #include directive");
2156         }
2157
2158         if (n_inputs > INCLUDE_LIMIT) {
2159                 errorf(&pp_token.base.pos, "#include nested too deeply");
2160                 /* eat \n or EOF */
2161                 next_input_token();
2162                 return;
2163         }
2164
2165         /* switch inputs */
2166         info.whitespace_at_line_begin = 0;
2167         info.had_whitespace           = false;
2168         info.at_line_begin            = true;
2169         emit_newlines();
2170         push_input();
2171         bool res = do_include(system_include, include_next, headername);
2172         if (res) {
2173                 next_input_token();
2174         } else {
2175                 errorf(&pp_token.base.pos, "failed including '%s': %s", headername, strerror(errno));
2176                 pop_restore_input();
2177         }
2178 }
2179
2180 static pp_conditional_t *push_conditional(void)
2181 {
2182         pp_conditional_t *conditional
2183                 = obstack_alloc(&pp_obstack, sizeof(*conditional));
2184         memset(conditional, 0, sizeof(*conditional));
2185
2186         conditional->parent = conditional_stack;
2187         conditional_stack   = conditional;
2188
2189         return conditional;
2190 }
2191
2192 static void pop_conditional(void)
2193 {
2194         assert(conditional_stack != NULL);
2195         conditional_stack = conditional_stack->parent;
2196 }
2197
2198 void check_unclosed_conditionals(void)
2199 {
2200         while (conditional_stack != NULL) {
2201                 pp_conditional_t *conditional = conditional_stack;
2202
2203                 if (conditional->in_else) {
2204                         errorf(&conditional->pos, "unterminated #else");
2205                 } else {
2206                         errorf(&conditional->pos, "unterminated condition");
2207                 }
2208                 pop_conditional();
2209         }
2210 }
2211
2212 static void parse_ifdef_ifndef_directive(bool const is_ifdef)
2213 {
2214         bool condition;
2215         eat_pp(is_ifdef ? TP_ifdef : TP_ifndef);
2216
2217         if (skip_mode) {
2218                 eat_pp_directive();
2219                 pp_conditional_t *conditional = push_conditional();
2220                 conditional->pos  = pp_token.base.pos;
2221                 conditional->skip = true;
2222                 return;
2223         }
2224
2225         if (pp_token.kind != T_IDENTIFIER || info.at_line_begin) {
2226                 errorf(&pp_token.base.pos, "expected identifier after #%s, got %K",
2227                        is_ifdef ? "ifdef" : "ifndef", &pp_token);
2228                 eat_pp_directive();
2229
2230                 /* just take the true case in the hope to avoid further errors */
2231                 condition = true;
2232         } else {
2233                 /* evaluate whether we are in true or false case */
2234                 condition = (bool)pp_token.base.symbol->pp_definition == is_ifdef;
2235                 eat_token(T_IDENTIFIER);
2236
2237                 if (!info.at_line_begin) {
2238                         errorf(&pp_token.base.pos, "extra tokens at end of #%s",
2239                                is_ifdef ? "ifdef" : "ifndef");
2240                         eat_pp_directive();
2241                 }
2242         }
2243
2244         pp_conditional_t *conditional = push_conditional();
2245         conditional->pos       = pp_token.base.pos;
2246         conditional->condition = condition;
2247
2248         if (!condition) {
2249                 skip_mode = true;
2250         }
2251 }
2252
2253 static void parse_else_directive(void)
2254 {
2255         eat_pp(TP_else);
2256
2257         if (!info.at_line_begin) {
2258                 if (!skip_mode) {
2259                         warningf(WARN_OTHER, &pp_token.base.pos, "extra tokens at end of #else");
2260                 }
2261                 eat_pp_directive();
2262         }
2263
2264         pp_conditional_t *conditional = conditional_stack;
2265         if (conditional == NULL) {
2266                 errorf(&pp_token.base.pos, "#else without prior #if");
2267                 return;
2268         }
2269
2270         if (conditional->in_else) {
2271                 errorf(&pp_token.base.pos,
2272                        "#else after #else (condition started %P)",
2273                        &conditional->pos);
2274                 skip_mode = true;
2275                 return;
2276         }
2277
2278         conditional->in_else = true;
2279         if (!conditional->skip) {
2280                 skip_mode = conditional->condition;
2281         }
2282         conditional->pos = pp_token.base.pos;
2283 }
2284
2285 static void parse_endif_directive(void)
2286 {
2287         eat_pp(TP_endif);
2288
2289         if (!info.at_line_begin) {
2290                 if (!skip_mode) {
2291                         warningf(WARN_OTHER, &pp_token.base.pos, "extra tokens at end of #endif");
2292                 }
2293                 eat_pp_directive();
2294         }
2295
2296         pp_conditional_t *conditional = conditional_stack;
2297         if (conditional == NULL) {
2298                 errorf(&pp_token.base.pos, "#endif without prior #if");
2299                 return;
2300         }
2301
2302         if (!conditional->skip) {
2303                 skip_mode = false;
2304         }
2305         pop_conditional();
2306 }
2307
/** The "#pragma STDC" pragmas known to the preprocessor. */
typedef enum stdc_pragma_kind_t {
        STDC_UNKNOWN = 0,      /**< not a known STDC pragma */
        STDC_FP_CONTRACT,      /**< #pragma STDC FP_CONTRACT */
        STDC_FENV_ACCESS,      /**< #pragma STDC FENV_ACCESS */
        STDC_CX_LIMITED_RANGE  /**< #pragma STDC CX_LIMITED_RANGE */
} stdc_pragma_kind_t;
2314
/** The argument of a "#pragma STDC" pragma. */
typedef enum stdc_pragma_value_kind_t {
        STDC_VALUE_UNKNOWN = 0,  /**< none of ON, OFF, DEFAULT */
        STDC_VALUE_ON,           /**< ON */
        STDC_VALUE_OFF,          /**< OFF */
        STDC_VALUE_DEFAULT       /**< DEFAULT */
} stdc_pragma_value_kind_t;
2321
2322 static void parse_pragma_directive(void)
2323 {
2324         eat_pp(TP_pragma);
2325         if (skip_mode) {
2326                 eat_pp_directive();
2327                 return;
2328         }
2329
2330         if (pp_token.kind != T_IDENTIFIER) {
2331                 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.pos,
2332                          "expected identifier after #pragma");
2333                 eat_pp_directive();
2334                 return;
2335         }
2336
2337         stdc_pragma_kind_t kind = STDC_UNKNOWN;
2338         if (pp_token.base.symbol->pp_ID == TP_STDC && c_mode & _C99) {
2339                 /* a STDC pragma */
2340                 next_input_token();
2341
2342                 switch (pp_token.base.symbol->pp_ID) {
2343                 case TP_FP_CONTRACT:      kind = STDC_FP_CONTRACT;      break;
2344                 case TP_FENV_ACCESS:      kind = STDC_FENV_ACCESS;      break;
2345                 case TP_CX_LIMITED_RANGE: kind = STDC_CX_LIMITED_RANGE; break;
2346                 default:                  break;
2347                 }
2348                 if (kind != STDC_UNKNOWN) {
2349                         next_input_token();
2350                         stdc_pragma_value_kind_t value;
2351                         switch (pp_token.base.symbol->pp_ID) {
2352                         case TP_ON:      value = STDC_VALUE_ON;      break;
2353                         case TP_OFF:     value = STDC_VALUE_OFF;     break;
2354                         case TP_DEFAULT: value = STDC_VALUE_DEFAULT; break;
2355                         default:         value = STDC_VALUE_UNKNOWN; break;
2356                         }
2357                         if (value == STDC_VALUE_UNKNOWN) {
2358                                 kind = STDC_UNKNOWN;
2359                                 errorf(&pp_token.base.pos, "bad STDC pragma argument");
2360                         }
2361                 }
2362         }
2363         eat_pp_directive();
2364         if (kind == STDC_UNKNOWN) {
2365                 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.pos,
2366                          "encountered unknown #pragma");
2367         }
2368 }
2369
2370 static void parse_line_directive(void)
2371 {
2372         if (pp_token.kind != T_NUMBER) {
2373                 if (!skip_mode)
2374                         parse_error("expected integer");
2375         } else {
2376                 char      *end;
2377                 long const line = strtol(pp_token.literal.string.begin, &end, 0);
2378                 if (*end == '\0') {
2379                         /* use offset -1 as this is about the next line */
2380                         input.pos.lineno = line - 1;
2381                         /* force output of line */
2382                         input.output_line = input.pos.lineno - 20;
2383                 } else {
2384                         if (!skip_mode) {
2385                                 errorf(&input.pos, "'%S' is not a valid line number",
2386                                            &pp_token.literal.string);
2387                         }
2388                 }
2389                 next_input_token();
2390                 if (info.at_line_begin)
2391                         return;
2392         }
2393         if (pp_token.kind == T_STRING_LITERAL
2394             && pp_token.literal.string.encoding == STRING_ENCODING_CHAR) {
2395                 input.pos.input_name       = pp_token.literal.string.begin;
2396                 input.pos.is_system_header = false;
2397                 next_input_token();
2398
2399                 /* attempt to parse numeric flags as outputted by gcc preprocessor */
2400                 while (!info.at_line_begin && pp_token.kind == T_NUMBER) {
2401                         /* flags:
2402                          * 1 - indicates start of a new file
2403                          * 2 - indicates return from a file
2404                          * 3 - indicates system header
2405                          * 4 - indicates implicit extern "C" in C++ mode
2406                          *
2407                          * currently we're only interested in "3"
2408                          */
2409                         if (streq(pp_token.literal.string.begin, "3")) {
2410                                 input.pos.is_system_header = true;
2411                         }
2412                         next_input_token();
2413                 }
2414         }
2415
2416         eat_pp_directive();
2417 }
2418
2419 static void parse_error_directive(void)
2420 {
2421         if (skip_mode) {
2422                 eat_pp_directive();
2423                 return;
2424         }
2425
2426         bool const old_resolve_escape_sequences = resolve_escape_sequences;
2427         resolve_escape_sequences = false;
2428
2429         position_t const pos = pp_token.base.pos;
2430         do {
2431                 if (info.had_whitespace && obstack_object_size(&pp_obstack) != 0)
2432                         obstack_1grow(&pp_obstack, ' ');
2433
2434                 switch (pp_token.kind) {
2435                 case T_NUMBER: {
2436                         string_t const *const str = &pp_token.literal.string;
2437                         obstack_grow(&pp_obstack, str->begin, str->size);
2438                         break;
2439                 }
2440
2441                 {
2442                         char delim;
2443                 case T_STRING_LITERAL:     delim =  '"'; goto string;
2444                 case T_CHARACTER_CONSTANT: delim = '\''; goto string;
2445 string:;
2446                         string_t const *const str = &pp_token.literal.string;
2447                         char     const *const enc = get_string_encoding_prefix(str->encoding);
2448                         obstack_printf(&pp_obstack, "%s%c%s%c", enc, delim, str->begin, delim);
2449                         break;
2450                 }
2451
2452                 default: {
2453                         char const *const str = pp_token.base.symbol->string;
2454                         obstack_grow(&pp_obstack, str, strlen(str));
2455                         break;
2456                 }
2457                 }
2458
2459                 next_input_token();
2460         } while (!info.at_line_begin);
2461
2462         resolve_escape_sequences = old_resolve_escape_sequences;
2463
2464         obstack_1grow(&pp_obstack, '\0');
2465         char *const str = obstack_finish(&pp_obstack);
2466         errorf(&pos, "#%s", str);
2467         obstack_free(&pp_obstack, str);
2468 }
2469
2470 static void parse_preprocessing_directive(void)
2471 {
2472         eat_token('#');
2473
2474         if (info.at_line_begin) {
2475                 /* empty directive */
2476                 return;
2477         }
2478
2479         if (pp_token.base.symbol) {
2480                 switch (pp_token.base.symbol->pp_ID) {
2481                 case TP_define:       parse_define_directive();            break;
2482                 case TP_else:         parse_else_directive();              break;
2483                 case TP_endif:        parse_endif_directive();             break;
2484                 case TP_error:        parse_error_directive();             break;
2485                 case TP_ifdef:        parse_ifdef_ifndef_directive(true);  break;
2486                 case TP_ifndef:       parse_ifdef_ifndef_directive(false); break;
2487                 case TP_include:      parse_include_directive(false);      break;
2488                 case TP_include_next: parse_include_directive(true);       break;
2489                 case TP_line:         next_input_token(); goto line_directive;
2490                 case TP_pragma:       parse_pragma_directive();            break;
2491                 case TP_undef:        parse_undef_directive();             break;
2492                 default:              goto skip;
2493                 }
2494         } else if (pp_token.kind == T_NUMBER) {
2495 line_directive:
2496                 parse_line_directive();
2497         } else {
2498 skip:
2499                 if (!skip_mode) {
2500                         errorf(&pp_token.base.pos, "invalid preprocessing directive #%K", &pp_token);
2501                 }
2502                 eat_pp_directive();
2503         }
2504
2505         assert(info.at_line_begin);
2506 }
2507
2508 static void finish_current_argument(void)
2509 {
2510         if (current_argument == NULL)
2511                 return;
2512         size_t size = obstack_object_size(&pp_obstack);
2513         current_argument->list_len   = size/sizeof(current_argument->token_list[0]);
2514         current_argument->token_list = obstack_finish(&pp_obstack);
2515 }
2516
2517 void next_preprocessing_token(void)
2518 {
2519 restart:
2520         if (!expand_next()) {
2521                 do {
2522                         next_input_token();
2523                         while (pp_token.kind == '#' && info.at_line_begin) {
2524                                 parse_preprocessing_directive();
2525                         }
2526                 } while (skip_mode && pp_token.kind != T_EOF);
2527         }
2528
2529         const token_kind_t kind = pp_token.kind;
2530         if (current_call == NULL || argument_expanding != NULL) {
2531                 symbol_t *const symbol = pp_token.base.symbol;
2532                 if (symbol) {
2533                         if (kind == T_MACRO_PARAMETER) {
2534                                 assert(current_expansion != NULL);
2535                                 start_expanding(pp_token.macro_parameter.def);
2536                                 goto restart;
2537                         }
2538
2539                         pp_definition_t *const pp_definition = symbol->pp_definition;
2540                         if (pp_definition != NULL && !pp_definition->is_expanding) {
2541                                 if (pp_definition->has_parameters) {
2542
2543                                         /* check if next token is a '(' */
2544                                         whitespace_info_t old_info   = info;
2545                                         token_kind_t      next_token = peek_expansion();
2546                                         if (next_token == T_EOF) {
2547                                                 info.at_line_begin  = false;
2548                                                 info.had_whitespace = false;
2549                                                 skip_whitespace();
2550                                                 if (input.c == '(') {
2551                                                         next_token = '(';
2552                                                 }
2553                                         }
2554
2555                                         if (next_token == '(') {
2556                                                 if (current_expansion == NULL)
2557                                                         expansion_pos = pp_token.base.pos;
2558                                                 next_preprocessing_token();
2559                                                 assert(pp_token.kind == '(');
2560
2561                                                 pp_definition->parent_expansion = current_expansion;
2562                                                 current_call              = pp_definition;
2563                                                 current_call->expand_pos  = 0;
2564                                                 current_call->expand_info = old_info;
2565                                                 if (current_call->n_parameters > 0) {
2566                                                         current_argument = &current_call->parameters[0];
2567                                                         assert(argument_brace_count == 0);
2568                                                 }
2569                                                 goto restart;
2570                                         } else {
2571                                                 /* skip_whitespaces() skipped newlines and whitespace,
2572                                                  * remember results for next token */
2573                                                 next_info = info;
2574                                                 info      = old_info;
2575                                                 return;
2576                                         }
2577                                 } else {
2578                                         if (current_expansion == NULL)
2579                                                 expansion_pos = pp_token.base.pos;
2580                                         start_expanding(pp_definition);
2581                                         goto restart;
2582                                 }
2583                         }
2584                 }
2585         }
2586
2587         if (current_call != NULL) {
2588                 /* current_call != NULL */
2589                 if (kind == '(') {
2590                         ++argument_brace_count;
2591                 } else if (kind == ')') {
2592                         if (argument_brace_count > 0) {
2593                                 --argument_brace_count;
2594                         } else {
2595                                 finish_current_argument();
2596                                 assert(kind == ')');
2597                                 start_expanding(current_call);
2598                                 info = current_call->expand_info;
2599                                 current_call     = NULL;
2600                                 current_argument = NULL;
2601                                 goto restart;
2602                         }
2603                 } else if (kind == ',' && argument_brace_count == 0) {
2604                         finish_current_argument();
2605                         current_call->expand_pos++;
2606                         if (current_call->expand_pos >= current_call->n_parameters) {
2607                                 errorf(&pp_token.base.pos,
2608                                            "too many arguments passed for macro '%Y'",
2609                                            current_call->symbol);
2610                                 current_argument = NULL;
2611                         } else {
2612                                 current_argument
2613                                         = &current_call->parameters[current_call->expand_pos];
2614                         }
2615                         goto restart;
2616                 } else if (kind == T_MACRO_PARAMETER) {
2617                         /* parameters have to be fully expanded before being used as
2618                          * parameters for another macro-call */
2619                         assert(current_expansion != NULL);
2620                         pp_definition_t *argument = pp_token.macro_parameter.def;
2621                         argument_expanding = argument;
2622                         start_expanding(argument);
2623                         goto restart;
2624                 } else if (kind == T_EOF) {
2625                         errorf(&expansion_pos,
2626                                "reached end of file while parsing arguments for '%Y'",
2627                                current_call->symbol);
2628                         return;
2629                 }
2630                 if (current_argument != NULL) {
2631                         saved_token_t saved;
2632                         saved.token = pp_token;
2633                         saved.had_whitespace = info.had_whitespace;
2634                         obstack_grow(&pp_obstack, &saved, sizeof(saved));
2635                 }
2636                 goto restart;
2637         }
2638 }
2639
2640 void append_include_path(searchpath_t *paths, const char *path)
2641 {
2642         searchpath_entry_t *entry = OALLOCZ(&config_obstack, searchpath_entry_t);
2643         entry->path           = path;
2644         entry->is_system_path = paths->is_system_path;
2645
2646         *paths->anchor = entry;
2647         paths->anchor  = &entry->next;
2648 }
2649
2650 static void append_env_paths(searchpath_t *paths, const char *envvar)
2651 {
2652         const char *val = getenv(envvar);
2653         if (val != NULL && *val != '\0') {
2654                 const char *begin = val;
2655                 const char *c;
2656                 do {
2657                         c = begin;
2658                         while (*c != '\0' && *c != ':')
2659                                 ++c;
2660
2661                         size_t len = c-begin;
2662                         if (len == 0) {
2663                                 /* use "." for gcc compatibility (Matze: I would expect that
2664                                  * nothing happens for an empty entry...) */
2665                                 append_include_path(paths, ".");
2666                         } else {
2667                                 char *const string = obstack_copy0(&config_obstack, begin, len);
2668                                 append_include_path(paths, string);
2669                         }
2670
2671                         begin = c+1;
2672                         /* skip : */
2673                         if (*begin == ':')
2674                                 ++begin;
2675                 } while (*c != '\0');
2676         }
2677 }
2678
2679 static void append_searchpath(searchpath_t *path, const searchpath_t *append)
2680 {
2681         *path->anchor = append->first;
2682 }
2683
2684 static void setup_include_path(void)
2685 {
2686         /* built-in paths */
2687         append_include_path(&system_searchpath, "/usr/include");
2688
2689         /* parse environment variable */
2690         append_env_paths(&bracket_searchpath, "CPATH");
2691         append_env_paths(&system_searchpath,
2692                          c_mode & _CXX ? "CPLUS_INCLUDE_PATH" : "C_INCLUDE_PATH");
2693
2694         /* append system search path to bracket searchpath */
2695         append_searchpath(&system_searchpath,  &after_searchpath);
2696         append_searchpath(&bracket_searchpath, &system_searchpath);
2697         append_searchpath(&quote_searchpath, &bracket_searchpath);
2698 }
2699
2700 static void input_error(unsigned const delta_lines, unsigned const delta_cols, char const *const message)
2701 {
2702         position_t pos = pp_token.base.pos;
2703         pos.lineno += delta_lines;
2704         pos.colno  += delta_cols;
2705         errorf(&pos, "%s", message);
2706 }
2707
2708 void init_include_paths(void)
2709 {
2710         obstack_init(&config_obstack);
2711 }
2712
2713 void init_preprocessor(void)
2714 {
2715         init_symbols();
2716
2717         obstack_init(&pp_obstack);
2718         obstack_init(&input_obstack);
2719         strset_init(&stringset);
2720
2721         setup_include_path();
2722
2723         set_input_error_callback(input_error);
2724 }
2725
2726 void exit_preprocessor(void)
2727 {
2728         obstack_free(&input_obstack, NULL);
2729         obstack_free(&pp_obstack, NULL);
2730         obstack_free(&config_obstack, NULL);
2731
2732         strset_destroy(&stringset);
2733 }
2734
2735 int pptest_main(int argc, char **argv);
2736 int pptest_main(int argc, char **argv)
2737 {
2738         init_symbol_table();
2739         init_include_paths();
2740         init_preprocessor();
2741         init_tokens();
2742
2743         error_on_unknown_chars   = false;
2744         resolve_escape_sequences = false;
2745
2746         /* simplistic commandline parser */
2747         const char *filename = NULL;
2748         const char *output = NULL;
2749         for (int i = 1; i < argc; ++i) {
2750                 const char *opt = argv[i];
2751                 if (streq(opt, "-I")) {
2752                         append_include_path(&bracket_searchpath, argv[++i]);
2753                         continue;
2754                 } else if (streq(opt, "-E")) {
2755                         /* ignore */
2756                 } else if (streq(opt, "-o")) {
2757                         output = argv[++i];
2758                         continue;
2759                 } else if (opt[0] == '-') {
2760                         fprintf(stderr, "Unknown option '%s'\n", opt);
2761                 } else {
2762                         if (filename != NULL)
2763                                 fprintf(stderr, "Multiple inputs not supported\n");
2764                         filename = argv[i];
2765                 }
2766         }
2767         if (filename == NULL) {
2768                 fprintf(stderr, "No input specified\n");
2769                 return 1;
2770         }
2771
2772         if (output == NULL) {
2773                 out = stdout;
2774         } else {
2775                 out = fopen(output, "w");
2776                 if (out == NULL) {
2777                         fprintf(stderr, "Couldn't open output '%s'\n", output);
2778                         return 1;
2779                 }
2780         }
2781
2782         /* just here for gcc compatibility */
2783         fprintf(out, "# 1 \"%s\"\n", filename);
2784         fprintf(out, "# 1 \"<built-in>\"\n");
2785         fprintf(out, "# 1 \"<command-line>\"\n");
2786
2787         FILE *file = fopen(filename, "r");
2788         if (file == NULL) {
2789                 fprintf(stderr, "Couldn't open input '%s'\n", filename);
2790                 return 1;
2791         }
2792         switch_pp_input(file, filename, NULL, false);
2793
2794         for (;;) {
2795                 next_preprocessing_token();
2796                 if (pp_token.kind == T_EOF)
2797                         break;
2798                 emit_pp_token();
2799         }
2800
2801         fputc('\n', out);
2802         check_unclosed_conditionals();
2803         fclose(close_pp_input());
2804         if (out != stdout)
2805                 fclose(out);
2806
2807         exit_tokens();
2808         exit_preprocessor();
2809         exit_symbol_table();
2810
2811         return 0;
2812 }