nsz Git - cparser/blob - preprocessor.c

   1 #include <config.h>
   2
   3 #include <assert.h>
   4 #include <errno.h>
   5 #include <string.h>
   6 #include <stdbool.h>
   7 #include <ctype.h>
   8
   9 #include "preprocessor.h"
  10 #include "token_t.h"
  11 #include "symbol_t.h"
  12 #include "adt/util.h"
  13 #include "adt/error.h"
  14 #include "adt/strutil.h"
  15 #include "adt/strset.h"
  16 #include "lang_features.h"
  17 #include "diagnostic.h"
  18 #include "string_rep.h"
  19 #include "input.h"
  20
  21 #define MAX_PUTBACK 3
  22 #define INCLUDE_LIMIT 199  /* 199 is for gcc "compatibility" */
  23
  24 typedef struct saved_token_t {
  25         token_t token;
  26         bool    had_whitespace;
  27 } saved_token_t;
  28
  29 typedef struct whitespace_info_t {
  30         /** current token had whitespace in front of it */
  31         bool     had_whitespace;
  32         /** current token is at the beginning of a line.
  33          * => a "#" at line begin starts a preprocessing directive. */
  34         bool     at_line_begin;
  35         /** number of spaces before the first token in a line */
  36         unsigned whitespace_at_line_begin;
  37 } whitespace_info_t;
  38
  39 struct pp_definition_t {
  40         symbol_t          *symbol;
  41         source_position_t  source_position;
  42         pp_definition_t   *parent_expansion;
  43         size_t             expand_pos;
  44         whitespace_info_t  expand_info;
  45         bool               is_variadic    : 1;
  46         bool               is_expanding   : 1;
  47         bool               has_parameters : 1;
  48         bool               is_parameter   : 1;
  49         pp_definition_t   *function_definition;
  50         size_t             n_parameters;
  51         pp_definition_t   *parameters;
  52
  53         /* replacement */
  54         size_t             list_len;
  55         saved_token_t     *token_list;
  56 };
  57
  58 typedef struct pp_conditional_t pp_conditional_t;
  59 struct pp_conditional_t {
  60         source_position_t  source_position;
  61         bool               condition;
  62         bool               in_else;
  63         /** conditional in skip mode (then+else gets skipped) */
  64         bool               skip;
  65         pp_conditional_t  *parent;
  66 };
  67
  68 typedef struct pp_input_t pp_input_t;
  69 struct pp_input_t {
  70         FILE               *file;
  71         input_t            *input;
  72         utf32               c;
  73         utf32               buf[1024+MAX_PUTBACK];
  74         const utf32        *bufend;
  75         const utf32        *bufpos;
  76         source_position_t   position;
  77         pp_input_t         *parent;
  78         unsigned            output_line;
  79         searchpath_entry_t *path;
  80 };
  81
  82 struct searchpath_entry_t {
  83         const char         *path;
  84         searchpath_entry_t *next;
  85         bool                is_system_path;
  86 };
  87
  88 static pp_input_t      input;
  89
  90 static pp_input_t     *input_stack;
  91 static unsigned        n_inputs;
  92 static struct obstack  input_obstack;
  93
  94 static pp_conditional_t *conditional_stack;
  95
  96 token_t                  pp_token;
  97 bool                     allow_dollar_in_symbol   = true;
  98 static bool              resolve_escape_sequences = true;
  99 static bool              error_on_unknown_chars   = true;
 100 static bool              skip_mode;
 101 static FILE             *out;
 102 static struct obstack    pp_obstack;
 103 static struct obstack    config_obstack;
 104 static const char       *printed_input_name = NULL;
 105 static source_position_t expansion_pos;
 106 static pp_definition_t  *current_expansion  = NULL;
 107 static pp_definition_t  *current_call       = NULL;
 108 static pp_definition_t  *current_argument   = NULL;
 109 static pp_definition_t  *argument_expanding = NULL;
 110 static unsigned          argument_brace_count;
 111 static strset_t          stringset;
 112 static token_kind_t      last_token;
 113
 114 struct searchpath_t {
 115         searchpath_entry_t  *first;
 116         searchpath_entry_t **anchor;
 117         bool                 is_system_path;
 118 };
 119
 120 searchpath_t bracket_searchpath = { NULL, &bracket_searchpath.first, false };
 121 searchpath_t quote_searchpath   = { NULL, &quote_searchpath.first,   false };
 122 searchpath_t system_searchpath  = { NULL, &system_searchpath.first,  true  };
 123 searchpath_t after_searchpath   = { NULL, &after_searchpath.first,   true  };
 124
 125 static whitespace_info_t next_info; /* valid if had_whitespace is true */
 126 static whitespace_info_t info;
 127
 128 static inline void next_char(void);
 129 static void next_input_token(void);
 130 static void print_line_directive(const source_position_t *pos, const char *add);
 131
 132 static symbol_t *symbol_colongreater;
 133 static symbol_t *symbol_lesscolon;
 134 static symbol_t *symbol_lesspercent;
 135 static symbol_t *symbol_percentcolon;
 136 static symbol_t *symbol_percentcolonpercentcolon;
 137 static symbol_t *symbol_percentgreater;
 138
 139 static symbol_t *symbol_L;
 140 static symbol_t *symbol_U;
 141 static symbol_t *symbol_u;
 142 static symbol_t *symbol_u8;
 143
 144 static void init_symbols(void)
 145 {
 146         symbol_colongreater             = symbol_table_insert(":>");
 147         symbol_lesscolon                = symbol_table_insert("<:");
 148         symbol_lesspercent              = symbol_table_insert("<%");
 149         symbol_percentcolon             = symbol_table_insert("%:");
 150         symbol_percentcolonpercentcolon = symbol_table_insert("%:%:");
 151         symbol_percentgreater           = symbol_table_insert("%>");
 152
 153         symbol_L  = symbol_table_insert("L");
 154         symbol_U  = symbol_table_insert("U");
 155         symbol_u  = symbol_table_insert("u");
 156         symbol_u8 = symbol_table_insert("u8");
 157 }
 158
 159 void switch_pp_input(FILE *const file, char const *const filename, searchpath_entry_t *const path, bool const is_system_header)
 160 {
 161         input.file                      = file;
 162         input.input                     = input_from_stream(file, NULL);
 163         input.bufend                    = NULL;
 164         input.bufpos                    = NULL;
 165         input.output_line               = 0;
 166         input.position.input_name       = filename;
 167         input.position.lineno           = 1;
 168         input.position.is_system_header = is_system_header;
 169         input.path                      = path;
 170
 171         /* indicate that we're at a new input */
 172         print_line_directive(&input.position, input_stack != NULL ? "1" : NULL);
 173
 174         /* place a virtual '\n' so we realize we're at line begin */
 175         input.position.lineno = 0;
 176         input.c               = '\n';
 177 }
 178
 179 FILE *close_pp_input(void)
 180 {
 181         input_free(input.input);
 182
 183         FILE* const file = input.file;
 184         assert(file);
 185
 186         input.input  = NULL;
 187         input.file   = NULL;
 188         input.bufend = NULL;
 189         input.bufpos = NULL;
 190         input.c      = EOF;
 191
 192         return file;
 193 }
 194
 195 static void push_input(void)
 196 {
 197         pp_input_t *const saved_input = obstack_copy(&input_obstack, &input, sizeof(input));
 198
 199         /* adjust buffer positions */
 200         if (input.bufpos != NULL)
 201                 saved_input->bufpos = saved_input->buf + (input.bufpos - input.buf);
 202         if (input.bufend != NULL)
 203                 saved_input->bufend = saved_input->buf + (input.bufend - input.buf);
 204
 205         saved_input->parent = input_stack;
 206         input_stack         = saved_input;
 207         ++n_inputs;
 208 }
 209
 210 static void pop_restore_input(void)
 211 {
 212         assert(n_inputs > 0);
 213         assert(input_stack != NULL);
 214
 215         pp_input_t *saved_input = input_stack;
 216
 217         memcpy(&input, saved_input, sizeof(input));
 218         input.parent = NULL;
 219
 220         /* adjust buffer positions */
 221         if (saved_input->bufpos != NULL)
 222                 input.bufpos = input.buf + (saved_input->bufpos - saved_input->buf);
 223         if (saved_input->bufend != NULL)
 224                 input.bufend = input.buf + (saved_input->bufend - saved_input->buf);
 225
 226         input_stack = saved_input->parent;
 227         obstack_free(&input_obstack, saved_input);
 228         --n_inputs;
 229 }
 230
 231 /**
 232  * Prints a parse error message at the current token.
 233  *
 234  * @param msg   the error message
 235  */
 236 static void parse_error(const char *msg)
 237 {
 238         errorf(&pp_token.base.source_position,  "%s", msg);
 239 }
 240
 241 static inline void next_real_char(void)
 242 {
 243         assert(input.bufpos <= input.bufend);
 244         if (input.bufpos >= input.bufend) {
 245                 size_t const n = decode(input.input, input.buf + MAX_PUTBACK, lengthof(input.buf) - MAX_PUTBACK);
 246                 if (n == 0) {
 247                         input.c = EOF;
 248                         return;
 249                 }
 250                 input.bufpos = input.buf + MAX_PUTBACK;
 251                 input.bufend = input.bufpos + n;
 252         }
 253         input.c = *input.bufpos++;
 254         ++input.position.colno;
 255 }
 256
 257 /**
 258  * Put a character back into the buffer.
 259  *
 260  * @param pc  the character to put back
 261  */
 262 static inline void put_back(utf32 const pc)
 263 {
 264         assert(input.bufpos > input.buf);
 265         *(--input.bufpos - input.buf + input.buf) = (char) pc;
 266         --input.position.colno;
 267 }
 268
 269 #define NEWLINE \
 270         '\r': \
 271                 next_char(); \
 272                 if (input.c == '\n') { \
 273         case '\n': \
 274                         next_char(); \
 275                 } \
 276                 ++input.position.lineno; \
 277                 input.position.colno = 1; \
 278                 goto newline; \
 279                 newline // Let it look like an ordinary case label.
 280
 281 #define eat(c_type) (assert(input.c == c_type), next_char())
 282
 283 static void maybe_concat_lines(void)
 284 {
 285         eat('\\');
 286
 287         switch (input.c) {
 288         case NEWLINE:
 289                 info.whitespace_at_line_begin = 0;
 290                 return;
 291
 292         default:
 293                 break;
 294         }
 295
 296         put_back(input.c);
 297         input.c = '\\';
 298 }
 299
 300 /**
 301  * Set c to the next input character, ie.
 302  * after expanding trigraphs.
 303  */
 304 static inline void next_char(void)
 305 {
 306         next_real_char();
 307
 308         /* filter trigraphs and concatenated lines */
 309         if (UNLIKELY(input.c == '\\')) {
 310                 maybe_concat_lines();
 311                 goto end_of_next_char;
 312         }
 313
 314         if (LIKELY(input.c != '?'))
 315                 goto end_of_next_char;
 316
 317         next_real_char();
 318         if (LIKELY(input.c != '?')) {
 319                 put_back(input.c);
 320                 input.c = '?';
 321                 goto end_of_next_char;
 322         }
 323
 324         next_real_char();
 325         switch (input.c) {
 326         case '=': input.c = '#'; break;
 327         case '(': input.c = '['; break;
 328         case '/': input.c = '\\'; maybe_concat_lines(); break;
 329         case ')': input.c = ']'; break;
 330         case '\'': input.c = '^'; break;
 331         case '<': input.c = '{'; break;
 332         case '!': input.c = '|'; break;
 333         case '>': input.c = '}'; break;
 334         case '-': input.c = '~'; break;
 335         default:
 336                 put_back(input.c);
 337                 put_back('?');
 338                 input.c = '?';
 339                 break;
 340         }
 341
 342 end_of_next_char:;
 343 #ifdef DEBUG_CHARS
 344         printf("nchar '%c'\n", input.c);
 345 #endif
 346 }
 347
 348
 349
 350 /**
 351  * Returns true if the given char is a octal digit.
 352  *
 353  * @param char  the character to check
 354  */
 355 static inline bool is_octal_digit(int chr)
 356 {
 357         switch (chr) {
 358         case '0':
 359         case '1':
 360         case '2':
 361         case '3':
 362         case '4':
 363         case '5':
 364         case '6':
 365         case '7':
 366                 return true;
 367         default:
 368                 return false;
 369         }
 370 }
 371
 372 /**
 373  * Returns the value of a digit.
 374  * The only portable way to do it ...
 375  */
 376 static int digit_value(int digit)
 377 {
 378         switch (digit) {
 379         case '0': return 0;
 380         case '1': return 1;
 381         case '2': return 2;
 382         case '3': return 3;
 383         case '4': return 4;
 384         case '5': return 5;
 385         case '6': return 6;
 386         case '7': return 7;
 387         case '8': return 8;
 388         case '9': return 9;
 389         case 'a':
 390         case 'A': return 10;
 391         case 'b':
 392         case 'B': return 11;
 393         case 'c':
 394         case 'C': return 12;
 395         case 'd':
 396         case 'D': return 13;
 397         case 'e':
 398         case 'E': return 14;
 399         case 'f':
 400         case 'F': return 15;
 401         default:
 402                 panic("wrong character given");
 403         }
 404 }
 405
 406 /**
 407  * Parses an octal character sequence.
 408  *
 409  * @param first_digit  the already read first digit
 410  */
 411 static utf32 parse_octal_sequence(const utf32 first_digit)
 412 {
 413         assert(is_octal_digit(first_digit));
 414         utf32 value = digit_value(first_digit);
 415         if (!is_octal_digit(input.c)) return value;
 416         value = 8 * value + digit_value(input.c);
 417         next_char();
 418         if (!is_octal_digit(input.c)) return value;
 419         value = 8 * value + digit_value(input.c);
 420         next_char();
 421         return value;
 422
 423 }
 424
 425 /**
 426  * Parses a hex character sequence.
 427  */
 428 static utf32 parse_hex_sequence(void)
 429 {
 430         utf32 value = 0;
 431         while (isxdigit(input.c)) {
 432                 value = 16 * value + digit_value(input.c);
 433                 next_char();
 434         }
 435         return value;
 436 }
 437
 438 static bool is_universal_char_valid(utf32 const v)
 439 {
 440         /* C11 §6.4.3:2 */
 441         if (v < 0xA0U && v != 0x24 && v != 0x40 && v != 0x60)
 442                 return false;
 443         if (0xD800 <= v && v <= 0xDFFF)
 444                 return false;
 445         return true;
 446 }
 447
 448 static utf32 parse_universal_char(unsigned const n_digits)
 449 {
 450         utf32 v = 0;
 451         for (unsigned k = n_digits; k != 0; --k) {
 452                 if (isxdigit(input.c)) {
 453                         v = 16 * v + digit_value(input.c);
 454                         if (!resolve_escape_sequences)
 455                                 obstack_1grow(&symbol_obstack, input.c);
 456                         next_char();
 457                 } else {
 458                         errorf(&input.position,
 459                                "short universal character name, expected %u more digits",
 460                                    k);
 461                         break;
 462                 }
 463         }
 464         if (!is_universal_char_valid(v)) {
 465                 errorf(&input.position,
 466                        "\\%c%0*X is not a valid universal character name",
 467                        n_digits == 4 ? 'u' : 'U', (int)n_digits, v);
 468         }
 469         return v;
 470 }
 471
 472 static bool is_universal_char_valid_identifier_c99(utf32 const v)
 473 {
 474         static const utf32 single_chars[] = {
 475                 0x00AA, 0x00BA, 0x0386, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0,
 476                 0x1F59, 0x1F5B, 0x1F5D, 0x05BF, 0x09B2, 0x0A02, 0x0A5E, 0x0A74,
 477                 0x0A8D, 0x0AD0, 0x0AE0, 0x0B9C, 0x0CDE, 0x0E84, 0x0E8A, 0x0E8D,
 478                 0x0EA5, 0x0EA7, 0x0EC6, 0x0F00, 0x0F35, 0x0F37, 0x0F39, 0x0F97,
 479                 0x0FB9, 0x00B5, 0x00B7, 0x02BB, 0x037A, 0x0559, 0x093D, 0x0B3D,
 480                 0x1FBE, 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128
 481         };
 482
 483         static const utf32 ranges[][2] = {
 484                 {0x00C0, 0x00D6}, {0x00D8, 0x00F6}, {0x00F8, 0x01F5}, {0x01FA, 0x0217},
 485                 {0x0250, 0x02A8}, {0x1E00, 0x1E9B}, {0x1EA0, 0x1EF9}, {0x0388, 0x038A},
 486                 {0x038E, 0x03A1}, {0x03A3, 0x03CE}, {0x03D0, 0x03D6}, {0x03E2, 0x03F3},
 487                 {0x1F00, 0x1F15}, {0x1F18, 0x1F1D}, {0x1F20, 0x1F45}, {0x1F48, 0x1F4D},
 488                 {0x1F50, 0x1F57}, {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4}, {0x1FB6, 0x1FBC},
 489                 {0x1FC2, 0x1FC4}, {0x1FC6, 0x1FCC}, {0x1FD0, 0x1FD3}, {0x1FD6, 0x1FDB},
 490                 {0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4}, {0x1FF6, 0x1FFC}, {0x0401, 0x040C},
 491                 {0x040E, 0x044F}, {0x0451, 0x045C}, {0x045E, 0x0481}, {0x0490, 0x04C4},
 492                 {0x04C7, 0x04C8}, {0x04CB, 0x04CC}, {0x04D0, 0x04EB}, {0x04EE, 0x04F5},
 493                 {0x04F8, 0x04F9}, {0x0531, 0x0556}, {0x0561, 0x0587}, {0x05B0, 0x05B9},
 494                 {0x05BB, 0x05BD}, {0x05C1, 0x05C2}, {0x05D0, 0x05EA}, {0x05F0, 0x05F2},
 495                 {0x0621, 0x063A}, {0x0640, 0x0652}, {0x0670, 0x06B7}, {0x06BA, 0x06BE},
 496                 {0x06C0, 0x06CE}, {0x06D0, 0x06DC}, {0x06E5, 0x06E8}, {0x06EA, 0x06ED},
 497                 {0x0901, 0x0903}, {0x0905, 0x0939}, {0x093E, 0x094D}, {0x0950, 0x0952},
 498                 {0x0958, 0x0963}, {0x0981, 0x0983}, {0x0985, 0x098C}, {0x098F, 0x0990},
 499                 {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B6, 0x09B9}, {0x09BE, 0x09C4},
 500                 {0x09C7, 0x09C8}, {0x09CB, 0x09CD}, {0x09DC, 0x09DD}, {0x09DF, 0x09E3},
 501                 {0x09F0, 0x09F1}, {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28},
 502                 {0x0A2A, 0x0A30}, {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, {0x0A38, 0x0A39},
 503                 {0x0A3E, 0x0A42}, {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A59, 0x0A5C},
 504                 {0x0A81, 0x0A83}, {0x0A85, 0x0A8B}, {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8},
 505                 {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3}, {0x0AB5, 0x0AB9}, {0x0ABD, 0x0AC5},
 506                 {0x0AC7, 0x0AC9}, {0x0ACB, 0x0ACD}, {0x0B01, 0x0B03}, {0x0B05, 0x0B0C},
 507                 {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, {0x0B32, 0x0B33},
 508                 {0x0B36, 0x0B39}, {0x0B3E, 0x0B43}, {0x0B47, 0x0B48}, {0x0B4B, 0x0B4D},
 509                 {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B61}, {0x0B82, 0x0B83}, {0x0B85, 0x0B8A},
 510                 {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A}, {0x0B9E, 0x0B9F},
 511                 {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA}, {0x0BAE, 0x0BB5}, {0x0BB7, 0x0BB9},
 512                 {0x0BBE, 0x0BC2}, {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD}, {0x0C01, 0x0C03},
 513                 {0x0C05, 0x0C0C}, {0x0C0E, 0x0C10}, {0x0C12, 0x0C28}, {0x0C2A, 0x0C33},
 514                 {0x0C35, 0x0C39}, {0x0C3E, 0x0C44}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D},
 515                 {0x0C60, 0x0C61}, {0x0C82, 0x0C83}, {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90},
 516                 {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3}, {0x0CB5, 0x0CB9}, {0x0CBE, 0x0CC4},
 517                 {0x0CC6, 0x0CC8}, {0x0CCA, 0x0CCD}, {0x0CE0, 0x0CE1}, {0x0D02, 0x0D03},
 518                 {0x0D05, 0x0D0C}, {0x0D0E, 0x0D10}, {0x0D12, 0x0D28}, {0x0D2A, 0x0D39},
 519                 {0x0D3E, 0x0D43}, {0x0D46, 0x0D48}, {0x0D4A, 0x0D4D}, {0x0D60, 0x0D61},
 520                 {0x0E01, 0x0E3A}, {0x0E40, 0x0E5B}, {0x0E81, 0x0E82}, {0x0E87, 0x0E88},
 521                 {0x0E94, 0x0E97}, {0x0E99, 0x0E9F}, {0x0EA1, 0x0EA3}, {0x0EAA, 0x0EAB},
 522                 {0x0EAD, 0x0EAE}, {0x0EB0, 0x0EB9}, {0x0EBB, 0x0EBD}, {0x0EC0, 0x0EC4},
 523                 {0x0EC8, 0x0ECD}, {0x0EDC, 0x0EDD}, {0x0F18, 0x0F19}, {0x0F3E, 0x0F47},
 524                 {0x0F49, 0x0F69}, {0x0F71, 0x0F84}, {0x0F86, 0x0F8B}, {0x0F90, 0x0F95},
 525                 {0x0F99, 0x0FAD}, {0x0FB1, 0x0FB7}, {0x10A0, 0x10C5}, {0x10D0, 0x10F6},
 526                 {0x3041, 0x3093}, {0x309B, 0x309C}, {0x30A1, 0x30F6}, {0x30FB, 0x30FC},
 527                 {0x3105, 0x312C}, {0x4E00, 0x9FA5}, {0xAC00, 0xD7A3}, {0x0660, 0x0669},
 528                 {0x06F0, 0x06F9}, {0x0966, 0x096F}, {0x09E6, 0x09EF}, {0x0A66, 0x0A6F},
 529                 {0x0AE6, 0x0AEF}, {0x0B66, 0x0B6F}, {0x0BE7, 0x0BEF}, {0x0C66, 0x0C6F},
 530                 {0x0CE6, 0x0CEF}, {0x0D66, 0x0D6F}, {0x0E50, 0x0E59}, {0x0ED0, 0x0ED9},
 531                 {0x0F20, 0x0F33}, {0x02B0, 0x02B8}, {0x02BD, 0x02C1}, {0x02D0, 0x02D1},
 532                 {0x02E0, 0x02E4}, {0x203F, 0x2040}, {0x210A, 0x2113}, {0x2118, 0x211D},
 533                 {0x212A, 0x2131}, {0x2133, 0x2138}, {0x2160, 0x2182}, {0x3005, 0x3007},
 534                 {0x3021, 0x3029},
 535         };
 536         for (size_t i = 0; i < sizeof(ranges)/sizeof(ranges[0]); ++i) {
 537                 if (ranges[i][0] <= v && v <= ranges[i][1])
 538                         return true;
 539         }
 540         for (size_t i = 0; i < sizeof(single_chars)/sizeof(single_chars[0]); ++i) {
 541                 if (v == single_chars[i])
 542                         return true;
 543         }
 544         return false;
 545 }
 546
 547 static bool is_universal_char_valid_identifier_c11(utf32 const v)
 548 {
 549         /* C11 Annex D.1 */
 550         if (                v == 0x000A8) return true;
 551         if (                v == 0x000AA) return true;
 552         if (                v == 0x000AD) return true;
 553         if (                v == 0x000AF) return true;
 554         if (0x000B2 <= v && v <= 0x000B5) return true;
 555         if (0x000B7 <= v && v <= 0x000BA) return true;
 556         if (0x000BC <= v && v <= 0x000BE) return true;
 557         if (0x000C0 <= v && v <= 0x000D6) return true;
 558         if (0x000D8 <= v && v <= 0x000F6) return true;
 559         if (0x000F8 <= v && v <= 0x000FF) return true;
 560         if (0x00100 <= v && v <= 0x0167F) return true;
 561         if (0x01681 <= v && v <= 0x0180D) return true;
 562         if (0x0180F <= v && v <= 0x01FFF) return true;
 563         if (0x0200B <= v && v <= 0x0200D) return true;
 564         if (0x0202A <= v && v <= 0x0202E) return true;
 565         if (0x0203F <= v && v <= 0x02040) return true;
 566         if (                v == 0x02054) return true;
 567         if (0x02060 <= v && v <= 0x0206F) return true;
 568         if (0x02070 <= v && v <= 0x0218F) return true;
 569         if (0x02460 <= v && v <= 0x024FF) return true;
 570         if (0x02776 <= v && v <= 0x02793) return true;
 571         if (0x02C00 <= v && v <= 0x02DFF) return true;
 572         if (0x02E80 <= v && v <= 0x02FFF) return true;
 573         if (0x03004 <= v && v <= 0x03007) return true;
 574         if (0x03021 <= v && v <= 0x0302F) return true;
 575         if (0x03031 <= v && v <= 0x0303F) return true;
 576         if (0x03040 <= v && v <= 0x0D7FF) return true;
 577         if (0x0F900 <= v && v <= 0x0FD3D) return true;
 578         if (0x0FD40 <= v && v <= 0x0FDCF) return true;
 579         if (0x0FDF0 <= v && v <= 0x0FE44) return true;
 580         if (0x0FE47 <= v && v <= 0x0FFFD) return true;
 581         if (0x10000 <= v && v <= 0x1FFFD) return true;
 582         if (0x20000 <= v && v <= 0x2FFFD) return true;
 583         if (0x30000 <= v && v <= 0x3FFFD) return true;
 584         if (0x40000 <= v && v <= 0x4FFFD) return true;
 585         if (0x50000 <= v && v <= 0x5FFFD) return true;
 586         if (0x60000 <= v && v <= 0x6FFFD) return true;
 587         if (0x70000 <= v && v <= 0x7FFFD) return true;
 588         if (0x80000 <= v && v <= 0x8FFFD) return true;
 589         if (0x90000 <= v && v <= 0x9FFFD) return true;
 590         if (0xA0000 <= v && v <= 0xAFFFD) return true;
 591         if (0xB0000 <= v && v <= 0xBFFFD) return true;
 592         if (0xC0000 <= v && v <= 0xCFFFD) return true;
 593         if (0xD0000 <= v && v <= 0xDFFFD) return true;
 594         if (0xE0000 <= v && v <= 0xEFFFD) return true;
 595         return false;
 596 }
 597
 598 static bool is_universal_char_valid_identifier(utf32 const v)
 599 {
 600         if (c_mode & _C11)
 601                 return is_universal_char_valid_identifier_c11(v);
 602         return is_universal_char_valid_identifier_c99(v);
 603 }
 604
 605 static bool is_universal_char_invalid_identifier_start(utf32 const v)
 606 {
 607         if (! (c_mode & _C11))
 608                 return false;
 609
 610         /* C11 Annex D.2 */
 611         if (0x0300 <= v && v <= 0x036F) return true;
 612         if (0x1DC0 <= v && v <= 0x1DFF) return true;
 613         if (0x20D0 <= v && v <= 0x20FF) return true;
 614         if (0xFE20 <= v && v <= 0xFE2F) return true;
 615         return false;
 616 }
 617
 618 /**
 619  * Parse an escape sequence.
 620  */
 621 static utf32 parse_escape_sequence(void)
 622 {
 623         eat('\\');
 624
 625         utf32 const ec = input.c;
 626         next_char();
 627
 628         switch (ec) {
 629         case '"':  return '"';
 630         case '\'': return '\'';
 631         case '\\': return '\\';
 632         case '?': return '\?';
 633         case 'a': return '\a';
 634         case 'b': return '\b';
 635         case 'f': return '\f';
 636         case 'n': return '\n';
 637         case 'r': return '\r';
 638         case 't': return '\t';
 639         case 'v': return '\v';
 640         case 'x':
 641                 return parse_hex_sequence();
 642         case '0':
 643         case '1':
 644         case '2':
 645         case '3':
 646         case '4':
 647         case '5':
 648         case '6':
 649         case '7':
 650                 return parse_octal_sequence(ec);
 651         case EOF:
 652                 parse_error("reached end of file while parsing escape sequence");
 653                 return EOF;
 654         /* \E is not documented, but handled, by GCC.  It is acceptable according
 655          * to §6.11.4, whereas \e is not. */
 656         case 'E':
 657         case 'e':
 658                 if (c_mode & _GNUC)
 659                         return 27;   /* hopefully 27 is ALWAYS the code for ESCAPE */
 660                 break;
 661
 662         case 'U': return parse_universal_char(8);
 663         case 'u': return parse_universal_char(4);
 664
 665         default:
 666                 break;
 667         }
 668         /* §6.4.4.4:8 footnote 64 */
 669         parse_error("unknown escape sequence");
 670         return EOF;
 671 }
 672
 673 static const char *identify_string(char *string)
 674 {
 675         const char *result = strset_insert(&stringset, string);
 676         if (result != string) {
 677                 obstack_free(&symbol_obstack, string);
 678         }
 679         return result;
 680 }
 681
 682 static string_t sym_make_string(string_encoding_t const enc)
 683 {
 684         obstack_1grow(&symbol_obstack, '\0');
 685         size_t      const len    = obstack_object_size(&symbol_obstack) - 1;
 686         char       *const string = obstack_finish(&symbol_obstack);
 687         char const *const result = identify_string(string);
 688         return (string_t){ result, len, enc };
 689 }
 690
 691 string_t make_string(char const *const string)
 692 {
 693         obstack_grow(&symbol_obstack, string, strlen(string));
 694         return sym_make_string(STRING_ENCODING_CHAR);
 695 }
 696
 697 static utf32 get_string_encoding_limit(string_encoding_t const enc)
 698 {
 699         switch (enc) {
 700         case STRING_ENCODING_CHAR:   return 0xFF;
 701         case STRING_ENCODING_CHAR16: return 0xFFFF;
 702         case STRING_ENCODING_CHAR32: return 0xFFFFFFFF;
 703         case STRING_ENCODING_UTF8:   return 0xFFFFFFFF;
 704         case STRING_ENCODING_WIDE:   return 0xFFFFFFFF; // FIXME depends on settings
 705         }
 706         panic("invalid string encoding");
 707 }
 708
 709 static void parse_string(utf32 const delimiter, token_kind_t const kind,
 710                          string_encoding_t const enc,
 711                          char const *const context)
 712 {
 713         const unsigned start_linenr = input.position.lineno;
 714
 715         eat(delimiter);
 716
 717         utf32 const limit = get_string_encoding_limit(enc);
 718         while (true) {
 719                 switch (input.c) {
 720                 case '\\': {
 721                         if (resolve_escape_sequences) {
 722                                 utf32 const tc = parse_escape_sequence();
 723                                 if (tc > limit) {
 724                                         warningf(WARN_OTHER, &pp_token.base.source_position, "escape sequence out of range");
 725                                 }
 726                                 if (enc == STRING_ENCODING_CHAR) {
 727                                         obstack_1grow(&symbol_obstack, tc);
 728                                 } else {
 729                                         obstack_grow_utf8(&symbol_obstack, tc);
 730                                 }
 731                         } else {
 732                                 obstack_1grow(&symbol_obstack, (char)input.c);
 733                                 next_char();
 734                                 obstack_1grow(&symbol_obstack, (char)input.c);
 735                                 next_char();
 736                         }
 737                         break;
 738                 }
 739
 740                 case NEWLINE:
 741                         errorf(&pp_token.base.source_position, "newline while parsing %s", context);
 742                         break;
 743
 744                 case EOF: {
 745                         source_position_t source_position;
 746                         source_position.input_name = pp_token.base.source_position.input_name;
 747                         source_position.lineno     = start_linenr;
 748                         errorf(&source_position, "EOF while parsing %s", context);
 749                         goto end_of_string;
 750                 }
 751
 752                 default:
 753                         if (input.c == delimiter) {
 754                                 next_char();
 755                                 goto end_of_string;
 756                         } else {
 757                                 obstack_grow_utf8(&symbol_obstack, input.c);
 758                                 next_char();
 759                                 break;
 760                         }
 761                 }
 762         }
 763
 764 end_of_string:
 765         pp_token.kind           = kind;
 766         pp_token.literal.string = sym_make_string(enc);
 767 }
 768
 769 static void parse_string_literal(string_encoding_t const enc)
 770 {
 771         parse_string('"', T_STRING_LITERAL, enc, "string literal");
 772 }
 773
 774 static void parse_character_constant(string_encoding_t const enc)
 775 {
 776         parse_string('\'', T_CHARACTER_CONSTANT, enc, "character constant");
 777         if (pp_token.literal.string.size == 0) {
 778                 parse_error("empty character constant");
 779         }
 780 }
 781
 782 #define SYMBOL_CASES_WITHOUT_E_P \
 783              '$': if (!allow_dollar_in_symbol) goto dollar_sign; \
 784         case 'a': \
 785         case 'b': \
 786         case 'c': \
 787         case 'd': \
 788         case 'f': \
 789         case 'g': \
 790         case 'h': \
 791         case 'i': \
 792         case 'j': \
 793         case 'k': \
 794         case 'l': \
 795         case 'm': \
 796         case 'n': \
 797         case 'o': \
 798         case 'q': \
 799         case 'r': \
 800         case 's': \
 801         case 't': \
 802         case 'u': \
 803         case 'v': \
 804         case 'w': \
 805         case 'x': \
 806         case 'y': \
 807         case 'z': \
 808         case 'A': \
 809         case 'B': \
 810         case 'C': \
 811         case 'D': \
 812         case 'F': \
 813         case 'G': \
 814         case 'H': \
 815         case 'I': \
 816         case 'J': \
 817         case 'K': \
 818         case 'L': \
 819         case 'M': \
 820         case 'N': \
 821         case 'O': \
 822         case 'Q': \
 823         case 'R': \
 824         case 'S': \
 825         case 'T': \
 826         case 'U': \
 827         case 'V': \
 828         case 'W': \
 829         case 'X': \
 830         case 'Y': \
 831         case 'Z': \
 832         case '_'
 833
 834 #define SYMBOL_CASES \
 835              SYMBOL_CASES_WITHOUT_E_P: \
 836         case 'e': \
 837         case 'p': \
 838         case 'E': \
 839         case 'P'
 840
 841 #define DIGIT_CASES \
 842              '0':  \
 843         case '1':  \
 844         case '2':  \
 845         case '3':  \
 846         case '4':  \
 847         case '5':  \
 848         case '6':  \
 849         case '7':  \
 850         case '8':  \
 851         case '9'
 852
 853 static void start_expanding(pp_definition_t *definition)
 854 {
 855         definition->parent_expansion = current_expansion;
 856         definition->expand_pos       = 0;
 857         definition->is_expanding     = true;
 858         if (definition->list_len > 0) {
 859                 definition->token_list[0].had_whitespace
 860                         = info.had_whitespace;
 861         }
 862         current_expansion = definition;
 863 }
 864
 865 static void finished_expanding(pp_definition_t *definition)
 866 {
 867         assert(definition->is_expanding);
 868         pp_definition_t *parent = definition->parent_expansion;
 869         definition->parent_expansion = NULL;
 870         definition->is_expanding     = false;
 871
 872         /* stop further expanding once we expanded a parameter used in a
 873          * sub macro-call */
 874         if (definition == argument_expanding)
 875                 argument_expanding = NULL;
 876
 877         assert(current_expansion == definition);
 878         current_expansion = parent;
 879 }
 880
 881 static void grow_string_escaped(struct obstack *obst, const string_t *string, char const *delimiter)
 882 {
 883         char const *prefix = get_string_encoding_prefix(string->encoding);
 884         obstack_printf(obst, "%s%s", prefix, delimiter);
 885         size_t      size = string->size;
 886         const char *str  = string->begin;
 887         if (resolve_escape_sequences) {
 888                 obstack_grow(obst, str, size);
 889         } else {
 890                 for (size_t i = 0; i < size; ++i) {
 891                         const char c = str[i];
 892                         if (c == '\\' || c == '"')
 893                                 obstack_1grow(obst, '\\');
 894                         obstack_1grow(obst, c);
 895                 }
 896         }
 897         obstack_printf(obst, "%s", delimiter);
 898 }
 899
 900 static void grow_token(struct obstack *obst, const token_t *token)
 901 {
 902         switch (token->kind) {
 903         case T_NUMBER:
 904                 obstack_grow(obst, token->literal.string.begin, token->literal.string.size);
 905                 break;
 906
 907         case T_STRING_LITERAL: {
 908                 char const *const delimiter = resolve_escape_sequences ? "\"" : "\\\"";
 909                 grow_string_escaped(obst, &token->literal.string, delimiter);
 910                 break;
 911         }
 912
 913         case T_CHARACTER_CONSTANT:
 914                 grow_string_escaped(obst, &token->literal.string, "'");
 915                 break;
 916
 917         case T_IDENTIFIER:
 918         default: {
 919                 const char *str = token->base.symbol->string;
 920                 size_t      len = strlen(str);
 921                 obstack_grow(obst, str, len);
 922                 break;
 923         }
 924         }
 925 }
 926
 927 static void stringify(const pp_definition_t *definition)
 928 {
 929         assert(obstack_object_size(&symbol_obstack) == 0);
 930
 931         size_t list_len = definition->list_len;
 932         for (size_t p = 0; p < list_len; ++p) {
 933                 const saved_token_t *saved = &definition->token_list[p];
 934                 if (p > 0 && saved->had_whitespace)
 935                         obstack_1grow(&symbol_obstack, ' ');
 936                 grow_token(&symbol_obstack, &saved->token);
 937         }
 938         pp_token.kind           = T_STRING_LITERAL;
 939         pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
 940 }
 941
 942 static inline void set_punctuator(token_kind_t const kind)
 943 {
 944         pp_token.kind        = kind;
 945         pp_token.base.symbol = token_symbols[kind];
 946 }
 947
 948 static inline void set_digraph(token_kind_t const kind, symbol_t *const symbol)
 949 {
 950         pp_token.kind        = kind;
 951         pp_token.base.symbol = symbol;
 952 }
 953
 954 /**
 955  * returns next final token from a preprocessor macro expansion
 956  */
 957 static bool expand_next(void)
 958 {
 959         if (current_expansion == NULL)
 960                 return false;
 961
 962 restart:;
 963         size_t pos = current_expansion->expand_pos;
 964         if (pos >= current_expansion->list_len) {
 965                 finished_expanding(current_expansion);
 966                 /* it was the outermost expansion, parse pptoken normally */
 967                 if (current_expansion == NULL) {
 968                         return false;
 969                 }
 970                 goto restart;
 971         }
 972         const saved_token_t *saved = &current_expansion->token_list[pos++];
 973         pp_token = saved->token;
 974         if (pp_token.kind == '#') {
 975                 if (pos < current_expansion->list_len) {
 976                         const saved_token_t *next = &current_expansion->token_list[pos];
 977                         if (next->token.kind == T_MACRO_PARAMETER) {
 978                                 pp_definition_t *def = next->token.macro_parameter.def;
 979                                 assert(def != NULL && def->is_parameter);
 980                                 stringify(def);
 981                                 ++pos;
 982                         }
 983                 }
 984         }
 985
 986         if (current_expansion->expand_pos > 0)
 987                 info.had_whitespace = saved->had_whitespace;
 988         current_expansion->expand_pos = pos;
 989         pp_token.base.source_position = expansion_pos;
 990
 991         return true;
 992 }
 993
 994 /**
 995  * Returns the next token kind found when continuing the current expansions
 996  * without starting new sub-expansions.
 997  */
 998 static token_kind_t peek_expansion(void)
 999 {
1000         for (pp_definition_t *e = current_expansion; e; e = e->parent_expansion) {
1001                 if (e->expand_pos < e->list_len)
1002                         return e->token_list[e->expand_pos].token.kind;
1003         }
1004         return T_EOF;
1005 }
1006
1007 static void skip_line_comment(void)
1008 {
1009         info.had_whitespace = true;
1010         while (true) {
1011                 switch (input.c) {
1012                 case EOF:
1013                         return;
1014
1015                 case '\r':
1016                 case '\n':
1017                         return;
1018
1019                 default:
1020                         next_char();
1021                         break;
1022                 }
1023         }
1024 }
1025
1026 static void skip_multiline_comment(void)
1027 {
1028         info.had_whitespace = true;
1029
1030         unsigned start_linenr = input.position.lineno;
1031         while (true) {
1032                 switch (input.c) {
1033                 case '/':
1034                         next_char();
1035                         if (input.c == '*') {
1036                                 /* TODO: nested comment, warn here */
1037                         }
1038                         break;
1039                 case '*':
1040                         next_char();
1041                         if (input.c == '/') {
1042                                 if (input.position.lineno != input.output_line)
1043                                         info.whitespace_at_line_begin = input.position.colno;
1044                                 next_char();
1045                                 return;
1046                         }
1047                         break;
1048
1049                 case NEWLINE:
1050                         break;
1051
1052                 case EOF: {
1053                         source_position_t source_position;
1054                         source_position.input_name = pp_token.base.source_position.input_name;
1055                         source_position.lineno     = start_linenr;
1056                         errorf(&source_position, "at end of file while looking for comment end");
1057                         return;
1058                 }
1059
1060                 default:
1061                         next_char();
1062                         break;
1063                 }
1064         }
1065 }
1066
1067 static bool skip_till_newline(bool stop_at_non_whitespace)
1068 {
1069         bool res = false;
1070         while (true) {
1071                 switch (input.c) {
1072                 case ' ':
1073                 case '\t':
1074                         next_char();
1075                         continue;
1076
1077                 case '/':
1078                         next_char();
1079                         if (input.c == '/') {
1080                                 next_char();
1081                                 skip_line_comment();
1082                                 continue;
1083                         } else if (input.c == '*') {
1084                                 next_char();
1085                                 skip_multiline_comment();
1086                                 continue;
1087                         } else {
1088                                 put_back(input.c);
1089                                 input.c = '/';
1090                         }
1091                         return true;
1092
1093                 case NEWLINE:
1094                         return res;
1095
1096                 default:
1097                         if (stop_at_non_whitespace)
1098                                 return false;
1099                         res = true;
1100                         next_char();
1101                         continue;
1102                 }
1103         }
1104 }
1105
1106 static void skip_whitespace(void)
1107 {
1108         while (true) {
1109                 switch (input.c) {
1110                 case ' ':
1111                 case '\t':
1112                         ++info.whitespace_at_line_begin;
1113                         info.had_whitespace = true;
1114                         next_char();
1115                         continue;
1116
1117                 case NEWLINE:
1118                         info.at_line_begin  = true;
1119                         info.had_whitespace = true;
1120                         info.whitespace_at_line_begin = 0;
1121                         continue;
1122
1123                 case '/':
1124                         next_char();
1125                         if (input.c == '/') {
1126                                 next_char();
1127                                 skip_line_comment();
1128                                 continue;
1129                         } else if (input.c == '*') {
1130                                 next_char();
1131                                 skip_multiline_comment();
1132                                 continue;
1133                         } else {
1134                                 put_back(input.c);
1135                                 input.c = '/';
1136                         }
1137                         return;
1138
1139                 default:
1140                         return;
1141                 }
1142         }
1143 }
1144
1145 static inline void eat_pp(pp_token_kind_t const kind)
1146 {
1147         assert(pp_token.base.symbol->pp_ID == kind);
1148         (void) kind;
1149         next_input_token();
1150 }
1151
1152 static inline void eat_token(token_kind_t const kind)
1153 {
1154         assert(pp_token.kind == kind);
1155         (void)kind;
1156         next_input_token();
1157 }
1158
1159 static string_encoding_t identify_encoding_prefix(symbol_t *const sym)
1160 {
1161         if (sym == symbol_L) return STRING_ENCODING_WIDE;
1162         if (c_mode & _C11) {
1163                 if (sym == symbol_U)  return STRING_ENCODING_CHAR32;
1164                 if (sym == symbol_u)  return STRING_ENCODING_CHAR16;
1165                 if (sym == symbol_u8) return STRING_ENCODING_UTF8;
1166         }
1167         return STRING_ENCODING_CHAR;
1168 }
1169
1170 static void parse_symbol(void)
1171 {
1172         assert(obstack_object_size(&symbol_obstack) == 0);
1173         while (true) {
1174                 switch (input.c) {
1175                 case DIGIT_CASES:
1176                 case SYMBOL_CASES:
1177                         obstack_1grow(&symbol_obstack, (char) input.c);
1178                         next_char();
1179                         break;
1180
1181                 case '\\':
1182                         next_char();
1183                         switch (input.c) {
1184                         {
1185                                 unsigned n;
1186                         case 'U': n = 8; goto universal;
1187                         case 'u': n = 4; goto universal;
1188 universal:
1189                                 if (!resolve_escape_sequences) {
1190                                         obstack_1grow(&symbol_obstack, '\\');
1191                                         obstack_1grow(&symbol_obstack, input.c);
1192                                 }
1193                                 next_char();
1194                                 utf32 const v = parse_universal_char(n);
1195                                 if (!is_universal_char_valid_identifier(v)) {
1196                                         if (is_universal_char_valid(v)) {
1197                                                 errorf(&input.position,
1198                                                            "universal character \\%c%0*X is not valid in an identifier",
1199                                                            n == 4 ? 'u' : 'U', (int)n, v);
1200                                         }
1201                                 } else if (obstack_object_size(&symbol_obstack) == 0 && is_universal_char_invalid_identifier_start(v)) {
1202                                         errorf(&input.position,
1203                                                    "universal character \\%c%0*X is not valid as start of an identifier",
1204                                                    n == 4 ? 'u' : 'U', (int)n, v);
1205                                 } else if (resolve_escape_sequences) {
1206                                         obstack_grow_utf8(&symbol_obstack, v);
1207                                 }
1208                                 break;
1209                         }
1210
1211                         default:
1212                                 put_back(input.c);
1213                                 input.c = '\\';
1214                                 goto end_symbol;
1215                         }
1216
1217                 default:
1218 dollar_sign:
1219                         goto end_symbol;
1220                 }
1221         }
1222
1223 end_symbol:
1224         obstack_1grow(&symbol_obstack, '\0');
1225         char *string = obstack_finish(&symbol_obstack);
1226
1227         symbol_t *symbol = symbol_table_insert(string);
1228
1229         /* Might be a prefixed string or character constant: L/U/u/u8"string". */
1230         if (input.c == '"') {
1231                 string_encoding_t const enc = identify_encoding_prefix(symbol);
1232                 if (enc != STRING_ENCODING_CHAR) {
1233                         parse_string_literal(enc);
1234                         return;
1235                 }
1236         } else if (input.c == '\'') {
1237                 string_encoding_t const enc = identify_encoding_prefix(symbol);
1238                 if (enc != STRING_ENCODING_CHAR) {
1239                         if (enc == STRING_ENCODING_UTF8) {
1240                                 errorf(&pp_token.base.source_position, "'u8' is not a valid encoding for a chracter constant");
1241                         }
1242                         parse_character_constant(enc);
1243                         return;
1244                 }
1245         }
1246
1247         pp_token.kind        = symbol->ID;
1248         pp_token.base.symbol = symbol;
1249
1250         /* we can free the memory from symbol obstack if we already had an entry in
1251          * the symbol table */
1252         if (symbol->string != string) {
1253                 obstack_free(&symbol_obstack, string);
1254         }
1255 }
1256
1257 static void parse_number(void)
1258 {
1259         obstack_1grow(&symbol_obstack, (char) input.c);
1260         next_char();
1261
1262         while (true) {
1263                 switch (input.c) {
1264                 case '.':
1265                 case DIGIT_CASES:
1266                 case SYMBOL_CASES_WITHOUT_E_P:
1267                         obstack_1grow(&symbol_obstack, (char) input.c);
1268                         next_char();
1269                         break;
1270
1271                 case 'e':
1272                 case 'p':
1273                 case 'E':
1274                 case 'P':
1275                         obstack_1grow(&symbol_obstack, (char) input.c);
1276                         next_char();
1277                         if (input.c == '+' || input.c == '-') {
1278                                 obstack_1grow(&symbol_obstack, (char) input.c);
1279                                 next_char();
1280                         }
1281                         break;
1282
1283                 default:
1284 dollar_sign:
1285                         goto end_number;
1286                 }
1287         }
1288
1289 end_number:
1290         pp_token.kind           = T_NUMBER;
1291         pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
1292 }
1293
/*
 * Building blocks for lexing multi-character punctuators with nested switch
 * statements. A sequence always has the shape
 *     MAYBE_PROLOG  MAYBE(...)...  ELSE(...) or ELSE_CODE(...)
 * where the ELSE variant closes the switch opened by MAYBE_PROLOG.
 */

/* Consumes the current character and opens a switch on the following one. */
#define MAYBE_PROLOG \
        next_char(); \
        switch (input.c) {

/* If the next character is `ch`: consume it, produce punctuator `kind` and
 * return from the enclosing function. */
#define MAYBE(ch, kind) \
        case ch: \
                next_char(); \
                set_punctuator(kind); \
                return;

/* Like MAYBE, but the token keeps the given digraph spelling `symbol`. */
#define MAYBE_DIGRAPH(ch, kind, symbol) \
        case ch: \
                next_char(); \
                set_digraph(kind, symbol); \
                return;

/* Default branch running `code`; also closes the switch of MAYBE_PROLOG. */
#define ELSE_CODE(code) \
        default: \
                code \
        }

/* Default branch: produce punctuator `kind` without consuming the character
 * that was looked at, then return. */
#define ELSE(kind) ELSE_CODE(set_punctuator(kind); return;)
1316
/** identifies and returns the next preprocessing token contained in the
 * input stream. No macro expansion is performed.
 *
 * The result is stored in pp_token; the whitespace and line-begin state of
 * the token is recorded in `info`. Whitespace and comments in front of the
 * token are skipped. At the end of an included file the previous input is
 * restored and lexing continues there; at the end of the outermost input a
 * T_EOF token is produced. */
static void next_input_token(void)
{
        /* take over whitespace information recorded ahead of time, if any */
        if (next_info.had_whitespace) {
                info = next_info;
                next_info.had_whitespace = false;
        } else {
                info.at_line_begin  = false;
                info.had_whitespace = false;
        }
restart:
        pp_token.base.source_position = input.position;
        pp_token.base.symbol          = NULL;

        switch (input.c) {
        case ' ':
        case '\t':
                info.whitespace_at_line_begin++;
                info.had_whitespace = true;
                next_char();
                goto restart;

        case NEWLINE:
                info.at_line_begin            = true;
                info.had_whitespace           = true;
                info.whitespace_at_line_begin = 0;
                goto restart;

        case SYMBOL_CASES:
                parse_symbol();
                return;

        case DIGIT_CASES:
                parse_number();
                return;

        case '"':
                parse_string_literal(STRING_ENCODING_CHAR);
                return;

        case '\'':
                parse_character_constant(STRING_ENCODING_CHAR);
                return;

        case '.':
                MAYBE_PROLOG
                        /* '.' followed by a digit starts a number: restore
                         * the '.' as current character for parse_number() */
                        case '0':
                        case '1':
                        case '2':
                        case '3':
                        case '4':
                        case '5':
                        case '6':
                        case '7':
                        case '8':
                        case '9':
                                put_back(input.c);
                                input.c = '.';
                                parse_number();
                                return;

                        case '.':
                                MAYBE_PROLOG
                                MAYBE('.', T_DOTDOTDOT)
                                /* only two dots: hand the second one back
                                 * and produce a single '.' */
                                ELSE_CODE(
                                        put_back(input.c);
                                        input.c = '.';
                                        set_punctuator('.');
                                        return;
                                )
                ELSE('.')
        case '&':
                MAYBE_PROLOG
                MAYBE('&', T_ANDAND)
                MAYBE('=', T_ANDEQUAL)
                ELSE('&')
        case '*':
                MAYBE_PROLOG
                MAYBE('=', T_ASTERISKEQUAL)
                ELSE('*')
        case '+':
                MAYBE_PROLOG
                MAYBE('+', T_PLUSPLUS)
                MAYBE('=', T_PLUSEQUAL)
                ELSE('+')
        case '-':
                MAYBE_PROLOG
                MAYBE('>', T_MINUSGREATER)
                MAYBE('-', T_MINUSMINUS)
                MAYBE('=', T_MINUSEQUAL)
                ELSE('-')
        case '!':
                MAYBE_PROLOG
                MAYBE('=', T_EXCLAMATIONMARKEQUAL)
                ELSE('!')
        case '/':
                MAYBE_PROLOG
                MAYBE('=', T_SLASHEQUAL)
                /* comments count as whitespace: skip them and lex again */
                case '*':
                        next_char();
                        skip_multiline_comment();
                        goto restart;
                case '/':
                        next_char();
                        skip_line_comment();
                        goto restart;
                ELSE('/')
        case '%':
                MAYBE_PROLOG
                MAYBE_DIGRAPH('>', '}', symbol_percentgreater)
                MAYBE('=', T_PERCENTEQUAL)
                /* "%:" is the digraph for '#', "%:%:" the one for "##" */
                case ':':
                        MAYBE_PROLOG
                        case '%':
                                MAYBE_PROLOG
                                MAYBE_DIGRAPH(':', T_HASHHASH, symbol_percentcolonpercentcolon)
                                ELSE_CODE(
                                        put_back(input.c);
                                        input.c = '%';
                                        goto digraph_percentcolon;
                                )
                        ELSE_CODE(
digraph_percentcolon:
                                set_digraph('#', symbol_percentcolon);
                                return;
                        )
                ELSE('%')
        case '<':
                MAYBE_PROLOG
                MAYBE_DIGRAPH(':', '[', symbol_lesscolon)
                MAYBE_DIGRAPH('%', '{', symbol_lesspercent)
                MAYBE('=', T_LESSEQUAL)
                case '<':
                        MAYBE_PROLOG
                        MAYBE('=', T_LESSLESSEQUAL)
                        ELSE(T_LESSLESS)
                ELSE('<')
        case '>':
                MAYBE_PROLOG
                MAYBE('=', T_GREATEREQUAL)
                case '>':
                        MAYBE_PROLOG
                        MAYBE('=', T_GREATERGREATEREQUAL)
                        ELSE(T_GREATERGREATER)
                ELSE('>')
        case '^':
                MAYBE_PROLOG
                MAYBE('=', T_CARETEQUAL)
                ELSE('^')
        case '|':
                MAYBE_PROLOG
                MAYBE('=', T_PIPEEQUAL)
                MAYBE('|', T_PIPEPIPE)
                ELSE('|')
        case ':':
                MAYBE_PROLOG
                MAYBE_DIGRAPH('>', ']', symbol_colongreater)
                /* "::" is a token only when _CXX is set in c_mode; otherwise
                 * the second ':' is left in the input */
                case ':':
                        if (c_mode & _CXX) {
                                next_char();
                                set_punctuator(T_COLONCOLON);
                                return;
                        }
                        /* FALLTHROUGH */
                ELSE(':')
        case '=':
                MAYBE_PROLOG
                MAYBE('=', T_EQUALEQUAL)
                ELSE('=')
        case '#':
                MAYBE_PROLOG
                MAYBE('#', T_HASHHASH)
                ELSE('#')

        /* punctuators that never combine with a following character */
        case '?':
        case '[':
        case ']':
        case '(':
        case ')':
        case '{':
        case '}':
        case '~':
        case ';':
        case ',':
                set_punctuator(input.c);
                next_char();
                return;

        case EOF:
                if (input_stack != NULL) {
                        /* end of an included file: close it, restore the
                         * saved input and continue lexing there */
                        fclose(close_pp_input());
                        pop_restore_input();
                        if (out)
                                fputc('\n', out);
                        if (input.c == (utf32)EOF)
                                --input.position.lineno;
                        print_line_directive(&input.position, "2");
                        goto restart;
                } else {
                        info.at_line_begin = true;
                        set_punctuator(T_EOF);
                }
                return;

        case '\\':
                /* peek at the character behind the backslash: \u and \U
                 * start an identifier spelled with a universal character
                 * name; the backslash stays the current character */
                next_char();
                int next_c = input.c;
                put_back(input.c);
                input.c = '\\';
                if (next_c == 'U' || next_c == 'u') {
                        parse_symbol();
                        return;
                }
                /* FALLTHROUGH */
        default:
dollar_sign:
                if (error_on_unknown_chars) {
                        errorf(&pp_token.base.source_position, "unknown character '%lc' found", input.c);
                        next_char();
                        goto restart;
                } else {
                        /* pass the character through as a token of its own */
                        assert(obstack_object_size(&symbol_obstack) == 0);
                        obstack_grow_utf8(&symbol_obstack, input.c);
                        obstack_1grow(&symbol_obstack, '\0');
                        char     *const string = obstack_finish(&symbol_obstack);
                        symbol_t *const symbol = symbol_table_insert(string);
                        if (symbol->string != string)
                                obstack_free(&symbol_obstack, string);

                        pp_token.kind        = T_UNKNOWN_CHAR;
                        pp_token.base.symbol = symbol;
                        next_char();
                        return;
                }
        }
}
1554
1555 static void print_quoted_string(const char *const string)
1556 {
1557         fputc('"', out);
1558         for (const char *c = string; *c != 0; ++c) {
1559                 switch (*c) {
1560                 case '"': fputs("\\\"", out); break;
1561                 case '\\':  fputs("\\\\", out); break;
1562                 case '\a':  fputs("\\a", out); break;
1563                 case '\b':  fputs("\\b", out); break;
1564                 case '\f':  fputs("\\f", out); break;
1565                 case '\n':  fputs("\\n", out); break;
1566                 case '\r':  fputs("\\r", out); break;
1567                 case '\t':  fputs("\\t", out); break;
1568                 case '\v':  fputs("\\v", out); break;
1569                 case '\?':  fputs("\\?", out); break;
1570                 default:
1571                         if (!isprint(*c)) {
1572                                 fprintf(out, "\\%03o", (unsigned)*c);
1573                                 break;
1574                         }
1575                         fputc(*c, out);
1576                         break;
1577                 }
1578         }
1579         fputc('"', out);
1580 }
1581
1582 static void print_line_directive(const source_position_t *pos, const char *add)
1583 {
1584         if (!out)
1585                 return;
1586
1587         fprintf(out, "# %u ", pos->lineno);
1588         print_quoted_string(pos->input_name);
1589         if (add != NULL) {
1590                 fputc(' ', out);
1591                 fputs(add, out);
1592         }
1593         if (pos->is_system_header) {
1594                 fputs(" 3", out);
1595         }
1596
1597         printed_input_name = pos->input_name;
1598         input.output_line  = pos->lineno-1;
1599 }
1600
1601 static bool emit_newlines(void)
1602 {
1603         if (!out)
1604                 return true;
1605
1606         unsigned delta = pp_token.base.source_position.lineno - input.output_line;
1607         if (delta == 0)
1608                 return false;
1609
1610         if (delta >= 9) {
1611                 fputc('\n', out);
1612                 print_line_directive(&pp_token.base.source_position, NULL);
1613                 fputc('\n', out);
1614         } else {
1615                 for (unsigned i = 0; i < delta; ++i) {
1616                         fputc('\n', out);
1617                 }
1618         }
1619         input.output_line = pp_token.base.source_position.lineno;
1620
1621         unsigned whitespace = info.whitespace_at_line_begin;
1622         /* make sure there is at least 1 whitespace before a (macro-expanded)
1623          * '#' at line begin. I'm not sure why this is good, but gcc does it. */
1624         if (pp_token.kind == '#' && whitespace == 0)
1625                 ++whitespace;
1626         for (unsigned i = 0; i < whitespace; ++i)
1627                 fputc(' ', out);
1628
1629         return true;
1630 }
1631
1632 void set_preprocessor_output(FILE *output)
1633 {
1634         out = output;
1635         if (out != NULL) {
1636                 error_on_unknown_chars   = false;
1637                 resolve_escape_sequences = false;
1638         } else {
1639                 error_on_unknown_chars   = true;
1640                 resolve_escape_sequences = true;
1641         }
1642 }
1643
1644 void emit_pp_token(void)
1645 {
1646         if (!emit_newlines() &&
1647             (info.had_whitespace || tokens_would_paste(last_token, pp_token.kind)))
1648                 fputc(' ', out);
1649
1650         switch (pp_token.kind) {
1651         case T_NUMBER:
1652                 fputs(pp_token.literal.string.begin, out);
1653                 break;
1654
1655         case T_STRING_LITERAL:
1656                 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1657                 fputc('"', out);
1658                 fputs(pp_token.literal.string.begin, out);
1659                 fputc('"', out);
1660                 break;
1661
1662         case T_CHARACTER_CONSTANT:
1663                 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1664                 fputc('\'', out);
1665                 fputs(pp_token.literal.string.begin, out);
1666                 fputc('\'', out);
1667                 break;
1668
1669         case T_MACRO_PARAMETER:
1670                 panic("macro parameter not expanded");
1671
1672         default:
1673                 fputs(pp_token.base.symbol->string, out);
1674                 break;
1675         }
1676         last_token = pp_token.kind;
1677 }
1678
1679 static void eat_pp_directive(void)
1680 {
1681         while (!info.at_line_begin) {
1682                 next_input_token();
1683         }
1684 }
1685
1686 static bool strings_equal(const string_t *string1, const string_t *string2)
1687 {
1688         size_t size = string1->size;
1689         if (size != string2->size)
1690                 return false;
1691
1692         const char *c1 = string1->begin;
1693         const char *c2 = string2->begin;
1694         for (size_t i = 0; i < size; ++i, ++c1, ++c2) {
1695                 if (*c1 != *c2)
1696                         return false;
1697         }
1698         return true;
1699 }
1700
1701 static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
1702 {
1703         if (token1->kind != token2->kind)
1704                 return false;
1705
1706         switch (token1->kind) {
1707         case T_NUMBER:
1708         case T_CHARACTER_CONSTANT:
1709         case T_STRING_LITERAL:
1710                 return strings_equal(&token1->literal.string, &token2->literal.string);
1711
1712         case T_MACRO_PARAMETER:
1713                 return token1->macro_parameter.def->symbol
1714                     == token2->macro_parameter.def->symbol;
1715
1716         default:
1717                 return token1->base.symbol == token2->base.symbol;
1718         }
1719 }
1720
1721 static bool pp_definitions_equal(const pp_definition_t *definition1,
1722                                  const pp_definition_t *definition2)
1723 {
1724         if (definition1->list_len != definition2->list_len)
1725                 return false;
1726
1727         size_t               len = definition1->list_len;
1728         const saved_token_t *t1  = definition1->token_list;
1729         const saved_token_t *t2  = definition2->token_list;
1730         for (size_t i = 0; i < len; ++i, ++t1, ++t2) {
1731                 if (!pp_tokens_equal(&t1->token, &t2->token))
1732                         return false;
1733                 if (t1->had_whitespace != t2->had_whitespace)
1734                         return false;
1735         }
1736         return true;
1737 }
1738
1739 static void missing_macro_param_error(void)
1740 {
1741         errorf(&pp_token.base.source_position,
1742                "'#' is not followed by a macro parameter");
1743 }
1744
1745 static bool is_defineable_token(char const *const context)
1746 {
1747         if (info.at_line_begin) {
1748                 errorf(&pp_token.base.source_position, "unexpected end of line after %s", context);
1749         }
1750
1751         symbol_t *const symbol = pp_token.base.symbol;
1752         if (!symbol)
1753                 goto no_ident;
1754
1755         if (pp_token.kind != T_IDENTIFIER) {
1756                 switch (symbol->string[0]) {
1757                 case SYMBOL_CASES:
1758 dollar_sign:
1759                         break;
1760
1761                 default:
1762 no_ident:
1763                         errorf(&pp_token.base.source_position, "expected identifier after %s, got %K", context, &pp_token);
1764                         return false;
1765                 }
1766         }
1767
1768         /* TODO turn this into a flag in pp_def. */
1769         switch (symbol->pp_ID) {
1770         /* §6.10.8:4 */
1771         case TP_defined:
1772                 errorf(&pp_token.base.source_position, "%K cannot be used as macro name in %s", &pp_token, context);
1773                 return false;
1774
1775         default:
1776                 return true;
1777         }
1778 }
1779
1780 static void parse_define_directive(void)
1781 {
1782         eat_pp(TP_define);
1783         if (skip_mode) {
1784                 eat_pp_directive();
1785                 return;
1786         }
1787
1788         assert(obstack_object_size(&pp_obstack) == 0);
1789
1790         if (!is_defineable_token("#define"))
1791                 goto error_out;
1792         symbol_t *const symbol = pp_token.base.symbol;
1793
1794         pp_definition_t *new_definition
1795                 = obstack_alloc(&pp_obstack, sizeof(new_definition[0]));
1796         memset(new_definition, 0, sizeof(new_definition[0]));
1797         new_definition->symbol          = symbol;
1798         new_definition->source_position = input.position;
1799
1800         /* this is probably the only place where spaces are significant in the
1801          * lexer (except for the fact that they separate tokens). #define b(x)
1802          * is something else than #define b (x) */
1803         if (input.c == '(') {
1804                 next_input_token();
1805                 eat_token('(');
1806
1807                 while (true) {
1808                         switch (pp_token.kind) {
1809                         case T_DOTDOTDOT:
1810                                 new_definition->is_variadic = true;
1811                                 eat_token(T_DOTDOTDOT);
1812                                 if (pp_token.kind != ')') {
1813                                         errorf(&input.position,
1814                                                         "'...' not at end of macro argument list");
1815                                         goto error_out;
1816                                 }
1817                                 break;
1818
1819                         case T_IDENTIFIER: {
1820                                 pp_definition_t parameter;
1821                                 memset(&parameter, 0, sizeof(parameter));
1822                                 parameter.source_position = pp_token.base.source_position;
1823                                 parameter.symbol          = pp_token.base.symbol;
1824                                 parameter.is_parameter    = true;
1825                                 obstack_grow(&pp_obstack, &parameter, sizeof(parameter));
1826                                 eat_token(T_IDENTIFIER);
1827
1828                                 if (pp_token.kind == ',') {
1829                                         eat_token(',');
1830                                         break;
1831                                 }
1832
1833                                 if (pp_token.kind != ')') {
1834                                         errorf(&pp_token.base.source_position,
1835                                                "expected ',' or ')' after identifier, got %K",
1836                                                &pp_token);
1837                                         goto error_out;
1838                                 }
1839                                 break;
1840                         }
1841
1842                         case ')':
1843                                 eat_token(')');
1844                                 goto finish_argument_list;
1845
1846                         default:
1847                                 errorf(&pp_token.base.source_position,
1848                                        "expected identifier, '...' or ')' in #define argument list, got %K",
1849                                        &pp_token);
1850                                 goto error_out;
1851                         }
1852                 }
1853
1854         finish_argument_list:
1855                 new_definition->has_parameters = true;
1856                 size_t size = obstack_object_size(&pp_obstack);
1857                 new_definition->n_parameters
1858                         = size / sizeof(new_definition->parameters[0]);
1859                 new_definition->parameters = obstack_finish(&pp_obstack);
1860                 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1861                         pp_definition_t *param    = &new_definition->parameters[i];
1862                         symbol_t        *symbol   = param->symbol;
1863                         pp_definition_t *previous = symbol->pp_definition;
1864                         if (previous != NULL
1865                             && previous->function_definition == new_definition) {
1866                                 errorf(&param->source_position,
1867                                        "duplicate macro parameter '%Y'", symbol);
1868                                 param->symbol = sym_anonymous;
1869                                 continue;
1870                         }
1871                         param->parent_expansion    = previous;
1872                         param->function_definition = new_definition;
1873                         symbol->pp_definition      = param;
1874                 }
1875         } else {
1876                 next_input_token();
1877         }
1878
1879         /* construct token list */
1880         assert(obstack_object_size(&pp_obstack) == 0);
1881         bool next_must_be_param = false;
1882         while (!info.at_line_begin) {
1883                 if (pp_token.kind == T_IDENTIFIER) {
1884                         const symbol_t  *symbol     = pp_token.base.symbol;
1885                         pp_definition_t *definition = symbol->pp_definition;
1886                         if (definition != NULL
1887                             && definition->function_definition == new_definition) {
1888                             pp_token.kind                = T_MACRO_PARAMETER;
1889                             pp_token.macro_parameter.def = definition;
1890                         }
1891                 }
1892                 if (next_must_be_param && pp_token.kind != T_MACRO_PARAMETER) {
1893                         missing_macro_param_error();
1894                 }
1895                 saved_token_t saved_token;
1896                 saved_token.token = pp_token;
1897                 saved_token.had_whitespace = info.had_whitespace;
1898                 obstack_grow(&pp_obstack, &saved_token, sizeof(saved_token));
1899                 next_must_be_param
1900                         = new_definition->has_parameters && pp_token.kind == '#';
1901                 next_input_token();
1902         }
1903         if (next_must_be_param)
1904                 missing_macro_param_error();
1905
1906         new_definition->list_len   = obstack_object_size(&pp_obstack)
1907                 / sizeof(new_definition->token_list[0]);
1908         new_definition->token_list = obstack_finish(&pp_obstack);
1909
1910         if (new_definition->has_parameters) {
1911                 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1912                         pp_definition_t *param      = &new_definition->parameters[i];
1913                         symbol_t        *symbol     = param->symbol;
1914                         if (symbol == sym_anonymous)
1915                                 continue;
1916                         assert(symbol->pp_definition == param);
1917                         assert(param->function_definition == new_definition);
1918                         symbol->pp_definition   = param->parent_expansion;
1919                         param->parent_expansion = NULL;
1920                 }
1921         }
1922
1923         pp_definition_t *old_definition = symbol->pp_definition;
1924         if (old_definition != NULL) {
1925                 if (!pp_definitions_equal(old_definition, new_definition)) {
1926                         warningf(WARN_OTHER, &input.position, "multiple definition of macro '%Y' (first defined %P)", symbol, &old_definition->source_position);
1927                 } else {
1928                         /* reuse the old definition */
1929                         obstack_free(&pp_obstack, new_definition);
1930                         new_definition = old_definition;
1931                 }
1932         }
1933
1934         symbol->pp_definition = new_definition;
1935         return;
1936
1937 error_out:
1938         if (obstack_object_size(&pp_obstack) > 0) {
1939                 char *ptr = obstack_finish(&pp_obstack);
1940                 obstack_free(&pp_obstack, ptr);
1941         }
1942         eat_pp_directive();
1943 }
1944
1945 static void parse_undef_directive(void)
1946 {
1947         eat_pp(TP_undef);
1948         if (skip_mode) {
1949                 eat_pp_directive();
1950                 return;
1951         }
1952
1953         if (!is_defineable_token("#undef")) {
1954                 eat_pp_directive();
1955                 return;
1956         }
1957
1958         pp_token.base.symbol->pp_definition = NULL;
1959         next_input_token();
1960
1961         if (!info.at_line_begin) {
1962                 warningf(WARN_OTHER, &input.position, "extra tokens at end of #undef directive");
1963         }
1964         eat_pp_directive();
1965 }
1966
1967 /** behind an #include we can have the special headername lexems.
1968  * They're only allowed behind an #include so they're not recognized
1969  * by the normal next_preprocessing_token. We handle them as a special
1970  * exception here */
1971 static const char *parse_headername(bool *system_include)
1972 {
1973         if (info.at_line_begin) {
1974                 parse_error("expected headername after #include");
1975                 return NULL;
1976         }
1977
1978         /* check wether we have a "... or <... headername */
1979         source_position_t position = input.position;
1980         switch (input.c) {
1981         {
1982                 utf32 delimiter;
1983         case '<': delimiter = '>'; *system_include = true;  goto parse_name;
1984         case '"': delimiter = '"'; *system_include = false; goto parse_name;
1985 parse_name:
1986                 assert(obstack_object_size(&symbol_obstack) == 0);
1987                 next_char();
1988                 while (true) {
1989                         switch (input.c) {
1990                         case NEWLINE:
1991                         case EOF:
1992                                 {
1993                                         char *dummy = obstack_finish(&symbol_obstack);
1994                                         obstack_free(&symbol_obstack, dummy);
1995                                 }
1996                                 errorf(&pp_token.base.source_position,
1997                                        "header name without closing '%c'", (char)delimiter);
1998                                 return NULL;
1999
2000                         default:
2001                                 if (input.c == delimiter) {
2002                                         next_char();
2003                                         goto finish_headername;
2004                                 } else {
2005                                         obstack_1grow(&symbol_obstack, (char)input.c);
2006                                         next_char();
2007                                 }
2008                                 break;
2009                         }
2010                 }
2011                 /* we should never be here */
2012         }
2013
2014         default:
2015                 next_preprocessing_token();
2016                 if (info.at_line_begin) {
2017                         /* TODO: if we are already in the new line then we parsed more than
2018                          * wanted. We reuse the token, but could produce following errors
2019                          * misbehaviours... */
2020                         goto error_invalid_input;
2021                 }
2022                 if (pp_token.kind == T_STRING_LITERAL) {
2023                         *system_include = false;
2024                         return pp_token.literal.string.begin;
2025                 } else if (pp_token.kind == '<') {
2026                         *system_include = true;
2027                         assert(obstack_object_size(&pp_obstack) == 0);
2028                         while (true) {
2029                                 next_preprocessing_token();
2030                                 if (info.at_line_begin) {
2031                                         /* TODO: we shouldn't have parsed/expanded something on the
2032                                          * next line yet... */
2033                                         char *dummy = obstack_finish(&pp_obstack);
2034                                         obstack_free(&pp_obstack, dummy);
2035                                         goto error_invalid_input;
2036                                 }
2037                                 if (pp_token.kind == '>')
2038                                         break;
2039
2040                                 saved_token_t saved;
2041                                 saved.token          = pp_token;
2042                                 saved.had_whitespace = info.had_whitespace;
2043                                 obstack_grow(&pp_obstack, &saved, sizeof(saved));
2044                         }
2045                         size_t size = obstack_object_size(&pp_obstack);
2046                         assert(size % sizeof(saved_token_t) == 0);
2047                         size_t n_tokens = size / sizeof(saved_token_t);
2048                         saved_token_t *tokens = obstack_finish(&pp_obstack);
2049                         assert(obstack_object_size(&symbol_obstack) == 0);
2050                         for (size_t i = 0; i < n_tokens; ++i) {
2051                                 const saved_token_t *saved = &tokens[i];
2052                                 if (i > 0 && saved->had_whitespace)
2053                                         obstack_1grow(&symbol_obstack, ' ');
2054                                 grow_token(&symbol_obstack, &saved->token);
2055                         }
2056                         obstack_free(&pp_obstack, tokens);
2057                         goto finish_headername;
2058                 } else {
2059 error_invalid_input:
2060                         {
2061                                 char *dummy = obstack_finish(&symbol_obstack);
2062                                 obstack_free(&symbol_obstack, dummy);
2063                         }
2064
2065                         errorf(&pp_token.base.source_position,
2066                                "expected \"FILENAME\" or <FILENAME> after #include");
2067                         return NULL;
2068                 }
2069         }
2070
2071 finish_headername:
2072         obstack_1grow(&symbol_obstack, '\0');
2073         char *const  headername = obstack_finish(&symbol_obstack);
2074         const char  *identified = identify_string(headername);
2075         pp_token.base.source_position = position;
2076         return identified;
2077 }
2078
2079 static bool do_include(bool const bracket_include, bool const include_next, char const *const headername)
2080 {
2081         size_t const        headername_len = strlen(headername);
2082         searchpath_entry_t *entry;
2083         if (include_next) {
2084                 entry = input.path      ? input.path->next
2085                       : bracket_include ? bracket_searchpath.first
2086                       : quote_searchpath.first;
2087         } else {
2088                 if (!bracket_include) {
2089                         /* put dirname of current input on obstack */
2090                         const char *filename   = input.position.input_name;
2091                         const char *last_slash = strrchr(filename, '/');
2092                         const char *full_name;
2093                         if (last_slash != NULL) {
2094                                 size_t len = last_slash - filename;
2095                                 obstack_grow(&symbol_obstack, filename, len + 1);
2096                                 obstack_grow0(&symbol_obstack, headername, headername_len);
2097                                 char *complete_path = obstack_finish(&symbol_obstack);
2098                                 full_name = identify_string(complete_path);
2099                         } else {
2100                                 full_name = headername;
2101                         }
2102
2103                         FILE *file = fopen(full_name, "r");
2104                         if (file != NULL) {
2105                                 switch_pp_input(file, full_name, NULL, false);
2106                                 return true;
2107                         }
2108                         entry = quote_searchpath.first;
2109                 } else {
2110                         entry = bracket_searchpath.first;
2111                 }
2112         }
2113
2114         assert(obstack_object_size(&symbol_obstack) == 0);
2115         /* check searchpath */
2116         for (; entry; entry = entry->next) {
2117             const char *path = entry->path;
2118             size_t      len  = strlen(path);
2119                 obstack_grow(&symbol_obstack, path, len);
2120                 if (path[len-1] != '/')
2121                         obstack_1grow(&symbol_obstack, '/');
2122                 obstack_grow(&symbol_obstack, headername, headername_len+1);
2123
2124                 char *complete_path = obstack_finish(&symbol_obstack);
2125                 FILE *file          = fopen(complete_path, "r");
2126                 if (file != NULL) {
2127                         const char *filename = identify_string(complete_path);
2128                         switch_pp_input(file, filename, entry, entry->is_system_path);
2129                         return true;
2130                 } else {
2131                         obstack_free(&symbol_obstack, complete_path);
2132                 }
2133         }
2134
2135         return false;
2136 }
2137
2138 static void parse_include_directive(bool const include_next)
2139 {
2140         if (skip_mode) {
2141                 eat_pp_directive();
2142                 return;
2143         }
2144
2145         /* do not eat the TP_include, since it would already parse the next token
2146          * which needs special handling here. */
2147         skip_till_newline(true);
2148         bool system_include;
2149         const char *headername = parse_headername(&system_include);
2150         if (headername == NULL) {
2151                 eat_pp_directive();
2152                 return;
2153         }
2154
2155         bool had_nonwhitespace = skip_till_newline(false);
2156         if (had_nonwhitespace) {
2157                 warningf(WARN_OTHER, &input.position,
2158                          "extra tokens at end of #include directive");
2159         }
2160
2161         if (n_inputs > INCLUDE_LIMIT) {
2162                 errorf(&pp_token.base.source_position, "#include nested too deeply");
2163                 /* eat \n or EOF */
2164                 next_input_token();
2165                 return;
2166         }
2167
2168         /* switch inputs */
2169         info.whitespace_at_line_begin = 0;
2170         info.had_whitespace           = false;
2171         info.at_line_begin            = true;
2172         emit_newlines();
2173         push_input();
2174         bool res = do_include(system_include, include_next, headername);
2175         if (res) {
2176                 next_input_token();
2177         } else {
2178                 errorf(&pp_token.base.source_position, "failed including '%s': %s", headername, strerror(errno));
2179                 pop_restore_input();
2180         }
2181 }
2182
2183 static pp_conditional_t *push_conditional(void)
2184 {
2185         pp_conditional_t *conditional
2186                 = obstack_alloc(&pp_obstack, sizeof(*conditional));
2187         memset(conditional, 0, sizeof(*conditional));
2188
2189         conditional->parent = conditional_stack;
2190         conditional_stack   = conditional;
2191
2192         return conditional;
2193 }
2194
2195 static void pop_conditional(void)
2196 {
2197         assert(conditional_stack != NULL);
2198         conditional_stack = conditional_stack->parent;
2199 }
2200
2201 void check_unclosed_conditionals(void)
2202 {
2203         while (conditional_stack != NULL) {
2204                 pp_conditional_t *conditional = conditional_stack;
2205
2206                 if (conditional->in_else) {
2207                         errorf(&conditional->source_position, "unterminated #else");
2208                 } else {
2209                         errorf(&conditional->source_position, "unterminated condition");
2210                 }
2211                 pop_conditional();
2212         }
2213 }
2214
2215 static void parse_ifdef_ifndef_directive(bool const is_ifdef)
2216 {
2217         bool condition;
2218         eat_pp(is_ifdef ? TP_ifdef : TP_ifndef);
2219
2220         if (skip_mode) {
2221                 eat_pp_directive();
2222                 pp_conditional_t *conditional = push_conditional();
2223                 conditional->source_position  = pp_token.base.source_position;
2224                 conditional->skip             = true;
2225                 return;
2226         }
2227
2228         if (pp_token.kind != T_IDENTIFIER || info.at_line_begin) {
2229                 errorf(&pp_token.base.source_position,
2230                        "expected identifier after #%s, got %K",
2231                        is_ifdef ? "ifdef" : "ifndef", &pp_token);
2232                 eat_pp_directive();
2233
2234                 /* just take the true case in the hope to avoid further errors */
2235                 condition = true;
2236         } else {
2237                 /* evaluate wether we are in true or false case */
2238                 condition = (bool)pp_token.base.symbol->pp_definition == is_ifdef;
2239                 eat_token(T_IDENTIFIER);
2240
2241                 if (!info.at_line_begin) {
2242                         errorf(&pp_token.base.source_position,
2243                                "extra tokens at end of #%s",
2244                                is_ifdef ? "ifdef" : "ifndef");
2245                         eat_pp_directive();
2246                 }
2247         }
2248
2249         pp_conditional_t *conditional = push_conditional();
2250         conditional->source_position  = pp_token.base.source_position;
2251         conditional->condition        = condition;
2252
2253         if (!condition) {
2254                 skip_mode = true;
2255         }
2256 }
2257
2258 static void parse_else_directive(void)
2259 {
2260         eat_pp(TP_else);
2261
2262         if (!info.at_line_begin) {
2263                 if (!skip_mode) {
2264                         warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #else");
2265                 }
2266                 eat_pp_directive();
2267         }
2268
2269         pp_conditional_t *conditional = conditional_stack;
2270         if (conditional == NULL) {
2271                 errorf(&pp_token.base.source_position, "#else without prior #if");
2272                 return;
2273         }
2274
2275         if (conditional->in_else) {
2276                 errorf(&pp_token.base.source_position,
2277                        "#else after #else (condition started %P)",
2278                        &conditional->source_position);
2279                 skip_mode = true;
2280                 return;
2281         }
2282
2283         conditional->in_else = true;
2284         if (!conditional->skip) {
2285                 skip_mode = conditional->condition;
2286         }
2287         conditional->source_position = pp_token.base.source_position;
2288 }
2289
2290 static void parse_endif_directive(void)
2291 {
2292         eat_pp(TP_endif);
2293
2294         if (!info.at_line_begin) {
2295                 if (!skip_mode) {
2296                         warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #endif");
2297                 }
2298                 eat_pp_directive();
2299         }
2300
2301         pp_conditional_t *conditional = conditional_stack;
2302         if (conditional == NULL) {
2303                 errorf(&pp_token.base.source_position, "#endif without prior #if");
2304                 return;
2305         }
2306
2307         if (!conditional->skip) {
2308                 skip_mode = false;
2309         }
2310         pop_conditional();
2311 }
2312
/** The pragma named in a "#pragma STDC ..." directive. */
typedef enum stdc_pragma_kind_t {
        STDC_UNKNOWN = 0,      /**< not a (valid) STDC pragma */
        STDC_FP_CONTRACT,      /**< FP_CONTRACT */
        STDC_FENV_ACCESS,      /**< FENV_ACCESS */
        STDC_CX_LIMITED_RANGE  /**< CX_LIMITED_RANGE */
} stdc_pragma_kind_t;
2319
/** The argument given to a "#pragma STDC ..." directive. */
typedef enum stdc_pragma_value_kind_t {
        STDC_VALUE_UNKNOWN = 0,  /**< anything but ON, OFF or DEFAULT */
        STDC_VALUE_ON,           /**< ON */
        STDC_VALUE_OFF,          /**< OFF */
        STDC_VALUE_DEFAULT       /**< DEFAULT */
} stdc_pragma_value_kind_t;
2326
2327 static void parse_pragma_directive(void)
2328 {
2329         eat_pp(TP_pragma);
2330         if (skip_mode) {
2331                 eat_pp_directive();
2332                 return;
2333         }
2334
2335         if (pp_token.kind != T_IDENTIFIER) {
2336                 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
2337                          "expected identifier after #pragma");
2338                 eat_pp_directive();
2339                 return;
2340         }
2341
2342         stdc_pragma_kind_t kind = STDC_UNKNOWN;
2343         if (pp_token.base.symbol->pp_ID == TP_STDC && c_mode & _C99) {
2344                 /* a STDC pragma */
2345                 next_input_token();
2346
2347                 switch (pp_token.base.symbol->pp_ID) {
2348                 case TP_FP_CONTRACT:      kind = STDC_FP_CONTRACT;      break;
2349                 case TP_FENV_ACCESS:      kind = STDC_FENV_ACCESS;      break;
2350                 case TP_CX_LIMITED_RANGE: kind = STDC_CX_LIMITED_RANGE; break;
2351                 default:                  break;
2352                 }
2353                 if (kind != STDC_UNKNOWN) {
2354                         next_input_token();
2355                         stdc_pragma_value_kind_t value;
2356                         switch (pp_token.base.symbol->pp_ID) {
2357                         case TP_ON:      value = STDC_VALUE_ON;      break;
2358                         case TP_OFF:     value = STDC_VALUE_OFF;     break;
2359                         case TP_DEFAULT: value = STDC_VALUE_DEFAULT; break;
2360                         default:         value = STDC_VALUE_UNKNOWN; break;
2361                         }
2362                         if (value == STDC_VALUE_UNKNOWN) {
2363                                 kind = STDC_UNKNOWN;
2364                                 errorf(&pp_token.base.source_position, "bad STDC pragma argument");
2365                         }
2366                 }
2367         }
2368         eat_pp_directive();
2369         if (kind == STDC_UNKNOWN) {
2370                 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
2371                          "encountered unknown #pragma");
2372         }
2373 }
2374
2375 static void parse_line_directive(void)
2376 {
2377         if (pp_token.kind != T_NUMBER) {
2378                 if (!skip_mode)
2379                         parse_error("expected integer");
2380         } else {
2381                 char      *end;
2382                 long const line = strtol(pp_token.literal.string.begin, &end, 0);
2383                 if (*end == '\0') {
2384                         /* use offset -1 as this is about the next line */
2385                         input.position.lineno = line - 1;
2386                         /* force output of line */
2387                         input.output_line = input.position.lineno - 20;
2388                 } else {
2389                         if (!skip_mode) {
2390                                 errorf(&input.position, "'%S' is not a valid line number",
2391                                            &pp_token.literal.string);
2392                         }
2393                 }
2394                 next_input_token();
2395                 if (info.at_line_begin)
2396                         return;
2397         }
2398         if (pp_token.kind == T_STRING_LITERAL
2399             && pp_token.literal.string.encoding == STRING_ENCODING_CHAR) {
2400                 input.position.input_name       = pp_token.literal.string.begin;
2401                 input.position.is_system_header = false;
2402                 next_input_token();
2403
2404                 /* attempt to parse numeric flags as outputted by gcc preprocessor */
2405                 while (!info.at_line_begin && pp_token.kind == T_NUMBER) {
2406                         /* flags:
2407                          * 1 - indicates start of a new file
2408                          * 2 - indicates return from a file
2409                          * 3 - indicates system header
2410                          * 4 - indicates implicit extern "C" in C++ mode
2411                          *
2412                          * currently we're only interested in "3"
2413                          */
2414                         if (streq(pp_token.literal.string.begin, "3")) {
2415                                 input.position.is_system_header = true;
2416                         }
2417                         next_input_token();
2418                 }
2419         }
2420
2421         eat_pp_directive();
2422 }
2423
2424 static void parse_error_directive(void)
2425 {
2426         if (skip_mode) {
2427                 eat_pp_directive();
2428                 return;
2429         }
2430
2431         bool const old_resolve_escape_sequences = resolve_escape_sequences;
2432         resolve_escape_sequences = false;
2433
2434         source_position_t const pos = pp_token.base.source_position;
2435         do {
2436                 if (info.had_whitespace && obstack_object_size(&pp_obstack) != 0)
2437                         obstack_1grow(&pp_obstack, ' ');
2438
2439                 switch (pp_token.kind) {
2440                 case T_NUMBER: {
2441                         string_t const *const str = &pp_token.literal.string;
2442                         obstack_grow(&pp_obstack, str->begin, str->size);
2443                         break;
2444                 }
2445
2446                 {
2447                         char delim;
2448                 case T_STRING_LITERAL:     delim =  '"'; goto string;
2449                 case T_CHARACTER_CONSTANT: delim = '\''; goto string;
2450 string:;
2451                         string_t const *const str = &pp_token.literal.string;
2452                         char     const *const enc = get_string_encoding_prefix(str->encoding);
2453                         obstack_printf(&pp_obstack, "%s%c%s%c", enc, delim, str->begin, delim);
2454                         break;
2455                 }
2456
2457                 default: {
2458                         char const *const str = pp_token.base.symbol->string;
2459                         obstack_grow(&pp_obstack, str, strlen(str));
2460                         break;
2461                 }
2462                 }
2463
2464                 next_input_token();
2465         } while (!info.at_line_begin);
2466
2467         resolve_escape_sequences = old_resolve_escape_sequences;
2468
2469         obstack_1grow(&pp_obstack, '\0');
2470         char *const str = obstack_finish(&pp_obstack);
2471         errorf(&pos, "#%s", str);
2472         obstack_free(&pp_obstack, str);
2473 }
2474
2475 static void parse_preprocessing_directive(void)
2476 {
2477         eat_token('#');
2478
2479         if (info.at_line_begin) {
2480                 /* empty directive */
2481                 return;
2482         }
2483
2484         if (pp_token.base.symbol) {
2485                 switch (pp_token.base.symbol->pp_ID) {
2486                 case TP_define:       parse_define_directive();            break;
2487                 case TP_else:         parse_else_directive();              break;
2488                 case TP_endif:        parse_endif_directive();             break;
2489                 case TP_error:        parse_error_directive();             break;
2490                 case TP_ifdef:        parse_ifdef_ifndef_directive(true);  break;
2491                 case TP_ifndef:       parse_ifdef_ifndef_directive(false); break;
2492                 case TP_include:      parse_include_directive(false);      break;
2493                 case TP_include_next: parse_include_directive(true);       break;
2494                 case TP_line:         next_input_token(); goto line_directive;
2495                 case TP_pragma:       parse_pragma_directive();            break;
2496                 case TP_undef:        parse_undef_directive();             break;
2497                 default:              goto skip;
2498                 }
2499         } else if (pp_token.kind == T_NUMBER) {
2500 line_directive:
2501                 parse_line_directive();
2502         } else {
2503 skip:
2504                 if (!skip_mode) {
2505                         errorf(&pp_token.base.source_position, "invalid preprocessing directive #%K", &pp_token);
2506                 }
2507                 eat_pp_directive();
2508         }
2509
2510         assert(info.at_line_begin);
2511 }
2512
2513 static void finish_current_argument(void)
2514 {
2515         if (current_argument == NULL)
2516                 return;
2517         size_t size = obstack_object_size(&pp_obstack);
2518         current_argument->list_len   = size/sizeof(current_argument->token_list[0]);
2519         current_argument->token_list = obstack_finish(&pp_obstack);
2520 }
2521
2522 void next_preprocessing_token(void)
2523 {
2524 restart:
2525         if (!expand_next()) {
2526                 do {
2527                         next_input_token();
2528                         while (pp_token.kind == '#' && info.at_line_begin) {
2529                                 parse_preprocessing_directive();
2530                         }
2531                 } while (skip_mode && pp_token.kind != T_EOF);
2532         }
2533
2534         const token_kind_t kind = pp_token.kind;
2535         if (current_call == NULL || argument_expanding != NULL) {
2536                 symbol_t *const symbol = pp_token.base.symbol;
2537                 if (symbol) {
2538                         if (kind == T_MACRO_PARAMETER) {
2539                                 assert(current_expansion != NULL);
2540                                 start_expanding(pp_token.macro_parameter.def);
2541                                 goto restart;
2542                         }
2543
2544                         pp_definition_t *const pp_definition = symbol->pp_definition;
2545                         if (pp_definition != NULL && !pp_definition->is_expanding) {
2546                                 if (pp_definition->has_parameters) {
2547
2548                                         /* check if next token is a '(' */
2549                                         whitespace_info_t old_info   = info;
2550                                         token_kind_t      next_token = peek_expansion();
2551                                         if (next_token == T_EOF) {
2552                                                 info.at_line_begin  = false;
2553                                                 info.had_whitespace = false;
2554                                                 skip_whitespace();
2555                                                 if (input.c == '(') {
2556                                                         next_token = '(';
2557                                                 }
2558                                         }
2559
2560                                         if (next_token == '(') {
2561                                                 if (current_expansion == NULL)
2562                                                         expansion_pos = pp_token.base.source_position;
2563                                                 next_preprocessing_token();
2564                                                 assert(pp_token.kind == '(');
2565
2566                                                 pp_definition->parent_expansion = current_expansion;
2567                                                 current_call              = pp_definition;
2568                                                 current_call->expand_pos  = 0;
2569                                                 current_call->expand_info = old_info;
2570                                                 if (current_call->n_parameters > 0) {
2571                                                         current_argument = &current_call->parameters[0];
2572                                                         assert(argument_brace_count == 0);
2573                                                 }
2574                                                 goto restart;
2575                                         } else {
2576                                                 /* skip_whitespaces() skipped newlines and whitespace,
2577                                                  * remember results for next token */
2578                                                 next_info = info;
2579                                                 info      = old_info;
2580                                                 return;
2581                                         }
2582                                 } else {
2583                                         if (current_expansion == NULL)
2584                                                 expansion_pos = pp_token.base.source_position;
2585                                         start_expanding(pp_definition);
2586                                         goto restart;
2587                                 }
2588                         }
2589                 }
2590         }
2591
2592         if (current_call != NULL) {
2593                 /* current_call != NULL */
2594                 if (kind == '(') {
2595                         ++argument_brace_count;
2596                 } else if (kind == ')') {
2597                         if (argument_brace_count > 0) {
2598                                 --argument_brace_count;
2599                         } else {
2600                                 finish_current_argument();
2601                                 assert(kind == ')');
2602                                 start_expanding(current_call);
2603                                 info = current_call->expand_info;
2604                                 current_call     = NULL;
2605                                 current_argument = NULL;
2606                                 goto restart;
2607                         }
2608                 } else if (kind == ',' && argument_brace_count == 0) {
2609                         finish_current_argument();
2610                         current_call->expand_pos++;
2611                         if (current_call->expand_pos >= current_call->n_parameters) {
2612                                 errorf(&pp_token.base.source_position,
2613                                            "too many arguments passed for macro '%Y'",
2614                                            current_call->symbol);
2615                                 current_argument = NULL;
2616                         } else {
2617                                 current_argument
2618                                         = &current_call->parameters[current_call->expand_pos];
2619                         }
2620                         goto restart;
2621                 } else if (kind == T_MACRO_PARAMETER) {
2622                         /* parameters have to be fully expanded before being used as
2623                          * parameters for another macro-call */
2624                         assert(current_expansion != NULL);
2625                         pp_definition_t *argument = pp_token.macro_parameter.def;
2626                         argument_expanding = argument;
2627                         start_expanding(argument);
2628                         goto restart;
2629                 } else if (kind == T_EOF) {
2630                         errorf(&expansion_pos,
2631                                "reached end of file while parsing arguments for '%Y'",
2632                                current_call->symbol);
2633                         return;
2634                 }
2635                 if (current_argument != NULL) {
2636                         saved_token_t saved;
2637                         saved.token = pp_token;
2638                         saved.had_whitespace = info.had_whitespace;
2639                         obstack_grow(&pp_obstack, &saved, sizeof(saved));
2640                 }
2641                 goto restart;
2642         }
2643 }
2644
2645 void append_include_path(searchpath_t *paths, const char *path)
2646 {
2647         searchpath_entry_t *entry = OALLOCZ(&config_obstack, searchpath_entry_t);
2648         entry->path           = path;
2649         entry->is_system_path = paths->is_system_path;
2650
2651         *paths->anchor = entry;
2652         paths->anchor  = &entry->next;
2653 }
2654
2655 static void append_env_paths(searchpath_t *paths, const char *envvar)
2656 {
2657         const char *val = getenv(envvar);
2658         if (val != NULL && *val != '\0') {
2659                 const char *begin = val;
2660                 const char *c;
2661                 do {
2662                         c = begin;
2663                         while (*c != '\0' && *c != ':')
2664                                 ++c;
2665
2666                         size_t len = c-begin;
2667                         if (len == 0) {
2668                                 /* use "." for gcc compatibility (Matze: I would expect that
2669                                  * nothing happens for an empty entry...) */
2670                                 append_include_path(paths, ".");
2671                         } else {
2672                                 char *const string = obstack_copy0(&config_obstack, begin, len);
2673                                 append_include_path(paths, string);
2674                         }
2675
2676                         begin = c+1;
2677                         /* skip : */
2678                         if (*begin == ':')
2679                                 ++begin;
2680                 } while(*c != '\0');
2681         }
2682 }
2683
2684 static void append_searchpath(searchpath_t *path, const searchpath_t *append)
2685 {
2686         *path->anchor = append->first;
2687 }
2688
2689 static void setup_include_path(void)
2690 {
2691         /* built-in paths */
2692         append_include_path(&system_searchpath, "/usr/include");
2693
2694         /* parse environment variable */
2695         append_env_paths(&bracket_searchpath, "CPATH");
2696         append_env_paths(&system_searchpath,
2697                          c_mode & _CXX ? "CPLUS_INCLUDE_PATH" : "C_INCLUDE_PATH");
2698
2699         /* append system search path to bracket searchpath */
2700         append_searchpath(&system_searchpath,  &after_searchpath);
2701         append_searchpath(&bracket_searchpath, &system_searchpath);
2702         append_searchpath(&quote_searchpath, &bracket_searchpath);
2703 }
2704
2705 static void input_error(unsigned const delta_lines, unsigned const delta_cols, char const *const message)
2706 {
2707         source_position_t pos = pp_token.base.source_position;
2708         pos.lineno += delta_lines;
2709         pos.colno  += delta_cols;
2710         errorf(&pos, "%s", message);
2711 }
2712
2713 void init_include_paths(void)
2714 {
2715         obstack_init(&config_obstack);
2716 }
2717
2718 void init_preprocessor(void)
2719 {
2720         init_symbols();
2721
2722         obstack_init(&pp_obstack);
2723         obstack_init(&input_obstack);
2724         strset_init(&stringset);
2725
2726         setup_include_path();
2727
2728         set_input_error_callback(input_error);
2729 }
2730
2731 void exit_preprocessor(void)
2732 {
2733         obstack_free(&input_obstack, NULL);
2734         obstack_free(&pp_obstack, NULL);
2735         obstack_free(&config_obstack, NULL);
2736
2737         strset_destroy(&stringset);
2738 }
2739
2740 int pptest_main(int argc, char **argv);
2741 int pptest_main(int argc, char **argv)
2742 {
2743         init_symbol_table();
2744         init_include_paths();
2745         init_preprocessor();
2746         init_tokens();
2747
2748         error_on_unknown_chars   = false;
2749         resolve_escape_sequences = false;
2750
2751         /* simplistic commandline parser */
2752         const char *filename = NULL;
2753         const char *output = NULL;
2754         for (int i = 1; i < argc; ++i) {
2755                 const char *opt = argv[i];
2756                 if (streq(opt, "-I")) {
2757                         append_include_path(&bracket_searchpath, argv[++i]);
2758                         continue;
2759                 } else if (streq(opt, "-E")) {
2760                         /* ignore */
2761                 } else if (streq(opt, "-o")) {
2762                         output = argv[++i];
2763                         continue;
2764                 } else if (opt[0] == '-') {
2765                         fprintf(stderr, "Unknown option '%s'\n", opt);
2766                 } else {
2767                         if (filename != NULL)
2768                                 fprintf(stderr, "Multiple inputs not supported\n");
2769                         filename = argv[i];
2770                 }
2771         }
2772         if (filename == NULL) {
2773                 fprintf(stderr, "No input specified\n");
2774                 return 1;
2775         }
2776
2777         if (output == NULL) {
2778                 out = stdout;
2779         } else {
2780                 out = fopen(output, "w");
2781                 if (out == NULL) {
2782                         fprintf(stderr, "Couldn't open output '%s'\n", output);
2783                         return 1;
2784                 }
2785         }
2786
2787         /* just here for gcc compatibility */
2788         fprintf(out, "# 1 \"%s\"\n", filename);
2789         fprintf(out, "# 1 \"<built-in>\"\n");
2790         fprintf(out, "# 1 \"<command-line>\"\n");
2791
2792         FILE *file = fopen(filename, "r");
2793         if (file == NULL) {
2794                 fprintf(stderr, "Couldn't open input '%s'\n", filename);
2795                 return 1;
2796         }
2797         switch_pp_input(file, filename, NULL, false);
2798
2799         for (;;) {
2800                 next_preprocessing_token();
2801                 if (pp_token.kind == T_EOF)
2802                         break;
2803                 emit_pp_token();
2804         }
2805
2806         fputc('\n', out);
2807         check_unclosed_conditionals();
2808         fclose(close_pp_input());
2809         if (out != stdout)
2810                 fclose(out);
2811
2812         exit_tokens();
2813         exit_preprocessor();
2814         exit_symbol_table();
2815
2816         return 0;
2817 }