/* preprocessor.c -- from the cparser repository (nsz Git, cparser/blob) */

   1 #include <config.h>
   2
   3 #include <assert.h>
   4 #include <errno.h>
   5 #include <string.h>
   6 #include <stdbool.h>
   7 #include <ctype.h>
   8
   9 #include "preprocessor.h"
  10 #include "token_t.h"
  11 #include "symbol_t.h"
  12 #include "adt/util.h"
  13 #include "adt/error.h"
  14 #include "adt/strutil.h"
  15 #include "adt/strset.h"
  16 #include "lang_features.h"
  17 #include "diagnostic.h"
  18 #include "string_rep.h"
  19 #include "input.h"
  20
/* Number of slots kept free at the front of pp_input_t.buf: next_real_char()
 * decodes new data behind them so that put_back() can step backwards. */
#define MAX_PUTBACK 3
/* NOTE(review): not referenced in the visible part of this file; presumably
 * the maximum nesting depth of included files -- confirm at the use site. */
#define INCLUDE_LIMIT 199  /* 199 is for gcc "compatibility" */
  23
/** Element type of pp_definition_t.token_list: a token plus a whitespace flag. */
typedef struct saved_token_t {
        token_t token;
        /** NOTE(review): presumably "token was preceded by whitespace" -- confirm */
        bool    had_whitespace;
} saved_token_t;
  28
/** Whitespace and line-start state recorded for a token. */
typedef struct whitespace_info_t {
        /** current token had whitespace in front of it */
        bool     had_whitespace;
        /** current token is at the beginning of a line.
         * => a "#" at line begin starts a preprocessing directive. */
        bool     at_line_begin;
        /** number of spaces before the first token in a line */
        unsigned whitespace_at_line_begin;
} whitespace_info_t;
  38
/**
 * A preprocessor definition.
 * NOTE(review): most field meanings below are inferred from their names and
 * from start_expanding(); confirm against the code that creates definitions.
 */
struct pp_definition_t {
        symbol_t          *symbol;
        source_position_t  source_position;
        /** set to the previous current_expansion by start_expanding() */
        pp_definition_t   *parent_expansion;
        /** reset to 0 by start_expanding(); presumably an index into token_list */
        size_t             expand_pos;
        whitespace_info_t  expand_info;
        bool               is_variadic    : 1;
        /** set to true by start_expanding() */
        bool               is_expanding   : 1;
        bool               has_parameters : 1;
        bool               is_parameter   : 1;
        pp_definition_t   *function_definition;
        size_t             n_parameters;
        pp_definition_t   *parameters;

        /* replacement */
        size_t             list_len;
        saved_token_t     *token_list;
};
  57
/**
 * One entry of a singly linked list of conditionals (linked through parent;
 * the file-scope conditional_stack has this pointer type).
 * NOTE(review): presumably one entry per open #if/#ifdef -- confirm.
 */
typedef struct pp_conditional_t pp_conditional_t;
struct pp_conditional_t {
        source_position_t  source_position;
        bool               condition;
        bool               in_else;
        /** conditional in skip mode (then+else gets skipped) */
        bool               skip;
        pp_conditional_t  *parent;
};
  67
/** State of one input being read by the preprocessor. */
typedef struct pp_input_t pp_input_t;
struct pp_input_t {
        /** stream handed to switch_pp_input(); returned by close_pp_input() */
        FILE               *file;
        /** decoder created with input_from_stream(), released with input_free() */
        input_t            *input;
        /** current character, or EOF */
        utf32               c;
        /** decoded characters; the first MAX_PUTBACK slots are put-back room */
        utf32               buf[1024+MAX_PUTBACK];
        /** one past the last valid character in buf (NULL before the first read) */
        const utf32        *bufend;
        /** next character to read from buf (NULL before the first read) */
        const utf32        *bufpos;
        source_position_t   position;
        /** next older saved input; only set on copies made by push_input() */
        pp_input_t         *parent;
        /** NOTE(review): presumably the line number last written to the output -- confirm */
        unsigned            output_line;
        /** search path entry passed to switch_pp_input() */
        searchpath_entry_t *path;
};
  81
/** One directory in a singly linked list of search path entries. */
struct searchpath_entry_t {
        const char         *path;
        searchpath_entry_t *next;
        bool                is_system_path;
};
  87
/** the input that is currently being read */
static pp_input_t      input;

/** saved inputs (see push_input()/pop_restore_input()), newest first */
static pp_input_t     *input_stack;
/** number of entries on input_stack */
static unsigned        n_inputs;
/** storage for the entries of input_stack */
static struct obstack  input_obstack;

static pp_conditional_t *conditional_stack;

token_t                  pp_token;
/** when false, the SYMBOL_CASES macros send '$' to a dollar_sign label */
bool                     allow_dollar_in_symbol   = true;
/** when false, parse_string() keeps escape sequences as written */
static bool              resolve_escape_sequences = true;
static bool              error_on_unknown_chars   = true;
static bool              skip_mode;
static FILE             *out;
static struct obstack    pp_obstack;
static struct obstack    config_obstack;
static const char       *printed_input_name = NULL;
static source_position_t expansion_pos;
/** definition being expanded; start_expanding() stores it as parent_expansion */
static pp_definition_t  *current_expansion  = NULL;
static pp_definition_t  *current_call       = NULL;
static pp_definition_t  *current_argument   = NULL;
static pp_definition_t  *argument_expanding = NULL;
static unsigned          argument_brace_count;
/** set of unique strings used by identify_string() */
static strset_t          stringset;
static token_kind_t      last_token;
 113
/**
 * A list of search path entries.
 * The instances defined in this file initialise anchor to the address of
 * their own first member.
 * NOTE(review): anchor presumably always points at the link where the next
 * entry gets appended -- confirm against the append code.
 */
struct searchpath_t {
        searchpath_entry_t  *first;
        searchpath_entry_t **anchor;
        bool                 is_system_path;
};
 119
/* The search path lists, all initially empty (first == NULL, anchor pointing
 * at first).  The third initialiser is is_system_path. */
searchpath_t bracket_searchpath = { NULL, &bracket_searchpath.first, false };
searchpath_t quote_searchpath   = { NULL, &quote_searchpath.first,   false };
searchpath_t system_searchpath  = { NULL, &system_searchpath.first,  true  };
searchpath_t after_searchpath   = { NULL, &after_searchpath.first,   true  };

static whitespace_info_t next_info; /* valid if had_whitespace is true */
static whitespace_info_t info;

/* forward declarations of functions that are used before their definition */
static inline void next_char(void);
static void next_input_token(void);
static void print_line_directive(const source_position_t *pos, const char *add);

/* symbols for the spellings ":>", "<:", "<%", "%:", "%:%:" and "%>";
 * filled in by init_symbols() */
static symbol_t *symbol_colongreater;
static symbol_t *symbol_lesscolon;
static symbol_t *symbol_lesspercent;
static symbol_t *symbol_percentcolon;
static symbol_t *symbol_percentcolonpercentcolon;
static symbol_t *symbol_percentgreater;

/* symbols for the spellings "L", "U", "u" and "u8"; filled in by init_symbols() */
static symbol_t *symbol_L;
static symbol_t *symbol_U;
static symbol_t *symbol_u;
static symbol_t *symbol_u8;
 143
 144 static void init_symbols(void)
 145 {
 146         symbol_colongreater             = symbol_table_insert(":>");
 147         symbol_lesscolon                = symbol_table_insert("<:");
 148         symbol_lesspercent              = symbol_table_insert("<%");
 149         symbol_percentcolon             = symbol_table_insert("%:");
 150         symbol_percentcolonpercentcolon = symbol_table_insert("%:%:");
 151         symbol_percentgreater           = symbol_table_insert("%>");
 152
 153         symbol_L  = symbol_table_insert("L");
 154         symbol_U  = symbol_table_insert("U");
 155         symbol_u  = symbol_table_insert("u");
 156         symbol_u8 = symbol_table_insert("u8");
 157 }
 158
 159 void switch_pp_input(FILE *const file, char const *const filename, searchpath_entry_t *const path, bool const is_system_header)
 160 {
 161         input.file                      = file;
 162         input.input                     = input_from_stream(file, NULL);
 163         input.bufend                    = NULL;
 164         input.bufpos                    = NULL;
 165         input.output_line               = 0;
 166         input.position.input_name       = filename;
 167         input.position.lineno           = 1;
 168         input.position.is_system_header = is_system_header;
 169         input.path                      = path;
 170
 171         /* indicate that we're at a new input */
 172         print_line_directive(&input.position, input_stack != NULL ? "1" : NULL);
 173
 174         /* place a virtual '\n' so we realize we're at line begin */
 175         input.position.lineno = 0;
 176         input.c               = '\n';
 177 }
 178
 179 FILE *close_pp_input(void)
 180 {
 181         input_free(input.input);
 182
 183         FILE* const file = input.file;
 184         assert(file);
 185
 186         input.input  = NULL;
 187         input.file   = NULL;
 188         input.bufend = NULL;
 189         input.bufpos = NULL;
 190         input.c      = EOF;
 191
 192         return file;
 193 }
 194
 195 static void push_input(void)
 196 {
 197         pp_input_t *const saved_input = obstack_copy(&input_obstack, &input, sizeof(input));
 198
 199         /* adjust buffer positions */
 200         if (input.bufpos != NULL)
 201                 saved_input->bufpos = saved_input->buf + (input.bufpos - input.buf);
 202         if (input.bufend != NULL)
 203                 saved_input->bufend = saved_input->buf + (input.bufend - input.buf);
 204
 205         saved_input->parent = input_stack;
 206         input_stack         = saved_input;
 207         ++n_inputs;
 208 }
 209
 210 static void pop_restore_input(void)
 211 {
 212         assert(n_inputs > 0);
 213         assert(input_stack != NULL);
 214
 215         pp_input_t *saved_input = input_stack;
 216
 217         memcpy(&input, saved_input, sizeof(input));
 218         input.parent = NULL;
 219
 220         /* adjust buffer positions */
 221         if (saved_input->bufpos != NULL)
 222                 input.bufpos = input.buf + (saved_input->bufpos - saved_input->buf);
 223         if (saved_input->bufend != NULL)
 224                 input.bufend = input.buf + (saved_input->bufend - saved_input->buf);
 225
 226         input_stack = saved_input->parent;
 227         obstack_free(&input_obstack, saved_input);
 228         --n_inputs;
 229 }
 230
 231 /**
 232  * Prints a parse error message at the current token.
 233  *
 234  * @param msg   the error message
 235  */
 236 static void parse_error(const char *msg)
 237 {
 238         errorf(&pp_token.base.source_position,  "%s", msg);
 239 }
 240
 241 static inline void next_real_char(void)
 242 {
 243         assert(input.bufpos <= input.bufend);
 244         if (input.bufpos >= input.bufend) {
 245                 size_t const n = decode(input.input, input.buf + MAX_PUTBACK, lengthof(input.buf) - MAX_PUTBACK);
 246                 if (n == 0) {
 247                         input.c = EOF;
 248                         return;
 249                 }
 250                 input.bufpos = input.buf + MAX_PUTBACK;
 251                 input.bufend = input.bufpos + n;
 252         }
 253         input.c = *input.bufpos++;
 254         ++input.position.colno;
 255 }
 256
 257 /**
 258  * Put a character back into the buffer.
 259  *
 260  * @param pc  the character to put back
 261  */
 262 static inline void put_back(utf32 const pc)
 263 {
 264         assert(input.bufpos > input.buf);
 265         *(--input.bufpos - input.buf + input.buf) = (char) pc;
 266         --input.position.colno;
 267 }
 268
/*
 * Line-end handling for use as "case NEWLINE:" inside a switch on input.c.
 *
 * Expands to the case labels '\r' and '\n'.  Consumes "\r", "\n" or "\r\n",
 * increments the line number and resets the column to 1.  The expansion ends
 * in a label named "newline", which the ':' written by the user completes;
 * the statements after it therefore run with input.c already holding the
 * first character of the next line.
 *
 * Because the expansion defines the label "newline", it can be used only once
 * per function.
 */
#define NEWLINE \
        '\r': \
                next_char(); \
                if (input.c == '\n') { \
        case '\n': \
                        next_char(); \
                } \
                ++input.position.lineno; \
                input.position.colno = 1; \
                goto newline; \
                newline // Let it look like an ordinary case label.

/* Asserts that the current character is c_type, then advances to the next
 * character. */
#define eat(c_type) (assert(input.c == c_type), next_char())
 282
/**
 * Handles a backslash in the input; must be called with input.c == '\\'.
 *
 * If a line end follows the backslash, both are consumed, the whitespace
 * count for the line begin is reset and input.c is the first character of
 * the following line.  Otherwise the character read after the backslash is
 * put back and input.c is '\\' again.
 */
static void maybe_concat_lines(void)
{
        eat('\\');

        switch (input.c) {
        case NEWLINE:
                /* backslash-newline: the two lines are joined */
                info.whitespace_at_line_begin = 0;
                return;

        default:
                break;
        }

        /* an ordinary backslash: undo the lookahead */
        put_back(input.c);
        input.c = '\\';
}
 299
 300 /**
 301  * Set c to the next input character, ie.
 302  * after expanding trigraphs.
 303  */
 304 static inline void next_char(void)
 305 {
 306         next_real_char();
 307
 308         /* filter trigraphs and concatenated lines */
 309         if (UNLIKELY(input.c == '\\')) {
 310                 maybe_concat_lines();
 311                 goto end_of_next_char;
 312         }
 313
 314         if (LIKELY(input.c != '?'))
 315                 goto end_of_next_char;
 316
 317         next_real_char();
 318         if (LIKELY(input.c != '?')) {
 319                 put_back(input.c);
 320                 input.c = '?';
 321                 goto end_of_next_char;
 322         }
 323
 324         next_real_char();
 325         switch (input.c) {
 326         case '=': input.c = '#'; break;
 327         case '(': input.c = '['; break;
 328         case '/': input.c = '\\'; maybe_concat_lines(); break;
 329         case ')': input.c = ']'; break;
 330         case '\'': input.c = '^'; break;
 331         case '<': input.c = '{'; break;
 332         case '!': input.c = '|'; break;
 333         case '>': input.c = '}'; break;
 334         case '-': input.c = '~'; break;
 335         default:
 336                 put_back(input.c);
 337                 put_back('?');
 338                 input.c = '?';
 339                 break;
 340         }
 341
 342 end_of_next_char:;
 343 #ifdef DEBUG_CHARS
 344         printf("nchar '%c'\n", input.c);
 345 #endif
 346 }
 347
 348
 349
 350 /**
 351  * Returns true if the given char is a octal digit.
 352  *
 353  * @param char  the character to check
 354  */
 355 static inline bool is_octal_digit(int chr)
 356 {
 357         switch (chr) {
 358         case '0':
 359         case '1':
 360         case '2':
 361         case '3':
 362         case '4':
 363         case '5':
 364         case '6':
 365         case '7':
 366                 return true;
 367         default:
 368                 return false;
 369         }
 370 }
 371
 372 /**
 373  * Returns the value of a digit.
 374  * The only portable way to do it ...
 375  */
 376 static int digit_value(int digit)
 377 {
 378         switch (digit) {
 379         case '0': return 0;
 380         case '1': return 1;
 381         case '2': return 2;
 382         case '3': return 3;
 383         case '4': return 4;
 384         case '5': return 5;
 385         case '6': return 6;
 386         case '7': return 7;
 387         case '8': return 8;
 388         case '9': return 9;
 389         case 'a':
 390         case 'A': return 10;
 391         case 'b':
 392         case 'B': return 11;
 393         case 'c':
 394         case 'C': return 12;
 395         case 'd':
 396         case 'D': return 13;
 397         case 'e':
 398         case 'E': return 14;
 399         case 'f':
 400         case 'F': return 15;
 401         default:
 402                 panic("wrong character given");
 403         }
 404 }
 405
 406 /**
 407  * Parses an octal character sequence.
 408  *
 409  * @param first_digit  the already read first digit
 410  */
 411 static utf32 parse_octal_sequence(const utf32 first_digit)
 412 {
 413         assert(is_octal_digit(first_digit));
 414         utf32 value = digit_value(first_digit);
 415         if (!is_octal_digit(input.c)) return value;
 416         value = 8 * value + digit_value(input.c);
 417         next_char();
 418         if (!is_octal_digit(input.c)) return value;
 419         value = 8 * value + digit_value(input.c);
 420         next_char();
 421         return value;
 422
 423 }
 424
 425 /**
 426  * Parses a hex character sequence.
 427  */
 428 static utf32 parse_hex_sequence(void)
 429 {
 430         utf32 value = 0;
 431         while (isxdigit(input.c)) {
 432                 value = 16 * value + digit_value(input.c);
 433                 next_char();
 434         }
 435         return value;
 436 }
 437
 438 static bool is_universal_char_valid(utf32 const v)
 439 {
 440         /* C11 §6.4.3:2 */
 441         if (v < 0xA0U && v != 0x24 && v != 0x40 && v != 0x60)
 442                 return false;
 443         if (0xD800 <= v && v <= 0xDFFF)
 444                 return false;
 445         return true;
 446 }
 447
 448 static utf32 parse_universal_char(unsigned const n_digits)
 449 {
 450         utf32 v = 0;
 451         for (unsigned k = n_digits; k != 0; --k) {
 452                 if (isxdigit(input.c)) {
 453                         v = 16 * v + digit_value(input.c);
 454                         if (!resolve_escape_sequences)
 455                                 obstack_1grow(&symbol_obstack, input.c);
 456                         next_char();
 457                 } else {
 458                         errorf(&input.position,
 459                                "short universal character name, expected %u more digits",
 460                                    k);
 461                         break;
 462                 }
 463         }
 464         if (!is_universal_char_valid(v)) {
 465                 errorf(&input.position,
 466                        "\\%c%0*X is not a valid universal character name",
 467                        n_digits == 4 ? 'u' : 'U', (int)n_digits, v);
 468         }
 469         return v;
 470 }
 471
 472 static bool is_universal_char_valid_identifier_c99(utf32 const v)
 473 {
 474         static const utf32 single_chars[] = {
 475                 0x00AA, 0x00BA, 0x0386, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0,
 476                 0x1F59, 0x1F5B, 0x1F5D, 0x05BF, 0x09B2, 0x0A02, 0x0A5E, 0x0A74,
 477                 0x0A8D, 0x0AD0, 0x0AE0, 0x0B9C, 0x0CDE, 0x0E84, 0x0E8A, 0x0E8D,
 478                 0x0EA5, 0x0EA7, 0x0EC6, 0x0F00, 0x0F35, 0x0F37, 0x0F39, 0x0F97,
 479                 0x0FB9, 0x00B5, 0x00B7, 0x02BB, 0x037A, 0x0559, 0x093D, 0x0B3D,
 480                 0x1FBE, 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128
 481         };
 482
 483         static const utf32 ranges[][2] = {
 484                 {0x00C0, 0x00D6}, {0x00D8, 0x00F6}, {0x00F8, 0x01F5}, {0x01FA, 0x0217},
 485                 {0x0250, 0x02A8}, {0x1E00, 0x1E9B}, {0x1EA0, 0x1EF9}, {0x0388, 0x038A},
 486                 {0x038E, 0x03A1}, {0x03A3, 0x03CE}, {0x03D0, 0x03D6}, {0x03E2, 0x03F3},
 487                 {0x1F00, 0x1F15}, {0x1F18, 0x1F1D}, {0x1F20, 0x1F45}, {0x1F48, 0x1F4D},
 488                 {0x1F50, 0x1F57}, {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4}, {0x1FB6, 0x1FBC},
 489                 {0x1FC2, 0x1FC4}, {0x1FC6, 0x1FCC}, {0x1FD0, 0x1FD3}, {0x1FD6, 0x1FDB},
 490                 {0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4}, {0x1FF6, 0x1FFC}, {0x0401, 0x040C},
 491                 {0x040E, 0x044F}, {0x0451, 0x045C}, {0x045E, 0x0481}, {0x0490, 0x04C4},
 492                 {0x04C7, 0x04C8}, {0x04CB, 0x04CC}, {0x04D0, 0x04EB}, {0x04EE, 0x04F5},
 493                 {0x04F8, 0x04F9}, {0x0531, 0x0556}, {0x0561, 0x0587}, {0x05B0, 0x05B9},
 494                 {0x05BB, 0x05BD}, {0x05C1, 0x05C2}, {0x05D0, 0x05EA}, {0x05F0, 0x05F2},
 495                 {0x0621, 0x063A}, {0x0640, 0x0652}, {0x0670, 0x06B7}, {0x06BA, 0x06BE},
 496                 {0x06C0, 0x06CE}, {0x06D0, 0x06DC}, {0x06E5, 0x06E8}, {0x06EA, 0x06ED},
 497                 {0x0901, 0x0903}, {0x0905, 0x0939}, {0x093E, 0x094D}, {0x0950, 0x0952},
 498                 {0x0958, 0x0963}, {0x0981, 0x0983}, {0x0985, 0x098C}, {0x098F, 0x0990},
 499                 {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B6, 0x09B9}, {0x09BE, 0x09C4},
 500                 {0x09C7, 0x09C8}, {0x09CB, 0x09CD}, {0x09DC, 0x09DD}, {0x09DF, 0x09E3},
 501                 {0x09F0, 0x09F1}, {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28},
 502                 {0x0A2A, 0x0A30}, {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, {0x0A38, 0x0A39},
 503                 {0x0A3E, 0x0A42}, {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A59, 0x0A5C},
 504                 {0x0A81, 0x0A83}, {0x0A85, 0x0A8B}, {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8},
 505                 {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3}, {0x0AB5, 0x0AB9}, {0x0ABD, 0x0AC5},
 506                 {0x0AC7, 0x0AC9}, {0x0ACB, 0x0ACD}, {0x0B01, 0x0B03}, {0x0B05, 0x0B0C},
 507                 {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, {0x0B32, 0x0B33},
 508                 {0x0B36, 0x0B39}, {0x0B3E, 0x0B43}, {0x0B47, 0x0B48}, {0x0B4B, 0x0B4D},
 509                 {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B61}, {0x0B82, 0x0B83}, {0x0B85, 0x0B8A},
 510                 {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A}, {0x0B9E, 0x0B9F},
 511                 {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA}, {0x0BAE, 0x0BB5}, {0x0BB7, 0x0BB9},
 512                 {0x0BBE, 0x0BC2}, {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD}, {0x0C01, 0x0C03},
 513                 {0x0C05, 0x0C0C}, {0x0C0E, 0x0C10}, {0x0C12, 0x0C28}, {0x0C2A, 0x0C33},
 514                 {0x0C35, 0x0C39}, {0x0C3E, 0x0C44}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D},
 515                 {0x0C60, 0x0C61}, {0x0C82, 0x0C83}, {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90},
 516                 {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3}, {0x0CB5, 0x0CB9}, {0x0CBE, 0x0CC4},
 517                 {0x0CC6, 0x0CC8}, {0x0CCA, 0x0CCD}, {0x0CE0, 0x0CE1}, {0x0D02, 0x0D03},
 518                 {0x0D05, 0x0D0C}, {0x0D0E, 0x0D10}, {0x0D12, 0x0D28}, {0x0D2A, 0x0D39},
 519                 {0x0D3E, 0x0D43}, {0x0D46, 0x0D48}, {0x0D4A, 0x0D4D}, {0x0D60, 0x0D61},
 520                 {0x0E01, 0x0E3A}, {0x0E40, 0x0E5B}, {0x0E81, 0x0E82}, {0x0E87, 0x0E88},
 521                 {0x0E94, 0x0E97}, {0x0E99, 0x0E9F}, {0x0EA1, 0x0EA3}, {0x0EAA, 0x0EAB},
 522                 {0x0EAD, 0x0EAE}, {0x0EB0, 0x0EB9}, {0x0EBB, 0x0EBD}, {0x0EC0, 0x0EC4},
 523                 {0x0EC8, 0x0ECD}, {0x0EDC, 0x0EDD}, {0x0F18, 0x0F19}, {0x0F3E, 0x0F47},
 524                 {0x0F49, 0x0F69}, {0x0F71, 0x0F84}, {0x0F86, 0x0F8B}, {0x0F90, 0x0F95},
 525                 {0x0F99, 0x0FAD}, {0x0FB1, 0x0FB7}, {0x10A0, 0x10C5}, {0x10D0, 0x10F6},
 526                 {0x3041, 0x3093}, {0x309B, 0x309C}, {0x30A1, 0x30F6}, {0x30FB, 0x30FC},
 527                 {0x3105, 0x312C}, {0x4E00, 0x9FA5}, {0xAC00, 0xD7A3}, {0x0660, 0x0669},
 528                 {0x06F0, 0x06F9}, {0x0966, 0x096F}, {0x09E6, 0x09EF}, {0x0A66, 0x0A6F},
 529                 {0x0AE6, 0x0AEF}, {0x0B66, 0x0B6F}, {0x0BE7, 0x0BEF}, {0x0C66, 0x0C6F},
 530                 {0x0CE6, 0x0CEF}, {0x0D66, 0x0D6F}, {0x0E50, 0x0E59}, {0x0ED0, 0x0ED9},
 531                 {0x0F20, 0x0F33}, {0x02B0, 0x02B8}, {0x02BD, 0x02C1}, {0x02D0, 0x02D1},
 532                 {0x02E0, 0x02E4}, {0x203F, 0x2040}, {0x210A, 0x2113}, {0x2118, 0x211D},
 533                 {0x212A, 0x2131}, {0x2133, 0x2138}, {0x2160, 0x2182}, {0x3005, 0x3007},
 534                 {0x3021, 0x3029},
 535         };
 536         for (size_t i = 0; i < sizeof(ranges)/sizeof(ranges[0]); ++i) {
 537                 if (ranges[i][0] <= v && v <= ranges[i][1])
 538                         return true;
 539         }
 540         for (size_t i = 0; i < sizeof(single_chars)/sizeof(single_chars[0]); ++i) {
 541                 if (v == single_chars[i])
 542                         return true;
 543         }
 544         return false;
 545 }
 546
 547 static bool is_universal_char_valid_identifier_c11(utf32 const v)
 548 {
 549         /* C11 Annex D.1 */
 550         if (                v == 0x000A8) return true;
 551         if (                v == 0x000AA) return true;
 552         if (                v == 0x000AD) return true;
 553         if (                v == 0x000AF) return true;
 554         if (0x000B2 <= v && v <= 0x000B5) return true;
 555         if (0x000B7 <= v && v <= 0x000BA) return true;
 556         if (0x000BC <= v && v <= 0x000BE) return true;
 557         if (0x000C0 <= v && v <= 0x000D6) return true;
 558         if (0x000D8 <= v && v <= 0x000F6) return true;
 559         if (0x000F8 <= v && v <= 0x000FF) return true;
 560         if (0x00100 <= v && v <= 0x0167F) return true;
 561         if (0x01681 <= v && v <= 0x0180D) return true;
 562         if (0x0180F <= v && v <= 0x01FFF) return true;
 563         if (0x0200B <= v && v <= 0x0200D) return true;
 564         if (0x0202A <= v && v <= 0x0202E) return true;
 565         if (0x0203F <= v && v <= 0x02040) return true;
 566         if (                v == 0x02054) return true;
 567         if (0x02060 <= v && v <= 0x0206F) return true;
 568         if (0x02070 <= v && v <= 0x0218F) return true;
 569         if (0x02460 <= v && v <= 0x024FF) return true;
 570         if (0x02776 <= v && v <= 0x02793) return true;
 571         if (0x02C00 <= v && v <= 0x02DFF) return true;
 572         if (0x02E80 <= v && v <= 0x02FFF) return true;
 573         if (0x03004 <= v && v <= 0x03007) return true;
 574         if (0x03021 <= v && v <= 0x0302F) return true;
 575         if (0x03031 <= v && v <= 0x0303F) return true;
 576         if (0x03040 <= v && v <= 0x0D7FF) return true;
 577         if (0x0F900 <= v && v <= 0x0FD3D) return true;
 578         if (0x0FD40 <= v && v <= 0x0FDCF) return true;
 579         if (0x0FDF0 <= v && v <= 0x0FE44) return true;
 580         if (0x0FE47 <= v && v <= 0x0FFFD) return true;
 581         if (0x10000 <= v && v <= 0x1FFFD) return true;
 582         if (0x20000 <= v && v <= 0x2FFFD) return true;
 583         if (0x30000 <= v && v <= 0x3FFFD) return true;
 584         if (0x40000 <= v && v <= 0x4FFFD) return true;
 585         if (0x50000 <= v && v <= 0x5FFFD) return true;
 586         if (0x60000 <= v && v <= 0x6FFFD) return true;
 587         if (0x70000 <= v && v <= 0x7FFFD) return true;
 588         if (0x80000 <= v && v <= 0x8FFFD) return true;
 589         if (0x90000 <= v && v <= 0x9FFFD) return true;
 590         if (0xA0000 <= v && v <= 0xAFFFD) return true;
 591         if (0xB0000 <= v && v <= 0xBFFFD) return true;
 592         if (0xC0000 <= v && v <= 0xCFFFD) return true;
 593         if (0xD0000 <= v && v <= 0xDFFFD) return true;
 594         if (0xE0000 <= v && v <= 0xEFFFD) return true;
 595         return false;
 596 }
 597
 598 static bool is_universal_char_valid_identifier(utf32 const v)
 599 {
 600         if (c_mode & _C11)
 601                 return is_universal_char_valid_identifier_c11(v);
 602         return is_universal_char_valid_identifier_c99(v);
 603 }
 604
 605 static bool is_universal_char_invalid_identifier_start(utf32 const v)
 606 {
 607         if (! (c_mode & _C11))
 608                 return false;
 609
 610         /* C11 Annex D.2 */
 611         if (0x0300 <= v && v <= 0x036F) return true;
 612         if (0x1DC0 <= v && v <= 0x1DFF) return true;
 613         if (0x20D0 <= v && v <= 0x20FF) return true;
 614         if (0xFE20 <= v && v <= 0xFE2F) return true;
 615         return false;
 616 }
 617
 618 /**
 619  * Parse an escape sequence.
 620  */
 621 static utf32 parse_escape_sequence(void)
 622 {
 623         eat('\\');
 624
 625         utf32 const ec = input.c;
 626         next_char();
 627
 628         switch (ec) {
 629         case '"':  return '"';
 630         case '\'': return '\'';
 631         case '\\': return '\\';
 632         case '?': return '\?';
 633         case 'a': return '\a';
 634         case 'b': return '\b';
 635         case 'f': return '\f';
 636         case 'n': return '\n';
 637         case 'r': return '\r';
 638         case 't': return '\t';
 639         case 'v': return '\v';
 640         case 'x':
 641                 return parse_hex_sequence();
 642         case '0':
 643         case '1':
 644         case '2':
 645         case '3':
 646         case '4':
 647         case '5':
 648         case '6':
 649         case '7':
 650                 return parse_octal_sequence(ec);
 651         case EOF:
 652                 parse_error("reached end of file while parsing escape sequence");
 653                 return EOF;
 654         /* \E is not documented, but handled, by GCC.  It is acceptable according
 655          * to §6.11.4, whereas \e is not. */
 656         case 'E':
 657         case 'e':
 658                 if (c_mode & _GNUC)
 659                         return 27;   /* hopefully 27 is ALWAYS the code for ESCAPE */
 660                 break;
 661
 662         case 'U': return parse_universal_char(8);
 663         case 'u': return parse_universal_char(4);
 664
 665         default:
 666                 break;
 667         }
 668         /* §6.4.4.4:8 footnote 64 */
 669         parse_error("unknown escape sequence");
 670         return EOF;
 671 }
 672
 673 static const char *identify_string(char *string)
 674 {
 675         const char *result = strset_insert(&stringset, string);
 676         if (result != string) {
 677                 obstack_free(&symbol_obstack, string);
 678         }
 679         return result;
 680 }
 681
 682 static string_t sym_make_string(string_encoding_t const enc)
 683 {
 684         obstack_1grow(&symbol_obstack, '\0');
 685         size_t      const len    = obstack_object_size(&symbol_obstack) - 1;
 686         char       *const string = obstack_finish(&symbol_obstack);
 687         char const *const result = identify_string(string);
 688         return (string_t){ result, len, enc };
 689 }
 690
 691 string_t make_string(char const *const string)
 692 {
 693         obstack_grow(&symbol_obstack, string, strlen(string));
 694         return sym_make_string(STRING_ENCODING_CHAR);
 695 }
 696
 697 static utf32 get_string_encoding_limit(string_encoding_t const enc)
 698 {
 699         switch (enc) {
 700         case STRING_ENCODING_CHAR:   return 0xFF;
 701         case STRING_ENCODING_CHAR16: return 0xFFFF;
 702         case STRING_ENCODING_CHAR32: return 0xFFFFFFFF;
 703         case STRING_ENCODING_UTF8:   return 0xFFFFFFFF;
 704         case STRING_ENCODING_WIDE:   return 0xFFFFFFFF; // FIXME depends on settings
 705         }
 706         panic("invalid string encoding");
 707 }
 708
 709 static void parse_string(utf32 const delimiter, token_kind_t const kind,
 710                          string_encoding_t const enc,
 711                          char const *const context)
 712 {
 713         const unsigned start_linenr = input.position.lineno;
 714
 715         eat(delimiter);
 716
 717         utf32 const limit = get_string_encoding_limit(enc);
 718         while (true) {
 719                 switch (input.c) {
 720                 case '\\': {
 721                         if (resolve_escape_sequences) {
 722                                 utf32 const tc = parse_escape_sequence();
 723                                 if (tc > limit) {
 724                                         warningf(WARN_OTHER, &pp_token.base.source_position, "escape sequence out of range");
 725                                 }
 726                                 if (enc == STRING_ENCODING_CHAR) {
 727                                         obstack_1grow(&symbol_obstack, tc);
 728                                 } else {
 729                                         obstack_grow_utf8(&symbol_obstack, tc);
 730                                 }
 731                         } else {
 732                                 obstack_1grow(&symbol_obstack, (char)input.c);
 733                                 next_char();
 734                                 obstack_1grow(&symbol_obstack, (char)input.c);
 735                                 next_char();
 736                         }
 737                         break;
 738                 }
 739
 740                 case NEWLINE:
 741                         errorf(&pp_token.base.source_position, "newline while parsing %s", context);
 742                         break;
 743
 744                 case EOF: {
 745                         source_position_t source_position;
 746                         source_position.input_name = pp_token.base.source_position.input_name;
 747                         source_position.lineno     = start_linenr;
 748                         errorf(&source_position, "EOF while parsing %s", context);
 749                         goto end_of_string;
 750                 }
 751
 752                 default:
 753                         if (input.c == delimiter) {
 754                                 next_char();
 755                                 goto end_of_string;
 756                         } else {
 757                                 obstack_grow_utf8(&symbol_obstack, input.c);
 758                                 next_char();
 759                                 break;
 760                         }
 761                 }
 762         }
 763
 764 end_of_string:
 765         pp_token.kind           = kind;
 766         pp_token.literal.string = sym_make_string(enc);
 767 }
 768
 769 static void parse_string_literal(string_encoding_t const enc)
 770 {
 771         parse_string('"', T_STRING_LITERAL, enc, "string literal");
 772 }
 773
 774 static void parse_character_constant(string_encoding_t const enc)
 775 {
 776         parse_string('\'', T_CHARACTER_CONSTANT, enc, "character constant");
 777         if (pp_token.literal.string.size == 0) {
 778                 parse_error("empty character constant");
 779         }
 780 }
 781
/*
 * Case labels for use as "case SYMBOL_CASES_WITHOUT_E_P:": '$', '_' and all
 * ASCII letters except 'e', 'E', 'p' and 'P'.
 *
 * If allow_dollar_in_symbol is false, '$' jumps to a label named
 * "dollar_sign", which the function using the macro has to provide.
 * NOTE(review): e/E/p/P are presumably left out so that a user can treat
 * exponent characters separately -- confirm at the use sites.
 */
#define SYMBOL_CASES_WITHOUT_E_P \
             '$': if (!allow_dollar_in_symbol) goto dollar_sign; \
        case 'a': \
        case 'b': \
        case 'c': \
        case 'd': \
        case 'f': \
        case 'g': \
        case 'h': \
        case 'i': \
        case 'j': \
        case 'k': \
        case 'l': \
        case 'm': \
        case 'n': \
        case 'o': \
        case 'q': \
        case 'r': \
        case 's': \
        case 't': \
        case 'u': \
        case 'v': \
        case 'w': \
        case 'x': \
        case 'y': \
        case 'z': \
        case 'A': \
        case 'B': \
        case 'C': \
        case 'D': \
        case 'F': \
        case 'G': \
        case 'H': \
        case 'I': \
        case 'J': \
        case 'K': \
        case 'L': \
        case 'M': \
        case 'N': \
        case 'O': \
        case 'Q': \
        case 'R': \
        case 'S': \
        case 'T': \
        case 'U': \
        case 'V': \
        case 'W': \
        case 'X': \
        case 'Y': \
        case 'Z': \
        case '_'
 833
/**
 * Case-label list for every character that may appear in a symbol besides
 * digits: SYMBOL_CASES_WITHOUT_E_P plus the letters 'e', 'p', 'E' and 'P'.
 * Used as `case SYMBOL_CASES:`; it inherits the `dollar_sign` label
 * requirement of SYMBOL_CASES_WITHOUT_E_P.
 */
#define SYMBOL_CASES \
             SYMBOL_CASES_WITHOUT_E_P: \
        case 'e': \
        case 'p': \
        case 'E': \
        case 'P'
 840
/**
 * Case-label list matching the decimal digits '0' to '9'.
 * Used as `case DIGIT_CASES:`.
 */
#define DIGIT_CASES \
             '0':  \
        case '1':  \
        case '2':  \
        case '3':  \
        case '4':  \
        case '5':  \
        case '6':  \
        case '7':  \
        case '8':  \
        case '9'
 852
 853 static void start_expanding(pp_definition_t *definition)
 854 {
 855         definition->parent_expansion = current_expansion;
 856         definition->expand_pos       = 0;
 857         definition->is_expanding     = true;
 858         if (definition->list_len > 0) {
 859                 definition->token_list[0].had_whitespace
 860                         = info.had_whitespace;
 861         }
 862         current_expansion = definition;
 863 }
 864
 865 static void finished_expanding(pp_definition_t *definition)
 866 {
 867         assert(definition->is_expanding);
 868         pp_definition_t *parent = definition->parent_expansion;
 869         definition->parent_expansion = NULL;
 870         definition->is_expanding     = false;
 871
 872         /* stop further expanding once we expanded a parameter used in a
 873          * sub macro-call */
 874         if (definition == argument_expanding)
 875                 argument_expanding = NULL;
 876
 877         assert(current_expansion == definition);
 878         current_expansion = parent;
 879 }
 880
 881 static void grow_string_escaped(struct obstack *obst, const string_t *string, char const *delimiter)
 882 {
 883         char const *prefix = get_string_encoding_prefix(string->encoding);
 884         obstack_printf(obst, "%s%s", prefix, delimiter);
 885         size_t      size = string->size;
 886         const char *str  = string->begin;
 887         if (resolve_escape_sequences) {
 888                 obstack_grow(obst, str, size);
 889         } else {
 890                 for (size_t i = 0; i < size; ++i) {
 891                         const char c = str[i];
 892                         if (c == '\\' || c == '"')
 893                                 obstack_1grow(obst, '\\');
 894                         obstack_1grow(obst, c);
 895                 }
 896         }
 897         obstack_printf(obst, "%s", delimiter);
 898 }
 899
 900 static void grow_token(struct obstack *obst, const token_t *token)
 901 {
 902         switch (token->kind) {
 903         case T_NUMBER:
 904                 obstack_grow(obst, token->literal.string.begin, token->literal.string.size);
 905                 break;
 906
 907         case T_STRING_LITERAL: {
 908                 char const *const delimiter = resolve_escape_sequences ? "\"" : "\\\"";
 909                 grow_string_escaped(obst, &token->literal.string, delimiter);
 910                 break;
 911         }
 912
 913         case T_CHARACTER_CONSTANT:
 914                 grow_string_escaped(obst, &token->literal.string, "'");
 915                 break;
 916
 917         case T_IDENTIFIER:
 918         default: {
 919                 const char *str = token->base.symbol->string;
 920                 size_t      len = strlen(str);
 921                 obstack_grow(obst, str, len);
 922                 break;
 923         }
 924         }
 925 }
 926
 927 static void stringify(const pp_definition_t *definition)
 928 {
 929         assert(obstack_object_size(&symbol_obstack) == 0);
 930
 931         size_t list_len = definition->list_len;
 932         for (size_t p = 0; p < list_len; ++p) {
 933                 const saved_token_t *saved = &definition->token_list[p];
 934                 if (p > 0 && saved->had_whitespace)
 935                         obstack_1grow(&symbol_obstack, ' ');
 936                 grow_token(&symbol_obstack, &saved->token);
 937         }
 938         pp_token.kind           = T_STRING_LITERAL;
 939         pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
 940 }
 941
 942 static inline void set_punctuator(token_kind_t const kind)
 943 {
 944         pp_token.kind        = kind;
 945         pp_token.base.symbol = token_symbols[kind];
 946 }
 947
 948 static inline void set_digraph(token_kind_t const kind, symbol_t *const symbol)
 949 {
 950         pp_token.kind        = kind;
 951         pp_token.base.symbol = symbol;
 952 }
 953
 954 /**
 955  * returns next final token from a preprocessor macro expansion
 956  */
 957 static bool expand_next(void)
 958 {
 959         if (current_expansion == NULL)
 960                 return false;
 961
 962 restart:;
 963         size_t pos = current_expansion->expand_pos;
 964         if (pos >= current_expansion->list_len) {
 965                 finished_expanding(current_expansion);
 966                 /* it was the outermost expansion, parse pptoken normally */
 967                 if (current_expansion == NULL) {
 968                         return false;
 969                 }
 970                 goto restart;
 971         }
 972         const saved_token_t *saved = &current_expansion->token_list[pos++];
 973         pp_token = saved->token;
 974         if (pp_token.kind == '#') {
 975                 if (pos < current_expansion->list_len) {
 976                         const saved_token_t *next = &current_expansion->token_list[pos];
 977                         if (next->token.kind == T_MACRO_PARAMETER) {
 978                                 pp_definition_t *def = next->token.macro_parameter.def;
 979                                 assert(def != NULL && def->is_parameter);
 980                                 stringify(def);
 981                                 ++pos;
 982                         }
 983                 }
 984         }
 985
 986         if (current_expansion->expand_pos > 0)
 987                 info.had_whitespace = saved->had_whitespace;
 988         current_expansion->expand_pos = pos;
 989         pp_token.base.source_position = expansion_pos;
 990
 991         return true;
 992 }
 993
 994 /**
 995  * Returns the next token kind found when continuing the current expansions
 996  * without starting new sub-expansions.
 997  */
 998 static token_kind_t peek_expansion(void)
 999 {
1000         for (pp_definition_t *e = current_expansion; e; e = e->parent_expansion) {
1001                 if (e->expand_pos < e->list_len)
1002                         return e->token_list[e->expand_pos].token.kind;
1003         }
1004         return T_EOF;
1005 }
1006
1007 static void skip_line_comment(void)
1008 {
1009         info.had_whitespace = true;
1010         while (true) {
1011                 switch (input.c) {
1012                 case EOF:
1013                         return;
1014
1015                 case '\r':
1016                 case '\n':
1017                         return;
1018
1019                 default:
1020                         next_char();
1021                         break;
1022                 }
1023         }
1024 }
1025
1026 static void skip_multiline_comment(void)
1027 {
1028         info.had_whitespace = true;
1029
1030         unsigned start_linenr = input.position.lineno;
1031         while (true) {
1032                 switch (input.c) {
1033                 case '/':
1034                         next_char();
1035                         if (input.c == '*') {
1036                                 /* TODO: nested comment, warn here */
1037                         }
1038                         break;
1039                 case '*':
1040                         next_char();
1041                         if (input.c == '/') {
1042                                 if (input.position.lineno != input.output_line)
1043                                         info.whitespace_at_line_begin = input.position.colno;
1044                                 next_char();
1045                                 return;
1046                         }
1047                         break;
1048
1049                 case NEWLINE:
1050                         break;
1051
1052                 case EOF: {
1053                         source_position_t source_position;
1054                         source_position.input_name = pp_token.base.source_position.input_name;
1055                         source_position.lineno     = start_linenr;
1056                         errorf(&source_position, "at end of file while looking for comment end");
1057                         return;
1058                 }
1059
1060                 default:
1061                         next_char();
1062                         break;
1063                 }
1064         }
1065 }
1066
1067 static bool skip_till_newline(bool stop_at_non_whitespace)
1068 {
1069         bool res = false;
1070         while (true) {
1071                 switch (input.c) {
1072                 case ' ':
1073                 case '\t':
1074                         next_char();
1075                         continue;
1076
1077                 case '/':
1078                         next_char();
1079                         if (input.c == '/') {
1080                                 next_char();
1081                                 skip_line_comment();
1082                                 continue;
1083                         } else if (input.c == '*') {
1084                                 next_char();
1085                                 skip_multiline_comment();
1086                                 continue;
1087                         } else {
1088                                 put_back(input.c);
1089                                 input.c = '/';
1090                         }
1091                         return true;
1092
1093                 case NEWLINE:
1094                         return res;
1095
1096                 default:
1097                         if (stop_at_non_whitespace)
1098                                 return false;
1099                         res = true;
1100                         next_char();
1101                         continue;
1102                 }
1103         }
1104 }
1105
1106 static void skip_whitespace(void)
1107 {
1108         while (true) {
1109                 switch (input.c) {
1110                 case ' ':
1111                 case '\t':
1112                         ++info.whitespace_at_line_begin;
1113                         info.had_whitespace = true;
1114                         next_char();
1115                         continue;
1116
1117                 case NEWLINE:
1118                         info.at_line_begin  = true;
1119                         info.had_whitespace = true;
1120                         info.whitespace_at_line_begin = 0;
1121                         continue;
1122
1123                 case '/':
1124                         next_char();
1125                         if (input.c == '/') {
1126                                 next_char();
1127                                 skip_line_comment();
1128                                 continue;
1129                         } else if (input.c == '*') {
1130                                 next_char();
1131                                 skip_multiline_comment();
1132                                 continue;
1133                         } else {
1134                                 put_back(input.c);
1135                                 input.c = '/';
1136                         }
1137                         return;
1138
1139                 default:
1140                         return;
1141                 }
1142         }
1143 }
1144
1145 static inline void eat_pp(pp_token_kind_t const kind)
1146 {
1147         assert(pp_token.base.symbol->pp_ID == kind);
1148         (void) kind;
1149         next_input_token();
1150 }
1151
1152 static inline void eat_token(token_kind_t const kind)
1153 {
1154         assert(pp_token.kind == kind);
1155         (void)kind;
1156         next_input_token();
1157 }
1158
1159 static string_encoding_t identify_encoding_prefix(symbol_t *const sym)
1160 {
1161         if (sym == symbol_L) return STRING_ENCODING_WIDE;
1162         if (c_mode & _C11) {
1163                 if (sym == symbol_U)  return STRING_ENCODING_CHAR32;
1164                 if (sym == symbol_u)  return STRING_ENCODING_CHAR16;
1165                 if (sym == symbol_u8) return STRING_ENCODING_UTF8;
1166         }
1167         return STRING_ENCODING_CHAR;
1168 }
1169
1170 static void parse_symbol(void)
1171 {
1172         assert(obstack_object_size(&symbol_obstack) == 0);
1173         while (true) {
1174                 switch (input.c) {
1175                 case DIGIT_CASES:
1176                 case SYMBOL_CASES:
1177                         obstack_1grow(&symbol_obstack, (char) input.c);
1178                         next_char();
1179                         break;
1180
1181                 case '\\':
1182                         next_char();
1183                         switch (input.c) {
1184                         {
1185                                 unsigned n;
1186                         case 'U': n = 8; goto universal;
1187                         case 'u': n = 4; goto universal;
1188 universal:
1189                                 if (!resolve_escape_sequences) {
1190                                         obstack_1grow(&symbol_obstack, '\\');
1191                                         obstack_1grow(&symbol_obstack, input.c);
1192                                 }
1193                                 next_char();
1194                                 utf32 const v = parse_universal_char(n);
1195                                 if (!is_universal_char_valid_identifier(v)) {
1196                                         if (is_universal_char_valid(v)) {
1197                                                 errorf(&input.position,
1198                                                            "universal character \\%c%0*X is not valid in an identifier",
1199                                                            n == 4 ? 'u' : 'U', (int)n, v);
1200                                         }
1201                                 } else if (obstack_object_size(&symbol_obstack) == 0 && is_universal_char_invalid_identifier_start(v)) {
1202                                         errorf(&input.position,
1203                                                    "universal character \\%c%0*X is not valid as start of an identifier",
1204                                                    n == 4 ? 'u' : 'U', (int)n, v);
1205                                 } else if (resolve_escape_sequences) {
1206                                         obstack_grow_utf8(&symbol_obstack, v);
1207                                 }
1208                                 break;
1209                         }
1210
1211                         default:
1212                                 put_back(input.c);
1213                                 input.c = '\\';
1214                                 goto end_symbol;
1215                         }
1216
1217                 default:
1218 dollar_sign:
1219                         goto end_symbol;
1220                 }
1221         }
1222
1223 end_symbol:
1224         obstack_1grow(&symbol_obstack, '\0');
1225         char *string = obstack_finish(&symbol_obstack);
1226
1227         symbol_t *symbol = symbol_table_insert(string);
1228
1229         /* Might be a prefixed string or character constant: L/U/u/u8"string". */
1230         if (input.c == '"') {
1231                 string_encoding_t const enc = identify_encoding_prefix(symbol);
1232                 if (enc != STRING_ENCODING_CHAR) {
1233                         parse_string_literal(enc);
1234                         return;
1235                 }
1236         } else if (input.c == '\'') {
1237                 string_encoding_t const enc = identify_encoding_prefix(symbol);
1238                 if (enc != STRING_ENCODING_CHAR) {
1239                         if (enc == STRING_ENCODING_UTF8) {
1240                                 errorf(&pp_token.base.source_position, "'u8' is not a valid encoding for a chracter constant");
1241                         }
1242                         parse_character_constant(enc);
1243                         return;
1244                 }
1245         }
1246
1247         pp_token.kind        = symbol->ID;
1248         pp_token.base.symbol = symbol;
1249
1250         /* we can free the memory from symbol obstack if we already had an entry in
1251          * the symbol table */
1252         if (symbol->string != string) {
1253                 obstack_free(&symbol_obstack, string);
1254         }
1255 }
1256
1257 static void parse_number(void)
1258 {
1259         obstack_1grow(&symbol_obstack, (char) input.c);
1260         next_char();
1261
1262         while (true) {
1263                 switch (input.c) {
1264                 case '.':
1265                 case DIGIT_CASES:
1266                 case SYMBOL_CASES_WITHOUT_E_P:
1267                         obstack_1grow(&symbol_obstack, (char) input.c);
1268                         next_char();
1269                         break;
1270
1271                 case 'e':
1272                 case 'p':
1273                 case 'E':
1274                 case 'P':
1275                         obstack_1grow(&symbol_obstack, (char) input.c);
1276                         next_char();
1277                         if (input.c == '+' || input.c == '-') {
1278                                 obstack_1grow(&symbol_obstack, (char) input.c);
1279                                 next_char();
1280                         }
1281                         break;
1282
1283                 default:
1284 dollar_sign:
1285                         goto end_number;
1286                 }
1287         }
1288
1289 end_number:
1290         pp_token.kind           = T_NUMBER;
1291         pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
1292 }
1293
/*
 * Helper macros for lexing multi-character punctuators in
 * next_input_token(). A sequence has the shape
 *
 *     MAYBE_PROLOG  MAYBE(...)*  ELSE(...) or ELSE_CODE(...)
 *
 * and expands to a complete switch statement over the character following
 * the one that was just recognised.
 */

/** Advances to the next character and opens a switch on it; the switch is
 * closed by ELSE_CODE (or ELSE). */
#define MAYBE_PROLOG \
        next_char(); \
        if (0) {} \
        switch (input.c) {

/** If the looked-at character is @p ch: consume it, make pp_token the
 * punctuator @p kind and return from the enclosing function. */
#define MAYBE(ch, kind) \
        case ch: \
                next_char(); \
                set_punctuator(kind); \
                return;

/** Like MAYBE, but the token is set up with set_digraph() using the
 * explicitly given @p symbol. */
#define MAYBE_DIGRAPH(ch, kind, symbol) \
        case ch: \
                next_char(); \
                set_digraph(kind, symbol); \
                return;

/** Default branch: runs @p code for any other character and closes the
 * switch opened by MAYBE_PROLOG. */
#define ELSE_CODE(code) \
        default: \
                code \
        }

/** Default branch that makes pp_token the punctuator @p kind and returns;
 * the looked-at character is not consumed. */
#define ELSE(kind) ELSE_CODE(set_punctuator(kind); return;)
1316
/**
 * Identifies the next preprocessing token in the input stream and stores it
 * in pp_token. No macro expansion is performed.
 *
 * Whitespace, newlines and comments in front of the token are skipped and
 * recorded in info (had_whitespace, at_line_begin,
 * whitespace_at_line_begin). Multi-character punctuators and digraphs are
 * recognised through the MAYBE_PROLOG/MAYBE/ELSE macros, each of which
 * returns from this function once the token is complete.
 */
static void next_input_token(void)
{
        /* take over whitespace information that was saved in next_info,
         * otherwise start with a clean state for the new token */
        if (next_info.had_whitespace) {
                info = next_info;
                next_info.had_whitespace = false;
        } else {
                info.at_line_begin  = false;
                info.had_whitespace = false;
        }
restart:
        pp_token.base.source_position = input.position;
        pp_token.base.symbol          = NULL;

        switch (input.c) {
        case ' ':
        case '\t':
                info.whitespace_at_line_begin++;
                info.had_whitespace = true;
                next_char();
                goto restart;

        case NEWLINE:
                info.at_line_begin            = true;
                info.had_whitespace           = true;
                info.whitespace_at_line_begin = 0;
                goto restart;

        case SYMBOL_CASES:
                parse_symbol();
                return;

        case DIGIT_CASES:
                parse_number();
                return;

        case '"':
                parse_string_literal(STRING_ENCODING_CHAR);
                return;

        case '\'':
                parse_character_constant(STRING_ENCODING_CHAR);
                return;

        case '.':
                MAYBE_PROLOG
                        /* a digit after the dot: restore the dot as current
                         * character and lex the whole thing as a number */
                        case '0':
                        case '1':
                        case '2':
                        case '3':
                        case '4':
                        case '5':
                        case '6':
                        case '7':
                        case '8':
                        case '9':
                                put_back(input.c);
                                input.c = '.';
                                parse_number();
                                return;

                        /* ".." is only a token when a third dot follows;
                         * otherwise the second dot is restored and a single
                         * '.' is produced */
                        case '.':
                                MAYBE_PROLOG
                                MAYBE('.', T_DOTDOTDOT)
                                ELSE_CODE(
                                        put_back(input.c);
                                        input.c = '.';
                                        set_punctuator('.');
                                        return;
                                )
                ELSE('.')
        case '&':
                MAYBE_PROLOG
                MAYBE('&', T_ANDAND)
                MAYBE('=', T_ANDEQUAL)
                ELSE('&')
        case '*':
                MAYBE_PROLOG
                MAYBE('=', T_ASTERISKEQUAL)
                ELSE('*')
        case '+':
                MAYBE_PROLOG
                MAYBE('+', T_PLUSPLUS)
                MAYBE('=', T_PLUSEQUAL)
                ELSE('+')
        case '-':
                MAYBE_PROLOG
                MAYBE('>', T_MINUSGREATER)
                MAYBE('-', T_MINUSMINUS)
                MAYBE('=', T_MINUSEQUAL)
                ELSE('-')
        case '!':
                MAYBE_PROLOG
                MAYBE('=', T_EXCLAMATIONMARKEQUAL)
                ELSE('!')
        case '/':
                MAYBE_PROLOG
                MAYBE('=', T_SLASHEQUAL)
                /* comments count as whitespace: skip them and start over */
                case '*':
                        next_char();
                        skip_multiline_comment();
                        goto restart;
                case '/':
                        next_char();
                        skip_line_comment();
                        goto restart;
                ELSE('/')
        case '%':
                MAYBE_PROLOG
                MAYBE_DIGRAPH('>', '}', symbol_percentgreater)
                MAYBE('=', T_PERCENTEQUAL)
                /* "%:" is the digraph for '#', "%:%:" the one for "##";
                 * a '%' after "%:" that is not followed by ':' is restored
                 * as current character and only '#' is produced */
                case ':':
                        MAYBE_PROLOG
                        case '%':
                                MAYBE_PROLOG
                                MAYBE_DIGRAPH(':', T_HASHHASH, symbol_percentcolonpercentcolon)
                                ELSE_CODE(
                                        put_back(input.c);
                                        input.c = '%';
                                        goto digraph_percentcolon;
                                )
                        ELSE_CODE(
digraph_percentcolon:
                                set_digraph('#', symbol_percentcolon);
                                return;
                        )
                ELSE('%')
        case '<':
                MAYBE_PROLOG
                MAYBE_DIGRAPH(':', '[', symbol_lesscolon)
                MAYBE_DIGRAPH('%', '{', symbol_lesspercent)
                MAYBE('=', T_LESSEQUAL)
                case '<':
                        MAYBE_PROLOG
                        MAYBE('=', T_LESSLESSEQUAL)
                        ELSE(T_LESSLESS)
                ELSE('<')
        case '>':
                MAYBE_PROLOG
                MAYBE('=', T_GREATEREQUAL)
                case '>':
                        MAYBE_PROLOG
                        MAYBE('=', T_GREATERGREATEREQUAL)
                        ELSE(T_GREATERGREATER)
                ELSE('>')
        case '^':
                MAYBE_PROLOG
                MAYBE('=', T_CARETEQUAL)
                ELSE('^')
        case '|':
                MAYBE_PROLOG
                MAYBE('=', T_PIPEEQUAL)
                MAYBE('|', T_PIPEPIPE)
                ELSE('|')
        case ':':
                MAYBE_PROLOG
                MAYBE_DIGRAPH('>', ']', symbol_colongreater)
                /* "::" is only a token when c_mode has the _CXX bit set;
                 * otherwise a single ':' is produced and the second colon
                 * stays in the input */
                case ':':
                        if (c_mode & _CXX) {
                                next_char();
                                set_punctuator(T_COLONCOLON);
                                return;
                        }
                        /* FALLTHROUGH */
                ELSE(':')
        case '=':
                MAYBE_PROLOG
                MAYBE('=', T_EQUALEQUAL)
                ELSE('=')
        case '#':
                MAYBE_PROLOG
                MAYBE('#', T_HASHHASH)
                ELSE('#')

        /* single-character punctuators: the character value is the token
         * kind */
        case '?':
        case '[':
        case ']':
        case '(':
        case ')':
        case '{':
        case '}':
        case '~':
        case ';':
        case ',':
                set_punctuator(input.c);
                next_char();
                return;

        case EOF:
                if (input_stack != NULL) {
                        /* NOTE(review): presumably the end of an included
                         * file -- the current input is closed, the saved one
                         * restored, a line directive with flag "2" printed
                         * and lexing continues there; confirm against
                         * close_pp_input()/pop_restore_input() */
                        fclose(close_pp_input());
                        pop_restore_input();
                        if (out)
                                fputc('\n', out);
                        if (input.c == (utf32)EOF)
                                --input.position.lineno;
                        print_line_directive(&input.position, "2");
                        goto restart;
                } else {
                        info.at_line_begin = true;
                        set_punctuator(T_EOF);
                }
                return;

        case '\\':
                /* peek at the character after the backslash, then restore
                 * the backslash as current character: \u and \U start an
                 * identifier, anything else is handled as unknown character */
                next_char();
                int next_c = input.c;
                put_back(input.c);
                input.c = '\\';
                if (next_c == 'U' || next_c == 'u') {
                        parse_symbol();
                        return;
                }
                /* FALLTHROUGH */
        default:
dollar_sign:
                if (error_on_unknown_chars) {
                        /* report the character, drop it and try again */
                        errorf(&pp_token.base.source_position,
                               "unknown character '%lc' found\n", input.c);
                        next_char();
                        goto restart;
                } else {
                        /* pass the character through as a T_UNKNOWN_CHAR
                         * token whose symbol is its UTF-8 spelling */
                        assert(obstack_object_size(&symbol_obstack) == 0);
                        obstack_grow_utf8(&symbol_obstack, input.c);
                        obstack_1grow(&symbol_obstack, '\0');
                        char     *const string = obstack_finish(&symbol_obstack);
                        symbol_t *const symbol = symbol_table_insert(string);
                        /* the symbol already existed: release our copy */
                        if (symbol->string != string)
                                obstack_free(&symbol_obstack, string);

                        pp_token.kind        = T_UNKNOWN_CHAR;
                        pp_token.base.symbol = symbol;
                        next_char();
                        return;
                }
        }
}
1555
1556 static void print_quoted_string(const char *const string)
1557 {
1558         fputc('"', out);
1559         for (const char *c = string; *c != 0; ++c) {
1560                 switch (*c) {
1561                 case '"': fputs("\\\"", out); break;
1562                 case '\\':  fputs("\\\\", out); break;
1563                 case '\a':  fputs("\\a", out); break;
1564                 case '\b':  fputs("\\b", out); break;
1565                 case '\f':  fputs("\\f", out); break;
1566                 case '\n':  fputs("\\n", out); break;
1567                 case '\r':  fputs("\\r", out); break;
1568                 case '\t':  fputs("\\t", out); break;
1569                 case '\v':  fputs("\\v", out); break;
1570                 case '\?':  fputs("\\?", out); break;
1571                 default:
1572                         if (!isprint(*c)) {
1573                                 fprintf(out, "\\%03o", (unsigned)*c);
1574                                 break;
1575                         }
1576                         fputc(*c, out);
1577                         break;
1578                 }
1579         }
1580         fputc('"', out);
1581 }
1582
1583 static void print_line_directive(const source_position_t *pos, const char *add)
1584 {
1585         if (!out)
1586                 return;
1587
1588         fprintf(out, "# %u ", pos->lineno);
1589         print_quoted_string(pos->input_name);
1590         if (add != NULL) {
1591                 fputc(' ', out);
1592                 fputs(add, out);
1593         }
1594         if (pos->is_system_header) {
1595                 fputs(" 3", out);
1596         }
1597
1598         printed_input_name = pos->input_name;
1599         input.output_line  = pos->lineno-1;
1600 }
1601
1602 static bool emit_newlines(void)
1603 {
1604         if (!out)
1605                 return true;
1606
1607         unsigned delta = pp_token.base.source_position.lineno - input.output_line;
1608         if (delta == 0)
1609                 return false;
1610
1611         if (delta >= 9) {
1612                 fputc('\n', out);
1613                 print_line_directive(&pp_token.base.source_position, NULL);
1614                 fputc('\n', out);
1615         } else {
1616                 for (unsigned i = 0; i < delta; ++i) {
1617                         fputc('\n', out);
1618                 }
1619         }
1620         input.output_line = pp_token.base.source_position.lineno;
1621
1622         unsigned whitespace = info.whitespace_at_line_begin;
1623         /* make sure there is at least 1 whitespace before a (macro-expanded)
1624          * '#' at line begin. I'm not sure why this is good, but gcc does it. */
1625         if (pp_token.kind == '#' && whitespace == 0)
1626                 ++whitespace;
1627         for (unsigned i = 0; i < whitespace; ++i)
1628                 fputc(' ', out);
1629
1630         return true;
1631 }
1632
1633 void set_preprocessor_output(FILE *output)
1634 {
1635         out = output;
1636         if (out != NULL) {
1637                 error_on_unknown_chars   = false;
1638                 resolve_escape_sequences = false;
1639         } else {
1640                 error_on_unknown_chars   = true;
1641                 resolve_escape_sequences = true;
1642         }
1643 }
1644
1645 void emit_pp_token(void)
1646 {
1647         if (!emit_newlines() &&
1648             (info.had_whitespace || tokens_would_paste(last_token, pp_token.kind)))
1649                 fputc(' ', out);
1650
1651         switch (pp_token.kind) {
1652         case T_NUMBER:
1653                 fputs(pp_token.literal.string.begin, out);
1654                 break;
1655
1656         case T_STRING_LITERAL:
1657                 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1658                 fputc('"', out);
1659                 fputs(pp_token.literal.string.begin, out);
1660                 fputc('"', out);
1661                 break;
1662
1663         case T_CHARACTER_CONSTANT:
1664                 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1665                 fputc('\'', out);
1666                 fputs(pp_token.literal.string.begin, out);
1667                 fputc('\'', out);
1668                 break;
1669
1670         case T_MACRO_PARAMETER:
1671                 panic("macro parameter not expanded");
1672
1673         default:
1674                 fputs(pp_token.base.symbol->string, out);
1675                 break;
1676         }
1677         last_token = pp_token.kind;
1678 }
1679
1680 static void eat_pp_directive(void)
1681 {
1682         while (!info.at_line_begin) {
1683                 next_input_token();
1684         }
1685 }
1686
1687 static bool strings_equal(const string_t *string1, const string_t *string2)
1688 {
1689         size_t size = string1->size;
1690         if (size != string2->size)
1691                 return false;
1692
1693         const char *c1 = string1->begin;
1694         const char *c2 = string2->begin;
1695         for (size_t i = 0; i < size; ++i, ++c1, ++c2) {
1696                 if (*c1 != *c2)
1697                         return false;
1698         }
1699         return true;
1700 }
1701
1702 static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
1703 {
1704         if (token1->kind != token2->kind)
1705                 return false;
1706
1707         switch (token1->kind) {
1708         case T_NUMBER:
1709         case T_CHARACTER_CONSTANT:
1710         case T_STRING_LITERAL:
1711                 return strings_equal(&token1->literal.string, &token2->literal.string);
1712
1713         case T_MACRO_PARAMETER:
1714                 return token1->macro_parameter.def->symbol
1715                     == token2->macro_parameter.def->symbol;
1716
1717         default:
1718                 return token1->base.symbol == token2->base.symbol;
1719         }
1720 }
1721
1722 static bool pp_definitions_equal(const pp_definition_t *definition1,
1723                                  const pp_definition_t *definition2)
1724 {
1725         if (definition1->list_len != definition2->list_len)
1726                 return false;
1727
1728         size_t               len = definition1->list_len;
1729         const saved_token_t *t1  = definition1->token_list;
1730         const saved_token_t *t2  = definition2->token_list;
1731         for (size_t i = 0; i < len; ++i, ++t1, ++t2) {
1732                 if (!pp_tokens_equal(&t1->token, &t2->token))
1733                         return false;
1734                 if (t1->had_whitespace != t2->had_whitespace)
1735                         return false;
1736         }
1737         return true;
1738 }
1739
1740 static void missing_macro_param_error(void)
1741 {
1742         errorf(&pp_token.base.source_position,
1743                "'#' is not followed by a macro parameter");
1744 }
1745
1746 static bool is_defineable_token(char const *const context)
1747 {
1748         if (info.at_line_begin) {
1749                 errorf(&pp_token.base.source_position, "unexpected end of line after %s", context);
1750         }
1751
1752         symbol_t *const symbol = pp_token.base.symbol;
1753         if (!symbol)
1754                 goto no_ident;
1755
1756         if (pp_token.kind != T_IDENTIFIER) {
1757                 switch (symbol->string[0]) {
1758                 case SYMBOL_CASES:
1759 dollar_sign:
1760                         break;
1761
1762                 default:
1763 no_ident:
1764                         errorf(&pp_token.base.source_position, "expected identifier after %s, got %K", context, &pp_token);
1765                         return false;
1766                 }
1767         }
1768
1769         /* TODO turn this into a flag in pp_def. */
1770         switch (symbol->pp_ID) {
1771         /* §6.10.8:4 */
1772         case TP_defined:
1773                 errorf(&pp_token.base.source_position, "%K cannot be used as macro name in %s", &pp_token, context);
1774                 return false;
1775
1776         default:
1777                 return true;
1778         }
1779 }
1780
1781 static void parse_define_directive(void)
1782 {
1783         eat_pp(TP_define);
1784         if (skip_mode) {
1785                 eat_pp_directive();
1786                 return;
1787         }
1788
1789         assert(obstack_object_size(&pp_obstack) == 0);
1790
1791         if (!is_defineable_token("#define"))
1792                 goto error_out;
1793         symbol_t *const symbol = pp_token.base.symbol;
1794
1795         pp_definition_t *new_definition
1796                 = obstack_alloc(&pp_obstack, sizeof(new_definition[0]));
1797         memset(new_definition, 0, sizeof(new_definition[0]));
1798         new_definition->symbol          = symbol;
1799         new_definition->source_position = input.position;
1800
1801         /* this is probably the only place where spaces are significant in the
1802          * lexer (except for the fact that they separate tokens). #define b(x)
1803          * is something else than #define b (x) */
1804         if (input.c == '(') {
1805                 next_input_token();
1806                 eat_token('(');
1807
1808                 while (true) {
1809                         switch (pp_token.kind) {
1810                         case T_DOTDOTDOT:
1811                                 new_definition->is_variadic = true;
1812                                 eat_token(T_DOTDOTDOT);
1813                                 if (pp_token.kind != ')') {
1814                                         errorf(&input.position,
1815                                                         "'...' not at end of macro argument list");
1816                                         goto error_out;
1817                                 }
1818                                 break;
1819
1820                         case T_IDENTIFIER: {
1821                                 pp_definition_t parameter;
1822                                 memset(&parameter, 0, sizeof(parameter));
1823                                 parameter.source_position = pp_token.base.source_position;
1824                                 parameter.symbol          = pp_token.base.symbol;
1825                                 parameter.is_parameter    = true;
1826                                 obstack_grow(&pp_obstack, &parameter, sizeof(parameter));
1827                                 eat_token(T_IDENTIFIER);
1828
1829                                 if (pp_token.kind == ',') {
1830                                         eat_token(',');
1831                                         break;
1832                                 }
1833
1834                                 if (pp_token.kind != ')') {
1835                                         errorf(&pp_token.base.source_position,
1836                                                "expected ',' or ')' after identifier, got %K",
1837                                                &pp_token);
1838                                         goto error_out;
1839                                 }
1840                                 break;
1841                         }
1842
1843                         case ')':
1844                                 eat_token(')');
1845                                 goto finish_argument_list;
1846
1847                         default:
1848                                 errorf(&pp_token.base.source_position,
1849                                        "expected identifier, '...' or ')' in #define argument list, got %K",
1850                                        &pp_token);
1851                                 goto error_out;
1852                         }
1853                 }
1854
1855         finish_argument_list:
1856                 new_definition->has_parameters = true;
1857                 size_t size = obstack_object_size(&pp_obstack);
1858                 new_definition->n_parameters
1859                         = size / sizeof(new_definition->parameters[0]);
1860                 new_definition->parameters = obstack_finish(&pp_obstack);
1861                 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1862                         pp_definition_t *param    = &new_definition->parameters[i];
1863                         symbol_t        *symbol   = param->symbol;
1864                         pp_definition_t *previous = symbol->pp_definition;
1865                         if (previous != NULL
1866                             && previous->function_definition == new_definition) {
1867                                 errorf(&param->source_position,
1868                                        "duplicate macro parameter '%Y'", symbol);
1869                                 param->symbol = sym_anonymous;
1870                                 continue;
1871                         }
1872                         param->parent_expansion    = previous;
1873                         param->function_definition = new_definition;
1874                         symbol->pp_definition      = param;
1875                 }
1876         } else {
1877                 next_input_token();
1878         }
1879
1880         /* construct token list */
1881         assert(obstack_object_size(&pp_obstack) == 0);
1882         bool next_must_be_param = false;
1883         while (!info.at_line_begin) {
1884                 if (pp_token.kind == T_IDENTIFIER) {
1885                         const symbol_t  *symbol     = pp_token.base.symbol;
1886                         pp_definition_t *definition = symbol->pp_definition;
1887                         if (definition != NULL
1888                             && definition->function_definition == new_definition) {
1889                             pp_token.kind                = T_MACRO_PARAMETER;
1890                             pp_token.macro_parameter.def = definition;
1891                         }
1892                 }
1893                 if (next_must_be_param && pp_token.kind != T_MACRO_PARAMETER) {
1894                         missing_macro_param_error();
1895                 }
1896                 saved_token_t saved_token;
1897                 saved_token.token = pp_token;
1898                 saved_token.had_whitespace = info.had_whitespace;
1899                 obstack_grow(&pp_obstack, &saved_token, sizeof(saved_token));
1900                 next_must_be_param
1901                         = new_definition->has_parameters && pp_token.kind == '#';
1902                 next_input_token();
1903         }
1904         if (next_must_be_param)
1905                 missing_macro_param_error();
1906
1907         new_definition->list_len   = obstack_object_size(&pp_obstack)
1908                 / sizeof(new_definition->token_list[0]);
1909         new_definition->token_list = obstack_finish(&pp_obstack);
1910
1911         if (new_definition->has_parameters) {
1912                 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1913                         pp_definition_t *param      = &new_definition->parameters[i];
1914                         symbol_t        *symbol     = param->symbol;
1915                         if (symbol == sym_anonymous)
1916                                 continue;
1917                         assert(symbol->pp_definition == param);
1918                         assert(param->function_definition == new_definition);
1919                         symbol->pp_definition   = param->parent_expansion;
1920                         param->parent_expansion = NULL;
1921                 }
1922         }
1923
1924         pp_definition_t *old_definition = symbol->pp_definition;
1925         if (old_definition != NULL) {
1926                 if (!pp_definitions_equal(old_definition, new_definition)) {
1927                         warningf(WARN_OTHER, &input.position, "multiple definition of macro '%Y' (first defined %P)", symbol, &old_definition->source_position);
1928                 } else {
1929                         /* reuse the old definition */
1930                         obstack_free(&pp_obstack, new_definition);
1931                         new_definition = old_definition;
1932                 }
1933         }
1934
1935         symbol->pp_definition = new_definition;
1936         return;
1937
1938 error_out:
1939         if (obstack_object_size(&pp_obstack) > 0) {
1940                 char *ptr = obstack_finish(&pp_obstack);
1941                 obstack_free(&pp_obstack, ptr);
1942         }
1943         eat_pp_directive();
1944 }
1945
1946 static void parse_undef_directive(void)
1947 {
1948         eat_pp(TP_undef);
1949         if (skip_mode) {
1950                 eat_pp_directive();
1951                 return;
1952         }
1953
1954         if (!is_defineable_token("#undef")) {
1955                 eat_pp_directive();
1956                 return;
1957         }
1958
1959         pp_token.base.symbol->pp_definition = NULL;
1960         next_input_token();
1961
1962         if (!info.at_line_begin) {
1963                 warningf(WARN_OTHER, &input.position, "extra tokens at end of #undef directive");
1964         }
1965         eat_pp_directive();
1966 }
1967
1968 /** behind an #include we can have the special headername lexems.
1969  * They're only allowed behind an #include so they're not recognized
1970  * by the normal next_preprocessing_token. We handle them as a special
1971  * exception here */
1972 static const char *parse_headername(bool *system_include)
1973 {
1974         if (info.at_line_begin) {
1975                 parse_error("expected headername after #include");
1976                 return NULL;
1977         }
1978
1979         /* check wether we have a "... or <... headername */
1980         source_position_t position = input.position;
1981         switch (input.c) {
1982         {
1983                 utf32 delimiter;
1984         case '<': delimiter = '>'; *system_include = true;  goto parse_name;
1985         case '"': delimiter = '"'; *system_include = false; goto parse_name;
1986 parse_name:
1987                 assert(obstack_object_size(&symbol_obstack) == 0);
1988                 next_char();
1989                 while (true) {
1990                         switch (input.c) {
1991                         case NEWLINE:
1992                         case EOF:
1993                                 {
1994                                         char *dummy = obstack_finish(&symbol_obstack);
1995                                         obstack_free(&symbol_obstack, dummy);
1996                                 }
1997                                 errorf(&pp_token.base.source_position,
1998                                        "header name without closing '%c'", (char)delimiter);
1999                                 return NULL;
2000
2001                         default:
2002                                 if (input.c == delimiter) {
2003                                         next_char();
2004                                         goto finish_headername;
2005                                 } else {
2006                                         obstack_1grow(&symbol_obstack, (char)input.c);
2007                                         next_char();
2008                                 }
2009                                 break;
2010                         }
2011                 }
2012                 /* we should never be here */
2013         }
2014
2015         default:
2016                 next_preprocessing_token();
2017                 if (info.at_line_begin) {
2018                         /* TODO: if we are already in the new line then we parsed more than
2019                          * wanted. We reuse the token, but could produce following errors
2020                          * misbehaviours... */
2021                         goto error_invalid_input;
2022                 }
2023                 if (pp_token.kind == T_STRING_LITERAL) {
2024                         *system_include = false;
2025                         return pp_token.literal.string.begin;
2026                 } else if (pp_token.kind == '<') {
2027                         *system_include = true;
2028                         assert(obstack_object_size(&pp_obstack) == 0);
2029                         while (true) {
2030                                 next_preprocessing_token();
2031                                 if (info.at_line_begin) {
2032                                         /* TODO: we shouldn't have parsed/expanded something on the
2033                                          * next line yet... */
2034                                         char *dummy = obstack_finish(&pp_obstack);
2035                                         obstack_free(&pp_obstack, dummy);
2036                                         goto error_invalid_input;
2037                                 }
2038                                 if (pp_token.kind == '>')
2039                                         break;
2040
2041                                 saved_token_t saved;
2042                                 saved.token          = pp_token;
2043                                 saved.had_whitespace = info.had_whitespace;
2044                                 obstack_grow(&pp_obstack, &saved, sizeof(saved));
2045                         }
2046                         size_t size = obstack_object_size(&pp_obstack);
2047                         assert(size % sizeof(saved_token_t) == 0);
2048                         size_t n_tokens = size / sizeof(saved_token_t);
2049                         saved_token_t *tokens = obstack_finish(&pp_obstack);
2050                         assert(obstack_object_size(&symbol_obstack) == 0);
2051                         for (size_t i = 0; i < n_tokens; ++i) {
2052                                 const saved_token_t *saved = &tokens[i];
2053                                 if (i > 0 && saved->had_whitespace)
2054                                         obstack_1grow(&symbol_obstack, ' ');
2055                                 grow_token(&symbol_obstack, &saved->token);
2056                         }
2057                         obstack_free(&pp_obstack, tokens);
2058                         goto finish_headername;
2059                 } else {
2060 error_invalid_input:
2061                         {
2062                                 char *dummy = obstack_finish(&symbol_obstack);
2063                                 obstack_free(&symbol_obstack, dummy);
2064                         }
2065
2066                         errorf(&pp_token.base.source_position,
2067                                "expected \"FILENAME\" or <FILENAME> after #include");
2068                         return NULL;
2069                 }
2070         }
2071
2072 finish_headername:
2073         obstack_1grow(&symbol_obstack, '\0');
2074         char *const  headername = obstack_finish(&symbol_obstack);
2075         const char  *identified = identify_string(headername);
2076         pp_token.base.source_position = position;
2077         return identified;
2078 }
2079
2080 static bool do_include(bool const bracket_include, bool const include_next, char const *const headername)
2081 {
2082         size_t const        headername_len = strlen(headername);
2083         searchpath_entry_t *entry;
2084         if (include_next) {
2085                 entry = input.path      ? input.path->next
2086                       : bracket_include ? bracket_searchpath.first
2087                       : quote_searchpath.first;
2088         } else {
2089                 if (!bracket_include) {
2090                         /* put dirname of current input on obstack */
2091                         const char *filename   = input.position.input_name;
2092                         const char *last_slash = strrchr(filename, '/');
2093                         const char *full_name;
2094                         if (last_slash != NULL) {
2095                                 size_t len = last_slash - filename;
2096                                 obstack_grow(&symbol_obstack, filename, len + 1);
2097                                 obstack_grow0(&symbol_obstack, headername, headername_len);
2098                                 char *complete_path = obstack_finish(&symbol_obstack);
2099                                 full_name = identify_string(complete_path);
2100                         } else {
2101                                 full_name = headername;
2102                         }
2103
2104                         FILE *file = fopen(full_name, "r");
2105                         if (file != NULL) {
2106                                 switch_pp_input(file, full_name, NULL, false);
2107                                 return true;
2108                         }
2109                         entry = quote_searchpath.first;
2110                 } else {
2111                         entry = bracket_searchpath.first;
2112                 }
2113         }
2114
2115         assert(obstack_object_size(&symbol_obstack) == 0);
2116         /* check searchpath */
2117         for (; entry; entry = entry->next) {
2118             const char *path = entry->path;
2119             size_t      len  = strlen(path);
2120                 obstack_grow(&symbol_obstack, path, len);
2121                 if (path[len-1] != '/')
2122                         obstack_1grow(&symbol_obstack, '/');
2123                 obstack_grow(&symbol_obstack, headername, headername_len+1);
2124
2125                 char *complete_path = obstack_finish(&symbol_obstack);
2126                 FILE *file          = fopen(complete_path, "r");
2127                 if (file != NULL) {
2128                         const char *filename = identify_string(complete_path);
2129                         switch_pp_input(file, filename, entry, entry->is_system_path);
2130                         return true;
2131                 } else {
2132                         obstack_free(&symbol_obstack, complete_path);
2133                 }
2134         }
2135
2136         return false;
2137 }
2138
2139 static void parse_include_directive(bool const include_next)
2140 {
2141         if (skip_mode) {
2142                 eat_pp_directive();
2143                 return;
2144         }
2145
2146         /* do not eat the TP_include, since it would already parse the next token
2147          * which needs special handling here. */
2148         skip_till_newline(true);
2149         bool system_include;
2150         const char *headername = parse_headername(&system_include);
2151         if (headername == NULL) {
2152                 eat_pp_directive();
2153                 return;
2154         }
2155
2156         bool had_nonwhitespace = skip_till_newline(false);
2157         if (had_nonwhitespace) {
2158                 warningf(WARN_OTHER, &input.position,
2159                          "extra tokens at end of #include directive");
2160         }
2161
2162         if (n_inputs > INCLUDE_LIMIT) {
2163                 errorf(&pp_token.base.source_position, "#include nested too deeply");
2164                 /* eat \n or EOF */
2165                 next_input_token();
2166                 return;
2167         }
2168
2169         /* switch inputs */
2170         info.whitespace_at_line_begin = 0;
2171         info.had_whitespace           = false;
2172         info.at_line_begin            = true;
2173         emit_newlines();
2174         push_input();
2175         bool res = do_include(system_include, include_next, headername);
2176         if (res) {
2177                 next_input_token();
2178         } else {
2179                 errorf(&pp_token.base.source_position, "failed including '%s': %s", headername, strerror(errno));
2180                 pop_restore_input();
2181         }
2182 }
2183
2184 static pp_conditional_t *push_conditional(void)
2185 {
2186         pp_conditional_t *conditional
2187                 = obstack_alloc(&pp_obstack, sizeof(*conditional));
2188         memset(conditional, 0, sizeof(*conditional));
2189
2190         conditional->parent = conditional_stack;
2191         conditional_stack   = conditional;
2192
2193         return conditional;
2194 }
2195
2196 static void pop_conditional(void)
2197 {
2198         assert(conditional_stack != NULL);
2199         conditional_stack = conditional_stack->parent;
2200 }
2201
2202 void check_unclosed_conditionals(void)
2203 {
2204         while (conditional_stack != NULL) {
2205                 pp_conditional_t *conditional = conditional_stack;
2206
2207                 if (conditional->in_else) {
2208                         errorf(&conditional->source_position, "unterminated #else");
2209                 } else {
2210                         errorf(&conditional->source_position, "unterminated condition");
2211                 }
2212                 pop_conditional();
2213         }
2214 }
2215
2216 static void parse_ifdef_ifndef_directive(bool const is_ifdef)
2217 {
2218         bool condition;
2219         eat_pp(is_ifdef ? TP_ifdef : TP_ifndef);
2220
2221         if (skip_mode) {
2222                 eat_pp_directive();
2223                 pp_conditional_t *conditional = push_conditional();
2224                 conditional->source_position  = pp_token.base.source_position;
2225                 conditional->skip             = true;
2226                 return;
2227         }
2228
2229         if (pp_token.kind != T_IDENTIFIER || info.at_line_begin) {
2230                 errorf(&pp_token.base.source_position,
2231                        "expected identifier after #%s, got %K",
2232                        is_ifdef ? "ifdef" : "ifndef", &pp_token);
2233                 eat_pp_directive();
2234
2235                 /* just take the true case in the hope to avoid further errors */
2236                 condition = true;
2237         } else {
2238                 /* evaluate wether we are in true or false case */
2239                 condition = (bool)pp_token.base.symbol->pp_definition == is_ifdef;
2240                 eat_token(T_IDENTIFIER);
2241
2242                 if (!info.at_line_begin) {
2243                         errorf(&pp_token.base.source_position,
2244                                "extra tokens at end of #%s",
2245                                is_ifdef ? "ifdef" : "ifndef");
2246                         eat_pp_directive();
2247                 }
2248         }
2249
2250         pp_conditional_t *conditional = push_conditional();
2251         conditional->source_position  = pp_token.base.source_position;
2252         conditional->condition        = condition;
2253
2254         if (!condition) {
2255                 skip_mode = true;
2256         }
2257 }
2258
2259 static void parse_else_directive(void)
2260 {
2261         eat_pp(TP_else);
2262
2263         if (!info.at_line_begin) {
2264                 if (!skip_mode) {
2265                         warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #else");
2266                 }
2267                 eat_pp_directive();
2268         }
2269
2270         pp_conditional_t *conditional = conditional_stack;
2271         if (conditional == NULL) {
2272                 errorf(&pp_token.base.source_position, "#else without prior #if");
2273                 return;
2274         }
2275
2276         if (conditional->in_else) {
2277                 errorf(&pp_token.base.source_position,
2278                        "#else after #else (condition started %P)",
2279                        &conditional->source_position);
2280                 skip_mode = true;
2281                 return;
2282         }
2283
2284         conditional->in_else = true;
2285         if (!conditional->skip) {
2286                 skip_mode = conditional->condition;
2287         }
2288         conditional->source_position = pp_token.base.source_position;
2289 }
2290
2291 static void parse_endif_directive(void)
2292 {
2293         eat_pp(TP_endif);
2294
2295         if (!info.at_line_begin) {
2296                 if (!skip_mode) {
2297                         warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #endif");
2298                 }
2299                 eat_pp_directive();
2300         }
2301
2302         pp_conditional_t *conditional = conditional_stack;
2303         if (conditional == NULL) {
2304                 errorf(&pp_token.base.source_position, "#endif without prior #if");
2305                 return;
2306         }
2307
2308         if (!conditional->skip) {
2309                 skip_mode = false;
2310         }
2311         pop_conditional();
2312 }
2313
/** The pragmas which may follow `#pragma STDC`. */
typedef enum stdc_pragma_kind_t {
        STDC_UNKNOWN          = 0, /**< not a recognised STDC pragma */
        STDC_FP_CONTRACT      = 1, /**< #pragma STDC FP_CONTRACT */
        STDC_FENV_ACCESS      = 2, /**< #pragma STDC FENV_ACCESS */
        STDC_CX_LIMITED_RANGE = 3  /**< #pragma STDC CX_LIMITED_RANGE */
} stdc_pragma_kind_t;
2320
/** The argument of a `#pragma STDC` pragma. */
typedef enum stdc_pragma_value_kind_t {
        STDC_VALUE_UNKNOWN = 0, /**< missing or unrecognised argument */
        STDC_VALUE_ON      = 1, /**< ON */
        STDC_VALUE_OFF     = 2, /**< OFF */
        STDC_VALUE_DEFAULT = 3  /**< DEFAULT */
} stdc_pragma_value_kind_t;
2327
2328 static void parse_pragma_directive(void)
2329 {
2330         eat_pp(TP_pragma);
2331         if (skip_mode) {
2332                 eat_pp_directive();
2333                 return;
2334         }
2335
2336         if (pp_token.kind != T_IDENTIFIER) {
2337                 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
2338                          "expected identifier after #pragma");
2339                 eat_pp_directive();
2340                 return;
2341         }
2342
2343         stdc_pragma_kind_t kind = STDC_UNKNOWN;
2344         if (pp_token.base.symbol->pp_ID == TP_STDC && c_mode & _C99) {
2345                 /* a STDC pragma */
2346                 next_input_token();
2347
2348                 switch (pp_token.base.symbol->pp_ID) {
2349                 case TP_FP_CONTRACT:      kind = STDC_FP_CONTRACT;      break;
2350                 case TP_FENV_ACCESS:      kind = STDC_FENV_ACCESS;      break;
2351                 case TP_CX_LIMITED_RANGE: kind = STDC_CX_LIMITED_RANGE; break;
2352                 default:                  break;
2353                 }
2354                 if (kind != STDC_UNKNOWN) {
2355                         next_input_token();
2356                         stdc_pragma_value_kind_t value;
2357                         switch (pp_token.base.symbol->pp_ID) {
2358                         case TP_ON:      value = STDC_VALUE_ON;      break;
2359                         case TP_OFF:     value = STDC_VALUE_OFF;     break;
2360                         case TP_DEFAULT: value = STDC_VALUE_DEFAULT; break;
2361                         default:         value = STDC_VALUE_UNKNOWN; break;
2362                         }
2363                         if (value == STDC_VALUE_UNKNOWN) {
2364                                 kind = STDC_UNKNOWN;
2365                                 errorf(&pp_token.base.source_position, "bad STDC pragma argument");
2366                         }
2367                 }
2368         }
2369         eat_pp_directive();
2370         if (kind == STDC_UNKNOWN) {
2371                 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
2372                          "encountered unknown #pragma");
2373         }
2374 }
2375
2376 static void parse_line_directive(void)
2377 {
2378         if (pp_token.kind != T_NUMBER) {
2379                 if (!skip_mode)
2380                         parse_error("expected integer");
2381         } else {
2382                 char      *end;
2383                 long const line = strtol(pp_token.literal.string.begin, &end, 0);
2384                 if (*end == '\0') {
2385                         /* use offset -1 as this is about the next line */
2386                         input.position.lineno = line - 1;
2387                         /* force output of line */
2388                         input.output_line = input.position.lineno - 20;
2389                 } else {
2390                         if (!skip_mode) {
2391                                 errorf(&input.position, "'%S' is not a valid line number",
2392                                            &pp_token.literal.string);
2393                         }
2394                 }
2395                 next_input_token();
2396                 if (info.at_line_begin)
2397                         return;
2398         }
2399         if (pp_token.kind == T_STRING_LITERAL
2400             && pp_token.literal.string.encoding == STRING_ENCODING_CHAR) {
2401                 input.position.input_name       = pp_token.literal.string.begin;
2402                 input.position.is_system_header = false;
2403                 next_input_token();
2404
2405                 /* attempt to parse numeric flags as outputted by gcc preprocessor */
2406                 while (!info.at_line_begin && pp_token.kind == T_NUMBER) {
2407                         /* flags:
2408                          * 1 - indicates start of a new file
2409                          * 2 - indicates return from a file
2410                          * 3 - indicates system header
2411                          * 4 - indicates implicit extern "C" in C++ mode
2412                          *
2413                          * currently we're only interested in "3"
2414                          */
2415                         if (streq(pp_token.literal.string.begin, "3")) {
2416                                 input.position.is_system_header = true;
2417                         }
2418                         next_input_token();
2419                 }
2420         }
2421
2422         eat_pp_directive();
2423 }
2424
2425 static void parse_error_directive(void)
2426 {
2427         if (skip_mode) {
2428                 eat_pp_directive();
2429                 return;
2430         }
2431
2432         bool const old_resolve_escape_sequences = resolve_escape_sequences;
2433         resolve_escape_sequences = false;
2434
2435         source_position_t const pos = pp_token.base.source_position;
2436         do {
2437                 if (info.had_whitespace && obstack_object_size(&pp_obstack) != 0)
2438                         obstack_1grow(&pp_obstack, ' ');
2439
2440                 switch (pp_token.kind) {
2441                 case T_NUMBER: {
2442                         string_t const *const str = &pp_token.literal.string;
2443                         obstack_grow(&pp_obstack, str->begin, str->size);
2444                         break;
2445                 }
2446
2447                 {
2448                         char delim;
2449                 case T_STRING_LITERAL:     delim =  '"'; goto string;
2450                 case T_CHARACTER_CONSTANT: delim = '\''; goto string;
2451 string:;
2452                         string_t const *const str = &pp_token.literal.string;
2453                         char     const *const enc = get_string_encoding_prefix(str->encoding);
2454                         obstack_printf(&pp_obstack, "%s%c%s%c", enc, delim, str->begin, delim);
2455                         break;
2456                 }
2457
2458                 default: {
2459                         char const *const str = pp_token.base.symbol->string;
2460                         obstack_grow(&pp_obstack, str, strlen(str));
2461                         break;
2462                 }
2463                 }
2464
2465                 next_input_token();
2466         } while (!info.at_line_begin);
2467
2468         resolve_escape_sequences = old_resolve_escape_sequences;
2469
2470         obstack_1grow(&pp_obstack, '\0');
2471         char *const str = obstack_finish(&pp_obstack);
2472         errorf(&pos, "#%s", str);
2473         obstack_free(&pp_obstack, str);
2474 }
2475
2476 static void parse_preprocessing_directive(void)
2477 {
2478         eat_token('#');
2479
2480         if (info.at_line_begin) {
2481                 /* empty directive */
2482                 return;
2483         }
2484
2485         if (pp_token.base.symbol) {
2486                 switch (pp_token.base.symbol->pp_ID) {
2487                 case TP_define:       parse_define_directive();            break;
2488                 case TP_else:         parse_else_directive();              break;
2489                 case TP_endif:        parse_endif_directive();             break;
2490                 case TP_error:        parse_error_directive();             break;
2491                 case TP_ifdef:        parse_ifdef_ifndef_directive(true);  break;
2492                 case TP_ifndef:       parse_ifdef_ifndef_directive(false); break;
2493                 case TP_include:      parse_include_directive(false);      break;
2494                 case TP_include_next: parse_include_directive(true);       break;
2495                 case TP_line:         next_input_token(); goto line_directive;
2496                 case TP_pragma:       parse_pragma_directive();            break;
2497                 case TP_undef:        parse_undef_directive();             break;
2498                 default:              goto skip;
2499                 }
2500         } else if (pp_token.kind == T_NUMBER) {
2501 line_directive:
2502                 parse_line_directive();
2503         } else {
2504 skip:
2505                 if (!skip_mode) {
2506                         errorf(&pp_token.base.source_position, "invalid preprocessing directive #%K", &pp_token);
2507                 }
2508                 eat_pp_directive();
2509         }
2510
2511         assert(info.at_line_begin);
2512 }
2513
2514 static void finish_current_argument(void)
2515 {
2516         if (current_argument == NULL)
2517                 return;
2518         size_t size = obstack_object_size(&pp_obstack);
2519         current_argument->list_len   = size/sizeof(current_argument->token_list[0]);
2520         current_argument->token_list = obstack_finish(&pp_obstack);
2521 }
2522
2523 void next_preprocessing_token(void)
2524 {
2525 restart:
2526         if (!expand_next()) {
2527                 do {
2528                         next_input_token();
2529                         while (pp_token.kind == '#' && info.at_line_begin) {
2530                                 parse_preprocessing_directive();
2531                         }
2532                 } while (skip_mode && pp_token.kind != T_EOF);
2533         }
2534
2535         const token_kind_t kind = pp_token.kind;
2536         if (current_call == NULL || argument_expanding != NULL) {
2537                 symbol_t *const symbol = pp_token.base.symbol;
2538                 if (symbol) {
2539                         if (kind == T_MACRO_PARAMETER) {
2540                                 assert(current_expansion != NULL);
2541                                 start_expanding(pp_token.macro_parameter.def);
2542                                 goto restart;
2543                         }
2544
2545                         pp_definition_t *const pp_definition = symbol->pp_definition;
2546                         if (pp_definition != NULL && !pp_definition->is_expanding) {
2547                                 if (pp_definition->has_parameters) {
2548
2549                                         /* check if next token is a '(' */
2550                                         whitespace_info_t old_info   = info;
2551                                         token_kind_t      next_token = peek_expansion();
2552                                         if (next_token == T_EOF) {
2553                                                 info.at_line_begin  = false;
2554                                                 info.had_whitespace = false;
2555                                                 skip_whitespace();
2556                                                 if (input.c == '(') {
2557                                                         next_token = '(';
2558                                                 }
2559                                         }
2560
2561                                         if (next_token == '(') {
2562                                                 if (current_expansion == NULL)
2563                                                         expansion_pos = pp_token.base.source_position;
2564                                                 next_preprocessing_token();
2565                                                 assert(pp_token.kind == '(');
2566
2567                                                 pp_definition->parent_expansion = current_expansion;
2568                                                 current_call              = pp_definition;
2569                                                 current_call->expand_pos  = 0;
2570                                                 current_call->expand_info = old_info;
2571                                                 if (current_call->n_parameters > 0) {
2572                                                         current_argument = &current_call->parameters[0];
2573                                                         assert(argument_brace_count == 0);
2574                                                 }
2575                                                 goto restart;
2576                                         } else {
2577                                                 /* skip_whitespaces() skipped newlines and whitespace,
2578                                                  * remember results for next token */
2579                                                 next_info = info;
2580                                                 info      = old_info;
2581                                                 return;
2582                                         }
2583                                 } else {
2584                                         if (current_expansion == NULL)
2585                                                 expansion_pos = pp_token.base.source_position;
2586                                         start_expanding(pp_definition);
2587                                         goto restart;
2588                                 }
2589                         }
2590                 }
2591         }
2592
2593         if (current_call != NULL) {
2594                 /* current_call != NULL */
2595                 if (kind == '(') {
2596                         ++argument_brace_count;
2597                 } else if (kind == ')') {
2598                         if (argument_brace_count > 0) {
2599                                 --argument_brace_count;
2600                         } else {
2601                                 finish_current_argument();
2602                                 assert(kind == ')');
2603                                 start_expanding(current_call);
2604                                 info = current_call->expand_info;
2605                                 current_call     = NULL;
2606                                 current_argument = NULL;
2607                                 goto restart;
2608                         }
2609                 } else if (kind == ',' && argument_brace_count == 0) {
2610                         finish_current_argument();
2611                         current_call->expand_pos++;
2612                         if (current_call->expand_pos >= current_call->n_parameters) {
2613                                 errorf(&pp_token.base.source_position,
2614                                            "too many arguments passed for macro '%Y'",
2615                                            current_call->symbol);
2616                                 current_argument = NULL;
2617                         } else {
2618                                 current_argument
2619                                         = &current_call->parameters[current_call->expand_pos];
2620                         }
2621                         goto restart;
2622                 } else if (kind == T_MACRO_PARAMETER) {
2623                         /* parameters have to be fully expanded before being used as
2624                          * parameters for another macro-call */
2625                         assert(current_expansion != NULL);
2626                         pp_definition_t *argument = pp_token.macro_parameter.def;
2627                         argument_expanding = argument;
2628                         start_expanding(argument);
2629                         goto restart;
2630                 } else if (kind == T_EOF) {
2631                         errorf(&expansion_pos,
2632                                "reached end of file while parsing arguments for '%Y'",
2633                                current_call->symbol);
2634                         return;
2635                 }
2636                 if (current_argument != NULL) {
2637                         saved_token_t saved;
2638                         saved.token = pp_token;
2639                         saved.had_whitespace = info.had_whitespace;
2640                         obstack_grow(&pp_obstack, &saved, sizeof(saved));
2641                 }
2642                 goto restart;
2643         }
2644 }
2645
2646 void append_include_path(searchpath_t *paths, const char *path)
2647 {
2648         searchpath_entry_t *entry = OALLOCZ(&config_obstack, searchpath_entry_t);
2649         entry->path           = path;
2650         entry->is_system_path = paths->is_system_path;
2651
2652         *paths->anchor = entry;
2653         paths->anchor  = &entry->next;
2654 }
2655
2656 static void append_env_paths(searchpath_t *paths, const char *envvar)
2657 {
2658         const char *val = getenv(envvar);
2659         if (val != NULL && *val != '\0') {
2660                 const char *begin = val;
2661                 const char *c;
2662                 do {
2663                         c = begin;
2664                         while (*c != '\0' && *c != ':')
2665                                 ++c;
2666
2667                         size_t len = c-begin;
2668                         if (len == 0) {
2669                                 /* use "." for gcc compatibility (Matze: I would expect that
2670                                  * nothing happens for an empty entry...) */
2671                                 append_include_path(paths, ".");
2672                         } else {
2673                                 char *const string = obstack_copy0(&config_obstack, begin, len);
2674                                 append_include_path(paths, string);
2675                         }
2676
2677                         begin = c+1;
2678                         /* skip : */
2679                         if (*begin == ':')
2680                                 ++begin;
2681                 } while(*c != '\0');
2682         }
2683 }
2684
2685 static void append_searchpath(searchpath_t *path, const searchpath_t *append)
2686 {
2687         *path->anchor = append->first;
2688 }
2689
2690 static void setup_include_path(void)
2691 {
2692         /* built-in paths */
2693         append_include_path(&system_searchpath, "/usr/include");
2694
2695         /* parse environment variable */
2696         append_env_paths(&bracket_searchpath, "CPATH");
2697         append_env_paths(&system_searchpath,
2698                          c_mode & _CXX ? "CPLUS_INCLUDE_PATH" : "C_INCLUDE_PATH");
2699
2700         /* append system search path to bracket searchpath */
2701         append_searchpath(&system_searchpath,  &after_searchpath);
2702         append_searchpath(&bracket_searchpath, &system_searchpath);
2703         append_searchpath(&quote_searchpath, &bracket_searchpath);
2704 }
2705
2706 static void input_error(unsigned const delta_lines, unsigned const delta_cols, char const *const message)
2707 {
2708         source_position_t pos = pp_token.base.source_position;
2709         pos.lineno += delta_lines;
2710         pos.colno  += delta_cols;
2711         errorf(&pos, "%s", message);
2712 }
2713
2714 void init_include_paths(void)
2715 {
2716         obstack_init(&config_obstack);
2717 }
2718
2719 void init_preprocessor(void)
2720 {
2721         init_symbols();
2722
2723         obstack_init(&pp_obstack);
2724         obstack_init(&input_obstack);
2725         strset_init(&stringset);
2726
2727         setup_include_path();
2728
2729         set_input_error_callback(input_error);
2730 }
2731
2732 void exit_preprocessor(void)
2733 {
2734         obstack_free(&input_obstack, NULL);
2735         obstack_free(&pp_obstack, NULL);
2736         obstack_free(&config_obstack, NULL);
2737
2738         strset_destroy(&stringset);
2739 }
2740
2741 int pptest_main(int argc, char **argv);
2742 int pptest_main(int argc, char **argv)
2743 {
2744         init_symbol_table();
2745         init_include_paths();
2746         init_preprocessor();
2747         init_tokens();
2748
2749         error_on_unknown_chars   = false;
2750         resolve_escape_sequences = false;
2751
2752         /* simplistic commandline parser */
2753         const char *filename = NULL;
2754         const char *output = NULL;
2755         for (int i = 1; i < argc; ++i) {
2756                 const char *opt = argv[i];
2757                 if (streq(opt, "-I")) {
2758                         append_include_path(&bracket_searchpath, argv[++i]);
2759                         continue;
2760                 } else if (streq(opt, "-E")) {
2761                         /* ignore */
2762                 } else if (streq(opt, "-o")) {
2763                         output = argv[++i];
2764                         continue;
2765                 } else if (opt[0] == '-') {
2766                         fprintf(stderr, "Unknown option '%s'\n", opt);
2767                 } else {
2768                         if (filename != NULL)
2769                                 fprintf(stderr, "Multiple inputs not supported\n");
2770                         filename = argv[i];
2771                 }
2772         }
2773         if (filename == NULL) {
2774                 fprintf(stderr, "No input specified\n");
2775                 return 1;
2776         }
2777
2778         if (output == NULL) {
2779                 out = stdout;
2780         } else {
2781                 out = fopen(output, "w");
2782                 if (out == NULL) {
2783                         fprintf(stderr, "Couldn't open output '%s'\n", output);
2784                         return 1;
2785                 }
2786         }
2787
2788         /* just here for gcc compatibility */
2789         fprintf(out, "# 1 \"%s\"\n", filename);
2790         fprintf(out, "# 1 \"<built-in>\"\n");
2791         fprintf(out, "# 1 \"<command-line>\"\n");
2792
2793         FILE *file = fopen(filename, "r");
2794         if (file == NULL) {
2795                 fprintf(stderr, "Couldn't open input '%s'\n", filename);
2796                 return 1;
2797         }
2798         switch_pp_input(file, filename, NULL, false);
2799
2800         for (;;) {
2801                 next_preprocessing_token();
2802                 if (pp_token.kind == T_EOF)
2803                         break;
2804                 emit_pp_token();
2805         }
2806
2807         fputc('\n', out);
2808         check_unclosed_conditionals();
2809         fclose(close_pp_input());
2810         if (out != stdout)
2811                 fclose(out);
2812
2813         exit_tokens();
2814         exit_preprocessor();
2815         exit_symbol_table();
2816
2817         return 0;
2818 }