nsz Git - cparser/blob - preprocessor.c

   1 #include <config.h>
   2
   3 #include <assert.h>
   4 #include <errno.h>
   5 #include <string.h>
   6 #include <stdbool.h>
   7 #include <ctype.h>
   8
   9 #include "preprocessor.h"
  10 #include "token_t.h"
  11 #include "symbol_t.h"
  12 #include "adt/util.h"
  13 #include "adt/error.h"
  14 #include "adt/strutil.h"
  15 #include "adt/strset.h"
  16 #include "lang_features.h"
  17 #include "diagnostic.h"
  18 #include "string_rep.h"
  19 #include "input.h"
  20
/** Number of slots kept free at the start of pp_input_t::buf; the decoder
 * fills the buffer behind them so that put_back() has room to step back. */
#define MAX_PUTBACK 3
/* NOTE(review): presumably the maximum #include nesting depth -- confirm at
 * the use site. */
#define INCLUDE_LIMIT 199  /* 199 is for gcc "compatibility" */
  23
/** A token together with its whitespace flag; element type of
 * pp_definition_t::token_list. */
typedef struct saved_token_t {
        token_t token;
        /** copied from info.had_whitespace for the first token of a
         * definition by start_expanding() */
        bool    had_whitespace;
} saved_token_t;
  28
/** Whitespace/line-begin state recorded for a token (see the file-scope
 * variables `info` and `next_info`). */
typedef struct whitespace_info_t {
        /** current token had whitespace in front of it */
        bool     had_whitespace;
        /** current token is at the beginning of a line.
         * => a "#" at line begin starts a preprocessing directive. */
        bool     at_line_begin;
        /** number of spaces before the first token in a line */
        unsigned whitespace_at_line_begin;
} whitespace_info_t;
  38
/** A preprocessor definition and the state of its expansion.
 * NOTE(review): judging by is_parameter/function_definition the same struct
 * is presumably also used for macro parameters -- confirm. */
struct pp_definition_t {
        symbol_t          *symbol;
        position_t         pos;
        /** value of current_expansion at the time start_expanding() was
         * called for this definition */
        pp_definition_t   *parent_expansion;
        /** reset to 0 by start_expanding(); presumably the index of the next
         * entry of token_list to deliver -- confirm */
        size_t             expand_pos;
        whitespace_info_t  expand_info;
        bool               is_variadic    : 1;
        /** set by start_expanding(), cleared by finished_expanding() */
        bool               is_expanding   : 1;
        bool               has_parameters : 1;
        bool               is_parameter   : 1;
        pp_definition_t   *function_definition;
        size_t             n_parameters;
        pp_definition_t   *parameters;

        /* replacement */
        /** number of entries in token_list */
        size_t             list_len;
        saved_token_t     *token_list;
};
  57
/** One entry of the stack of conditionals (see conditional_stack).
 * NOTE(review): field meanings below are taken from the names only, apart
 * from `skip` -- confirm against the directive handling code. */
typedef struct pp_conditional_t pp_conditional_t;
struct pp_conditional_t {
        position_t         pos;
        bool               condition;
        bool               in_else;
        /** conditional in skip mode (then+else gets skipped) */
        bool               skip;
        /** next entry on the stack */
        pp_conditional_t  *parent;
};
  67
/** State of one input being read by the preprocessor. */
typedef struct pp_input_t pp_input_t;
struct pp_input_t {
        /** stream handed to switch_pp_input(); returned by close_pp_input() */
        FILE               *file;
        /** decoder created from `file` with input_from_stream() */
        input_t            *input;
        /** the current character (EOF once the input is exhausted) */
        utf32               c;
        /** decoded characters; the first MAX_PUTBACK slots are left free by
         * next_real_char() so that put_back() can step back */
        utf32               buf[1024+MAX_PUTBACK];
        /** one past the last valid character in buf (NULL before first read) */
        const utf32        *bufend;
        /** next character to be read from buf (NULL before first read) */
        const utf32        *bufpos;
        /** position of the current character */
        position_t          pos;
        /** link to the next saved input on input_stack */
        pp_input_t         *parent;
        /* NOTE(review): presumably the line number last written to the
         * output for this input -- confirm in print_line_directive(). */
        unsigned            output_line;
        /** search path entry passed to switch_pp_input() */
        searchpath_entry_t *path;
};
  81
/** One directory in a singly linked include search path. */
struct searchpath_entry_t {
        const char         *path;
        /** following entry of the same search path */
        searchpath_entry_t *next;
        bool                is_system_path;
};
  87
/** the input that is currently being read */
static pp_input_t      input;

/** inputs saved by push_input(), linked through pp_input_t::parent with the
 * most recently saved one first */
static pp_input_t     *input_stack;
/** number of entries on input_stack */
static unsigned        n_inputs;
/** storage for the entries of input_stack */
static struct obstack  input_obstack;

/* NOTE(review): presumably the innermost open conditional, linked through
 * pp_conditional_t::parent -- confirm at the use sites. */
static pp_conditional_t *conditional_stack;

/** the current token; parse_string() stores its result here and
 * parse_error() reports at its position */
token_t                  pp_token;
/** if false, '$' is not treated as an identifier character
 * (see SYMBOL_CASES_WITHOUT_E_P) */
bool                     allow_dollar_in_symbol   = true;
/** if false, parse_string() copies escape sequences verbatim instead of
 * replacing them by the character they denote */
static bool              resolve_escape_sequences = true;
static bool              error_on_unknown_chars   = true;
static bool              skip_mode;
static FILE             *out;
static struct obstack    pp_obstack;
static struct obstack    config_obstack;
static const char       *printed_input_name = NULL;
static position_t        expansion_pos;
/** definition that is being expanded right now; maintained by
 * start_expanding() */
static pp_definition_t  *current_expansion  = NULL;
static pp_definition_t  *current_call       = NULL;
static pp_definition_t  *current_argument   = NULL;
static pp_definition_t  *argument_expanding = NULL;
static unsigned          argument_brace_count;
/** set of unique strings used by identify_string() */
static strset_t          stringset;
static token_kind_t      last_token;
 113
/** A list of search path entries. */
struct searchpath_t {
        /** first entry of the list, NULL if the list is empty */
        searchpath_entry_t  *first;
        /** initially points at `first`; NOTE(review): presumably always the
         * link where the next entry gets appended -- confirm */
        searchpath_entry_t **anchor;
        bool                 is_system_path;
};

/* The four search paths start out empty; only the system and "after" paths
 * are marked as system paths. */
searchpath_t bracket_searchpath = { NULL, &bracket_searchpath.first, false };
searchpath_t quote_searchpath   = { NULL, &quote_searchpath.first,   false };
searchpath_t system_searchpath  = { NULL, &system_searchpath.first,  true  };
searchpath_t after_searchpath   = { NULL, &after_searchpath.first,   true  };
 124
static whitespace_info_t next_info; /* valid if had_whitespace is true */
/** whitespace state of the current token; maybe_concat_lines() resets its
 * whitespace_at_line_begin after a line continuation */
static whitespace_info_t info;

/* forward declarations of functions that are used before their definition */
static inline void next_char(void);
static void next_input_token(void);
static void print_line_directive(const position_t *pos, const char *add);
 131
/* Symbols for the digraph spellings; filled in by init_symbols(). */
static symbol_t *symbol_colongreater;
static symbol_t *symbol_lesscolon;
static symbol_t *symbol_lesspercent;
static symbol_t *symbol_percentcolon;
static symbol_t *symbol_percentcolonpercentcolon;
static symbol_t *symbol_percentgreater;

/* Symbols for the identifiers "L", "U", "u" and "u8" (the spellings of the
 * string/character literal encoding prefixes); filled in by init_symbols(). */
static symbol_t *symbol_L;
static symbol_t *symbol_U;
static symbol_t *symbol_u;
static symbol_t *symbol_u8;
 143
 144 static void init_symbols(void)
 145 {
 146         symbol_colongreater             = symbol_table_insert(":>");
 147         symbol_lesscolon                = symbol_table_insert("<:");
 148         symbol_lesspercent              = symbol_table_insert("<%");
 149         symbol_percentcolon             = symbol_table_insert("%:");
 150         symbol_percentcolonpercentcolon = symbol_table_insert("%:%:");
 151         symbol_percentgreater           = symbol_table_insert("%>");
 152
 153         symbol_L  = symbol_table_insert("L");
 154         symbol_U  = symbol_table_insert("U");
 155         symbol_u  = symbol_table_insert("u");
 156         symbol_u8 = symbol_table_insert("u8");
 157 }
 158
 159 void switch_pp_input(FILE *const file, char const *const filename, searchpath_entry_t *const path, bool const is_system_header)
 160 {
 161         input.file                 = file;
 162         input.input                = input_from_stream(file, NULL);
 163         input.bufend               = NULL;
 164         input.bufpos               = NULL;
 165         input.output_line          = 0;
 166         input.pos.input_name       = filename;
 167         input.pos.lineno           = 1;
 168         input.pos.is_system_header = is_system_header;
 169         input.path                 = path;
 170
 171         /* indicate that we're at a new input */
 172         print_line_directive(&input.pos, input_stack != NULL ? "1" : NULL);
 173
 174         /* place a virtual '\n' so we realize we're at line begin */
 175         input.pos.lineno = 0;
 176         input.c          = '\n';
 177 }
 178
 179 FILE *close_pp_input(void)
 180 {
 181         input_free(input.input);
 182
 183         FILE* const file = input.file;
 184         assert(file);
 185
 186         input.input  = NULL;
 187         input.file   = NULL;
 188         input.bufend = NULL;
 189         input.bufpos = NULL;
 190         input.c      = EOF;
 191
 192         return file;
 193 }
 194
 195 static void push_input(void)
 196 {
 197         pp_input_t *const saved_input = obstack_copy(&input_obstack, &input, sizeof(input));
 198
 199         /* adjust buffer positions */
 200         if (input.bufpos != NULL)
 201                 saved_input->bufpos = saved_input->buf + (input.bufpos - input.buf);
 202         if (input.bufend != NULL)
 203                 saved_input->bufend = saved_input->buf + (input.bufend - input.buf);
 204
 205         saved_input->parent = input_stack;
 206         input_stack         = saved_input;
 207         ++n_inputs;
 208 }
 209
 210 static void pop_restore_input(void)
 211 {
 212         assert(n_inputs > 0);
 213         assert(input_stack != NULL);
 214
 215         pp_input_t *saved_input = input_stack;
 216
 217         memcpy(&input, saved_input, sizeof(input));
 218         input.parent = NULL;
 219
 220         /* adjust buffer positions */
 221         if (saved_input->bufpos != NULL)
 222                 input.bufpos = input.buf + (saved_input->bufpos - saved_input->buf);
 223         if (saved_input->bufend != NULL)
 224                 input.bufend = input.buf + (saved_input->bufend - saved_input->buf);
 225
 226         input_stack = saved_input->parent;
 227         obstack_free(&input_obstack, saved_input);
 228         --n_inputs;
 229 }
 230
 231 /**
 232  * Prints a parse error message at the current token.
 233  *
 234  * @param msg   the error message
 235  */
 236 static void parse_error(const char *msg)
 237 {
 238         errorf(&pp_token.base.pos,  "%s", msg);
 239 }
 240
 241 static inline void next_real_char(void)
 242 {
 243         assert(input.bufpos <= input.bufend);
 244         if (input.bufpos >= input.bufend) {
 245                 size_t const n = decode(input.input, input.buf + MAX_PUTBACK, lengthof(input.buf) - MAX_PUTBACK);
 246                 if (n == 0) {
 247                         input.c = EOF;
 248                         return;
 249                 }
 250                 input.bufpos = input.buf + MAX_PUTBACK;
 251                 input.bufend = input.bufpos + n;
 252         }
 253         input.c = *input.bufpos++;
 254         ++input.pos.colno;
 255 }
 256
 257 /**
 258  * Put a character back into the buffer.
 259  *
 260  * @param pc  the character to put back
 261  */
 262 static inline void put_back(utf32 const pc)
 263 {
 264         assert(input.bufpos > input.buf);
 265         *(--input.bufpos - input.buf + input.buf) = (char) pc;
 266         --input.pos.colno;
 267 }
 268
/**
 * To be used as `case NEWLINE:` inside a `switch (input.c)`.  Expands to the
 * case labels for '\r' and '\n' plus the code that consumes the line break
 * ("\r", "\n" or "\r\n"), increments the line number and resets the column
 * to 1.  The statements that follow `case NEWLINE:` are executed afterwards.
 * The expansion defines the label `newline`, so the macro can be used only
 * once per function.
 */
#define NEWLINE \
        '\r': \
                next_char(); \
                if (input.c == '\n') { \
        case '\n': \
                        next_char(); \
                } \
                ++input.pos.lineno; \
                input.pos.colno = 1; \
                goto newline; \
                newline // Let it look like an ordinary case label.

/** Asserts that the current character is c_type and advances to the next
 * character. */
#define eat(c_type) (assert(input.c == c_type), next_char())
 282
/**
 * Handles a backslash that has just been read.  If a line break follows, the
 * backslash and the line break are removed (the lines get concatenated) and
 * input.c is the first character after the line break.  Otherwise the
 * character after the backslash is put back and input.c remains '\\'.
 */
static void maybe_concat_lines(void)
{
        eat('\\');

        switch (input.c) {
        /* NEWLINE consumes the line break and updates the position */
        case NEWLINE:
                info.whitespace_at_line_begin = 0;
                return;

        default:
                break;
        }

        /* no line continuation: undo the lookahead and deliver the backslash */
        put_back(input.c);
        input.c = '\\';
}
 299
/**
 * Sets input.c to the next input character after replacing trigraphs and
 * removing backslash-newline line continuations.
 *
 * Characters that were read ahead but turn out not to belong to a trigraph
 * are put back; at most two are put back here, and maybe_concat_lines() may
 * put back one more.
 */
static inline void next_char(void)
{
        next_real_char();

        /* filter trigraphs and concatenated lines */
        if (UNLIKELY(input.c == '\\')) {
                maybe_concat_lines();
                goto end_of_next_char;
        }

        /* anything but '?' cannot start a trigraph */
        if (LIKELY(input.c != '?'))
                goto end_of_next_char;

        next_real_char();
        if (LIKELY(input.c != '?')) {
                /* a single '?': undo the lookahead */
                put_back(input.c);
                input.c = '?';
                goto end_of_next_char;
        }

        /* "??" was read, the third character selects the replacement */
        next_real_char();
        switch (input.c) {
        case '=': input.c = '#'; break;
        case '(': input.c = '['; break;
        /* "??/" is a backslash and may therefore start a line continuation */
        case '/': input.c = '\\'; maybe_concat_lines(); break;
        case ')': input.c = ']'; break;
        case '\'': input.c = '^'; break;
        case '<': input.c = '{'; break;
        case '!': input.c = '|'; break;
        case '>': input.c = '}'; break;
        case '-': input.c = '~'; break;
        default:
                /* no trigraph: deliver the first '?' and re-read the rest */
                put_back(input.c);
                put_back('?');
                input.c = '?';
                break;
        }

end_of_next_char:;
#ifdef DEBUG_CHARS
        printf("nchar '%c'\n", input.c);
#endif
}
 347
 348
 349
 350 /**
 351  * Returns true if the given char is a octal digit.
 352  *
 353  * @param char  the character to check
 354  */
 355 static inline bool is_octal_digit(int chr)
 356 {
 357         switch (chr) {
 358         case '0':
 359         case '1':
 360         case '2':
 361         case '3':
 362         case '4':
 363         case '5':
 364         case '6':
 365         case '7':
 366                 return true;
 367         default:
 368                 return false;
 369         }
 370 }
 371
 372 /**
 373  * Returns the value of a digit.
 374  * The only portable way to do it ...
 375  */
 376 static int digit_value(int digit)
 377 {
 378         switch (digit) {
 379         case '0': return 0;
 380         case '1': return 1;
 381         case '2': return 2;
 382         case '3': return 3;
 383         case '4': return 4;
 384         case '5': return 5;
 385         case '6': return 6;
 386         case '7': return 7;
 387         case '8': return 8;
 388         case '9': return 9;
 389         case 'a':
 390         case 'A': return 10;
 391         case 'b':
 392         case 'B': return 11;
 393         case 'c':
 394         case 'C': return 12;
 395         case 'd':
 396         case 'D': return 13;
 397         case 'e':
 398         case 'E': return 14;
 399         case 'f':
 400         case 'F': return 15;
 401         default:
 402                 panic("wrong character given");
 403         }
 404 }
 405
 406 /**
 407  * Parses an octal character sequence.
 408  *
 409  * @param first_digit  the already read first digit
 410  */
 411 static utf32 parse_octal_sequence(const utf32 first_digit)
 412 {
 413         assert(is_octal_digit(first_digit));
 414         utf32 value = digit_value(first_digit);
 415         if (!is_octal_digit(input.c)) return value;
 416         value = 8 * value + digit_value(input.c);
 417         next_char();
 418         if (!is_octal_digit(input.c)) return value;
 419         value = 8 * value + digit_value(input.c);
 420         next_char();
 421         return value;
 422
 423 }
 424
 425 /**
 426  * Parses a hex character sequence.
 427  */
 428 static utf32 parse_hex_sequence(void)
 429 {
 430         utf32 value = 0;
 431         while (isxdigit(input.c)) {
 432                 value = 16 * value + digit_value(input.c);
 433                 next_char();
 434         }
 435         return value;
 436 }
 437
 438 static bool is_universal_char_valid(utf32 const v)
 439 {
 440         /* C11 §6.4.3:2 */
 441         if (v < 0xA0U && v != 0x24 && v != 0x40 && v != 0x60)
 442                 return false;
 443         if (0xD800 <= v && v <= 0xDFFF)
 444                 return false;
 445         return true;
 446 }
 447
 448 static utf32 parse_universal_char(unsigned const n_digits)
 449 {
 450         utf32 v = 0;
 451         for (unsigned k = n_digits; k != 0; --k) {
 452                 if (isxdigit(input.c)) {
 453                         v = 16 * v + digit_value(input.c);
 454                         if (!resolve_escape_sequences)
 455                                 obstack_1grow(&symbol_obstack, input.c);
 456                         next_char();
 457                 } else {
 458                         errorf(&input.pos,
 459                                "short universal character name, expected %u more digits",
 460                                    k);
 461                         break;
 462                 }
 463         }
 464         if (!is_universal_char_valid(v)) {
 465                 errorf(&input.pos,
 466                        "\\%c%0*X is not a valid universal character name",
 467                        n_digits == 4 ? 'u' : 'U', (int)n_digits, v);
 468         }
 469         return v;
 470 }
 471
 472 static bool is_universal_char_valid_identifier_c99(utf32 const v)
 473 {
 474         static const utf32 single_chars[] = {
 475                 0x00AA, 0x00BA, 0x0386, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0,
 476                 0x1F59, 0x1F5B, 0x1F5D, 0x05BF, 0x09B2, 0x0A02, 0x0A5E, 0x0A74,
 477                 0x0A8D, 0x0AD0, 0x0AE0, 0x0B9C, 0x0CDE, 0x0E84, 0x0E8A, 0x0E8D,
 478                 0x0EA5, 0x0EA7, 0x0EC6, 0x0F00, 0x0F35, 0x0F37, 0x0F39, 0x0F97,
 479                 0x0FB9, 0x00B5, 0x00B7, 0x02BB, 0x037A, 0x0559, 0x093D, 0x0B3D,
 480                 0x1FBE, 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128
 481         };
 482
 483         static const utf32 ranges[][2] = {
 484                 {0x00C0, 0x00D6}, {0x00D8, 0x00F6}, {0x00F8, 0x01F5}, {0x01FA, 0x0217},
 485                 {0x0250, 0x02A8}, {0x1E00, 0x1E9B}, {0x1EA0, 0x1EF9}, {0x0388, 0x038A},
 486                 {0x038E, 0x03A1}, {0x03A3, 0x03CE}, {0x03D0, 0x03D6}, {0x03E2, 0x03F3},
 487                 {0x1F00, 0x1F15}, {0x1F18, 0x1F1D}, {0x1F20, 0x1F45}, {0x1F48, 0x1F4D},
 488                 {0x1F50, 0x1F57}, {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4}, {0x1FB6, 0x1FBC},
 489                 {0x1FC2, 0x1FC4}, {0x1FC6, 0x1FCC}, {0x1FD0, 0x1FD3}, {0x1FD6, 0x1FDB},
 490                 {0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4}, {0x1FF6, 0x1FFC}, {0x0401, 0x040C},
 491                 {0x040E, 0x044F}, {0x0451, 0x045C}, {0x045E, 0x0481}, {0x0490, 0x04C4},
 492                 {0x04C7, 0x04C8}, {0x04CB, 0x04CC}, {0x04D0, 0x04EB}, {0x04EE, 0x04F5},
 493                 {0x04F8, 0x04F9}, {0x0531, 0x0556}, {0x0561, 0x0587}, {0x05B0, 0x05B9},
 494                 {0x05BB, 0x05BD}, {0x05C1, 0x05C2}, {0x05D0, 0x05EA}, {0x05F0, 0x05F2},
 495                 {0x0621, 0x063A}, {0x0640, 0x0652}, {0x0670, 0x06B7}, {0x06BA, 0x06BE},
 496                 {0x06C0, 0x06CE}, {0x06D0, 0x06DC}, {0x06E5, 0x06E8}, {0x06EA, 0x06ED},
 497                 {0x0901, 0x0903}, {0x0905, 0x0939}, {0x093E, 0x094D}, {0x0950, 0x0952},
 498                 {0x0958, 0x0963}, {0x0981, 0x0983}, {0x0985, 0x098C}, {0x098F, 0x0990},
 499                 {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B6, 0x09B9}, {0x09BE, 0x09C4},
 500                 {0x09C7, 0x09C8}, {0x09CB, 0x09CD}, {0x09DC, 0x09DD}, {0x09DF, 0x09E3},
 501                 {0x09F0, 0x09F1}, {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28},
 502                 {0x0A2A, 0x0A30}, {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, {0x0A38, 0x0A39},
 503                 {0x0A3E, 0x0A42}, {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A59, 0x0A5C},
 504                 {0x0A81, 0x0A83}, {0x0A85, 0x0A8B}, {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8},
 505                 {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3}, {0x0AB5, 0x0AB9}, {0x0ABD, 0x0AC5},
 506                 {0x0AC7, 0x0AC9}, {0x0ACB, 0x0ACD}, {0x0B01, 0x0B03}, {0x0B05, 0x0B0C},
 507                 {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, {0x0B32, 0x0B33},
 508                 {0x0B36, 0x0B39}, {0x0B3E, 0x0B43}, {0x0B47, 0x0B48}, {0x0B4B, 0x0B4D},
 509                 {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B61}, {0x0B82, 0x0B83}, {0x0B85, 0x0B8A},
 510                 {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A}, {0x0B9E, 0x0B9F},
 511                 {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA}, {0x0BAE, 0x0BB5}, {0x0BB7, 0x0BB9},
 512                 {0x0BBE, 0x0BC2}, {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD}, {0x0C01, 0x0C03},
 513                 {0x0C05, 0x0C0C}, {0x0C0E, 0x0C10}, {0x0C12, 0x0C28}, {0x0C2A, 0x0C33},
 514                 {0x0C35, 0x0C39}, {0x0C3E, 0x0C44}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D},
 515                 {0x0C60, 0x0C61}, {0x0C82, 0x0C83}, {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90},
 516                 {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3}, {0x0CB5, 0x0CB9}, {0x0CBE, 0x0CC4},
 517                 {0x0CC6, 0x0CC8}, {0x0CCA, 0x0CCD}, {0x0CE0, 0x0CE1}, {0x0D02, 0x0D03},
 518                 {0x0D05, 0x0D0C}, {0x0D0E, 0x0D10}, {0x0D12, 0x0D28}, {0x0D2A, 0x0D39},
 519                 {0x0D3E, 0x0D43}, {0x0D46, 0x0D48}, {0x0D4A, 0x0D4D}, {0x0D60, 0x0D61},
 520                 {0x0E01, 0x0E3A}, {0x0E40, 0x0E5B}, {0x0E81, 0x0E82}, {0x0E87, 0x0E88},
 521                 {0x0E94, 0x0E97}, {0x0E99, 0x0E9F}, {0x0EA1, 0x0EA3}, {0x0EAA, 0x0EAB},
 522                 {0x0EAD, 0x0EAE}, {0x0EB0, 0x0EB9}, {0x0EBB, 0x0EBD}, {0x0EC0, 0x0EC4},
 523                 {0x0EC8, 0x0ECD}, {0x0EDC, 0x0EDD}, {0x0F18, 0x0F19}, {0x0F3E, 0x0F47},
 524                 {0x0F49, 0x0F69}, {0x0F71, 0x0F84}, {0x0F86, 0x0F8B}, {0x0F90, 0x0F95},
 525                 {0x0F99, 0x0FAD}, {0x0FB1, 0x0FB7}, {0x10A0, 0x10C5}, {0x10D0, 0x10F6},
 526                 {0x3041, 0x3093}, {0x309B, 0x309C}, {0x30A1, 0x30F6}, {0x30FB, 0x30FC},
 527                 {0x3105, 0x312C}, {0x4E00, 0x9FA5}, {0xAC00, 0xD7A3}, {0x0660, 0x0669},
 528                 {0x06F0, 0x06F9}, {0x0966, 0x096F}, {0x09E6, 0x09EF}, {0x0A66, 0x0A6F},
 529                 {0x0AE6, 0x0AEF}, {0x0B66, 0x0B6F}, {0x0BE7, 0x0BEF}, {0x0C66, 0x0C6F},
 530                 {0x0CE6, 0x0CEF}, {0x0D66, 0x0D6F}, {0x0E50, 0x0E59}, {0x0ED0, 0x0ED9},
 531                 {0x0F20, 0x0F33}, {0x02B0, 0x02B8}, {0x02BD, 0x02C1}, {0x02D0, 0x02D1},
 532                 {0x02E0, 0x02E4}, {0x203F, 0x2040}, {0x210A, 0x2113}, {0x2118, 0x211D},
 533                 {0x212A, 0x2131}, {0x2133, 0x2138}, {0x2160, 0x2182}, {0x3005, 0x3007},
 534                 {0x3021, 0x3029},
 535         };
 536         for (size_t i = 0; i < sizeof(ranges)/sizeof(ranges[0]); ++i) {
 537                 if (ranges[i][0] <= v && v <= ranges[i][1])
 538                         return true;
 539         }
 540         for (size_t i = 0; i < sizeof(single_chars)/sizeof(single_chars[0]); ++i) {
 541                 if (v == single_chars[i])
 542                         return true;
 543         }
 544         return false;
 545 }
 546
 547 static bool is_universal_char_valid_identifier_c11(utf32 const v)
 548 {
 549         /* C11 Annex D.1 */
 550         if (                v == 0x000A8) return true;
 551         if (                v == 0x000AA) return true;
 552         if (                v == 0x000AD) return true;
 553         if (                v == 0x000AF) return true;
 554         if (0x000B2 <= v && v <= 0x000B5) return true;
 555         if (0x000B7 <= v && v <= 0x000BA) return true;
 556         if (0x000BC <= v && v <= 0x000BE) return true;
 557         if (0x000C0 <= v && v <= 0x000D6) return true;
 558         if (0x000D8 <= v && v <= 0x000F6) return true;
 559         if (0x000F8 <= v && v <= 0x000FF) return true;
 560         if (0x00100 <= v && v <= 0x0167F) return true;
 561         if (0x01681 <= v && v <= 0x0180D) return true;
 562         if (0x0180F <= v && v <= 0x01FFF) return true;
 563         if (0x0200B <= v && v <= 0x0200D) return true;
 564         if (0x0202A <= v && v <= 0x0202E) return true;
 565         if (0x0203F <= v && v <= 0x02040) return true;
 566         if (                v == 0x02054) return true;
 567         if (0x02060 <= v && v <= 0x0206F) return true;
 568         if (0x02070 <= v && v <= 0x0218F) return true;
 569         if (0x02460 <= v && v <= 0x024FF) return true;
 570         if (0x02776 <= v && v <= 0x02793) return true;
 571         if (0x02C00 <= v && v <= 0x02DFF) return true;
 572         if (0x02E80 <= v && v <= 0x02FFF) return true;
 573         if (0x03004 <= v && v <= 0x03007) return true;
 574         if (0x03021 <= v && v <= 0x0302F) return true;
 575         if (0x03031 <= v && v <= 0x0303F) return true;
 576         if (0x03040 <= v && v <= 0x0D7FF) return true;
 577         if (0x0F900 <= v && v <= 0x0FD3D) return true;
 578         if (0x0FD40 <= v && v <= 0x0FDCF) return true;
 579         if (0x0FDF0 <= v && v <= 0x0FE44) return true;
 580         if (0x0FE47 <= v && v <= 0x0FFFD) return true;
 581         if (0x10000 <= v && v <= 0x1FFFD) return true;
 582         if (0x20000 <= v && v <= 0x2FFFD) return true;
 583         if (0x30000 <= v && v <= 0x3FFFD) return true;
 584         if (0x40000 <= v && v <= 0x4FFFD) return true;
 585         if (0x50000 <= v && v <= 0x5FFFD) return true;
 586         if (0x60000 <= v && v <= 0x6FFFD) return true;
 587         if (0x70000 <= v && v <= 0x7FFFD) return true;
 588         if (0x80000 <= v && v <= 0x8FFFD) return true;
 589         if (0x90000 <= v && v <= 0x9FFFD) return true;
 590         if (0xA0000 <= v && v <= 0xAFFFD) return true;
 591         if (0xB0000 <= v && v <= 0xBFFFD) return true;
 592         if (0xC0000 <= v && v <= 0xCFFFD) return true;
 593         if (0xD0000 <= v && v <= 0xDFFFD) return true;
 594         if (0xE0000 <= v && v <= 0xEFFFD) return true;
 595         return false;
 596 }
 597
 598 static bool is_universal_char_valid_identifier(utf32 const v)
 599 {
 600         if (c_mode & _C11)
 601                 return is_universal_char_valid_identifier_c11(v);
 602         return is_universal_char_valid_identifier_c99(v);
 603 }
 604
 605 static bool is_universal_char_invalid_identifier_start(utf32 const v)
 606 {
 607         if (! (c_mode & _C11))
 608                 return false;
 609
 610         /* C11 Annex D.2 */
 611         if (0x0300 <= v && v <= 0x036F) return true;
 612         if (0x1DC0 <= v && v <= 0x1DFF) return true;
 613         if (0x20D0 <= v && v <= 0x20FF) return true;
 614         if (0xFE20 <= v && v <= 0xFE2F) return true;
 615         return false;
 616 }
 617
 618 /**
 619  * Parse an escape sequence.
 620  */
 621 static utf32 parse_escape_sequence(void)
 622 {
 623         eat('\\');
 624
 625         utf32 const ec = input.c;
 626         next_char();
 627
 628         switch (ec) {
 629         case '"':  return '"';
 630         case '\'': return '\'';
 631         case '\\': return '\\';
 632         case '?': return '\?';
 633         case 'a': return '\a';
 634         case 'b': return '\b';
 635         case 'f': return '\f';
 636         case 'n': return '\n';
 637         case 'r': return '\r';
 638         case 't': return '\t';
 639         case 'v': return '\v';
 640         case 'x':
 641                 return parse_hex_sequence();
 642         case '0':
 643         case '1':
 644         case '2':
 645         case '3':
 646         case '4':
 647         case '5':
 648         case '6':
 649         case '7':
 650                 return parse_octal_sequence(ec);
 651         case EOF:
 652                 parse_error("reached end of file while parsing escape sequence");
 653                 return EOF;
 654         /* \E is not documented, but handled, by GCC.  It is acceptable according
 655          * to §6.11.4, whereas \e is not. */
 656         case 'E':
 657         case 'e':
 658                 if (c_mode & _GNUC)
 659                         return 27;   /* hopefully 27 is ALWAYS the code for ESCAPE */
 660                 break;
 661
 662         case 'U': return parse_universal_char(8);
 663         case 'u': return parse_universal_char(4);
 664
 665         default:
 666                 break;
 667         }
 668         /* §6.4.4.4:8 footnote 64 */
 669         parse_error("unknown escape sequence");
 670         return EOF;
 671 }
 672
 673 static const char *identify_string(char *string)
 674 {
 675         const char *result = strset_insert(&stringset, string);
 676         if (result != string) {
 677                 obstack_free(&symbol_obstack, string);
 678         }
 679         return result;
 680 }
 681
 682 static string_t sym_make_string(string_encoding_t const enc)
 683 {
 684         obstack_1grow(&symbol_obstack, '\0');
 685         size_t      const len    = obstack_object_size(&symbol_obstack) - 1;
 686         char       *const string = obstack_finish(&symbol_obstack);
 687         char const *const result = identify_string(string);
 688         return (string_t){ result, len, enc };
 689 }
 690
 691 string_t make_string(char const *const string)
 692 {
 693         obstack_grow(&symbol_obstack, string, strlen(string));
 694         return sym_make_string(STRING_ENCODING_CHAR);
 695 }
 696
 697 static utf32 get_string_encoding_limit(string_encoding_t const enc)
 698 {
 699         switch (enc) {
 700         case STRING_ENCODING_CHAR:   return 0xFF;
 701         case STRING_ENCODING_CHAR16: return 0xFFFF;
 702         case STRING_ENCODING_CHAR32: return 0xFFFFFFFF;
 703         case STRING_ENCODING_UTF8:   return 0xFFFFFFFF;
 704         case STRING_ENCODING_WIDE:   return 0xFFFFFFFF; // FIXME depends on settings
 705         }
 706         panic("invalid string encoding");
 707 }
 708
 709 static void parse_string(utf32 const delimiter, token_kind_t const kind,
 710                          string_encoding_t const enc,
 711                          char const *const context)
 712 {
 713         eat(delimiter);
 714
 715         utf32 const limit = get_string_encoding_limit(enc);
 716         while (true) {
 717                 switch (input.c) {
 718                 case '\\': {
 719                         if (resolve_escape_sequences) {
 720                                 utf32 const tc = parse_escape_sequence();
 721                                 if (tc > limit) {
 722                                         warningf(WARN_OTHER, &pp_token.base.pos,
 723                                                  "escape sequence out of range");
 724                                 }
 725                                 if (enc == STRING_ENCODING_CHAR) {
 726                                         obstack_1grow(&symbol_obstack, tc);
 727                                 } else {
 728                                         obstack_grow_utf8(&symbol_obstack, tc);
 729                                 }
 730                         } else {
 731                                 obstack_1grow(&symbol_obstack, (char)input.c);
 732                                 next_char();
 733                                 obstack_1grow(&symbol_obstack, (char)input.c);
 734                                 next_char();
 735                         }
 736                         break;
 737                 }
 738
 739                 case NEWLINE:
 740                         errorf(&pp_token.base.pos, "newline while parsing %s", context);
 741                         break;
 742
 743                 case EOF:
 744                         errorf(&pp_token.base.pos, "EOF while parsing %s", context);
 745                         goto end_of_string;
 746
 747                 default:
 748                         if (input.c == delimiter) {
 749                                 next_char();
 750                                 goto end_of_string;
 751                         } else {
 752                                 obstack_grow_utf8(&symbol_obstack, input.c);
 753                                 next_char();
 754                                 break;
 755                         }
 756                 }
 757         }
 758
 759 end_of_string:
 760         pp_token.kind           = kind;
 761         pp_token.literal.string = sym_make_string(enc);
 762 }
 763
 764 static void parse_string_literal(string_encoding_t const enc)
 765 {
 766         parse_string('"', T_STRING_LITERAL, enc, "string literal");
 767 }
 768
 769 static void parse_character_constant(string_encoding_t const enc)
 770 {
 771         parse_string('\'', T_CHARACTER_CONSTANT, enc, "character constant");
 772         if (pp_token.literal.string.size == 0) {
 773                 parse_error("empty character constant");
 774         }
 775 }
 776
/**
 * To be used as `case SYMBOL_CASES_WITHOUT_E_P:`.  Expands to the case labels
 * for '$', '_' and all letters except 'e', 'E', 'p' and 'P'.
 * A '$' is only accepted if allow_dollar_in_symbol is set; otherwise control
 * jumps to the label `dollar_sign`, which the surrounding function has to
 * provide.
 * NOTE(review): e/E/p/P are presumably left out so that users can treat
 * exponent characters separately -- confirm at the use sites.
 */
#define SYMBOL_CASES_WITHOUT_E_P \
             '$': if (!allow_dollar_in_symbol) goto dollar_sign; \
        case 'a': \
        case 'b': \
        case 'c': \
        case 'd': \
        case 'f': \
        case 'g': \
        case 'h': \
        case 'i': \
        case 'j': \
        case 'k': \
        case 'l': \
        case 'm': \
        case 'n': \
        case 'o': \
        case 'q': \
        case 'r': \
        case 's': \
        case 't': \
        case 'u': \
        case 'v': \
        case 'w': \
        case 'x': \
        case 'y': \
        case 'z': \
        case 'A': \
        case 'B': \
        case 'C': \
        case 'D': \
        case 'F': \
        case 'G': \
        case 'H': \
        case 'I': \
        case 'J': \
        case 'K': \
        case 'L': \
        case 'M': \
        case 'N': \
        case 'O': \
        case 'Q': \
        case 'R': \
        case 'S': \
        case 'T': \
        case 'U': \
        case 'V': \
        case 'W': \
        case 'X': \
        case 'Y': \
        case 'Z': \
        case '_'
 828
 829 #define SYMBOL_CASES \
 830              SYMBOL_CASES_WITHOUT_E_P: \
 831         case 'e': \
 832         case 'p': \
 833         case 'E': \
 834         case 'P'
 835
 836 #define DIGIT_CASES \
 837              '0':  \
 838         case '1':  \
 839         case '2':  \
 840         case '3':  \
 841         case '4':  \
 842         case '5':  \
 843         case '6':  \
 844         case '7':  \
 845         case '8':  \
 846         case '9'
 847
 848 static void start_expanding(pp_definition_t *definition)
 849 {
 850         definition->parent_expansion = current_expansion;
 851         definition->expand_pos       = 0;
 852         definition->is_expanding     = true;
 853         if (definition->list_len > 0) {
 854                 definition->token_list[0].had_whitespace
 855                         = info.had_whitespace;
 856         }
 857         current_expansion = definition;
 858 }
 859
 860 static void finished_expanding(pp_definition_t *definition)
 861 {
 862         assert(definition->is_expanding);
 863         pp_definition_t *parent = definition->parent_expansion;
 864         definition->parent_expansion = NULL;
 865         definition->is_expanding     = false;
 866
 867         /* stop further expanding once we expanded a parameter used in a
 868          * sub macro-call */
 869         if (definition == argument_expanding)
 870                 argument_expanding = NULL;
 871
 872         assert(current_expansion == definition);
 873         current_expansion = parent;
 874 }
 875
 876 static void grow_string_escaped(struct obstack *obst, const string_t *string, char const *delimiter)
 877 {
 878         char const *prefix = get_string_encoding_prefix(string->encoding);
 879         obstack_printf(obst, "%s%s", prefix, delimiter);
 880         size_t      size = string->size;
 881         const char *str  = string->begin;
 882         if (resolve_escape_sequences) {
 883                 obstack_grow(obst, str, size);
 884         } else {
 885                 for (size_t i = 0; i < size; ++i) {
 886                         const char c = str[i];
 887                         if (c == '\\' || c == '"')
 888                                 obstack_1grow(obst, '\\');
 889                         obstack_1grow(obst, c);
 890                 }
 891         }
 892         obstack_printf(obst, "%s", delimiter);
 893 }
 894
 895 static void grow_token(struct obstack *obst, const token_t *token)
 896 {
 897         switch (token->kind) {
 898         case T_NUMBER:
 899                 obstack_grow(obst, token->literal.string.begin, token->literal.string.size);
 900                 break;
 901
 902         case T_STRING_LITERAL: {
 903                 char const *const delimiter = resolve_escape_sequences ? "\"" : "\\\"";
 904                 grow_string_escaped(obst, &token->literal.string, delimiter);
 905                 break;
 906         }
 907
 908         case T_CHARACTER_CONSTANT:
 909                 grow_string_escaped(obst, &token->literal.string, "'");
 910                 break;
 911
 912         case T_IDENTIFIER:
 913         default: {
 914                 const char *str = token->base.symbol->string;
 915                 size_t      len = strlen(str);
 916                 obstack_grow(obst, str, len);
 917                 break;
 918         }
 919         }
 920 }
 921
 922 static void stringify(const pp_definition_t *definition)
 923 {
 924         assert(obstack_object_size(&symbol_obstack) == 0);
 925
 926         size_t list_len = definition->list_len;
 927         for (size_t p = 0; p < list_len; ++p) {
 928                 const saved_token_t *saved = &definition->token_list[p];
 929                 if (p > 0 && saved->had_whitespace)
 930                         obstack_1grow(&symbol_obstack, ' ');
 931                 grow_token(&symbol_obstack, &saved->token);
 932         }
 933         pp_token.kind           = T_STRING_LITERAL;
 934         pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
 935 }
 936
 937 static inline void set_punctuator(token_kind_t const kind)
 938 {
 939         pp_token.kind        = kind;
 940         pp_token.base.symbol = token_symbols[kind];
 941 }
 942
 943 static inline void set_digraph(token_kind_t const kind, symbol_t *const symbol)
 944 {
 945         pp_token.kind        = kind;
 946         pp_token.base.symbol = symbol;
 947 }
 948
 949 /**
 950  * returns next final token from a preprocessor macro expansion
 951  */
 952 static bool expand_next(void)
 953 {
 954         if (current_expansion == NULL)
 955                 return false;
 956
 957 restart:;
 958         size_t pos = current_expansion->expand_pos;
 959         if (pos >= current_expansion->list_len) {
 960                 finished_expanding(current_expansion);
 961                 /* it was the outermost expansion, parse pptoken normally */
 962                 if (current_expansion == NULL) {
 963                         return false;
 964                 }
 965                 goto restart;
 966         }
 967         const saved_token_t *saved = &current_expansion->token_list[pos++];
 968         pp_token = saved->token;
 969         if (pp_token.kind == '#') {
 970                 if (pos < current_expansion->list_len) {
 971                         const saved_token_t *next = &current_expansion->token_list[pos];
 972                         if (next->token.kind == T_MACRO_PARAMETER) {
 973                                 pp_definition_t *def = next->token.macro_parameter.def;
 974                                 assert(def != NULL && def->is_parameter);
 975                                 stringify(def);
 976                                 ++pos;
 977                         }
 978                 }
 979         }
 980
 981         if (current_expansion->expand_pos > 0)
 982                 info.had_whitespace = saved->had_whitespace;
 983         current_expansion->expand_pos = pos;
 984         pp_token.base.pos             = expansion_pos;
 985
 986         return true;
 987 }
 988
 989 /**
 990  * Returns the next token kind found when continuing the current expansions
 991  * without starting new sub-expansions.
 992  */
 993 static token_kind_t peek_expansion(void)
 994 {
 995         for (pp_definition_t *e = current_expansion; e; e = e->parent_expansion) {
 996                 if (e->expand_pos < e->list_len)
 997                         return e->token_list[e->expand_pos].token.kind;
 998         }
 999         return T_EOF;
1000 }
1001
1002 static void skip_line_comment(void)
1003 {
1004         info.had_whitespace = true;
1005         while (true) {
1006                 switch (input.c) {
1007                 case EOF:
1008                         return;
1009
1010                 case '\r':
1011                 case '\n':
1012                         return;
1013
1014                 default:
1015                         next_char();
1016                         break;
1017                 }
1018         }
1019 }
1020
/**
 * Skips the remainder of a block comment and marks the next token as
 * preceded by whitespace. The callers visible in this file consume the
 * comment opener before calling. Reports an error if the input ends before
 * the comment is closed.
 */
static void skip_multiline_comment(void)
{
        info.had_whitespace = true;

        /* remembered for the diagnostic about an unterminated comment */
        position_t const start_pos = input.pos;
        while (true) {
                switch (input.c) {
                case '/':
                        next_char();
                        if (input.c == '*') {
                                /* TODO: nested comment, warn here */
                        }
                        break;
                case '*':
                        next_char();
                        if (input.c == '/') {
                                /* the comment ends on a line other than
                                 * input.output_line: use the current column as
                                 * the whitespace count at line begin */
                                if (input.pos.lineno != input.output_line)
                                        info.whitespace_at_line_begin = input.pos.colno;
                                next_char();
                                return;
                        }
                        break;

                /* NOTE(review): NEWLINE is a macro defined outside this chunk;
                 * presumably it consumes the line break itself - confirm */
                case NEWLINE:
                        break;

                case EOF:
                        errorf(&start_pos, "at end of file while looking for comment end");
                        return;

                default:
                        next_char();
                        break;
                }
        }
}
1057
1058 static bool skip_till_newline(bool stop_at_non_whitespace)
1059 {
1060         bool res = false;
1061         while (true) {
1062                 switch (input.c) {
1063                 case ' ':
1064                 case '\t':
1065                         next_char();
1066                         continue;
1067
1068                 case '/':
1069                         next_char();
1070                         if (input.c == '/') {
1071                                 next_char();
1072                                 skip_line_comment();
1073                                 continue;
1074                         } else if (input.c == '*') {
1075                                 next_char();
1076                                 skip_multiline_comment();
1077                                 continue;
1078                         } else {
1079                                 put_back(input.c);
1080                                 input.c = '/';
1081                         }
1082                         return true;
1083
1084                 case NEWLINE:
1085                         return res;
1086
1087                 default:
1088                         if (stop_at_non_whitespace)
1089                                 return false;
1090                         res = true;
1091                         next_char();
1092                         continue;
1093                 }
1094         }
1095 }
1096
/**
 * Skips blanks, tabs, line breaks and comments, updating the whitespace
 * bookkeeping in info along the way. Returns with the first other character
 * as the current input character.
 */
static void skip_whitespace(void)
{
        while (true) {
                switch (input.c) {
                case ' ':
                case '\t':
                        ++info.whitespace_at_line_begin;
                        info.had_whitespace = true;
                        next_char();
                        continue;

                /* NOTE(review): NEWLINE is a macro defined outside this chunk;
                 * presumably it consumes the line break itself - confirm */
                case NEWLINE:
                        info.at_line_begin  = true;
                        info.had_whitespace = true;
                        info.whitespace_at_line_begin = 0;
                        continue;

                case '/':
                        next_char();
                        if (input.c == '/') {
                                next_char();
                                skip_line_comment();
                                continue;
                        } else if (input.c == '*') {
                                next_char();
                                skip_multiline_comment();
                                continue;
                        } else {
                                /* a plain '/': undo the lookahead and stop */
                                put_back(input.c);
                                input.c = '/';
                        }
                        return;

                default:
                        return;
                }
        }
}
1135
1136 static inline void eat_pp(pp_token_kind_t const kind)
1137 {
1138         assert(pp_token.base.symbol->pp_ID == kind);
1139         (void) kind;
1140         next_input_token();
1141 }
1142
1143 static inline void eat_token(token_kind_t const kind)
1144 {
1145         assert(pp_token.kind == kind);
1146         (void)kind;
1147         next_input_token();
1148 }
1149
1150 static string_encoding_t identify_encoding_prefix(symbol_t *const sym)
1151 {
1152         if (sym == symbol_L) return STRING_ENCODING_WIDE;
1153         if (c_mode & _C11) {
1154                 if (sym == symbol_U)  return STRING_ENCODING_CHAR32;
1155                 if (sym == symbol_u)  return STRING_ENCODING_CHAR16;
1156                 if (sym == symbol_u8) return STRING_ENCODING_UTF8;
1157         }
1158         return STRING_ENCODING_CHAR;
1159 }
1160
1161 static void parse_symbol(void)
1162 {
1163         assert(obstack_object_size(&symbol_obstack) == 0);
1164         while (true) {
1165                 switch (input.c) {
1166                 case DIGIT_CASES:
1167                 case SYMBOL_CASES:
1168                         obstack_1grow(&symbol_obstack, (char) input.c);
1169                         next_char();
1170                         break;
1171
1172                 case '\\':
1173                         next_char();
1174                         switch (input.c) {
1175                         {
1176                                 unsigned n;
1177                         case 'U': n = 8; goto universal;
1178                         case 'u': n = 4; goto universal;
1179 universal:
1180                                 if (!resolve_escape_sequences) {
1181                                         obstack_1grow(&symbol_obstack, '\\');
1182                                         obstack_1grow(&symbol_obstack, input.c);
1183                                 }
1184                                 next_char();
1185                                 utf32 const v = parse_universal_char(n);
1186                                 if (!is_universal_char_valid_identifier(v)) {
1187                                         if (is_universal_char_valid(v)) {
1188                                                 errorf(&input.pos,
1189                                                            "universal character \\%c%0*X is not valid in an identifier",
1190                                                            n == 4 ? 'u' : 'U', (int)n, v);
1191                                         }
1192                                 } else if (obstack_object_size(&symbol_obstack) == 0 && is_universal_char_invalid_identifier_start(v)) {
1193                                         errorf(&input.pos,
1194                                                    "universal character \\%c%0*X is not valid as start of an identifier",
1195                                                    n == 4 ? 'u' : 'U', (int)n, v);
1196                                 } else if (resolve_escape_sequences) {
1197                                         obstack_grow_utf8(&symbol_obstack, v);
1198                                 }
1199                                 break;
1200                         }
1201
1202                         default:
1203                                 put_back(input.c);
1204                                 input.c = '\\';
1205                                 goto end_symbol;
1206                         }
1207
1208                 default:
1209 dollar_sign:
1210                         goto end_symbol;
1211                 }
1212         }
1213
1214 end_symbol:
1215         obstack_1grow(&symbol_obstack, '\0');
1216         char *string = obstack_finish(&symbol_obstack);
1217
1218         symbol_t *symbol = symbol_table_insert(string);
1219
1220         /* Might be a prefixed string or character constant: L/U/u/u8"string". */
1221         if (input.c == '"') {
1222                 string_encoding_t const enc = identify_encoding_prefix(symbol);
1223                 if (enc != STRING_ENCODING_CHAR) {
1224                         parse_string_literal(enc);
1225                         return;
1226                 }
1227         } else if (input.c == '\'') {
1228                 string_encoding_t const enc = identify_encoding_prefix(symbol);
1229                 if (enc != STRING_ENCODING_CHAR) {
1230                         if (enc == STRING_ENCODING_UTF8) {
1231                                 errorf(&pp_token.base.pos,
1232                                        "'u8' is not a valid encoding for a chracter constant");
1233                         }
1234                         parse_character_constant(enc);
1235                         return;
1236                 }
1237         }
1238
1239         pp_token.kind        = symbol->ID;
1240         pp_token.base.symbol = symbol;
1241
1242         /* we can free the memory from symbol obstack if we already had an entry in
1243          * the symbol table */
1244         if (symbol->string != string) {
1245                 obstack_free(&symbol_obstack, string);
1246         }
1247 }
1248
1249 static void parse_number(void)
1250 {
1251         obstack_1grow(&symbol_obstack, (char) input.c);
1252         next_char();
1253
1254         while (true) {
1255                 switch (input.c) {
1256                 case '.':
1257                 case DIGIT_CASES:
1258                 case SYMBOL_CASES_WITHOUT_E_P:
1259                         obstack_1grow(&symbol_obstack, (char) input.c);
1260                         next_char();
1261                         break;
1262
1263                 case 'e':
1264                 case 'p':
1265                 case 'E':
1266                 case 'P':
1267                         obstack_1grow(&symbol_obstack, (char) input.c);
1268                         next_char();
1269                         if (input.c == '+' || input.c == '-') {
1270                                 obstack_1grow(&symbol_obstack, (char) input.c);
1271                                 next_char();
1272                         }
1273                         break;
1274
1275                 default:
1276 dollar_sign:
1277                         goto end_number;
1278                 }
1279         }
1280
1281 end_number:
1282         pp_token.kind           = T_NUMBER;
1283         pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
1284 }
1285
/*
 * Helper macros for lexing punctuators that consist of several characters.
 * A sequence has the form
 *
 *     MAYBE_PROLOG  MAYBE(...) ...  ELSE(...)
 *
 * MAYBE_PROLOG advances to the next character and opens a switch over it,
 * MAYBE/MAYBE_DIGRAPH add one alternative each, and ELSE/ELSE_CODE add the
 * default branch and close the switch.
 */

/* Consume the current character and dispatch on the one following it. */
#define MAYBE_PROLOG  next_char(); switch (input.c) {

/* If the character is ch: consume it and finish with punctuator kind. */
#define MAYBE(ch, kind) \
        case ch: next_char(); set_punctuator(kind); return;

/* Like MAYBE, but the token gets the given symbol via set_digraph(). */
#define MAYBE_DIGRAPH(ch, kind, symbol) \
        case ch: next_char(); set_digraph(kind, symbol); return;

/* Default branch running code; closes the switch opened by MAYBE_PROLOG. */
#define ELSE_CODE(code)  default: code }

/* Default branch: finish with punctuator kind, consuming nothing more. */
#define ELSE(kind)  ELSE_CODE(set_punctuator(kind); return;)
1308
/**
 * Identifies the next preprocessing token contained in the input stream and
 * stores it in pp_token. No macro expansion is performed.
 *
 * Whitespace and comments in front of the token are skipped and recorded in
 * info. When the end of an input is reached while further inputs are stacked,
 * lexing continues with the restored input; otherwise a T_EOF token is
 * produced.
 */
static void next_input_token(void)
{
        /* take over whitespace information recorded ahead of time in
         * next_info, otherwise start with a clean state */
        if (next_info.had_whitespace) {
                info = next_info;
                next_info.had_whitespace = false;
        } else {
                info.at_line_begin  = false;
                info.had_whitespace = false;
        }
restart:
        pp_token.base.pos    = input.pos;
        pp_token.base.symbol = NULL;

        switch (input.c) {
        case ' ':
        case '\t':
                info.whitespace_at_line_begin++;
                info.had_whitespace = true;
                next_char();
                goto restart;

        /* NOTE(review): NEWLINE is a macro defined outside this chunk;
         * presumably it consumes the line break itself - confirm */
        case NEWLINE:
                info.at_line_begin            = true;
                info.had_whitespace           = true;
                info.whitespace_at_line_begin = 0;
                goto restart;

        case SYMBOL_CASES:
                parse_symbol();
                return;

        case DIGIT_CASES:
                parse_number();
                return;

        case '"':
                parse_string_literal(STRING_ENCODING_CHAR);
                return;

        case '\'':
                parse_character_constant(STRING_ENCODING_CHAR);
                return;

        /* '.' may start a number, be part of "..." or stand alone */
        case '.':
                MAYBE_PROLOG
                        case '0':
                        case '1':
                        case '2':
                        case '3':
                        case '4':
                        case '5':
                        case '6':
                        case '7':
                        case '8':
                        case '9':
                                /* restore the '.' so that parse_number() sees
                                 * the complete number */
                                put_back(input.c);
                                input.c = '.';
                                parse_number();
                                return;

                        case '.':
                                MAYBE_PROLOG
                                MAYBE('.', T_DOTDOTDOT)
                                ELSE_CODE(
                                        put_back(input.c);
                                        input.c = '.';
                                        set_punctuator('.');
                                        return;
                                )
                ELSE('.')
        case '&':
                MAYBE_PROLOG
                MAYBE('&', T_ANDAND)
                MAYBE('=', T_ANDEQUAL)
                ELSE('&')
        case '*':
                MAYBE_PROLOG
                MAYBE('=', T_ASTERISKEQUAL)
                ELSE('*')
        case '+':
                MAYBE_PROLOG
                MAYBE('+', T_PLUSPLUS)
                MAYBE('=', T_PLUSEQUAL)
                ELSE('+')
        case '-':
                MAYBE_PROLOG
                MAYBE('>', T_MINUSGREATER)
                MAYBE('-', T_MINUSMINUS)
                MAYBE('=', T_MINUSEQUAL)
                ELSE('-')
        case '!':
                MAYBE_PROLOG
                MAYBE('=', T_EXCLAMATIONMARKEQUAL)
                ELSE('!')
        /* '/' may also start a comment, which counts as whitespace */
        case '/':
                MAYBE_PROLOG
                MAYBE('=', T_SLASHEQUAL)
                case '*':
                        next_char();
                        skip_multiline_comment();
                        goto restart;
                case '/':
                        next_char();
                        skip_line_comment();
                        goto restart;
                ELSE('/')
        /* '%' may start one of the digraphs for '}', '#' and '##' */
        case '%':
                MAYBE_PROLOG
                MAYBE_DIGRAPH('>', '}', symbol_percentgreater)
                MAYBE('=', T_PERCENTEQUAL)
                case ':':
                        MAYBE_PROLOG
                        case '%':
                                MAYBE_PROLOG
                                MAYBE_DIGRAPH(':', T_HASHHASH, symbol_percentcolonpercentcolon)
                                ELSE_CODE(
                                        put_back(input.c);
                                        input.c = '%';
                                        goto digraph_percentcolon;
                                )
                        ELSE_CODE(
digraph_percentcolon:
                                set_digraph('#', symbol_percentcolon);
                                return;
                        )
                ELSE('%')
        case '<':
                MAYBE_PROLOG
                MAYBE_DIGRAPH(':', '[', symbol_lesscolon)
                MAYBE_DIGRAPH('%', '{', symbol_lesspercent)
                MAYBE('=', T_LESSEQUAL)
                case '<':
                        MAYBE_PROLOG
                        MAYBE('=', T_LESSLESSEQUAL)
                        ELSE(T_LESSLESS)
                ELSE('<')
        case '>':
                MAYBE_PROLOG
                MAYBE('=', T_GREATEREQUAL)
                case '>':
                        MAYBE_PROLOG
                        MAYBE('=', T_GREATERGREATEREQUAL)
                        ELSE(T_GREATERGREATER)
                ELSE('>')
        case '^':
                MAYBE_PROLOG
                MAYBE('=', T_CARETEQUAL)
                ELSE('^')
        case '|':
                MAYBE_PROLOG
                MAYBE('=', T_PIPEEQUAL)
                MAYBE('|', T_PIPEPIPE)
                ELSE('|')
        /* "::" is only a single token in C++ mode */
        case ':':
                MAYBE_PROLOG
                MAYBE_DIGRAPH('>', ']', symbol_colongreater)
                case ':':
                        if (c_mode & _CXX) {
                                next_char();
                                set_punctuator(T_COLONCOLON);
                                return;
                        }
                        /* FALLTHROUGH */
                ELSE(':')
        case '=':
                MAYBE_PROLOG
                MAYBE('=', T_EQUALEQUAL)
                ELSE('=')
        case '#':
                MAYBE_PROLOG
                MAYBE('#', T_HASHHASH)
                ELSE('#')

        /* punctuators that consist of a single character only */
        case '?':
        case '[':
        case ']':
        case '(':
        case ')':
        case '{':
        case '}':
        case '~':
        case ';':
        case ',':
                set_punctuator(input.c);
                next_char();
                return;

        case EOF:
                if (input_stack != NULL) {
                        /* further inputs are stacked: close this one and
                         * continue with the restored input */
                        fclose(close_pp_input());
                        pop_restore_input();
                        if (out)
                                fputc('\n', out);
                        if (input.c == (utf32)EOF)
                                --input.pos.lineno;
                        print_line_directive(&input.pos, "2");
                        goto restart;
                } else {
                        info.at_line_begin = true;
                        set_punctuator(T_EOF);
                }
                return;

        case '\\':
                /* look one character ahead: a backslash followed by 'u' or
                 * 'U' is handed to parse_symbol() */
                next_char();
                int next_c = input.c;
                put_back(input.c);
                input.c = '\\';
                if (next_c == 'U' || next_c == 'u') {
                        parse_symbol();
                        return;
                }
                /* FALLTHROUGH */
        default:
dollar_sign:
                if (error_on_unknown_chars) {
                        errorf(&pp_token.base.pos, "unknown character '%lc' found", input.c);
                        next_char();
                        goto restart;
                } else {
                        /* hand the character on as a token of its own */
                        assert(obstack_object_size(&symbol_obstack) == 0);
                        obstack_grow_utf8(&symbol_obstack, input.c);
                        obstack_1grow(&symbol_obstack, '\0');
                        char     *const string = obstack_finish(&symbol_obstack);
                        symbol_t *const symbol = symbol_table_insert(string);
                        if (symbol->string != string)
                                obstack_free(&symbol_obstack, string);

                        pp_token.kind        = T_UNKNOWN_CHAR;
                        pp_token.base.symbol = symbol;
                        next_char();
                        return;
                }
        }
}
1546
1547 static void print_quoted_string(const char *const string)
1548 {
1549         fputc('"', out);
1550         for (const char *c = string; *c != 0; ++c) {
1551                 switch (*c) {
1552                 case '"': fputs("\\\"", out); break;
1553                 case '\\':  fputs("\\\\", out); break;
1554                 case '\a':  fputs("\\a", out); break;
1555                 case '\b':  fputs("\\b", out); break;
1556                 case '\f':  fputs("\\f", out); break;
1557                 case '\n':  fputs("\\n", out); break;
1558                 case '\r':  fputs("\\r", out); break;
1559                 case '\t':  fputs("\\t", out); break;
1560                 case '\v':  fputs("\\v", out); break;
1561                 case '\?':  fputs("\\?", out); break;
1562                 default:
1563                         if (!isprint(*c)) {
1564                                 fprintf(out, "\\%03o", (unsigned)*c);
1565                                 break;
1566                         }
1567                         fputc(*c, out);
1568                         break;
1569                 }
1570         }
1571         fputc('"', out);
1572 }
1573
1574 static void print_line_directive(const position_t *pos, const char *add)
1575 {
1576         if (!out)
1577                 return;
1578
1579         fprintf(out, "# %u ", pos->lineno);
1580         print_quoted_string(pos->input_name);
1581         if (add != NULL) {
1582                 fputc(' ', out);
1583                 fputs(add, out);
1584         }
1585         if (pos->is_system_header) {
1586                 fputs(" 3", out);
1587         }
1588
1589         printed_input_name = pos->input_name;
1590         input.output_line  = pos->lineno-1;
1591 }
1592
1593 static bool emit_newlines(void)
1594 {
1595         if (!out)
1596                 return true;
1597
1598         unsigned delta = pp_token.base.pos.lineno - input.output_line;
1599         if (delta == 0)
1600                 return false;
1601
1602         if (delta >= 9) {
1603                 fputc('\n', out);
1604                 print_line_directive(&pp_token.base.pos, NULL);
1605                 fputc('\n', out);
1606         } else {
1607                 for (unsigned i = 0; i < delta; ++i) {
1608                         fputc('\n', out);
1609                 }
1610         }
1611         input.output_line = pp_token.base.pos.lineno;
1612
1613         unsigned whitespace = info.whitespace_at_line_begin;
1614         /* make sure there is at least 1 whitespace before a (macro-expanded)
1615          * '#' at line begin. I'm not sure why this is good, but gcc does it. */
1616         if (pp_token.kind == '#' && whitespace == 0)
1617                 ++whitespace;
1618         for (unsigned i = 0; i < whitespace; ++i)
1619                 fputc(' ', out);
1620
1621         return true;
1622 }
1623
1624 void set_preprocessor_output(FILE *output)
1625 {
1626         out = output;
1627         if (out != NULL) {
1628                 error_on_unknown_chars   = false;
1629                 resolve_escape_sequences = false;
1630         } else {
1631                 error_on_unknown_chars   = true;
1632                 resolve_escape_sequences = true;
1633         }
1634 }
1635
1636 void emit_pp_token(void)
1637 {
1638         if (!emit_newlines() &&
1639             (info.had_whitespace || tokens_would_paste(last_token, pp_token.kind)))
1640                 fputc(' ', out);
1641
1642         switch (pp_token.kind) {
1643         case T_NUMBER:
1644                 fputs(pp_token.literal.string.begin, out);
1645                 break;
1646
1647         case T_STRING_LITERAL:
1648                 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1649                 fputc('"', out);
1650                 fputs(pp_token.literal.string.begin, out);
1651                 fputc('"', out);
1652                 break;
1653
1654         case T_CHARACTER_CONSTANT:
1655                 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1656                 fputc('\'', out);
1657                 fputs(pp_token.literal.string.begin, out);
1658                 fputc('\'', out);
1659                 break;
1660
1661         case T_MACRO_PARAMETER:
1662                 panic("macro parameter not expanded");
1663
1664         default:
1665                 fputs(pp_token.base.symbol->string, out);
1666                 break;
1667         }
1668         last_token = pp_token.kind;
1669 }
1670
1671 static void eat_pp_directive(void)
1672 {
1673         while (!info.at_line_begin) {
1674                 next_input_token();
1675         }
1676 }
1677
1678 static bool strings_equal(const string_t *string1, const string_t *string2)
1679 {
1680         size_t size = string1->size;
1681         if (size != string2->size)
1682                 return false;
1683
1684         const char *c1 = string1->begin;
1685         const char *c2 = string2->begin;
1686         for (size_t i = 0; i < size; ++i, ++c1, ++c2) {
1687                 if (*c1 != *c2)
1688                         return false;
1689         }
1690         return true;
1691 }
1692
1693 static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
1694 {
1695         if (token1->kind != token2->kind)
1696                 return false;
1697
1698         switch (token1->kind) {
1699         case T_NUMBER:
1700         case T_CHARACTER_CONSTANT:
1701         case T_STRING_LITERAL:
1702                 return strings_equal(&token1->literal.string, &token2->literal.string);
1703
1704         case T_MACRO_PARAMETER:
1705                 return token1->macro_parameter.def->symbol
1706                     == token2->macro_parameter.def->symbol;
1707
1708         default:
1709                 return token1->base.symbol == token2->base.symbol;
1710         }
1711 }
1712
1713 static bool pp_definitions_equal(const pp_definition_t *definition1,
1714                                  const pp_definition_t *definition2)
1715 {
1716         if (definition1->list_len != definition2->list_len)
1717                 return false;
1718
1719         size_t               len = definition1->list_len;
1720         const saved_token_t *t1  = definition1->token_list;
1721         const saved_token_t *t2  = definition2->token_list;
1722         for (size_t i = 0; i < len; ++i, ++t1, ++t2) {
1723                 if (!pp_tokens_equal(&t1->token, &t2->token))
1724                         return false;
1725                 if (t1->had_whitespace != t2->had_whitespace)
1726                         return false;
1727         }
1728         return true;
1729 }
1730
1731 static void missing_macro_param_error(void)
1732 {
1733         errorf(&pp_token.base.pos, "'#' is not followed by a macro parameter");
1734 }
1735
1736 static bool is_defineable_token(char const *const context)
1737 {
1738         if (info.at_line_begin) {
1739                 errorf(&pp_token.base.pos, "unexpected end of line after %s", context);
1740         }
1741
1742         symbol_t *const symbol = pp_token.base.symbol;
1743         if (!symbol)
1744                 goto no_ident;
1745
1746         if (pp_token.kind != T_IDENTIFIER) {
1747                 switch (symbol->string[0]) {
1748                 case SYMBOL_CASES:
1749 dollar_sign:
1750                         break;
1751
1752                 default:
1753 no_ident:
1754                         errorf(&pp_token.base.pos, "expected identifier after %s, got %K",
1755                                context, &pp_token);
1756                         return false;
1757                 }
1758         }
1759
1760         /* TODO turn this into a flag in pp_def. */
1761         switch (symbol->pp_ID) {
1762         /* §6.10.8:4 */
1763         case TP_defined:
1764                 errorf(&pp_token.base.pos, "%K cannot be used as macro name in %s",
1765                        &pp_token, context);
1766                 return false;
1767
1768         default:
1769                 return true;
1770         }
1771 }
1772
1773 static void parse_define_directive(void)
1774 {
             /* Parses a #define directive: the macro name, an optional
              * parameter list and the replacement token list. The finished
              * definition is installed in the pp_definition field of the
              * name's symbol. In skip mode the directive is only skipped. */
1775         eat_pp(TP_define);
1776         if (skip_mode) {
1777                 eat_pp_directive();
1778                 return;
1779         }
1780
1781         assert(obstack_object_size(&pp_obstack) == 0);
1782
1783         if (!is_defineable_token("#define"))
1784                 goto error_out;
1785         symbol_t *const symbol = pp_token.base.symbol;
1786
1787         pp_definition_t *new_definition
1788                 = obstack_alloc(&pp_obstack, sizeof(new_definition[0]));
1789         memset(new_definition, 0, sizeof(new_definition[0]));
1790         new_definition->symbol = symbol;
1791         new_definition->pos    = input.pos;
1792
1793         /* this is probably the only place where spaces are significant in the
1794          * lexer (except for the fact that they separate tokens). #define b(x)
1795          * is something else than #define b (x) */
1796         if (input.c == '(') {
1797                 next_input_token();
1798                 eat_token('(');
1799
                     /* collect the parameters as an array growing on pp_obstack */
1800                 while (true) {
1801                         switch (pp_token.kind) {
1802                         case T_DOTDOTDOT:
1803                                 new_definition->is_variadic = true;
1804                                 eat_token(T_DOTDOTDOT);
1805                                 if (pp_token.kind != ')') {
1806                                         errorf(&input.pos,
1807                                                         "'...' not at end of macro argument list");
1808                                         goto error_out;
1809                                 }
1810                                 break;
1811
1812                         case T_IDENTIFIER: {
1813                                 pp_definition_t parameter;
1814                                 memset(&parameter, 0, sizeof(parameter));
1815                                 parameter.pos          = pp_token.base.pos;
1816                                 parameter.symbol       = pp_token.base.symbol;
1817                                 parameter.is_parameter = true;
1818                                 obstack_grow(&pp_obstack, &parameter, sizeof(parameter));
1819                                 eat_token(T_IDENTIFIER);
1820
1821                                 if (pp_token.kind == ',') {
1822                                         eat_token(',');
1823                                         break;
1824                                 }
1825
1826                                 if (pp_token.kind != ')') {
1827                                         errorf(&pp_token.base.pos,
1828                                                "expected ',' or ')' after identifier, got %K",
1829                                                &pp_token);
1830                                         goto error_out;
1831                                 }
1832                                 break;
1833                         }
1834
1835                         case ')':
1836                                 eat_token(')');
1837                                 goto finish_argument_list;
1838
1839                         default:
1840                                 errorf(&pp_token.base.pos,
1841                                        "expected identifier, '...' or ')' in #define argument list, got %K",
1842                                        &pp_token);
1843                                 goto error_out;
1844                         }
1845                 }
1846
1847         finish_argument_list:
1848                 new_definition->has_parameters = true;
1849                 size_t size = obstack_object_size(&pp_obstack);
1850                 new_definition->n_parameters
1851                         = size / sizeof(new_definition->parameters[0]);
1852                 new_definition->parameters = obstack_finish(&pp_obstack);
                     /* Temporarily bind every parameter symbol to its parameter
                      * definition so the replacement list scan below can tell
                      * parameters apart from other identifiers. The previous
                      * binding is kept in parent_expansion and restored later. */
1853                 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1854                         pp_definition_t *param    = &new_definition->parameters[i];
1855                         symbol_t        *symbol   = param->symbol;
1856                         pp_definition_t *previous = symbol->pp_definition;
                             /* symbol already bound to a parameter of this macro */
1857                         if (previous != NULL
1858                             && previous->function_definition == new_definition) {
1859                                 errorf(&param->pos, "duplicate macro parameter '%Y'", symbol);
1860                                 param->symbol = sym_anonymous;
1861                                 continue;
1862                         }
1863                         param->parent_expansion    = previous;
1864                         param->function_definition = new_definition;
1865                         symbol->pp_definition      = param;
1866                 }
1867         } else {
1868                 next_input_token();
1869         }
1870
1871         /* construct token list */
1872         assert(obstack_object_size(&pp_obstack) == 0);
1873         bool next_must_be_param = false;
1874         while (!info.at_line_begin) {
                     /* identifiers naming a parameter of this macro become
                      * T_MACRO_PARAMETER tokens */
1875                 if (pp_token.kind == T_IDENTIFIER) {
1876                         const symbol_t  *symbol     = pp_token.base.symbol;
1877                         pp_definition_t *definition = symbol->pp_definition;
1878                         if (definition != NULL
1879                             && definition->function_definition == new_definition) {
1880                             pp_token.kind                = T_MACRO_PARAMETER;
1881                             pp_token.macro_parameter.def = definition;
1882                         }
1883                 }
1884                 if (next_must_be_param && pp_token.kind != T_MACRO_PARAMETER) {
1885                         missing_macro_param_error();
1886                 }
1887                 saved_token_t saved_token;
1888                 saved_token.token = pp_token;
1889                 saved_token.had_whitespace = info.had_whitespace;
1890                 obstack_grow(&pp_obstack, &saved_token, sizeof(saved_token));
                     /* in a macro with a parameter list a '#' has to be
                      * followed by a parameter */
1891                 next_must_be_param
1892                         = new_definition->has_parameters && pp_token.kind == '#';
1893                 next_input_token();
1894         }
1895         if (next_must_be_param)
1896                 missing_macro_param_error();
1897
1898         new_definition->list_len   = obstack_object_size(&pp_obstack)
1899                 / sizeof(new_definition->token_list[0]);
1900         new_definition->token_list = obstack_finish(&pp_obstack);
1901
             /* undo the temporary parameter bindings made above */
1902         if (new_definition->has_parameters) {
1903                 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1904                         pp_definition_t *param      = &new_definition->parameters[i];
1905                         symbol_t        *symbol     = param->symbol;
                             /* duplicates were never bound */
1906                         if (symbol == sym_anonymous)
1907                                 continue;
1908                         assert(symbol->pp_definition == param);
1909                         assert(param->function_definition == new_definition);
1910                         symbol->pp_definition   = param->parent_expansion;
1911                         param->parent_expansion = NULL;
1912                 }
1913         }
1914
             /* a differing redefinition only warns and then replaces the old
              * definition; an identical one keeps the old definition */
1915         pp_definition_t *old_definition = symbol->pp_definition;
1916         if (old_definition != NULL) {
1917                 if (!pp_definitions_equal(old_definition, new_definition)) {
1918                         warningf(WARN_OTHER, &input.pos,
1919                                  "multiple definition of macro '%Y' (first defined %P)",
1920                                  symbol, &old_definition->pos);
1921                 } else {
1922                         /* reuse the old definition */
1923                         obstack_free(&pp_obstack, new_definition);
1924                         new_definition = old_definition;
1925                 }
1926         }
1927
1928         symbol->pp_definition = new_definition;
1929         return;
1930
1931 error_out:
             /* drop a partially grown object (the parameter array) and skip
              * the rest of the directive */
1932         if (obstack_object_size(&pp_obstack) > 0) {
1933                 char *ptr = obstack_finish(&pp_obstack);
1934                 obstack_free(&pp_obstack, ptr);
1935         }
1936         eat_pp_directive();
1937 }
1938
1939 static void parse_undef_directive(void)
1940 {
1941         eat_pp(TP_undef);
1942         if (skip_mode) {
1943                 eat_pp_directive();
1944                 return;
1945         }
1946
1947         if (!is_defineable_token("#undef")) {
1948                 eat_pp_directive();
1949                 return;
1950         }
1951
1952         pp_token.base.symbol->pp_definition = NULL;
1953         next_input_token();
1954
1955         if (!info.at_line_begin) {
1956                 warningf(WARN_OTHER, &input.pos, "extra tokens at end of #undef directive");
1957         }
1958         eat_pp_directive();
1959 }
1960
1961 /** Behind an #include we can have the special headername lexemes.
1962  * They're only allowed behind an #include so they're not recognized
1963  * by the normal next_preprocessing_token. We handle them as a special
1964  * exception here.
      *
      * @param system_include  set to true for the <...> form and to false for
      *                        the "..." form
      * @return the header name, or NULL after an error has been reported
      */
1965 static const char *parse_headername(bool *system_include)
1966 {
1967         if (info.at_line_begin) {
1968                 parse_error("expected headername after #include");
1969                 return NULL;
1970         }
1971
1972         /* check whether we have a "... or <... headername */
1973         position_t pos = input.pos;
1974         switch (input.c) {
1975         {
1976                 utf32 delimiter;
1977         case '<': delimiter = '>'; *system_include = true;  goto parse_name;
1978         case '"': delimiter = '"'; *system_include = false; goto parse_name;
1979 parse_name:
                     /* copy the raw characters up to the closing delimiter onto
                      * symbol_obstack */
1980                 assert(obstack_object_size(&symbol_obstack) == 0);
1981                 next_char();
1982                 while (true) {
1983                         switch (input.c) {
1984                         case NEWLINE:
1985                         case EOF:
                                     /* discard what has been collected so far */
1986                                 {
1987                                         char *dummy = obstack_finish(&symbol_obstack);
1988                                         obstack_free(&symbol_obstack, dummy);
1989                                 }
1990                                 errorf(&pp_token.base.pos,
1991                                        "header name without closing '%c'", (char)delimiter);
1992                                 return NULL;
1993
1994                         default:
1995                                 if (input.c == delimiter) {
1996                                         next_char();
1997                                         goto finish_headername;
1998                                 } else {
1999                                         obstack_1grow(&symbol_obstack, (char)input.c);
2000                                         next_char();
2001                                 }
2002                                 break;
2003                         }
2004                 }
2005                 /* we should never be here */
2006         }
2007
2008         default:
                     /* neither '<' nor '"' follows directly: take the name from
                      * the tokens delivered by next_preprocessing_token() */
2009                 next_preprocessing_token();
2010                 if (info.at_line_begin) {
2011                         /* TODO: if we are already in the new line then we parsed more than
2012                          * wanted. We reuse the token, but could produce following errors
2013                          * misbehaviours... */
2014                         goto error_invalid_input;
2015                 }
2016                 if (pp_token.kind == T_STRING_LITERAL) {
2017                         *system_include = false;
2018                         return pp_token.literal.string.begin;
2019                 } else if (pp_token.kind == '<') {
2020                         *system_include = true;
2021                         assert(obstack_object_size(&pp_obstack) == 0);
                             /* collect all tokens up to the closing '>' on pp_obstack */
2022                         while (true) {
2023                                 next_preprocessing_token();
2024                                 if (info.at_line_begin) {
2025                                         /* TODO: we shouldn't have parsed/expanded something on the
2026                                          * next line yet... */
2027                                         char *dummy = obstack_finish(&pp_obstack);
2028                                         obstack_free(&pp_obstack, dummy);
2029                                         goto error_invalid_input;
2030                                 }
2031                                 if (pp_token.kind == '>')
2032                                         break;
2033
2034                                 saved_token_t saved;
2035                                 saved.token          = pp_token;
2036                                 saved.had_whitespace = info.had_whitespace;
2037                                 obstack_grow(&pp_obstack, &saved, sizeof(saved));
2038                         }
2039                         size_t size = obstack_object_size(&pp_obstack);
2040                         assert(size % sizeof(saved_token_t) == 0);
2041                         size_t n_tokens = size / sizeof(saved_token_t);
2042                         saved_token_t *tokens = obstack_finish(&pp_obstack);
2043                         assert(obstack_object_size(&symbol_obstack) == 0);
                             /* spell the collected tokens onto symbol_obstack; whitespace
                              * between tokens becomes a single blank */
2044                         for (size_t i = 0; i < n_tokens; ++i) {
2045                                 const saved_token_t *saved = &tokens[i];
2046                                 if (i > 0 && saved->had_whitespace)
2047                                         obstack_1grow(&symbol_obstack, ' ');
2048                                 grow_token(&symbol_obstack, &saved->token);
2049                         }
2050                         obstack_free(&pp_obstack, tokens);
2051                         goto finish_headername;
2052                 } else {
2053 error_invalid_input:
2054                         {
2055                                 char *dummy = obstack_finish(&symbol_obstack);
2056                                 obstack_free(&symbol_obstack, dummy);
2057                         }
2058
2059                         errorf(&pp_token.base.pos,
2060                                "expected \"FILENAME\" or <FILENAME> after #include");
2061                         return NULL;
2062                 }
2063         }
2064
2065 finish_headername:
             /* terminate the collected name and pass it to identify_string();
              * pp_token then carries the position recorded before the name */
2066         obstack_1grow(&symbol_obstack, '\0');
2067         char *const  headername = obstack_finish(&symbol_obstack);
2068         const char  *identified = identify_string(headername);
2069         pp_token.base.pos = pos;
2070         return identified;
2071 }
2072
2073 static bool do_include(bool const bracket_include, bool const include_next, char const *const headername)
2074 {
2075         size_t const        headername_len = strlen(headername);
2076         searchpath_entry_t *entry;
2077         if (include_next) {
2078                 entry = input.path      ? input.path->next
2079                       : bracket_include ? bracket_searchpath.first
2080                       : quote_searchpath.first;
2081         } else {
2082                 if (!bracket_include) {
2083                         /* put dirname of current input on obstack */
2084                         const char *filename   = input.pos.input_name;
2085                         const char *last_slash = strrchr(filename, '/');
2086                         const char *full_name;
2087                         if (last_slash != NULL) {
2088                                 size_t len = last_slash - filename;
2089                                 obstack_grow(&symbol_obstack, filename, len + 1);
2090                                 obstack_grow0(&symbol_obstack, headername, headername_len);
2091                                 char *complete_path = obstack_finish(&symbol_obstack);
2092                                 full_name = identify_string(complete_path);
2093                         } else {
2094                                 full_name = headername;
2095                         }
2096
2097                         FILE *file = fopen(full_name, "r");
2098                         if (file != NULL) {
2099                                 switch_pp_input(file, full_name, NULL, false);
2100                                 return true;
2101                         }
2102                         entry = quote_searchpath.first;
2103                 } else {
2104                         entry = bracket_searchpath.first;
2105                 }
2106         }
2107
2108         assert(obstack_object_size(&symbol_obstack) == 0);
2109         /* check searchpath */
2110         for (; entry; entry = entry->next) {
2111             const char *path = entry->path;
2112             size_t      len  = strlen(path);
2113                 obstack_grow(&symbol_obstack, path, len);
2114                 if (path[len-1] != '/')
2115                         obstack_1grow(&symbol_obstack, '/');
2116                 obstack_grow(&symbol_obstack, headername, headername_len+1);
2117
2118                 char *complete_path = obstack_finish(&symbol_obstack);
2119                 FILE *file          = fopen(complete_path, "r");
2120                 if (file != NULL) {
2121                         const char *filename = identify_string(complete_path);
2122                         switch_pp_input(file, filename, entry, entry->is_system_path);
2123                         return true;
2124                 } else {
2125                         obstack_free(&symbol_obstack, complete_path);
2126                 }
2127         }
2128
2129         return false;
2130 }
2131
2132 static void parse_include_directive(bool const include_next)
2133 {
2134         if (skip_mode) {
2135                 eat_pp_directive();
2136                 return;
2137         }
2138
2139         /* do not eat the TP_include, since it would already parse the next token
2140          * which needs special handling here. */
2141         skip_till_newline(true);
2142         bool system_include;
2143         const char *headername = parse_headername(&system_include);
2144         if (headername == NULL) {
2145                 eat_pp_directive();
2146                 return;
2147         }
2148
2149         bool had_nonwhitespace = skip_till_newline(false);
2150         if (had_nonwhitespace) {
2151                 warningf(WARN_OTHER, &input.pos,
2152                          "extra tokens at end of #include directive");
2153         }
2154
2155         if (n_inputs > INCLUDE_LIMIT) {
2156                 errorf(&pp_token.base.pos, "#include nested too deeply");
2157                 /* eat \n or EOF */
2158                 next_input_token();
2159                 return;
2160         }
2161
2162         /* switch inputs */
2163         info.whitespace_at_line_begin = 0;
2164         info.had_whitespace           = false;
2165         info.at_line_begin            = true;
2166         emit_newlines();
2167         push_input();
2168         bool res = do_include(system_include, include_next, headername);
2169         if (res) {
2170                 next_input_token();
2171         } else {
2172                 errorf(&pp_token.base.pos, "failed including '%s': %s", headername, strerror(errno));
2173                 pop_restore_input();
2174         }
2175 }
2176
2177 static pp_conditional_t *push_conditional(void)
2178 {
2179         pp_conditional_t *conditional
2180                 = obstack_alloc(&pp_obstack, sizeof(*conditional));
2181         memset(conditional, 0, sizeof(*conditional));
2182
2183         conditional->parent = conditional_stack;
2184         conditional_stack   = conditional;
2185
2186         return conditional;
2187 }
2188
2189 static void pop_conditional(void)
2190 {
2191         assert(conditional_stack != NULL);
2192         conditional_stack = conditional_stack->parent;
2193 }
2194
2195 void check_unclosed_conditionals(void)
2196 {
2197         while (conditional_stack != NULL) {
2198                 pp_conditional_t *conditional = conditional_stack;
2199
2200                 if (conditional->in_else) {
2201                         errorf(&conditional->pos, "unterminated #else");
2202                 } else {
2203                         errorf(&conditional->pos, "unterminated condition");
2204                 }
2205                 pop_conditional();
2206         }
2207 }
2208
2209 static void parse_ifdef_ifndef_directive(bool const is_ifdef)
2210 {
2211         bool condition;
2212         eat_pp(is_ifdef ? TP_ifdef : TP_ifndef);
2213
2214         if (skip_mode) {
2215                 eat_pp_directive();
2216                 pp_conditional_t *conditional = push_conditional();
2217                 conditional->pos  = pp_token.base.pos;
2218                 conditional->skip = true;
2219                 return;
2220         }
2221
2222         if (pp_token.kind != T_IDENTIFIER || info.at_line_begin) {
2223                 errorf(&pp_token.base.pos, "expected identifier after #%s, got %K",
2224                        is_ifdef ? "ifdef" : "ifndef", &pp_token);
2225                 eat_pp_directive();
2226
2227                 /* just take the true case in the hope to avoid further errors */
2228                 condition = true;
2229         } else {
2230                 /* evaluate wether we are in true or false case */
2231                 condition = (bool)pp_token.base.symbol->pp_definition == is_ifdef;
2232                 eat_token(T_IDENTIFIER);
2233
2234                 if (!info.at_line_begin) {
2235                         errorf(&pp_token.base.pos, "extra tokens at end of #%s",
2236                                is_ifdef ? "ifdef" : "ifndef");
2237                         eat_pp_directive();
2238                 }
2239         }
2240
2241         pp_conditional_t *conditional = push_conditional();
2242         conditional->pos       = pp_token.base.pos;
2243         conditional->condition = condition;
2244
2245         if (!condition) {
2246                 skip_mode = true;
2247         }
2248 }
2249
2250 static void parse_else_directive(void)
2251 {
2252         eat_pp(TP_else);
2253
2254         if (!info.at_line_begin) {
2255                 if (!skip_mode) {
2256                         warningf(WARN_OTHER, &pp_token.base.pos, "extra tokens at end of #else");
2257                 }
2258                 eat_pp_directive();
2259         }
2260
2261         pp_conditional_t *conditional = conditional_stack;
2262         if (conditional == NULL) {
2263                 errorf(&pp_token.base.pos, "#else without prior #if");
2264                 return;
2265         }
2266
2267         if (conditional->in_else) {
2268                 errorf(&pp_token.base.pos,
2269                        "#else after #else (condition started %P)",
2270                        &conditional->pos);
2271                 skip_mode = true;
2272                 return;
2273         }
2274
2275         conditional->in_else = true;
2276         if (!conditional->skip) {
2277                 skip_mode = conditional->condition;
2278         }
2279         conditional->pos = pp_token.base.pos;
2280 }
2281
2282 static void parse_endif_directive(void)
2283 {
2284         eat_pp(TP_endif);
2285
2286         if (!info.at_line_begin) {
2287                 if (!skip_mode) {
2288                         warningf(WARN_OTHER, &pp_token.base.pos, "extra tokens at end of #endif");
2289                 }
2290                 eat_pp_directive();
2291         }
2292
2293         pp_conditional_t *conditional = conditional_stack;
2294         if (conditional == NULL) {
2295                 errorf(&pp_token.base.pos, "#endif without prior #if");
2296                 return;
2297         }
2298
2299         if (!conditional->skip) {
2300                 skip_mode = false;
2301         }
2302         pop_conditional();
2303 }
2304
2305 typedef enum stdc_pragma_kind_t {
             /* Kind of "#pragma STDC" found by parse_pragma_directive(). */
             /* no STDC pragma, or one that was not recognized */
2306         STDC_UNKNOWN,
             /* #pragma STDC FP_CONTRACT */
2307         STDC_FP_CONTRACT,
             /* #pragma STDC FENV_ACCESS */
2308         STDC_FENV_ACCESS,
             /* #pragma STDC CX_LIMITED_RANGE */
2309         STDC_CX_LIMITED_RANGE
2310 } stdc_pragma_kind_t;
2311
2312 typedef enum stdc_pragma_value_kind_t {
             /* Argument of a "#pragma STDC" as read by parse_pragma_directive(). */
             /* anything but ON, OFF or DEFAULT */
2313         STDC_VALUE_UNKNOWN,
             /* ON */
2314         STDC_VALUE_ON,
             /* OFF */
2315         STDC_VALUE_OFF,
             /* DEFAULT */
2316         STDC_VALUE_DEFAULT
2317 } stdc_pragma_value_kind_t;
2318
2319 static void parse_pragma_directive(void)
2320 {
2321         eat_pp(TP_pragma);
2322         if (skip_mode) {
2323                 eat_pp_directive();
2324                 return;
2325         }
2326
2327         if (pp_token.kind != T_IDENTIFIER) {
2328                 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.pos,
2329                          "expected identifier after #pragma");
2330                 eat_pp_directive();
2331                 return;
2332         }
2333
2334         stdc_pragma_kind_t kind = STDC_UNKNOWN;
2335         if (pp_token.base.symbol->pp_ID == TP_STDC && c_mode & _C99) {
2336                 /* a STDC pragma */
2337                 next_input_token();
2338
2339                 switch (pp_token.base.symbol->pp_ID) {
2340                 case TP_FP_CONTRACT:      kind = STDC_FP_CONTRACT;      break;
2341                 case TP_FENV_ACCESS:      kind = STDC_FENV_ACCESS;      break;
2342                 case TP_CX_LIMITED_RANGE: kind = STDC_CX_LIMITED_RANGE; break;
2343                 default:                  break;
2344                 }
2345                 if (kind != STDC_UNKNOWN) {
2346                         next_input_token();
2347                         stdc_pragma_value_kind_t value;
2348                         switch (pp_token.base.symbol->pp_ID) {
2349                         case TP_ON:      value = STDC_VALUE_ON;      break;
2350                         case TP_OFF:     value = STDC_VALUE_OFF;     break;
2351                         case TP_DEFAULT: value = STDC_VALUE_DEFAULT; break;
2352                         default:         value = STDC_VALUE_UNKNOWN; break;
2353                         }
2354                         if (value == STDC_VALUE_UNKNOWN) {
2355                                 kind = STDC_UNKNOWN;
2356                                 errorf(&pp_token.base.pos, "bad STDC pragma argument");
2357                         }
2358                 }
2359         }
2360         eat_pp_directive();
2361         if (kind == STDC_UNKNOWN) {
2362                 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.pos,
2363                          "encountered unknown #pragma");
2364         }
2365 }
2366
2367 static void parse_line_directive(void)
2368 {
2369         if (pp_token.kind != T_NUMBER) {
2370                 if (!skip_mode)
2371                         parse_error("expected integer");
2372         } else {
2373                 char      *end;
2374                 long const line = strtol(pp_token.literal.string.begin, &end, 0);
2375                 if (*end == '\0') {
2376                         /* use offset -1 as this is about the next line */
2377                         input.pos.lineno = line - 1;
2378                         /* force output of line */
2379                         input.output_line = input.pos.lineno - 20;
2380                 } else {
2381                         if (!skip_mode) {
2382                                 errorf(&input.pos, "'%S' is not a valid line number",
2383                                            &pp_token.literal.string);
2384                         }
2385                 }
2386                 next_input_token();
2387                 if (info.at_line_begin)
2388                         return;
2389         }
2390         if (pp_token.kind == T_STRING_LITERAL
2391             && pp_token.literal.string.encoding == STRING_ENCODING_CHAR) {
2392                 input.pos.input_name       = pp_token.literal.string.begin;
2393                 input.pos.is_system_header = false;
2394                 next_input_token();
2395
2396                 /* attempt to parse numeric flags as outputted by gcc preprocessor */
2397                 while (!info.at_line_begin && pp_token.kind == T_NUMBER) {
2398                         /* flags:
2399                          * 1 - indicates start of a new file
2400                          * 2 - indicates return from a file
2401                          * 3 - indicates system header
2402                          * 4 - indicates implicit extern "C" in C++ mode
2403                          *
2404                          * currently we're only interested in "3"
2405                          */
2406                         if (streq(pp_token.literal.string.begin, "3")) {
2407                                 input.pos.is_system_header = true;
2408                         }
2409                         next_input_token();
2410                 }
2411         }
2412
2413         eat_pp_directive();
2414 }
2415
2416 static void parse_error_directive(void)
2417 {
2418         if (skip_mode) {
2419                 eat_pp_directive();
2420                 return;
2421         }
2422
2423         bool const old_resolve_escape_sequences = resolve_escape_sequences;
2424         resolve_escape_sequences = false;
2425
2426         position_t const pos = pp_token.base.pos;
2427         do {
2428                 if (info.had_whitespace && obstack_object_size(&pp_obstack) != 0)
2429                         obstack_1grow(&pp_obstack, ' ');
2430
2431                 switch (pp_token.kind) {
2432                 case T_NUMBER: {
2433                         string_t const *const str = &pp_token.literal.string;
2434                         obstack_grow(&pp_obstack, str->begin, str->size);
2435                         break;
2436                 }
2437
2438                 {
2439                         char delim;
2440                 case T_STRING_LITERAL:     delim =  '"'; goto string;
2441                 case T_CHARACTER_CONSTANT: delim = '\''; goto string;
2442 string:;
2443                         string_t const *const str = &pp_token.literal.string;
2444                         char     const *const enc = get_string_encoding_prefix(str->encoding);
2445                         obstack_printf(&pp_obstack, "%s%c%s%c", enc, delim, str->begin, delim);
2446                         break;
2447                 }
2448
2449                 default: {
2450                         char const *const str = pp_token.base.symbol->string;
2451                         obstack_grow(&pp_obstack, str, strlen(str));
2452                         break;
2453                 }
2454                 }
2455
2456                 next_input_token();
2457         } while (!info.at_line_begin);
2458
2459         resolve_escape_sequences = old_resolve_escape_sequences;
2460
2461         obstack_1grow(&pp_obstack, '\0');
2462         char *const str = obstack_finish(&pp_obstack);
2463         errorf(&pos, "#%s", str);
2464         obstack_free(&pp_obstack, str);
2465 }
2466
2467 static void parse_preprocessing_directive(void)
2468 {
2469         eat_token('#');
2470
2471         if (info.at_line_begin) {
2472                 /* empty directive */
2473                 return;
2474         }
2475
2476         if (pp_token.base.symbol) {
2477                 switch (pp_token.base.symbol->pp_ID) {
2478                 case TP_define:       parse_define_directive();            break;
2479                 case TP_else:         parse_else_directive();              break;
2480                 case TP_endif:        parse_endif_directive();             break;
2481                 case TP_error:        parse_error_directive();             break;
2482                 case TP_ifdef:        parse_ifdef_ifndef_directive(true);  break;
2483                 case TP_ifndef:       parse_ifdef_ifndef_directive(false); break;
2484                 case TP_include:      parse_include_directive(false);      break;
2485                 case TP_include_next: parse_include_directive(true);       break;
2486                 case TP_line:         next_input_token(); goto line_directive;
2487                 case TP_pragma:       parse_pragma_directive();            break;
2488                 case TP_undef:        parse_undef_directive();             break;
2489                 default:              goto skip;
2490                 }
2491         } else if (pp_token.kind == T_NUMBER) {
2492 line_directive:
2493                 parse_line_directive();
2494         } else {
2495 skip:
2496                 if (!skip_mode) {
2497                         errorf(&pp_token.base.pos, "invalid preprocessing directive #%K", &pp_token);
2498                 }
2499                 eat_pp_directive();
2500         }
2501
2502         assert(info.at_line_begin);
2503 }
2504
2505 static void finish_current_argument(void)
2506 {
2507         if (current_argument == NULL)
2508                 return;
2509         size_t size = obstack_object_size(&pp_obstack);
2510         current_argument->list_len   = size/sizeof(current_argument->token_list[0]);
2511         current_argument->token_list = obstack_finish(&pp_obstack);
2512 }
2513
2514 void next_preprocessing_token(void)
2515 {
2516 restart:
2517         if (!expand_next()) {
2518                 do {
2519                         next_input_token();
2520                         while (pp_token.kind == '#' && info.at_line_begin) {
2521                                 parse_preprocessing_directive();
2522                         }
2523                 } while (skip_mode && pp_token.kind != T_EOF);
2524         }
2525
2526         const token_kind_t kind = pp_token.kind;
2527         if (current_call == NULL || argument_expanding != NULL) {
2528                 symbol_t *const symbol = pp_token.base.symbol;
2529                 if (symbol) {
2530                         if (kind == T_MACRO_PARAMETER) {
2531                                 assert(current_expansion != NULL);
2532                                 start_expanding(pp_token.macro_parameter.def);
2533                                 goto restart;
2534                         }
2535
2536                         pp_definition_t *const pp_definition = symbol->pp_definition;
2537                         if (pp_definition != NULL && !pp_definition->is_expanding) {
2538                                 if (pp_definition->has_parameters) {
2539
2540                                         /* check if next token is a '(' */
2541                                         whitespace_info_t old_info   = info;
2542                                         token_kind_t      next_token = peek_expansion();
2543                                         if (next_token == T_EOF) {
2544                                                 info.at_line_begin  = false;
2545                                                 info.had_whitespace = false;
2546                                                 skip_whitespace();
2547                                                 if (input.c == '(') {
2548                                                         next_token = '(';
2549                                                 }
2550                                         }
2551
2552                                         if (next_token == '(') {
2553                                                 if (current_expansion == NULL)
2554                                                         expansion_pos = pp_token.base.pos;
2555                                                 next_preprocessing_token();
2556                                                 assert(pp_token.kind == '(');
2557
2558                                                 pp_definition->parent_expansion = current_expansion;
2559                                                 current_call              = pp_definition;
2560                                                 current_call->expand_pos  = 0;
2561                                                 current_call->expand_info = old_info;
2562                                                 if (current_call->n_parameters > 0) {
2563                                                         current_argument = &current_call->parameters[0];
2564                                                         assert(argument_brace_count == 0);
2565                                                 }
2566                                                 goto restart;
2567                                         } else {
2568                                                 /* skip_whitespaces() skipped newlines and whitespace,
2569                                                  * remember results for next token */
2570                                                 next_info = info;
2571                                                 info      = old_info;
2572                                                 return;
2573                                         }
2574                                 } else {
2575                                         if (current_expansion == NULL)
2576                                                 expansion_pos = pp_token.base.pos;
2577                                         start_expanding(pp_definition);
2578                                         goto restart;
2579                                 }
2580                         }
2581                 }
2582         }
2583
2584         if (current_call != NULL) {
2585                 /* current_call != NULL */
2586                 if (kind == '(') {
2587                         ++argument_brace_count;
2588                 } else if (kind == ')') {
2589                         if (argument_brace_count > 0) {
2590                                 --argument_brace_count;
2591                         } else {
2592                                 finish_current_argument();
2593                                 assert(kind == ')');
2594                                 start_expanding(current_call);
2595                                 info = current_call->expand_info;
2596                                 current_call     = NULL;
2597                                 current_argument = NULL;
2598                                 goto restart;
2599                         }
2600                 } else if (kind == ',' && argument_brace_count == 0) {
2601                         finish_current_argument();
2602                         current_call->expand_pos++;
2603                         if (current_call->expand_pos >= current_call->n_parameters) {
2604                                 errorf(&pp_token.base.pos,
2605                                            "too many arguments passed for macro '%Y'",
2606                                            current_call->symbol);
2607                                 current_argument = NULL;
2608                         } else {
2609                                 current_argument
2610                                         = &current_call->parameters[current_call->expand_pos];
2611                         }
2612                         goto restart;
2613                 } else if (kind == T_MACRO_PARAMETER) {
2614                         /* parameters have to be fully expanded before being used as
2615                          * parameters for another macro-call */
2616                         assert(current_expansion != NULL);
2617                         pp_definition_t *argument = pp_token.macro_parameter.def;
2618                         argument_expanding = argument;
2619                         start_expanding(argument);
2620                         goto restart;
2621                 } else if (kind == T_EOF) {
2622                         errorf(&expansion_pos,
2623                                "reached end of file while parsing arguments for '%Y'",
2624                                current_call->symbol);
2625                         return;
2626                 }
2627                 if (current_argument != NULL) {
2628                         saved_token_t saved;
2629                         saved.token = pp_token;
2630                         saved.had_whitespace = info.had_whitespace;
2631                         obstack_grow(&pp_obstack, &saved, sizeof(saved));
2632                 }
2633                 goto restart;
2634         }
2635 }
2636
2637 void append_include_path(searchpath_t *paths, const char *path)
2638 {
2639         searchpath_entry_t *entry = OALLOCZ(&config_obstack, searchpath_entry_t);
2640         entry->path           = path;
2641         entry->is_system_path = paths->is_system_path;
2642
2643         *paths->anchor = entry;
2644         paths->anchor  = &entry->next;
2645 }
2646
2647 static void append_env_paths(searchpath_t *paths, const char *envvar)
2648 {
2649         const char *val = getenv(envvar);
2650         if (val != NULL && *val != '\0') {
2651                 const char *begin = val;
2652                 const char *c;
2653                 do {
2654                         c = begin;
2655                         while (*c != '\0' && *c != ':')
2656                                 ++c;
2657
2658                         size_t len = c-begin;
2659                         if (len == 0) {
2660                                 /* use "." for gcc compatibility (Matze: I would expect that
2661                                  * nothing happens for an empty entry...) */
2662                                 append_include_path(paths, ".");
2663                         } else {
2664                                 char *const string = obstack_copy0(&config_obstack, begin, len);
2665                                 append_include_path(paths, string);
2666                         }
2667
2668                         begin = c+1;
2669                         /* skip : */
2670                         if (*begin == ':')
2671                                 ++begin;
2672                 } while(*c != '\0');
2673         }
2674 }
2675
2676 static void append_searchpath(searchpath_t *path, const searchpath_t *append)
2677 {
2678         *path->anchor = append->first;
2679 }
2680
2681 static void setup_include_path(void)
2682 {
2683         /* built-in paths */
2684         append_include_path(&system_searchpath, "/usr/include");
2685
2686         /* parse environment variable */
2687         append_env_paths(&bracket_searchpath, "CPATH");
2688         append_env_paths(&system_searchpath,
2689                          c_mode & _CXX ? "CPLUS_INCLUDE_PATH" : "C_INCLUDE_PATH");
2690
2691         /* append system search path to bracket searchpath */
2692         append_searchpath(&system_searchpath,  &after_searchpath);
2693         append_searchpath(&bracket_searchpath, &system_searchpath);
2694         append_searchpath(&quote_searchpath, &bracket_searchpath);
2695 }
2696
2697 static void input_error(unsigned const delta_lines, unsigned const delta_cols, char const *const message)
2698 {
2699         position_t pos = pp_token.base.pos;
2700         pos.lineno += delta_lines;
2701         pos.colno  += delta_cols;
2702         errorf(&pos, "%s", message);
2703 }
2704
2705 void init_include_paths(void)
2706 {
2707         obstack_init(&config_obstack);
2708 }
2709
2710 void init_preprocessor(void)
2711 {
2712         init_symbols();
2713
2714         obstack_init(&pp_obstack);
2715         obstack_init(&input_obstack);
2716         strset_init(&stringset);
2717
2718         setup_include_path();
2719
2720         set_input_error_callback(input_error);
2721 }
2722
2723 void exit_preprocessor(void)
2724 {
2725         obstack_free(&input_obstack, NULL);
2726         obstack_free(&pp_obstack, NULL);
2727         obstack_free(&config_obstack, NULL);
2728
2729         strset_destroy(&stringset);
2730 }
2731
2732 int pptest_main(int argc, char **argv);
2733 int pptest_main(int argc, char **argv)
2734 {
2735         init_symbol_table();
2736         init_include_paths();
2737         init_preprocessor();
2738         init_tokens();
2739
2740         error_on_unknown_chars   = false;
2741         resolve_escape_sequences = false;
2742
2743         /* simplistic commandline parser */
2744         const char *filename = NULL;
2745         const char *output = NULL;
2746         for (int i = 1; i < argc; ++i) {
2747                 const char *opt = argv[i];
2748                 if (streq(opt, "-I")) {
2749                         append_include_path(&bracket_searchpath, argv[++i]);
2750                         continue;
2751                 } else if (streq(opt, "-E")) {
2752                         /* ignore */
2753                 } else if (streq(opt, "-o")) {
2754                         output = argv[++i];
2755                         continue;
2756                 } else if (opt[0] == '-') {
2757                         fprintf(stderr, "Unknown option '%s'\n", opt);
2758                 } else {
2759                         if (filename != NULL)
2760                                 fprintf(stderr, "Multiple inputs not supported\n");
2761                         filename = argv[i];
2762                 }
2763         }
2764         if (filename == NULL) {
2765                 fprintf(stderr, "No input specified\n");
2766                 return 1;
2767         }
2768
2769         if (output == NULL) {
2770                 out = stdout;
2771         } else {
2772                 out = fopen(output, "w");
2773                 if (out == NULL) {
2774                         fprintf(stderr, "Couldn't open output '%s'\n", output);
2775                         return 1;
2776                 }
2777         }
2778
2779         /* just here for gcc compatibility */
2780         fprintf(out, "# 1 \"%s\"\n", filename);
2781         fprintf(out, "# 1 \"<built-in>\"\n");
2782         fprintf(out, "# 1 \"<command-line>\"\n");
2783
2784         FILE *file = fopen(filename, "r");
2785         if (file == NULL) {
2786                 fprintf(stderr, "Couldn't open input '%s'\n", filename);
2787                 return 1;
2788         }
2789         switch_pp_input(file, filename, NULL, false);
2790
2791         for (;;) {
2792                 next_preprocessing_token();
2793                 if (pp_token.kind == T_EOF)
2794                         break;
2795                 emit_pp_token();
2796         }
2797
2798         fputc('\n', out);
2799         check_unclosed_conditionals();
2800         fclose(close_pp_input());
2801         if (out != stdout)
2802                 fclose(out);
2803
2804         exit_tokens();
2805         exit_preprocessor();
2806         exit_symbol_table();
2807
2808         return 0;
2809 }