nsz Git - cparser/blob - preprocessor.c

   1 /*
   2  * This file is part of cparser.
   3  * Copyright (C) 2012 Matthias Braun <matze@braunis.de>
   4  */
   5 #include <config.h>
   6
   7 #include <assert.h>
   8 #include <errno.h>
   9 #include <string.h>
  10 #include <stdbool.h>
  11 #include <ctype.h>
  12
  13 #include "preprocessor.h"
  14 #include "token_t.h"
  15 #include "symbol_t.h"
  16 #include "adt/util.h"
  17 #include "adt/error.h"
  18 #include "adt/strutil.h"
  19 #include "adt/strset.h"
  20 #include "lang_features.h"
  21 #include "diagnostic.h"
  22 #include "string_rep.h"
  23 #include "input.h"
  24
  25 #define MAX_PUTBACK 3
  26 #define INCLUDE_LIMIT 199  /* 199 is for gcc "compatibility" */
  27
  28 typedef struct saved_token_t {
  29         token_t token;
  30         bool    had_whitespace;
  31 } saved_token_t;
  32
  33 typedef struct whitespace_info_t {
  34         /** current token had whitespace in front of it */
  35         bool     had_whitespace;
  36         /** current token is at the beginning of a line.
  37          * => a "#" at line begin starts a preprocessing directive. */
  38         bool     at_line_begin;
  39         /** number of spaces before the first token in a line */
  40         unsigned whitespace_at_line_begin;
  41 } whitespace_info_t;
  42
  43 struct pp_definition_t {
  44         symbol_t          *symbol;
  45         position_t         pos;
  46         pp_definition_t   *parent_expansion;
  47         size_t             expand_pos;
  48         whitespace_info_t  expand_info;
  49         bool               is_variadic    : 1;
  50         bool               is_expanding   : 1;
  51         bool               has_parameters : 1;
  52         bool               is_parameter   : 1;
  53         pp_definition_t   *function_definition;
  54         size_t             n_parameters;
  55         pp_definition_t   *parameters;
  56
  57         /* replacement */
  58         size_t             list_len;
  59         saved_token_t     *token_list;
  60 };
  61
  62 typedef struct pp_conditional_t pp_conditional_t;
  63 struct pp_conditional_t {
  64         position_t         pos;
  65         bool               condition;
  66         bool               in_else;
  67         /** conditional in skip mode (then+else gets skipped) */
  68         bool               skip;
  69         pp_conditional_t  *parent;
  70 };
  71
  72 typedef struct pp_input_t pp_input_t;
  73 struct pp_input_t {
  74         FILE               *file;
  75         input_t            *input;
  76         utf32               c;
  77         utf32               buf[1024+MAX_PUTBACK];
  78         const utf32        *bufend;
  79         const utf32        *bufpos;
  80         position_t          pos;
  81         pp_input_t         *parent;
  82         unsigned            output_line;
  83         searchpath_entry_t *path;
  84 };
  85
  86 struct searchpath_entry_t {
  87         const char         *path;
  88         searchpath_entry_t *next;
  89         bool                is_system_path;
  90 };
  91
  92 static pp_input_t      input;
  93
  94 static pp_input_t     *input_stack;
  95 static unsigned        n_inputs;
  96 static struct obstack  input_obstack;
  97
  98 static pp_conditional_t *conditional_stack;
  99
 100 token_t                  pp_token;
 101 bool                     allow_dollar_in_symbol   = true;
 102 static bool              resolve_escape_sequences = true;
 103 static bool              error_on_unknown_chars   = true;
 104 static bool              skip_mode;
 105 static FILE             *out;
 106 static struct obstack    pp_obstack;
 107 static struct obstack    config_obstack;
 108 static const char       *printed_input_name = NULL;
 109 static position_t        expansion_pos;
 110 static pp_definition_t  *current_expansion  = NULL;
 111 static pp_definition_t  *current_call       = NULL;
 112 static pp_definition_t  *current_argument   = NULL;
 113 static pp_definition_t  *argument_expanding = NULL;
 114 static unsigned          argument_brace_count;
 115 static strset_t          stringset;
 116 static token_kind_t      last_token;
 117
 118 struct searchpath_t {
 119         searchpath_entry_t  *first;
 120         searchpath_entry_t **anchor;
 121         bool                 is_system_path;
 122 };
 123
 124 searchpath_t bracket_searchpath = { NULL, &bracket_searchpath.first, false };
 125 searchpath_t quote_searchpath   = { NULL, &quote_searchpath.first,   false };
 126 searchpath_t system_searchpath  = { NULL, &system_searchpath.first,  true  };
 127 searchpath_t after_searchpath   = { NULL, &after_searchpath.first,   true  };
 128
 129 static whitespace_info_t next_info; /* valid if had_whitespace is true */
 130 static whitespace_info_t info;
 131
 132 static inline void next_char(void);
 133 static void next_input_token(void);
 134 static void print_line_directive(const position_t *pos, const char *add);
 135
 136 static symbol_t *symbol_colongreater;
 137 static symbol_t *symbol_lesscolon;
 138 static symbol_t *symbol_lesspercent;
 139 static symbol_t *symbol_percentcolon;
 140 static symbol_t *symbol_percentcolonpercentcolon;
 141 static symbol_t *symbol_percentgreater;
 142
 143 static symbol_t *symbol_L;
 144 static symbol_t *symbol_U;
 145 static symbol_t *symbol_u;
 146 static symbol_t *symbol_u8;
 147
 148 static void init_symbols(void)
 149 {
 150         symbol_colongreater             = symbol_table_insert(":>");
 151         symbol_lesscolon                = symbol_table_insert("<:");
 152         symbol_lesspercent              = symbol_table_insert("<%");
 153         symbol_percentcolon             = symbol_table_insert("%:");
 154         symbol_percentcolonpercentcolon = symbol_table_insert("%:%:");
 155         symbol_percentgreater           = symbol_table_insert("%>");
 156
 157         symbol_L  = symbol_table_insert("L");
 158         symbol_U  = symbol_table_insert("U");
 159         symbol_u  = symbol_table_insert("u");
 160         symbol_u8 = symbol_table_insert("u8");
 161 }
 162
 163 void switch_pp_input(FILE *const file, char const *const filename, searchpath_entry_t *const path, bool const is_system_header)
 164 {
 165         input.file                 = file;
 166         input.input                = input_from_stream(file, NULL);
 167         input.bufend               = NULL;
 168         input.bufpos               = NULL;
 169         input.output_line          = 0;
 170         input.pos.input_name       = filename;
 171         input.pos.lineno           = 1;
 172         input.pos.is_system_header = is_system_header;
 173         input.path                 = path;
 174
 175         /* indicate that we're at a new input */
 176         print_line_directive(&input.pos, input_stack != NULL ? "1" : NULL);
 177
 178         /* place a virtual '\n' so we realize we're at line begin */
 179         input.pos.lineno = 0;
 180         input.c          = '\n';
 181 }
 182
 183 FILE *close_pp_input(void)
 184 {
 185         input_free(input.input);
 186
 187         FILE* const file = input.file;
 188         assert(file);
 189
 190         input.input  = NULL;
 191         input.file   = NULL;
 192         input.bufend = NULL;
 193         input.bufpos = NULL;
 194         input.c      = EOF;
 195
 196         return file;
 197 }
 198
 199 static void push_input(void)
 200 {
 201         pp_input_t *const saved_input = obstack_copy(&input_obstack, &input, sizeof(input));
 202
 203         /* adjust buffer positions */
 204         if (input.bufpos != NULL)
 205                 saved_input->bufpos = saved_input->buf + (input.bufpos - input.buf);
 206         if (input.bufend != NULL)
 207                 saved_input->bufend = saved_input->buf + (input.bufend - input.buf);
 208
 209         saved_input->parent = input_stack;
 210         input_stack         = saved_input;
 211         ++n_inputs;
 212 }
 213
 214 static void pop_restore_input(void)
 215 {
 216         assert(n_inputs > 0);
 217         assert(input_stack != NULL);
 218
 219         pp_input_t *saved_input = input_stack;
 220
 221         memcpy(&input, saved_input, sizeof(input));
 222         input.parent = NULL;
 223
 224         /* adjust buffer positions */
 225         if (saved_input->bufpos != NULL)
 226                 input.bufpos = input.buf + (saved_input->bufpos - saved_input->buf);
 227         if (saved_input->bufend != NULL)
 228                 input.bufend = input.buf + (saved_input->bufend - saved_input->buf);
 229
 230         input_stack = saved_input->parent;
 231         obstack_free(&input_obstack, saved_input);
 232         --n_inputs;
 233 }
 234
 235 /**
 236  * Prints a parse error message at the current token.
 237  *
 238  * @param msg   the error message
 239  */
 240 static void parse_error(const char *msg)
 241 {
 242         errorf(&pp_token.base.pos,  "%s", msg);
 243 }
 244
 245 static inline void next_real_char(void)
 246 {
 247         assert(input.bufpos <= input.bufend);
 248         if (input.bufpos >= input.bufend) {
 249                 size_t const n = decode(input.input, input.buf + MAX_PUTBACK, lengthof(input.buf) - MAX_PUTBACK);
 250                 if (n == 0) {
 251                         input.c = EOF;
 252                         return;
 253                 }
 254                 input.bufpos = input.buf + MAX_PUTBACK;
 255                 input.bufend = input.bufpos + n;
 256         }
 257         input.c = *input.bufpos++;
 258         ++input.pos.colno;
 259 }
 260
 261 /**
 262  * Put a character back into the buffer.
 263  *
 264  * @param pc  the character to put back
 265  */
 266 static inline void put_back(utf32 const pc)
 267 {
 268         assert(input.bufpos > input.buf);
 269         *(--input.bufpos - input.buf + input.buf) = (char) pc;
 270         --input.pos.colno;
 271 }
 272
 273 #define NEWLINE \
 274         '\r': \
 275                 next_char(); \
 276                 if (input.c == '\n') { \
 277         case '\n': \
 278                         next_char(); \
 279                 } \
 280                 ++input.pos.lineno; \
 281                 input.pos.colno = 1; \
 282                 goto newline; \
 283                 newline // Let it look like an ordinary case label.
 284
 285 #define eat(c_type) (assert(input.c == c_type), next_char())
 286
 287 static void maybe_concat_lines(void)
 288 {
 289         eat('\\');
 290
 291         switch (input.c) {
 292         case NEWLINE:
 293                 info.whitespace_at_line_begin = 0;
 294                 return;
 295
 296         default:
 297                 break;
 298         }
 299
 300         put_back(input.c);
 301         input.c = '\\';
 302 }
 303
 304 /**
 305  * Set c to the next input character, ie.
 306  * after expanding trigraphs.
 307  */
 308 static inline void next_char(void)
 309 {
 310         next_real_char();
 311
 312         /* filter trigraphs and concatenated lines */
 313         if (UNLIKELY(input.c == '\\')) {
 314                 maybe_concat_lines();
 315                 goto end_of_next_char;
 316         }
 317
 318         if (LIKELY(input.c != '?'))
 319                 goto end_of_next_char;
 320
 321         next_real_char();
 322         if (LIKELY(input.c != '?')) {
 323                 put_back(input.c);
 324                 input.c = '?';
 325                 goto end_of_next_char;
 326         }
 327
 328         next_real_char();
 329         switch (input.c) {
 330         case '=': input.c = '#'; break;
 331         case '(': input.c = '['; break;
 332         case '/': input.c = '\\'; maybe_concat_lines(); break;
 333         case ')': input.c = ']'; break;
 334         case '\'': input.c = '^'; break;
 335         case '<': input.c = '{'; break;
 336         case '!': input.c = '|'; break;
 337         case '>': input.c = '}'; break;
 338         case '-': input.c = '~'; break;
 339         default:
 340                 put_back(input.c);
 341                 put_back('?');
 342                 input.c = '?';
 343                 break;
 344         }
 345
 346 end_of_next_char:;
 347 #ifdef DEBUG_CHARS
 348         printf("nchar '%c'\n", input.c);
 349 #endif
 350 }
 351
 352
 353
 354 /**
 355  * Returns true if the given char is a octal digit.
 356  *
 357  * @param char  the character to check
 358  */
 359 static inline bool is_octal_digit(int chr)
 360 {
 361         switch (chr) {
 362         case '0':
 363         case '1':
 364         case '2':
 365         case '3':
 366         case '4':
 367         case '5':
 368         case '6':
 369         case '7':
 370                 return true;
 371         default:
 372                 return false;
 373         }
 374 }
 375
 376 /**
 377  * Returns the value of a digit.
 378  * The only portable way to do it ...
 379  */
 380 static int digit_value(int digit)
 381 {
 382         switch (digit) {
 383         case '0': return 0;
 384         case '1': return 1;
 385         case '2': return 2;
 386         case '3': return 3;
 387         case '4': return 4;
 388         case '5': return 5;
 389         case '6': return 6;
 390         case '7': return 7;
 391         case '8': return 8;
 392         case '9': return 9;
 393         case 'a':
 394         case 'A': return 10;
 395         case 'b':
 396         case 'B': return 11;
 397         case 'c':
 398         case 'C': return 12;
 399         case 'd':
 400         case 'D': return 13;
 401         case 'e':
 402         case 'E': return 14;
 403         case 'f':
 404         case 'F': return 15;
 405         default:
 406                 panic("wrong character given");
 407         }
 408 }
 409
 410 /**
 411  * Parses an octal character sequence.
 412  *
 413  * @param first_digit  the already read first digit
 414  */
 415 static utf32 parse_octal_sequence(const utf32 first_digit)
 416 {
 417         assert(is_octal_digit(first_digit));
 418         utf32 value = digit_value(first_digit);
 419         if (!is_octal_digit(input.c)) return value;
 420         value = 8 * value + digit_value(input.c);
 421         next_char();
 422         if (!is_octal_digit(input.c)) return value;
 423         value = 8 * value + digit_value(input.c);
 424         next_char();
 425         return value;
 426
 427 }
 428
 429 /**
 430  * Parses a hex character sequence.
 431  */
 432 static utf32 parse_hex_sequence(void)
 433 {
 434         utf32 value = 0;
 435         while (isxdigit(input.c)) {
 436                 value = 16 * value + digit_value(input.c);
 437                 next_char();
 438         }
 439         return value;
 440 }
 441
 442 static bool is_universal_char_valid(utf32 const v)
 443 {
 444         /* C11 §6.4.3:2 */
 445         if (v < 0xA0U && v != 0x24 && v != 0x40 && v != 0x60)
 446                 return false;
 447         if (0xD800 <= v && v <= 0xDFFF)
 448                 return false;
 449         return true;
 450 }
 451
 452 static utf32 parse_universal_char(unsigned const n_digits)
 453 {
 454         utf32 v = 0;
 455         for (unsigned k = n_digits; k != 0; --k) {
 456                 if (isxdigit(input.c)) {
 457                         v = 16 * v + digit_value(input.c);
 458                         if (!resolve_escape_sequences)
 459                                 obstack_1grow(&symbol_obstack, input.c);
 460                         next_char();
 461                 } else {
 462                         errorf(&input.pos,
 463                                "short universal character name, expected %u more digits",
 464                                    k);
 465                         break;
 466                 }
 467         }
 468         if (!is_universal_char_valid(v)) {
 469                 errorf(&input.pos,
 470                        "\\%c%0*X is not a valid universal character name",
 471                        n_digits == 4 ? 'u' : 'U', (int)n_digits, v);
 472         }
 473         return v;
 474 }
 475
 476 static bool is_universal_char_valid_identifier_c99(utf32 const v)
 477 {
 478         static const utf32 single_chars[] = {
 479                 0x00AA, 0x00BA, 0x0386, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0,
 480                 0x1F59, 0x1F5B, 0x1F5D, 0x05BF, 0x09B2, 0x0A02, 0x0A5E, 0x0A74,
 481                 0x0A8D, 0x0AD0, 0x0AE0, 0x0B9C, 0x0CDE, 0x0E84, 0x0E8A, 0x0E8D,
 482                 0x0EA5, 0x0EA7, 0x0EC6, 0x0F00, 0x0F35, 0x0F37, 0x0F39, 0x0F97,
 483                 0x0FB9, 0x00B5, 0x00B7, 0x02BB, 0x037A, 0x0559, 0x093D, 0x0B3D,
 484                 0x1FBE, 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128
 485         };
 486
 487         static const utf32 ranges[][2] = {
 488                 {0x00C0, 0x00D6}, {0x00D8, 0x00F6}, {0x00F8, 0x01F5}, {0x01FA, 0x0217},
 489                 {0x0250, 0x02A8}, {0x1E00, 0x1E9B}, {0x1EA0, 0x1EF9}, {0x0388, 0x038A},
 490                 {0x038E, 0x03A1}, {0x03A3, 0x03CE}, {0x03D0, 0x03D6}, {0x03E2, 0x03F3},
 491                 {0x1F00, 0x1F15}, {0x1F18, 0x1F1D}, {0x1F20, 0x1F45}, {0x1F48, 0x1F4D},
 492                 {0x1F50, 0x1F57}, {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4}, {0x1FB6, 0x1FBC},
 493                 {0x1FC2, 0x1FC4}, {0x1FC6, 0x1FCC}, {0x1FD0, 0x1FD3}, {0x1FD6, 0x1FDB},
 494                 {0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4}, {0x1FF6, 0x1FFC}, {0x0401, 0x040C},
 495                 {0x040E, 0x044F}, {0x0451, 0x045C}, {0x045E, 0x0481}, {0x0490, 0x04C4},
 496                 {0x04C7, 0x04C8}, {0x04CB, 0x04CC}, {0x04D0, 0x04EB}, {0x04EE, 0x04F5},
 497                 {0x04F8, 0x04F9}, {0x0531, 0x0556}, {0x0561, 0x0587}, {0x05B0, 0x05B9},
 498                 {0x05BB, 0x05BD}, {0x05C1, 0x05C2}, {0x05D0, 0x05EA}, {0x05F0, 0x05F2},
 499                 {0x0621, 0x063A}, {0x0640, 0x0652}, {0x0670, 0x06B7}, {0x06BA, 0x06BE},
 500                 {0x06C0, 0x06CE}, {0x06D0, 0x06DC}, {0x06E5, 0x06E8}, {0x06EA, 0x06ED},
 501                 {0x0901, 0x0903}, {0x0905, 0x0939}, {0x093E, 0x094D}, {0x0950, 0x0952},
 502                 {0x0958, 0x0963}, {0x0981, 0x0983}, {0x0985, 0x098C}, {0x098F, 0x0990},
 503                 {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B6, 0x09B9}, {0x09BE, 0x09C4},
 504                 {0x09C7, 0x09C8}, {0x09CB, 0x09CD}, {0x09DC, 0x09DD}, {0x09DF, 0x09E3},
 505                 {0x09F0, 0x09F1}, {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28},
 506                 {0x0A2A, 0x0A30}, {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, {0x0A38, 0x0A39},
 507                 {0x0A3E, 0x0A42}, {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A59, 0x0A5C},
 508                 {0x0A81, 0x0A83}, {0x0A85, 0x0A8B}, {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8},
 509                 {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3}, {0x0AB5, 0x0AB9}, {0x0ABD, 0x0AC5},
 510                 {0x0AC7, 0x0AC9}, {0x0ACB, 0x0ACD}, {0x0B01, 0x0B03}, {0x0B05, 0x0B0C},
 511                 {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, {0x0B32, 0x0B33},
 512                 {0x0B36, 0x0B39}, {0x0B3E, 0x0B43}, {0x0B47, 0x0B48}, {0x0B4B, 0x0B4D},
 513                 {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B61}, {0x0B82, 0x0B83}, {0x0B85, 0x0B8A},
 514                 {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A}, {0x0B9E, 0x0B9F},
 515                 {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA}, {0x0BAE, 0x0BB5}, {0x0BB7, 0x0BB9},
 516                 {0x0BBE, 0x0BC2}, {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD}, {0x0C01, 0x0C03},
 517                 {0x0C05, 0x0C0C}, {0x0C0E, 0x0C10}, {0x0C12, 0x0C28}, {0x0C2A, 0x0C33},
 518                 {0x0C35, 0x0C39}, {0x0C3E, 0x0C44}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D},
 519                 {0x0C60, 0x0C61}, {0x0C82, 0x0C83}, {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90},
 520                 {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3}, {0x0CB5, 0x0CB9}, {0x0CBE, 0x0CC4},
 521                 {0x0CC6, 0x0CC8}, {0x0CCA, 0x0CCD}, {0x0CE0, 0x0CE1}, {0x0D02, 0x0D03},
 522                 {0x0D05, 0x0D0C}, {0x0D0E, 0x0D10}, {0x0D12, 0x0D28}, {0x0D2A, 0x0D39},
 523                 {0x0D3E, 0x0D43}, {0x0D46, 0x0D48}, {0x0D4A, 0x0D4D}, {0x0D60, 0x0D61},
 524                 {0x0E01, 0x0E3A}, {0x0E40, 0x0E5B}, {0x0E81, 0x0E82}, {0x0E87, 0x0E88},
 525                 {0x0E94, 0x0E97}, {0x0E99, 0x0E9F}, {0x0EA1, 0x0EA3}, {0x0EAA, 0x0EAB},
 526                 {0x0EAD, 0x0EAE}, {0x0EB0, 0x0EB9}, {0x0EBB, 0x0EBD}, {0x0EC0, 0x0EC4},
 527                 {0x0EC8, 0x0ECD}, {0x0EDC, 0x0EDD}, {0x0F18, 0x0F19}, {0x0F3E, 0x0F47},
 528                 {0x0F49, 0x0F69}, {0x0F71, 0x0F84}, {0x0F86, 0x0F8B}, {0x0F90, 0x0F95},
 529                 {0x0F99, 0x0FAD}, {0x0FB1, 0x0FB7}, {0x10A0, 0x10C5}, {0x10D0, 0x10F6},
 530                 {0x3041, 0x3093}, {0x309B, 0x309C}, {0x30A1, 0x30F6}, {0x30FB, 0x30FC},
 531                 {0x3105, 0x312C}, {0x4E00, 0x9FA5}, {0xAC00, 0xD7A3}, {0x0660, 0x0669},
 532                 {0x06F0, 0x06F9}, {0x0966, 0x096F}, {0x09E6, 0x09EF}, {0x0A66, 0x0A6F},
 533                 {0x0AE6, 0x0AEF}, {0x0B66, 0x0B6F}, {0x0BE7, 0x0BEF}, {0x0C66, 0x0C6F},
 534                 {0x0CE6, 0x0CEF}, {0x0D66, 0x0D6F}, {0x0E50, 0x0E59}, {0x0ED0, 0x0ED9},
 535                 {0x0F20, 0x0F33}, {0x02B0, 0x02B8}, {0x02BD, 0x02C1}, {0x02D0, 0x02D1},
 536                 {0x02E0, 0x02E4}, {0x203F, 0x2040}, {0x210A, 0x2113}, {0x2118, 0x211D},
 537                 {0x212A, 0x2131}, {0x2133, 0x2138}, {0x2160, 0x2182}, {0x3005, 0x3007},
 538                 {0x3021, 0x3029},
 539         };
 540         for (size_t i = 0; i < sizeof(ranges)/sizeof(ranges[0]); ++i) {
 541                 if (ranges[i][0] <= v && v <= ranges[i][1])
 542                         return true;
 543         }
 544         for (size_t i = 0; i < sizeof(single_chars)/sizeof(single_chars[0]); ++i) {
 545                 if (v == single_chars[i])
 546                         return true;
 547         }
 548         return false;
 549 }
 550
 551 static bool is_universal_char_valid_identifier_c11(utf32 const v)
 552 {
 553         /* C11 Annex D.1 */
 554         if (                v == 0x000A8) return true;
 555         if (                v == 0x000AA) return true;
 556         if (                v == 0x000AD) return true;
 557         if (                v == 0x000AF) return true;
 558         if (0x000B2 <= v && v <= 0x000B5) return true;
 559         if (0x000B7 <= v && v <= 0x000BA) return true;
 560         if (0x000BC <= v && v <= 0x000BE) return true;
 561         if (0x000C0 <= v && v <= 0x000D6) return true;
 562         if (0x000D8 <= v && v <= 0x000F6) return true;
 563         if (0x000F8 <= v && v <= 0x000FF) return true;
 564         if (0x00100 <= v && v <= 0x0167F) return true;
 565         if (0x01681 <= v && v <= 0x0180D) return true;
 566         if (0x0180F <= v && v <= 0x01FFF) return true;
 567         if (0x0200B <= v && v <= 0x0200D) return true;
 568         if (0x0202A <= v && v <= 0x0202E) return true;
 569         if (0x0203F <= v && v <= 0x02040) return true;
 570         if (                v == 0x02054) return true;
 571         if (0x02060 <= v && v <= 0x0206F) return true;
 572         if (0x02070 <= v && v <= 0x0218F) return true;
 573         if (0x02460 <= v && v <= 0x024FF) return true;
 574         if (0x02776 <= v && v <= 0x02793) return true;
 575         if (0x02C00 <= v && v <= 0x02DFF) return true;
 576         if (0x02E80 <= v && v <= 0x02FFF) return true;
 577         if (0x03004 <= v && v <= 0x03007) return true;
 578         if (0x03021 <= v && v <= 0x0302F) return true;
 579         if (0x03031 <= v && v <= 0x0303F) return true;
 580         if (0x03040 <= v && v <= 0x0D7FF) return true;
 581         if (0x0F900 <= v && v <= 0x0FD3D) return true;
 582         if (0x0FD40 <= v && v <= 0x0FDCF) return true;
 583         if (0x0FDF0 <= v && v <= 0x0FE44) return true;
 584         if (0x0FE47 <= v && v <= 0x0FFFD) return true;
 585         if (0x10000 <= v && v <= 0x1FFFD) return true;
 586         if (0x20000 <= v && v <= 0x2FFFD) return true;
 587         if (0x30000 <= v && v <= 0x3FFFD) return true;
 588         if (0x40000 <= v && v <= 0x4FFFD) return true;
 589         if (0x50000 <= v && v <= 0x5FFFD) return true;
 590         if (0x60000 <= v && v <= 0x6FFFD) return true;
 591         if (0x70000 <= v && v <= 0x7FFFD) return true;
 592         if (0x80000 <= v && v <= 0x8FFFD) return true;
 593         if (0x90000 <= v && v <= 0x9FFFD) return true;
 594         if (0xA0000 <= v && v <= 0xAFFFD) return true;
 595         if (0xB0000 <= v && v <= 0xBFFFD) return true;
 596         if (0xC0000 <= v && v <= 0xCFFFD) return true;
 597         if (0xD0000 <= v && v <= 0xDFFFD) return true;
 598         if (0xE0000 <= v && v <= 0xEFFFD) return true;
 599         return false;
 600 }
 601
 602 static bool is_universal_char_valid_identifier(utf32 const v)
 603 {
 604         if (c_mode & _C11)
 605                 return is_universal_char_valid_identifier_c11(v);
 606         return is_universal_char_valid_identifier_c99(v);
 607 }
 608
 609 static bool is_universal_char_invalid_identifier_start(utf32 const v)
 610 {
 611         if (! (c_mode & _C11))
 612                 return false;
 613
 614         /* C11 Annex D.2 */
 615         if (0x0300 <= v && v <= 0x036F) return true;
 616         if (0x1DC0 <= v && v <= 0x1DFF) return true;
 617         if (0x20D0 <= v && v <= 0x20FF) return true;
 618         if (0xFE20 <= v && v <= 0xFE2F) return true;
 619         return false;
 620 }
 621
 622 /**
 623  * Parse an escape sequence.
 624  */
 625 static utf32 parse_escape_sequence(void)
 626 {
 627         eat('\\');
 628
 629         utf32 const ec = input.c;
 630         next_char();
 631
 632         switch (ec) {
 633         case '"':  return '"';
 634         case '\'': return '\'';
 635         case '\\': return '\\';
 636         case '?': return '\?';
 637         case 'a': return '\a';
 638         case 'b': return '\b';
 639         case 'f': return '\f';
 640         case 'n': return '\n';
 641         case 'r': return '\r';
 642         case 't': return '\t';
 643         case 'v': return '\v';
 644         case 'x':
 645                 return parse_hex_sequence();
 646         case '0':
 647         case '1':
 648         case '2':
 649         case '3':
 650         case '4':
 651         case '5':
 652         case '6':
 653         case '7':
 654                 return parse_octal_sequence(ec);
 655         case EOF:
 656                 parse_error("reached end of file while parsing escape sequence");
 657                 return EOF;
 658         /* \E is not documented, but handled, by GCC.  It is acceptable according
 659          * to §6.11.4, whereas \e is not. */
 660         case 'E':
 661         case 'e':
 662                 if (c_mode & _GNUC)
 663                         return 27;   /* hopefully 27 is ALWAYS the code for ESCAPE */
 664                 break;
 665
 666         case 'U': return parse_universal_char(8);
 667         case 'u': return parse_universal_char(4);
 668
 669         default:
 670                 break;
 671         }
 672         /* §6.4.4.4:8 footnote 64 */
 673         parse_error("unknown escape sequence");
 674         return EOF;
 675 }
 676
 677 static const char *identify_string(char *string)
 678 {
 679         const char *result = strset_insert(&stringset, string);
 680         if (result != string) {
 681                 obstack_free(&symbol_obstack, string);
 682         }
 683         return result;
 684 }
 685
 686 static string_t sym_make_string(string_encoding_t const enc)
 687 {
 688         obstack_1grow(&symbol_obstack, '\0');
 689         size_t      const len    = obstack_object_size(&symbol_obstack) - 1;
 690         char       *const string = obstack_finish(&symbol_obstack);
 691         char const *const result = identify_string(string);
 692         return (string_t){ result, len, enc };
 693 }
 694
 695 string_t make_string(char const *const string)
 696 {
 697         obstack_grow(&symbol_obstack, string, strlen(string));
 698         return sym_make_string(STRING_ENCODING_CHAR);
 699 }
 700
 701 static utf32 get_string_encoding_limit(string_encoding_t const enc)
 702 {
 703         switch (enc) {
 704         case STRING_ENCODING_CHAR:   return 0xFF;
 705         case STRING_ENCODING_CHAR16: return 0xFFFF;
 706         case STRING_ENCODING_CHAR32: return 0xFFFFFFFF;
 707         case STRING_ENCODING_UTF8:   return 0xFFFFFFFF;
 708         case STRING_ENCODING_WIDE:   return 0xFFFFFFFF; // FIXME depends on settings
 709         }
 710         panic("invalid string encoding");
 711 }
 712
 713 static void parse_string(utf32 const delimiter, token_kind_t const kind,
 714                          string_encoding_t const enc,
 715                          char const *const context)
 716 {
 717         eat(delimiter);
 718
 719         utf32 const limit = get_string_encoding_limit(enc);
 720         while (true) {
 721                 switch (input.c) {
 722                 case '\\': {
 723                         if (resolve_escape_sequences) {
 724                                 utf32 const tc = parse_escape_sequence();
 725                                 if (tc > limit) {
 726                                         warningf(WARN_OTHER, &pp_token.base.pos,
 727                                                  "escape sequence out of range");
 728                                 }
 729                                 if (enc == STRING_ENCODING_CHAR) {
 730                                         obstack_1grow(&symbol_obstack, tc);
 731                                 } else {
 732                                         obstack_grow_utf8(&symbol_obstack, tc);
 733                                 }
 734                         } else {
 735                                 obstack_1grow(&symbol_obstack, (char)input.c);
 736                                 next_char();
 737                                 obstack_1grow(&symbol_obstack, (char)input.c);
 738                                 next_char();
 739                         }
 740                         break;
 741                 }
 742
 743                 case NEWLINE:
 744                         errorf(&pp_token.base.pos, "newline while parsing %s", context);
 745                         break;
 746
 747                 case EOF:
 748                         errorf(&pp_token.base.pos, "EOF while parsing %s", context);
 749                         goto end_of_string;
 750
 751                 default:
 752                         if (input.c == delimiter) {
 753                                 next_char();
 754                                 goto end_of_string;
 755                         } else {
 756                                 obstack_grow_utf8(&symbol_obstack, input.c);
 757                                 next_char();
 758                                 break;
 759                         }
 760                 }
 761         }
 762
 763 end_of_string:
 764         pp_token.kind           = kind;
 765         pp_token.literal.string = sym_make_string(enc);
 766 }
 767
 768 static void parse_string_literal(string_encoding_t const enc)
 769 {
 770         parse_string('"', T_STRING_LITERAL, enc, "string literal");
 771 }
 772
 773 static void parse_character_constant(string_encoding_t const enc)
 774 {
 775         parse_string('\'', T_CHARACTER_CONSTANT, enc, "character constant");
 776         if (pp_token.literal.string.size == 0) {
 777                 parse_error("empty character constant");
 778         }
 779 }
 780
 781 #define SYMBOL_CASES_WITHOUT_E_P \
 782              '$': if (!allow_dollar_in_symbol) goto dollar_sign; \
 783         case 'a': \
 784         case 'b': \
 785         case 'c': \
 786         case 'd': \
 787         case 'f': \
 788         case 'g': \
 789         case 'h': \
 790         case 'i': \
 791         case 'j': \
 792         case 'k': \
 793         case 'l': \
 794         case 'm': \
 795         case 'n': \
 796         case 'o': \
 797         case 'q': \
 798         case 'r': \
 799         case 's': \
 800         case 't': \
 801         case 'u': \
 802         case 'v': \
 803         case 'w': \
 804         case 'x': \
 805         case 'y': \
 806         case 'z': \
 807         case 'A': \
 808         case 'B': \
 809         case 'C': \
 810         case 'D': \
 811         case 'F': \
 812         case 'G': \
 813         case 'H': \
 814         case 'I': \
 815         case 'J': \
 816         case 'K': \
 817         case 'L': \
 818         case 'M': \
 819         case 'N': \
 820         case 'O': \
 821         case 'Q': \
 822         case 'R': \
 823         case 'S': \
 824         case 'T': \
 825         case 'U': \
 826         case 'V': \
 827         case 'W': \
 828         case 'X': \
 829         case 'Y': \
 830         case 'Z': \
 831         case '_'
 832
 833 #define SYMBOL_CASES \
 834              SYMBOL_CASES_WITHOUT_E_P: \
 835         case 'e': \
 836         case 'p': \
 837         case 'E': \
 838         case 'P'
 839
 840 #define DIGIT_CASES \
 841              '0':  \
 842         case '1':  \
 843         case '2':  \
 844         case '3':  \
 845         case '4':  \
 846         case '5':  \
 847         case '6':  \
 848         case '7':  \
 849         case '8':  \
 850         case '9'
 851
 852 static void start_expanding(pp_definition_t *definition)
 853 {
 854         definition->parent_expansion = current_expansion;
 855         definition->expand_pos       = 0;
 856         definition->is_expanding     = true;
 857         if (definition->list_len > 0) {
 858                 definition->token_list[0].had_whitespace
 859                         = info.had_whitespace;
 860         }
 861         current_expansion = definition;
 862 }
 863
 864 static void finished_expanding(pp_definition_t *definition)
 865 {
 866         assert(definition->is_expanding);
 867         pp_definition_t *parent = definition->parent_expansion;
 868         definition->parent_expansion = NULL;
 869         definition->is_expanding     = false;
 870
 871         /* stop further expanding once we expanded a parameter used in a
 872          * sub macro-call */
 873         if (definition == argument_expanding)
 874                 argument_expanding = NULL;
 875
 876         assert(current_expansion == definition);
 877         current_expansion = parent;
 878 }
 879
 880 static void grow_string_escaped(struct obstack *obst, const string_t *string, char const *delimiter)
 881 {
 882         char const *prefix = get_string_encoding_prefix(string->encoding);
 883         obstack_printf(obst, "%s%s", prefix, delimiter);
 884         size_t      size = string->size;
 885         const char *str  = string->begin;
 886         if (resolve_escape_sequences) {
 887                 obstack_grow(obst, str, size);
 888         } else {
 889                 for (size_t i = 0; i < size; ++i) {
 890                         const char c = str[i];
 891                         if (c == '\\' || c == '"')
 892                                 obstack_1grow(obst, '\\');
 893                         obstack_1grow(obst, c);
 894                 }
 895         }
 896         obstack_printf(obst, "%s", delimiter);
 897 }
 898
 899 static void grow_token(struct obstack *obst, const token_t *token)
 900 {
 901         switch (token->kind) {
 902         case T_NUMBER:
 903                 obstack_grow(obst, token->literal.string.begin, token->literal.string.size);
 904                 break;
 905
 906         case T_STRING_LITERAL: {
 907                 char const *const delimiter = resolve_escape_sequences ? "\"" : "\\\"";
 908                 grow_string_escaped(obst, &token->literal.string, delimiter);
 909                 break;
 910         }
 911
 912         case T_CHARACTER_CONSTANT:
 913                 grow_string_escaped(obst, &token->literal.string, "'");
 914                 break;
 915
 916         case T_IDENTIFIER:
 917         default: {
 918                 const char *str = token->base.symbol->string;
 919                 size_t      len = strlen(str);
 920                 obstack_grow(obst, str, len);
 921                 break;
 922         }
 923         }
 924 }
 925
 926 static void stringify(const pp_definition_t *definition)
 927 {
 928         assert(obstack_object_size(&symbol_obstack) == 0);
 929
 930         size_t list_len = definition->list_len;
 931         for (size_t p = 0; p < list_len; ++p) {
 932                 const saved_token_t *saved = &definition->token_list[p];
 933                 if (p > 0 && saved->had_whitespace)
 934                         obstack_1grow(&symbol_obstack, ' ');
 935                 grow_token(&symbol_obstack, &saved->token);
 936         }
 937         pp_token.kind           = T_STRING_LITERAL;
 938         pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
 939 }
 940
 941 static inline void set_punctuator(token_kind_t const kind)
 942 {
 943         pp_token.kind        = kind;
 944         pp_token.base.symbol = token_symbols[kind];
 945 }
 946
 947 static inline void set_digraph(token_kind_t const kind, symbol_t *const symbol)
 948 {
 949         pp_token.kind        = kind;
 950         pp_token.base.symbol = symbol;
 951 }
 952
 953 /**
 954  * returns next final token from a preprocessor macro expansion
 955  */
 956 static bool expand_next(void)
 957 {
 958         if (current_expansion == NULL)
 959                 return false;
 960
 961 restart:;
 962         size_t pos = current_expansion->expand_pos;
 963         if (pos >= current_expansion->list_len) {
 964                 finished_expanding(current_expansion);
 965                 /* it was the outermost expansion, parse pptoken normally */
 966                 if (current_expansion == NULL) {
 967                         return false;
 968                 }
 969                 goto restart;
 970         }
 971         const saved_token_t *saved = &current_expansion->token_list[pos++];
 972         pp_token = saved->token;
 973         if (pp_token.kind == '#') {
 974                 if (pos < current_expansion->list_len) {
 975                         const saved_token_t *next = &current_expansion->token_list[pos];
 976                         if (next->token.kind == T_MACRO_PARAMETER) {
 977                                 pp_definition_t *def = next->token.macro_parameter.def;
 978                                 assert(def != NULL && def->is_parameter);
 979                                 stringify(def);
 980                                 ++pos;
 981                         }
 982                 }
 983         }
 984
 985         if (current_expansion->expand_pos > 0)
 986                 info.had_whitespace = saved->had_whitespace;
 987         current_expansion->expand_pos = pos;
 988         pp_token.base.pos             = expansion_pos;
 989
 990         return true;
 991 }
 992
 993 /**
 994  * Returns the next token kind found when continuing the current expansions
 995  * without starting new sub-expansions.
 996  */
 997 static token_kind_t peek_expansion(void)
 998 {
 999         for (pp_definition_t *e = current_expansion; e; e = e->parent_expansion) {
1000                 if (e->expand_pos < e->list_len)
1001                         return e->token_list[e->expand_pos].token.kind;
1002         }
1003         return T_EOF;
1004 }
1005
1006 static void skip_line_comment(void)
1007 {
1008         info.had_whitespace = true;
1009         while (true) {
1010                 switch (input.c) {
1011                 case EOF:
1012                         return;
1013
1014                 case '\r':
1015                 case '\n':
1016                         return;
1017
1018                 default:
1019                         next_char();
1020                         break;
1021                 }
1022         }
1023 }
1024
1025 static void skip_multiline_comment(void)
1026 {
1027         info.had_whitespace = true;
1028
1029         position_t const start_pos = input.pos;
1030         while (true) {
1031                 switch (input.c) {
1032                 case '/':
1033                         next_char();
1034                         if (input.c == '*') {
1035                                 /* TODO: nested comment, warn here */
1036                         }
1037                         break;
1038                 case '*':
1039                         next_char();
1040                         if (input.c == '/') {
1041                                 if (input.pos.lineno != input.output_line)
1042                                         info.whitespace_at_line_begin = input.pos.colno;
1043                                 next_char();
1044                                 return;
1045                         }
1046                         break;
1047
1048                 case NEWLINE:
1049                         break;
1050
1051                 case EOF:
1052                         errorf(&start_pos, "at end of file while looking for comment end");
1053                         return;
1054
1055                 default:
1056                         next_char();
1057                         break;
1058                 }
1059         }
1060 }
1061
1062 static bool skip_till_newline(bool stop_at_non_whitespace)
1063 {
1064         bool res = false;
1065         while (true) {
1066                 switch (input.c) {
1067                 case ' ':
1068                 case '\t':
1069                         next_char();
1070                         continue;
1071
1072                 case '/':
1073                         next_char();
1074                         if (input.c == '/') {
1075                                 next_char();
1076                                 skip_line_comment();
1077                                 continue;
1078                         } else if (input.c == '*') {
1079                                 next_char();
1080                                 skip_multiline_comment();
1081                                 continue;
1082                         } else {
1083                                 put_back(input.c);
1084                                 input.c = '/';
1085                         }
1086                         return true;
1087
1088                 case NEWLINE:
1089                         return res;
1090
1091                 default:
1092                         if (stop_at_non_whitespace)
1093                                 return false;
1094                         res = true;
1095                         next_char();
1096                         continue;
1097                 }
1098         }
1099 }
1100
1101 static void skip_whitespace(void)
1102 {
1103         while (true) {
1104                 switch (input.c) {
1105                 case ' ':
1106                 case '\t':
1107                         ++info.whitespace_at_line_begin;
1108                         info.had_whitespace = true;
1109                         next_char();
1110                         continue;
1111
1112                 case NEWLINE:
1113                         info.at_line_begin  = true;
1114                         info.had_whitespace = true;
1115                         info.whitespace_at_line_begin = 0;
1116                         continue;
1117
1118                 case '/':
1119                         next_char();
1120                         if (input.c == '/') {
1121                                 next_char();
1122                                 skip_line_comment();
1123                                 continue;
1124                         } else if (input.c == '*') {
1125                                 next_char();
1126                                 skip_multiline_comment();
1127                                 continue;
1128                         } else {
1129                                 put_back(input.c);
1130                                 input.c = '/';
1131                         }
1132                         return;
1133
1134                 default:
1135                         return;
1136                 }
1137         }
1138 }
1139
1140 static inline void eat_pp(pp_token_kind_t const kind)
1141 {
1142         assert(pp_token.base.symbol->pp_ID == kind);
1143         (void) kind;
1144         next_input_token();
1145 }
1146
1147 static inline void eat_token(token_kind_t const kind)
1148 {
1149         assert(pp_token.kind == kind);
1150         (void)kind;
1151         next_input_token();
1152 }
1153
1154 static string_encoding_t identify_encoding_prefix(symbol_t *const sym)
1155 {
1156         if (sym == symbol_L) return STRING_ENCODING_WIDE;
1157         if (c_mode & _C11) {
1158                 if (sym == symbol_U)  return STRING_ENCODING_CHAR32;
1159                 if (sym == symbol_u)  return STRING_ENCODING_CHAR16;
1160                 if (sym == symbol_u8) return STRING_ENCODING_UTF8;
1161         }
1162         return STRING_ENCODING_CHAR;
1163 }
1164
1165 static void parse_symbol(void)
1166 {
1167         assert(obstack_object_size(&symbol_obstack) == 0);
1168         while (true) {
1169                 switch (input.c) {
1170                 case DIGIT_CASES:
1171                 case SYMBOL_CASES:
1172                         obstack_1grow(&symbol_obstack, (char) input.c);
1173                         next_char();
1174                         break;
1175
1176                 case '\\':
1177                         next_char();
1178                         switch (input.c) {
1179                         {
1180                                 unsigned n;
1181                         case 'U': n = 8; goto universal;
1182                         case 'u': n = 4; goto universal;
1183 universal:
1184                                 if (!resolve_escape_sequences) {
1185                                         obstack_1grow(&symbol_obstack, '\\');
1186                                         obstack_1grow(&symbol_obstack, input.c);
1187                                 }
1188                                 next_char();
1189                                 utf32 const v = parse_universal_char(n);
1190                                 if (!is_universal_char_valid_identifier(v)) {
1191                                         if (is_universal_char_valid(v)) {
1192                                                 errorf(&input.pos,
1193                                                            "universal character \\%c%0*X is not valid in an identifier",
1194                                                            n == 4 ? 'u' : 'U', (int)n, v);
1195                                         }
1196                                 } else if (obstack_object_size(&symbol_obstack) == 0 && is_universal_char_invalid_identifier_start(v)) {
1197                                         errorf(&input.pos,
1198                                                    "universal character \\%c%0*X is not valid as start of an identifier",
1199                                                    n == 4 ? 'u' : 'U', (int)n, v);
1200                                 } else if (resolve_escape_sequences) {
1201                                         obstack_grow_utf8(&symbol_obstack, v);
1202                                 }
1203                                 break;
1204                         }
1205
1206                         default:
1207                                 put_back(input.c);
1208                                 input.c = '\\';
1209                                 goto end_symbol;
1210                         }
1211
1212                 default:
1213 dollar_sign:
1214                         goto end_symbol;
1215                 }
1216         }
1217
1218 end_symbol:
1219         obstack_1grow(&symbol_obstack, '\0');
1220         char *string = obstack_finish(&symbol_obstack);
1221
1222         symbol_t *symbol = symbol_table_insert(string);
1223
1224         /* Might be a prefixed string or character constant: L/U/u/u8"string". */
1225         if (input.c == '"') {
1226                 string_encoding_t const enc = identify_encoding_prefix(symbol);
1227                 if (enc != STRING_ENCODING_CHAR) {
1228                         parse_string_literal(enc);
1229                         return;
1230                 }
1231         } else if (input.c == '\'') {
1232                 string_encoding_t const enc = identify_encoding_prefix(symbol);
1233                 if (enc != STRING_ENCODING_CHAR) {
1234                         if (enc == STRING_ENCODING_UTF8) {
1235                                 errorf(&pp_token.base.pos,
1236                                        "'u8' is not a valid encoding for a chracter constant");
1237                         }
1238                         parse_character_constant(enc);
1239                         return;
1240                 }
1241         }
1242
1243         pp_token.kind        = symbol->ID;
1244         pp_token.base.symbol = symbol;
1245
1246         /* we can free the memory from symbol obstack if we already had an entry in
1247          * the symbol table */
1248         if (symbol->string != string) {
1249                 obstack_free(&symbol_obstack, string);
1250         }
1251 }
1252
1253 static void parse_number(void)
1254 {
1255         obstack_1grow(&symbol_obstack, (char) input.c);
1256         next_char();
1257
1258         while (true) {
1259                 switch (input.c) {
1260                 case '.':
1261                 case DIGIT_CASES:
1262                 case SYMBOL_CASES_WITHOUT_E_P:
1263                         obstack_1grow(&symbol_obstack, (char) input.c);
1264                         next_char();
1265                         break;
1266
1267                 case 'e':
1268                 case 'p':
1269                 case 'E':
1270                 case 'P':
1271                         obstack_1grow(&symbol_obstack, (char) input.c);
1272                         next_char();
1273                         if (input.c == '+' || input.c == '-') {
1274                                 obstack_1grow(&symbol_obstack, (char) input.c);
1275                                 next_char();
1276                         }
1277                         break;
1278
1279                 default:
1280 dollar_sign:
1281                         goto end_number;
1282                 }
1283         }
1284
1285 end_number:
1286         pp_token.kind           = T_NUMBER;
1287         pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
1288 }
1289
1290 #define MAYBE_PROLOG \
1291         next_char(); \
1292         switch (input.c) {
1293
1294 #define MAYBE(ch, kind) \
1295         case ch: \
1296                 next_char(); \
1297                 set_punctuator(kind); \
1298                 return;
1299
1300 #define MAYBE_DIGRAPH(ch, kind, symbol) \
1301         case ch: \
1302                 next_char(); \
1303                 set_digraph(kind, symbol); \
1304                 return;
1305
1306 #define ELSE_CODE(code) \
1307         default: \
1308                 code \
1309         }
1310
1311 #define ELSE(kind) ELSE_CODE(set_punctuator(kind); return;)
1312
1313 /** identifies and returns the next preprocessing token contained in the
1314  * input stream. No macro expansion is performed. */
1315 static void next_input_token(void)
1316 {
1317         if (next_info.had_whitespace) {
1318                 info = next_info;
1319                 next_info.had_whitespace = false;
1320         } else {
1321                 info.at_line_begin  = false;
1322                 info.had_whitespace = false;
1323         }
1324 restart:
1325         pp_token.base.pos    = input.pos;
1326         pp_token.base.symbol = NULL;
1327
1328         switch (input.c) {
1329         case ' ':
1330         case '\t':
1331                 info.whitespace_at_line_begin++;
1332                 info.had_whitespace = true;
1333                 next_char();
1334                 goto restart;
1335
1336         case NEWLINE:
1337                 info.at_line_begin            = true;
1338                 info.had_whitespace           = true;
1339                 info.whitespace_at_line_begin = 0;
1340                 goto restart;
1341
1342         case SYMBOL_CASES:
1343                 parse_symbol();
1344                 return;
1345
1346         case DIGIT_CASES:
1347                 parse_number();
1348                 return;
1349
1350         case '"':
1351                 parse_string_literal(STRING_ENCODING_CHAR);
1352                 return;
1353
1354         case '\'':
1355                 parse_character_constant(STRING_ENCODING_CHAR);
1356                 return;
1357
1358         case '.':
1359                 MAYBE_PROLOG
1360                         case '0':
1361                         case '1':
1362                         case '2':
1363                         case '3':
1364                         case '4':
1365                         case '5':
1366                         case '6':
1367                         case '7':
1368                         case '8':
1369                         case '9':
1370                                 put_back(input.c);
1371                                 input.c = '.';
1372                                 parse_number();
1373                                 return;
1374
1375                         case '.':
1376                                 MAYBE_PROLOG
1377                                 MAYBE('.', T_DOTDOTDOT)
1378                                 ELSE_CODE(
1379                                         put_back(input.c);
1380                                         input.c = '.';
1381                                         set_punctuator('.');
1382                                         return;
1383                                 )
1384                 ELSE('.')
1385         case '&':
1386                 MAYBE_PROLOG
1387                 MAYBE('&', T_ANDAND)
1388                 MAYBE('=', T_ANDEQUAL)
1389                 ELSE('&')
1390         case '*':
1391                 MAYBE_PROLOG
1392                 MAYBE('=', T_ASTERISKEQUAL)
1393                 ELSE('*')
1394         case '+':
1395                 MAYBE_PROLOG
1396                 MAYBE('+', T_PLUSPLUS)
1397                 MAYBE('=', T_PLUSEQUAL)
1398                 ELSE('+')
1399         case '-':
1400                 MAYBE_PROLOG
1401                 MAYBE('>', T_MINUSGREATER)
1402                 MAYBE('-', T_MINUSMINUS)
1403                 MAYBE('=', T_MINUSEQUAL)
1404                 ELSE('-')
1405         case '!':
1406                 MAYBE_PROLOG
1407                 MAYBE('=', T_EXCLAMATIONMARKEQUAL)
1408                 ELSE('!')
1409         case '/':
1410                 MAYBE_PROLOG
1411                 MAYBE('=', T_SLASHEQUAL)
1412                 case '*':
1413                         next_char();
1414                         skip_multiline_comment();
1415                         goto restart;
1416                 case '/':
1417                         next_char();
1418                         skip_line_comment();
1419                         goto restart;
1420                 ELSE('/')
1421         case '%':
1422                 MAYBE_PROLOG
1423                 MAYBE_DIGRAPH('>', '}', symbol_percentgreater)
1424                 MAYBE('=', T_PERCENTEQUAL)
1425                 case ':':
1426                         MAYBE_PROLOG
1427                         case '%':
1428                                 MAYBE_PROLOG
1429                                 MAYBE_DIGRAPH(':', T_HASHHASH, symbol_percentcolonpercentcolon)
1430                                 ELSE_CODE(
1431                                         put_back(input.c);
1432                                         input.c = '%';
1433                                         goto digraph_percentcolon;
1434                                 )
1435                         ELSE_CODE(
1436 digraph_percentcolon:
1437                                 set_digraph('#', symbol_percentcolon);
1438                                 return;
1439                         )
1440                 ELSE('%')
1441         case '<':
1442                 MAYBE_PROLOG
1443                 MAYBE_DIGRAPH(':', '[', symbol_lesscolon)
1444                 MAYBE_DIGRAPH('%', '{', symbol_lesspercent)
1445                 MAYBE('=', T_LESSEQUAL)
1446                 case '<':
1447                         MAYBE_PROLOG
1448                         MAYBE('=', T_LESSLESSEQUAL)
1449                         ELSE(T_LESSLESS)
1450                 ELSE('<')
1451         case '>':
1452                 MAYBE_PROLOG
1453                 MAYBE('=', T_GREATEREQUAL)
1454                 case '>':
1455                         MAYBE_PROLOG
1456                         MAYBE('=', T_GREATERGREATEREQUAL)
1457                         ELSE(T_GREATERGREATER)
1458                 ELSE('>')
1459         case '^':
1460                 MAYBE_PROLOG
1461                 MAYBE('=', T_CARETEQUAL)
1462                 ELSE('^')
1463         case '|':
1464                 MAYBE_PROLOG
1465                 MAYBE('=', T_PIPEEQUAL)
1466                 MAYBE('|', T_PIPEPIPE)
1467                 ELSE('|')
1468         case ':':
1469                 MAYBE_PROLOG
1470                 MAYBE_DIGRAPH('>', ']', symbol_colongreater)
1471                 case ':':
1472                         if (c_mode & _CXX) {
1473                                 next_char();
1474                                 set_punctuator(T_COLONCOLON);
1475                                 return;
1476                         }
1477                         /* FALLTHROUGH */
1478                 ELSE(':')
1479         case '=':
1480                 MAYBE_PROLOG
1481                 MAYBE('=', T_EQUALEQUAL)
1482                 ELSE('=')
1483         case '#':
1484                 MAYBE_PROLOG
1485                 MAYBE('#', T_HASHHASH)
1486                 ELSE('#')
1487
1488         case '?':
1489         case '[':
1490         case ']':
1491         case '(':
1492         case ')':
1493         case '{':
1494         case '}':
1495         case '~':
1496         case ';':
1497         case ',':
1498                 set_punctuator(input.c);
1499                 next_char();
1500                 return;
1501
1502         case EOF:
1503                 if (input_stack != NULL) {
1504                         fclose(close_pp_input());
1505                         pop_restore_input();
1506                         if (out)
1507                                 fputc('\n', out);
1508                         if (input.c == (utf32)EOF)
1509                                 --input.pos.lineno;
1510                         print_line_directive(&input.pos, "2");
1511                         goto restart;
1512                 } else {
1513                         info.at_line_begin = true;
1514                         set_punctuator(T_EOF);
1515                 }
1516                 return;
1517
1518         case '\\':
1519                 next_char();
1520                 int next_c = input.c;
1521                 put_back(input.c);
1522                 input.c = '\\';
1523                 if (next_c == 'U' || next_c == 'u') {
1524                         parse_symbol();
1525                         return;
1526                 }
1527                 /* FALLTHROUGH */
1528         default:
1529 dollar_sign:
1530                 if (error_on_unknown_chars) {
1531                         errorf(&pp_token.base.pos, "unknown character '%lc' found", input.c);
1532                         next_char();
1533                         goto restart;
1534                 } else {
1535                         assert(obstack_object_size(&symbol_obstack) == 0);
1536                         obstack_grow_utf8(&symbol_obstack, input.c);
1537                         obstack_1grow(&symbol_obstack, '\0');
1538                         char     *const string = obstack_finish(&symbol_obstack);
1539                         symbol_t *const symbol = symbol_table_insert(string);
1540                         if (symbol->string != string)
1541                                 obstack_free(&symbol_obstack, string);
1542
1543                         pp_token.kind        = T_UNKNOWN_CHAR;
1544                         pp_token.base.symbol = symbol;
1545                         next_char();
1546                         return;
1547                 }
1548         }
1549 }
1550
1551 static void print_quoted_string(const char *const string)
1552 {
1553         fputc('"', out);
1554         for (const char *c = string; *c != 0; ++c) {
1555                 switch (*c) {
1556                 case '"': fputs("\\\"", out); break;
1557                 case '\\':  fputs("\\\\", out); break;
1558                 case '\a':  fputs("\\a", out); break;
1559                 case '\b':  fputs("\\b", out); break;
1560                 case '\f':  fputs("\\f", out); break;
1561                 case '\n':  fputs("\\n", out); break;
1562                 case '\r':  fputs("\\r", out); break;
1563                 case '\t':  fputs("\\t", out); break;
1564                 case '\v':  fputs("\\v", out); break;
1565                 case '\?':  fputs("\\?", out); break;
1566                 default:
1567                         if (!isprint(*c)) {
1568                                 fprintf(out, "\\%03o", (unsigned)*c);
1569                                 break;
1570                         }
1571                         fputc(*c, out);
1572                         break;
1573                 }
1574         }
1575         fputc('"', out);
1576 }
1577
1578 static void print_line_directive(const position_t *pos, const char *add)
1579 {
1580         if (!out)
1581                 return;
1582
1583         fprintf(out, "# %u ", pos->lineno);
1584         print_quoted_string(pos->input_name);
1585         if (add != NULL) {
1586                 fputc(' ', out);
1587                 fputs(add, out);
1588         }
1589         if (pos->is_system_header) {
1590                 fputs(" 3", out);
1591         }
1592
1593         printed_input_name = pos->input_name;
1594         input.output_line  = pos->lineno-1;
1595 }
1596
1597 static bool emit_newlines(void)
1598 {
1599         if (!out)
1600                 return true;
1601
1602         unsigned delta = pp_token.base.pos.lineno - input.output_line;
1603         if (delta == 0)
1604                 return false;
1605
1606         if (delta >= 9) {
1607                 fputc('\n', out);
1608                 print_line_directive(&pp_token.base.pos, NULL);
1609                 fputc('\n', out);
1610         } else {
1611                 for (unsigned i = 0; i < delta; ++i) {
1612                         fputc('\n', out);
1613                 }
1614         }
1615         input.output_line = pp_token.base.pos.lineno;
1616
1617         unsigned whitespace = info.whitespace_at_line_begin;
1618         /* make sure there is at least 1 whitespace before a (macro-expanded)
1619          * '#' at line begin. I'm not sure why this is good, but gcc does it. */
1620         if (pp_token.kind == '#' && whitespace == 0)
1621                 ++whitespace;
1622         for (unsigned i = 0; i < whitespace; ++i)
1623                 fputc(' ', out);
1624
1625         return true;
1626 }
1627
1628 void set_preprocessor_output(FILE *output)
1629 {
1630         out = output;
1631         if (out != NULL) {
1632                 error_on_unknown_chars   = false;
1633                 resolve_escape_sequences = false;
1634         } else {
1635                 error_on_unknown_chars   = true;
1636                 resolve_escape_sequences = true;
1637         }
1638 }
1639
1640 void emit_pp_token(void)
1641 {
1642         if (!emit_newlines() &&
1643             (info.had_whitespace || tokens_would_paste(last_token, pp_token.kind)))
1644                 fputc(' ', out);
1645
1646         switch (pp_token.kind) {
1647         case T_NUMBER:
1648                 fputs(pp_token.literal.string.begin, out);
1649                 break;
1650
1651         case T_STRING_LITERAL:
1652                 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1653                 fputc('"', out);
1654                 fputs(pp_token.literal.string.begin, out);
1655                 fputc('"', out);
1656                 break;
1657
1658         case T_CHARACTER_CONSTANT:
1659                 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1660                 fputc('\'', out);
1661                 fputs(pp_token.literal.string.begin, out);
1662                 fputc('\'', out);
1663                 break;
1664
1665         case T_MACRO_PARAMETER:
1666                 panic("macro parameter not expanded");
1667
1668         default:
1669                 fputs(pp_token.base.symbol->string, out);
1670                 break;
1671         }
1672         last_token = pp_token.kind;
1673 }
1674
1675 static void eat_pp_directive(void)
1676 {
1677         while (!info.at_line_begin) {
1678                 next_input_token();
1679         }
1680 }
1681
1682 static bool strings_equal(const string_t *string1, const string_t *string2)
1683 {
1684         size_t size = string1->size;
1685         if (size != string2->size)
1686                 return false;
1687
1688         const char *c1 = string1->begin;
1689         const char *c2 = string2->begin;
1690         for (size_t i = 0; i < size; ++i, ++c1, ++c2) {
1691                 if (*c1 != *c2)
1692                         return false;
1693         }
1694         return true;
1695 }
1696
1697 static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
1698 {
1699         if (token1->kind != token2->kind)
1700                 return false;
1701
1702         switch (token1->kind) {
1703         case T_NUMBER:
1704         case T_CHARACTER_CONSTANT:
1705         case T_STRING_LITERAL:
1706                 return strings_equal(&token1->literal.string, &token2->literal.string);
1707
1708         case T_MACRO_PARAMETER:
1709                 return token1->macro_parameter.def->symbol
1710                     == token2->macro_parameter.def->symbol;
1711
1712         default:
1713                 return token1->base.symbol == token2->base.symbol;
1714         }
1715 }
1716
1717 static bool pp_definitions_equal(const pp_definition_t *definition1,
1718                                  const pp_definition_t *definition2)
1719 {
1720         if (definition1->list_len != definition2->list_len)
1721                 return false;
1722
1723         size_t               len = definition1->list_len;
1724         const saved_token_t *t1  = definition1->token_list;
1725         const saved_token_t *t2  = definition2->token_list;
1726         for (size_t i = 0; i < len; ++i, ++t1, ++t2) {
1727                 if (!pp_tokens_equal(&t1->token, &t2->token))
1728                         return false;
1729                 if (t1->had_whitespace != t2->had_whitespace)
1730                         return false;
1731         }
1732         return true;
1733 }
1734
1735 static void missing_macro_param_error(void)
1736 {
1737         errorf(&pp_token.base.pos, "'#' is not followed by a macro parameter");
1738 }
1739
1740 static bool is_defineable_token(char const *const context)
1741 {
1742         if (info.at_line_begin) {
1743                 errorf(&pp_token.base.pos, "unexpected end of line after %s", context);
1744         }
1745
1746         symbol_t *const symbol = pp_token.base.symbol;
1747         if (!symbol)
1748                 goto no_ident;
1749
1750         if (pp_token.kind != T_IDENTIFIER) {
1751                 switch (symbol->string[0]) {
1752                 case SYMBOL_CASES:
1753 dollar_sign:
1754                         break;
1755
1756                 default:
1757 no_ident:
1758                         errorf(&pp_token.base.pos, "expected identifier after %s, got %K",
1759                                context, &pp_token);
1760                         return false;
1761                 }
1762         }
1763
1764         /* TODO turn this into a flag in pp_def. */
1765         switch (symbol->pp_ID) {
1766         /* §6.10.8:4 */
1767         case TP_defined:
1768                 errorf(&pp_token.base.pos, "%K cannot be used as macro name in %s",
1769                        &pp_token, context);
1770                 return false;
1771
1772         default:
1773                 return true;
1774         }
1775 }
1776
1777 static void parse_define_directive(void)
1778 {
1779         eat_pp(TP_define);
1780         if (skip_mode) {
1781                 eat_pp_directive();
1782                 return;
1783         }
1784
1785         assert(obstack_object_size(&pp_obstack) == 0);
1786
1787         if (!is_defineable_token("#define"))
1788                 goto error_out;
1789         symbol_t *const symbol = pp_token.base.symbol;
1790
1791         pp_definition_t *new_definition
1792                 = obstack_alloc(&pp_obstack, sizeof(new_definition[0]));
1793         memset(new_definition, 0, sizeof(new_definition[0]));
1794         new_definition->symbol = symbol;
1795         new_definition->pos    = input.pos;
1796
1797         /* this is probably the only place where spaces are significant in the
1798          * lexer (except for the fact that they separate tokens). #define b(x)
1799          * is something else than #define b (x) */
1800         if (input.c == '(') {
1801                 next_input_token();
1802                 eat_token('(');
1803
1804                 while (true) {
1805                         switch (pp_token.kind) {
1806                         case T_DOTDOTDOT:
1807                                 new_definition->is_variadic = true;
1808                                 eat_token(T_DOTDOTDOT);
1809                                 if (pp_token.kind != ')') {
1810                                         errorf(&input.pos,
1811                                                         "'...' not at end of macro argument list");
1812                                         goto error_out;
1813                                 }
1814                                 break;
1815
1816                         case T_IDENTIFIER: {
1817                                 pp_definition_t parameter;
1818                                 memset(&parameter, 0, sizeof(parameter));
1819                                 parameter.pos          = pp_token.base.pos;
1820                                 parameter.symbol       = pp_token.base.symbol;
1821                                 parameter.is_parameter = true;
1822                                 obstack_grow(&pp_obstack, &parameter, sizeof(parameter));
1823                                 eat_token(T_IDENTIFIER);
1824
1825                                 if (pp_token.kind == ',') {
1826                                         eat_token(',');
1827                                         break;
1828                                 }
1829
1830                                 if (pp_token.kind != ')') {
1831                                         errorf(&pp_token.base.pos,
1832                                                "expected ',' or ')' after identifier, got %K",
1833                                                &pp_token);
1834                                         goto error_out;
1835                                 }
1836                                 break;
1837                         }
1838
1839                         case ')':
1840                                 eat_token(')');
1841                                 goto finish_argument_list;
1842
1843                         default:
1844                                 errorf(&pp_token.base.pos,
1845                                        "expected identifier, '...' or ')' in #define argument list, got %K",
1846                                        &pp_token);
1847                                 goto error_out;
1848                         }
1849                 }
1850
1851         finish_argument_list:
1852                 new_definition->has_parameters = true;
1853                 size_t size = obstack_object_size(&pp_obstack);
1854                 new_definition->n_parameters
1855                         = size / sizeof(new_definition->parameters[0]);
1856                 new_definition->parameters = obstack_finish(&pp_obstack);
1857                 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1858                         pp_definition_t *param    = &new_definition->parameters[i];
1859                         symbol_t        *symbol   = param->symbol;
1860                         pp_definition_t *previous = symbol->pp_definition;
1861                         if (previous != NULL
1862                             && previous->function_definition == new_definition) {
1863                                 errorf(&param->pos, "duplicate macro parameter '%Y'", symbol);
1864                                 param->symbol = sym_anonymous;
1865                                 continue;
1866                         }
1867                         param->parent_expansion    = previous;
1868                         param->function_definition = new_definition;
1869                         symbol->pp_definition      = param;
1870                 }
1871         } else {
1872                 next_input_token();
1873         }
1874
1875         /* construct token list */
1876         assert(obstack_object_size(&pp_obstack) == 0);
1877         bool next_must_be_param = false;
1878         while (!info.at_line_begin) {
1879                 if (pp_token.kind == T_IDENTIFIER) {
1880                         const symbol_t  *symbol     = pp_token.base.symbol;
1881                         pp_definition_t *definition = symbol->pp_definition;
1882                         if (definition != NULL
1883                             && definition->function_definition == new_definition) {
1884                             pp_token.kind                = T_MACRO_PARAMETER;
1885                             pp_token.macro_parameter.def = definition;
1886                         }
1887                 }
1888                 if (next_must_be_param && pp_token.kind != T_MACRO_PARAMETER) {
1889                         missing_macro_param_error();
1890                 }
1891                 saved_token_t saved_token;
1892                 saved_token.token = pp_token;
1893                 saved_token.had_whitespace = info.had_whitespace;
1894                 obstack_grow(&pp_obstack, &saved_token, sizeof(saved_token));
1895                 next_must_be_param
1896                         = new_definition->has_parameters && pp_token.kind == '#';
1897                 next_input_token();
1898         }
1899         if (next_must_be_param)
1900                 missing_macro_param_error();
1901
1902         new_definition->list_len   = obstack_object_size(&pp_obstack)
1903                 / sizeof(new_definition->token_list[0]);
1904         new_definition->token_list = obstack_finish(&pp_obstack);
1905
1906         if (new_definition->has_parameters) {
1907                 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1908                         pp_definition_t *param      = &new_definition->parameters[i];
1909                         symbol_t        *symbol     = param->symbol;
1910                         if (symbol == sym_anonymous)
1911                                 continue;
1912                         assert(symbol->pp_definition == param);
1913                         assert(param->function_definition == new_definition);
1914                         symbol->pp_definition   = param->parent_expansion;
1915                         param->parent_expansion = NULL;
1916                 }
1917         }
1918
1919         pp_definition_t *old_definition = symbol->pp_definition;
1920         if (old_definition != NULL) {
1921                 if (!pp_definitions_equal(old_definition, new_definition)) {
1922                         warningf(WARN_OTHER, &input.pos,
1923                                  "multiple definition of macro '%Y' (first defined %P)",
1924                                  symbol, &old_definition->pos);
1925                 } else {
1926                         /* reuse the old definition */
1927                         obstack_free(&pp_obstack, new_definition);
1928                         new_definition = old_definition;
1929                 }
1930         }
1931
1932         symbol->pp_definition = new_definition;
1933         return;
1934
1935 error_out:
1936         if (obstack_object_size(&pp_obstack) > 0) {
1937                 char *ptr = obstack_finish(&pp_obstack);
1938                 obstack_free(&pp_obstack, ptr);
1939         }
1940         eat_pp_directive();
1941 }
1942
1943 static void parse_undef_directive(void)
1944 {
1945         eat_pp(TP_undef);
1946         if (skip_mode) {
1947                 eat_pp_directive();
1948                 return;
1949         }
1950
1951         if (!is_defineable_token("#undef")) {
1952                 eat_pp_directive();
1953                 return;
1954         }
1955
1956         pp_token.base.symbol->pp_definition = NULL;
1957         next_input_token();
1958
1959         if (!info.at_line_begin) {
1960                 warningf(WARN_OTHER, &input.pos, "extra tokens at end of #undef directive");
1961         }
1962         eat_pp_directive();
1963 }
1964
1965 /** behind an #include we can have the special headername lexems.
1966  * They're only allowed behind an #include so they're not recognized
1967  * by the normal next_preprocessing_token. We handle them as a special
1968  * exception here */
1969 static const char *parse_headername(bool *system_include)
1970 {
1971         if (info.at_line_begin) {
1972                 parse_error("expected headername after #include");
1973                 return NULL;
1974         }
1975
1976         /* check whether we have a "... or <... headername */
1977         position_t pos = input.pos;
1978         switch (input.c) {
1979         {
1980                 utf32 delimiter;
1981         case '<': delimiter = '>'; *system_include = true;  goto parse_name;
1982         case '"': delimiter = '"'; *system_include = false; goto parse_name;
1983 parse_name:
1984                 assert(obstack_object_size(&symbol_obstack) == 0);
1985                 next_char();
1986                 while (true) {
1987                         switch (input.c) {
1988                         case NEWLINE:
1989                         case EOF:
1990                                 {
1991                                         char *dummy = obstack_finish(&symbol_obstack);
1992                                         obstack_free(&symbol_obstack, dummy);
1993                                 }
1994                                 errorf(&pp_token.base.pos,
1995                                        "header name without closing '%c'", (char)delimiter);
1996                                 return NULL;
1997
1998                         default:
1999                                 if (input.c == delimiter) {
2000                                         next_char();
2001                                         goto finish_headername;
2002                                 } else {
2003                                         obstack_1grow(&symbol_obstack, (char)input.c);
2004                                         next_char();
2005                                 }
2006                                 break;
2007                         }
2008                 }
2009                 /* we should never be here */
2010         }
2011
2012         default:
2013                 next_preprocessing_token();
2014                 if (info.at_line_begin) {
2015                         /* TODO: if we are already in the new line then we parsed more than
2016                          * wanted. We reuse the token, but could produce following errors
2017                          * misbehaviours... */
2018                         goto error_invalid_input;
2019                 }
2020                 if (pp_token.kind == T_STRING_LITERAL) {
2021                         *system_include = false;
2022                         return pp_token.literal.string.begin;
2023                 } else if (pp_token.kind == '<') {
2024                         *system_include = true;
2025                         assert(obstack_object_size(&pp_obstack) == 0);
2026                         while (true) {
2027                                 next_preprocessing_token();
2028                                 if (info.at_line_begin) {
2029                                         /* TODO: we shouldn't have parsed/expanded something on the
2030                                          * next line yet... */
2031                                         char *dummy = obstack_finish(&pp_obstack);
2032                                         obstack_free(&pp_obstack, dummy);
2033                                         goto error_invalid_input;
2034                                 }
2035                                 if (pp_token.kind == '>')
2036                                         break;
2037
2038                                 saved_token_t saved;
2039                                 saved.token          = pp_token;
2040                                 saved.had_whitespace = info.had_whitespace;
2041                                 obstack_grow(&pp_obstack, &saved, sizeof(saved));
2042                         }
2043                         size_t size = obstack_object_size(&pp_obstack);
2044                         assert(size % sizeof(saved_token_t) == 0);
2045                         size_t n_tokens = size / sizeof(saved_token_t);
2046                         saved_token_t *tokens = obstack_finish(&pp_obstack);
2047                         assert(obstack_object_size(&symbol_obstack) == 0);
2048                         for (size_t i = 0; i < n_tokens; ++i) {
2049                                 const saved_token_t *saved = &tokens[i];
2050                                 if (i > 0 && saved->had_whitespace)
2051                                         obstack_1grow(&symbol_obstack, ' ');
2052                                 grow_token(&symbol_obstack, &saved->token);
2053                         }
2054                         obstack_free(&pp_obstack, tokens);
2055                         goto finish_headername;
2056                 } else {
2057 error_invalid_input:
2058                         {
2059                                 char *dummy = obstack_finish(&symbol_obstack);
2060                                 obstack_free(&symbol_obstack, dummy);
2061                         }
2062
2063                         errorf(&pp_token.base.pos,
2064                                "expected \"FILENAME\" or <FILENAME> after #include");
2065                         return NULL;
2066                 }
2067         }
2068
2069 finish_headername:
2070         obstack_1grow(&symbol_obstack, '\0');
2071         char *const  headername = obstack_finish(&symbol_obstack);
2072         const char  *identified = identify_string(headername);
2073         pp_token.base.pos = pos;
2074         return identified;
2075 }
2076
2077 static bool do_include(bool const bracket_include, bool const include_next, char const *const headername)
2078 {
2079         size_t const        headername_len = strlen(headername);
2080         searchpath_entry_t *entry;
2081         if (include_next) {
2082                 entry = input.path      ? input.path->next
2083                       : bracket_include ? bracket_searchpath.first
2084                       : quote_searchpath.first;
2085         } else {
2086                 if (!bracket_include) {
2087                         /* put dirname of current input on obstack */
2088                         const char *filename   = input.pos.input_name;
2089                         const char *last_slash = strrchr(filename, '/');
2090                         const char *full_name;
2091                         if (last_slash != NULL) {
2092                                 size_t len = last_slash - filename;
2093                                 obstack_grow(&symbol_obstack, filename, len + 1);
2094                                 obstack_grow0(&symbol_obstack, headername, headername_len);
2095                                 char *complete_path = obstack_finish(&symbol_obstack);
2096                                 full_name = identify_string(complete_path);
2097                         } else {
2098                                 full_name = headername;
2099                         }
2100
2101                         FILE *file = fopen(full_name, "r");
2102                         if (file != NULL) {
2103                                 switch_pp_input(file, full_name, NULL, false);
2104                                 return true;
2105                         }
2106                         entry = quote_searchpath.first;
2107                 } else {
2108                         entry = bracket_searchpath.first;
2109                 }
2110         }
2111
2112         assert(obstack_object_size(&symbol_obstack) == 0);
2113         /* check searchpath */
2114         for (; entry; entry = entry->next) {
2115             const char *path = entry->path;
2116             size_t      len  = strlen(path);
2117                 obstack_grow(&symbol_obstack, path, len);
2118                 if (path[len-1] != '/')
2119                         obstack_1grow(&symbol_obstack, '/');
2120                 obstack_grow(&symbol_obstack, headername, headername_len+1);
2121
2122                 char *complete_path = obstack_finish(&symbol_obstack);
2123                 FILE *file          = fopen(complete_path, "r");
2124                 if (file != NULL) {
2125                         const char *filename = identify_string(complete_path);
2126                         switch_pp_input(file, filename, entry, entry->is_system_path);
2127                         return true;
2128                 } else {
2129                         obstack_free(&symbol_obstack, complete_path);
2130                 }
2131         }
2132
2133         return false;
2134 }
2135
2136 static void parse_include_directive(bool const include_next)
2137 {
2138         if (skip_mode) {
2139                 eat_pp_directive();
2140                 return;
2141         }
2142
2143         /* do not eat the TP_include, since it would already parse the next token
2144          * which needs special handling here. */
2145         skip_till_newline(true);
2146         bool system_include;
2147         const char *headername = parse_headername(&system_include);
2148         if (headername == NULL) {
2149                 eat_pp_directive();
2150                 return;
2151         }
2152
2153         bool had_nonwhitespace = skip_till_newline(false);
2154         if (had_nonwhitespace) {
2155                 warningf(WARN_OTHER, &input.pos,
2156                          "extra tokens at end of #include directive");
2157         }
2158
2159         if (n_inputs > INCLUDE_LIMIT) {
2160                 errorf(&pp_token.base.pos, "#include nested too deeply");
2161                 /* eat \n or EOF */
2162                 next_input_token();
2163                 return;
2164         }
2165
2166         /* switch inputs */
2167         info.whitespace_at_line_begin = 0;
2168         info.had_whitespace           = false;
2169         info.at_line_begin            = true;
2170         emit_newlines();
2171         push_input();
2172         bool res = do_include(system_include, include_next, headername);
2173         if (res) {
2174                 next_input_token();
2175         } else {
2176                 errorf(&pp_token.base.pos, "failed including '%s': %s", headername, strerror(errno));
2177                 pop_restore_input();
2178         }
2179 }
2180
2181 static pp_conditional_t *push_conditional(void)
2182 {
2183         pp_conditional_t *conditional
2184                 = obstack_alloc(&pp_obstack, sizeof(*conditional));
2185         memset(conditional, 0, sizeof(*conditional));
2186
2187         conditional->parent = conditional_stack;
2188         conditional_stack   = conditional;
2189
2190         return conditional;
2191 }
2192
2193 static void pop_conditional(void)
2194 {
2195         assert(conditional_stack != NULL);
2196         conditional_stack = conditional_stack->parent;
2197 }
2198
2199 void check_unclosed_conditionals(void)
2200 {
2201         while (conditional_stack != NULL) {
2202                 pp_conditional_t *conditional = conditional_stack;
2203
2204                 if (conditional->in_else) {
2205                         errorf(&conditional->pos, "unterminated #else");
2206                 } else {
2207                         errorf(&conditional->pos, "unterminated condition");
2208                 }
2209                 pop_conditional();
2210         }
2211 }
2212
2213 static void parse_ifdef_ifndef_directive(bool const is_ifdef)
2214 {
2215         bool condition;
2216         eat_pp(is_ifdef ? TP_ifdef : TP_ifndef);
2217
2218         if (skip_mode) {
2219                 eat_pp_directive();
2220                 pp_conditional_t *conditional = push_conditional();
2221                 conditional->pos  = pp_token.base.pos;
2222                 conditional->skip = true;
2223                 return;
2224         }
2225
2226         if (pp_token.kind != T_IDENTIFIER || info.at_line_begin) {
2227                 errorf(&pp_token.base.pos, "expected identifier after #%s, got %K",
2228                        is_ifdef ? "ifdef" : "ifndef", &pp_token);
2229                 eat_pp_directive();
2230
2231                 /* just take the true case in the hope to avoid further errors */
2232                 condition = true;
2233         } else {
2234                 /* evaluate whether we are in true or false case */
2235                 condition = (bool)pp_token.base.symbol->pp_definition == is_ifdef;
2236                 eat_token(T_IDENTIFIER);
2237
2238                 if (!info.at_line_begin) {
2239                         errorf(&pp_token.base.pos, "extra tokens at end of #%s",
2240                                is_ifdef ? "ifdef" : "ifndef");
2241                         eat_pp_directive();
2242                 }
2243         }
2244
2245         pp_conditional_t *conditional = push_conditional();
2246         conditional->pos       = pp_token.base.pos;
2247         conditional->condition = condition;
2248
2249         if (!condition) {
2250                 skip_mode = true;
2251         }
2252 }
2253
2254 static void parse_else_directive(void)
2255 {
2256         eat_pp(TP_else);
2257
2258         if (!info.at_line_begin) {
2259                 if (!skip_mode) {
2260                         warningf(WARN_OTHER, &pp_token.base.pos, "extra tokens at end of #else");
2261                 }
2262                 eat_pp_directive();
2263         }
2264
2265         pp_conditional_t *conditional = conditional_stack;
2266         if (conditional == NULL) {
2267                 errorf(&pp_token.base.pos, "#else without prior #if");
2268                 return;
2269         }
2270
2271         if (conditional->in_else) {
2272                 errorf(&pp_token.base.pos,
2273                        "#else after #else (condition started %P)",
2274                        &conditional->pos);
2275                 skip_mode = true;
2276                 return;
2277         }
2278
2279         conditional->in_else = true;
2280         if (!conditional->skip) {
2281                 skip_mode = conditional->condition;
2282         }
2283         conditional->pos = pp_token.base.pos;
2284 }
2285
2286 static void parse_endif_directive(void)
2287 {
2288         eat_pp(TP_endif);
2289
2290         if (!info.at_line_begin) {
2291                 if (!skip_mode) {
2292                         warningf(WARN_OTHER, &pp_token.base.pos, "extra tokens at end of #endif");
2293                 }
2294                 eat_pp_directive();
2295         }
2296
2297         pp_conditional_t *conditional = conditional_stack;
2298         if (conditional == NULL) {
2299                 errorf(&pp_token.base.pos, "#endif without prior #if");
2300                 return;
2301         }
2302
2303         if (!conditional->skip) {
2304                 skip_mode = false;
2305         }
2306         pop_conditional();
2307 }
2308
2309 typedef enum stdc_pragma_kind_t {
2310         STDC_UNKNOWN,
2311         STDC_FP_CONTRACT,
2312         STDC_FENV_ACCESS,
2313         STDC_CX_LIMITED_RANGE
2314 } stdc_pragma_kind_t;
2315
2316 typedef enum stdc_pragma_value_kind_t {
2317         STDC_VALUE_UNKNOWN,
2318         STDC_VALUE_ON,
2319         STDC_VALUE_OFF,
2320         STDC_VALUE_DEFAULT
2321 } stdc_pragma_value_kind_t;
2322
2323 static void parse_pragma_directive(void)
2324 {
2325         eat_pp(TP_pragma);
2326         if (skip_mode) {
2327                 eat_pp_directive();
2328                 return;
2329         }
2330
2331         if (pp_token.kind != T_IDENTIFIER) {
2332                 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.pos,
2333                          "expected identifier after #pragma");
2334                 eat_pp_directive();
2335                 return;
2336         }
2337
2338         stdc_pragma_kind_t kind = STDC_UNKNOWN;
2339         if (pp_token.base.symbol->pp_ID == TP_STDC && c_mode & _C99) {
2340                 /* a STDC pragma */
2341                 next_input_token();
2342
2343                 switch (pp_token.base.symbol->pp_ID) {
2344                 case TP_FP_CONTRACT:      kind = STDC_FP_CONTRACT;      break;
2345                 case TP_FENV_ACCESS:      kind = STDC_FENV_ACCESS;      break;
2346                 case TP_CX_LIMITED_RANGE: kind = STDC_CX_LIMITED_RANGE; break;
2347                 default:                  break;
2348                 }
2349                 if (kind != STDC_UNKNOWN) {
2350                         next_input_token();
2351                         stdc_pragma_value_kind_t value;
2352                         switch (pp_token.base.symbol->pp_ID) {
2353                         case TP_ON:      value = STDC_VALUE_ON;      break;
2354                         case TP_OFF:     value = STDC_VALUE_OFF;     break;
2355                         case TP_DEFAULT: value = STDC_VALUE_DEFAULT; break;
2356                         default:         value = STDC_VALUE_UNKNOWN; break;
2357                         }
2358                         if (value == STDC_VALUE_UNKNOWN) {
2359                                 kind = STDC_UNKNOWN;
2360                                 errorf(&pp_token.base.pos, "bad STDC pragma argument");
2361                         }
2362                 }
2363         }
2364         eat_pp_directive();
2365         if (kind == STDC_UNKNOWN) {
2366                 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.pos,
2367                          "encountered unknown #pragma");
2368         }
2369 }
2370
2371 static void parse_line_directive(void)
2372 {
2373         if (pp_token.kind != T_NUMBER) {
2374                 if (!skip_mode)
2375                         parse_error("expected integer");
2376         } else {
2377                 char      *end;
2378                 long const line = strtol(pp_token.literal.string.begin, &end, 0);
2379                 if (*end == '\0') {
2380                         /* use offset -1 as this is about the next line */
2381                         input.pos.lineno = line - 1;
2382                         /* force output of line */
2383                         input.output_line = input.pos.lineno - 20;
2384                 } else {
2385                         if (!skip_mode) {
2386                                 errorf(&input.pos, "'%S' is not a valid line number",
2387                                            &pp_token.literal.string);
2388                         }
2389                 }
2390                 next_input_token();
2391                 if (info.at_line_begin)
2392                         return;
2393         }
2394         if (pp_token.kind == T_STRING_LITERAL
2395             && pp_token.literal.string.encoding == STRING_ENCODING_CHAR) {
2396                 input.pos.input_name       = pp_token.literal.string.begin;
2397                 input.pos.is_system_header = false;
2398                 next_input_token();
2399
2400                 /* attempt to parse numeric flags as outputted by gcc preprocessor */
2401                 while (!info.at_line_begin && pp_token.kind == T_NUMBER) {
2402                         /* flags:
2403                          * 1 - indicates start of a new file
2404                          * 2 - indicates return from a file
2405                          * 3 - indicates system header
2406                          * 4 - indicates implicit extern "C" in C++ mode
2407                          *
2408                          * currently we're only interested in "3"
2409                          */
2410                         if (streq(pp_token.literal.string.begin, "3")) {
2411                                 input.pos.is_system_header = true;
2412                         }
2413                         next_input_token();
2414                 }
2415         }
2416
2417         eat_pp_directive();
2418 }
2419
2420 static void parse_error_directive(void)
2421 {
2422         if (skip_mode) {
2423                 eat_pp_directive();
2424                 return;
2425         }
2426
2427         bool const old_resolve_escape_sequences = resolve_escape_sequences;
2428         resolve_escape_sequences = false;
2429
2430         position_t const pos = pp_token.base.pos;
2431         do {
2432                 if (info.had_whitespace && obstack_object_size(&pp_obstack) != 0)
2433                         obstack_1grow(&pp_obstack, ' ');
2434
2435                 switch (pp_token.kind) {
2436                 case T_NUMBER: {
2437                         string_t const *const str = &pp_token.literal.string;
2438                         obstack_grow(&pp_obstack, str->begin, str->size);
2439                         break;
2440                 }
2441
2442                 {
2443                         char delim;
2444                 case T_STRING_LITERAL:     delim =  '"'; goto string;
2445                 case T_CHARACTER_CONSTANT: delim = '\''; goto string;
2446 string:;
2447                         string_t const *const str = &pp_token.literal.string;
2448                         char     const *const enc = get_string_encoding_prefix(str->encoding);
2449                         obstack_printf(&pp_obstack, "%s%c%s%c", enc, delim, str->begin, delim);
2450                         break;
2451                 }
2452
2453                 default: {
2454                         char const *const str = pp_token.base.symbol->string;
2455                         obstack_grow(&pp_obstack, str, strlen(str));
2456                         break;
2457                 }
2458                 }
2459
2460                 next_input_token();
2461         } while (!info.at_line_begin);
2462
2463         resolve_escape_sequences = old_resolve_escape_sequences;
2464
2465         obstack_1grow(&pp_obstack, '\0');
2466         char *const str = obstack_finish(&pp_obstack);
2467         errorf(&pos, "#%s", str);
2468         obstack_free(&pp_obstack, str);
2469 }
2470
2471 static void parse_preprocessing_directive(void)
2472 {
2473         eat_token('#');
2474
2475         if (info.at_line_begin) {
2476                 /* empty directive */
2477                 return;
2478         }
2479
2480         if (pp_token.base.symbol) {
2481                 switch (pp_token.base.symbol->pp_ID) {
2482                 case TP_define:       parse_define_directive();            break;
2483                 case TP_else:         parse_else_directive();              break;
2484                 case TP_endif:        parse_endif_directive();             break;
2485                 case TP_error:        parse_error_directive();             break;
2486                 case TP_ifdef:        parse_ifdef_ifndef_directive(true);  break;
2487                 case TP_ifndef:       parse_ifdef_ifndef_directive(false); break;
2488                 case TP_include:      parse_include_directive(false);      break;
2489                 case TP_include_next: parse_include_directive(true);       break;
2490                 case TP_line:         next_input_token(); goto line_directive;
2491                 case TP_pragma:       parse_pragma_directive();            break;
2492                 case TP_undef:        parse_undef_directive();             break;
2493                 default:              goto skip;
2494                 }
2495         } else if (pp_token.kind == T_NUMBER) {
2496 line_directive:
2497                 parse_line_directive();
2498         } else {
2499 skip:
2500                 if (!skip_mode) {
2501                         errorf(&pp_token.base.pos, "invalid preprocessing directive #%K", &pp_token);
2502                 }
2503                 eat_pp_directive();
2504         }
2505
2506         assert(info.at_line_begin);
2507 }
2508
2509 static void finish_current_argument(void)
2510 {
2511         if (current_argument == NULL)
2512                 return;
2513         size_t size = obstack_object_size(&pp_obstack);
2514         current_argument->list_len   = size/sizeof(current_argument->token_list[0]);
2515         current_argument->token_list = obstack_finish(&pp_obstack);
2516 }
2517
2518 void next_preprocessing_token(void)
2519 {
2520 restart:
2521         if (!expand_next()) {
2522                 do {
2523                         next_input_token();
2524                         while (pp_token.kind == '#' && info.at_line_begin) {
2525                                 parse_preprocessing_directive();
2526                         }
2527                 } while (skip_mode && pp_token.kind != T_EOF);
2528         }
2529
2530         const token_kind_t kind = pp_token.kind;
2531         if (current_call == NULL || argument_expanding != NULL) {
2532                 symbol_t *const symbol = pp_token.base.symbol;
2533                 if (symbol) {
2534                         if (kind == T_MACRO_PARAMETER) {
2535                                 assert(current_expansion != NULL);
2536                                 start_expanding(pp_token.macro_parameter.def);
2537                                 goto restart;
2538                         }
2539
2540                         pp_definition_t *const pp_definition = symbol->pp_definition;
2541                         if (pp_definition != NULL && !pp_definition->is_expanding) {
2542                                 if (pp_definition->has_parameters) {
2543
2544                                         /* check if next token is a '(' */
2545                                         whitespace_info_t old_info   = info;
2546                                         token_kind_t      next_token = peek_expansion();
2547                                         if (next_token == T_EOF) {
2548                                                 info.at_line_begin  = false;
2549                                                 info.had_whitespace = false;
2550                                                 skip_whitespace();
2551                                                 if (input.c == '(') {
2552                                                         next_token = '(';
2553                                                 }
2554                                         }
2555
2556                                         if (next_token == '(') {
2557                                                 if (current_expansion == NULL)
2558                                                         expansion_pos = pp_token.base.pos;
2559                                                 next_preprocessing_token();
2560                                                 assert(pp_token.kind == '(');
2561
2562                                                 pp_definition->parent_expansion = current_expansion;
2563                                                 current_call              = pp_definition;
2564                                                 current_call->expand_pos  = 0;
2565                                                 current_call->expand_info = old_info;
2566                                                 if (current_call->n_parameters > 0) {
2567                                                         current_argument = &current_call->parameters[0];
2568                                                         assert(argument_brace_count == 0);
2569                                                 }
2570                                                 goto restart;
2571                                         } else {
2572                                                 /* skip_whitespaces() skipped newlines and whitespace,
2573                                                  * remember results for next token */
2574                                                 next_info = info;
2575                                                 info      = old_info;
2576                                                 return;
2577                                         }
2578                                 } else {
2579                                         if (current_expansion == NULL)
2580                                                 expansion_pos = pp_token.base.pos;
2581                                         start_expanding(pp_definition);
2582                                         goto restart;
2583                                 }
2584                         }
2585                 }
2586         }
2587
2588         if (current_call != NULL) {
2589                 /* current_call != NULL */
2590                 if (kind == '(') {
2591                         ++argument_brace_count;
2592                 } else if (kind == ')') {
2593                         if (argument_brace_count > 0) {
2594                                 --argument_brace_count;
2595                         } else {
2596                                 finish_current_argument();
2597                                 assert(kind == ')');
2598                                 start_expanding(current_call);
2599                                 info = current_call->expand_info;
2600                                 current_call     = NULL;
2601                                 current_argument = NULL;
2602                                 goto restart;
2603                         }
2604                 } else if (kind == ',' && argument_brace_count == 0) {
2605                         finish_current_argument();
2606                         current_call->expand_pos++;
2607                         if (current_call->expand_pos >= current_call->n_parameters) {
2608                                 errorf(&pp_token.base.pos,
2609                                            "too many arguments passed for macro '%Y'",
2610                                            current_call->symbol);
2611                                 current_argument = NULL;
2612                         } else {
2613                                 current_argument
2614                                         = &current_call->parameters[current_call->expand_pos];
2615                         }
2616                         goto restart;
2617                 } else if (kind == T_MACRO_PARAMETER) {
2618                         /* parameters have to be fully expanded before being used as
2619                          * parameters for another macro-call */
2620                         assert(current_expansion != NULL);
2621                         pp_definition_t *argument = pp_token.macro_parameter.def;
2622                         argument_expanding = argument;
2623                         start_expanding(argument);
2624                         goto restart;
2625                 } else if (kind == T_EOF) {
2626                         errorf(&expansion_pos,
2627                                "reached end of file while parsing arguments for '%Y'",
2628                                current_call->symbol);
2629                         return;
2630                 }
2631                 if (current_argument != NULL) {
2632                         saved_token_t saved;
2633                         saved.token = pp_token;
2634                         saved.had_whitespace = info.had_whitespace;
2635                         obstack_grow(&pp_obstack, &saved, sizeof(saved));
2636                 }
2637                 goto restart;
2638         }
2639 }
2640
2641 void append_include_path(searchpath_t *paths, const char *path)
2642 {
2643         searchpath_entry_t *entry = OALLOCZ(&config_obstack, searchpath_entry_t);
2644         entry->path           = path;
2645         entry->is_system_path = paths->is_system_path;
2646
2647         *paths->anchor = entry;
2648         paths->anchor  = &entry->next;
2649 }
2650
2651 static void append_env_paths(searchpath_t *paths, const char *envvar)
2652 {
2653         const char *val = getenv(envvar);
2654         if (val != NULL && *val != '\0') {
2655                 const char *begin = val;
2656                 const char *c;
2657                 do {
2658                         c = begin;
2659                         while (*c != '\0' && *c != ':')
2660                                 ++c;
2661
2662                         size_t len = c-begin;
2663                         if (len == 0) {
2664                                 /* use "." for gcc compatibility (Matze: I would expect that
2665                                  * nothing happens for an empty entry...) */
2666                                 append_include_path(paths, ".");
2667                         } else {
2668                                 char *const string = obstack_copy0(&config_obstack, begin, len);
2669                                 append_include_path(paths, string);
2670                         }
2671
2672                         begin = c+1;
2673                         /* skip : */
2674                         if (*begin == ':')
2675                                 ++begin;
2676                 } while (*c != '\0');
2677         }
2678 }
2679
2680 static void append_searchpath(searchpath_t *path, const searchpath_t *append)
2681 {
2682         *path->anchor = append->first;
2683 }
2684
2685 static void setup_include_path(void)
2686 {
2687         /* built-in paths */
2688         append_include_path(&system_searchpath, "/usr/include");
2689
2690         /* parse environment variable */
2691         append_env_paths(&bracket_searchpath, "CPATH");
2692         append_env_paths(&system_searchpath,
2693                          c_mode & _CXX ? "CPLUS_INCLUDE_PATH" : "C_INCLUDE_PATH");
2694
2695         /* append system search path to bracket searchpath */
2696         append_searchpath(&system_searchpath,  &after_searchpath);
2697         append_searchpath(&bracket_searchpath, &system_searchpath);
2698         append_searchpath(&quote_searchpath, &bracket_searchpath);
2699 }
2700
2701 static void input_error(unsigned const delta_lines, unsigned const delta_cols, char const *const message)
2702 {
2703         position_t pos = pp_token.base.pos;
2704         pos.lineno += delta_lines;
2705         pos.colno  += delta_cols;
2706         errorf(&pos, "%s", message);
2707 }
2708
2709 void init_include_paths(void)
2710 {
2711         obstack_init(&config_obstack);
2712 }
2713
2714 void init_preprocessor(void)
2715 {
2716         init_symbols();
2717
2718         obstack_init(&pp_obstack);
2719         obstack_init(&input_obstack);
2720         strset_init(&stringset);
2721
2722         setup_include_path();
2723
2724         set_input_error_callback(input_error);
2725 }
2726
2727 void exit_preprocessor(void)
2728 {
2729         obstack_free(&input_obstack, NULL);
2730         obstack_free(&pp_obstack, NULL);
2731         obstack_free(&config_obstack, NULL);
2732
2733         strset_destroy(&stringset);
2734 }
2735
2736 int pptest_main(int argc, char **argv);
2737 int pptest_main(int argc, char **argv)
2738 {
2739         init_symbol_table();
2740         init_include_paths();
2741         init_preprocessor();
2742         init_tokens();
2743
2744         error_on_unknown_chars   = false;
2745         resolve_escape_sequences = false;
2746
2747         /* simplistic commandline parser */
2748         const char *filename = NULL;
2749         const char *output = NULL;
2750         for (int i = 1; i < argc; ++i) {
2751                 const char *opt = argv[i];
2752                 if (streq(opt, "-I")) {
2753                         append_include_path(&bracket_searchpath, argv[++i]);
2754                         continue;
2755                 } else if (streq(opt, "-E")) {
2756                         /* ignore */
2757                 } else if (streq(opt, "-o")) {
2758                         output = argv[++i];
2759                         continue;
2760                 } else if (opt[0] == '-') {
2761                         fprintf(stderr, "Unknown option '%s'\n", opt);
2762                 } else {
2763                         if (filename != NULL)
2764                                 fprintf(stderr, "Multiple inputs not supported\n");
2765                         filename = argv[i];
2766                 }
2767         }
2768         if (filename == NULL) {
2769                 fprintf(stderr, "No input specified\n");
2770                 return 1;
2771         }
2772
2773         if (output == NULL) {
2774                 out = stdout;
2775         } else {
2776                 out = fopen(output, "w");
2777                 if (out == NULL) {
2778                         fprintf(stderr, "Couldn't open output '%s'\n", output);
2779                         return 1;
2780                 }
2781         }
2782
2783         /* just here for gcc compatibility */
2784         fprintf(out, "# 1 \"%s\"\n", filename);
2785         fprintf(out, "# 1 \"<built-in>\"\n");
2786         fprintf(out, "# 1 \"<command-line>\"\n");
2787
2788         FILE *file = fopen(filename, "r");
2789         if (file == NULL) {
2790                 fprintf(stderr, "Couldn't open input '%s'\n", filename);
2791                 return 1;
2792         }
2793         switch_pp_input(file, filename, NULL, false);
2794
2795         for (;;) {
2796                 next_preprocessing_token();
2797                 if (pp_token.kind == T_EOF)
2798                         break;
2799                 emit_pp_token();
2800         }
2801
2802         fputc('\n', out);
2803         check_unclosed_conditionals();
2804         fclose(close_pp_input());
2805         if (out != stdout)
2806                 fclose(out);
2807
2808         exit_tokens();
2809         exit_preprocessor();
2810         exit_symbol_table();
2811
2812         return 0;
2813 }