nsz Git - cparser/blob - preprocessor.c

   1 #include <config.h>
   2
   3 #include <assert.h>
   4 #include <errno.h>
   5 #include <string.h>
   6 #include <stdbool.h>
   7 #include <ctype.h>
   8
   9 #include "token_t.h"
  10 #include "symbol_t.h"
  11 #include "adt/util.h"
  12 #include "adt/error.h"
  13 #include "lang_features.h"
  14 #include "diagnostic.h"
  15 #include "string_rep.h"
  16 #include "input.h"
  17
  18 #define MAX_PUTBACK 3
  19 #define INCLUDE_LIMIT 199  /* 199 is for gcc "compatibility" */
  20
/**
 * A list of tokens belonging to one macro argument.
 * NOTE(review): nothing in the visible part of the file uses this struct;
 * presumably list_len is the number of entries in token_list -- confirm.
 */
struct pp_argument_t {
        size_t   list_len;
        token_t *token_list;
};
  25
/**
 * A preprocessor macro definition together with its expansion state.
 */
struct pp_definition_t {
        symbol_t          *symbol;
        source_position_t  source_position;
        /** definition that was being expanded when this one was entered;
         * expand_next() returns to it once this replacement list is used up */
        pp_definition_t   *parent_expansion;
        /** index into token_list of the next token expand_next() hands out */
        size_t             expand_pos;
        bool               is_variadic    : 1;
        /** set while the definition is being expanded; expand_next() does not
         * expand a definition again while this flag is set */
        bool               is_expanding   : 1;
        bool               has_parameters : 1;
        size_t             n_parameters;
        symbol_t          *parameters;

        /* replacement */
        size_t             list_len;
        token_t           *token_list;

};
  42
/**
 * One entry of the conditional stack (linked through parent).
 * NOTE(review): the code using these fields is outside the visible part of
 * the file; field meanings other than the documented one are inferred from
 * their names only.
 */
typedef struct pp_conditional_t pp_conditional_t;
struct pp_conditional_t {
        source_position_t  source_position;
        bool               condition;
        bool               in_else;
        bool               skip; /**< conditional in skip mode (then+else gets skipped) */
        pp_conditional_t  *parent;
};
  51
/**
 * State of one input file: the stream, its decoder, a buffer of decoded
 * characters and the current read position.
 */
typedef struct pp_input_t pp_input_t;
struct pp_input_t {
        /** stream opened by open_input() and closed by close_input() */
        FILE              *file;
        /** decoder created with input_from_stream(); feeds decode() */
        input_t           *input;
        /** current character (EOF at the end of the input) */
        utf32              c;
        /** decoded characters; next_real_char() fills the buffer starting at
         * index MAX_PUTBACK, so the slots in front stay free for put_back() */
        utf32              buf[1024+MAX_PUTBACK];
        /** one past the last valid character in buf (NULL before first read) */
        const utf32       *bufend;
        /** next character to read from buf (NULL before first read) */
        const utf32       *bufpos;
        /** file name, line and column of the current character */
        source_position_t  position;
        /** next entry on the stack of saved inputs (see push_input()) */
        pp_input_t        *parent;
        /** reset to 0 by open_input(); its users are not in the visible code */
        unsigned           output_line;
};
  64
/** additional information about the current token */
typedef struct add_token_info_t {
        /** amount of whitespace between the beginning of the line and the
         * token; reset to 0 whenever a newline is consumed */
        unsigned whitespace;
        /** there was whitespace (or a newline) directly before the token */
        bool     had_whitespace;
        /** the token is the first one on its line */
        bool     at_line_begin;
} add_token_info_t;
  74
/** the input currently being read */
static pp_input_t input;

/** stack of suspended inputs (linked through pp_input_t::parent), the number
 * of entries on it, and the obstack the entries are allocated from */
static pp_input_t     *input_stack;
static unsigned        n_inputs;
static struct obstack  input_obstack;

static pp_conditional_t *conditional_stack;

/** the current preprocessing token */
static token_t           pp_token;
/** if true, string literals store the value of an escape sequence instead of
 * its spelling (see parse_string_literal()) */
static bool              resolve_escape_sequences = false;
static bool              ignore_unknown_chars     = true;
static bool              in_pp_directive;
static bool              skip_mode;
static FILE             *out;
static struct obstack    pp_obstack;
static const char       *printed_input_name = NULL;
/** source position assigned to every token produced by a macro expansion */
static source_position_t expansion_pos;
/** innermost macro definition being expanded, NULL outside of expansions */
static pp_definition_t  *current_expansion  = NULL;
static preprocessor_token_type_t last_token = TP_ERROR;

static add_token_info_t  info;

/* forward declarations of functions used before their definition */
static inline void next_char(void);
static void next_preprocessing_token(void);
static void print_line_directive(const source_position_t *pos, const char *add);
 100
 101 static bool open_input(const char *filename)
 102 {
 103         FILE *file = fopen(filename, "r");
 104         if (file == NULL)
 105                 return false;
 106
 107         input.file                = file;
 108         input.input               = input_from_stream(file, NULL);
 109         input.bufend              = NULL;
 110         input.bufpos              = NULL;
 111         input.output_line         = 0;
 112         input.position.input_name = filename;
 113         input.position.lineno     = 1;
 114
 115         /* indicate that we're at a new input */
 116         print_line_directive(&input.position, input_stack != NULL ? "1" : NULL);
 117
 118         /* place a virtual '\n' so we realize we're at line begin */
 119         input.position.lineno     = 0;
 120         input.c = '\n';
 121         next_preprocessing_token();
 122
 123         return true;
 124 }
 125
 126 static void close_input(void)
 127 {
 128         input_free(input.input);
 129         assert(input.file != NULL);
 130
 131         fclose(input.file);
 132         input.input  = NULL;
 133         input.file   = NULL;
 134         input.bufend = NULL;
 135         input.bufpos = NULL;
 136         input.c      = EOF;
 137 }
 138
 139 static void push_input(void)
 140 {
 141         pp_input_t *saved_input
 142                 = obstack_alloc(&input_obstack, sizeof(*saved_input));
 143
 144         memcpy(saved_input, &input, sizeof(*saved_input));
 145
 146         /* adjust buffer positions */
 147         if (input.bufpos != NULL)
 148                 saved_input->bufpos = saved_input->buf + (input.bufpos - input.buf);
 149         if (input.bufend != NULL)
 150                 saved_input->bufend = saved_input->buf + (input.bufend - input.buf);
 151
 152         saved_input->parent = input_stack;
 153         input_stack         = saved_input;
 154         ++n_inputs;
 155 }
 156
 157 static void pop_restore_input(void)
 158 {
 159         assert(n_inputs > 0);
 160         assert(input_stack != NULL);
 161
 162         pp_input_t *saved_input = input_stack;
 163
 164         memcpy(&input, saved_input, sizeof(input));
 165         input.parent = NULL;
 166
 167         /* adjust buffer positions */
 168         if (saved_input->bufpos != NULL)
 169                 input.bufpos = input.buf + (saved_input->bufpos - saved_input->buf);
 170         if (saved_input->bufend != NULL)
 171                 input.bufend = input.buf + (saved_input->bufend - saved_input->buf);
 172
 173         input_stack = saved_input->parent;
 174         obstack_free(&input_obstack, saved_input);
 175         --n_inputs;
 176 }
 177
 178 /**
 179  * Prints a parse error message at the current token.
 180  *
 181  * @param msg   the error message
 182  */
 183 static void parse_error(const char *msg)
 184 {
 185         errorf(&pp_token.source_position,  "%s", msg);
 186 }
 187
 188 static inline void next_real_char(void)
 189 {
 190         assert(input.bufpos <= input.bufend);
 191         if (input.bufpos >= input.bufend) {
 192                 size_t n = decode(input.input, input.buf + MAX_PUTBACK,
 193                                   sizeof(input.buf)/sizeof(input.buf[0]) - MAX_PUTBACK);
 194                 if (n == 0) {
 195                         input.c = EOF;
 196                         return;
 197                 }
 198                 input.bufpos = input.buf + MAX_PUTBACK;
 199                 input.bufend = input.bufpos + n;
 200         }
 201         input.c = *input.bufpos++;
 202         ++input.position.colno;
 203 }
 204
 205 /**
 206  * Put a character back into the buffer.
 207  *
 208  * @param pc  the character to put back
 209  */
 210 static inline void put_back(utf32 const pc)
 211 {
 212         assert(input.bufpos > input.buf);
 213         *(--input.bufpos - input.buf + input.buf) = (char) pc;
 214         --input.position.colno;
 215 }
 216
/**
 * Case labels for a newline, to be used inside a switch over input.c.
 *
 * Accepts "\r", "\n" and "\r\n" (the '\n' label sits inside the if of the
 * '\r' case, so both share the next_char() call). After consuming the
 * newline it resets info.whitespace, increments the line number, sets the
 * column to 1 and then executes @p code.
 */
#define MATCH_NEWLINE(code)                   \
        case '\r':                                \
                next_char();                          \
                if (input.c == '\n') {                \
        case '\n':                                \
                        next_char();                      \
                }                                     \
                info.whitespace = 0;                  \
                ++input.position.lineno;              \
                input.position.colno = 1;             \
                code

/** Asserts that the current character is c_type, then advances the input. */
#define eat(c_type) (assert(input.c == c_type), next_char())
 230
 231 static void maybe_concat_lines(void)
 232 {
 233         eat('\\');
 234
 235         switch (input.c) {
 236         MATCH_NEWLINE(
 237                 return;
 238         )
 239
 240         default:
 241                 break;
 242         }
 243
 244         put_back(input.c);
 245         input.c = '\\';
 246 }
 247
 248 /**
 249  * Set c to the next input character, ie.
 250  * after expanding trigraphs.
 251  */
 252 static inline void next_char(void)
 253 {
 254         next_real_char();
 255
 256         /* filter trigraphs and concatenated lines */
 257         if (UNLIKELY(input.c == '\\')) {
 258                 maybe_concat_lines();
 259                 goto end_of_next_char;
 260         }
 261
 262         if (LIKELY(input.c != '?'))
 263                 goto end_of_next_char;
 264
 265         next_real_char();
 266         if (LIKELY(input.c != '?')) {
 267                 put_back(input.c);
 268                 input.c = '?';
 269                 goto end_of_next_char;
 270         }
 271
 272         next_real_char();
 273         switch (input.c) {
 274         case '=': input.c = '#'; break;
 275         case '(': input.c = '['; break;
 276         case '/': input.c = '\\'; maybe_concat_lines(); break;
 277         case ')': input.c = ']'; break;
 278         case '\'': input.c = '^'; break;
 279         case '<': input.c = '{'; break;
 280         case '!': input.c = '|'; break;
 281         case '>': input.c = '}'; break;
 282         case '-': input.c = '~'; break;
 283         default:
 284                 put_back(input.c);
 285                 put_back('?');
 286                 input.c = '?';
 287                 break;
 288         }
 289
 290 end_of_next_char:;
 291 #ifdef DEBUG_CHARS
 292         printf("nchar '%c'\n", input.c);
 293 #endif
 294 }
 295
 296
 297
 298 /**
 299  * Returns true if the given char is a octal digit.
 300  *
 301  * @param char  the character to check
 302  */
 303 static inline bool is_octal_digit(int chr)
 304 {
 305         switch (chr) {
 306         case '0':
 307         case '1':
 308         case '2':
 309         case '3':
 310         case '4':
 311         case '5':
 312         case '6':
 313         case '7':
 314                 return true;
 315         default:
 316                 return false;
 317         }
 318 }
 319
 320 /**
 321  * Returns the value of a digit.
 322  * The only portable way to do it ...
 323  */
 324 static int digit_value(int digit)
 325 {
 326         switch (digit) {
 327         case '0': return 0;
 328         case '1': return 1;
 329         case '2': return 2;
 330         case '3': return 3;
 331         case '4': return 4;
 332         case '5': return 5;
 333         case '6': return 6;
 334         case '7': return 7;
 335         case '8': return 8;
 336         case '9': return 9;
 337         case 'a':
 338         case 'A': return 10;
 339         case 'b':
 340         case 'B': return 11;
 341         case 'c':
 342         case 'C': return 12;
 343         case 'd':
 344         case 'D': return 13;
 345         case 'e':
 346         case 'E': return 14;
 347         case 'f':
 348         case 'F': return 15;
 349         default:
 350                 panic("wrong character given");
 351         }
 352 }
 353
 354 /**
 355  * Parses an octal character sequence.
 356  *
 357  * @param first_digit  the already read first digit
 358  */
 359 static int parse_octal_sequence(const int first_digit)
 360 {
 361         assert(is_octal_digit(first_digit));
 362         int value = digit_value(first_digit);
 363         if (!is_octal_digit(input.c)) return value;
 364         value = 8 * value + digit_value(input.c);
 365         next_char();
 366         if (!is_octal_digit(input.c)) return value;
 367         value = 8 * value + digit_value(input.c);
 368         next_char();
 369
 370         if (char_is_signed) {
 371                 return (signed char) value;
 372         } else {
 373                 return (unsigned char) value;
 374         }
 375 }
 376
 377 /**
 378  * Parses a hex character sequence.
 379  */
 380 static int parse_hex_sequence(void)
 381 {
 382         int value = 0;
 383         while (isxdigit(input.c)) {
 384                 value = 16 * value + digit_value(input.c);
 385                 next_char();
 386         }
 387
 388         if (char_is_signed) {
 389                 return (signed char) value;
 390         } else {
 391                 return (unsigned char) value;
 392         }
 393 }
 394
 395 /**
 396  * Parse an escape sequence.
 397  */
 398 static int parse_escape_sequence(void)
 399 {
 400         eat('\\');
 401
 402         int ec = input.c;
 403         next_char();
 404
 405         switch (ec) {
 406         case '"':  return '"';
 407         case '\'': return '\'';
 408         case '\\': return '\\';
 409         case '?': return '\?';
 410         case 'a': return '\a';
 411         case 'b': return '\b';
 412         case 'f': return '\f';
 413         case 'n': return '\n';
 414         case 'r': return '\r';
 415         case 't': return '\t';
 416         case 'v': return '\v';
 417         case 'x':
 418                 return parse_hex_sequence();
 419         case '0':
 420         case '1':
 421         case '2':
 422         case '3':
 423         case '4':
 424         case '5':
 425         case '6':
 426         case '7':
 427                 return parse_octal_sequence(ec);
 428         case EOF:
 429                 parse_error("reached end of file while parsing escape sequence");
 430                 return EOF;
 431         default:
 432                 parse_error("unknown escape sequence");
 433                 return EOF;
 434         }
 435 }
 436
 437 static void grow_symbol(utf32 const tc)
 438 {
 439         struct obstack *const o  = &symbol_obstack;
 440         if (tc < 0x80U) {
 441                 obstack_1grow(o, tc);
 442         } else if (tc < 0x800) {
 443                 obstack_1grow(o, 0xC0 | (tc >> 6));
 444                 obstack_1grow(o, 0x80 | (tc & 0x3F));
 445         } else if (tc < 0x10000) {
 446                 obstack_1grow(o, 0xE0 | ( tc >> 12));
 447                 obstack_1grow(o, 0x80 | ((tc >>  6) & 0x3F));
 448                 obstack_1grow(o, 0x80 | ( tc        & 0x3F));
 449         } else {
 450                 obstack_1grow(o, 0xF0 | ( tc >> 18));
 451                 obstack_1grow(o, 0x80 | ((tc >> 12) & 0x3F));
 452                 obstack_1grow(o, 0x80 | ((tc >>  6) & 0x3F));
 453                 obstack_1grow(o, 0x80 | ( tc        & 0x3F));
 454         }
 455 }
 456
 457 static string_t identify_string(char *string, size_t len)
 458 {
 459         /* TODO hash */
 460 #if 0
 461         const char *result = strset_insert(&stringset, concat);
 462         if (result != concat) {
 463                 obstack_free(&symbol_obstack, concat);
 464         }
 465 #else
 466         const char *result = string;
 467 #endif
 468         return (string_t) {result, len};
 469 }
 470
 471 static void parse_string_literal(void)
 472 {
 473         const unsigned start_linenr = input.position.lineno;
 474
 475         eat('"');
 476
 477         while (true) {
 478                 switch (input.c) {
 479                 case '\\': {
 480                         utf32 tc;
 481                         if (resolve_escape_sequences) {
 482                                 tc = parse_escape_sequence();
 483                                 obstack_1grow(&symbol_obstack, (char) tc);
 484                         } else {
 485                                 obstack_1grow(&symbol_obstack, (char) input.c);
 486                                 next_char();
 487                                 obstack_1grow(&symbol_obstack, (char) input.c);
 488                                 next_char();
 489                         }
 490                         break;
 491                 }
 492
 493                 case EOF: {
 494                         source_position_t source_position;
 495                         source_position.input_name = pp_token.source_position.input_name;
 496                         source_position.lineno     = start_linenr;
 497                         errorf(&source_position, "string has no end");
 498                         pp_token.type = TP_ERROR;
 499                         return;
 500                 }
 501
 502                 case '"':
 503                         next_char();
 504                         goto end_of_string;
 505
 506                 default:
 507                         grow_symbol(input.c);
 508                         next_char();
 509                         break;
 510                 }
 511         }
 512
 513 end_of_string:
 514         /* add finishing 0 to the string */
 515         obstack_1grow(&symbol_obstack, '\0');
 516         const size_t size   = (size_t)obstack_object_size(&symbol_obstack);
 517         char *const  string = obstack_finish(&symbol_obstack);
 518
 519         pp_token.type    = TP_STRING_LITERAL;
 520         pp_token.literal = identify_string(string, size);
 521 }
 522
 523 /**
 524  * Parse a wide string literal and set lexer_token.
 525  */
 526 static void parse_wide_string_literal(void)
 527 {
 528         parse_string_literal();
 529         if (pp_token.type == TP_STRING_LITERAL)
 530                 pp_token.type = TP_WIDE_STRING_LITERAL;
 531 }
 532
 533 static void parse_wide_character_constant(void)
 534 {
 535         eat('\'');
 536
 537         while (true) {
 538                 switch (input.c) {
 539                 case '\\': {
 540                         const utf32 tc = parse_escape_sequence();
 541                         grow_symbol(tc);
 542                         break;
 543                 }
 544
 545                 MATCH_NEWLINE(
 546                         parse_error("newline while parsing character constant");
 547                         break;
 548                 )
 549
 550                 case '\'':
 551                         next_char();
 552                         goto end_of_wide_char_constant;
 553
 554                 case EOF:
 555                         parse_error("EOF while parsing character constant");
 556                         pp_token.type = TP_ERROR;
 557                         return;
 558
 559                 default:
 560                         grow_symbol(input.c);
 561                         next_char();
 562                         break;
 563                 }
 564         }
 565
 566 end_of_wide_char_constant:
 567         obstack_1grow(&symbol_obstack, '\0');
 568         size_t  size = (size_t) obstack_object_size(&symbol_obstack)-1;
 569         char   *string = obstack_finish(&symbol_obstack);
 570         pp_token.type       = TP_WIDE_CHARACTER_CONSTANT;
 571         pp_token.literal = identify_string(string, size);
 572
 573         if (size == 0) {
 574                 parse_error("empty character constant");
 575         }
 576 }
 577
 578 static void parse_character_constant(void)
 579 {
 580         const unsigned start_linenr = input.position.lineno;
 581
 582         eat('\'');
 583
 584         int tc;
 585         while (true) {
 586                 switch (input.c) {
 587                 case '\\':
 588                         tc = parse_escape_sequence();
 589                         obstack_1grow(&symbol_obstack, (char) tc);
 590                         break;
 591
 592                 MATCH_NEWLINE(
 593                         parse_error("newline while parsing character constant");
 594                         break;
 595                 )
 596
 597                 case EOF: {
 598                         source_position_t source_position;
 599                         source_position.input_name = pp_token.source_position.input_name;
 600                         source_position.lineno     = start_linenr;
 601                         errorf(&source_position, "EOF while parsing character constant");
 602                         pp_token.type = TP_ERROR;
 603                         return;
 604                 }
 605
 606                 case '\'':
 607                         next_char();
 608                         goto end_of_char_constant;
 609
 610                 default:
 611                         obstack_1grow(&symbol_obstack, (char) input.c);
 612                         next_char();
 613                         break;
 614
 615                 }
 616         }
 617
 618 end_of_char_constant:;
 619         const size_t      size   = (size_t)obstack_object_size(&symbol_obstack);
 620         const char *const string = obstack_finish(&symbol_obstack);
 621
 622         pp_token.type          = TP_CHARACTER_CONSTANT;
 623         pp_token.literal.begin = string;
 624         pp_token.literal.size  = size;
 625 }
 626
/**
 * Case labels for the letters and '_', except 'e', 'E', 'p' and 'P'.
 * parse_number() handles those four separately, because a sign may follow
 * them.
 */
#define SYMBOL_CHARS_WITHOUT_E_P \
        case 'a': \
        case 'b': \
        case 'c': \
        case 'd': \
        case 'f': \
        case 'g': \
        case 'h': \
        case 'i': \
        case 'j': \
        case 'k': \
        case 'l': \
        case 'm': \
        case 'n': \
        case 'o': \
        case 'q': \
        case 'r': \
        case 's': \
        case 't': \
        case 'u': \
        case 'v': \
        case 'w': \
        case 'x': \
        case 'y': \
        case 'z': \
        case 'A': \
        case 'B': \
        case 'C': \
        case 'D': \
        case 'F': \
        case 'G': \
        case 'H': \
        case 'I': \
        case 'J': \
        case 'K': \
        case 'L': \
        case 'M': \
        case 'N': \
        case 'O': \
        case 'Q': \
        case 'R': \
        case 'S': \
        case 'T': \
        case 'U': \
        case 'V': \
        case 'W': \
        case 'X': \
        case 'Y': \
        case 'Z': \
        case '_':

/** Case labels for all letters and '_' (characters that can start a symbol). */
#define SYMBOL_CHARS \
        SYMBOL_CHARS_WITHOUT_E_P \
        case 'e': \
        case 'p': \
        case 'E': \
        case 'P':

/** Case labels for the decimal digits. */
#define DIGITS \
        case '0':  \
        case '1':  \
        case '2':  \
        case '3':  \
        case '4':  \
        case '5':  \
        case '6':  \
        case '7':  \
        case '8':  \
        case '9':
 696
 697 /**
 698  * returns next final token from a preprocessor macro expansion
 699  */
 700 static void expand_next(void)
 701 {
 702         assert(current_expansion != NULL);
 703
 704         pp_definition_t *definition = current_expansion;
 705
 706 restart:
 707         if (definition->list_len == 0
 708                         || definition->expand_pos >= definition->list_len) {
 709                 /* we're finished with the current macro, move up 1 level in the
 710                  * expansion stack */
 711                 pp_definition_t *parent = definition->parent_expansion;
 712                 definition->parent_expansion = NULL;
 713                 definition->is_expanding     = false;
 714
 715                 /* it was the outermost expansion, parse normal pptoken */
 716                 if (parent == NULL) {
 717                         current_expansion = NULL;
 718                         next_preprocessing_token();
 719                         return;
 720                 }
 721                 definition        = parent;
 722                 current_expansion = definition;
 723                 goto restart;
 724         }
 725         pp_token = definition->token_list[definition->expand_pos];
 726         pp_token.source_position = expansion_pos;
 727         ++definition->expand_pos;
 728
 729         if (pp_token.type != TP_IDENTIFIER)
 730                 return;
 731
 732         /* if it was an identifier then we might need to expand again */
 733         pp_definition_t *symbol_definition = pp_token.symbol->pp_definition;
 734         if (symbol_definition != NULL && !symbol_definition->is_expanding) {
 735                 symbol_definition->parent_expansion = definition;
 736                 symbol_definition->expand_pos       = 0;
 737                 symbol_definition->is_expanding     = true;
 738                 definition                          = symbol_definition;
 739                 current_expansion                   = definition;
 740                 goto restart;
 741         }
 742 }
 743
 744 static void skip_line_comment(void)
 745 {
 746         while (true) {
 747                 switch (input.c) {
 748                 case EOF:
 749                         return;
 750
 751                 case '\r':
 752                 case '\n':
 753                         return;
 754
 755                 default:
 756                         next_char();
 757                         break;
 758                 }
 759         }
 760 }
 761
 762 static void skip_multiline_comment(void)
 763 {
 764         unsigned start_linenr = input.position.lineno;
 765         while (true) {
 766                 switch (input.c) {
 767                 case '/':
 768                         next_char();
 769                         if (input.c == '*') {
 770                                 /* TODO: nested comment, warn here */
 771                         }
 772                         break;
 773                 case '*':
 774                         next_char();
 775                         if (input.c == '/') {
 776                                 next_char();
 777                                 info.whitespace += input.position.colno-1;
 778                                 return;
 779                         }
 780                         break;
 781
 782                 MATCH_NEWLINE(
 783                         info.at_line_begin |= !in_pp_directive;
 784                         break;
 785                 )
 786
 787                 case EOF: {
 788                         source_position_t source_position;
 789                         source_position.input_name = pp_token.source_position.input_name;
 790                         source_position.lineno     = start_linenr;
 791                         errorf(&source_position, "at end of file while looking for comment end");
 792                         return;
 793                 }
 794
 795                 default:
 796                         next_char();
 797                         break;
 798                 }
 799         }
 800 }
 801
 802 static void skip_whitespace(void)
 803 {
 804         while (true) {
 805                 switch (input.c) {
 806                 case ' ':
 807                 case '\t':
 808                         next_char();
 809                         continue;
 810
 811                 MATCH_NEWLINE(
 812                         info.at_line_begin = true;
 813                         return;
 814                 )
 815
 816                 case '/':
 817                         next_char();
 818                         if (input.c == '/') {
 819                                 next_char();
 820                                 skip_line_comment();
 821                                 continue;
 822                         } else if (input.c == '*') {
 823                                 next_char();
 824                                 skip_multiline_comment();
 825                                 continue;
 826                         } else {
 827                                 put_back(input.c);
 828                                 input.c = '/';
 829                         }
 830                         return;
 831                 default:
 832                         return;
 833                 }
 834         }
 835 }
 836
 837 static void eat_pp(int type)
 838 {
 839         (void) type;
 840         assert(pp_token.type == type);
 841         next_preprocessing_token();
 842 }
 843
 844 static void parse_symbol(void)
 845 {
 846         obstack_1grow(&symbol_obstack, (char) input.c);
 847         next_char();
 848
 849         while (true) {
 850                 switch (input.c) {
 851                 DIGITS
 852                 SYMBOL_CHARS
 853                         obstack_1grow(&symbol_obstack, (char) input.c);
 854                         next_char();
 855                         break;
 856
 857                 default:
 858                         goto end_symbol;
 859                 }
 860         }
 861
 862 end_symbol:
 863         obstack_1grow(&symbol_obstack, '\0');
 864         char *string = obstack_finish(&symbol_obstack);
 865
 866         /* might be a wide string or character constant ( L"string"/L'c' ) */
 867         if (input.c == '"' && string[0] == 'L' && string[1] == '\0') {
 868                 obstack_free(&symbol_obstack, string);
 869                 parse_wide_string_literal();
 870                 return;
 871         } else if (input.c == '\'' && string[0] == 'L' && string[1] == '\0') {
 872                 obstack_free(&symbol_obstack, string);
 873                 parse_wide_character_constant();
 874                 return;
 875         }
 876
 877         symbol_t *symbol = symbol_table_insert(string);
 878
 879         pp_token.type   = symbol->pp_ID;
 880         pp_token.symbol = symbol;
 881
 882         /* we can free the memory from symbol obstack if we already had an entry in
 883          * the symbol table */
 884         if (symbol->string != string) {
 885                 obstack_free(&symbol_obstack, string);
 886         }
 887 }
 888
 889 static void parse_number(void)
 890 {
 891         obstack_1grow(&symbol_obstack, (char) input.c);
 892         next_char();
 893
 894         while (true) {
 895                 switch (input.c) {
 896                 case '.':
 897                 DIGITS
 898                 SYMBOL_CHARS_WITHOUT_E_P
 899                         obstack_1grow(&symbol_obstack, (char) input.c);
 900                         next_char();
 901                         break;
 902
 903                 case 'e':
 904                 case 'p':
 905                 case 'E':
 906                 case 'P':
 907                         obstack_1grow(&symbol_obstack, (char) input.c);
 908                         next_char();
 909                         if (input.c == '+' || input.c == '-') {
 910                                 obstack_1grow(&symbol_obstack, (char) input.c);
 911                                 next_char();
 912                         }
 913                         break;
 914
 915                 default:
 916                         goto end_number;
 917                 }
 918         }
 919
 920 end_number:
 921         obstack_1grow(&symbol_obstack, '\0');
 922         size_t  size   = obstack_object_size(&symbol_obstack);
 923         char   *string = obstack_finish(&symbol_obstack);
 924
 925         pp_token.type          = TP_NUMBER;
 926         pp_token.literal.begin = string;
 927         pp_token.literal.size  = size;
 928 }
 929
 930
/*
 * Helper macros for tokens of more than one character. They are used as a
 * sequence MAYBE_PROLOG, any number of MAYBE()s, and finally ELSE() or
 * ELSE_CODE(), which together form one loop with a switch inside.
 */

/** Consumes the current character and opens a switch over the next one. */
#define MAYBE_PROLOG                                       \
                        next_char();                                   \
                        while (true) {                                 \
                                switch (input.c) {

/** If the next character is ch: consume it, set the token type to set_type
 * and return from the enclosing function. */
#define MAYBE(ch, set_type)                                \
                                case ch:                                   \
                                        next_char();                           \
                                        pp_token.type = set_type;              \
                                        return;

/** Default case: execute code and return from the enclosing function.
 * Closes the switch and the loop opened by MAYBE_PROLOG. */
#define ELSE_CODE(code)                                    \
                                default:                                   \
                                        code                                   \
                                        return;                                \
                                }                                          \
                        }

/** Default case that only sets the token type to set_type. */
#define ELSE(set_type)                                     \
                ELSE_CODE(                                         \
                        pp_token.type = set_type;                      \
                )
 953
 954 static void next_preprocessing_token(void)
 955 {
 956         if (current_expansion != NULL) {
 957                 expand_next();
 958                 return;
 959         }
 960
 961         info.at_line_begin  = false;
 962         info.had_whitespace = false;
 963 restart:
 964         pp_token.source_position = input.position;
 965         switch (input.c) {
 966         case ' ':
 967         case '\t':
 968                 ++info.whitespace;
 969                 info.had_whitespace = true;
 970                 next_char();
 971                 goto restart;
 972
 973         MATCH_NEWLINE(
 974                 info.at_line_begin = true;
 975                 info.had_whitespace = true;
 976                 goto restart;
 977         )
 978
 979         SYMBOL_CHARS
 980                 parse_symbol();
 981                 return;
 982
 983         DIGITS
 984                 parse_number();
 985                 return;
 986
 987         case '"':
 988                 parse_string_literal();
 989                 return;
 990
 991         case '\'':
 992                 parse_character_constant();
 993                 return;
 994
 995         case '.':
 996                 MAYBE_PROLOG
 997                         case '0':
 998                         case '1':
 999                         case '2':
1000                         case '3':
1001                         case '4':
1002                         case '5':
1003                         case '6':
1004                         case '7':
1005                         case '8':
1006                         case '9':
1007                                 put_back(input.c);
1008                                 input.c = '.';
1009                                 parse_number();
1010                                 return;
1011
1012                         case '.':
1013                                 MAYBE_PROLOG
1014                                 MAYBE('.', TP_DOTDOTDOT)
1015                                 ELSE_CODE(
1016                                         put_back(input.c);
1017                                         input.c = '.';
1018                                         pp_token.type = '.';
1019                                 )
1020                 ELSE('.')
1021         case '&':
1022                 MAYBE_PROLOG
1023                 MAYBE('&', TP_ANDAND)
1024                 MAYBE('=', TP_ANDEQUAL)
1025                 ELSE('&')
1026         case '*':
1027                 MAYBE_PROLOG
1028                 MAYBE('=', TP_ASTERISKEQUAL)
1029                 ELSE('*')
1030         case '+':
1031                 MAYBE_PROLOG
1032                 MAYBE('+', TP_PLUSPLUS)
1033                 MAYBE('=', TP_PLUSEQUAL)
1034                 ELSE('+')
1035         case '-':
1036                 MAYBE_PROLOG
1037                 MAYBE('>', TP_MINUSGREATER)
1038                 MAYBE('-', TP_MINUSMINUS)
1039                 MAYBE('=', TP_MINUSEQUAL)
1040                 ELSE('-')
1041         case '!':
1042                 MAYBE_PROLOG
1043                 MAYBE('=', TP_EXCLAMATIONMARKEQUAL)
1044                 ELSE('!')
1045         case '/':
1046                 MAYBE_PROLOG
1047                 MAYBE('=', TP_SLASHEQUAL)
1048                         case '*':
1049                                 next_char();
1050                                 info.had_whitespace = true;
1051                                 skip_multiline_comment();
1052                                 goto restart;
1053                         case '/':
1054                                 next_char();
1055                                 info.had_whitespace = true;
1056                                 skip_line_comment();
1057                                 goto restart;
1058                 ELSE('/')
1059         case '%':
1060                 MAYBE_PROLOG
1061                 MAYBE('>', '}')
1062                 MAYBE('=', TP_PERCENTEQUAL)
1063                         case ':':
1064                                 MAYBE_PROLOG
1065                                         case '%':
1066                                                 MAYBE_PROLOG
1067                                                 MAYBE(':', TP_HASHHASH)
1068                                                 ELSE_CODE(
1069                                                         put_back(input.c);
1070                                                         input.c = '%';
1071                                                         pp_token.type = '#';
1072                                                 )
1073                                 ELSE('#')
1074                 ELSE('%')
1075         case '<':
1076                 MAYBE_PROLOG
1077                 MAYBE(':', '[')
1078                 MAYBE('%', '{')
1079                 MAYBE('=', TP_LESSEQUAL)
1080                         case '<':
1081                                 MAYBE_PROLOG
1082                                 MAYBE('=', TP_LESSLESSEQUAL)
1083                                 ELSE(TP_LESSLESS)
1084                 ELSE('<')
1085         case '>':
1086                 MAYBE_PROLOG
1087                 MAYBE('=', TP_GREATEREQUAL)
1088                         case '>':
1089                                 MAYBE_PROLOG
1090                                 MAYBE('=', TP_GREATERGREATEREQUAL)
1091                                 ELSE(TP_GREATERGREATER)
1092                 ELSE('>')
1093         case '^':
1094                 MAYBE_PROLOG
1095                 MAYBE('=', TP_CARETEQUAL)
1096                 ELSE('^')
1097         case '|':
1098                 MAYBE_PROLOG
1099                 MAYBE('=', TP_PIPEEQUAL)
1100                 MAYBE('|', TP_PIPEPIPE)
1101                 ELSE('|')
1102         case ':':
1103                 MAYBE_PROLOG
1104                 MAYBE('>', ']')
1105                 ELSE(':')
1106         case '=':
1107                 MAYBE_PROLOG
1108                 MAYBE('=', TP_EQUALEQUAL)
1109                 ELSE('=')
1110         case '#':
1111                 MAYBE_PROLOG
1112                 MAYBE('#', TP_HASHHASH)
1113                 ELSE_CODE(
1114                         pp_token.type = '#';
1115                 )
1116
1117         case '?':
1118         case '[':
1119         case ']':
1120         case '(':
1121         case ')':
1122         case '{':
1123         case '}':
1124         case '~':
1125         case ';':
1126         case ',':
1127         case '\\':
1128                 pp_token.type = input.c;
1129                 next_char();
1130                 return;
1131
1132         case EOF:
1133                 if (input_stack != NULL) {
1134                         close_input();
1135                         pop_restore_input();
1136                         fputc('\n', out);
1137                         print_line_directive(&input.position, "2");
1138                         goto restart;
1139                 } else {
1140                         pp_token.source_position.lineno++;
1141                         info.at_line_begin = true;
1142                         pp_token.type = TP_EOF;
1143                 }
1144                 return;
1145
1146         default:
1147                 next_char();
1148                 if (!ignore_unknown_chars) {
1149                         errorf(&pp_token.source_position, "unknown character '%c' found\n",
1150                                input.c);
1151                         pp_token.type = TP_ERROR;
1152                 } else {
1153                         pp_token.type = input.c;
1154                 }
1155                 return;
1156         }
1157 }
1158
1159 static void print_quoted_string(const char *const string)
1160 {
1161         fputc('"', out);
1162         for (const char *c = string; *c != 0; ++c) {
1163                 switch (*c) {
1164                 case '"': fputs("\\\"", out); break;
1165                 case '\\':  fputs("\\\\", out); break;
1166                 case '\a':  fputs("\\a", out); break;
1167                 case '\b':  fputs("\\b", out); break;
1168                 case '\f':  fputs("\\f", out); break;
1169                 case '\n':  fputs("\\n", out); break;
1170                 case '\r':  fputs("\\r", out); break;
1171                 case '\t':  fputs("\\t", out); break;
1172                 case '\v':  fputs("\\v", out); break;
1173                 case '\?':  fputs("\\?", out); break;
1174                 default:
1175                         if (!isprint(*c)) {
1176                                 fprintf(out, "\\%03o", (unsigned)*c);
1177                                 break;
1178                         }
1179                         fputc(*c, out);
1180                         break;
1181                 }
1182         }
1183         fputc('"', out);
1184 }
1185
1186 static void print_line_directive(const source_position_t *pos, const char *add)
1187 {
1188         fprintf(out, "# %u ", pos->lineno);
1189         print_quoted_string(pos->input_name);
1190         if (add != NULL) {
1191                 fputc(' ', out);
1192                 fputs(add, out);
1193         }
1194
1195         printed_input_name = pos->input_name;
1196         input.output_line  = pos->lineno-1;
1197 }
1198
1199 static void emit_newlines(void)
1200 {
1201         unsigned delta = pp_token.source_position.lineno - input.output_line;
1202
1203         if (delta >= 9) {
1204                 fputc('\n', out);
1205                 print_line_directive(&pp_token.source_position, NULL);
1206                 fputc('\n', out);
1207         } else {
1208                 for (unsigned i = 0; i < delta; ++i) {
1209                         fputc('\n', out);
1210                 }
1211         }
1212         input.output_line = pp_token.source_position.lineno;
1213 }
1214
1215 static void emit_pp_token(void)
1216 {
1217         if (skip_mode)
1218                 return;
1219
1220         if (info.at_line_begin) {
1221                 emit_newlines();
1222
1223                 for (unsigned i = 0; i < info.whitespace; ++i)
1224                         fputc(' ', out);
1225
1226         } else if (info.had_whitespace ||
1227                            tokens_would_paste(last_token, pp_token.type)) {
1228                 fputc(' ', out);
1229         }
1230
1231         switch (pp_token.type) {
1232         case TP_IDENTIFIER:
1233                 fputs(pp_token.symbol->string, out);
1234                 break;
1235         case TP_NUMBER:
1236                 fputs(pp_token.literal.begin, out);
1237                 break;
1238         case TP_WIDE_STRING_LITERAL:
1239                 fputc('L', out);
1240         case TP_STRING_LITERAL:
1241                 fputc('"', out);
1242                 fputs(pp_token.literal.begin, out);
1243                 fputc('"', out);
1244                 break;
1245         case TP_WIDE_CHARACTER_CONSTANT:
1246                 fputc('L', out);
1247         case TP_CHARACTER_CONSTANT:
1248                 fputc('\'', out);
1249                 fputs(pp_token.literal.begin, out);
1250                 fputc('\'', out);
1251                 break;
1252         default:
1253                 print_pp_token_type(out, pp_token.type);
1254                 break;
1255         }
1256         last_token = pp_token.type;
1257 }
1258
1259 static void eat_pp_directive(void)
1260 {
1261         while (!info.at_line_begin) {
1262                 next_preprocessing_token();
1263         }
1264 }
1265
1266 static bool strings_equal(const string_t *string1, const string_t *string2)
1267 {
1268         size_t size = string1->size;
1269         if (size != string2->size)
1270                 return false;
1271
1272         const char *c1 = string1->begin;
1273         const char *c2 = string2->begin;
1274         for (size_t i = 0; i < size; ++i, ++c1, ++c2) {
1275                 if (*c1 != *c2)
1276                         return false;
1277         }
1278         return true;
1279 }
1280
1281 static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
1282 {
1283         if (token1->type != token2->type)
1284                 return false;
1285
1286         switch (token1->type) {
1287         case TP_HEADERNAME:
1288                 /* TODO */
1289                 return false;
1290         case TP_IDENTIFIER:
1291                 return token1->symbol == token2->symbol;
1292         case TP_NUMBER:
1293         case TP_CHARACTER_CONSTANT:
1294         case TP_STRING_LITERAL:
1295                 return strings_equal(&token1->literal, &token2->literal);
1296
1297         default:
1298                 return true;
1299         }
1300 }
1301
1302 static bool pp_definitions_equal(const pp_definition_t *definition1,
1303                                  const pp_definition_t *definition2)
1304 {
1305         if (definition1->list_len != definition2->list_len)
1306                 return false;
1307
1308         size_t         len = definition1->list_len;
1309         const token_t *t1  = definition1->token_list;
1310         const token_t *t2  = definition2->token_list;
1311         for (size_t i = 0; i < len; ++i, ++t1, ++t2) {
1312                 if (!pp_tokens_equal(t1, t2))
1313                         return false;
1314         }
1315         return true;
1316 }
1317
1318 static void parse_define_directive(void)
1319 {
1320         eat_pp(TP_define);
1321         assert(obstack_object_size(&pp_obstack) == 0);
1322
1323         if (pp_token.type != TP_IDENTIFIER || info.at_line_begin) {
1324                 errorf(&pp_token.source_position,
1325                        "expected identifier after #define, got '%t'", &pp_token);
1326                 goto error_out;
1327         }
1328         symbol_t *symbol = pp_token.symbol;
1329
1330         pp_definition_t *new_definition
1331                 = obstack_alloc(&pp_obstack, sizeof(new_definition[0]));
1332         memset(new_definition, 0, sizeof(new_definition[0]));
1333         new_definition->source_position = input.position;
1334
1335         /* this is probably the only place where spaces are significant in the
1336          * lexer (except for the fact that they separate tokens). #define b(x)
1337          * is something else than #define b (x) */
1338         if (input.c == '(') {
1339                 /* eat the '(' */
1340                 next_preprocessing_token();
1341                 /* get next token after '(' */
1342                 next_preprocessing_token();
1343
1344                 while (true) {
1345                         switch (pp_token.type) {
1346                         case TP_DOTDOTDOT:
1347                                 new_definition->is_variadic = true;
1348                                 next_preprocessing_token();
1349                                 if (pp_token.type != ')') {
1350                                         errorf(&input.position,
1351                                                         "'...' not at end of macro argument list");
1352                                         goto error_out;
1353                                 }
1354                                 break;
1355                         case TP_IDENTIFIER:
1356                                 obstack_ptr_grow(&pp_obstack, pp_token.symbol);
1357                                 next_preprocessing_token();
1358
1359                                 if (pp_token.type == ',') {
1360                                         next_preprocessing_token();
1361                                         break;
1362                                 }
1363
1364                                 if (pp_token.type != ')') {
1365                                         errorf(&pp_token.source_position,
1366                                                "expected ',' or ')' after identifier, got '%t'",
1367                                                &pp_token);
1368                                         goto error_out;
1369                                 }
1370                                 break;
1371                         case ')':
1372                                 next_preprocessing_token();
1373                                 goto finish_argument_list;
1374                         default:
1375                                 errorf(&pp_token.source_position,
1376                                        "expected identifier, '...' or ')' in #define argument list, got '%t'",
1377                                        &pp_token);
1378                                 goto error_out;
1379                         }
1380                 }
1381
1382         finish_argument_list:
1383                 new_definition->has_parameters = true;
1384                 new_definition->n_parameters
1385                         = obstack_object_size(&pp_obstack) / sizeof(new_definition->parameters[0]);
1386                 new_definition->parameters = obstack_finish(&pp_obstack);
1387         } else {
1388                 next_preprocessing_token();
1389         }
1390
1391         /* construct a new pp_definition on the obstack */
1392         assert(obstack_object_size(&pp_obstack) == 0);
1393         size_t list_len = 0;
1394         while (!info.at_line_begin) {
1395                 obstack_grow(&pp_obstack, &pp_token, sizeof(pp_token));
1396                 ++list_len;
1397                 next_preprocessing_token();
1398         }
1399
1400         new_definition->list_len   = list_len;
1401         new_definition->token_list = obstack_finish(&pp_obstack);
1402
1403         pp_definition_t *old_definition = symbol->pp_definition;
1404         if (old_definition != NULL) {
1405                 if (!pp_definitions_equal(old_definition, new_definition)) {
1406                         warningf(WARN_OTHER, &input.position, "multiple definition of macro '%Y' (first defined %P)", symbol, &old_definition->source_position);
1407                 } else {
1408                         /* reuse the old definition */
1409                         obstack_free(&pp_obstack, new_definition);
1410                         new_definition = old_definition;
1411                 }
1412         }
1413
1414         symbol->pp_definition = new_definition;
1415         return;
1416
1417 error_out:
1418         if (obstack_object_size(&pp_obstack) > 0) {
1419                 char *ptr = obstack_finish(&pp_obstack);
1420                 obstack_free(&pp_obstack, ptr);
1421         }
1422         eat_pp_directive();
1423 }
1424
1425 static void parse_undef_directive(void)
1426 {
1427         eat_pp(TP_undef);
1428
1429         if (pp_token.type != TP_IDENTIFIER) {
1430                 errorf(&input.position,
1431                        "expected identifier after #undef, got '%t'", &pp_token);
1432                 eat_pp_directive();
1433                 return;
1434         }
1435
1436         symbol_t *symbol = pp_token.symbol;
1437         symbol->pp_definition = NULL;
1438         next_preprocessing_token();
1439
1440         if (!info.at_line_begin) {
1441                 warningf(WARN_OTHER, &input.position, "extra tokens at end of #undef directive");
1442         }
1443         eat_pp_directive();
1444 }
1445
1446 static const char *parse_headername(void)
1447 {
1448         /* behind an #include we can have the special headername lexems.
1449          * They're only allowed behind an #include so they're not recognized
1450          * by the normal next_preprocessing_token. We handle them as a special
1451          * exception here */
1452         skip_whitespace();
1453
1454         if (info.at_line_begin) {
1455                 parse_error("expected headername after #include");
1456                 return NULL;
1457         }
1458
1459         assert(obstack_object_size(&input_obstack) == 0);
1460
1461         /* check wether we have a "... or <... headername */
1462         switch (input.c) {
1463         case '<':
1464                 next_char();
1465                 while (true) {
1466                         switch (input.c) {
1467                         case EOF:
1468                                 /* fallthrough */
1469                         MATCH_NEWLINE(
1470                                 parse_error("header name without closing '>'");
1471                                 return NULL;
1472                         )
1473                         case '>':
1474                                 next_char();
1475                                 goto finished_headername;
1476                         }
1477                         obstack_1grow(&input_obstack, (char) input.c);
1478                         next_char();
1479                 }
1480                 /* we should never be here */
1481
1482         case '"':
1483                 next_char();
1484                 while (true) {
1485                         switch (input.c) {
1486                         case EOF:
1487                                 /* fallthrough */
1488                         MATCH_NEWLINE(
1489                                 parse_error("header name without closing '>'");
1490                                 return NULL;
1491                         )
1492                         case '"':
1493                                 next_char();
1494                                 goto finished_headername;
1495                         }
1496                         obstack_1grow(&input_obstack, (char) input.c);
1497                         next_char();
1498                 }
1499                 /* we should never be here */
1500
1501         default:
1502                 /* TODO: do normal pp_token parsing and concatenate results */
1503                 panic("pp_token concat include not implemented yet");
1504         }
1505
1506 finished_headername:
1507         obstack_1grow(&input_obstack, '\0');
1508         char *headername = obstack_finish(&input_obstack);
1509
1510         /* TODO: iterate search-path to find the file */
1511
1512         skip_whitespace();
1513
1514         return headername;
1515 }
1516
1517 static bool parse_include_directive(void)
1518 {
1519         /* don't eat the TP_include here!
1520          * we need an alternative parsing for the next token */
1521
1522         const char *headername = parse_headername();
1523         if (headername == NULL) {
1524                 eat_pp_directive();
1525                 return false;
1526         }
1527
1528         if (!info.at_line_begin) {
1529                 warningf(WARN_OTHER, &pp_token.source_position, "extra tokens at end of #include directive");
1530                 eat_pp_directive();
1531         }
1532
1533         if (n_inputs > INCLUDE_LIMIT) {
1534                 errorf(&pp_token.source_position, "#include nested too deeply");
1535                 /* eat \n or EOF */
1536                 next_preprocessing_token();
1537                 return false;
1538         }
1539
1540         /* we have to reenable space counting and macro expansion here,
1541          * because it is still disabled in directive parsing,
1542          * but we will trigger a preprocessing token reading of the new file
1543          * now and need expansions/space counting */
1544         in_pp_directive = false;
1545
1546         /* switch inputs */
1547         emit_newlines();
1548         push_input();
1549         bool res = open_input(headername);
1550         if (!res) {
1551                 errorf(&pp_token.source_position,
1552                        "failed including '%s': %s", headername, strerror(errno));
1553                 pop_restore_input();
1554                 return false;
1555         }
1556
1557         return true;
1558 }
1559
1560 static pp_conditional_t *push_conditional(void)
1561 {
1562         pp_conditional_t *conditional
1563                 = obstack_alloc(&pp_obstack, sizeof(*conditional));
1564         memset(conditional, 0, sizeof(*conditional));
1565
1566         conditional->parent = conditional_stack;
1567         conditional_stack   = conditional;
1568
1569         return conditional;
1570 }
1571
1572 static void pop_conditional(void)
1573 {
1574         assert(conditional_stack != NULL);
1575         conditional_stack = conditional_stack->parent;
1576 }
1577
1578 static void check_unclosed_conditionals(void)
1579 {
1580         while (conditional_stack != NULL) {
1581                 pp_conditional_t *conditional = conditional_stack;
1582
1583                 if (conditional->in_else) {
1584                         errorf(&conditional->source_position, "unterminated #else");
1585                 } else {
1586                         errorf(&conditional->source_position, "unterminated condition");
1587                 }
1588                 pop_conditional();
1589         }
1590 }
1591
1592 static void parse_ifdef_ifndef_directive(void)
1593 {
1594         bool is_ifndef = (pp_token.type == TP_ifndef);
1595         bool condition;
1596         next_preprocessing_token();
1597
1598         if (skip_mode) {
1599                 eat_pp_directive();
1600                 pp_conditional_t *conditional = push_conditional();
1601                 conditional->source_position  = pp_token.source_position;
1602                 conditional->skip             = true;
1603                 return;
1604         }
1605
1606         if (pp_token.type != TP_IDENTIFIER || info.at_line_begin) {
1607                 errorf(&pp_token.source_position,
1608                        "expected identifier after #%s, got '%t'",
1609                        is_ifndef ? "ifndef" : "ifdef", &pp_token);
1610                 eat_pp_directive();
1611
1612                 /* just take the true case in the hope to avoid further errors */
1613                 condition = true;
1614         } else {
1615                 symbol_t        *symbol        = pp_token.symbol;
1616                 pp_definition_t *pp_definition = symbol->pp_definition;
1617                 next_preprocessing_token();
1618
1619                 if (!info.at_line_begin) {
1620                         errorf(&pp_token.source_position,
1621                                "extra tokens at end of #%s",
1622                                is_ifndef ? "ifndef" : "ifdef");
1623                         eat_pp_directive();
1624                 }
1625
1626                 /* evaluate wether we are in true or false case */
1627                 condition = is_ifndef ? pp_definition == NULL : pp_definition != NULL;
1628         }
1629
1630         pp_conditional_t *conditional = push_conditional();
1631         conditional->source_position  = pp_token.source_position;
1632         conditional->condition        = condition;
1633
1634         if (!condition) {
1635                 skip_mode = true;
1636         }
1637 }
1638
1639 static void parse_else_directive(void)
1640 {
1641         eat_pp(TP_else);
1642
1643         if (!info.at_line_begin) {
1644                 if (!skip_mode) {
1645                         warningf(WARN_OTHER, &pp_token.source_position, "extra tokens at end of #else");
1646                 }
1647                 eat_pp_directive();
1648         }
1649
1650         pp_conditional_t *conditional = conditional_stack;
1651         if (conditional == NULL) {
1652                 errorf(&pp_token.source_position, "#else without prior #if");
1653                 return;
1654         }
1655
1656         if (conditional->in_else) {
1657                 errorf(&pp_token.source_position,
1658                        "#else after #else (condition started %P)",
1659                        conditional->source_position);
1660                 skip_mode = true;
1661                 return;
1662         }
1663
1664         conditional->in_else = true;
1665         if (!conditional->skip) {
1666                 skip_mode = conditional->condition;
1667         }
1668         conditional->source_position = pp_token.source_position;
1669 }
1670
1671 static void parse_endif_directive(void)
1672 {
1673         eat_pp(TP_endif);
1674
1675         if (!info.at_line_begin) {
1676                 if (!skip_mode) {
1677                         warningf(WARN_OTHER, &pp_token.source_position, "extra tokens at end of #endif");
1678                 }
1679                 eat_pp_directive();
1680         }
1681
1682         pp_conditional_t *conditional = conditional_stack;
1683         if (conditional == NULL) {
1684                 errorf(&pp_token.source_position, "#endif without prior #if");
1685                 return;
1686         }
1687
1688         if (!conditional->skip) {
1689                 skip_mode = false;
1690         }
1691         pop_conditional();
1692 }
1693
1694 static void parse_preprocessing_directive(void)
1695 {
1696         in_pp_directive = true;
1697         eat_pp('#');
1698
1699         if (skip_mode) {
1700                 switch (pp_token.type) {
1701                 case TP_ifdef:
1702                 case TP_ifndef:
1703                         parse_ifdef_ifndef_directive();
1704                         break;
1705                 case TP_else:
1706                         parse_else_directive();
1707                         break;
1708                 case TP_endif:
1709                         parse_endif_directive();
1710                         break;
1711                 default:
1712                         eat_pp_directive();
1713                         break;
1714                 }
1715         } else {
1716                 switch (pp_token.type) {
1717                 case TP_define:
1718                         parse_define_directive();
1719                         break;
1720                 case TP_undef:
1721                         parse_undef_directive();
1722                         break;
1723                 case TP_ifdef:
1724                 case TP_ifndef:
1725                         parse_ifdef_ifndef_directive();
1726                         break;
1727                 case TP_else:
1728                         parse_else_directive();
1729                         break;
1730                 case TP_endif:
1731                         parse_endif_directive();
1732                         break;
1733                 case TP_include:
1734                         parse_include_directive();
1735                         break;
1736                 default:
1737                         if (info.at_line_begin) {
1738                                 /* the nop directive "#" */
1739                                 break;
1740                         }
1741                         errorf(&pp_token.source_position,
1742                                    "invalid preprocessing directive #%t", &pp_token);
1743                         eat_pp_directive();
1744                         break;
1745                 }
1746         }
1747
1748         in_pp_directive = false;
1749         assert(info.at_line_begin);
1750 }
1751
1752 int pptest_main(int argc, char **argv);
1753 int pptest_main(int argc, char **argv)
1754 {
1755         init_symbol_table();
1756         init_tokens();
1757
1758         obstack_init(&pp_obstack);
1759         obstack_init(&input_obstack);
1760
1761         const char *filename = "t.c";
1762         if (argc > 1)
1763                 filename = argv[1];
1764
1765         out = stdout;
1766
1767         /* just here for gcc compatibility */
1768         fprintf(out, "# 1 \"%s\"\n", filename);
1769         fprintf(out, "# 1 \"<built-in>\"\n");
1770         fprintf(out, "# 1 \"<command-line>\"\n");
1771
1772         bool ok = open_input(filename);
1773         assert(ok);
1774
1775         while (true) {
1776                 if (pp_token.type == '#' && info.at_line_begin) {
1777                         parse_preprocessing_directive();
1778                         continue;
1779                 } else if (pp_token.type == TP_EOF) {
1780                         goto end_of_main_loop;
1781                 } else if (pp_token.type == TP_IDENTIFIER && !in_pp_directive) {
1782                         symbol_t *symbol = pp_token.symbol;
1783                         pp_definition_t *pp_definition = symbol->pp_definition;
1784                         if (pp_definition != NULL && !pp_definition->is_expanding) {
1785                                 expansion_pos = pp_token.source_position;
1786                                 if (pp_definition->has_parameters) {
1787                                         source_position_t position = pp_token.source_position;
1788                                         add_token_info_t old_info = info;
1789                                         next_preprocessing_token();
1790                                         add_token_info_t new_info = info;
1791
1792                                         /* no opening brace -> no expansion */
1793                                         if (pp_token.type == '(') {
1794                                                 eat_pp('(');
1795
1796                                                 /* parse arguments (TODO) */
1797                                                 while (pp_token.type != TP_EOF && pp_token.type != ')')
1798                                                         next_preprocessing_token();
1799                                         } else {
1800                                                 token_t next_token = pp_token;
1801                                                 /* restore identifier token */
1802                                                 pp_token.type            = TP_IDENTIFIER;
1803                                                 pp_token.symbol          = symbol;
1804                                                 pp_token.source_position = position;
1805                                                 info = old_info;
1806                                                 emit_pp_token();
1807
1808                                                 info = new_info;
1809                                                 pp_token = next_token;
1810                                                 continue;
1811                                         }
1812                                         info = old_info;
1813                                 }
1814                                 pp_definition->expand_pos   = 0;
1815                                 pp_definition->is_expanding = true;
1816                                 current_expansion           = pp_definition;
1817                                 expand_next();
1818                                 continue;
1819                         }
1820                 }
1821
1822                 emit_pp_token();
1823                 next_preprocessing_token();
1824         }
1825 end_of_main_loop:
1826
1827         fputc('\n', out);
1828         check_unclosed_conditionals();
1829         close_input();
1830
1831         obstack_free(&input_obstack, NULL);
1832         obstack_free(&pp_obstack, NULL);
1833
1834         exit_tokens();
1835         exit_symbol_table();
1836
1837         return 0;
1838 }