nsz Git - cparser/blob - preprocessor.c

   1 #include <config.h>
   2
   3 #include <assert.h>
   4 #include <errno.h>
   5 #include <string.h>
   6 #include <stdbool.h>
   7 #include <ctype.h>
   8
   9 #include "token_t.h"
  10 #include "symbol_t.h"
  11 #include "adt/util.h"
  12 #include "adt/error.h"
  13 #include "lang_features.h"
  14 #include "diagnostic.h"
  15 #include "string_rep.h"
  16 #include "input.h"
  17
  18 #define MAX_PUTBACK 3
  19 #define INCLUDE_LIMIT 199  /* 199 is for gcc "compatibility" */
  20
  21 struct pp_argument_t {
  22         size_t   list_len;
  23         token_t *token_list;
  24 };
  25
  26 struct pp_definition_t {
  27         symbol_t          *symbol;
  28         source_position_t  source_position;
  29         pp_definition_t   *parent_expansion;
  30         size_t             expand_pos;
  31         bool               is_variadic    : 1;
  32         bool               is_expanding   : 1;
  33         bool               has_parameters : 1;
  34         size_t             n_parameters;
  35         symbol_t          *parameters;
  36
  37         /* replacement */
  38         size_t             list_len;
  39         token_t           *token_list;
  40
  41 };
  42
  43 typedef struct pp_conditional_t pp_conditional_t;
  44 struct pp_conditional_t {
  45         source_position_t  source_position;
  46         bool               condition;
  47         bool               in_else;
  48         bool               skip; /**< conditional in skip mode (then+else gets skipped) */
  49         pp_conditional_t  *parent;
  50 };
  51
  52 typedef struct pp_input_t pp_input_t;
  53 struct pp_input_t {
  54         FILE              *file;
  55         input_t           *input;
  56         utf32              c;
  57         utf32              buf[1024+MAX_PUTBACK];
  58         const utf32       *bufend;
  59         const utf32       *bufpos;
  60         source_position_t  position;
  61         bool               had_non_space;
  62         pp_input_t        *parent;
  63 };
  64
  65 static pp_input_t input;
  66
  67 static pp_input_t     *input_stack;
  68 static unsigned        n_inputs;
  69 static struct obstack  input_obstack;
  70
  71 static pp_conditional_t *conditional_stack;
  72
  73 static token_t            pp_token;
  74 static bool               resolve_escape_sequences = false;
  75 static bool               do_print_spaces          = true;
  76 static bool               do_expansions;
  77 static bool               skip_mode;
  78 static FILE              *out;
  79 static struct obstack     pp_obstack;
  80 static unsigned           counted_newlines;
  81 static unsigned           counted_spaces;
  82 static const char        *printed_input_name = NULL;
  83 static pp_definition_t   *current_expansion  = NULL;
  84
  85 static inline void next_char(void);
  86 static void next_preprocessing_token(void);
  87 static void print_line_directive(const source_position_t *pos, const char *add);
  88
  89 static bool open_input(const char *filename)
  90 {
  91         FILE *file = fopen(filename, "r");
  92         if (file == NULL)
  93                 return false;
  94
  95         input.file                = file;
  96         input.input               = input_from_stream(file, NULL);
  97         input.bufend              = NULL;
  98         input.bufpos              = NULL;
  99         input.had_non_space       = false;
 100         input.position.input_name = filename;
 101         input.position.lineno     = 1;
 102
 103         /* indicate that we're at a new input */
 104         print_line_directive(&input.position, input_stack != NULL ? "1" : NULL);
 105
 106         counted_newlines = 0;
 107         counted_spaces   = 0;
 108
 109         /* read first char and first token */
 110         next_char();
 111         next_preprocessing_token();
 112
 113         return true;
 114 }
 115
 116 static void close_input(void)
 117 {
 118         /* ensure we have a newline at EOF */
 119         if (input.had_non_space) {
 120                 fputc('\n', out);
 121         }
 122
 123         input_free(input.input);
 124         assert(input.file != NULL);
 125
 126         fclose(input.file);
 127         input.input  = NULL;
 128         input.file   = NULL;
 129         input.bufend = NULL;
 130         input.bufpos = NULL;
 131         input.c      = EOF;
 132 }
 133
 134 static void push_input(void)
 135 {
 136         pp_input_t *saved_input
 137                 = obstack_alloc(&input_obstack, sizeof(*saved_input));
 138
 139         memcpy(saved_input, &input, sizeof(*saved_input));
 140
 141         /* adjust buffer positions */
 142         if (input.bufpos != NULL)
 143                 saved_input->bufpos = saved_input->buf + (input.bufpos - input.buf);
 144         if (input.bufend != NULL)
 145                 saved_input->bufend = saved_input->buf + (input.bufend - input.buf);
 146
 147         saved_input->parent = input_stack;
 148         input_stack         = saved_input;
 149         ++n_inputs;
 150 }
 151
 152 static void pop_restore_input(void)
 153 {
 154         assert(n_inputs > 0);
 155         assert(input_stack != NULL);
 156
 157         pp_input_t *saved_input = input_stack;
 158
 159         memcpy(&input, saved_input, sizeof(input));
 160         input.parent = NULL;
 161
 162         /* adjust buffer positions */
 163         if (saved_input->bufpos != NULL)
 164                 input.bufpos = input.buf + (saved_input->bufpos - saved_input->buf);
 165         if (saved_input->bufend != NULL)
 166                 input.bufend = input.buf + (saved_input->bufend - saved_input->buf);
 167
 168         input_stack = saved_input->parent;
 169         obstack_free(&input_obstack, saved_input);
 170         --n_inputs;
 171 }
 172
 173 /**
 174  * Prints a parse error message at the current token.
 175  *
 176  * @param msg   the error message
 177  */
 178 static void parse_error(const char *msg)
 179 {
 180         errorf(&pp_token.source_position,  "%s", msg);
 181 }
 182
 183 static inline void next_real_char(void)
 184 {
 185         assert(input.bufpos <= input.bufend);
 186         if (input.bufpos >= input.bufend) {
 187                 size_t n = decode(input.input, input.buf + MAX_PUTBACK,
 188                                   sizeof(input.buf)/sizeof(input.buf[0]) - MAX_PUTBACK);
 189                 if (n == 0) {
 190                         input.c = EOF;
 191                         return;
 192                 }
 193                 input.bufpos = input.buf + MAX_PUTBACK;
 194                 input.bufend = input.bufpos + n;
 195         }
 196         input.c = *input.bufpos++;
 197         ++input.position.colno;
 198 }
 199
 200 /**
 201  * Put a character back into the buffer.
 202  *
 203  * @param pc  the character to put back
 204  */
 205 static inline void put_back(utf32 const pc)
 206 {
 207         assert(input.bufpos > input.buf);
 208         *(--input.bufpos - input.buf + input.buf) = (char) pc;
 209         --input.position.colno;
 210 }
 211
 212 #define MATCH_NEWLINE(code)                   \
 213         case '\r':                                \
 214                 next_char();                          \
 215                 if (input.c == '\n') {                \
 216                         next_char();                      \
 217                 }                                     \
 218                 ++input.position.lineno;              \
 219                 code                                  \
 220         case '\n':                                \
 221                 next_char();                          \
 222                 ++input.position.lineno;              \
 223                 code
 224
 225 #define eat(c_type) (assert(input.c == c_type), next_char())
 226
 227 static void maybe_concat_lines(void)
 228 {
 229         eat('\\');
 230
 231         switch (input.c) {
 232         MATCH_NEWLINE(return;)
 233
 234         default:
 235                 break;
 236         }
 237
 238         put_back(input.c);
 239         input.c = '\\';
 240 }
 241
 242 /**
 243  * Set c to the next input character, ie.
 244  * after expanding trigraphs.
 245  */
 246 static inline void next_char(void)
 247 {
 248         next_real_char();
 249
 250         /* filter trigraphs and concatenated lines */
 251         if (UNLIKELY(input.c == '\\')) {
 252                 maybe_concat_lines();
 253                 goto end_of_next_char;
 254         }
 255
 256         if (LIKELY(input.c != '?'))
 257                 goto end_of_next_char;
 258
 259         next_real_char();
 260         if (LIKELY(input.c != '?')) {
 261                 put_back(input.c);
 262                 input.c = '?';
 263                 goto end_of_next_char;
 264         }
 265
 266         next_real_char();
 267         switch (input.c) {
 268         case '=': input.c = '#'; break;
 269         case '(': input.c = '['; break;
 270         case '/': input.c = '\\'; maybe_concat_lines(); break;
 271         case ')': input.c = ']'; break;
 272         case '\'': input.c = '^'; break;
 273         case '<': input.c = '{'; break;
 274         case '!': input.c = '|'; break;
 275         case '>': input.c = '}'; break;
 276         case '-': input.c = '~'; break;
 277         default:
 278                 put_back(input.c);
 279                 put_back('?');
 280                 input.c = '?';
 281                 break;
 282         }
 283
 284 end_of_next_char:;
 285 #ifdef DEBUG_CHARS
 286         printf("nchar '%c'\n", input.c);
 287 #endif
 288 }
 289
 290
 291
 292 /**
 293  * Returns true if the given char is a octal digit.
 294  *
 295  * @param char  the character to check
 296  */
 297 static inline bool is_octal_digit(int chr)
 298 {
 299         switch (chr) {
 300         case '0':
 301         case '1':
 302         case '2':
 303         case '3':
 304         case '4':
 305         case '5':
 306         case '6':
 307         case '7':
 308                 return true;
 309         default:
 310                 return false;
 311         }
 312 }
 313
 314 /**
 315  * Returns the value of a digit.
 316  * The only portable way to do it ...
 317  */
 318 static int digit_value(int digit)
 319 {
 320         switch (digit) {
 321         case '0': return 0;
 322         case '1': return 1;
 323         case '2': return 2;
 324         case '3': return 3;
 325         case '4': return 4;
 326         case '5': return 5;
 327         case '6': return 6;
 328         case '7': return 7;
 329         case '8': return 8;
 330         case '9': return 9;
 331         case 'a':
 332         case 'A': return 10;
 333         case 'b':
 334         case 'B': return 11;
 335         case 'c':
 336         case 'C': return 12;
 337         case 'd':
 338         case 'D': return 13;
 339         case 'e':
 340         case 'E': return 14;
 341         case 'f':
 342         case 'F': return 15;
 343         default:
 344                 panic("wrong character given");
 345         }
 346 }
 347
 348 /**
 349  * Parses an octal character sequence.
 350  *
 351  * @param first_digit  the already read first digit
 352  */
 353 static int parse_octal_sequence(const int first_digit)
 354 {
 355         assert(is_octal_digit(first_digit));
 356         int value = digit_value(first_digit);
 357         if (!is_octal_digit(input.c)) return value;
 358         value = 8 * value + digit_value(input.c);
 359         next_char();
 360         if (!is_octal_digit(input.c)) return value;
 361         value = 8 * value + digit_value(input.c);
 362         next_char();
 363
 364         if (char_is_signed) {
 365                 return (signed char) value;
 366         } else {
 367                 return (unsigned char) value;
 368         }
 369 }
 370
 371 /**
 372  * Parses a hex character sequence.
 373  */
 374 static int parse_hex_sequence(void)
 375 {
 376         int value = 0;
 377         while (isxdigit(input.c)) {
 378                 value = 16 * value + digit_value(input.c);
 379                 next_char();
 380         }
 381
 382         if (char_is_signed) {
 383                 return (signed char) value;
 384         } else {
 385                 return (unsigned char) value;
 386         }
 387 }
 388
 389 /**
 390  * Parse an escape sequence.
 391  */
 392 static int parse_escape_sequence(void)
 393 {
 394         eat('\\');
 395
 396         int ec = input.c;
 397         next_char();
 398
 399         switch (ec) {
 400         case '"':  return '"';
 401         case '\'': return '\'';
 402         case '\\': return '\\';
 403         case '?': return '\?';
 404         case 'a': return '\a';
 405         case 'b': return '\b';
 406         case 'f': return '\f';
 407         case 'n': return '\n';
 408         case 'r': return '\r';
 409         case 't': return '\t';
 410         case 'v': return '\v';
 411         case 'x':
 412                 return parse_hex_sequence();
 413         case '0':
 414         case '1':
 415         case '2':
 416         case '3':
 417         case '4':
 418         case '5':
 419         case '6':
 420         case '7':
 421                 return parse_octal_sequence(ec);
 422         case EOF:
 423                 parse_error("reached end of file while parsing escape sequence");
 424                 return EOF;
 425         default:
 426                 parse_error("unknown escape sequence");
 427                 return EOF;
 428         }
 429 }
 430
 431 static void parse_string_literal(void)
 432 {
 433         const unsigned start_linenr = input.position.lineno;
 434
 435         eat('"');
 436
 437         int tc;
 438         while (true) {
 439                 switch (input.c) {
 440                 case '\\':
 441                         if (resolve_escape_sequences) {
 442                                 tc = parse_escape_sequence();
 443                                 obstack_1grow(&symbol_obstack, (char) tc);
 444                         } else {
 445                                 obstack_1grow(&symbol_obstack, (char) input.c);
 446                                 next_char();
 447                                 obstack_1grow(&symbol_obstack, (char) input.c);
 448                                 next_char();
 449                         }
 450                         break;
 451
 452                 case EOF: {
 453                         source_position_t source_position;
 454                         source_position.input_name = pp_token.source_position.input_name;
 455                         source_position.lineno     = start_linenr;
 456                         errorf(&source_position, "string has no end");
 457                         pp_token.type = TP_ERROR;
 458                         return;
 459                 }
 460
 461                 case '"':
 462                         next_char();
 463                         goto end_of_string;
 464
 465                 default:
 466                         obstack_1grow(&symbol_obstack, (char) input.c);
 467                         next_char();
 468                         break;
 469                 }
 470         }
 471
 472 end_of_string:
 473         /* add finishing 0 to the string */
 474         obstack_1grow(&symbol_obstack, '\0');
 475         const size_t      size   = (size_t)obstack_object_size(&symbol_obstack);
 476         const char *const string = obstack_finish(&symbol_obstack);
 477
 478 #if 0 /* TODO hash */
 479         /* check if there is already a copy of the string */
 480         result = strset_insert(&stringset, string);
 481         if (result != string) {
 482                 obstack_free(&symbol_obstack, string);
 483         }
 484 #else
 485         const char *const result = string;
 486 #endif
 487
 488         pp_token.type          = TP_STRING_LITERAL;
 489         pp_token.literal.begin = result;
 490         pp_token.literal.size  = size;
 491 }
 492
 493 static void parse_wide_character_constant(void)
 494 {
 495         eat('\'');
 496
 497         int found_char = 0;
 498         while (true) {
 499                 switch (input.c) {
 500                 case '\\':
 501                         found_char = parse_escape_sequence();
 502                         break;
 503
 504                 MATCH_NEWLINE(
 505                         parse_error("newline while parsing character constant");
 506                         break;
 507                 )
 508
 509                 case '\'':
 510                         next_char();
 511                         goto end_of_wide_char_constant;
 512
 513                 case EOF:
 514                         parse_error("EOF while parsing character constant");
 515                         pp_token.type = TP_ERROR;
 516                         return;
 517
 518                 default:
 519                         if (found_char != 0) {
 520                                 parse_error("more than 1 characters in character "
 521                                             "constant");
 522                                 goto end_of_wide_char_constant;
 523                         } else {
 524                                 found_char = input.c;
 525                                 next_char();
 526                         }
 527                         break;
 528                 }
 529         }
 530
 531 end_of_wide_char_constant:
 532         pp_token.type       = TP_WIDE_CHARACTER_CONSTANT;
 533         /* TODO... */
 534 }
 535
 536 static void parse_character_constant(void)
 537 {
 538         const unsigned start_linenr = input.position.lineno;
 539
 540         eat('\'');
 541
 542         int tc;
 543         while (true) {
 544                 switch (input.c) {
 545                 case '\\':
 546                         tc = parse_escape_sequence();
 547                         obstack_1grow(&symbol_obstack, (char) tc);
 548                         break;
 549
 550                 MATCH_NEWLINE(
 551                         parse_error("newline while parsing character constant");
 552                         break;
 553                 )
 554
 555                 case EOF: {
 556                         source_position_t source_position;
 557                         source_position.input_name = pp_token.source_position.input_name;
 558                         source_position.lineno     = start_linenr;
 559                         errorf(&source_position, "EOF while parsing character constant");
 560                         pp_token.type = TP_ERROR;
 561                         return;
 562                 }
 563
 564                 case '\'':
 565                         next_char();
 566                         goto end_of_char_constant;
 567
 568                 default:
 569                         obstack_1grow(&symbol_obstack, (char) input.c);
 570                         next_char();
 571                         break;
 572
 573                 }
 574         }
 575
 576 end_of_char_constant:;
 577         const size_t      size   = (size_t)obstack_object_size(&symbol_obstack);
 578         const char *const string = obstack_finish(&symbol_obstack);
 579
 580         pp_token.type          = TP_CHARACTER_CONSTANT;
 581         pp_token.literal.begin = string;
 582         pp_token.literal.size  = size;
 583 }
 584
 585 #define SYMBOL_CHARS_WITHOUT_E_P \
 586         case 'a': \
 587         case 'b': \
 588         case 'c': \
 589         case 'd': \
 590         case 'f': \
 591         case 'g': \
 592         case 'h': \
 593         case 'i': \
 594         case 'j': \
 595         case 'k': \
 596         case 'l': \
 597         case 'm': \
 598         case 'n': \
 599         case 'o': \
 600         case 'q': \
 601         case 'r': \
 602         case 's': \
 603         case 't': \
 604         case 'u': \
 605         case 'v': \
 606         case 'w': \
 607         case 'x': \
 608         case 'y': \
 609         case 'z': \
 610         case 'A': \
 611         case 'B': \
 612         case 'C': \
 613         case 'D': \
 614         case 'F': \
 615         case 'G': \
 616         case 'H': \
 617         case 'I': \
 618         case 'J': \
 619         case 'K': \
 620         case 'L': \
 621         case 'M': \
 622         case 'N': \
 623         case 'O': \
 624         case 'Q': \
 625         case 'R': \
 626         case 'S': \
 627         case 'T': \
 628         case 'U': \
 629         case 'V': \
 630         case 'W': \
 631         case 'X': \
 632         case 'Y': \
 633         case 'Z': \
 634         case '_':
 635
 636 #define SYMBOL_CHARS \
 637         SYMBOL_CHARS_WITHOUT_E_P \
 638         case 'e': \
 639         case 'p': \
 640         case 'E': \
 641         case 'P':
 642
 643 #define DIGITS \
 644         case '0':  \
 645         case '1':  \
 646         case '2':  \
 647         case '3':  \
 648         case '4':  \
 649         case '5':  \
 650         case '6':  \
 651         case '7':  \
 652         case '8':  \
 653         case '9':
 654
 655 /**
 656  * returns next final token from a preprocessor macro expansion
 657  */
 658 static void expand_next(void)
 659 {
 660         assert(current_expansion != NULL);
 661
 662         pp_definition_t *definition = current_expansion;
 663
 664 restart:
 665         if (definition->list_len == 0
 666                         || definition->expand_pos >= definition->list_len) {
 667                 /* we're finished with the current macro, move up 1 level in the
 668                  * expansion stack */
 669                 pp_definition_t *parent = definition->parent_expansion;
 670                 definition->parent_expansion = NULL;
 671                 definition->is_expanding     = false;
 672
 673                 /* it was the outermost expansion, parse normal pptoken */
 674                 if (parent == NULL) {
 675                         current_expansion = NULL;
 676                         next_preprocessing_token();
 677                         return;
 678                 }
 679                 definition        = parent;
 680                 current_expansion = definition;
 681                 goto restart;
 682         }
 683         pp_token = definition->token_list[definition->expand_pos];
 684         ++definition->expand_pos;
 685
 686         if (pp_token.type != TP_IDENTIFIER)
 687                 return;
 688
 689         /* if it was an identifier then we might need to expand again */
 690         pp_definition_t *symbol_definition = pp_token.symbol->pp_definition;
 691         if (symbol_definition != NULL && !symbol_definition->is_expanding) {
 692                 symbol_definition->parent_expansion = definition;
 693                 symbol_definition->expand_pos       = 0;
 694                 symbol_definition->is_expanding     = true;
 695                 definition                          = symbol_definition;
 696                 current_expansion                   = definition;
 697                 goto restart;
 698         }
 699 }
 700
 701 static void skip_line_comment(void)
 702 {
 703         if (do_print_spaces)
 704                 counted_spaces++;
 705
 706         while (true) {
 707                 switch (input.c) {
 708                 case EOF:
 709                         return;
 710
 711                 case '\n':
 712                 case '\r':
 713                         return;
 714
 715                 default:
 716                         next_char();
 717                         break;
 718                 }
 719         }
 720 }
 721
 722 static void skip_multiline_comment(void)
 723 {
 724         if (do_print_spaces)
 725                 counted_spaces++;
 726
 727         unsigned start_linenr = input.position.lineno;
 728         while (true) {
 729                 switch (input.c) {
 730                 case '/':
 731                         next_char();
 732                         if (input.c == '*') {
 733                                 /* TODO: nested comment, warn here */
 734                         }
 735                         break;
 736                 case '*':
 737                         next_char();
 738                         if (input.c == '/') {
 739                                 next_char();
 740                                 return;
 741                         }
 742                         break;
 743
 744                 MATCH_NEWLINE(
 745                         if (do_print_spaces) {
 746                                 counted_newlines++;
 747                                 counted_spaces = 0;
 748                         }
 749                         break;
 750                 )
 751
 752                 case EOF: {
 753                         source_position_t source_position;
 754                         source_position.input_name = pp_token.source_position.input_name;
 755                         source_position.lineno     = start_linenr;
 756                         errorf(&source_position, "at end of file while looking for comment end");
 757                         return;
 758                 }
 759
 760                 default:
 761                         next_char();
 762                         break;
 763                 }
 764         }
 765 }
 766
 767 /* skip spaces advancing at the start of the next preprocessing token */
 768 static void skip_spaces(bool skip_newline)
 769 {
 770         while (true) {
 771                 switch (input.c) {
 772                 case ' ':
 773                 case '\t':
 774                         if (do_print_spaces)
 775                                 counted_spaces++;
 776                         next_char();
 777                         continue;
 778                 case '/':
 779                         next_char();
 780                         if (input.c == '/') {
 781                                 next_char();
 782                                 skip_line_comment();
 783                                 continue;
 784                         } else if (input.c == '*') {
 785                                 next_char();
 786                                 skip_multiline_comment();
 787                                 continue;
 788                         } else {
 789                                 put_back(input.c);
 790                                 input.c = '/';
 791                         }
 792                         return;
 793
 794                 case '\r':
 795                         if (!skip_newline)
 796                                 return;
 797
 798                         next_char();
 799                         if (input.c == '\n') {
 800                                 next_char();
 801                         }
 802                         ++input.position.lineno;
 803                         if (do_print_spaces)
 804                                 ++counted_newlines;
 805                         continue;
 806
 807                 case '\n':
 808                         if (!skip_newline)
 809                                 return;
 810
 811                         next_char();
 812                         ++input.position.lineno;
 813                         if (do_print_spaces)
 814                                 ++counted_newlines;
 815                         continue;
 816
 817                 default:
 818                         return;
 819                 }
 820         }
 821 }
 822
 823 static void eat_pp(int type)
 824 {
 825         (void) type;
 826         assert(pp_token.type == type);
 827         next_preprocessing_token();
 828 }
 829
 830 static void parse_symbol(void)
 831 {
 832         obstack_1grow(&symbol_obstack, (char) input.c);
 833         next_char();
 834
 835         while (true) {
 836                 switch (input.c) {
 837                 DIGITS
 838                 SYMBOL_CHARS
 839                         obstack_1grow(&symbol_obstack, (char) input.c);
 840                         next_char();
 841                         break;
 842
 843                 default:
 844                         goto end_symbol;
 845                 }
 846         }
 847
 848 end_symbol:
 849         obstack_1grow(&symbol_obstack, '\0');
 850         char *string = obstack_finish(&symbol_obstack);
 851
 852         /* might be a wide string or character constant ( L"string"/L'c' ) */
 853         if (input.c == '"' && string[0] == 'L' && string[1] == '\0') {
 854                 obstack_free(&symbol_obstack, string);
 855                 /* TODO */
 856                 return;
 857         } else if (input.c == '\'' && string[0] == 'L' && string[1] == '\0') {
 858                 obstack_free(&symbol_obstack, string);
 859                 parse_wide_character_constant();
 860                 return;
 861         }
 862
 863         symbol_t *symbol = symbol_table_insert(string);
 864
 865         pp_token.type   = symbol->pp_ID;
 866         pp_token.symbol = symbol;
 867
 868         /* we can free the memory from symbol obstack if we already had an entry in
 869          * the symbol table */
 870         if (symbol->string != string) {
 871                 obstack_free(&symbol_obstack, string);
 872         }
 873         if (!do_expansions)
 874                 return;
 875
 876         pp_definition_t *pp_definition = symbol->pp_definition;
 877         if (pp_definition == NULL)
 878                 return;
 879
 880         if (pp_definition->has_parameters) {
 881                 skip_spaces(true);
 882                 /* no opening brace -> no expansion */
 883                 if (input.c != '(')
 884                         return;
 885                 next_preprocessing_token();
 886                 eat_pp('(');
 887
 888                 /* parse arguments (TODO) */
 889                 while (pp_token.type != TP_EOF && pp_token.type != ')')
 890                         next_preprocessing_token();
 891                 next_preprocessing_token();
 892         }
 893
 894         pp_definition->expand_pos   = 0;
 895         pp_definition->is_expanding = true,
 896         current_expansion           = pp_definition;
 897         expand_next();
 898 }
 899
 900 static void parse_number(void)
 901 {
 902         obstack_1grow(&symbol_obstack, (char) input.c);
 903         next_char();
 904
 905         while (true) {
 906                 switch (input.c) {
 907                 case '.':
 908                 DIGITS
 909                 SYMBOL_CHARS_WITHOUT_E_P
 910                         obstack_1grow(&symbol_obstack, (char) input.c);
 911                         next_char();
 912                         break;
 913
 914                 case 'e':
 915                 case 'p':
 916                 case 'E':
 917                 case 'P':
 918                         obstack_1grow(&symbol_obstack, (char) input.c);
 919                         next_char();
 920                         if (input.c == '+' || input.c == '-') {
 921                                 obstack_1grow(&symbol_obstack, (char) input.c);
 922                                 next_char();
 923                         }
 924                         break;
 925
 926                 default:
 927                         goto end_number;
 928                 }
 929         }
 930
 931 end_number:
 932         obstack_1grow(&symbol_obstack, '\0');
 933         size_t  size   = obstack_object_size(&symbol_obstack);
 934         char   *string = obstack_finish(&symbol_obstack);
 935
 936         pp_token.type          = TP_NUMBER;
 937         pp_token.literal.begin = string;
 938         pp_token.literal.size  = size;
 939 }
 940
 941
 942 #define MAYBE_PROLOG                                       \
 943                         next_char();                                   \
 944                         while (true) {                                 \
 945                                 switch (input.c) {
 946
 947 #define MAYBE(ch, set_type)                                \
 948                                 case ch:                                   \
 949                                         next_char();                           \
 950                                         pp_token.type = set_type;              \
 951                                         return;
 952
 953 #define ELSE_CODE(code)                                    \
 954                                 default:                                   \
 955                                         code                                   \
 956                                         return;                                \
 957                                 }                                          \
 958                         }
 959
 960 #define ELSE(set_type)                                     \
 961                 ELSE_CODE(                                         \
 962                         pp_token.type = set_type;                      \
 963                 )
 964
 965 static void next_preprocessing_token(void)
 966 {
 967         if (current_expansion != NULL) {
 968                 expand_next();
 969                 return;
 970         }
 971
 972         pp_token.source_position = input.position;
 973
 974 restart:
 975         switch (input.c) {
 976         case ' ':
 977         case '\t':
 978                 if (do_print_spaces)
 979                         counted_spaces++;
 980                 next_char();
 981                 goto restart;
 982
 983         MATCH_NEWLINE(
 984                 counted_newlines++;
 985                 counted_spaces = 0;
 986                 pp_token.type = '\n';
 987                 return;
 988         )
 989
 990         SYMBOL_CHARS
 991                 parse_symbol();
 992                 return;
 993
 994         DIGITS
 995                 parse_number();
 996                 return;
 997
 998         case '"':
 999                 parse_string_literal();
1000                 return;
1001
1002         case '\'':
1003                 parse_character_constant();
1004                 return;
1005
1006         case '.':
1007                 MAYBE_PROLOG
1008                         case '0':
1009                         case '1':
1010                         case '2':
1011                         case '3':
1012                         case '4':
1013                         case '5':
1014                         case '6':
1015                         case '7':
1016                         case '8':
1017                         case '9':
1018                                 put_back(input.c);
1019                                 input.c = '.';
1020                                 parse_number();
1021                                 return;
1022
1023                         case '.':
1024                                 MAYBE_PROLOG
1025                                 MAYBE('.', TP_DOTDOTDOT)
1026                                 ELSE_CODE(
1027                                         put_back(input.c);
1028                                         input.c = '.';
1029                                         pp_token.type = '.';
1030                                 )
1031                 ELSE('.')
1032         case '&':
1033                 MAYBE_PROLOG
1034                 MAYBE('&', TP_ANDAND)
1035                 MAYBE('=', TP_ANDEQUAL)
1036                 ELSE('&')
1037         case '*':
1038                 MAYBE_PROLOG
1039                 MAYBE('=', TP_ASTERISKEQUAL)
1040                 ELSE('*')
1041         case '+':
1042                 MAYBE_PROLOG
1043                 MAYBE('+', TP_PLUSPLUS)
1044                 MAYBE('=', TP_PLUSEQUAL)
1045                 ELSE('+')
1046         case '-':
1047                 MAYBE_PROLOG
1048                 MAYBE('>', TP_MINUSGREATER)
1049                 MAYBE('-', TP_MINUSMINUS)
1050                 MAYBE('=', TP_MINUSEQUAL)
1051                 ELSE('-')
1052         case '!':
1053                 MAYBE_PROLOG
1054                 MAYBE('=', TP_EXCLAMATIONMARKEQUAL)
1055                 ELSE('!')
1056         case '/':
1057                 MAYBE_PROLOG
1058                 MAYBE('=', TP_SLASHEQUAL)
1059                         case '*':
1060                                 next_char();
1061                                 skip_multiline_comment();
1062                                 goto restart;
1063                         case '/':
1064                                 next_char();
1065                                 skip_line_comment();
1066                                 goto restart;
1067                 ELSE('/')
1068         case '%':
1069                 MAYBE_PROLOG
1070                 MAYBE('>', '}')
1071                 MAYBE('=', TP_PERCENTEQUAL)
1072                         case ':':
1073                                 MAYBE_PROLOG
1074                                         case '%':
1075                                                 MAYBE_PROLOG
1076                                                 MAYBE(':', TP_HASHHASH)
1077                                                 ELSE_CODE(
1078                                                         put_back(input.c);
1079                                                         input.c = '%';
1080                                                         pp_token.type = '#';
1081                                                 )
1082                                 ELSE('#')
1083                 ELSE('%')
1084         case '<':
1085                 MAYBE_PROLOG
1086                 MAYBE(':', '[')
1087                 MAYBE('%', '{')
1088                 MAYBE('=', TP_LESSEQUAL)
1089                         case '<':
1090                                 MAYBE_PROLOG
1091                                 MAYBE('=', TP_LESSLESSEQUAL)
1092                                 ELSE(TP_LESSLESS)
1093                 ELSE('<')
1094         case '>':
1095                 MAYBE_PROLOG
1096                 MAYBE('=', TP_GREATEREQUAL)
1097                         case '>':
1098                                 MAYBE_PROLOG
1099                                 MAYBE('=', TP_GREATERGREATEREQUAL)
1100                                 ELSE(TP_GREATERGREATER)
1101                 ELSE('>')
1102         case '^':
1103                 MAYBE_PROLOG
1104                 MAYBE('=', TP_CARETEQUAL)
1105                 ELSE('^')
1106         case '|':
1107                 MAYBE_PROLOG
1108                 MAYBE('=', TP_PIPEEQUAL)
1109                 MAYBE('|', TP_PIPEPIPE)
1110                 ELSE('|')
1111         case ':':
1112                 MAYBE_PROLOG
1113                 MAYBE('>', ']')
1114                 ELSE(':')
1115         case '=':
1116                 MAYBE_PROLOG
1117                 MAYBE('=', TP_EQUALEQUAL)
1118                 ELSE('=')
1119         case '#':
1120                 MAYBE_PROLOG
1121                 MAYBE('#', TP_HASHHASH)
1122                 ELSE('#')
1123
1124         case '?':
1125         case '[':
1126         case ']':
1127         case '(':
1128         case ')':
1129         case '{':
1130         case '}':
1131         case '~':
1132         case ';':
1133         case ',':
1134         case '\\':
1135                 pp_token.type = input.c;
1136                 next_char();
1137                 return;
1138
1139         case EOF:
1140                 if (input_stack != NULL) {
1141                         close_input();
1142                         pop_restore_input();
1143                         counted_newlines = 0;
1144                         counted_spaces   = 0;
1145                         /* hack to output correct line number */
1146                         print_line_directive(&input.position, "2");
1147                         next_preprocessing_token();
1148                 } else {
1149                         pp_token.type = TP_EOF;
1150                 }
1151                 return;
1152
1153         default:
1154                 next_char();
1155                 errorf(&pp_token.source_position, "unknown character '%c' found\n",
1156                        input.c);
1157                 pp_token.type = TP_ERROR;
1158                 return;
1159         }
1160 }
1161
1162 static void print_quoted_string(const char *const string)
1163 {
1164         fputc('"', out);
1165         for (const char *c = string; *c != 0; ++c) {
1166                 switch (*c) {
1167                 case '"': fputs("\\\"", out); break;
1168                 case '\\':  fputs("\\\\", out); break;
1169                 case '\a':  fputs("\\a", out); break;
1170                 case '\b':  fputs("\\b", out); break;
1171                 case '\f':  fputs("\\f", out); break;
1172                 case '\n':  fputs("\\n", out); break;
1173                 case '\r':  fputs("\\r", out); break;
1174                 case '\t':  fputs("\\t", out); break;
1175                 case '\v':  fputs("\\v", out); break;
1176                 case '\?':  fputs("\\?", out); break;
1177                 default:
1178                         if (!isprint(*c)) {
1179                                 fprintf(out, "\\%03o", (unsigned)*c);
1180                                 break;
1181                         }
1182                         fputc(*c, out);
1183                         break;
1184                 }
1185         }
1186         fputc('"', out);
1187 }
1188
1189 static void print_line_directive(const source_position_t *pos, const char *add)
1190 {
1191         fprintf(out, "# %u ", pos->lineno);
1192         print_quoted_string(pos->input_name);
1193         if (add != NULL) {
1194                 fputc(' ', out);
1195                 fputs(add, out);
1196         }
1197         fputc('\n', out);
1198
1199         printed_input_name = pos->input_name;
1200 }
1201
1202 static void print_spaces(void)
1203 {
1204         if (counted_newlines >= 9) {
1205                 if (input.had_non_space) {
1206                         fputc('\n', out);
1207                 }
1208                 print_line_directive(&pp_token.source_position, NULL);
1209                 counted_newlines = 0;
1210         } else {
1211                 for (unsigned i = 0; i < counted_newlines; ++i)
1212                         fputc('\n', out);
1213                 counted_newlines = 0;
1214         }
1215         for (unsigned i = 0; i < counted_spaces; ++i)
1216                 fputc(' ', out);
1217         counted_spaces = 0;
1218 }
1219
1220 static void emit_pp_token(void)
1221 {
1222         if (skip_mode)
1223                 return;
1224
1225         if (pp_token.type != '\n') {
1226                 print_spaces();
1227                 input.had_non_space = true;
1228         }
1229
1230         switch (pp_token.type) {
1231         case TP_IDENTIFIER:
1232                 fputs(pp_token.symbol->string, out);
1233                 break;
1234         case TP_NUMBER:
1235                 fputs(pp_token.literal.begin, out);
1236                 break;
1237         case TP_STRING_LITERAL:
1238                 fputc('"', out);
1239                 fputs(pp_token.literal.begin, out);
1240                 fputc('"', out);
1241                 break;
1242         case '\n':
1243                 break;
1244         default:
1245                 print_pp_token_type(out, pp_token.type);
1246                 break;
1247         }
1248 }
1249
1250 static void eat_pp_directive(void)
1251 {
1252         while (pp_token.type != '\n' && pp_token.type != TP_EOF) {
1253                 next_preprocessing_token();
1254         }
1255 }
1256
1257 static bool strings_equal(const string_t *string1, const string_t *string2)
1258 {
1259         size_t size = string1->size;
1260         if (size != string2->size)
1261                 return false;
1262
1263         const char *c1 = string1->begin;
1264         const char *c2 = string2->begin;
1265         for (size_t i = 0; i < size; ++i, ++c1, ++c2) {
1266                 if (*c1 != *c2)
1267                         return false;
1268         }
1269         return true;
1270 }
1271
1272 static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
1273 {
1274         if (token1->type != token2->type)
1275                 return false;
1276
1277         switch (token1->type) {
1278         case TP_HEADERNAME:
1279                 /* TODO */
1280                 return false;
1281         case TP_IDENTIFIER:
1282                 return token1->symbol == token2->symbol;
1283         case TP_NUMBER:
1284         case TP_CHARACTER_CONSTANT:
1285         case TP_STRING_LITERAL:
1286                 return strings_equal(&token1->literal, &token2->literal);
1287
1288         default:
1289                 return true;
1290         }
1291 }
1292
1293 static bool pp_definitions_equal(const pp_definition_t *definition1,
1294                                  const pp_definition_t *definition2)
1295 {
1296         if (definition1->list_len != definition2->list_len)
1297                 return false;
1298
1299         size_t         len = definition1->list_len;
1300         const token_t *t1  = definition1->token_list;
1301         const token_t *t2  = definition2->token_list;
1302         for (size_t i = 0; i < len; ++i, ++t1, ++t2) {
1303                 if (!pp_tokens_equal(t1, t2))
1304                         return false;
1305         }
1306         return true;
1307 }
1308
1309 static void parse_define_directive(void)
1310 {
1311         eat_pp(TP_define);
1312         assert(obstack_object_size(&pp_obstack) == 0);
1313
1314         if (pp_token.type != TP_IDENTIFIER) {
1315                 errorf(&pp_token.source_position,
1316                        "expected identifier after #define, got '%t'", &pp_token);
1317                 goto error_out;
1318         }
1319         symbol_t *symbol = pp_token.symbol;
1320
1321         pp_definition_t *new_definition
1322                 = obstack_alloc(&pp_obstack, sizeof(new_definition[0]));
1323         memset(new_definition, 0, sizeof(new_definition[0]));
1324         new_definition->source_position = input.position;
1325
1326         /* this is probably the only place where spaces are significant in the
1327          * lexer (except for the fact that they separate tokens). #define b(x)
1328          * is something else than #define b (x) */
1329         if (input.c == '(') {
1330                 /* eat the '(' */
1331                 next_preprocessing_token();
1332                 /* get next token after '(' */
1333                 next_preprocessing_token();
1334
1335                 while (true) {
1336                         switch (pp_token.type) {
1337                         case TP_DOTDOTDOT:
1338                                 new_definition->is_variadic = true;
1339                                 next_preprocessing_token();
1340                                 if (pp_token.type != ')') {
1341                                         errorf(&input.position,
1342                                                         "'...' not at end of macro argument list");
1343                                         goto error_out;
1344                                 }
1345                                 break;
1346                         case TP_IDENTIFIER:
1347                                 obstack_ptr_grow(&pp_obstack, pp_token.symbol);
1348                                 next_preprocessing_token();
1349
1350                                 if (pp_token.type == ',') {
1351                                         next_preprocessing_token();
1352                                         break;
1353                                 }
1354
1355                                 if (pp_token.type != ')') {
1356                                         errorf(&pp_token.source_position,
1357                                                "expected ',' or ')' after identifier, got '%t'",
1358                                                &pp_token);
1359                                         goto error_out;
1360                                 }
1361                                 break;
1362                         case ')':
1363                                 next_preprocessing_token();
1364                                 goto finish_argument_list;
1365                         default:
1366                                 errorf(&pp_token.source_position,
1367                                        "expected identifier, '...' or ')' in #define argument list, got '%t'",
1368                                        &pp_token);
1369                                 goto error_out;
1370                         }
1371                 }
1372
1373         finish_argument_list:
1374                 new_definition->has_parameters = true;
1375                 new_definition->n_parameters
1376                         = obstack_object_size(&pp_obstack) / sizeof(new_definition->parameters[0]);
1377                 new_definition->parameters = obstack_finish(&pp_obstack);
1378         } else {
1379                 next_preprocessing_token();
1380         }
1381
1382         /* construct a new pp_definition on the obstack */
1383         assert(obstack_object_size(&pp_obstack) == 0);
1384         size_t list_len = 0;
1385         while (pp_token.type != '\n' && pp_token.type != TP_EOF) {
1386                 obstack_grow(&pp_obstack, &pp_token, sizeof(pp_token));
1387                 ++list_len;
1388                 next_preprocessing_token();
1389         }
1390
1391         new_definition->list_len   = list_len;
1392         new_definition->token_list = obstack_finish(&pp_obstack);
1393
1394         pp_definition_t *old_definition = symbol->pp_definition;
1395         if (old_definition != NULL) {
1396                 if (!pp_definitions_equal(old_definition, new_definition)) {
1397                         warningf(WARN_OTHER, &input.position, "multiple definition of macro '%Y' (first defined %P)", symbol, &old_definition->source_position);
1398                 } else {
1399                         /* reuse the old definition */
1400                         obstack_free(&pp_obstack, new_definition);
1401                         new_definition = old_definition;
1402                 }
1403         }
1404
1405         symbol->pp_definition = new_definition;
1406         return;
1407
1408 error_out:
1409         if (obstack_object_size(&pp_obstack) > 0) {
1410                 char *ptr = obstack_finish(&pp_obstack);
1411                 obstack_free(&pp_obstack, ptr);
1412         }
1413         eat_pp_directive();
1414 }
1415
1416 static void parse_undef_directive(void)
1417 {
1418         eat_pp(TP_undef);
1419
1420         if (pp_token.type != TP_IDENTIFIER) {
1421                 errorf(&input.position,
1422                        "expected identifier after #undef, got '%t'", &pp_token);
1423                 eat_pp_directive();
1424                 return;
1425         }
1426
1427         symbol_t *symbol = pp_token.symbol;
1428         symbol->pp_definition = NULL;
1429         next_preprocessing_token();
1430
1431         if (pp_token.type != '\n') {
1432                 warningf(WARN_OTHER, &input.position, "extra tokens at end of #undef directive");
1433         }
1434         /* eat until '\n' */
1435         eat_pp_directive();
1436 }
1437
1438 static const char *parse_headername(void)
1439 {
1440         /* behind an #include we can have the special headername lexems.
1441          * They're only allowed behind an #include so they're not recognized
1442          * by the normal next_preprocessing_token. We handle them as a special
1443          * exception here */
1444
1445         /* skip spaces so we reach start of next preprocessing token */
1446         skip_spaces(false);
1447
1448         assert(obstack_object_size(&input_obstack) == 0);
1449
1450         /* check wether we have a "... or <... headername */
1451         switch (input.c) {
1452         case '<':
1453                 next_char();
1454                 while (true) {
1455                         switch (input.c) {
1456                         case EOF:
1457                                 /* fallthrough */
1458                         MATCH_NEWLINE(
1459                                 parse_error("header name without closing '>'");
1460                                 return NULL;
1461                         )
1462                         case '>':
1463                                 next_char();
1464                                 goto finished_headername;
1465                         }
1466                         obstack_1grow(&input_obstack, (char) input.c);
1467                         next_char();
1468                 }
1469                 /* we should never be here */
1470
1471         case '"':
1472                 next_char();
1473                 while (true) {
1474                         switch (input.c) {
1475                         case EOF:
1476                                 /* fallthrough */
1477                         MATCH_NEWLINE(
1478                                 parse_error("header name without closing '>'");
1479                                 return NULL;
1480                         )
1481                         case '"':
1482                                 next_char();
1483                                 goto finished_headername;
1484                         }
1485                         obstack_1grow(&input_obstack, (char) input.c);
1486                         next_char();
1487                 }
1488                 /* we should never be here */
1489
1490         default:
1491                 /* TODO: do normale pp_token parsing and concatenate results */
1492                 panic("pp_token concat include not implemented yet");
1493         }
1494
1495 finished_headername:
1496         obstack_1grow(&input_obstack, '\0');
1497         char *headername = obstack_finish(&input_obstack);
1498
1499         /* TODO: iterate search-path to find the file */
1500
1501         next_preprocessing_token();
1502
1503         return headername;
1504 }
1505
1506 static bool parse_include_directive(void)
1507 {
1508         /* don't eat the TP_include here!
1509          * we need an alternative parsing for the next token */
1510
1511         print_spaces();
1512
1513         const char *headername = parse_headername();
1514         if (headername == NULL) {
1515                 eat_pp_directive();
1516                 return false;
1517         }
1518
1519         if (pp_token.type != '\n' && pp_token.type != TP_EOF) {
1520                 warningf(WARN_OTHER, &pp_token.source_position, "extra tokens at end of #include directive");
1521                 eat_pp_directive();
1522         }
1523
1524         if (n_inputs > INCLUDE_LIMIT) {
1525                 errorf(&pp_token.source_position, "#include nested too deeply");
1526                 /* eat \n or EOF */
1527                 next_preprocessing_token();
1528                 return false;
1529         }
1530
1531         /* we have to reenable space counting and macro expansion here,
1532          * because it is still disabled in directive parsing,
1533          * but we will trigger a preprocessing token reading of the new file
1534          * now and need expansions/space counting */
1535         do_print_spaces = true;
1536         do_expansions   = true;
1537
1538         /* switch inputs */
1539         push_input();
1540         bool res = open_input(headername);
1541         if (!res) {
1542                 errorf(&pp_token.source_position,
1543                        "failed including '%s': %s", headername, strerror(errno));
1544                 pop_restore_input();
1545                 return false;
1546         }
1547
1548         return true;
1549 }
1550
1551 static pp_conditional_t *push_conditional(void)
1552 {
1553         pp_conditional_t *conditional
1554                 = obstack_alloc(&pp_obstack, sizeof(*conditional));
1555         memset(conditional, 0, sizeof(*conditional));
1556
1557         conditional->parent = conditional_stack;
1558         conditional_stack   = conditional;
1559
1560         return conditional;
1561 }
1562
1563 static void pop_conditional(void)
1564 {
1565         assert(conditional_stack != NULL);
1566         conditional_stack = conditional_stack->parent;
1567 }
1568
1569 static void check_unclosed_conditionals(void)
1570 {
1571         while (conditional_stack != NULL) {
1572                 pp_conditional_t *conditional = conditional_stack;
1573
1574                 if (conditional->in_else) {
1575                         errorf(&conditional->source_position, "unterminated #else");
1576                 } else {
1577                         errorf(&conditional->source_position, "unterminated condition");
1578                 }
1579                 pop_conditional();
1580         }
1581 }
1582
1583 static void parse_ifdef_ifndef_directive(void)
1584 {
1585         bool is_ifndef = (pp_token.type == TP_ifndef);
1586         bool condition;
1587         next_preprocessing_token();
1588
1589         if (skip_mode) {
1590                 eat_pp_directive();
1591                 pp_conditional_t *conditional = push_conditional();
1592                 conditional->source_position  = pp_token.source_position;
1593                 conditional->skip             = true;
1594                 return;
1595         }
1596
1597         if (pp_token.type != TP_IDENTIFIER) {
1598                 errorf(&pp_token.source_position,
1599                        "expected identifier after #%s, got '%t'",
1600                        is_ifndef ? "ifndef" : "ifdef", &pp_token);
1601                 eat_pp_directive();
1602
1603                 /* just take the true case in the hope to avoid further errors */
1604                 condition = true;
1605         } else {
1606                 symbol_t        *symbol        = pp_token.symbol;
1607                 pp_definition_t *pp_definition = symbol->pp_definition;
1608                 next_preprocessing_token();
1609
1610                 if (pp_token.type != '\n') {
1611                         errorf(&pp_token.source_position,
1612                                "extra tokens at end of #%s",
1613                                is_ifndef ? "ifndef" : "ifdef");
1614                         eat_pp_directive();
1615                 }
1616
1617                 /* evaluate wether we are in true or false case */
1618                 condition = is_ifndef ? pp_definition == NULL : pp_definition != NULL;
1619         }
1620
1621         pp_conditional_t *conditional = push_conditional();
1622         conditional->source_position  = pp_token.source_position;
1623         conditional->condition        = condition;
1624
1625         if (!condition) {
1626                 skip_mode = true;
1627         }
1628 }
1629
1630 static void parse_else_directive(void)
1631 {
1632         eat_pp(TP_else);
1633
1634         if (pp_token.type != '\n') {
1635                 if (!skip_mode) {
1636                         warningf(WARN_OTHER, &pp_token.source_position, "extra tokens at end of #else");
1637                 }
1638                 eat_pp_directive();
1639         }
1640
1641         pp_conditional_t *conditional = conditional_stack;
1642         if (conditional == NULL) {
1643                 errorf(&pp_token.source_position, "#else without prior #if");
1644                 return;
1645         }
1646
1647         if (conditional->in_else) {
1648                 errorf(&pp_token.source_position,
1649                        "#else after #else (condition started %P)",
1650                        conditional->source_position);
1651                 skip_mode = true;
1652                 return;
1653         }
1654
1655         conditional->in_else = true;
1656         if (!conditional->skip) {
1657                 skip_mode = conditional->condition;
1658         }
1659         conditional->source_position = pp_token.source_position;
1660 }
1661
1662 static void parse_endif_directive(void)
1663 {
1664         eat_pp(TP_endif);
1665
1666         if (pp_token.type != '\n') {
1667                 if (!skip_mode) {
1668                         warningf(WARN_OTHER, &pp_token.source_position, "extra tokens at end of #endif");
1669                 }
1670                 eat_pp_directive();
1671         }
1672
1673         pp_conditional_t *conditional = conditional_stack;
1674         if (conditional == NULL) {
1675                 errorf(&pp_token.source_position, "#endif without prior #if");
1676                 return;
1677         }
1678
1679         if (!conditional->skip) {
1680                 skip_mode = false;
1681         }
1682         pop_conditional();
1683 }
1684
1685 static void parse_preprocessing_directive(void)
1686 {
1687         do_print_spaces = false;
1688         do_expansions   = false;
1689         eat_pp('#');
1690
1691         if (skip_mode) {
1692                 switch (pp_token.type) {
1693                 case TP_ifdef:
1694                 case TP_ifndef:
1695                         parse_ifdef_ifndef_directive();
1696                         break;
1697                 case TP_else:
1698                         parse_else_directive();
1699                         break;
1700                 case TP_endif:
1701                         parse_endif_directive();
1702                         break;
1703                 default:
1704                         eat_pp_directive();
1705                         break;
1706                 }
1707         } else {
1708                 switch (pp_token.type) {
1709                 case TP_define:
1710                         parse_define_directive();
1711                         break;
1712                 case TP_undef:
1713                         parse_undef_directive();
1714                         break;
1715                 case TP_ifdef:
1716                 case TP_ifndef:
1717                         parse_ifdef_ifndef_directive();
1718                         break;
1719                 case TP_else:
1720                         parse_else_directive();
1721                         break;
1722                 case TP_endif:
1723                         parse_endif_directive();
1724                         break;
1725                 case TP_include: {
1726                         bool in_new_source = parse_include_directive();
1727                         /* no need to do anything if source file switched */
1728                         if (in_new_source)
1729                                 return;
1730                         break;
1731                 }
1732                 case '\n':
1733                         /* the nop directive */
1734                         break;
1735                 default:
1736                         errorf(&pp_token.source_position,
1737                                    "invalid preprocessing directive #%t", &pp_token);
1738                         eat_pp_directive();
1739                         break;
1740                 }
1741         }
1742
1743         do_print_spaces = true;
1744         do_expansions   = true;
1745
1746         /* eat '\n' */
1747         assert(pp_token.type == '\n' || pp_token.type == TP_EOF);
1748         next_preprocessing_token();
1749 }
1750
1751 #define GCC_COMPAT_MODE
1752
1753 int pptest_main(int argc, char **argv);
1754 int pptest_main(int argc, char **argv)
1755 {
1756         init_symbol_table();
1757         init_tokens();
1758
1759         obstack_init(&pp_obstack);
1760         obstack_init(&input_obstack);
1761
1762         const char *filename = "t.c";
1763         if (argc > 1)
1764                 filename = argv[1];
1765
1766         out = stdout;
1767
1768 #ifdef GCC_COMPAT_MODE
1769         /* this is here so we can directly compare "gcc -E" output and our output */
1770         fprintf(out, "# 1 \"%s\"\n", filename);
1771         fputs("# 1 \"<built-in>\"\n", out);
1772         fputs("# 1 \"<command-line>\"\n", out);
1773 #endif
1774
1775         bool ok = open_input(filename);
1776         assert(ok);
1777
1778         while (true) {
1779                 /* we're at a line begin */
1780                 if (pp_token.type == '#') {
1781                         parse_preprocessing_directive();
1782                 } else {
1783                         /* parse+emit a line */
1784                         while (pp_token.type != '\n') {
1785                                 if (pp_token.type == TP_EOF)
1786                                         goto end_of_main_loop;
1787                                 emit_pp_token();
1788                                 next_preprocessing_token();
1789                         }
1790                         emit_pp_token();
1791                         next_preprocessing_token();
1792                 }
1793         }
1794 end_of_main_loop:
1795
1796         check_unclosed_conditionals();
1797         close_input();
1798
1799         obstack_free(&input_obstack, NULL);
1800         obstack_free(&pp_obstack, NULL);
1801
1802         exit_tokens();
1803         exit_symbol_table();
1804
1805         return 0;
1806 }