nsz Git - cparser/blob - preprocessor.c

   1 #include <config.h>
   2
   3 #include "token_t.h"
   4 #include "symbol_t.h"
   5 #include "adt/util.h"
   6 #include "adt/error.h"
   7 #include "lang_features.h"
   8 #include "diagnostic.h"
   9 #include "string_rep.h"
  10
  11 #include <assert.h>
  12 #include <errno.h>
  13 #include <string.h>
  14 #include <stdbool.h>
  15 #include <ctype.h>
  16
/* When defined, next_char() and put_back() trace every character to stdout. */
//#define DEBUG_CHARS
/* Bytes kept free at the start of pp_input_t.buf so that put_back() has room
 * even directly after the buffer was refilled. */
#define MAX_PUTBACK 3
/* NOTE(review): not referenced in this part of the file; presumably the
 * maximum #include nesting depth - confirm at the use site. */
#define INCLUDE_LIMIT 199  /* 199 is for gcc "compatibility" */
  20
/**
 * A token sequence with an explicit length.
 * NOTE(review): unused in the visible part of this file; presumably holds the
 * tokens of one macro argument - confirm.
 */
struct pp_argument_t {
        size_t   list_len;   /**< presumably the number of entries in token_list */
        token_t *token_list;
};
  25
/**
 * A preprocessor macro definition. The state of a running expansion
 * (parent_expansion, expand_pos, is_expanding) is stored in the definition
 * itself.
 */
struct pp_definition_t {
        symbol_t          *symbol;
        source_position_t  source_position;
        /** definition whose expansion is resumed once this one is exhausted
         *  (set and cleared by expand_next()) */
        pp_definition_t   *parent_expansion;
        /** index into token_list of the next token to hand out */
        size_t             expand_pos;
        bool               is_variadic    : 1;
        /** set while the definition is being expanded; expand_next() does not
         *  expand a definition again while this flag is set */
        bool               is_expanding   : 1;
        /** parse_symbol() expands such a definition only when a '(' follows
         *  the macro name */
        bool               has_parameters : 1;
        size_t             n_parameters;
        symbol_t          *parameters;

        /* replacement */
        size_t             list_len;    /**< number of tokens in token_list */
        token_t           *token_list;

};
  42
typedef struct pp_conditional_t pp_conditional_t;
/**
 * One entry of a singly linked stack (see conditional_stack).
 * NOTE(review): not used in the visible part of this file; presumably one
 * entry per open #if/#ifdef/#ifndef - confirm.
 */
struct pp_conditional_t {
        source_position_t  source_position;
        bool               condition;
        bool               in_else;
        bool               skip; /**< conditional in skip mode (then+else gets skipped) */
        pp_conditional_t  *parent;
};
  51
typedef struct pp_input_t pp_input_t;
/**
 * Reader state of one input file: the stream, a read buffer with room for
 * put-back characters in front, and the current source position.
 */
struct pp_input_t {
        FILE              *file;    /**< stream the buffer is filled from */
        int                c;       /**< current character or EOF (accessed as CC) */
        /** read buffer; fread() fills it starting at offset MAX_PUTBACK */
        char               buf[1024+MAX_PUTBACK];
        const char        *bufend;  /**< one past the last valid byte in buf */
        const char        *bufpos;  /**< next byte of buf to be read */
        source_position_t  position; /**< input name and current line number */
        /** close_input() writes a final newline to the output when this is set */
        bool               had_non_space;
        pp_input_t        *parent;  /**< next older entry on input_stack */
};
  63
/* the input that is currently being read */
static pp_input_t input;
/* shorthand for the current character of the current input */
#define CC input.c

/* saved outer inputs (see push_input()/pop_restore_input()); newest first */
static pp_input_t     *input_stack;
static unsigned        n_inputs;       /* number of entries on input_stack */
static struct obstack  input_obstack;  /* storage for the input_stack entries */

static pp_conditional_t *conditional_stack;

/* the current preprocessing token */
static token_t            pp_token;
/* if true, parse_string_literal() replaces escape sequences by their value
 * instead of copying them verbatim */
static bool               resolve_escape_sequences = false;
/* if true, skipped spaces/newlines are counted in counted_spaces and
 * counted_newlines */
static bool               do_print_spaces          = true;
/* if false, parse_symbol() never starts a macro expansion */
static bool               do_expansions;
static bool               skip_mode;
/* output stream */
static FILE              *out;
static struct obstack     pp_obstack;
static unsigned           counted_newlines;
static unsigned           counted_spaces;
static const char        *printed_input_name = NULL;
/* innermost macro definition being expanded, NULL when tokens come directly
 * from the input */
static pp_definition_t   *current_expansion  = NULL;

/* forward declarations; the definitions follow later in the file */
static inline void next_char(void);
static void next_preprocessing_token(void);
static void print_line_directive(const source_position_t *pos, const char *add);
  88
  89 static bool open_input(const char *filename)
  90 {
  91         FILE *file = fopen(filename, "r");
  92         if (file == NULL)
  93                 return false;
  94
  95         input.file                = file;
  96         input.bufend              = NULL;
  97         input.bufpos              = NULL;
  98         input.had_non_space       = false;
  99         input.position.input_name = filename;
 100         input.position.lineno     = 1;
 101
 102         /* indicate that we're at a new input */
 103         print_line_directive(&input.position, input_stack != NULL ? "1" : NULL);
 104
 105         counted_newlines = 0;
 106         counted_spaces   = 0;
 107
 108         /* read first char and first token */
 109         next_char();
 110         next_preprocessing_token();
 111
 112         return true;
 113 }
 114
 115 static void close_input(void)
 116 {
 117         /* ensure we have a newline at EOF */
 118         if (input.had_non_space) {
 119                 fputc('\n', out);
 120         }
 121
 122         assert(input.file != NULL);
 123
 124         fclose(input.file);
 125         input.file   = NULL;
 126         input.bufend = NULL;
 127         input.bufpos = NULL;
 128         input.c      = EOF;
 129 }
 130
 131 static void push_input(void)
 132 {
 133         pp_input_t *saved_input
 134                 = obstack_alloc(&input_obstack, sizeof(*saved_input));
 135
 136         memcpy(saved_input, &input, sizeof(*saved_input));
 137
 138         /* adjust buffer positions */
 139         if (input.bufpos != NULL)
 140                 saved_input->bufpos = saved_input->buf + (input.bufpos - input.buf);
 141         if (input.bufend != NULL)
 142                 saved_input->bufend = saved_input->buf + (input.bufend - input.buf);
 143
 144         saved_input->parent = input_stack;
 145         input_stack         = saved_input;
 146         ++n_inputs;
 147 }
 148
 149 static void pop_restore_input(void)
 150 {
 151         assert(n_inputs > 0);
 152         assert(input_stack != NULL);
 153
 154         pp_input_t *saved_input = input_stack;
 155
 156         memcpy(&input, saved_input, sizeof(input));
 157         input.parent = NULL;
 158
 159         /* adjust buffer positions */
 160         if (saved_input->bufpos != NULL)
 161                 input.bufpos = input.buf + (saved_input->bufpos - saved_input->buf);
 162         if (saved_input->bufend != NULL)
 163                 input.bufend = input.buf + (saved_input->bufend - saved_input->buf);
 164
 165         input_stack = saved_input->parent;
 166         obstack_free(&input_obstack, saved_input);
 167         --n_inputs;
 168 }
 169
 170 /**
 171  * Prints a parse error message at the current token.
 172  *
 173  * @param msg   the error message
 174  */
 175 static void parse_error(const char *msg)
 176 {
 177         errorf(&pp_token.source_position,  "%s", msg);
 178 }
 179
 180 static inline void next_real_char(void)
 181 {
 182         assert(input.bufpos <= input.bufend);
 183         if (input.bufpos >= input.bufend) {
 184                 size_t s = fread(input.buf + MAX_PUTBACK, 1,
 185                                  sizeof(input.buf) - MAX_PUTBACK, input.file);
 186                 if (s == 0) {
 187                         CC = EOF;
 188                         return;
 189                 }
 190                 input.bufpos = input.buf + MAX_PUTBACK;
 191                 input.bufend = input.buf + MAX_PUTBACK + s;
 192         }
 193         CC = *input.bufpos++;
 194 }
 195
 196 /**
 197  * Put a character back into the buffer.
 198  *
 199  * @param pc  the character to put back
 200  */
 201 static inline void put_back(int pc)
 202 {
 203         assert(input.bufpos > input.buf);
 204         *(--input.bufpos - input.buf + input.buf) = (char) pc;
 205
 206 #ifdef DEBUG_CHARS
 207         printf("putback '%c'\n", pc);
 208 #endif
 209 }
 210
/**
 * Expands to the two switch labels '\r' and '\n'. Each of them consumes one
 * line ending ("\r\n" counts as a single one), increments the line number of
 * the current input and then runs @p code.
 * @p code has to leave the switch on its own (break, return, goto); otherwise
 * the '\r' label falls through into the '\n' label.
 */
#define MATCH_NEWLINE(code)                   \
        case '\r':                                \
                next_char();                          \
                if(CC == '\n') {                      \
                        next_char();                      \
                }                                     \
                ++input.position.lineno;              \
                code                                  \
        case '\n':                                \
                next_char();                          \
                ++input.position.lineno;              \
                code

/** Asserts that the current character is @p c_type and advances to the next. */
#define eat(c_type)  do { assert(CC == c_type); next_char(); } while(0)
 225
 226 static void maybe_concat_lines(void)
 227 {
 228         eat('\\');
 229
 230         switch(CC) {
 231         MATCH_NEWLINE(return;)
 232
 233         default:
 234                 break;
 235         }
 236
 237         put_back(CC);
 238         CC = '\\';
 239 }
 240
 241 /**
 242  * Set c to the next input character, ie.
 243  * after expanding trigraphs.
 244  */
 245 static inline void next_char(void)
 246 {
 247         next_real_char();
 248
 249         /* filter trigraphs and concatenated lines */
 250         if(UNLIKELY(CC == '\\')) {
 251                 maybe_concat_lines();
 252                 goto end_of_next_char;
 253         }
 254
 255         if(LIKELY(CC != '?'))
 256                 goto end_of_next_char;
 257
 258         next_real_char();
 259         if(LIKELY(CC != '?')) {
 260                 put_back(CC);
 261                 CC = '?';
 262                 goto end_of_next_char;
 263         }
 264
 265         next_real_char();
 266         switch(CC) {
 267         case '=': CC = '#'; break;
 268         case '(': CC = '['; break;
 269         case '/': CC = '\\'; maybe_concat_lines(); break;
 270         case ')': CC = ']'; break;
 271         case '\'': CC = '^'; break;
 272         case '<': CC = '{'; break;
 273         case '!': CC = '|'; break;
 274         case '>': CC = '}'; break;
 275         case '-': CC = '~'; break;
 276         default:
 277                 put_back(CC);
 278                 put_back('?');
 279                 CC = '?';
 280                 break;
 281         }
 282
 283 end_of_next_char:;
 284 #ifdef DEBUG_CHARS
 285         printf("nchar '%c'\n", CC);
 286 #endif
 287 }
 288
 289
 290
 291 /**
 292  * Returns true if the given char is a octal digit.
 293  *
 294  * @param char  the character to check
 295  */
 296 static inline bool is_octal_digit(int chr)
 297 {
 298         switch(chr) {
 299         case '0':
 300         case '1':
 301         case '2':
 302         case '3':
 303         case '4':
 304         case '5':
 305         case '6':
 306         case '7':
 307                 return true;
 308         default:
 309                 return false;
 310         }
 311 }
 312
 313 /**
 314  * Returns the value of a digit.
 315  * The only portable way to do it ...
 316  */
 317 static int digit_value(int digit)
 318 {
 319         switch (digit) {
 320         case '0': return 0;
 321         case '1': return 1;
 322         case '2': return 2;
 323         case '3': return 3;
 324         case '4': return 4;
 325         case '5': return 5;
 326         case '6': return 6;
 327         case '7': return 7;
 328         case '8': return 8;
 329         case '9': return 9;
 330         case 'a':
 331         case 'A': return 10;
 332         case 'b':
 333         case 'B': return 11;
 334         case 'c':
 335         case 'C': return 12;
 336         case 'd':
 337         case 'D': return 13;
 338         case 'e':
 339         case 'E': return 14;
 340         case 'f':
 341         case 'F': return 15;
 342         default:
 343                 panic("wrong character given");
 344         }
 345 }
 346
 347 /**
 348  * Parses an octal character sequence.
 349  *
 350  * @param first_digit  the already read first digit
 351  */
 352 static int parse_octal_sequence(const int first_digit)
 353 {
 354         assert(is_octal_digit(first_digit));
 355         int value = digit_value(first_digit);
 356         if (!is_octal_digit(CC)) return value;
 357         value = 8 * value + digit_value(CC);
 358         next_char();
 359         if (!is_octal_digit(CC)) return value;
 360         value = 8 * value + digit_value(CC);
 361         next_char();
 362
 363         if(char_is_signed) {
 364                 return (signed char) value;
 365         } else {
 366                 return (unsigned char) value;
 367         }
 368 }
 369
 370 /**
 371  * Parses a hex character sequence.
 372  */
 373 static int parse_hex_sequence(void)
 374 {
 375         int value = 0;
 376         while(isxdigit(CC)) {
 377                 value = 16 * value + digit_value(CC);
 378                 next_char();
 379         }
 380
 381         if(char_is_signed) {
 382                 return (signed char) value;
 383         } else {
 384                 return (unsigned char) value;
 385         }
 386 }
 387
 388 /**
 389  * Parse an escape sequence.
 390  */
 391 static int parse_escape_sequence(void)
 392 {
 393         eat('\\');
 394
 395         int ec = CC;
 396         next_char();
 397
 398         switch(ec) {
 399         case '"':  return '"';
 400         case '\'': return '\'';
 401         case '\\': return '\\';
 402         case '?': return '\?';
 403         case 'a': return '\a';
 404         case 'b': return '\b';
 405         case 'f': return '\f';
 406         case 'n': return '\n';
 407         case 'r': return '\r';
 408         case 't': return '\t';
 409         case 'v': return '\v';
 410         case 'x':
 411                 return parse_hex_sequence();
 412         case '0':
 413         case '1':
 414         case '2':
 415         case '3':
 416         case '4':
 417         case '5':
 418         case '6':
 419         case '7':
 420                 return parse_octal_sequence(ec);
 421         case EOF:
 422                 parse_error("reached end of file while parsing escape sequence");
 423                 return EOF;
 424         default:
 425                 parse_error("unknown escape sequence");
 426                 return EOF;
 427         }
 428 }
 429
 430 static void parse_string_literal(void)
 431 {
 432         const unsigned start_linenr = input.position.lineno;
 433
 434         eat('"');
 435
 436         int tc;
 437         while(1) {
 438                 switch(CC) {
 439                 case '\\':
 440                         if(resolve_escape_sequences) {
 441                                 tc = parse_escape_sequence();
 442                                 obstack_1grow(&symbol_obstack, (char) tc);
 443                         } else {
 444                                 obstack_1grow(&symbol_obstack, (char) CC);
 445                                 next_char();
 446                                 obstack_1grow(&symbol_obstack, (char) CC);
 447                                 next_char();
 448                         }
 449                         break;
 450
 451                 case EOF: {
 452                         source_position_t source_position;
 453                         source_position.input_name = pp_token.source_position.input_name;
 454                         source_position.lineno     = start_linenr;
 455                         errorf(&source_position, "string has no end");
 456                         pp_token.type = TP_ERROR;
 457                         return;
 458                 }
 459
 460                 case '"':
 461                         next_char();
 462                         goto end_of_string;
 463
 464                 default:
 465                         obstack_1grow(&symbol_obstack, (char) CC);
 466                         next_char();
 467                         break;
 468                 }
 469         }
 470
 471 end_of_string:
 472         /* add finishing 0 to the string */
 473         obstack_1grow(&symbol_obstack, '\0');
 474         const size_t      size   = (size_t)obstack_object_size(&symbol_obstack);
 475         const char *const string = obstack_finish(&symbol_obstack);
 476
 477 #if 0 /* TODO hash */
 478         /* check if there is already a copy of the string */
 479         result = strset_insert(&stringset, string);
 480         if(result != string) {
 481                 obstack_free(&symbol_obstack, string);
 482         }
 483 #else
 484         const char *const result = string;
 485 #endif
 486
 487         pp_token.type          = TP_STRING_LITERAL;
 488         pp_token.literal.begin = result;
 489         pp_token.literal.size  = size;
 490 }
 491
/**
 * Parses the quoted part of a wide character constant; CC must be the
 * opening '\'' (parse_symbol() has already consumed the L prefix).
 *
 * Unfinished: the token type is set to TP_WIDE_CHARACTER_CONSTANT, but the
 * character value collected in found_char is not stored in the token yet.
 * At EOF an error is reported and pp_token becomes TP_ERROR.
 */
static void parse_wide_character_constant(void)
{
        eat('\'');

        /* value of the character read so far, 0 = none yet */
        int found_char = 0;
        while(1) {
                switch(CC) {
                case '\\':
                        found_char = parse_escape_sequence();
                        break;

                /* a newline is reported but parsing continues behind it */
                MATCH_NEWLINE(
                        parse_error("newline while parsing character constant");
                        break;
                )

                case '\'':
                        next_char();
                        goto end_of_wide_char_constant;

                case EOF:
                        parse_error("EOF while parsing character constant");
                        pp_token.type = TP_ERROR;
                        return;

                default:
                        if(found_char != 0) {
                                /* second character: give up without consuming
                                 * it or the rest of the constant */
                                parse_error("more than 1 characters in character "
                                            "constant");
                                goto end_of_wide_char_constant;
                        } else {
                                found_char = CC;
                                next_char();
                        }
                        break;
                }
        }

end_of_wide_char_constant:
        pp_token.type       = TP_WIDE_CHARACTER_CONSTANT;
        /* TODO... */
}
 534
 535 static void parse_character_constant(void)
 536 {
 537         const unsigned start_linenr = input.position.lineno;
 538
 539         eat('\'');
 540
 541         int tc;
 542         while(1) {
 543                 switch(CC) {
 544                 case '\\':
 545                         tc = parse_escape_sequence();
 546                         obstack_1grow(&symbol_obstack, (char) tc);
 547                         break;
 548
 549                 MATCH_NEWLINE(
 550                         parse_error("newline while parsing character constant");
 551                         break;
 552                 )
 553
 554                 case EOF: {
 555                         source_position_t source_position;
 556                         source_position.input_name = pp_token.source_position.input_name;
 557                         source_position.lineno     = start_linenr;
 558                         errorf(&source_position, "EOF while parsing character constant");
 559                         pp_token.type = TP_ERROR;
 560                         return;
 561                 }
 562
 563                 case '\'':
 564                         next_char();
 565                         goto end_of_char_constant;
 566
 567                 default:
 568                         obstack_1grow(&symbol_obstack, (char) CC);
 569                         next_char();
 570                         break;
 571
 572                 }
 573         }
 574
 575 end_of_char_constant:;
 576         const size_t      size   = (size_t)obstack_object_size(&symbol_obstack);
 577         const char *const string = obstack_finish(&symbol_obstack);
 578
 579         pp_token.type          = TP_CHARACTER_CONSTANT;
 580         pp_token.literal.begin = string;
 581         pp_token.literal.size  = size;
 582 }
 583
/**
 * Switch labels for all letters and the underscore, except 'e', 'E', 'p' and
 * 'P'. parse_number() treats those four separately because a sign may follow
 * them.
 */
#define SYMBOL_CHARS_WITHOUT_E_P \
        case 'a': \
        case 'b': \
        case 'c': \
        case 'd': \
        case 'f': \
        case 'g': \
        case 'h': \
        case 'i': \
        case 'j': \
        case 'k': \
        case 'l': \
        case 'm': \
        case 'n': \
        case 'o': \
        case 'q': \
        case 'r': \
        case 's': \
        case 't': \
        case 'u': \
        case 'v': \
        case 'w': \
        case 'x': \
        case 'y': \
        case 'z': \
        case 'A': \
        case 'B': \
        case 'C': \
        case 'D': \
        case 'F': \
        case 'G': \
        case 'H': \
        case 'I': \
        case 'J': \
        case 'K': \
        case 'L': \
        case 'M': \
        case 'N': \
        case 'O': \
        case 'Q': \
        case 'R': \
        case 'S': \
        case 'T': \
        case 'U': \
        case 'V': \
        case 'W': \
        case 'X': \
        case 'Y': \
        case 'Z': \
        case '_':

/** Switch labels for all letters and the underscore. */
#define SYMBOL_CHARS \
        SYMBOL_CHARS_WITHOUT_E_P \
        case 'e': \
        case 'p': \
        case 'E': \
        case 'P':

/** Switch labels for the decimal digits. */
#define DIGITS \
        case '0':  \
        case '1':  \
        case '2':  \
        case '3':  \
        case '4':  \
        case '5':  \
        case '6':  \
        case '7':  \
        case '8':  \
        case '9':
 653
 654 /**
 655  * returns next final token from a preprocessor macro expansion
 656  */
 657 static void expand_next(void)
 658 {
 659         assert(current_expansion != NULL);
 660
 661         pp_definition_t *definition = current_expansion;
 662
 663 restart:
 664         if(definition->list_len == 0
 665                         || definition->expand_pos >= definition->list_len) {
 666                 /* we're finished with the current macro, move up 1 level in the
 667                  * expansion stack */
 668                 pp_definition_t *parent = definition->parent_expansion;
 669                 definition->parent_expansion = NULL;
 670                 definition->is_expanding     = false;
 671
 672                 /* it was the outermost expansion, parse normal pptoken */
 673                 if(parent == NULL) {
 674                         current_expansion = NULL;
 675                         next_preprocessing_token();
 676                         return;
 677                 }
 678                 definition        = parent;
 679                 current_expansion = definition;
 680                 goto restart;
 681         }
 682         pp_token = definition->token_list[definition->expand_pos];
 683         ++definition->expand_pos;
 684
 685         if(pp_token.type != TP_IDENTIFIER)
 686                 return;
 687
 688         /* if it was an identifier then we might need to expand again */
 689         pp_definition_t *symbol_definition = pp_token.symbol->pp_definition;
 690         if(symbol_definition != NULL && !symbol_definition->is_expanding) {
 691                 symbol_definition->parent_expansion = definition;
 692                 symbol_definition->expand_pos       = 0;
 693                 symbol_definition->is_expanding     = true;
 694                 definition                          = symbol_definition;
 695                 current_expansion                   = definition;
 696                 goto restart;
 697         }
 698 }
 699
 700 static void skip_line_comment(void)
 701 {
 702         if(do_print_spaces)
 703                 counted_spaces++;
 704
 705         while(1) {
 706                 switch(CC) {
 707                 case EOF:
 708                         return;
 709
 710                 case '\n':
 711                 case '\r':
 712                         return;
 713
 714                 default:
 715                         next_char();
 716                         break;
 717                 }
 718         }
 719 }
 720
 721 static void skip_multiline_comment(void)
 722 {
 723         if(do_print_spaces)
 724                 counted_spaces++;
 725
 726         unsigned start_linenr = input.position.lineno;
 727         while(1) {
 728                 switch(CC) {
 729                 case '/':
 730                         next_char();
 731                         if (CC == '*') {
 732                                 /* TODO: nested comment, warn here */
 733                         }
 734                         break;
 735                 case '*':
 736                         next_char();
 737                         if(CC == '/') {
 738                                 next_char();
 739                                 return;
 740                         }
 741                         break;
 742
 743                 MATCH_NEWLINE(
 744                         if(do_print_spaces) {
 745                                 counted_newlines++;
 746                                 counted_spaces = 0;
 747                         }
 748                         break;
 749                 )
 750
 751                 case EOF: {
 752                         source_position_t source_position;
 753                         source_position.input_name = pp_token.source_position.input_name;
 754                         source_position.lineno     = start_linenr;
 755                         errorf(&source_position, "at end of file while looking for comment end");
 756                         return;
 757                 }
 758
 759                 default:
 760                         next_char();
 761                         break;
 762                 }
 763         }
 764 }
 765
 766 /* skip spaces advancing at the start of the next preprocessing token */
 767 static void skip_spaces(bool skip_newline)
 768 {
 769         while (true) {
 770                 switch (CC) {
 771                 case ' ':
 772                 case '\t':
 773                         if (do_print_spaces)
 774                                 counted_spaces++;
 775                         next_char();
 776                         continue;
 777                 case '/':
 778                         next_char();
 779                         if (CC == '/') {
 780                                 next_char();
 781                                 skip_line_comment();
 782                                 continue;
 783                         } else if (CC == '*') {
 784                                 next_char();
 785                                 skip_multiline_comment();
 786                                 continue;
 787                         } else {
 788                                 put_back(CC);
 789                                 CC = '/';
 790                         }
 791                         return;
 792
 793                 case '\r':
 794                         if (!skip_newline)
 795                                 return;
 796
 797                         next_char();
 798                         if(CC == '\n') {
 799                                 next_char();
 800                         }
 801                         ++input.position.lineno;
 802                         if (do_print_spaces)
 803                                 ++counted_newlines;
 804                         continue;
 805
 806                 case '\n':
 807                         if (!skip_newline)
 808                                 return;
 809
 810                         next_char();
 811                         ++input.position.lineno;
 812                         if (do_print_spaces)
 813                                 ++counted_newlines;
 814                         continue;
 815
 816                 default:
 817                         return;
 818                 }
 819         }
 820 }
 821
 822 static void eat_pp(int type)
 823 {
 824         (void) type;
 825         assert(pp_token.type == type);
 826         next_preprocessing_token();
 827 }
 828
 829 static void parse_symbol(void)
 830 {
 831         obstack_1grow(&symbol_obstack, (char) CC);
 832         next_char();
 833
 834         while(1) {
 835                 switch(CC) {
 836                 DIGITS
 837                 SYMBOL_CHARS
 838                         obstack_1grow(&symbol_obstack, (char) CC);
 839                         next_char();
 840                         break;
 841
 842                 default:
 843                         goto end_symbol;
 844                 }
 845         }
 846
 847 end_symbol:
 848         obstack_1grow(&symbol_obstack, '\0');
 849         char *string = obstack_finish(&symbol_obstack);
 850
 851         /* might be a wide string or character constant ( L"string"/L'c' ) */
 852         if (CC == '"' && string[0] == 'L' && string[1] == '\0') {
 853                 obstack_free(&symbol_obstack, string);
 854                 /* TODO */
 855                 return;
 856         } else if (CC == '\'' && string[0] == 'L' && string[1] == '\0') {
 857                 obstack_free(&symbol_obstack, string);
 858                 parse_wide_character_constant();
 859                 return;
 860         }
 861
 862         symbol_t *symbol = symbol_table_insert(string);
 863
 864         pp_token.type   = symbol->pp_ID;
 865         pp_token.symbol = symbol;
 866
 867         /* we can free the memory from symbol obstack if we already had an entry in
 868          * the symbol table */
 869         if (symbol->string != string) {
 870                 obstack_free(&symbol_obstack, string);
 871         }
 872         if (!do_expansions)
 873                 return;
 874
 875         pp_definition_t *pp_definition = symbol->pp_definition;
 876         if (pp_definition == NULL)
 877                 return;
 878
 879         if (pp_definition->has_parameters) {
 880                 skip_spaces(true);
 881                 /* no opening brace -> no expansion */
 882                 if (CC != '(')
 883                         return;
 884                 next_preprocessing_token();
 885                 eat_pp('(');
 886
 887                 /* parse arguments (TODO) */
 888                 while (pp_token.type != TP_EOF && pp_token.type != ')')
 889                         next_preprocessing_token();
 890                 next_preprocessing_token();
 891         }
 892
 893         pp_definition->expand_pos   = 0;
 894         pp_definition->is_expanding = true,
 895         current_expansion           = pp_definition;
 896         expand_next();
 897 }
 898
 899 static void parse_number(void)
 900 {
 901         obstack_1grow(&symbol_obstack, (char) CC);
 902         next_char();
 903
 904         while(1) {
 905                 switch(CC) {
 906                 case '.':
 907                 DIGITS
 908                 SYMBOL_CHARS_WITHOUT_E_P
 909                         obstack_1grow(&symbol_obstack, (char) CC);
 910                         next_char();
 911                         break;
 912
 913                 case 'e':
 914                 case 'p':
 915                 case 'E':
 916                 case 'P':
 917                         obstack_1grow(&symbol_obstack, (char) CC);
 918                         next_char();
 919                         if(CC == '+' || CC == '-') {
 920                                 obstack_1grow(&symbol_obstack, (char) CC);
 921                                 next_char();
 922                         }
 923                         break;
 924
 925                 default:
 926                         goto end_number;
 927                 }
 928         }
 929
 930 end_number:
 931         obstack_1grow(&symbol_obstack, '\0');
 932         size_t  size   = obstack_object_size(&symbol_obstack);
 933         char   *string = obstack_finish(&symbol_obstack);
 934
 935         pp_token.type          = TP_NUMBER;
 936         pp_token.literal.begin = string;
 937         pp_token.literal.size  = size;
 938 }
 939
 940
/*
 * Building blocks for tokens that consist of more than one punctuation
 * character. They are used as MAYBE_PROLOG MAYBE(...)... ELSE(...) inside a
 * function returning void.
 *
 * MAYBE_PROLOG consumes the current character and opens a loop with a switch
 * on the following character.
 */
#define MAYBE_PROLOG                                       \
                        next_char();                                   \
                        while(1) {                                     \
                                switch(CC) {

/* If the following character is ch: consume it, set the token type to
 * set_type and return from the enclosing function. */
#define MAYBE(ch, set_type)                                \
                                case ch:                                   \
                                        next_char();                           \
                                        pp_token.type = set_type;              \
                                        return;

/* Any other following character: run code and return from the enclosing
 * function. Also closes the switch and the loop opened by MAYBE_PROLOG.
 * The last line of the macro ends in a line continuation, so the empty line
 * after it is part of the definition and must stay. */
#define ELSE_CODE(code)                                    \
                                default:                                   \
                                        code                                   \
                                        return;                                \
                                }                                          \
                        } /* end of while(1) */                        \

/* Any other following character: set the token type to set_type and return. */
#define ELSE(set_type)                                     \
                ELSE_CODE(                                         \
                        pp_token.type = set_type;                      \
                )
 963
 964 static void next_preprocessing_token(void)
 965 {
 966         if(current_expansion != NULL) {
 967                 expand_next();
 968                 return;
 969         }
 970
 971         pp_token.source_position = input.position;
 972
 973 restart:
 974         switch(CC) {
 975         case ' ':
 976         case '\t':
 977                 if (do_print_spaces)
 978                         counted_spaces++;
 979                 next_char();
 980                 goto restart;
 981
 982         MATCH_NEWLINE(
 983                 counted_newlines++;
 984                 counted_spaces = 0;
 985                 pp_token.type = '\n';
 986                 return;
 987         )
 988
 989         SYMBOL_CHARS
 990                 parse_symbol();
 991                 return;
 992
 993         DIGITS
 994                 parse_number();
 995                 return;
 996
 997         case '"':
 998                 parse_string_literal();
 999                 return;
1000
1001         case '\'':
1002                 parse_character_constant();
1003                 return;
1004
1005         case '.':
1006                 MAYBE_PROLOG
1007                         case '0':
1008                         case '1':
1009                         case '2':
1010                         case '3':
1011                         case '4':
1012                         case '5':
1013                         case '6':
1014                         case '7':
1015                         case '8':
1016                         case '9':
1017                                 put_back(CC);
1018                                 CC = '.';
1019                                 parse_number();
1020                                 return;
1021
1022                         case '.':
1023                                 MAYBE_PROLOG
1024                                 MAYBE('.', TP_DOTDOTDOT)
1025                                 ELSE_CODE(
1026                                         put_back(CC);
1027                                         CC = '.';
1028                                         pp_token.type = '.';
1029                                 )
1030                 ELSE('.')
1031         case '&':
1032                 MAYBE_PROLOG
1033                 MAYBE('&', TP_ANDAND)
1034                 MAYBE('=', TP_ANDEQUAL)
1035                 ELSE('&')
1036         case '*':
1037                 MAYBE_PROLOG
1038                 MAYBE('=', TP_ASTERISKEQUAL)
1039                 ELSE('*')
1040         case '+':
1041                 MAYBE_PROLOG
1042                 MAYBE('+', TP_PLUSPLUS)
1043                 MAYBE('=', TP_PLUSEQUAL)
1044                 ELSE('+')
1045         case '-':
1046                 MAYBE_PROLOG
1047                 MAYBE('>', TP_MINUSGREATER)
1048                 MAYBE('-', TP_MINUSMINUS)
1049                 MAYBE('=', TP_MINUSEQUAL)
1050                 ELSE('-')
1051         case '!':
1052                 MAYBE_PROLOG
1053                 MAYBE('=', TP_EXCLAMATIONMARKEQUAL)
1054                 ELSE('!')
1055         case '/':
1056                 MAYBE_PROLOG
1057                 MAYBE('=', TP_SLASHEQUAL)
1058                         case '*':
1059                                 next_char();
1060                                 skip_multiline_comment();
1061                                 goto restart;
1062                         case '/':
1063                                 next_char();
1064                                 skip_line_comment();
1065                                 goto restart;
1066                 ELSE('/')
1067         case '%':
1068                 MAYBE_PROLOG
1069                 MAYBE('>', '}')
1070                 MAYBE('=', TP_PERCENTEQUAL)
1071                         case ':':
1072                                 MAYBE_PROLOG
1073                                         case '%':
1074                                                 MAYBE_PROLOG
1075                                                 MAYBE(':', TP_HASHHASH)
1076                                                 ELSE_CODE(
1077                                                         put_back(CC);
1078                                                         CC = '%';
1079                                                         pp_token.type = '#';
1080                                                 )
1081                                 ELSE('#')
1082                 ELSE('%')
1083         case '<':
1084                 MAYBE_PROLOG
1085                 MAYBE(':', '[')
1086                 MAYBE('%', '{')
1087                 MAYBE('=', TP_LESSEQUAL)
1088                         case '<':
1089                                 MAYBE_PROLOG
1090                                 MAYBE('=', TP_LESSLESSEQUAL)
1091                                 ELSE(TP_LESSLESS)
1092                 ELSE('<')
1093         case '>':
1094                 MAYBE_PROLOG
1095                 MAYBE('=', TP_GREATEREQUAL)
1096                         case '>':
1097                                 MAYBE_PROLOG
1098                                 MAYBE('=', TP_GREATERGREATEREQUAL)
1099                                 ELSE(TP_GREATERGREATER)
1100                 ELSE('>')
1101         case '^':
1102                 MAYBE_PROLOG
1103                 MAYBE('=', TP_CARETEQUAL)
1104                 ELSE('^')
1105         case '|':
1106                 MAYBE_PROLOG
1107                 MAYBE('=', TP_PIPEEQUAL)
1108                 MAYBE('|', TP_PIPEPIPE)
1109                 ELSE('|')
1110         case ':':
1111                 MAYBE_PROLOG
1112                 MAYBE('>', ']')
1113                 ELSE(':')
1114         case '=':
1115                 MAYBE_PROLOG
1116                 MAYBE('=', TP_EQUALEQUAL)
1117                 ELSE('=')
1118         case '#':
1119                 MAYBE_PROLOG
1120                 MAYBE('#', TP_HASHHASH)
1121                 ELSE('#')
1122
1123         case '?':
1124         case '[':
1125         case ']':
1126         case '(':
1127         case ')':
1128         case '{':
1129         case '}':
1130         case '~':
1131         case ';':
1132         case ',':
1133         case '\\':
1134                 pp_token.type = CC;
1135                 next_char();
1136                 return;
1137
1138         case EOF:
1139                 if (input_stack != NULL) {
1140                         close_input();
1141                         pop_restore_input();
1142                         counted_newlines = 0;
1143                         counted_spaces   = 0;
1144                         /* hack to output correct line number */
1145                         print_line_directive(&input.position, "2");
1146                         next_preprocessing_token();
1147                 } else {
1148                         pp_token.type = TP_EOF;
1149                 }
1150                 return;
1151
1152         default:
1153                 next_char();
1154                 errorf(&pp_token.source_position, "unknown character '%c' found\n", CC);
1155                 pp_token.type = TP_ERROR;
1156                 return;
1157         }
1158 }
1159
1160 static void print_quoted_string(const char *const string)
1161 {
1162         fputc('"', out);
1163         for (const char *c = string; *c != 0; ++c) {
1164                 switch(*c) {
1165                 case '"': fputs("\\\"", out); break;
1166                 case '\\':  fputs("\\\\", out); break;
1167                 case '\a':  fputs("\\a", out); break;
1168                 case '\b':  fputs("\\b", out); break;
1169                 case '\f':  fputs("\\f", out); break;
1170                 case '\n':  fputs("\\n", out); break;
1171                 case '\r':  fputs("\\r", out); break;
1172                 case '\t':  fputs("\\t", out); break;
1173                 case '\v':  fputs("\\v", out); break;
1174                 case '\?':  fputs("\\?", out); break;
1175                 default:
1176                         if(!isprint(*c)) {
1177                                 fprintf(out, "\\%03o", (unsigned)*c);
1178                                 break;
1179                         }
1180                         fputc(*c, out);
1181                         break;
1182                 }
1183         }
1184         fputc('"', out);
1185 }
1186
1187 static void print_line_directive(const source_position_t *pos, const char *add)
1188 {
1189         fprintf(out, "# %u ", pos->lineno);
1190         print_quoted_string(pos->input_name);
1191         if (add != NULL) {
1192                 fputc(' ', out);
1193                 fputs(add, out);
1194         }
1195         fputc('\n', out);
1196
1197         printed_input_name = pos->input_name;
1198 }
1199
1200 static void print_spaces(void)
1201 {
1202         if (counted_newlines >= 9) {
1203                 if (input.had_non_space) {
1204                         fputc('\n', out);
1205                 }
1206                 print_line_directive(&pp_token.source_position, NULL);
1207                 counted_newlines = 0;
1208         } else {
1209                 for (unsigned i = 0; i < counted_newlines; ++i)
1210                         fputc('\n', out);
1211                 counted_newlines = 0;
1212         }
1213         for (unsigned i = 0; i < counted_spaces; ++i)
1214                 fputc(' ', out);
1215         counted_spaces = 0;
1216 }
1217
1218 static void emit_pp_token(void)
1219 {
1220         if (skip_mode)
1221                 return;
1222
1223         if (pp_token.type != '\n') {
1224                 print_spaces();
1225                 input.had_non_space = true;
1226         }
1227
1228         switch(pp_token.type) {
1229         case TP_IDENTIFIER:
1230                 fputs(pp_token.symbol->string, out);
1231                 break;
1232         case TP_NUMBER:
1233                 fputs(pp_token.literal.begin, out);
1234                 break;
1235         case TP_STRING_LITERAL:
1236                 fputc('"', out);
1237                 fputs(pp_token.literal.begin, out);
1238                 fputc('"', out);
1239                 break;
1240         case '\n':
1241                 break;
1242         default:
1243                 print_pp_token_type(out, pp_token.type);
1244                 break;
1245         }
1246 }
1247
1248 static void eat_pp_directive(void)
1249 {
1250         while(pp_token.type != '\n' && pp_token.type != TP_EOF) {
1251                 next_preprocessing_token();
1252         }
1253 }
1254
1255 static bool strings_equal(const string_t *string1, const string_t *string2)
1256 {
1257         size_t size = string1->size;
1258         if(size != string2->size)
1259                 return false;
1260
1261         const char *c1 = string1->begin;
1262         const char *c2 = string2->begin;
1263         for(size_t i = 0; i < size; ++i, ++c1, ++c2) {
1264                 if(*c1 != *c2)
1265                         return false;
1266         }
1267         return true;
1268 }
1269
1270 static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
1271 {
1272         if(token1->type != token2->type)
1273                 return false;
1274
1275         switch(token1->type) {
1276         case TP_HEADERNAME:
1277                 /* TODO */
1278                 return false;
1279         case TP_IDENTIFIER:
1280                 return token1->symbol == token2->symbol;
1281         case TP_NUMBER:
1282         case TP_CHARACTER_CONSTANT:
1283         case TP_STRING_LITERAL:
1284                 return strings_equal(&token1->literal, &token2->literal);
1285
1286         default:
1287                 return true;
1288         }
1289 }
1290
1291 static bool pp_definitions_equal(const pp_definition_t *definition1,
1292                                  const pp_definition_t *definition2)
1293 {
1294         if(definition1->list_len != definition2->list_len)
1295                 return false;
1296
1297         size_t         len = definition1->list_len;
1298         const token_t *t1  = definition1->token_list;
1299         const token_t *t2  = definition2->token_list;
1300         for(size_t i = 0; i < len; ++i, ++t1, ++t2) {
1301                 if(!pp_tokens_equal(t1, t2))
1302                         return false;
1303         }
1304         return true;
1305 }
1306
1307 static void parse_define_directive(void)
1308 {
1309         eat_pp(TP_define);
1310         assert(obstack_object_size(&pp_obstack) == 0);
1311
1312         if (pp_token.type != TP_IDENTIFIER) {
1313                 errorf(&pp_token.source_position,
1314                        "expected identifier after #define, got '%t'", &pp_token);
1315                 goto error_out;
1316         }
1317         symbol_t *symbol = pp_token.symbol;
1318
1319         pp_definition_t *new_definition
1320                 = obstack_alloc(&pp_obstack, sizeof(new_definition[0]));
1321         memset(new_definition, 0, sizeof(new_definition[0]));
1322         new_definition->source_position = input.position;
1323
1324         /* this is probably the only place where spaces are significant in the
1325          * lexer (except for the fact that they separate tokens). #define b(x)
1326          * is something else than #define b (x) */
1327         if (CC == '(') {
1328                 /* eat the '(' */
1329                 next_preprocessing_token();
1330                 /* get next token after '(' */
1331                 next_preprocessing_token();
1332
1333                 while (true) {
1334                         switch (pp_token.type) {
1335                         case TP_DOTDOTDOT:
1336                                 new_definition->is_variadic = true;
1337                                 next_preprocessing_token();
1338                                 if (pp_token.type != ')') {
1339                                         errorf(&input.position,
1340                                                         "'...' not at end of macro argument list");
1341                                         goto error_out;
1342                                 }
1343                                 break;
1344                         case TP_IDENTIFIER:
1345                                 obstack_ptr_grow(&pp_obstack, pp_token.symbol);
1346                                 next_preprocessing_token();
1347
1348                                 if (pp_token.type == ',') {
1349                                         next_preprocessing_token();
1350                                         break;
1351                                 }
1352
1353                                 if (pp_token.type != ')') {
1354                                         errorf(&pp_token.source_position,
1355                                                "expected ',' or ')' after identifier, got '%t'",
1356                                                &pp_token);
1357                                         goto error_out;
1358                                 }
1359                                 break;
1360                         case ')':
1361                                 next_preprocessing_token();
1362                                 goto finish_argument_list;
1363                         default:
1364                                 errorf(&pp_token.source_position,
1365                                        "expected identifier, '...' or ')' in #define argument list, got '%t'",
1366                                        &pp_token);
1367                                 goto error_out;
1368                         }
1369                 }
1370
1371         finish_argument_list:
1372                 new_definition->has_parameters = true;
1373                 new_definition->n_parameters
1374                         = obstack_object_size(&pp_obstack) / sizeof(new_definition->parameters[0]);
1375                 new_definition->parameters = obstack_finish(&pp_obstack);
1376         } else {
1377                 next_preprocessing_token();
1378         }
1379
1380         /* construct a new pp_definition on the obstack */
1381         assert(obstack_object_size(&pp_obstack) == 0);
1382         size_t list_len = 0;
1383         while (pp_token.type != '\n' && pp_token.type != TP_EOF) {
1384                 obstack_grow(&pp_obstack, &pp_token, sizeof(pp_token));
1385                 ++list_len;
1386                 next_preprocessing_token();
1387         }
1388
1389         new_definition->list_len   = list_len;
1390         new_definition->token_list = obstack_finish(&pp_obstack);
1391
1392         pp_definition_t *old_definition = symbol->pp_definition;
1393         if (old_definition != NULL) {
1394                 if (!pp_definitions_equal(old_definition, new_definition)) {
1395                         warningf(&input.position, "multiple definition of macro '%Y' (first defined %P)",
1396                                  symbol, &old_definition->source_position);
1397                 } else {
1398                         /* reuse the old definition */
1399                         obstack_free(&pp_obstack, new_definition);
1400                         new_definition = old_definition;
1401                 }
1402         }
1403
1404         symbol->pp_definition = new_definition;
1405         return;
1406
1407 error_out:
1408         if (obstack_object_size(&pp_obstack) > 0) {
1409                 char *ptr = obstack_finish(&pp_obstack);
1410                 obstack_free(&pp_obstack, ptr);
1411         }
1412         eat_pp_directive();
1413 }
1414
1415 static void parse_undef_directive(void)
1416 {
1417         eat_pp(TP_undef);
1418
1419         if(pp_token.type != TP_IDENTIFIER) {
1420                 errorf(&input.position,
1421                        "expected identifier after #undef, got '%t'", &pp_token);
1422                 eat_pp_directive();
1423                 return;
1424         }
1425
1426         symbol_t *symbol = pp_token.symbol;
1427         symbol->pp_definition = NULL;
1428         next_preprocessing_token();
1429
1430         if(pp_token.type != '\n') {
1431                 warningf(&input.position, "extra tokens at end of #undef directive");
1432         }
1433         /* eat until '\n' */
1434         eat_pp_directive();
1435 }
1436
1437 static const char *parse_headername(void)
1438 {
1439         /* behind an #include we can have the special headername lexems.
1440          * They're only allowed behind an #include so they're not recognized
1441          * by the normal next_preprocessing_token. We handle them as a special
1442          * exception here */
1443
1444         /* skip spaces so we reach start of next preprocessing token */
1445         skip_spaces(false);
1446
1447         assert(obstack_object_size(&input_obstack) == 0);
1448
1449         /* check wether we have a "... or <... headername */
1450         switch (CC) {
1451         case '<':
1452                 /* for now until we have proper searchpath handling */
1453                 obstack_1grow(&input_obstack, '.');
1454                 obstack_1grow(&input_obstack, '/');
1455
1456                 next_char();
1457                 while (true) {
1458                         switch (CC) {
1459                         case EOF:
1460                                 /* fallthrough */
1461                         MATCH_NEWLINE(
1462                                 parse_error("header name without closing '>'");
1463                                 return NULL;
1464                         )
1465                         case '>':
1466                                 next_char();
1467                                 goto finished_headername;
1468                         }
1469                         obstack_1grow(&input_obstack, (char) CC);
1470                         next_char();
1471                 }
1472                 /* we should never be here */
1473
1474         case '"':
1475                 /* for now until we have proper searchpath handling */
1476                 obstack_1grow(&input_obstack, '.');
1477                 obstack_1grow(&input_obstack, '/');
1478
1479                 next_char();
1480                 while (true) {
1481                         switch (CC) {
1482                         case EOF:
1483                                 /* fallthrough */
1484                         MATCH_NEWLINE(
1485                                 parse_error("header name without closing '>'");
1486                                 return NULL;
1487                         )
1488                         case '"':
1489                                 next_char();
1490                                 goto finished_headername;
1491                         }
1492                         obstack_1grow(&input_obstack, (char) CC);
1493                         next_char();
1494                 }
1495                 /* we should never be here */
1496
1497         default:
1498                 /* TODO: do normale pp_token parsing and concatenate results */
1499                 panic("pp_token concat include not implemented yet");
1500         }
1501
1502 finished_headername:
1503         obstack_1grow(&input_obstack, '\0');
1504         char *headername = obstack_finish(&input_obstack);
1505
1506         /* TODO: iterate search-path to find the file */
1507
1508         next_preprocessing_token();
1509
1510         return headername;
1511 }
1512
1513 static bool parse_include_directive(void)
1514 {
1515         /* don't eat the TP_include here!
1516          * we need an alternative parsing for the next token */
1517
1518         print_spaces();
1519
1520         const char *headername = parse_headername();
1521         if (headername == NULL) {
1522                 eat_pp_directive();
1523                 return false;
1524         }
1525
1526         if (pp_token.type != '\n' && pp_token.type != TP_EOF) {
1527                 warningf(&pp_token.source_position,
1528                          "extra tokens at end of #include directive");
1529                 eat_pp_directive();
1530         }
1531
1532         if (n_inputs > INCLUDE_LIMIT) {
1533                 errorf(&pp_token.source_position, "#include nested too deeply");
1534                 /* eat \n or EOF */
1535                 next_preprocessing_token();
1536                 return false;
1537         }
1538
1539         /* we have to reenable space counting and macro expansion here,
1540          * because it is still disabled in directive parsing,
1541          * but we will trigger a preprocessing token reading of the new file
1542          * now and need expansions/space counting */
1543         do_print_spaces = true;
1544         do_expansions   = true;
1545
1546         /* switch inputs */
1547         push_input();
1548         bool res = open_input(headername);
1549         if (!res) {
1550                 errorf(&pp_token.source_position,
1551                        "failed including '%s': %s", headername, strerror(errno));
1552                 pop_restore_input();
1553                 return false;
1554         }
1555
1556         return true;
1557 }
1558
1559 static pp_conditional_t *push_conditional(void)
1560 {
1561         pp_conditional_t *conditional
1562                 = obstack_alloc(&pp_obstack, sizeof(*conditional));
1563         memset(conditional, 0, sizeof(*conditional));
1564
1565         conditional->parent = conditional_stack;
1566         conditional_stack   = conditional;
1567
1568         return conditional;
1569 }
1570
1571 static void pop_conditional(void)
1572 {
1573         assert(conditional_stack != NULL);
1574         conditional_stack = conditional_stack->parent;
1575 }
1576
1577 static void check_unclosed_conditionals(void)
1578 {
1579         while (conditional_stack != NULL) {
1580                 pp_conditional_t *conditional = conditional_stack;
1581
1582                 if (conditional->in_else) {
1583                         errorf(&conditional->source_position, "unterminated #else");
1584                 } else {
1585                         errorf(&conditional->source_position, "unterminated condition");
1586                 }
1587                 pop_conditional();
1588         }
1589 }
1590
1591 static void parse_ifdef_ifndef_directive(void)
1592 {
1593         bool is_ifndef = (pp_token.type == TP_ifndef);
1594         bool condition;
1595         next_preprocessing_token();
1596
1597         if (skip_mode) {
1598                 eat_pp_directive();
1599                 pp_conditional_t *conditional = push_conditional();
1600                 conditional->source_position  = pp_token.source_position;
1601                 conditional->skip             = true;
1602                 return;
1603         }
1604
1605         if (pp_token.type != TP_IDENTIFIER) {
1606                 errorf(&pp_token.source_position,
1607                        "expected identifier after #%s, got '%t'",
1608                        is_ifndef ? "ifndef" : "ifdef", &pp_token);
1609                 eat_pp_directive();
1610
1611                 /* just take the true case in the hope to avoid further errors */
1612                 condition = true;
1613         } else {
1614                 symbol_t        *symbol        = pp_token.symbol;
1615                 pp_definition_t *pp_definition = symbol->pp_definition;
1616                 next_preprocessing_token();
1617
1618                 if (pp_token.type != '\n') {
1619                         errorf(&pp_token.source_position,
1620                                "extra tokens at end of #%s",
1621                                is_ifndef ? "ifndef" : "ifdef");
1622                         eat_pp_directive();
1623                 }
1624
1625                 /* evaluate wether we are in true or false case */
1626                 condition = is_ifndef ? pp_definition == NULL : pp_definition != NULL;
1627         }
1628
1629         pp_conditional_t *conditional = push_conditional();
1630         conditional->source_position  = pp_token.source_position;
1631         conditional->condition        = condition;
1632
1633         if (!condition) {
1634                 skip_mode = true;
1635         }
1636 }
1637
1638 static void parse_else_directive(void)
1639 {
1640         eat_pp(TP_else);
1641
1642         if (pp_token.type != '\n') {
1643                 if (!skip_mode) {
1644                         warningf(&pp_token.source_position, "extra tokens at end of #else");
1645                 }
1646                 eat_pp_directive();
1647         }
1648
1649         pp_conditional_t *conditional = conditional_stack;
1650         if (conditional == NULL) {
1651                 errorf(&pp_token.source_position, "#else without prior #if");
1652                 return;
1653         }
1654
1655         if (conditional->in_else) {
1656                 errorf(&pp_token.source_position,
1657                        "#else after #else (condition started %P)",
1658                        conditional->source_position);
1659                 skip_mode = true;
1660                 return;
1661         }
1662
1663         conditional->in_else = true;
1664         if (!conditional->skip) {
1665                 skip_mode = conditional->condition;
1666         }
1667         conditional->source_position = pp_token.source_position;
1668 }
1669
1670 static void parse_endif_directive(void)
1671 {
1672         eat_pp(TP_endif);
1673
1674         if (pp_token.type != '\n') {
1675                 if (!skip_mode) {
1676                         warningf(&pp_token.source_position,
1677                                  "extra tokens at end of #endif");
1678                 }
1679                 eat_pp_directive();
1680         }
1681
1682         pp_conditional_t *conditional = conditional_stack;
1683         if (conditional == NULL) {
1684                 errorf(&pp_token.source_position, "#endif without prior #if");
1685                 return;
1686         }
1687
1688         if (!conditional->skip) {
1689                 skip_mode = false;
1690         }
1691         pop_conditional();
1692 }
1693
1694 static void parse_preprocessing_directive(void)
1695 {
1696         do_print_spaces = false;
1697         do_expansions   = false;
1698         eat_pp('#');
1699
1700         if (skip_mode) {
1701                 switch(pp_token.type) {
1702                 case TP_ifdef:
1703                 case TP_ifndef:
1704                         parse_ifdef_ifndef_directive();
1705                         break;
1706                 case TP_else:
1707                         parse_else_directive();
1708                         break;
1709                 case TP_endif:
1710                         parse_endif_directive();
1711                         break;
1712                 default:
1713                         eat_pp_directive();
1714                         break;
1715                 }
1716         } else {
1717                 switch(pp_token.type) {
1718                 case TP_define:
1719                         parse_define_directive();
1720                         break;
1721                 case TP_undef:
1722                         parse_undef_directive();
1723                         break;
1724                 case TP_ifdef:
1725                 case TP_ifndef:
1726                         parse_ifdef_ifndef_directive();
1727                         break;
1728                 case TP_else:
1729                         parse_else_directive();
1730                         break;
1731                 case TP_endif:
1732                         parse_endif_directive();
1733                         break;
1734                 case TP_include: {
1735                         bool in_new_source = parse_include_directive();
1736                         /* no need to do anything if source file switched */
1737                         if (in_new_source)
1738                                 return;
1739                         break;
1740                 }
1741                 case '\n':
1742                         /* the nop directive */
1743                         break;
1744                 default:
1745                         errorf(&pp_token.source_position,
1746                                    "invalid preprocessing directive #%t", &pp_token);
1747                         eat_pp_directive();
1748                         break;
1749                 }
1750         }
1751
1752         do_print_spaces = true;
1753         do_expansions   = true;
1754
1755         /* eat '\n' */
1756         assert(pp_token.type == '\n' || pp_token.type == TP_EOF);
1757         next_preprocessing_token();
1758 }
1759
1760 #define GCC_COMPAT_MODE
1761
1762 int pptest_main(int argc, char **argv);
1763 int pptest_main(int argc, char **argv)
1764 {
1765         init_symbol_table();
1766         init_tokens();
1767
1768         obstack_init(&pp_obstack);
1769         obstack_init(&input_obstack);
1770
1771         const char *filename = "t.c";
1772         if (argc > 1)
1773                 filename = argv[1];
1774
1775         out = stdout;
1776
1777 #ifdef GCC_COMPAT_MODE
1778         /* this is here so we can directly compare "gcc -E" output and our output */
1779         fprintf(out, "# 1 \"%s\"\n", filename);
1780         fputs("# 1 \"<built-in>\"\n", out);
1781         fputs("# 1 \"<command-line>\"\n", out);
1782 #endif
1783
1784         bool ok = open_input(filename);
1785         assert(ok);
1786
1787         while(true) {
1788                 /* we're at a line begin */
1789                 if(pp_token.type == '#') {
1790                         parse_preprocessing_directive();
1791                 } else {
1792                         /* parse+emit a line */
1793                         while(pp_token.type != '\n') {
1794                                 if(pp_token.type == TP_EOF)
1795                                         goto end_of_main_loop;
1796                                 emit_pp_token();
1797                                 next_preprocessing_token();
1798                         }
1799                         emit_pp_token();
1800                         next_preprocessing_token();
1801                 }
1802         }
1803 end_of_main_loop:
1804
1805         check_unclosed_conditionals();
1806         close_input();
1807
1808         obstack_free(&input_obstack, NULL);
1809         obstack_free(&pp_obstack, NULL);
1810
1811         exit_tokens();
1812         exit_symbol_table();
1813
1814         return 0;
1815 }