nsz Git - cparser/blob - preprocessor.c

   1 #include <config.h>
   2
   3 #include "token_t.h"
   4 #include "symbol_t.h"
   5 #include "adt/util.h"
   6 #include "adt/error.h"
   7 #include "lang_features.h"
   8 #include "diagnostic.h"
   9 #include "string_rep.h"
  10
  11 #include <assert.h>
  12 #include <errno.h>
  13 #include <string.h>
  14 #include <stdbool.h>
  15 #include <ctype.h>
  16
  17 //#define DEBUG_CHARS
  18 #define MAX_PUTBACK 3
  19 #define INCLUDE_LIMIT 199  /* 199 is for gcc "compatibility" */
  20
  21 struct pp_argument_t {
  22         size_t   list_len;
  23         token_t *token_list;
  24 };
  25
  26 struct pp_definition_t {
  27         symbol_t          *symbol;
  28         source_position_t  source_position;
  29         pp_definition_t   *parent_expansion;
  30         size_t             expand_pos;
  31         bool               is_variadic    : 1;
  32         bool               is_expanding   : 1;
  33         bool               has_parameters : 1;
  34         size_t             n_parameters;
  35         symbol_t          *parameters;
  36
  37         /* replacement */
  38         size_t             list_len;
  39         token_t           *token_list;
  40
  41 };
  42
  43 typedef struct pp_conditional_t pp_conditional_t;
  44 struct pp_conditional_t {
  45         source_position_t  source_position;
  46         bool               condition;
  47         bool               in_else;
  48         bool               skip; /**< conditional in skip mode (then+else gets skipped) */
  49         pp_conditional_t  *parent;
  50 };
  51
  52 typedef struct pp_input_t pp_input_t;
  53 struct pp_input_t {
  54         FILE              *file;
  55         int                c;
  56         char               buf[1024+MAX_PUTBACK];
  57         const char        *bufend;
  58         const char        *bufpos;
  59         source_position_t  position;
  60         bool               had_non_space;
  61         pp_input_t        *parent;
  62 };
  63
  64 pp_input_t input;
  65 #define CC input.c
  66
  67 static pp_input_t     *input_stack;
  68 static unsigned        n_inputs;
  69 static struct obstack  input_obstack;
  70
  71 static pp_conditional_t *conditional_stack;
  72
  73 token_t                   pp_token;
  74 static bool               resolve_escape_sequences = false;
  75 static bool               do_print_spaces          = true;
  76 static bool               do_expansions;
  77 static bool               skip_mode;
  78 static FILE              *out;
  79 static struct obstack     pp_obstack;
  80 static unsigned           counted_newlines;
  81 static unsigned           counted_spaces;
  82 static const char        *printed_input_name = NULL;
  83 static pp_definition_t   *current_expansion  = NULL;
  84
  85 static inline void next_char(void);
  86 static void next_preprocessing_token(void);
  87 static void print_line_directive(const source_position_t *pos, const char *add);
  88
  89 static bool open_input(const char *filename)
  90 {
  91         FILE *file = fopen(filename, "r");
  92         if (file == NULL)
  93                 return false;
  94
  95         input.file                = file;
  96         input.bufend              = NULL;
  97         input.bufpos              = NULL;
  98         input.had_non_space       = false;
  99         input.position.input_name = filename;
 100         input.position.linenr     = 1;
 101
 102         /* indicate that we're at a new input */
 103         print_line_directive(&input.position, input_stack != NULL ? "1" : NULL);
 104
 105         counted_newlines = 0;
 106         counted_spaces   = 0;
 107
 108         /* read first char and first token */
 109         next_char();
 110         next_preprocessing_token();
 111
 112         return true;
 113 }
 114
 115 static void close_input(void)
 116 {
 117         /* ensure we have a newline at EOF */
 118         if (input.had_non_space) {
 119                 fputc('\n', out);
 120         }
 121
 122         assert(input.file != NULL);
 123
 124         fclose(input.file);
 125         input.file   = NULL;
 126         input.bufend = NULL;
 127         input.bufpos = NULL;
 128         input.c      = EOF;
 129 }
 130
 131 static void push_input(void)
 132 {
 133         pp_input_t *saved_input
 134                 = obstack_alloc(&input_obstack, sizeof(*saved_input));
 135
 136         memcpy(saved_input, &input, sizeof(*saved_input));
 137
 138         /* adjust buffer positions */
 139         if (input.bufpos != NULL)
 140                 saved_input->bufpos = saved_input->buf + (input.bufpos - input.buf);
 141         if (input.bufend != NULL)
 142                 saved_input->bufend = saved_input->buf + (input.bufend - input.buf);
 143
 144         saved_input->parent = input_stack;
 145         input_stack         = saved_input;
 146         ++n_inputs;
 147 }
 148
 149 static void pop_restore_input(void)
 150 {
 151         assert(n_inputs > 0);
 152         assert(input_stack != NULL);
 153
 154         pp_input_t *saved_input = input_stack;
 155
 156         memcpy(&input, saved_input, sizeof(input));
 157         input.parent = NULL;
 158
 159         /* adjust buffer positions */
 160         if (saved_input->bufpos != NULL)
 161                 input.bufpos = input.buf + (saved_input->bufpos - saved_input->buf);
 162         if (saved_input->bufend != NULL)
 163                 input.bufend = input.buf + (saved_input->bufend - saved_input->buf);
 164
 165         input_stack = saved_input->parent;
 166         obstack_free(&input_obstack, saved_input);
 167         --n_inputs;
 168 }
 169
 170 /**
 171  * Prints a parse error message at the current token.
 172  *
 173  * @param msg   the error message
 174  */
 175 static void parse_error(const char *msg)
 176 {
 177         errorf(&pp_token.source_position,  "%s", msg);
 178 }
 179
 180 static inline void next_real_char(void)
 181 {
 182         assert(input.bufpos <= input.bufend);
 183         if (input.bufpos >= input.bufend) {
 184                 size_t s = fread(input.buf + MAX_PUTBACK, 1,
 185                                  sizeof(input.buf) - MAX_PUTBACK, input.file);
 186                 if (s == 0) {
 187                         CC = EOF;
 188                         return;
 189                 }
 190                 input.bufpos = input.buf + MAX_PUTBACK;
 191                 input.bufend = input.buf + MAX_PUTBACK + s;
 192         }
 193         CC = *input.bufpos++;
 194 }
 195
 196 /**
 197  * Put a character back into the buffer.
 198  *
 199  * @param pc  the character to put back
 200  */
 201 static inline void put_back(int pc)
 202 {
 203         assert(input.bufpos > input.buf);
 204         *(--input.bufpos - input.buf + input.buf) = (char) pc;
 205
 206 #ifdef DEBUG_CHARS
 207         printf("putback '%c'\n", pc);
 208 #endif
 209 }
 210
 211 #define MATCH_NEWLINE(code)                   \
 212         case '\r':                                \
 213                 next_char();                          \
 214                 if(CC == '\n') {                      \
 215                         next_char();                      \
 216                 }                                     \
 217                 ++input.position.linenr;              \
 218                 code                                  \
 219         case '\n':                                \
 220                 next_char();                          \
 221                 ++input.position.linenr;              \
 222                 code
 223
 224 #define eat(c_type)  do { assert(CC == c_type); next_char(); } while(0)
 225
 226 static void maybe_concat_lines(void)
 227 {
 228         eat('\\');
 229
 230         switch(CC) {
 231         MATCH_NEWLINE(return;)
 232
 233         default:
 234                 break;
 235         }
 236
 237         put_back(CC);
 238         CC = '\\';
 239 }
 240
 241 /**
 242  * Set c to the next input character, ie.
 243  * after expanding trigraphs.
 244  */
 245 static inline void next_char(void)
 246 {
 247         next_real_char();
 248
 249         /* filter trigraphs and concatenated lines */
 250         if(UNLIKELY(CC == '\\')) {
 251                 maybe_concat_lines();
 252                 goto end_of_next_char;
 253         }
 254
 255         if(LIKELY(CC != '?'))
 256                 goto end_of_next_char;
 257
 258         next_real_char();
 259         if(LIKELY(CC != '?')) {
 260                 put_back(CC);
 261                 CC = '?';
 262                 goto end_of_next_char;
 263         }
 264
 265         next_real_char();
 266         switch(CC) {
 267         case '=': CC = '#'; break;
 268         case '(': CC = '['; break;
 269         case '/': CC = '\\'; maybe_concat_lines(); break;
 270         case ')': CC = ']'; break;
 271         case '\'': CC = '^'; break;
 272         case '<': CC = '{'; break;
 273         case '!': CC = '|'; break;
 274         case '>': CC = '}'; break;
 275         case '-': CC = '~'; break;
 276         default:
 277                 put_back(CC);
 278                 put_back('?');
 279                 CC = '?';
 280                 break;
 281         }
 282
 283 end_of_next_char:;
 284 #ifdef DEBUG_CHARS
 285         printf("nchar '%c'\n", CC);
 286 #endif
 287 }
 288
 289
 290
 291 /**
 292  * Returns true if the given char is a octal digit.
 293  *
 294  * @param char  the character to check
 295  */
 296 static inline bool is_octal_digit(int chr)
 297 {
 298         switch(chr) {
 299         case '0':
 300         case '1':
 301         case '2':
 302         case '3':
 303         case '4':
 304         case '5':
 305         case '6':
 306         case '7':
 307                 return true;
 308         default:
 309                 return false;
 310         }
 311 }
 312
 313 /**
 314  * Returns the value of a digit.
 315  * The only portable way to do it ...
 316  */
 317 static int digit_value(int digit) {
 318         switch (digit) {
 319         case '0': return 0;
 320         case '1': return 1;
 321         case '2': return 2;
 322         case '3': return 3;
 323         case '4': return 4;
 324         case '5': return 5;
 325         case '6': return 6;
 326         case '7': return 7;
 327         case '8': return 8;
 328         case '9': return 9;
 329         case 'a':
 330         case 'A': return 10;
 331         case 'b':
 332         case 'B': return 11;
 333         case 'c':
 334         case 'C': return 12;
 335         case 'd':
 336         case 'D': return 13;
 337         case 'e':
 338         case 'E': return 14;
 339         case 'f':
 340         case 'F': return 15;
 341         default:
 342                 panic("wrong character given");
 343         }
 344 }
 345
 346 /**
 347  * Parses an octal character sequence.
 348  *
 349  * @param first_digit  the already read first digit
 350  */
 351 static int parse_octal_sequence(const int first_digit)
 352 {
 353         assert(is_octal_digit(first_digit));
 354         int value = digit_value(first_digit);
 355         if (!is_octal_digit(CC)) return value;
 356         value = 8 * value + digit_value(CC);
 357         next_char();
 358         if (!is_octal_digit(CC)) return value;
 359         value = 8 * value + digit_value(CC);
 360         next_char();
 361
 362         if(char_is_signed) {
 363                 return (signed char) value;
 364         } else {
 365                 return (unsigned char) value;
 366         }
 367 }
 368
 369 /**
 370  * Parses a hex character sequence.
 371  */
 372 static int parse_hex_sequence(void)
 373 {
 374         int value = 0;
 375         while(isxdigit(CC)) {
 376                 value = 16 * value + digit_value(CC);
 377                 next_char();
 378         }
 379
 380         if(char_is_signed) {
 381                 return (signed char) value;
 382         } else {
 383                 return (unsigned char) value;
 384         }
 385 }
 386
 387 /**
 388  * Parse an escape sequence.
 389  */
 390 static int parse_escape_sequence(void)
 391 {
 392         eat('\\');
 393
 394         int ec = CC;
 395         next_char();
 396
 397         switch(ec) {
 398         case '"':  return '"';
 399         case '\'': return '\'';
 400         case '\\': return '\\';
 401         case '?': return '\?';
 402         case 'a': return '\a';
 403         case 'b': return '\b';
 404         case 'f': return '\f';
 405         case 'n': return '\n';
 406         case 'r': return '\r';
 407         case 't': return '\t';
 408         case 'v': return '\v';
 409         case 'x':
 410                 return parse_hex_sequence();
 411         case '0':
 412         case '1':
 413         case '2':
 414         case '3':
 415         case '4':
 416         case '5':
 417         case '6':
 418         case '7':
 419                 return parse_octal_sequence(ec);
 420         case EOF:
 421                 parse_error("reached end of file while parsing escape sequence");
 422                 return EOF;
 423         default:
 424                 parse_error("unknown escape sequence");
 425                 return EOF;
 426         }
 427 }
 428
 429 static void parse_string_literal(void)
 430 {
 431         const unsigned start_linenr = input.position.linenr;
 432
 433         eat('"');
 434
 435         int tc;
 436         while(1) {
 437                 switch(CC) {
 438                 case '\\':
 439                         if(resolve_escape_sequences) {
 440                                 tc = parse_escape_sequence();
 441                                 obstack_1grow(&symbol_obstack, (char) tc);
 442                         } else {
 443                                 obstack_1grow(&symbol_obstack, (char) CC);
 444                                 next_char();
 445                                 obstack_1grow(&symbol_obstack, (char) CC);
 446                                 next_char();
 447                         }
 448                         break;
 449
 450                 case EOF: {
 451                         source_position_t source_position;
 452                         source_position.input_name = pp_token.source_position.input_name;
 453                         source_position.linenr     = start_linenr;
 454                         errorf(&source_position, "string has no end");
 455                         pp_token.type = TP_ERROR;
 456                         return;
 457                 }
 458
 459                 case '"':
 460                         next_char();
 461                         goto end_of_string;
 462
 463                 default:
 464                         obstack_1grow(&symbol_obstack, (char) CC);
 465                         next_char();
 466                         break;
 467                 }
 468         }
 469
 470 end_of_string:
 471         /* add finishing 0 to the string */
 472         obstack_1grow(&symbol_obstack, '\0');
 473         const size_t      size   = (size_t)obstack_object_size(&symbol_obstack);
 474         const char *const string = obstack_finish(&symbol_obstack);
 475
 476 #if 0 /* TODO hash */
 477         /* check if there is already a copy of the string */
 478         result = strset_insert(&stringset, string);
 479         if(result != string) {
 480                 obstack_free(&symbol_obstack, string);
 481         }
 482 #else
 483         const char *const result = string;
 484 #endif
 485
 486         pp_token.type           = TP_STRING_LITERAL;
 487         pp_token.v.string.begin = result;
 488         pp_token.v.string.size  = size;
 489 }
 490
 491 static void parse_wide_character_constant(void)
 492 {
 493         eat('\'');
 494
 495         int found_char = 0;
 496         while(1) {
 497                 switch(CC) {
 498                 case '\\':
 499                         found_char = parse_escape_sequence();
 500                         break;
 501
 502                 MATCH_NEWLINE(
 503                         parse_error("newline while parsing character constant");
 504                         break;
 505                 )
 506
 507                 case '\'':
 508                         next_char();
 509                         goto end_of_wide_char_constant;
 510
 511                 case EOF:
 512                         parse_error("EOF while parsing character constant");
 513                         pp_token.type = TP_ERROR;
 514                         return;
 515
 516                 default:
 517                         if(found_char != 0) {
 518                                 parse_error("more than 1 characters in character "
 519                                             "constant");
 520                                 goto end_of_wide_char_constant;
 521                         } else {
 522                                 found_char = CC;
 523                                 next_char();
 524                         }
 525                         break;
 526                 }
 527         }
 528
 529 end_of_wide_char_constant:
 530         pp_token.type       = TP_WIDE_CHARACTER_CONSTANT;
 531         /* TODO... */
 532 }
 533
 534 static void parse_wide_string_literal(void)
 535 {
 536         const unsigned start_linenr = input.position.linenr;
 537
 538         assert(CC == '"');
 539         next_char();
 540
 541         while(1) {
 542                 switch(CC) {
 543                 case '\\': {
 544                         wchar_rep_t tc = parse_escape_sequence();
 545                         obstack_grow(&symbol_obstack, &tc, sizeof(tc));
 546                         break;
 547                 }
 548
 549                 case EOF: {
 550                         source_position_t source_position;
 551                         source_position.input_name = pp_token.source_position.input_name;
 552                         source_position.linenr     = start_linenr;
 553                         errorf(&source_position, "string has no end");
 554                         pp_token.type = TP_ERROR;
 555                         return;
 556                 }
 557
 558                 case '"':
 559                         next_char();
 560                         goto end_of_string;
 561
 562                 default: {
 563                         wchar_rep_t tc = CC;
 564                         obstack_grow(&symbol_obstack, &tc, sizeof(tc));
 565                         next_char();
 566                         break;
 567                 }
 568                 }
 569         }
 570
 571 end_of_string:;
 572         /* add finishing 0 to the string */
 573         static const wchar_rep_t nul = L'\0';
 574         obstack_grow(&symbol_obstack, &nul, sizeof(nul));
 575
 576         const size_t size
 577                 = (size_t)obstack_object_size(&symbol_obstack) / sizeof(wchar_rep_t);
 578         const wchar_rep_t *const string = obstack_finish(&symbol_obstack);
 579
 580 #if 0 /* TODO hash */
 581         /* check if there is already a copy of the string */
 582         const wchar_rep_t *const result = strset_insert(&stringset, string);
 583         if(result != string) {
 584                 obstack_free(&symbol_obstack, string);
 585         }
 586 #else
 587         const wchar_rep_t *const result = string;
 588 #endif
 589
 590         pp_token.type                = TP_WIDE_STRING_LITERAL;
 591         pp_token.v.wide_string.begin = result;
 592         pp_token.v.wide_string.size  = size;
 593 }
 594
 595 static void parse_character_constant(void)
 596 {
 597         const unsigned start_linenr = input.position.linenr;
 598
 599         eat('\'');
 600
 601         int tc;
 602         while(1) {
 603                 switch(CC) {
 604                 case '\\':
 605                         tc = parse_escape_sequence();
 606                         obstack_1grow(&symbol_obstack, (char) tc);
 607                         break;
 608
 609                 MATCH_NEWLINE(
 610                         parse_error("newline while parsing character constant");
 611                         break;
 612                 )
 613
 614                 case EOF: {
 615                         source_position_t source_position;
 616                         source_position.input_name = pp_token.source_position.input_name;
 617                         source_position.linenr     = start_linenr;
 618                         errorf(&source_position, "EOF while parsing character constant");
 619                         pp_token.type = TP_ERROR;
 620                         return;
 621                 }
 622
 623                 case '\'':
 624                         next_char();
 625                         goto end_of_char_constant;
 626
 627                 default:
 628                         obstack_1grow(&symbol_obstack, (char) CC);
 629                         next_char();
 630                         break;
 631
 632                 }
 633         }
 634
 635 end_of_char_constant:;
 636         const size_t      size   = (size_t)obstack_object_size(&symbol_obstack);
 637         const char *const string = obstack_finish(&symbol_obstack);
 638
 639         pp_token.type           = TP_CHARACTER_CONSTANT;
 640         pp_token.v.string.begin = string;
 641         pp_token.v.string.size  = size;
 642 }
 643
 644 #define SYMBOL_CHARS_WITHOUT_E_P \
 645         case 'a': \
 646         case 'b': \
 647         case 'c': \
 648         case 'd': \
 649         case 'f': \
 650         case 'g': \
 651         case 'h': \
 652         case 'i': \
 653         case 'j': \
 654         case 'k': \
 655         case 'l': \
 656         case 'm': \
 657         case 'n': \
 658         case 'o': \
 659         case 'q': \
 660         case 'r': \
 661         case 's': \
 662         case 't': \
 663         case 'u': \
 664         case 'v': \
 665         case 'w': \
 666         case 'x': \
 667         case 'y': \
 668         case 'z': \
 669         case 'A': \
 670         case 'B': \
 671         case 'C': \
 672         case 'D': \
 673         case 'F': \
 674         case 'G': \
 675         case 'H': \
 676         case 'I': \
 677         case 'J': \
 678         case 'K': \
 679         case 'L': \
 680         case 'M': \
 681         case 'N': \
 682         case 'O': \
 683         case 'Q': \
 684         case 'R': \
 685         case 'S': \
 686         case 'T': \
 687         case 'U': \
 688         case 'V': \
 689         case 'W': \
 690         case 'X': \
 691         case 'Y': \
 692         case 'Z': \
 693         case '_':
 694
 695 #define SYMBOL_CHARS \
 696         SYMBOL_CHARS_WITHOUT_E_P \
 697         case 'e': \
 698         case 'p': \
 699         case 'E': \
 700         case 'P':
 701
 702 #define DIGITS \
 703         case '0':  \
 704         case '1':  \
 705         case '2':  \
 706         case '3':  \
 707         case '4':  \
 708         case '5':  \
 709         case '6':  \
 710         case '7':  \
 711         case '8':  \
 712         case '9':
 713
 714 /**
 715  * returns next final token from a preprocessor macro expansion
 716  */
 717 static void expand_next(void)
 718 {
 719         assert(current_expansion != NULL);
 720
 721         pp_definition_t *definition = current_expansion;
 722
 723 restart:
 724         if(definition->list_len == 0
 725                         || definition->expand_pos >= definition->list_len) {
 726                 /* we're finished with the current macro, move up 1 level in the
 727                  * expansion stack */
 728                 pp_definition_t *parent = definition->parent_expansion;
 729                 definition->parent_expansion = NULL;
 730                 definition->is_expanding     = false;
 731
 732                 /* it was the outermost expansion, parse normal pptoken */
 733                 if(parent == NULL) {
 734                         current_expansion = NULL;
 735                         next_preprocessing_token();
 736                         return;
 737                 }
 738                 definition        = parent;
 739                 current_expansion = definition;
 740                 goto restart;
 741         }
 742         pp_token = definition->token_list[definition->expand_pos];
 743         ++definition->expand_pos;
 744
 745         if(pp_token.type != TP_IDENTIFIER)
 746                 return;
 747
 748         /* if it was an identifier then we might need to expand again */
 749         pp_definition_t *symbol_definition = pp_token.v.symbol->pp_definition;
 750         if(symbol_definition != NULL && !symbol_definition->is_expanding) {
 751                 symbol_definition->parent_expansion = definition;
 752                 symbol_definition->expand_pos       = 0;
 753                 symbol_definition->is_expanding     = true;
 754                 definition                          = symbol_definition;
 755                 current_expansion                   = definition;
 756                 goto restart;
 757         }
 758 }
 759
 760 static void skip_line_comment(void)
 761 {
 762         if(do_print_spaces)
 763                 counted_spaces++;
 764
 765         while(1) {
 766                 switch(CC) {
 767                 case EOF:
 768                         return;
 769
 770                 case '\n':
 771                 case '\r':
 772                         return;
 773
 774                 default:
 775                         next_char();
 776                         break;
 777                 }
 778         }
 779 }
 780
 781 static void skip_multiline_comment(void)
 782 {
 783         if(do_print_spaces)
 784                 counted_spaces++;
 785
 786         unsigned start_linenr = input.position.linenr;
 787         while(1) {
 788                 switch(CC) {
 789                 case '/':
 790                         next_char();
 791                         if (CC == '*') {
 792                                 /* TODO: nested comment, warn here */
 793                         }
 794                         break;
 795                 case '*':
 796                         next_char();
 797                         if(CC == '/') {
 798                                 next_char();
 799                                 return;
 800                         }
 801                         break;
 802
 803                 MATCH_NEWLINE(
 804                         if(do_print_spaces) {
 805                                 counted_newlines++;
 806                                 counted_spaces = 0;
 807                         }
 808                         break;
 809                 )
 810
 811                 case EOF: {
 812                         source_position_t source_position;
 813                         source_position.input_name = pp_token.source_position.input_name;
 814                         source_position.linenr     = start_linenr;
 815                         errorf(&source_position, "at end of file while looking for comment end");
 816                         return;
 817                 }
 818
 819                 default:
 820                         next_char();
 821                         break;
 822                 }
 823         }
 824 }
 825
 826 /* skip spaces advancing at the start of the next preprocessing token */
 827 static void skip_spaces(bool skip_newline)
 828 {
 829         while (true) {
 830                 switch (CC) {
 831                 case ' ':
 832                 case '\t':
 833                         if(do_print_spaces)
 834                                 counted_spaces++;
 835                         next_char();
 836                         continue;
 837                 case '/':
 838                         next_char();
 839                         if (CC == '/') {
 840                                 next_char();
 841                                 skip_line_comment();
 842                                 continue;
 843                         } else if (CC == '*') {
 844                                 next_char();
 845                                 skip_multiline_comment();
 846                                 continue;
 847                         } else {
 848                                 put_back(CC);
 849                                 CC = '/';
 850                         }
 851                         return;
 852
 853                 case '\r':
 854                         if (!skip_newline)
 855                                 return;
 856
 857                         next_char();
 858                         if(CC == '\n') {
 859                                 next_char();
 860                         }
 861                         ++input.position.linenr;
 862                         if (do_print_spaces)
 863                                 ++counted_newlines;
 864                         continue;
 865
 866                 case '\n':
 867                         if (!skip_newline)
 868                                 return;
 869
 870                         next_char();
 871                         ++input.position.linenr;
 872                         if (do_print_spaces)
 873                                 ++counted_newlines;
 874                         continue;
 875
 876                 default:
 877                         return;
 878                 }
 879         }
 880 }
 881
 882 static void eat_pp(int type)
 883 {
 884         (void) type;
 885         assert(pp_token.type == type);
 886         next_preprocessing_token();
 887 }
 888
 889 static void parse_symbol(void)
 890 {
 891         obstack_1grow(&symbol_obstack, (char) CC);
 892         next_char();
 893
 894         while(1) {
 895                 switch(CC) {
 896                 DIGITS
 897                 SYMBOL_CHARS
 898                         obstack_1grow(&symbol_obstack, (char) CC);
 899                         next_char();
 900                         break;
 901
 902                 default:
 903                         goto end_symbol;
 904                 }
 905         }
 906
 907 end_symbol:
 908         obstack_1grow(&symbol_obstack, '\0');
 909         char *string = obstack_finish(&symbol_obstack);
 910
 911         /* might be a wide string or character constant ( L"string"/L'c' ) */
 912         if (CC == '"' && string[0] == 'L' && string[1] == '\0') {
 913                 obstack_free(&symbol_obstack, string);
 914                 parse_wide_string_literal();
 915                 return;
 916         } else if (CC == '\'' && string[0] == 'L' && string[1] == '\0') {
 917                 obstack_free(&symbol_obstack, string);
 918                 parse_wide_character_constant();
 919                 return;
 920         }
 921
 922         symbol_t *symbol = symbol_table_insert(string);
 923
 924         pp_token.type     = symbol->pp_ID;
 925         pp_token.v.symbol = symbol;
 926
 927         /* we can free the memory from symbol obstack if we already had an entry in
 928          * the symbol table */
 929         if (symbol->string != string) {
 930                 obstack_free(&symbol_obstack, string);
 931         }
 932         if (!do_expansions)
 933                 return;
 934
 935         pp_definition_t *pp_definition = symbol->pp_definition;
 936         if (pp_definition == NULL)
 937                 return;
 938
 939         if (pp_definition->has_parameters) {
 940                 skip_spaces(true);
 941                 /* no opening brace -> no expansion */
 942                 if (CC != '(')
 943                         return;
 944                 next_preprocessing_token();
 945                 eat_pp('(');
 946
 947                 /* parse arguments (TODO) */
 948                 while (pp_token.type != TP_EOF && pp_token.type != ')')
 949                         next_preprocessing_token();
 950                 next_preprocessing_token();
 951         }
 952
 953         pp_definition->expand_pos   = 0;
 954         pp_definition->is_expanding = true,
 955         current_expansion           = pp_definition;
 956         expand_next();
 957 }
 958
 959 static void parse_number(void)
 960 {
 961         obstack_1grow(&symbol_obstack, (char) CC);
 962         next_char();
 963
 964         while(1) {
 965                 switch(CC) {
 966                 case '.':
 967                 DIGITS
 968                 SYMBOL_CHARS_WITHOUT_E_P
 969                         obstack_1grow(&symbol_obstack, (char) CC);
 970                         next_char();
 971                         break;
 972
 973                 case 'e':
 974                 case 'p':
 975                 case 'E':
 976                 case 'P':
 977                         obstack_1grow(&symbol_obstack, (char) CC);
 978                         next_char();
 979                         if(CC == '+' || CC == '-') {
 980                                 obstack_1grow(&symbol_obstack, (char) CC);
 981                                 next_char();
 982                         }
 983                         break;
 984
 985                 default:
 986                         goto end_number;
 987                 }
 988         }
 989
 990 end_number:
 991         obstack_1grow(&symbol_obstack, '\0');
 992         size_t  size   = obstack_object_size(&symbol_obstack);
 993         char   *string = obstack_finish(&symbol_obstack);
 994
 995         pp_token.type           = TP_NUMBER;
 996         pp_token.v.string.begin = string;
 997         pp_token.v.string.size  = size;
 998 }
 999
1000
1001
/*
 * Building blocks for lexing punctuators that share a prefix. They are meant
 * to be used inside a case of an enclosing switch in a void function:
 *
 *   MAYBE_PROLOG         consumes the current character and opens a
 *                        "while(1) { switch(CC) {" for the follow-up character
 *   MAYBE(ch, set_type)  if the follow-up character is ch: consume it, set
 *                        pp_token.type to set_type and return
 *   ELSE_CODE(code)      default branch running code; also closes the switch
 *                        and the loop opened by MAYBE_PROLOG and ends with a
 *                        break
 *   ELSE(set_type)       ELSE_CODE that sets pp_token.type to set_type and
 *                        returns
 *
 * Additional explicit case labels may be placed between MAYBE_PROLOG and
 * ELSE/ELSE_CODE.
 */
#define MAYBE_PROLOG \
        next_char(); \
        while(1) { \
                switch(CC) {

#define MAYBE(ch, set_type) \
                case ch: \
                        next_char(); \
                        pp_token.type = set_type; \
                        return;

#define ELSE_CODE(code) \
                default: \
                        code; \
                } \
        } /* end of while(1) */ \
        break;

#define ELSE(set_type) \
        ELSE_CODE( \
                pp_token.type = set_type; \
                return; \
        )
1025
1026 static void next_preprocessing_token(void)
1027 {
1028         if(current_expansion != NULL) {
1029                 expand_next();
1030                 return;
1031         }
1032
1033         pp_token.source_position = input.position;
1034
1035 restart:
1036         switch(CC) {
1037         case ' ':
1038         case '\t':
1039                 if(do_print_spaces)
1040                         counted_spaces++;
1041                 next_char();
1042                 goto restart;
1043
1044         MATCH_NEWLINE(
1045                 counted_newlines++;
1046                 counted_spaces = 0;
1047                 pp_token.type = '\n';
1048                 return;
1049         )
1050
1051         SYMBOL_CHARS
1052                 parse_symbol();
1053                 return;
1054
1055         DIGITS
1056                 parse_number();
1057                 return;
1058
1059         case '"':
1060                 parse_string_literal();
1061                 return;
1062
1063         case '\'':
1064                 parse_character_constant();
1065                 return;
1066
1067         case '.':
1068                 MAYBE_PROLOG
1069                         case '0':
1070                         case '1':
1071                         case '2':
1072                         case '3':
1073                         case '4':
1074                         case '5':
1075                         case '6':
1076                         case '7':
1077                         case '8':
1078                         case '9':
1079                                 put_back(CC);
1080                                 CC = '.';
1081                                 parse_number();
1082                                 return;
1083
1084                         case '.':
1085                                 MAYBE_PROLOG
1086                                 MAYBE('.', TP_DOTDOTDOT)
1087                                 ELSE_CODE(
1088                                         put_back(CC);
1089                                         CC = '.';
1090                                         pp_token.type = '.';
1091                                         return;
1092                                 )
1093                 ELSE('.')
1094         case '&':
1095                 MAYBE_PROLOG
1096                 MAYBE('&', TP_ANDAND)
1097                 MAYBE('=', TP_ANDEQUAL)
1098                 ELSE('&')
1099         case '*':
1100                 MAYBE_PROLOG
1101                 MAYBE('=', TP_ASTERISKEQUAL)
1102                 ELSE('*')
1103         case '+':
1104                 MAYBE_PROLOG
1105                 MAYBE('+', TP_PLUSPLUS)
1106                 MAYBE('=', TP_PLUSEQUAL)
1107                 ELSE('+')
1108         case '-':
1109                 MAYBE_PROLOG
1110                 MAYBE('>', TP_MINUSGREATER)
1111                 MAYBE('-', TP_MINUSMINUS)
1112                 MAYBE('=', TP_MINUSEQUAL)
1113                 ELSE('-')
1114         case '!':
1115                 MAYBE_PROLOG
1116                 MAYBE('=', TP_EXCLAMATIONMARKEQUAL)
1117                 ELSE('!')
1118         case '/':
1119                 MAYBE_PROLOG
1120                 MAYBE('=', TP_SLASHEQUAL)
1121                         case '*':
1122                                 next_char();
1123                                 skip_multiline_comment();
1124                                 goto restart;
1125                         case '/':
1126                                 next_char();
1127                                 skip_line_comment();
1128                                 goto restart;
1129                 ELSE('/')
1130         case '%':
1131                 MAYBE_PROLOG
1132                 MAYBE('>', '}')
1133                 MAYBE('=', TP_PERCENTEQUAL)
1134                         case ':':
1135                                 MAYBE_PROLOG
1136                                         case '%':
1137                                                 MAYBE_PROLOG
1138                                                 MAYBE(':', TP_HASHHASH)
1139                                                 ELSE_CODE(
1140                                                         put_back(CC);
1141                                                         CC = '%';
1142                                                         pp_token.type = '#';
1143                                                         return;
1144                                                 )
1145                                 ELSE('#')
1146                 ELSE('%')
1147         case '<':
1148                 MAYBE_PROLOG
1149                 MAYBE(':', '[')
1150                 MAYBE('%', '{')
1151                 MAYBE('=', TP_LESSEQUAL)
1152                         case '<':
1153                                 MAYBE_PROLOG
1154                                 MAYBE('=', TP_LESSLESSEQUAL)
1155                                 ELSE(TP_LESSLESS)
1156                 ELSE('<')
1157         case '>':
1158                 MAYBE_PROLOG
1159                 MAYBE('=', TP_GREATEREQUAL)
1160                         case '>':
1161                                 MAYBE_PROLOG
1162                                 MAYBE('=', TP_GREATERGREATEREQUAL)
1163                                 ELSE(TP_GREATERGREATER)
1164                 ELSE('>')
1165         case '^':
1166                 MAYBE_PROLOG
1167                 MAYBE('=', TP_CARETEQUAL)
1168                 ELSE('^')
1169         case '|':
1170                 MAYBE_PROLOG
1171                 MAYBE('=', TP_PIPEEQUAL)
1172                 MAYBE('|', TP_PIPEPIPE)
1173                 ELSE('|')
1174         case ':':
1175                 MAYBE_PROLOG
1176                 MAYBE('>', ']')
1177                 ELSE(':')
1178         case '=':
1179                 MAYBE_PROLOG
1180                 MAYBE('=', TP_EQUALEQUAL)
1181                 ELSE('=')
1182         case '#':
1183                 MAYBE_PROLOG
1184                 MAYBE('#', TP_HASHHASH)
1185                 ELSE('#')
1186
1187         case '?':
1188         case '[':
1189         case ']':
1190         case '(':
1191         case ')':
1192         case '{':
1193         case '}':
1194         case '~':
1195         case ';':
1196         case ',':
1197         case '\\':
1198                 pp_token.type = CC;
1199                 next_char();
1200                 return;
1201
1202         case EOF:
1203                 if (input_stack != NULL) {
1204                         close_input();
1205                         pop_restore_input();
1206                         counted_newlines = 0;
1207                         counted_spaces   = 0;
1208                         /* hack to output correct line number */
1209                         print_line_directive(&input.position, "2");
1210                         next_preprocessing_token();
1211                 } else {
1212                         pp_token.type = TP_EOF;
1213                 }
1214                 return;
1215
1216         default:
1217                 next_char();
1218                 errorf(&pp_token.source_position, "unknown character '%c' found\n", CC);
1219                 pp_token.type = TP_ERROR;
1220                 return;
1221         }
1222 }
1223
1224 static void print_quoted_string(const char *const string)
1225 {
1226         fputc('"', out);
1227         for (const char *c = string; *c != 0; ++c) {
1228                 switch(*c) {
1229                 case '"': fputs("\\\"", out); break;
1230                 case '\\':  fputs("\\\\", out); break;
1231                 case '\a':  fputs("\\a", out); break;
1232                 case '\b':  fputs("\\b", out); break;
1233                 case '\f':  fputs("\\f", out); break;
1234                 case '\n':  fputs("\\n", out); break;
1235                 case '\r':  fputs("\\r", out); break;
1236                 case '\t':  fputs("\\t", out); break;
1237                 case '\v':  fputs("\\v", out); break;
1238                 case '\?':  fputs("\\?", out); break;
1239                 default:
1240                         if(!isprint(*c)) {
1241                                 fprintf(out, "\\%03o", *c);
1242                                 break;
1243                         }
1244                         fputc(*c, out);
1245                         break;
1246                 }
1247         }
1248         fputc('"', out);
1249 }
1250
1251 static void print_line_directive(const source_position_t *pos, const char *add)
1252 {
1253         fprintf(out, "# %u ", pos->linenr);
1254         print_quoted_string(pos->input_name);
1255         if (add != NULL) {
1256                 fputc(' ', out);
1257                 fputs(add, out);
1258         }
1259         fputc('\n', out);
1260
1261         printed_input_name = pos->input_name;
1262 }
1263
1264 static void print_spaces(void)
1265 {
1266         if (counted_newlines >= 9) {
1267                 if (input.had_non_space) {
1268                         fputc('\n', out);
1269                 }
1270                 print_line_directive(&pp_token.source_position, NULL);
1271                 counted_newlines = 0;
1272         } else {
1273                 for (unsigned i = 0; i < counted_newlines; ++i)
1274                         fputc('\n', out);
1275                 counted_newlines = 0;
1276         }
1277         for (unsigned i = 0; i < counted_spaces; ++i)
1278                 fputc(' ', out);
1279         counted_spaces = 0;
1280 }
1281
1282 static void emit_pp_token(void)
1283 {
1284         if (skip_mode)
1285                 return;
1286
1287         if (pp_token.type != '\n') {
1288                 print_spaces();
1289                 input.had_non_space = true;
1290         }
1291
1292         switch(pp_token.type) {
1293         case TP_IDENTIFIER:
1294                 fputs(pp_token.v.symbol->string, out);
1295                 break;
1296         case TP_NUMBER:
1297                 fputs(pp_token.v.string.begin, out);
1298                 break;
1299         case TP_STRING_LITERAL:
1300                 fputc('"', out);
1301                 fputs(pp_token.v.string.begin, out);
1302                 fputc('"', out);
1303                 break;
1304         case '\n':
1305                 break;
1306         default:
1307                 print_pp_token_type(out, pp_token.type);
1308                 break;
1309         }
1310 }
1311
1312 static void eat_pp_directive(void)
1313 {
1314         while(pp_token.type != '\n' && pp_token.type != TP_EOF) {
1315                 next_preprocessing_token();
1316         }
1317 }
1318
1319 static bool strings_equal(const string_t *string1, const string_t *string2)
1320 {
1321         size_t size = string1->size;
1322         if(size != string2->size)
1323                 return false;
1324
1325         const char *c1 = string1->begin;
1326         const char *c2 = string2->begin;
1327         for(size_t i = 0; i < size; ++i, ++c1, ++c2) {
1328                 if(*c1 != *c2)
1329                         return false;
1330         }
1331         return true;
1332 }
1333
1334 static bool wide_strings_equal(const wide_string_t *string1,
1335                                const wide_string_t *string2)
1336 {
1337         size_t size = string1->size;
1338         if(size != string2->size)
1339                 return false;
1340
1341         const wchar_rep_t *c1 = string1->begin;
1342         const wchar_rep_t *c2 = string2->begin;
1343         for(size_t i = 0; i < size; ++i, ++c1, ++c2) {
1344                 if(*c1 != *c2)
1345                         return false;
1346         }
1347         return true;
1348 }
1349
1350 static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
1351 {
1352         if(token1->type != token2->type)
1353                 return false;
1354
1355         switch(token1->type) {
1356         case TP_HEADERNAME:
1357                 /* TODO */
1358                 return false;
1359         case TP_IDENTIFIER:
1360                 return token1->v.symbol == token2->v.symbol;
1361         case TP_NUMBER:
1362         case TP_CHARACTER_CONSTANT:
1363         case TP_STRING_LITERAL:
1364                 return strings_equal(&token1->v.string, &token2->v.string);
1365
1366         case TP_WIDE_CHARACTER_CONSTANT:
1367         case TP_WIDE_STRING_LITERAL:
1368                 return wide_strings_equal(&token1->v.wide_string,
1369                                           &token2->v.wide_string);
1370         default:
1371                 return true;
1372         }
1373 }
1374
1375 static bool pp_definitions_equal(const pp_definition_t *definition1,
1376                                  const pp_definition_t *definition2)
1377 {
1378         if(definition1->list_len != definition2->list_len)
1379                 return false;
1380
1381         size_t         len = definition1->list_len;
1382         const token_t *t1  = definition1->token_list;
1383         const token_t *t2  = definition2->token_list;
1384         for(size_t i = 0; i < len; ++i, ++t1, ++t2) {
1385                 if(!pp_tokens_equal(t1, t2))
1386                         return false;
1387         }
1388         return true;
1389 }
1390
1391 static void parse_define_directive(void)
1392 {
1393         eat_pp(TP_define);
1394         assert(obstack_object_size(&pp_obstack) == 0);
1395
1396         if (pp_token.type != TP_IDENTIFIER) {
1397                 errorf(&pp_token.source_position,
1398                        "expected identifier after #define, got '%t'", &pp_token);
1399                 goto error_out;
1400         }
1401         symbol_t *symbol = pp_token.v.symbol;
1402
1403         pp_definition_t *new_definition
1404                 = obstack_alloc(&pp_obstack, sizeof(new_definition[0]));
1405         memset(new_definition, 0, sizeof(new_definition[0]));
1406         new_definition->source_position = input.position;
1407
1408         /* this is probably the only place where spaces are significant in the
1409          * lexer (except for the fact that they separate tokens). #define b(x)
1410          * is something else than #define b (x) */
1411         if (CC == '(') {
1412                 /* eat the '(' */
1413                 next_preprocessing_token();
1414                 /* get next token after '(' */
1415                 next_preprocessing_token();
1416
1417                 while (true) {
1418                         switch (pp_token.type) {
1419                         case TP_DOTDOTDOT:
1420                                 new_definition->is_variadic = true;
1421                                 next_preprocessing_token();
1422                                 if (pp_token.type != ')') {
1423                                         errorf(&input.position,
1424                                                         "'...' not at end of macro argument list");
1425                                         goto error_out;
1426                                 }
1427                                 break;
1428                         case TP_IDENTIFIER:
1429                                 obstack_ptr_grow(&pp_obstack, pp_token.v.symbol);
1430                                 next_preprocessing_token();
1431
1432                                 if (pp_token.type == ',') {
1433                                         next_preprocessing_token();
1434                                         break;
1435                                 }
1436
1437                                 if (pp_token.type != ')') {
1438                                         errorf(&pp_token.source_position,
1439                                                "expected ',' or ')' after identifier, got '%t'",
1440                                                &pp_token);
1441                                         goto error_out;
1442                                 }
1443                                 break;
1444                         case ')':
1445                                 next_preprocessing_token();
1446                                 goto finish_argument_list;
1447                         default:
1448                                 errorf(&pp_token.source_position,
1449                                        "expected identifier, '...' or ')' in #define argument list, got '%t'",
1450                                        &pp_token);
1451                                 goto error_out;
1452                         }
1453                 }
1454
1455         finish_argument_list:
1456                 new_definition->has_parameters = true;
1457                 new_definition->n_parameters
1458                         = obstack_object_size(&pp_obstack) / sizeof(new_definition->parameters[0]);
1459                 new_definition->parameters = obstack_finish(&pp_obstack);
1460         } else {
1461                 next_preprocessing_token();
1462         }
1463
1464         /* construct a new pp_definition on the obstack */
1465         assert(obstack_object_size(&pp_obstack) == 0);
1466         size_t list_len = 0;
1467         while (pp_token.type != '\n' && pp_token.type != TP_EOF) {
1468                 obstack_grow(&pp_obstack, &pp_token, sizeof(pp_token));
1469                 ++list_len;
1470                 next_preprocessing_token();
1471         }
1472
1473         new_definition->list_len   = list_len;
1474         new_definition->token_list = obstack_finish(&pp_obstack);
1475
1476         pp_definition_t *old_definition = symbol->pp_definition;
1477         if (old_definition != NULL) {
1478                 if (!pp_definitions_equal(old_definition, new_definition)) {
1479                         warningf(&input.position, "multiple definition of macro '%Y' (first defined %P)",
1480                                  symbol, &old_definition->source_position);
1481                 } else {
1482                         /* reuse the old definition */
1483                         obstack_free(&pp_obstack, new_definition);
1484                         new_definition = old_definition;
1485                 }
1486         }
1487
1488         symbol->pp_definition = new_definition;
1489         return;
1490
1491 error_out:
1492         if (obstack_object_size(&pp_obstack) > 0) {
1493                 char *ptr = obstack_finish(&pp_obstack);
1494                 obstack_free(&pp_obstack, ptr);
1495         }
1496         eat_pp_directive();
1497 }
1498
1499 static void parse_undef_directive(void)
1500 {
1501         eat_pp(TP_undef);
1502
1503         if(pp_token.type != TP_IDENTIFIER) {
1504                 errorf(&input.position,
1505                        "expected identifier after #undef, got '%t'", &pp_token);
1506                 eat_pp_directive();
1507                 return;
1508         }
1509
1510         symbol_t *symbol = pp_token.v.symbol;
1511         symbol->pp_definition = NULL;
1512         next_preprocessing_token();
1513
1514         if(pp_token.type != '\n') {
1515                 warningf(&input.position, "extra tokens at end of #undef directive");
1516         }
1517         /* eat until '\n' */
1518         eat_pp_directive();
1519 }
1520
1521 static const char *parse_headername(void)
1522 {
1523         /* behind an #include we can have the special headername lexems.
1524          * They're only allowed behind an #include so they're not recognized
1525          * by the normal next_preprocessing_token. We handle them as a special
1526          * exception here */
1527
1528         /* skip spaces so we reach start of next preprocessing token */
1529         skip_spaces(false);
1530
1531         assert(obstack_object_size(&input_obstack) == 0);
1532
1533         /* check wether we have a "... or <... headername */
1534         switch (CC) {
1535         case '<':
1536                 /* for now until we have proper searchpath handling */
1537                 obstack_1grow(&input_obstack, '.');
1538                 obstack_1grow(&input_obstack, '/');
1539
1540                 next_char();
1541                 while (true) {
1542                         switch (CC) {
1543                         case EOF:
1544                                 /* fallthrough */
1545                         MATCH_NEWLINE(
1546                                 parse_error("header name without closing '>'");
1547                                 return NULL;
1548                         )
1549                         case '>':
1550                                 next_char();
1551                                 goto finished_headername;
1552                         }
1553                         obstack_1grow(&input_obstack, (char) CC);
1554                         next_char();
1555                 }
1556                 /* we should never be here */
1557
1558         case '"':
1559                 /* for now until we have proper searchpath handling */
1560                 obstack_1grow(&input_obstack, '.');
1561                 obstack_1grow(&input_obstack, '/');
1562
1563                 next_char();
1564                 while (true) {
1565                         switch (CC) {
1566                         case EOF:
1567                                 /* fallthrough */
1568                         MATCH_NEWLINE(
1569                                 parse_error("header name without closing '>'");
1570                                 return NULL;
1571                         )
1572                         case '"':
1573                                 next_char();
1574                                 goto finished_headername;
1575                         }
1576                         obstack_1grow(&input_obstack, (char) CC);
1577                         next_char();
1578                 }
1579                 /* we should never be here */
1580
1581         default:
1582                 /* TODO: do normale pp_token parsing and concatenate results */
1583                 panic("pp_token concat include not implemented yet");
1584         }
1585
1586 finished_headername:
1587         obstack_1grow(&input_obstack, '\0');
1588         char *headername = obstack_finish(&input_obstack);
1589
1590         /* TODO: iterate search-path to find the file */
1591
1592         next_preprocessing_token();
1593
1594         return headername;
1595 }
1596
1597 static bool parse_include_directive(void)
1598 {
1599         /* don't eat the TP_include here!
1600          * we need an alternative parsing for the next token */
1601
1602         print_spaces();
1603
1604         const char *headername = parse_headername();
1605         if (headername == NULL) {
1606                 eat_pp_directive();
1607                 return false;
1608         }
1609
1610         if (pp_token.type != '\n' && pp_token.type != TP_EOF) {
1611                 warningf(&pp_token.source_position,
1612                          "extra tokens at end of #include directive");
1613                 eat_pp_directive();
1614         }
1615
1616         if (n_inputs > INCLUDE_LIMIT) {
1617                 errorf(&pp_token.source_position, "#include nested too deeply");
1618                 /* eat \n or EOF */
1619                 next_preprocessing_token();
1620                 return false;
1621         }
1622
1623         /* we have to reenable space counting and macro expansion here,
1624          * because it is still disabled in directive parsing,
1625          * but we will trigger a preprocessing token reading of the new file
1626          * now and need expansions/space counting */
1627         do_print_spaces = true;
1628         do_expansions   = true;
1629
1630         /* switch inputs */
1631         push_input();
1632         bool res = open_input(headername);
1633         if (!res) {
1634                 errorf(&pp_token.source_position,
1635                        "failed including '%s': %s", headername, strerror(errno));
1636                 pop_restore_input();
1637                 return false;
1638         }
1639
1640         return true;
1641 }
1642
1643 static pp_conditional_t *push_conditional(void)
1644 {
1645         pp_conditional_t *conditional
1646                 = obstack_alloc(&pp_obstack, sizeof(*conditional));
1647         memset(conditional, 0, sizeof(*conditional));
1648
1649         conditional->parent = conditional_stack;
1650         conditional_stack   = conditional;
1651
1652         return conditional;
1653 }
1654
1655 static void pop_conditional(void)
1656 {
1657         assert(conditional_stack != NULL);
1658         conditional_stack = conditional_stack->parent;
1659 }
1660
1661 static void check_unclosed_conditionals(void)
1662 {
1663         while (conditional_stack != NULL) {
1664                 pp_conditional_t *conditional = conditional_stack;
1665
1666                 if (conditional->in_else) {
1667                         errorf(&conditional->source_position, "unterminated #else");
1668                 } else {
1669                         errorf(&conditional->source_position, "unterminated condition");
1670                 }
1671                 pop_conditional();
1672         }
1673 }
1674
1675 static void parse_ifdef_ifndef_directive(void)
1676 {
1677         bool is_ifndef = (pp_token.type == TP_ifndef);
1678         bool condition;
1679         next_preprocessing_token();
1680
1681         if (skip_mode) {
1682                 eat_pp_directive();
1683                 pp_conditional_t *conditional = push_conditional();
1684                 conditional->source_position  = pp_token.source_position;
1685                 conditional->skip             = true;
1686                 return;
1687         }
1688
1689         if (pp_token.type != TP_IDENTIFIER) {
1690                 errorf(&pp_token.source_position,
1691                        "expected identifier after #%s, got '%t'",
1692                        is_ifndef ? "ifndef" : "ifdef", &pp_token);
1693                 eat_pp_directive();
1694
1695                 /* just take the true case in the hope to avoid further errors */
1696                 condition = true;
1697         } else {
1698                 symbol_t        *symbol        = pp_token.v.symbol;
1699                 pp_definition_t *pp_definition = symbol->pp_definition;
1700                 next_preprocessing_token();
1701
1702                 if (pp_token.type != '\n') {
1703                         errorf(&pp_token.source_position,
1704                                "extra tokens at end of #%s",
1705                                is_ifndef ? "ifndef" : "ifdef");
1706                         eat_pp_directive();
1707                 }
1708
1709                 /* evaluate wether we are in true or false case */
1710                 condition = is_ifndef ? pp_definition == NULL : pp_definition != NULL;
1711         }
1712
1713         pp_conditional_t *conditional = push_conditional();
1714         conditional->source_position  = pp_token.source_position;
1715         conditional->condition        = condition;
1716
1717         if (!condition) {
1718                 skip_mode = true;
1719         }
1720 }
1721
1722 static void parse_else_directive(void)
1723 {
1724         eat_pp(TP_else);
1725
1726         if (pp_token.type != '\n') {
1727                 if (!skip_mode) {
1728                         warningf(&pp_token.source_position, "extra tokens at end of #else");
1729                 }
1730                 eat_pp_directive();
1731         }
1732
1733         pp_conditional_t *conditional = conditional_stack;
1734         if (conditional == NULL) {
1735                 errorf(&pp_token.source_position, "#else without prior #if");
1736                 return;
1737         }
1738
1739         if (conditional->in_else) {
1740                 errorf(&pp_token.source_position,
1741                        "#else after #else (condition started %P)",
1742                        conditional->source_position);
1743                 skip_mode = true;
1744                 return;
1745         }
1746
1747         conditional->in_else = true;
1748         if (!conditional->skip) {
1749                 skip_mode = conditional->condition;
1750         }
1751         conditional->source_position = pp_token.source_position;
1752 }
1753
1754 static void parse_endif_directive(void)
1755 {
1756         eat_pp(TP_endif);
1757
1758         if (pp_token.type != '\n') {
1759                 if (!skip_mode) {
1760                         warningf(&pp_token.source_position,
1761                                  "extra tokens at end of #endif");
1762                 }
1763                 eat_pp_directive();
1764         }
1765
1766         pp_conditional_t *conditional = conditional_stack;
1767         if (conditional == NULL) {
1768                 errorf(&pp_token.source_position, "#endif without prior #if");
1769                 return;
1770         }
1771
1772         if (!conditional->skip) {
1773                 skip_mode = false;
1774         }
1775         pop_conditional();
1776 }
1777
1778 static void parse_preprocessing_directive(void)
1779 {
1780         do_print_spaces = false;
1781         do_expansions   = false;
1782         eat_pp('#');
1783
1784         if (skip_mode) {
1785                 switch(pp_token.type) {
1786                 case TP_ifdef:
1787                 case TP_ifndef:
1788                         parse_ifdef_ifndef_directive();
1789                         break;
1790                 case TP_else:
1791                         parse_else_directive();
1792                         break;
1793                 case TP_endif:
1794                         parse_endif_directive();
1795                         break;
1796                 default:
1797                         eat_pp_directive();
1798                         break;
1799                 }
1800         } else {
1801                 switch(pp_token.type) {
1802                 case TP_define:
1803                         parse_define_directive();
1804                         break;
1805                 case TP_undef:
1806                         parse_undef_directive();
1807                         break;
1808                 case TP_ifdef:
1809                 case TP_ifndef:
1810                         parse_ifdef_ifndef_directive();
1811                         break;
1812                 case TP_else:
1813                         parse_else_directive();
1814                         break;
1815                 case TP_endif:
1816                         parse_endif_directive();
1817                         break;
1818                 case TP_include: {
1819                         bool in_new_source = parse_include_directive();
1820                         /* no need to do anything if source file switched */
1821                         if (in_new_source)
1822                                 return;
1823                         break;
1824                 }
1825                 case '\n':
1826                         /* the nop directive */
1827                         break;
1828                 default:
1829                         errorf(&pp_token.source_position,
1830                                    "invalid preprocessing directive #%t", &pp_token);
1831                         eat_pp_directive();
1832                         break;
1833                 }
1834         }
1835
1836         do_print_spaces = true;
1837         do_expansions   = true;
1838
1839         /* eat '\n' */
1840         assert(pp_token.type == '\n' || pp_token.type == TP_EOF);
1841         next_preprocessing_token();
1842 }
1843
1844 #define GCC_COMPAT_MODE
1845
1846 int pptest_main(int argc, char **argv);
1847 int pptest_main(int argc, char **argv)
1848 {
1849         init_symbol_table();
1850         init_tokens();
1851
1852         obstack_init(&pp_obstack);
1853         obstack_init(&input_obstack);
1854
1855         const char *filename = "t.c";
1856         if (argc > 1)
1857                 filename = argv[1];
1858
1859         out = stdout;
1860
1861 #ifdef GCC_COMPAT_MODE
1862         /* this is here so we can directly compare "gcc -E" output and our output */
1863         fprintf(out, "# 1 \"%s\"\n", filename);
1864         fputs("# 1 \"<built-in>\"\n", out);
1865         fputs("# 1 \"<command-line>\"\n", out);
1866 #endif
1867
1868         bool ok = open_input(filename);
1869         assert(ok);
1870
1871         while(true) {
1872                 /* we're at a line begin */
1873                 if(pp_token.type == '#') {
1874                         parse_preprocessing_directive();
1875                 } else {
1876                         /* parse+emit a line */
1877                         while(pp_token.type != '\n') {
1878                                 if(pp_token.type == TP_EOF)
1879                                         goto end_of_main_loop;
1880                                 emit_pp_token();
1881                                 next_preprocessing_token();
1882                         }
1883                         emit_pp_token();
1884                         next_preprocessing_token();
1885                 }
1886         }
1887 end_of_main_loop:
1888
1889         check_unclosed_conditionals();
1890         close_input();
1891
1892         obstack_free(&input_obstack, NULL);
1893         obstack_free(&pp_obstack, NULL);
1894
1895         exit_tokens();
1896         exit_symbol_table();
1897
1898         return 0;
1899 }