nsz Git - cparser/blob - preprocessor.c

   1 #include <config.h>
   2
   3 #include "token_t.h"
   4 #include "symbol_t.h"
   5 #include "adt/util.h"
   6 #include "adt/error.h"
   7 #include "lang_features.h"
   8 #include "diagnostic.h"
   9 #include "string_rep.h"
  10
  11 #include <assert.h>
  12 #include <errno.h>
  13 #include <string.h>
  14 #include <stdbool.h>
  15 #include <ctype.h>
  16
  17 //#define DEBUG_CHARS
  18 #define MAX_PUTBACK 3
  19 #define INCLUDE_LIMIT 199  /* 199 is for gcc "compatibility" */
  20
  21 struct pp_argument_t {
  22         size_t   list_len;
  23         token_t *token_list;
  24 };
  25
  26 struct pp_definition_t {
  27         symbol_t          *symbol;
  28         source_position_t  source_position;
  29         pp_definition_t   *parent_expansion;
  30         size_t             expand_pos;
  31         bool               is_variadic    : 1;
  32         bool               is_expanding   : 1;
  33         bool               has_parameters : 1;
  34         size_t             n_parameters;
  35         symbol_t          *parameters;
  36
  37         /* replacement */
  38         size_t             list_len;
  39         token_t           *token_list;
  40
  41 };
  42
  43 typedef struct pp_conditional_t pp_conditional_t;
  44 struct pp_conditional_t {
  45         source_position_t  source_position;
  46         bool               condition;
  47         bool               in_else;
  48         bool               skip; /**< conditional in skip mode (then+else gets skipped) */
  49         pp_conditional_t  *parent;
  50 };
  51
  52 typedef struct pp_input_t pp_input_t;
  53 struct pp_input_t {
  54         FILE              *file;
  55         int                c;
  56         char               buf[1024+MAX_PUTBACK];
  57         const char        *bufend;
  58         const char        *bufpos;
  59         source_position_t  position;
  60         bool               had_non_space;
  61         pp_input_t        *parent;
  62 };
  63
  64 pp_input_t input;
  65 #define CC input.c
  66
  67 static pp_input_t     *input_stack;
  68 static unsigned        n_inputs;
  69 static struct obstack  input_obstack;
  70
  71 static pp_conditional_t *conditional_stack;
  72
  73 token_t                   pp_token;
  74 static bool               resolve_escape_sequences = false;
  75 static bool               do_print_spaces          = true;
  76 static bool               do_expansions;
  77 static bool               skip_mode;
  78 static FILE              *out;
  79 static struct obstack     pp_obstack;
  80 static unsigned           counted_newlines;
  81 static unsigned           counted_spaces;
  82 static const char        *printed_input_name = NULL;
  83 static pp_definition_t   *current_expansion  = NULL;
  84
  85 static inline void next_char(void);
  86 static void next_preprocessing_token(void);
  87 static void print_line_directive(const source_position_t *pos, const char *add);
  88 static void print_spaces(void);
  89
  90 static bool open_input(const char *filename)
  91 {
  92         FILE *file = fopen(filename, "r");
  93         if (file == NULL)
  94                 return false;
  95
  96         input.file                = file;
  97         input.bufend              = NULL;
  98         input.bufpos              = NULL;
  99         input.had_non_space       = false;
 100         input.position.input_name = filename;
 101         input.position.linenr     = 1;
 102
 103         /* indicate that we're at a new input */
 104         print_line_directive(&input.position, input_stack != NULL ? "1" : NULL);
 105
 106         counted_newlines = 0;
 107         counted_spaces   = 0;
 108
 109         /* read first char and first token */
 110         next_char();
 111         next_preprocessing_token();
 112
 113         return true;
 114 }
 115
 116 static void close_input(void)
 117 {
 118         /* ensure we have a newline at EOF */
 119         if (input.had_non_space) {
 120                 fputc('\n', out);
 121         }
 122
 123         assert(input.file != NULL);
 124
 125         fclose(input.file);
 126         input.file   = NULL;
 127         input.bufend = NULL;
 128         input.bufpos = NULL;
 129         input.c      = EOF;
 130 }
 131
 132 static void push_input(void)
 133 {
 134         pp_input_t *saved_input
 135                 = obstack_alloc(&input_obstack, sizeof(*saved_input));
 136
 137         memcpy(saved_input, &input, sizeof(*saved_input));
 138
 139         /* adjust buffer positions */
 140         if (input.bufpos != NULL)
 141                 saved_input->bufpos = saved_input->buf + (input.bufpos - input.buf);
 142         if (input.bufend != NULL)
 143                 saved_input->bufend = saved_input->buf + (input.bufend - input.buf);
 144
 145         saved_input->parent = input_stack;
 146         input_stack         = saved_input;
 147         ++n_inputs;
 148 }
 149
 150 static void pop_restore_input(void)
 151 {
 152         assert(n_inputs > 0);
 153         assert(input_stack != NULL);
 154
 155         pp_input_t *saved_input = input_stack;
 156
 157         memcpy(&input, saved_input, sizeof(input));
 158         input.parent = NULL;
 159
 160         /* adjust buffer positions */
 161         if (saved_input->bufpos != NULL)
 162                 input.bufpos = input.buf + (saved_input->bufpos - saved_input->buf);
 163         if (saved_input->bufend != NULL)
 164                 input.bufend = input.buf + (saved_input->bufend - saved_input->buf);
 165
 166         input_stack = saved_input->parent;
 167         obstack_free(&input_obstack, saved_input);
 168         --n_inputs;
 169 }
 170
 171 /**
 172  * Prints a parse error message at the current token.
 173  *
 174  * @param msg   the error message
 175  */
 176 static void parse_error(const char *msg)
 177 {
 178         errorf(&pp_token.source_position,  "%s", msg);
 179 }
 180
 181 static inline void next_real_char(void)
 182 {
 183         assert(input.bufpos <= input.bufend);
 184         if (input.bufpos >= input.bufend) {
 185                 size_t s = fread(input.buf + MAX_PUTBACK, 1,
 186                                  sizeof(input.buf) - MAX_PUTBACK, input.file);
 187                 if(s == 0) {
 188                         CC = EOF;
 189                         return;
 190                 }
 191                 input.bufpos = input.buf + MAX_PUTBACK;
 192                 input.bufend = input.buf + MAX_PUTBACK + s;
 193         }
 194         CC = *input.bufpos++;
 195 }
 196
 197 /**
 198  * Put a character back into the buffer.
 199  *
 200  * @param pc  the character to put back
 201  */
 202 static inline void put_back(int pc)
 203 {
 204         assert(input.bufpos > input.buf);
 205         *(--input.bufpos - input.buf + input.buf) = (char) pc;
 206
 207 #ifdef DEBUG_CHARS
 208         printf("putback '%c'\n", pc);
 209 #endif
 210 }
 211
 212 #define MATCH_NEWLINE(code)                   \
 213         case '\r':                                \
 214                 next_char();                          \
 215                 if(CC == '\n') {                      \
 216                         next_char();                      \
 217                 }                                     \
 218                 ++input.position.linenr;              \
 219                 code                                  \
 220         case '\n':                                \
 221                 next_char();                          \
 222                 ++input.position.linenr;              \
 223                 code
 224
 225 #define eat(c_type)  do { assert(CC == c_type); next_char(); } while(0)
 226
 227 static void maybe_concat_lines(void)
 228 {
 229         eat('\\');
 230
 231         switch(CC) {
 232         MATCH_NEWLINE(return;)
 233
 234         default:
 235                 break;
 236         }
 237
 238         put_back(CC);
 239         CC = '\\';
 240 }
 241
 242 /**
 243  * Set c to the next input character, ie.
 244  * after expanding trigraphs.
 245  */
 246 static inline void next_char(void)
 247 {
 248         next_real_char();
 249
 250         /* filter trigraphs and concatenated lines */
 251         if(UNLIKELY(CC == '\\')) {
 252                 maybe_concat_lines();
 253                 goto end_of_next_char;
 254         }
 255
 256         if(LIKELY(CC != '?'))
 257                 goto end_of_next_char;
 258
 259         next_real_char();
 260         if(LIKELY(CC != '?')) {
 261                 put_back(CC);
 262                 CC = '?';
 263                 goto end_of_next_char;
 264         }
 265
 266         next_real_char();
 267         switch(CC) {
 268         case '=': CC = '#'; break;
 269         case '(': CC = '['; break;
 270         case '/': CC = '\\'; maybe_concat_lines(); break;
 271         case ')': CC = ']'; break;
 272         case '\'': CC = '^'; break;
 273         case '<': CC = '{'; break;
 274         case '!': CC = '|'; break;
 275         case '>': CC = '}'; break;
 276         case '-': CC = '~'; break;
 277         default:
 278                 put_back(CC);
 279                 put_back('?');
 280                 CC = '?';
 281                 break;
 282         }
 283
 284 end_of_next_char:;
 285 #ifdef DEBUG_CHARS
 286         printf("nchar '%c'\n", CC);
 287 #endif
 288 }
 289
 290
 291
 292 /**
 293  * Returns true if the given char is a octal digit.
 294  *
 295  * @param char  the character to check
 296  */
 297 static inline bool is_octal_digit(int chr)
 298 {
 299         switch(chr) {
 300         case '0':
 301         case '1':
 302         case '2':
 303         case '3':
 304         case '4':
 305         case '5':
 306         case '6':
 307         case '7':
 308                 return true;
 309         default:
 310                 return false;
 311         }
 312 }
 313
 314 /**
 315  * Returns the value of a digit.
 316  * The only portable way to do it ...
 317  */
 318 static int digit_value(int digit) {
 319         switch (digit) {
 320         case '0': return 0;
 321         case '1': return 1;
 322         case '2': return 2;
 323         case '3': return 3;
 324         case '4': return 4;
 325         case '5': return 5;
 326         case '6': return 6;
 327         case '7': return 7;
 328         case '8': return 8;
 329         case '9': return 9;
 330         case 'a':
 331         case 'A': return 10;
 332         case 'b':
 333         case 'B': return 11;
 334         case 'c':
 335         case 'C': return 12;
 336         case 'd':
 337         case 'D': return 13;
 338         case 'e':
 339         case 'E': return 14;
 340         case 'f':
 341         case 'F': return 15;
 342         default:
 343                 panic("wrong character given");
 344         }
 345 }
 346
 347 /**
 348  * Parses an octal character sequence.
 349  *
 350  * @param first_digit  the already read first digit
 351  */
 352 static int parse_octal_sequence(const int first_digit)
 353 {
 354         assert(is_octal_digit(first_digit));
 355         int value = digit_value(first_digit);
 356         if (!is_octal_digit(CC)) return value;
 357         value = 8 * value + digit_value(CC);
 358         next_char();
 359         if (!is_octal_digit(CC)) return value;
 360         value = 8 * value + digit_value(CC);
 361         next_char();
 362
 363         if(char_is_signed) {
 364                 return (signed char) value;
 365         } else {
 366                 return (unsigned char) value;
 367         }
 368 }
 369
 370 /**
 371  * Parses a hex character sequence.
 372  */
 373 static int parse_hex_sequence(void)
 374 {
 375         int value = 0;
 376         while(isxdigit(CC)) {
 377                 value = 16 * value + digit_value(CC);
 378                 next_char();
 379         }
 380
 381         if(char_is_signed) {
 382                 return (signed char) value;
 383         } else {
 384                 return (unsigned char) value;
 385         }
 386 }
 387
 388 /**
 389  * Parse an escape sequence.
 390  */
 391 static int parse_escape_sequence(void)
 392 {
 393         eat('\\');
 394
 395         int ec = CC;
 396         next_char();
 397
 398         switch(ec) {
 399         case '"':  return '"';
 400         case '\'': return '\'';
 401         case '\\': return '\\';
 402         case '?': return '\?';
 403         case 'a': return '\a';
 404         case 'b': return '\b';
 405         case 'f': return '\f';
 406         case 'n': return '\n';
 407         case 'r': return '\r';
 408         case 't': return '\t';
 409         case 'v': return '\v';
 410         case 'x':
 411                 return parse_hex_sequence();
 412         case '0':
 413         case '1':
 414         case '2':
 415         case '3':
 416         case '4':
 417         case '5':
 418         case '6':
 419         case '7':
 420                 return parse_octal_sequence(ec);
 421         case EOF:
 422                 parse_error("reached end of file while parsing escape sequence");
 423                 return EOF;
 424         default:
 425                 parse_error("unknown escape sequence");
 426                 return EOF;
 427         }
 428 }
 429
 430 static void parse_string_literal(void)
 431 {
 432         const unsigned start_linenr = input.position.linenr;
 433
 434         eat('"');
 435
 436         int tc;
 437         while(1) {
 438                 switch(CC) {
 439                 case '\\':
 440                         if(resolve_escape_sequences) {
 441                                 tc = parse_escape_sequence();
 442                                 obstack_1grow(&symbol_obstack, (char) tc);
 443                         } else {
 444                                 obstack_1grow(&symbol_obstack, (char) CC);
 445                                 next_char();
 446                                 obstack_1grow(&symbol_obstack, (char) CC);
 447                                 next_char();
 448                         }
 449                         break;
 450
 451                 case EOF: {
 452                         source_position_t source_position;
 453                         source_position.input_name = pp_token.source_position.input_name;
 454                         source_position.linenr     = start_linenr;
 455                         errorf(&source_position, "string has no end");
 456                         pp_token.type = TP_ERROR;
 457                         return;
 458                 }
 459
 460                 case '"':
 461                         next_char();
 462                         goto end_of_string;
 463
 464                 default:
 465                         obstack_1grow(&symbol_obstack, (char) CC);
 466                         next_char();
 467                         break;
 468                 }
 469         }
 470
 471 end_of_string:
 472         /* add finishing 0 to the string */
 473         obstack_1grow(&symbol_obstack, '\0');
 474         const size_t      size   = (size_t)obstack_object_size(&symbol_obstack);
 475         const char *const string = obstack_finish(&symbol_obstack);
 476
 477 #if 0 /* TODO hash */
 478         /* check if there is already a copy of the string */
 479         result = strset_insert(&stringset, string);
 480         if(result != string) {
 481                 obstack_free(&symbol_obstack, string);
 482         }
 483 #else
 484         const char *const result = string;
 485 #endif
 486
 487         pp_token.type           = TP_STRING_LITERAL;
 488         pp_token.v.string.begin = result;
 489         pp_token.v.string.size  = size;
 490 }
 491
 492 static void parse_wide_character_constant(void)
 493 {
 494         eat('\'');
 495
 496         int found_char = 0;
 497         while(1) {
 498                 switch(CC) {
 499                 case '\\':
 500                         found_char = parse_escape_sequence();
 501                         break;
 502
 503                 MATCH_NEWLINE(
 504                         parse_error("newline while parsing character constant");
 505                         break;
 506                 )
 507
 508                 case '\'':
 509                         next_char();
 510                         goto end_of_wide_char_constant;
 511
 512                 case EOF:
 513                         parse_error("EOF while parsing character constant");
 514                         pp_token.type = TP_ERROR;
 515                         return;
 516
 517                 default:
 518                         if(found_char != 0) {
 519                                 parse_error("more than 1 characters in character "
 520                                             "constant");
 521                                 goto end_of_wide_char_constant;
 522                         } else {
 523                                 found_char = CC;
 524                                 next_char();
 525                         }
 526                         break;
 527                 }
 528         }
 529
 530 end_of_wide_char_constant:
 531         pp_token.type       = TP_WIDE_CHARACTER_CONSTANT;
 532         /* TODO... */
 533 }
 534
 535 static void parse_wide_string_literal(void)
 536 {
 537         const unsigned start_linenr = input.position.linenr;
 538
 539         assert(CC == '"');
 540         next_char();
 541
 542         while(1) {
 543                 switch(CC) {
 544                 case '\\': {
 545                         wchar_rep_t tc = parse_escape_sequence();
 546                         obstack_grow(&symbol_obstack, &tc, sizeof(tc));
 547                         break;
 548                 }
 549
 550                 case EOF: {
 551                         source_position_t source_position;
 552                         source_position.input_name = pp_token.source_position.input_name;
 553                         source_position.linenr     = start_linenr;
 554                         errorf(&source_position, "string has no end");
 555                         pp_token.type = TP_ERROR;
 556                         return;
 557                 }
 558
 559                 case '"':
 560                         next_char();
 561                         goto end_of_string;
 562
 563                 default: {
 564                         wchar_rep_t tc = CC;
 565                         obstack_grow(&symbol_obstack, &tc, sizeof(tc));
 566                         next_char();
 567                         break;
 568                 }
 569                 }
 570         }
 571
 572 end_of_string:;
 573         /* add finishing 0 to the string */
 574         static const wchar_rep_t nul = L'\0';
 575         obstack_grow(&symbol_obstack, &nul, sizeof(nul));
 576
 577         const size_t size
 578                 = (size_t)obstack_object_size(&symbol_obstack) / sizeof(wchar_rep_t);
 579         const wchar_rep_t *const string = obstack_finish(&symbol_obstack);
 580
 581 #if 0 /* TODO hash */
 582         /* check if there is already a copy of the string */
 583         const wchar_rep_t *const result = strset_insert(&stringset, string);
 584         if(result != string) {
 585                 obstack_free(&symbol_obstack, string);
 586         }
 587 #else
 588         const wchar_rep_t *const result = string;
 589 #endif
 590
 591         pp_token.type                = TP_WIDE_STRING_LITERAL;
 592         pp_token.v.wide_string.begin = result;
 593         pp_token.v.wide_string.size  = size;
 594 }
 595
 596 static void parse_character_constant(void)
 597 {
 598         const unsigned start_linenr = input.position.linenr;
 599
 600         eat('\'');
 601
 602         int tc;
 603         while(1) {
 604                 switch(CC) {
 605                 case '\\':
 606                         tc = parse_escape_sequence();
 607                         obstack_1grow(&symbol_obstack, (char) tc);
 608                         break;
 609
 610                 MATCH_NEWLINE(
 611                         parse_error("newline while parsing character constant");
 612                         break;
 613                 )
 614
 615                 case EOF: {
 616                         source_position_t source_position;
 617                         source_position.input_name = pp_token.source_position.input_name;
 618                         source_position.linenr     = start_linenr;
 619                         errorf(&source_position, "EOF while parsing character constant");
 620                         pp_token.type = TP_ERROR;
 621                         return;
 622                 }
 623
 624                 case '\'':
 625                         next_char();
 626                         goto end_of_char_constant;
 627
 628                 default:
 629                         obstack_1grow(&symbol_obstack, (char) CC);
 630                         next_char();
 631                         break;
 632
 633                 }
 634         }
 635
 636 end_of_char_constant:;
 637         const size_t      size   = (size_t)obstack_object_size(&symbol_obstack);
 638         const char *const string = obstack_finish(&symbol_obstack);
 639
 640         pp_token.type           = TP_CHARACTER_CONSTANT;
 641         pp_token.v.string.begin = string;
 642         pp_token.v.string.size  = size;
 643 }
 644
 645 #define SYMBOL_CHARS_WITHOUT_E_P \
 646         case 'a': \
 647         case 'b': \
 648         case 'c': \
 649         case 'd': \
 650         case 'f': \
 651         case 'g': \
 652         case 'h': \
 653         case 'i': \
 654         case 'j': \
 655         case 'k': \
 656         case 'l': \
 657         case 'm': \
 658         case 'n': \
 659         case 'o': \
 660         case 'q': \
 661         case 'r': \
 662         case 's': \
 663         case 't': \
 664         case 'u': \
 665         case 'v': \
 666         case 'w': \
 667         case 'x': \
 668         case 'y': \
 669         case 'z': \
 670         case 'A': \
 671         case 'B': \
 672         case 'C': \
 673         case 'D': \
 674         case 'F': \
 675         case 'G': \
 676         case 'H': \
 677         case 'I': \
 678         case 'J': \
 679         case 'K': \
 680         case 'L': \
 681         case 'M': \
 682         case 'N': \
 683         case 'O': \
 684         case 'Q': \
 685         case 'R': \
 686         case 'S': \
 687         case 'T': \
 688         case 'U': \
 689         case 'V': \
 690         case 'W': \
 691         case 'X': \
 692         case 'Y': \
 693         case 'Z': \
 694         case '_':
 695
 696 #define SYMBOL_CHARS \
 697         SYMBOL_CHARS_WITHOUT_E_P \
 698         case 'e': \
 699         case 'p': \
 700         case 'E': \
 701         case 'P':
 702
 703 #define DIGITS \
 704         case '0':  \
 705         case '1':  \
 706         case '2':  \
 707         case '3':  \
 708         case '4':  \
 709         case '5':  \
 710         case '6':  \
 711         case '7':  \
 712         case '8':  \
 713         case '9':
 714
 715 /**
 716  * returns next final token from a preprocessor macro expansion
 717  */
 718 static void expand_next(void)
 719 {
 720         assert(current_expansion != NULL);
 721
 722         pp_definition_t *definition = current_expansion;
 723
 724 restart:
 725         if(definition->list_len == 0
 726                         || definition->expand_pos >= definition->list_len) {
 727                 /* we're finished with the current macro, move up 1 level in the
 728                  * expansion stack */
 729                 pp_definition_t *parent = definition->parent_expansion;
 730                 definition->parent_expansion = NULL;
 731                 definition->is_expanding     = false;
 732
 733                 /* it was the outermost expansion, parse normal pptoken */
 734                 if(parent == NULL) {
 735                         current_expansion = NULL;
 736                         next_preprocessing_token();
 737                         return;
 738                 }
 739                 definition        = parent;
 740                 current_expansion = definition;
 741                 goto restart;
 742         }
 743         pp_token = definition->token_list[definition->expand_pos];
 744         ++definition->expand_pos;
 745
 746         if(pp_token.type != TP_IDENTIFIER)
 747                 return;
 748
 749         /* if it was an identifier then we might need to expand again */
 750         pp_definition_t *symbol_definition = pp_token.v.symbol->pp_definition;
 751         if(symbol_definition != NULL && !symbol_definition->is_expanding) {
 752                 symbol_definition->parent_expansion = definition;
 753                 symbol_definition->expand_pos       = 0;
 754                 symbol_definition->is_expanding     = true;
 755                 definition                          = symbol_definition;
 756                 current_expansion                   = definition;
 757                 goto restart;
 758         }
 759 }
 760
 761 static void skip_line_comment(void)
 762 {
 763         if(do_print_spaces)
 764                 counted_spaces++;
 765
 766         while(1) {
 767                 switch(CC) {
 768                 case EOF:
 769                         return;
 770
 771                 case '\n':
 772                 case '\r':
 773                         return;
 774
 775                 default:
 776                         next_char();
 777                         break;
 778                 }
 779         }
 780 }
 781
 782 static void skip_multiline_comment(void)
 783 {
 784         if(do_print_spaces)
 785                 counted_spaces++;
 786
 787         unsigned start_linenr = input.position.linenr;
 788         while(1) {
 789                 switch(CC) {
 790                 case '/':
 791                         next_char();
 792                         if (CC == '*') {
 793                                 /* TODO: nested comment, warn here */
 794                         }
 795                         break;
 796                 case '*':
 797                         next_char();
 798                         if(CC == '/') {
 799                                 next_char();
 800                                 return;
 801                         }
 802                         break;
 803
 804                 MATCH_NEWLINE(
 805                         if(do_print_spaces) {
 806                                 counted_newlines++;
 807                                 counted_spaces = 0;
 808                         }
 809                         break;
 810                 )
 811
 812                 case EOF: {
 813                         source_position_t source_position;
 814                         source_position.input_name = pp_token.source_position.input_name;
 815                         source_position.linenr     = start_linenr;
 816                         errorf(&source_position, "at end of file while looking for comment end");
 817                         return;
 818                 }
 819
 820                 default:
 821                         next_char();
 822                         break;
 823                 }
 824         }
 825 }
 826
 827 /* skip spaces advancing at the start of the next preprocessing token */
 828 static void skip_spaces(bool skip_newline)
 829 {
 830         while (true) {
 831                 switch (CC) {
 832                 case ' ':
 833                 case '\t':
 834                         if(do_print_spaces)
 835                                 counted_spaces++;
 836                         next_char();
 837                         continue;
 838                 case '/':
 839                         next_char();
 840                         if (CC == '/') {
 841                                 next_char();
 842                                 skip_line_comment();
 843                                 continue;
 844                         } else if (CC == '*') {
 845                                 next_char();
 846                                 skip_multiline_comment();
 847                                 continue;
 848                         } else {
 849                                 put_back(CC);
 850                                 CC = '/';
 851                         }
 852                         return;
 853
 854                 case '\r':
 855                         if (!skip_newline)
 856                                 return;
 857
 858                         next_char();
 859                         if(CC == '\n') {
 860                                 next_char();
 861                         }
 862                         ++input.position.linenr;
 863                         if (do_print_spaces)
 864                                 ++counted_newlines;
 865                         continue;
 866
 867                 case '\n':
 868                         if (!skip_newline)
 869                                 return;
 870
 871                         next_char();
 872                         ++input.position.linenr;
 873                         if (do_print_spaces)
 874                                 ++counted_newlines;
 875                         continue;
 876
 877                 default:
 878                         return;
 879                 }
 880         }
 881 }
 882
 883 static void eat_pp(preprocessor_token_type_t type)
 884 {
 885         (void) type;
 886         assert(pp_token.type == type);
 887         next_preprocessing_token();
 888 }
 889
 890 static void parse_symbol(void)
 891 {
 892         obstack_1grow(&symbol_obstack, (char) CC);
 893         next_char();
 894
 895         while(1) {
 896                 switch(CC) {
 897                 DIGITS
 898                 SYMBOL_CHARS
 899                         obstack_1grow(&symbol_obstack, (char) CC);
 900                         next_char();
 901                         break;
 902
 903                 default:
 904                         goto end_symbol;
 905                 }
 906         }
 907
 908 end_symbol:
 909         obstack_1grow(&symbol_obstack, '\0');
 910         char *string = obstack_finish(&symbol_obstack);
 911
 912         /* might be a wide string or character constant ( L"string"/L'c' ) */
 913         if (CC == '"' && string[0] == 'L' && string[1] == '\0') {
 914                 obstack_free(&symbol_obstack, string);
 915                 parse_wide_string_literal();
 916                 return;
 917         } else if (CC == '\'' && string[0] == 'L' && string[1] == '\0') {
 918                 obstack_free(&symbol_obstack, string);
 919                 parse_wide_character_constant();
 920                 return;
 921         }
 922
 923         symbol_t *symbol = symbol_table_insert(string);
 924
 925         pp_token.type     = symbol->pp_ID;
 926         pp_token.v.symbol = symbol;
 927
 928         /* we can free the memory from symbol obstack if we already had an entry in
 929          * the symbol table */
 930         if (symbol->string != string) {
 931                 obstack_free(&symbol_obstack, string);
 932         }
 933         if (!do_expansions)
 934                 return;
 935
 936         pp_definition_t *pp_definition = symbol->pp_definition;
 937         if (pp_definition == NULL)
 938                 return;
 939
 940         if (pp_definition->has_parameters) {
 941                 skip_spaces(true);
 942                 /* no opening brace -> no expansion */
 943                 if (CC != '(')
 944                         return;
 945                 next_preprocessing_token();
 946                 eat_pp('(');
 947
 948                 /* parse arguments (TODO) */
 949                 while (pp_token.type != TP_EOF && pp_token.type != ')')
 950                         next_preprocessing_token();
 951                 next_preprocessing_token();
 952         }
 953
 954         pp_definition->expand_pos   = 0;
 955         pp_definition->is_expanding = true,
 956         current_expansion           = pp_definition;
 957         expand_next();
 958 }
 959
 960 static void parse_number(void)
 961 {
 962         obstack_1grow(&symbol_obstack, (char) CC);
 963         next_char();
 964
 965         while(1) {
 966                 switch(CC) {
 967                 case '.':
 968                 DIGITS
 969                 SYMBOL_CHARS_WITHOUT_E_P
 970                         obstack_1grow(&symbol_obstack, (char) CC);
 971                         next_char();
 972                         break;
 973
 974                 case 'e':
 975                 case 'p':
 976                 case 'E':
 977                 case 'P':
 978                         obstack_1grow(&symbol_obstack, (char) CC);
 979                         next_char();
 980                         if(CC == '+' || CC == '-') {
 981                                 obstack_1grow(&symbol_obstack, (char) CC);
 982                                 next_char();
 983                         }
 984                         break;
 985
 986                 default:
 987                         goto end_number;
 988                 }
 989         }
 990
 991 end_number:
 992         obstack_1grow(&symbol_obstack, '\0');
 993         size_t  size   = obstack_object_size(&symbol_obstack);
 994         char   *string = obstack_finish(&symbol_obstack);
 995
 996         pp_token.type           = TP_NUMBER;
 997         pp_token.v.string.begin = string;
 998         pp_token.v.string.size  = size;
 999 }
1000
1001
1002
/*
 * Helper macros for lexing multi-character punctuators inside a switch on
 * CC. They are deliberately unbalanced: MAYBE_PROLOG opens a loop and a
 * switch, MAYBE (or hand-written case labels) adds alternatives, and
 * ELSE/ELSE_CODE supplies the default label and closes both again.
 */

/* Consumes the current character and opens `while(1) { switch(CC) {`,
 * which must be closed by a later ELSE or ELSE_CODE. */
#define MAYBE_PROLOG                                       \
                        next_char();                                   \
                        while(1) {                                     \
                                switch(CC) {

/* Alternative for character `ch`: consumes it, sets pp_token.type to
 * `set_type` and returns from the enclosing function. */
#define MAYBE(ch, set_type)                                \
                                case ch:                                   \
                                        next_char();                           \
                                        pp_token.type = set_type;              \
                                        return;

/* Default alternative: runs `code`, then closes the switch and the loop
 * opened by MAYBE_PROLOG. The trailing `break` leaves the construct the
 * macro sequence is nested in. */
#define ELSE_CODE(code)                                    \
                                default:                                   \
                                        code;                                  \
                                }                                          \
                        } /* end of while(1) */                        \
                        break;

/* Default alternative that sets pp_token.type to `set_type` and returns
 * without consuming the current character. */
#define ELSE(set_type)                                     \
                ELSE_CODE(                                         \
                        pp_token.type = set_type;                      \
                        return;                                        \
                )
1026
1027 static void next_preprocessing_token(void)
1028 {
1029         if(current_expansion != NULL) {
1030                 expand_next();
1031                 return;
1032         }
1033
1034         pp_token.source_position = input.position;
1035
1036 restart:
1037         switch(CC) {
1038         case ' ':
1039         case '\t':
1040                 if(do_print_spaces)
1041                         counted_spaces++;
1042                 next_char();
1043                 goto restart;
1044
1045         MATCH_NEWLINE(
1046                 counted_newlines++;
1047                 counted_spaces = 0;
1048                 pp_token.type = '\n';
1049                 return;
1050         )
1051
1052         SYMBOL_CHARS
1053                 parse_symbol();
1054                 return;
1055
1056         DIGITS
1057                 parse_number();
1058                 return;
1059
1060         case '"':
1061                 parse_string_literal();
1062                 return;
1063
1064         case '\'':
1065                 parse_character_constant();
1066                 return;
1067
1068         case '.':
1069                 MAYBE_PROLOG
1070                         case '0':
1071                         case '1':
1072                         case '2':
1073                         case '3':
1074                         case '4':
1075                         case '5':
1076                         case '6':
1077                         case '7':
1078                         case '8':
1079                         case '9':
1080                                 put_back(CC);
1081                                 CC = '.';
1082                                 parse_number();
1083                                 return;
1084
1085                         case '.':
1086                                 MAYBE_PROLOG
1087                                 MAYBE('.', TP_DOTDOTDOT)
1088                                 ELSE_CODE(
1089                                         put_back(CC);
1090                                         CC = '.';
1091                                         pp_token.type = '.';
1092                                         return;
1093                                 )
1094                 ELSE('.')
1095         case '&':
1096                 MAYBE_PROLOG
1097                 MAYBE('&', TP_ANDAND)
1098                 MAYBE('=', TP_ANDEQUAL)
1099                 ELSE('&')
1100         case '*':
1101                 MAYBE_PROLOG
1102                 MAYBE('=', TP_ASTERISKEQUAL)
1103                 ELSE('*')
1104         case '+':
1105                 MAYBE_PROLOG
1106                 MAYBE('+', TP_PLUSPLUS)
1107                 MAYBE('=', TP_PLUSEQUAL)
1108                 ELSE('+')
1109         case '-':
1110                 MAYBE_PROLOG
1111                 MAYBE('>', TP_MINUSGREATER)
1112                 MAYBE('-', TP_MINUSMINUS)
1113                 MAYBE('=', TP_MINUSEQUAL)
1114                 ELSE('-')
1115         case '!':
1116                 MAYBE_PROLOG
1117                 MAYBE('=', TP_EXCLAMATIONMARKEQUAL)
1118                 ELSE('!')
1119         case '/':
1120                 MAYBE_PROLOG
1121                 MAYBE('=', TP_SLASHEQUAL)
1122                         case '*':
1123                                 next_char();
1124                                 skip_multiline_comment();
1125                                 goto restart;
1126                         case '/':
1127                                 next_char();
1128                                 skip_line_comment();
1129                                 goto restart;
1130                 ELSE('/')
1131         case '%':
1132                 MAYBE_PROLOG
1133                 MAYBE('>', '}')
1134                 MAYBE('=', TP_PERCENTEQUAL)
1135                         case ':':
1136                                 MAYBE_PROLOG
1137                                         case '%':
1138                                                 MAYBE_PROLOG
1139                                                 MAYBE(':', TP_HASHHASH)
1140                                                 ELSE_CODE(
1141                                                         put_back(CC);
1142                                                         CC = '%';
1143                                                         pp_token.type = '#';
1144                                                         return;
1145                                                 )
1146                                 ELSE('#')
1147                 ELSE('%')
1148         case '<':
1149                 MAYBE_PROLOG
1150                 MAYBE(':', '[')
1151                 MAYBE('%', '{')
1152                 MAYBE('=', TP_LESSEQUAL)
1153                         case '<':
1154                                 MAYBE_PROLOG
1155                                 MAYBE('=', TP_LESSLESSEQUAL)
1156                                 ELSE(TP_LESSLESS)
1157                 ELSE('<')
1158         case '>':
1159                 MAYBE_PROLOG
1160                 MAYBE('=', TP_GREATEREQUAL)
1161                         case '>':
1162                                 MAYBE_PROLOG
1163                                 MAYBE('=', TP_GREATERGREATEREQUAL)
1164                                 ELSE(TP_GREATERGREATER)
1165                 ELSE('>')
1166         case '^':
1167                 MAYBE_PROLOG
1168                 MAYBE('=', TP_CARETEQUAL)
1169                 ELSE('^')
1170         case '|':
1171                 MAYBE_PROLOG
1172                 MAYBE('=', TP_PIPEEQUAL)
1173                 MAYBE('|', TP_PIPEPIPE)
1174                 ELSE('|')
1175         case ':':
1176                 MAYBE_PROLOG
1177                 MAYBE('>', ']')
1178                 ELSE(':')
1179         case '=':
1180                 MAYBE_PROLOG
1181                 MAYBE('=', TP_EQUALEQUAL)
1182                 ELSE('=')
1183         case '#':
1184                 MAYBE_PROLOG
1185                 MAYBE('#', TP_HASHHASH)
1186                 ELSE('#')
1187
1188         case '?':
1189         case '[':
1190         case ']':
1191         case '(':
1192         case ')':
1193         case '{':
1194         case '}':
1195         case '~':
1196         case ';':
1197         case ',':
1198         case '\\':
1199                 pp_token.type = CC;
1200                 next_char();
1201                 return;
1202
1203         case EOF:
1204                 if (input_stack != NULL) {
1205                         close_input();
1206                         pop_restore_input();
1207                         counted_newlines = 0;
1208                         counted_spaces   = 0;
1209                         /* hack to output correct line number */
1210                         print_line_directive(&input.position, "2");
1211                         next_preprocessing_token();
1212                 } else {
1213                         pp_token.type = TP_EOF;
1214                 }
1215                 return;
1216
1217         default:
1218                 next_char();
1219                 errorf(&pp_token.source_position, "unknown character '%c' found\n", CC);
1220                 pp_token.type = TP_ERROR;
1221                 return;
1222         }
1223 }
1224
1225 static void print_quoted_string(const char *const string)
1226 {
1227         fputc('"', out);
1228         for (const char *c = string; *c != 0; ++c) {
1229                 switch(*c) {
1230                 case '"': fputs("\\\"", out); break;
1231                 case '\\':  fputs("\\\\", out); break;
1232                 case '\a':  fputs("\\a", out); break;
1233                 case '\b':  fputs("\\b", out); break;
1234                 case '\f':  fputs("\\f", out); break;
1235                 case '\n':  fputs("\\n", out); break;
1236                 case '\r':  fputs("\\r", out); break;
1237                 case '\t':  fputs("\\t", out); break;
1238                 case '\v':  fputs("\\v", out); break;
1239                 case '\?':  fputs("\\?", out); break;
1240                 default:
1241                         if(!isprint(*c)) {
1242                                 fprintf(out, "\\%03o", *c);
1243                                 break;
1244                         }
1245                         fputc(*c, out);
1246                         break;
1247                 }
1248         }
1249         fputc('"', out);
1250 }
1251
1252 static void print_line_directive(const source_position_t *pos, const char *add)
1253 {
1254         fprintf(out, "# %d ", pos->linenr);
1255         print_quoted_string(pos->input_name);
1256         if (add != NULL) {
1257                 fputc(' ', out);
1258                 fputs(add, out);
1259         }
1260         fputc('\n', out);
1261
1262         printed_input_name = pos->input_name;
1263 }
1264
1265 static void print_spaces(void)
1266 {
1267         if (counted_newlines >= 8) {
1268                 if (input.had_non_space) {
1269                         fputc('\n', out);
1270                 }
1271                 print_line_directive(&pp_token.source_position, NULL);
1272                 counted_newlines = 0;
1273         } else {
1274                 for (unsigned i = 0; i < counted_newlines; ++i)
1275                         fputc('\n', out);
1276                 counted_newlines = 0;
1277         }
1278         for (unsigned i = 0; i < counted_spaces; ++i)
1279                 fputc(' ', out);
1280         counted_spaces = 0;
1281 }
1282
1283 static void emit_pp_token(void)
1284 {
1285         if (skip_mode)
1286                 return;
1287
1288         if (pp_token.type != '\n') {
1289                 print_spaces();
1290                 input.had_non_space = true;
1291         }
1292
1293         switch(pp_token.type) {
1294         case TP_IDENTIFIER:
1295                 fputs(pp_token.v.symbol->string, out);
1296                 break;
1297         case TP_NUMBER:
1298                 fputs(pp_token.v.string.begin, out);
1299                 break;
1300         case TP_STRING_LITERAL:
1301                 fputc('"', out);
1302                 fputs(pp_token.v.string.begin, out);
1303                 fputc('"', out);
1304                 break;
1305         case '\n':
1306                 break;
1307         default:
1308                 print_pp_token_type(out, pp_token.type);
1309                 break;
1310         }
1311 }
1312
1313 static void eat_pp_directive(void)
1314 {
1315         while(pp_token.type != '\n' && pp_token.type != TP_EOF) {
1316                 next_preprocessing_token();
1317         }
1318 }
1319
1320 static bool strings_equal(const string_t *string1, const string_t *string2)
1321 {
1322         size_t size = string1->size;
1323         if(size != string2->size)
1324                 return false;
1325
1326         const char *c1 = string1->begin;
1327         const char *c2 = string2->begin;
1328         for(size_t i = 0; i < size; ++i, ++c1, ++c2) {
1329                 if(*c1 != *c2)
1330                         return false;
1331         }
1332         return true;
1333 }
1334
1335 static bool wide_strings_equal(const wide_string_t *string1,
1336                                const wide_string_t *string2)
1337 {
1338         size_t size = string1->size;
1339         if(size != string2->size)
1340                 return false;
1341
1342         const wchar_rep_t *c1 = string1->begin;
1343         const wchar_rep_t *c2 = string2->begin;
1344         for(size_t i = 0; i < size; ++i, ++c1, ++c2) {
1345                 if(*c1 != *c2)
1346                         return false;
1347         }
1348         return true;
1349 }
1350
1351 static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
1352 {
1353         if(token1->type != token2->type)
1354                 return false;
1355
1356         switch(token1->type) {
1357         case TP_HEADERNAME:
1358                 /* TODO */
1359                 return false;
1360         case TP_IDENTIFIER:
1361                 return token1->v.symbol == token2->v.symbol;
1362         case TP_NUMBER:
1363         case TP_CHARACTER_CONSTANT:
1364         case TP_STRING_LITERAL:
1365                 return strings_equal(&token1->v.string, &token2->v.string);
1366
1367         case TP_WIDE_CHARACTER_CONSTANT:
1368         case TP_WIDE_STRING_LITERAL:
1369                 return wide_strings_equal(&token1->v.wide_string,
1370                                           &token2->v.wide_string);
1371         default:
1372                 return true;
1373         }
1374 }
1375
1376 static bool pp_definitions_equal(const pp_definition_t *definition1,
1377                                  const pp_definition_t *definition2)
1378 {
1379         if(definition1->list_len != definition2->list_len)
1380                 return false;
1381
1382         size_t         len = definition1->list_len;
1383         const token_t *t1  = definition1->token_list;
1384         const token_t *t2  = definition2->token_list;
1385         for(size_t i = 0; i < len; ++i, ++t1, ++t2) {
1386                 if(!pp_tokens_equal(t1, t2))
1387                         return false;
1388         }
1389         return true;
1390 }
1391
1392 static void parse_define_directive(void)
1393 {
1394         eat_pp(TP_define);
1395         assert(obstack_object_size(&pp_obstack) == 0);
1396
1397         if (pp_token.type != TP_IDENTIFIER) {
1398                 errorf(&pp_token.source_position,
1399                        "expected identifier after #define, got '%t'", &pp_token);
1400                 goto error_out;
1401         }
1402         symbol_t *symbol = pp_token.v.symbol;
1403
1404         pp_definition_t *new_definition
1405                 = obstack_alloc(&pp_obstack, sizeof(new_definition[0]));
1406         memset(new_definition, 0, sizeof(new_definition[0]));
1407         new_definition->source_position = input.position;
1408
1409         /* this is probably the only place where spaces are significant in the
1410          * lexer (except for the fact that they separate tokens). #define b(x)
1411          * is something else than #define b (x) */
1412         if (CC == '(') {
1413                 /* eat the '(' */
1414                 next_preprocessing_token();
1415                 /* get next token after '(' */
1416                 next_preprocessing_token();
1417
1418                 while (true) {
1419                         switch (pp_token.type) {
1420                         case TP_DOTDOTDOT:
1421                                 new_definition->is_variadic = true;
1422                                 next_preprocessing_token();
1423                                 if (pp_token.type != ')') {
1424                                         errorf(&input.position,
1425                                                         "'...' not at end of macro argument list");
1426                                         goto error_out;
1427                                 }
1428                                 break;
1429                         case TP_IDENTIFIER:
1430                                 obstack_ptr_grow(&pp_obstack, pp_token.v.symbol);
1431                                 next_preprocessing_token();
1432
1433                                 if (pp_token.type == ',') {
1434                                         next_preprocessing_token();
1435                                         break;
1436                                 }
1437
1438                                 if (pp_token.type != ')') {
1439                                         errorf(&pp_token.source_position,
1440                                                "expected ',' or ')' after identifier, got '%t'",
1441                                                &pp_token);
1442                                         goto error_out;
1443                                 }
1444                                 break;
1445                         case ')':
1446                                 next_preprocessing_token();
1447                                 goto finish_argument_list;
1448                         default:
1449                                 errorf(&pp_token.source_position,
1450                                        "expected identifier, '...' or ')' in #define argument list, got '%t'",
1451                                        &pp_token);
1452                                 goto error_out;
1453                         }
1454                 }
1455
1456         finish_argument_list:
1457                 new_definition->has_parameters = true;
1458                 new_definition->n_parameters
1459                         = obstack_object_size(&pp_obstack) / sizeof(new_definition->parameters[0]);
1460                 new_definition->parameters = obstack_finish(&pp_obstack);
1461         } else {
1462                 next_preprocessing_token();
1463         }
1464
1465         /* construct a new pp_definition on the obstack */
1466         assert(obstack_object_size(&pp_obstack) == 0);
1467         size_t list_len = 0;
1468         while (pp_token.type != '\n' && pp_token.type != TP_EOF) {
1469                 obstack_grow(&pp_obstack, &pp_token, sizeof(pp_token));
1470                 ++list_len;
1471                 next_preprocessing_token();
1472         }
1473
1474         new_definition->list_len   = list_len;
1475         new_definition->token_list = obstack_finish(&pp_obstack);
1476
1477         pp_definition_t *old_definition = symbol->pp_definition;
1478         if (old_definition != NULL) {
1479                 if (!pp_definitions_equal(old_definition, new_definition)) {
1480                         warningf(&input.position, "multiple definition of macro '%Y' (first defined %P)",
1481                                  symbol, &old_definition->source_position);
1482                 } else {
1483                         /* reuse the old definition */
1484                         obstack_free(&pp_obstack, new_definition);
1485                         new_definition = old_definition;
1486                 }
1487         }
1488
1489         symbol->pp_definition = new_definition;
1490         return;
1491
1492 error_out:
1493         if (obstack_object_size(&pp_obstack) > 0) {
1494                 char *ptr = obstack_finish(&pp_obstack);
1495                 obstack_free(&pp_obstack, ptr);
1496         }
1497         eat_pp_directive();
1498 }
1499
1500 static void parse_undef_directive(void)
1501 {
1502         eat_pp(TP_undef);
1503
1504         if(pp_token.type != TP_IDENTIFIER) {
1505                 errorf(&input.position,
1506                        "expected identifier after #undef, got '%t'", &pp_token);
1507                 eat_pp_directive();
1508                 return;
1509         }
1510
1511         symbol_t *symbol = pp_token.v.symbol;
1512         symbol->pp_definition = NULL;
1513         next_preprocessing_token();
1514
1515         if(pp_token.type != '\n') {
1516                 warningf(&input.position, "extra tokens at end of #undef directive");
1517         }
1518         /* eat until '\n' */
1519         eat_pp_directive();
1520 }
1521
1522 static const char *parse_headername(void)
1523 {
1524         /* behind an #include we can have the special headername lexems.
1525          * They're only allowed behind an #include so they're not recognized
1526          * by the normal next_preprocessing_token. We handle them as a special
1527          * exception here */
1528
1529         /* skip spaces so we reach start of next preprocessing token */
1530         skip_spaces(false);
1531
1532         assert(obstack_object_size(&input_obstack) == 0);
1533
1534         /* check wether we have a "... or <... headername */
1535         switch (CC) {
1536         case '<':
1537                 /* for now until we have proper searchpath handling */
1538                 obstack_1grow(&input_obstack, '.');
1539                 obstack_1grow(&input_obstack, '/');
1540
1541                 next_char();
1542                 while (true) {
1543                         switch (CC) {
1544                         case EOF:
1545                                 /* fallthrough */
1546                         MATCH_NEWLINE(
1547                                 parse_error("header name without closing '>'");
1548                                 return NULL;
1549                         )
1550                         case '>':
1551                                 next_char();
1552                                 goto finished_headername;
1553                         }
1554                         obstack_1grow(&input_obstack, (char) CC);
1555                         next_char();
1556                 }
1557                 /* we should never be here */
1558
1559         case '"':
1560                 /* for now until we have proper searchpath handling */
1561                 obstack_1grow(&input_obstack, '.');
1562                 obstack_1grow(&input_obstack, '/');
1563
1564                 next_char();
1565                 while (true) {
1566                         switch (CC) {
1567                         case EOF:
1568                                 /* fallthrough */
1569                         MATCH_NEWLINE(
1570                                 parse_error("header name without closing '>'");
1571                                 return NULL;
1572                         )
1573                         case '"':
1574                                 next_char();
1575                                 goto finished_headername;
1576                         }
1577                         obstack_1grow(&input_obstack, (char) CC);
1578                         next_char();
1579                 }
1580                 /* we should never be here */
1581
1582         default:
1583                 /* TODO: do normale pp_token parsing and concatenate results */
1584                 panic("pp_token concat include not implemented yet");
1585         }
1586
1587 finished_headername:
1588         obstack_1grow(&input_obstack, '\0');
1589         char *headername = obstack_finish(&input_obstack);
1590
1591         /* TODO: iterate search-path to find the file */
1592
1593         next_preprocessing_token();
1594
1595         return headername;
1596 }
1597
1598 static bool parse_include_directive(void)
1599 {
1600         /* don't eat the TP_include here!
1601          * we need an alternative parsing for the next token */
1602
1603         print_spaces();
1604
1605         const char *headername = parse_headername();
1606         if (headername == NULL) {
1607                 eat_pp_directive();
1608                 return false;
1609         }
1610
1611         if (pp_token.type != '\n' && pp_token.type != TP_EOF) {
1612                 warningf(&pp_token.source_position,
1613                          "extra tokens at end of #include directive");
1614                 eat_pp_directive();
1615         }
1616
1617         if (n_inputs > INCLUDE_LIMIT) {
1618                 errorf(&pp_token.source_position, "#include nested too deeply");
1619                 /* eat \n or EOF */
1620                 next_preprocessing_token();
1621                 return false;
1622         }
1623
1624         /* we have to reenable space counting and macro expansion here,
1625          * because it is still disabled in directive parsing,
1626          * but we will trigger a preprocessing token reading of the new file
1627          * now and need expansions/space counting */
1628         do_print_spaces = true;
1629         do_expansions   = true;
1630
1631         /* switch inputs */
1632         push_input();
1633         bool res = open_input(headername);
1634         if (!res) {
1635                 errorf(&pp_token.source_position,
1636                        "failed including '%s': %s", headername, strerror(errno));
1637                 pop_restore_input();
1638                 return false;
1639         }
1640
1641         return true;
1642 }
1643
1644 static pp_conditional_t *push_conditional(void)
1645 {
1646         pp_conditional_t *conditional
1647                 = obstack_alloc(&pp_obstack, sizeof(*conditional));
1648         memset(conditional, 0, sizeof(*conditional));
1649
1650         conditional->parent = conditional_stack;
1651         conditional_stack   = conditional;
1652
1653         return conditional;
1654 }
1655
1656 static void pop_conditional(void)
1657 {
1658         assert(conditional_stack != NULL);
1659         conditional_stack = conditional_stack->parent;
1660 }
1661
1662 static void check_unclosed_conditionals(void)
1663 {
1664         while (conditional_stack != NULL) {
1665                 pp_conditional_t *conditional = conditional_stack;
1666
1667                 if (conditional->in_else) {
1668                         errorf(&conditional->source_position, "unterminated #else");
1669                 } else {
1670                         errorf(&conditional->source_position, "unterminated condition");
1671                 }
1672                 pop_conditional();
1673         }
1674 }
1675
1676 static void parse_ifdef_ifndef_directive(void)
1677 {
1678         bool is_ifndef = (pp_token.type == TP_ifndef);
1679         bool condition;
1680         next_preprocessing_token();
1681
1682         if (skip_mode) {
1683                 eat_pp_directive();
1684                 pp_conditional_t *conditional = push_conditional();
1685                 conditional->source_position  = pp_token.source_position;
1686                 conditional->skip             = true;
1687                 return;
1688         }
1689
1690         if (pp_token.type != TP_IDENTIFIER) {
1691                 errorf(&pp_token.source_position,
1692                        "expected identifier after #%s, got '%t'",
1693                        is_ifndef ? "ifndef" : "ifdef", &pp_token);
1694                 eat_pp_directive();
1695
1696                 /* just take the true case in the hope to avoid further errors */
1697                 condition = true;
1698         } else {
1699                 symbol_t        *symbol        = pp_token.v.symbol;
1700                 pp_definition_t *pp_definition = symbol->pp_definition;
1701                 next_preprocessing_token();
1702
1703                 if (pp_token.type != '\n') {
1704                         errorf(&pp_token.source_position,
1705                                "extra tokens at end of #%s",
1706                                is_ifndef ? "ifndef" : "ifdef");
1707                         eat_pp_directive();
1708                 }
1709
1710                 /* evaluate wether we are in true or false case */
1711                 condition = is_ifndef ? pp_definition == NULL : pp_definition != NULL;
1712         }
1713
1714         pp_conditional_t *conditional = push_conditional();
1715         conditional->source_position  = pp_token.source_position;
1716         conditional->condition        = condition;
1717
1718         if (!condition) {
1719                 skip_mode = true;
1720         }
1721 }
1722
1723 static void parse_else_directive(void)
1724 {
1725         eat_pp(TP_else);
1726
1727         if (pp_token.type != '\n') {
1728                 if (!skip_mode) {
1729                         warningf(&pp_token.source_position, "extra tokens at end of #else");
1730                 }
1731                 eat_pp_directive();
1732         }
1733
1734         pp_conditional_t *conditional = conditional_stack;
1735         if (conditional == NULL) {
1736                 errorf(&pp_token.source_position, "#else without prior #if");
1737                 return;
1738         }
1739
1740         if (conditional->in_else) {
1741                 errorf(&pp_token.source_position,
1742                        "#else after #else (condition started %P)",
1743                        conditional->source_position);
1744                 skip_mode = true;
1745                 return;
1746         }
1747
1748         conditional->in_else = true;
1749         if (!conditional->skip) {
1750                 skip_mode = conditional->condition;
1751         }
1752         conditional->source_position = pp_token.source_position;
1753 }
1754
1755 static void parse_endif_directive(void)
1756 {
1757         eat_pp(TP_endif);
1758
1759         if (pp_token.type != '\n') {
1760                 if (!skip_mode) {
1761                         warningf(&pp_token.source_position,
1762                                  "extra tokens at end of #endif");
1763                 }
1764                 eat_pp_directive();
1765         }
1766
1767         pp_conditional_t *conditional = conditional_stack;
1768         if (conditional == NULL) {
1769                 errorf(&pp_token.source_position, "#endif without prior #if");
1770                 return;
1771         }
1772
1773         if (!conditional->skip) {
1774                 skip_mode = false;
1775         }
1776         pop_conditional();
1777 }
1778
1779 static void parse_preprocessing_directive(void)
1780 {
1781         do_print_spaces = false;
1782         do_expansions   = false;
1783         eat_pp('#');
1784
1785         if (skip_mode) {
1786                 switch(pp_token.type) {
1787                 case TP_ifdef:
1788                 case TP_ifndef:
1789                         parse_ifdef_ifndef_directive();
1790                         break;
1791                 case TP_else:
1792                         parse_else_directive();
1793                         break;
1794                 case TP_endif:
1795                         parse_endif_directive();
1796                         break;
1797                 default:
1798                         eat_pp_directive();
1799                         break;
1800                 }
1801         } else {
1802                 switch(pp_token.type) {
1803                 case TP_define:
1804                         parse_define_directive();
1805                         break;
1806                 case TP_undef:
1807                         parse_undef_directive();
1808                         break;
1809                 case TP_ifdef:
1810                 case TP_ifndef:
1811                         parse_ifdef_ifndef_directive();
1812                         break;
1813                 case TP_else:
1814                         parse_else_directive();
1815                         break;
1816                 case TP_endif:
1817                         parse_endif_directive();
1818                         break;
1819                 case TP_include: {
1820                         bool in_new_source = parse_include_directive();
1821                         /* no need to do anything if source file switched */
1822                         if (in_new_source)
1823                                 return;
1824                         break;
1825                 }
1826                 case '\n':
1827                         /* the nop directive */
1828                         break;
1829                 default:
1830                         errorf(&pp_token.source_position,
1831                                    "invalid preprocessing directive #%t", &pp_token);
1832                         eat_pp_directive();
1833                         break;
1834                 }
1835         }
1836
1837         do_print_spaces = true;
1838         do_expansions   = true;
1839
1840         /* eat '\n' */
1841         assert(pp_token.type == '\n' || pp_token.type == TP_EOF);
1842         next_preprocessing_token();
1843 }
1844
1845 #define GCC_COMPAT_MODE
1846
1847 int pptest_main(int argc, char **argv);
1848 int pptest_main(int argc, char **argv)
1849 {
1850         init_symbol_table();
1851         init_tokens();
1852
1853         obstack_init(&pp_obstack);
1854         obstack_init(&input_obstack);
1855
1856         const char *filename = "t.c";
1857         if (argc > 1)
1858                 filename = argv[1];
1859
1860         out = stdout;
1861
1862 #ifdef GCC_COMPAT_MODE
1863         /* this is here so we can directly compare "gcc -E" output and our output */
1864         fprintf(out, "# 1 \"%s\"\n", filename);
1865         fputs("# 1 \"<built-in>\"\n", out);
1866         fputs("# 1 \"<command-line>\"\n", out);
1867 #endif
1868
1869         bool ok = open_input(filename);
1870         assert(ok);
1871
1872         while(true) {
1873                 /* we're at a line begin */
1874                 if(pp_token.type == '#') {
1875                         parse_preprocessing_directive();
1876                 } else {
1877                         /* parse+emit a line */
1878                         while(pp_token.type != '\n') {
1879                                 if(pp_token.type == TP_EOF)
1880                                         goto end_of_main_loop;
1881                                 emit_pp_token();
1882                                 next_preprocessing_token();
1883                         }
1884                         emit_pp_token();
1885                         next_preprocessing_token();
1886                 }
1887         }
1888 end_of_main_loop:
1889
1890         check_unclosed_conditionals();
1891         close_input();
1892
1893         obstack_free(&input_obstack, NULL);
1894         obstack_free(&pp_obstack, NULL);
1895
1896         exit_tokens();
1897         exit_symbol_table();
1898
1899         return 0;
1900 }