nsz Git - cparser/blob - lexer.c

   1 #include <config.h>
   2
   3 #include "lexer.h"
   4 #include "token_t.h"
   5 #include "symbol_table_t.h"
   6 #include "adt/error.h"
   7 #include "adt/strset.h"
   8 #include "adt/util.h"
   9
  10 #include <assert.h>
  11 #include <errno.h>
  12 #include <string.h>
  13 #include <ctype.h>
  14
/* When defined, put_back() and next_char() print every character they
 * handle to stdout. */
#define DEBUG_CHARS
/* Number of spare bytes kept in front of the data area of buf (see
 * next_real_char()), leaving room for put_back() to step bufpos backwards. */
#define MAX_PUTBACK 3

/* Current input character, or EOF once the input is exhausted. */
static int         c;
/* The token most recently produced by the lexer. */
token_t            lexer_token;
/* Stream the lexer reads from; set by lexer_open_stream(). */
static FILE       *input;
/* Read buffer: MAX_PUTBACK spare bytes followed by up to 1024 input bytes. */
static char        buf[1024 + MAX_PUTBACK];
/* One past the last valid input byte in buf. */
static const char *bufend;
/* Position in buf of the byte most recently read by next_real_char(). */
static const char *bufpos;
/* String set into which finished string literals are inserted. */
static strset_t    stringset;
  25
  26 static void error_prefix_at(const char *input_name, unsigned linenr)
  27 {
  28         fprintf(stderr, "%s:%u: Error: ", input_name, linenr);
  29 }
  30
  31 static void error_prefix(void)
  32 {
  33         error_prefix_at(lexer_token.source_position.input_name,
  34                         lexer_token.source_position.linenr);
  35 }
  36
  37 static void parse_error(const char *msg)
  38 {
  39         error_prefix();
  40         fprintf(stderr, "%s\n", msg);
  41 }
  42
  43 static inline void next_real_char(void)
  44 {
  45         bufpos++;
  46         if(bufpos >= bufend) {
  47                 size_t s = fread(buf + MAX_PUTBACK, 1, sizeof(buf) - MAX_PUTBACK,
  48                                  input);
  49                 if(s == 0) {
  50                         c = EOF;
  51                         return;
  52                 }
  53                 bufpos = buf + MAX_PUTBACK;
  54                 bufend = buf + MAX_PUTBACK + s;
  55         }
  56         c = *(bufpos);
  57 }
  58
  59 static inline void put_back(int pc)
  60 {
  61         char *p = (char*) bufpos - 1;
  62         bufpos--;
  63         assert(p >= buf);
  64         *p = pc;
  65
  66 #ifdef DEBUG_CHARS
  67         printf("putback '%c'\n", pc);
  68 #endif
  69 }
  70
/* Defined further down; needed by MATCH_NEWLINE, eat() and
 * maybe_concat_lines(). */
static inline void next_char(void);

/*
 * Expands to two switch cases that handle a line ending: '\r' (optionally
 * followed by '\n') and a lone '\n'. Each case consumes the line ending,
 * increments lexer_token.source_position.linenr and then executes `code`.
 * `code` has to leave the case itself (break, return, ...); otherwise the
 * '\r' case falls through into the '\n' case.
 */
#define MATCH_NEWLINE(code)                   \
        case '\r':                                \
                next_char();                          \
                if(c == '\n') {                       \
                        next_char();                      \
                }                                     \
                lexer_token.source_position.linenr++; \
                code;                                 \
        case '\n':                                \
                next_char();                          \
                lexer_token.source_position.linenr++; \
                code;
  85
  86 static inline void eat(char c_type)
  87 {
  88         assert(c == c_type);
  89         next_char();
  90 }
  91
  92 static void maybe_concat_lines(void)
  93 {
  94         eat('\\');
  95
  96         switch(c) {
  97         MATCH_NEWLINE(return;)
  98
  99         default:
 100                 break;
 101         }
 102
 103         put_back(c);
 104         c = '\\';
 105 }
 106
 107 static inline void next_char(void)
 108 {
 109         next_real_char();
 110
 111         /* filter trigraphs */
 112         if(UNLIKELY(c == '\\')) {
 113                 maybe_concat_lines();
 114                 goto end_of_next_char;
 115         }
 116
 117         if(LIKELY(c != '?'))
 118                 goto end_of_next_char;
 119
 120         next_real_char();
 121         if(LIKELY(c != '?')) {
 122                 put_back(c);
 123                 c = '?';
 124                 goto end_of_next_char;
 125         }
 126
 127         next_real_char();
 128         switch(c) {
 129         case '=': c = '#'; break;
 130         case '(': c = '['; break;
 131         case '/': c = '\\'; maybe_concat_lines(); break;
 132         case ')': c = ']'; break;
 133         case '\'': c = '^'; break;
 134         case '<': c = '{'; break;
 135         case '!': c = '|'; break;
 136         case '>': c = '}'; break;
 137         case '-': c = '~'; break;
 138         default:
 139                 put_back('?');
 140                 put_back(c);
 141                 c = '?';
 142                 break;
 143         }
 144
 145 end_of_next_char:
 146 #ifdef DEBUG_CHARS
 147         printf("nchar '%c'\n", c);
 148 #else
 149         ;
 150 #endif
 151 }
 152
/*
 * Case labels for every character that may start a symbol: the ASCII
 * letters and the underscore. Meant to be placed inside a switch over c;
 * the statements following the macro form the body of the last label.
 */
#define SYMBOL_CHARS  \
        case 'a':         \
        case 'b':         \
        case 'c':         \
        case 'd':         \
        case 'e':         \
        case 'f':         \
        case 'g':         \
        case 'h':         \
        case 'i':         \
        case 'j':         \
        case 'k':         \
        case 'l':         \
        case 'm':         \
        case 'n':         \
        case 'o':         \
        case 'p':         \
        case 'q':         \
        case 'r':         \
        case 's':         \
        case 't':         \
        case 'u':         \
        case 'v':         \
        case 'w':         \
        case 'x':         \
        case 'y':         \
        case 'z':         \
        case 'A':         \
        case 'B':         \
        case 'C':         \
        case 'D':         \
        case 'E':         \
        case 'F':         \
        case 'G':         \
        case 'H':         \
        case 'I':         \
        case 'J':         \
        case 'K':         \
        case 'L':         \
        case 'M':         \
        case 'N':         \
        case 'O':         \
        case 'P':         \
        case 'Q':         \
        case 'R':         \
        case 'S':         \
        case 'T':         \
        case 'U':         \
        case 'V':         \
        case 'W':         \
        case 'X':         \
        case 'Y':         \
        case 'Z':         \
        case '_':

/*
 * Case labels for the ten decimal digits, used the same way as
 * SYMBOL_CHARS.
 */
#define DIGITS        \
        case '0':         \
        case '1':         \
        case '2':         \
        case '3':         \
        case '4':         \
        case '5':         \
        case '6':         \
        case '7':         \
        case '8':         \
        case '9':
 219
 220 static void parse_symbol(void)
 221 {
 222         symbol_t *symbol;
 223         char     *string;
 224
 225         obstack_1grow(&symbol_obstack, c);
 226         next_char();
 227
 228         while(1) {
 229                 switch(c) {
 230                 DIGITS
 231                 SYMBOL_CHARS
 232                         obstack_1grow(&symbol_obstack, c);
 233                         next_char();
 234                         break;
 235
 236                 default:
 237                         goto end_symbol;
 238                 }
 239         }
 240
 241 end_symbol:
 242         obstack_1grow(&symbol_obstack, '\0');
 243
 244         string = obstack_finish(&symbol_obstack);
 245         symbol = symbol_table_insert(string);
 246
 247         lexer_token.type     = symbol->ID;
 248         lexer_token.v.symbol = symbol;
 249
 250         if(symbol->string != string) {
 251                 obstack_free(&symbol_obstack, string);
 252         }
 253 }
 254
 255 static void parse_integer_suffix(void)
 256 {
 257         if(c == 'U' || c == 'U') {
 258                 /* TODO do something with the suffixes... */
 259                 next_char();
 260                 if(c == 'L' || c == 'l') {
 261                         next_char();
 262                         if(c == 'L' || c == 'l') {
 263                                 next_char();
 264                         }
 265                 }
 266         } else if(c == 'l' || c == 'L') {
 267                 next_char();
 268                 if(c == 'l' || c == 'L') {
 269                         next_char();
 270                         if(c == 'u' || c == 'U') {
 271                                 next_char();
 272                         }
 273                 } else if(c == 'u' || c == 'U') {
 274                         next_char();
 275                 }
 276         }
 277 }
 278
 279 static void parse_number_hex(void)
 280 {
 281         assert(c == 'x' || c == 'X');
 282         next_char();
 283
 284         if (!isdigit(c) &&
 285                 !('A' <= c && c <= 'F') &&
 286                 !('a' <= c && c <= 'f')) {
 287                 parse_error("premature end of hex number literal");
 288                 lexer_token.type = T_ERROR;
 289                 return;
 290         }
 291
 292         int value = 0;
 293         while(1) {
 294                 if (isdigit(c)) {
 295                         value = 16 * value + c - '0';
 296                 } else if ('A' <= c && c <= 'F') {
 297                         value = 16 * value + c - 'A' + 10;
 298                 } else if ('a' <= c && c <= 'f') {
 299                         value = 16 * value + c - 'a' + 10;
 300                 } else {
 301                         parse_integer_suffix();
 302
 303                         lexer_token.type       = T_INTEGER;
 304                         lexer_token.v.intvalue = value;
 305                         return;
 306                 }
 307                 next_char();
 308         }
 309
 310         if(c == '.' || c == 'p' || c == 'P') {
 311                 next_char();
 312                 panic("Hex floating point numbers not implemented yet");
 313         }
 314 }
 315
 316 static void parse_number_oct(void)
 317 {
 318         int value = 0;
 319         while(c >= '0' && c <= '7') {
 320                 value = 8 * value + c - '0';
 321                 next_char();
 322         }
 323         if (c == '8' || c == '9') {
 324                 parse_error("invalid octal number");
 325                 lexer_token.type = T_ERROR;
 326                 return;
 327         }
 328
 329         lexer_token.type       = T_INTEGER;
 330         lexer_token.v.intvalue = value;
 331
 332         parse_integer_suffix();
 333 }
 334
 335 static void parse_floatingpoint_exponent(long double value)
 336 {
 337         unsigned int expo = 0;
 338         long double  factor = 10.;
 339
 340         if(c == '-') {
 341                 next_char();
 342                 factor = 0.1;
 343         } else if(c == '+') {
 344                 next_char();
 345         }
 346
 347         while(c >= '0' && c <= '9') {
 348                 expo = 10 * expo + (c - '0');
 349                 next_char();
 350         }
 351
 352         while(1) {
 353                 if(expo & 1)
 354                         value *= factor;
 355                 expo >>= 1;
 356                 if(expo == 0)
 357                         break;
 358                 factor *= factor;
 359         }
 360
 361         lexer_token.type         = T_FLOATINGPOINT;
 362         lexer_token.v.floatvalue = value;
 363 }
 364
 365 static void parse_floatingpoint_fract(int integer_part)
 366 {
 367         long double value  = integer_part;
 368         long double factor = 1.;
 369
 370         while(c >= '0' && c <= '9') {
 371                 factor *= 0.1;
 372                 value  += (c - '0') * factor;
 373                 next_char();
 374         }
 375
 376         if(c == 'e' || c == 'E') {
 377                 next_char();
 378                 parse_floatingpoint_exponent(value);
 379                 return;
 380         }
 381
 382         lexer_token.type         = T_FLOATINGPOINT;
 383         lexer_token.v.floatvalue = value;
 384 }
 385
 386 static void parse_number_dec(void)
 387 {
 388         int value = 0;
 389
 390         while(isdigit(c)) {
 391                 value = 10 * value + c - '0';
 392                 next_char();
 393         }
 394
 395         if(c == '.') {
 396                 next_char();
 397                 parse_floatingpoint_fract(value);
 398                 return;
 399         }
 400         if(c == 'e' || c == 'E') {
 401                 next_char();
 402                 parse_floatingpoint_exponent(value);
 403                 return;
 404         }
 405         parse_integer_suffix();
 406
 407         lexer_token.type       = T_INTEGER;
 408         lexer_token.v.intvalue = value;
 409 }
 410
 411 static void parse_number(void)
 412 {
 413         if (c == '0') {
 414                 next_char();
 415                 switch (c) {
 416                         case 'X':
 417                         case 'x':
 418                                 parse_number_hex();
 419                                 break;
 420                         case '0':
 421                         case '1':
 422                         case '2':
 423                         case '3':
 424                         case '4':
 425                         case '5':
 426                         case '6':
 427                         case '7':
 428                                 parse_number_oct();
 429                                 break;
 430                         case '.':
 431                                 next_char();
 432                                 parse_floatingpoint_fract(0);
 433                                 break;
 434                         case 'e':
 435                         case 'E':
 436                                 parse_floatingpoint_exponent(0);
 437                                 break;
 438                         case '8':
 439                         case '9':
 440                                 next_char();
 441                                 parse_error("invalid octal number");
 442                                 lexer_token.type = T_ERROR;
 443                                 return;
 444                         default:
 445                                 put_back(c);
 446                                 c = '0';
 447                                 parse_number_dec();
 448                                 return;
 449                 }
 450         } else {
 451                 parse_number_dec();
 452         }
 453 }
 454
 455 static int parse_octal_sequence(void)
 456 {
 457         int value = 0;
 458         while(1) {
 459                 if(c < '0' || c > '7')
 460                         break;
 461                 value = 8 * value + c - '0';
 462                 next_char();
 463         }
 464
 465         return value;
 466 }
 467
 468 static int parse_hex_sequence(void)
 469 {
 470         int value = 0;
 471         while(1) {
 472                 if (c >= '0' && c <= '9') {
 473                         value = 16 * value + c - '0';
 474                 } else if ('A' <= c && c <= 'F') {
 475                         value = 16 * value + c - 'A' + 10;
 476                 } else if ('a' <= c && c <= 'f') {
 477                         value = 16 * value + c - 'a' + 10;
 478                 } else {
 479                         break;
 480                 }
 481                 next_char();
 482         }
 483
 484         return value;
 485 }
 486
 487 static int parse_escape_sequence(void)
 488 {
 489         eat('\\');
 490
 491         int ec = c;
 492         next_char();
 493
 494         switch(ec) {
 495         case '"':  return '"';
 496         case '\'': return'\'';
 497         case '\\': return '\\';
 498         case '?': return '\?';
 499         case 'a': return '\a';
 500         case 'b': return '\b';
 501         case 'f': return '\f';
 502         case 'n': return '\n';
 503         case 'r': return '\r';
 504         case 't': return '\t';
 505         case 'v': return '\v';
 506         case 'x':
 507                 return parse_hex_sequence();
 508         case '0':
 509         case '1':
 510         case '2':
 511         case '3':
 512         case '4':
 513         case '5':
 514         case '6':
 515         case '7':
 516                 return parse_octal_sequence();
 517         case EOF:
 518                 parse_error("reached end of file while parsing escape sequence");
 519                 return EOF;
 520         default:
 521                 parse_error("unknown escape sequence");
 522                 return EOF;
 523         }
 524 }
 525
 526 const char *concat_strings(const char *s1, const char *s2)
 527 {
 528         size_t  len1   = strlen(s1);
 529         size_t  len2   = strlen(s2);
 530
 531         char   *concat = obstack_alloc(&symbol_obstack, len1 + len2 + 1);
 532         memcpy(concat, s1, len1);
 533         memcpy(concat + len1, s2, len2 + 1);
 534
 535         const char *result = strset_insert(&stringset, concat);
 536         if(result != concat) {
 537                 obstack_free(&symbol_obstack, concat);
 538         }
 539
 540         return result;
 541 }
 542
 543 static void parse_string_literal(void)
 544 {
 545         unsigned    start_linenr = lexer_token.source_position.linenr;
 546         char       *string;
 547         const char *result;
 548
 549         assert(c == '"');
 550         next_char();
 551
 552         int tc;
 553         while(1) {
 554                 switch(c) {
 555                 case '\\':
 556                         tc = parse_escape_sequence();
 557                         obstack_1grow(&symbol_obstack, tc);
 558                         break;
 559
 560                 case EOF:
 561                         error_prefix_at(lexer_token.source_position.input_name,
 562                                         start_linenr);
 563                         fprintf(stderr, "string has no end\n");
 564                         lexer_token.type = T_ERROR;
 565                         return;
 566
 567                 case '"':
 568                         next_char();
 569                         goto end_of_string;
 570
 571                 default:
 572                         obstack_1grow(&symbol_obstack, c);
 573                         next_char();
 574                         break;
 575                 }
 576         }
 577
 578 end_of_string:
 579
 580         /* TODO: concatenate multiple strings separated by whitespace... */
 581
 582         /* add finishing 0 to the string */
 583         obstack_1grow(&symbol_obstack, '\0');
 584         string = obstack_finish(&symbol_obstack);
 585
 586         /* check if there is already a copy of the string */
 587         result = strset_insert(&stringset, string);
 588         if(result != string) {
 589                 obstack_free(&symbol_obstack, string);
 590         }
 591
 592         lexer_token.type     = T_STRING_LITERAL;
 593         lexer_token.v.string = result;
 594 }
 595
 596 static void parse_character_constant(void)
 597 {
 598         eat('\'');
 599
 600         int found_char = 0;
 601         while(1) {
 602                 switch(c) {
 603                 case '\\':
 604                         found_char = parse_escape_sequence();
 605                         break;
 606
 607                 MATCH_NEWLINE(
 608                         parse_error("newline while parsing character constant");
 609                         break;
 610                 )
 611
 612                 case '\'':
 613                         next_char();
 614                         goto end_of_char_constant;
 615
 616                 case EOF:
 617                         parse_error("EOF while parsing character constant");
 618                         lexer_token.type = T_ERROR;
 619                         return;
 620
 621                 default:
 622                         if(found_char != 0) {
 623                                 parse_error("more than 1 characters in character "
 624                                             "constant");
 625                                 goto end_of_char_constant;
 626                         } else {
 627                                 found_char = c;
 628                                 next_char();
 629                         }
 630                         break;
 631                 }
 632         }
 633
 634 end_of_char_constant:
 635         lexer_token.type       = T_INTEGER;
 636         lexer_token.v.intvalue = found_char;
 637 }
 638
 639 static void skip_multiline_comment(void)
 640 {
 641         unsigned start_linenr = lexer_token.source_position.linenr;
 642
 643         while(1) {
 644                 switch(c) {
 645                 case '*':
 646                         next_char();
 647                         if(c == '/') {
 648                                 next_char();
 649                                 return;
 650                         }
 651                         break;
 652
 653                 MATCH_NEWLINE(break;)
 654
 655                 case EOF:
 656                         error_prefix_at(lexer_token.source_position.input_name,
 657                                         start_linenr);
 658                         fprintf(stderr, "at end of file while looking for comment end\n");
 659                         return;
 660
 661                 default:
 662                         next_char();
 663                         break;
 664                 }
 665         }
 666 }
 667
 668 static void skip_line_comment(void)
 669 {
 670         while(1) {
 671                 switch(c) {
 672                 case EOF:
 673                         return;
 674
 675                 case '\n':
 676                 case '\r':
 677                         return;
 678
 679                 default:
 680                         next_char();
 681                         break;
 682                 }
 683         }
 684 }
 685
 686 static token_t pp_token;
 687
 688 static inline void next_pp_token(void)
 689 {
 690         lexer_next_preprocessing_token();
 691         pp_token = lexer_token;
 692 }
 693
 694 static void eat_until_newline(void)
 695 {
 696         while(pp_token.type != '\n' && pp_token.type != T_EOF) {
 697                 next_pp_token();
 698         }
 699 }
 700
 701 static void error_directive(void)
 702 {
 703         error_prefix();
 704         fprintf(stderr, "#error directive: \n");
 705
 706         /* parse pp-tokens until new-line */
 707 }
 708
 709 static void define_directive(void)
 710 {
 711         lexer_next_preprocessing_token();
 712         if(lexer_token.type != T_IDENTIFIER) {
 713                 parse_error("expected identifier after #define\n");
 714                 eat_until_newline();
 715         }
 716 }
 717
 718 static void ifdef_directive(int is_ifndef)
 719 {
 720         (void) is_ifndef;
 721         lexer_next_preprocessing_token();
 722         //expect_identifier();
 723         //extect_newline();
 724 }
 725
 726 static void endif_directive(void)
 727 {
 728         //expect_newline();
 729 }
 730
 731 static void parse_line_directive(void)
 732 {
 733         if(pp_token.type != T_INTEGER) {
 734                 parse_error("expected integer");
 735         } else {
 736                 lexer_token.source_position.linenr = pp_token.v.intvalue - 1;
 737                 next_pp_token();
 738         }
 739         if(pp_token.type == T_STRING_LITERAL) {
 740                 lexer_token.source_position.input_name = pp_token.v.string;
 741                 next_pp_token();
 742         }
 743
 744         eat_until_newline();
 745 }
 746
 747 static void parse_preprocessor_identifier(void)
 748 {
 749         assert(pp_token.type == T_IDENTIFIER);
 750         symbol_t *symbol = pp_token.v.symbol;
 751
 752         switch(symbol->pp_ID) {
 753         case TP_include:
 754                 printf("include - enable header name parsing!\n");
 755                 break;
 756         case TP_define:
 757                 define_directive();
 758                 break;
 759         case TP_ifdef:
 760                 ifdef_directive(0);
 761                 break;
 762         case TP_ifndef:
 763                 ifdef_directive(1);
 764                 break;
 765         case TP_endif:
 766                 endif_directive();
 767                 break;
 768         case TP_line:
 769                 next_pp_token();
 770                 parse_line_directive();
 771                 break;
 772         case TP_if:
 773         case TP_else:
 774         case TP_elif:
 775         case TP_undef:
 776         case TP_error:
 777                 error_directive();
 778                 break;
 779         case TP_pragma:
 780                 break;
 781         }
 782 }
 783
 784 static void parse_preprocessor_directive()
 785 {
 786         next_pp_token();
 787
 788         switch(pp_token.type) {
 789         case T_IDENTIFIER:
 790                 parse_preprocessor_identifier();
 791                 break;
 792         case T_INTEGER:
 793                 parse_line_directive();
 794                 break;
 795         default:
 796                 parse_error("invalid preprocessor directive");
 797                 eat_until_newline();
 798                 break;
 799         }
 800 }
 801
/*
 * Helper macros for lexing punctuators that share a first character. They
 * are used inside a `case` of the switch in lexer_next_preprocessing_token()
 * in the order MAYBE_PROLOG, any number of MAYBE (or hand-written cases),
 * and finally ELSE or ELSE_CODE.
 */

/* Consumes the first character and opens a `while(1) { switch(c) {` on the
 * character that follows it. */
#define MAYBE_PROLOG                                       \
                        next_char();                                   \
                        while(1) {                                     \
                                switch(c) {

/* If the following character is `ch`: consume it, set the token type to
 * `set_type` and return from the enclosing function. */
#define MAYBE(ch, set_type)                                \
                                case ch:                                   \
                                        next_char();                           \
                                        lexer_token.type = set_type;           \
                                        return;

/* Default case running `code`; closes the switch and the loop opened by
 * MAYBE_PROLOG and ends the surrounding case with `break`. */
#define ELSE_CODE(code)                                    \
                                default:                                   \
                                        code;                                  \
                                }                                          \
                        } /* end of while(1) */                        \
                        break;

/* Default case that sets the token type to `set_type` and returns without
 * consuming the following character. */
#define ELSE(set_type)                                     \
                ELSE_CODE(                                         \
                        lexer_token.type = set_type;                   \
                        return;                                        \
                )
 825
 826 void lexer_next_preprocessing_token(void)
 827 {
 828         while(1) {
 829                 switch(c) {
 830                 case ' ':
 831                 case '\t':
 832                         next_char();
 833                         break;
 834
 835                 MATCH_NEWLINE(
 836                         lexer_token.type = '\n';
 837                         return;
 838                 )
 839
 840                 SYMBOL_CHARS
 841                         parse_symbol();
 842                         return;
 843
 844                 DIGITS
 845                         parse_number();
 846                         return;
 847
 848                 case '"':
 849                         parse_string_literal();
 850                         return;
 851
 852                 case '\'':
 853                         parse_character_constant();
 854                         return;
 855
 856                 case '.':
 857                         MAYBE_PROLOG
 858                                 case '.':
 859                                         MAYBE_PROLOG
 860                                         MAYBE('.', T_DOTDOTDOT)
 861                                         ELSE_CODE(
 862                                                 put_back(c);
 863                                                 c = '.';
 864                                                 lexer_token.type = '.';
 865                                                 return;
 866                                         )
 867                         ELSE('.')
 868                 case '&':
 869                         MAYBE_PROLOG
 870                         MAYBE('&', T_ANDAND)
 871                         MAYBE('=', T_ANDEQUAL)
 872                         ELSE('&')
 873                 case '*':
 874                         MAYBE_PROLOG
 875                         MAYBE('=', T_ASTERISKEQUAL)
 876                         ELSE('*')
 877                 case '+':
 878                         MAYBE_PROLOG
 879                         MAYBE('+', T_PLUSPLUS)
 880                         MAYBE('=', T_PLUSEQUAL)
 881                         ELSE('+')
 882                 case '-':
 883                         MAYBE_PROLOG
 884                         MAYBE('>', T_MINUSGREATER)
 885                         MAYBE('-', T_MINUSMINUS)
 886                         MAYBE('=', T_MINUSEQUAL)
 887                         ELSE('-')
 888                 case '!':
 889                         MAYBE_PROLOG
 890                         MAYBE('=', T_EXCLAMATIONMARKEQUAL)
 891                         ELSE('!')
 892                 case '/':
 893                         MAYBE_PROLOG
 894                         MAYBE('=', T_SLASHEQUAL)
 895                                 case '*':
 896                                         next_char();
 897                                         skip_multiline_comment();
 898                                         lexer_next_preprocessing_token();
 899                                         return;
 900                                 case '/':
 901                                         next_char();
 902                                         skip_line_comment();
 903                                         lexer_next_preprocessing_token();
 904                                         return;
 905                         ELSE('/')
 906                 case '%':
 907                         MAYBE_PROLOG
 908                         MAYBE('>', T_PERCENTGREATER)
 909                         MAYBE('=', T_PERCENTEQUAL)
 910                                 case ':':
 911                                         MAYBE_PROLOG
 912                                                 case '%':
 913                                                         MAYBE_PROLOG
 914                                                         MAYBE(':', T_PERCENTCOLONPERCENTCOLON)
 915                                                         ELSE_CODE(
 916                                                                 put_back(c);
 917                                                                 c = '%';
 918                                                                 lexer_token.type = T_PERCENTCOLON;
 919                                                                 return;
 920                                                         )
 921                                         ELSE(T_PERCENTCOLON)
 922                         ELSE('%')
 923                 case '<':
 924                         MAYBE_PROLOG
 925                         MAYBE(':', T_LESSCOLON)
 926                         MAYBE('%', T_LESSPERCENT)
 927                         MAYBE('=', T_LESSEQUAL)
 928                                 case '<':
 929                                         MAYBE_PROLOG
 930                                         MAYBE('=', T_LESSLESSEQUAL)
 931                                         ELSE(T_LESSLESS)
 932                         ELSE('<')
 933                 case '>':
 934                         MAYBE_PROLOG
 935                         MAYBE('=', T_GREATEREQUAL)
 936                                 case '>':
 937                                         MAYBE_PROLOG
 938                                         MAYBE('=', T_GREATERGREATEREQUAL)
 939                                         ELSE(T_GREATERGREATER)
 940                         ELSE('>')
 941                 case '^':
 942                         MAYBE_PROLOG
 943                         MAYBE('=', T_CARETEQUAL)
 944                         ELSE('^')
 945                 case '|':
 946                         MAYBE_PROLOG
 947                         MAYBE('=', T_PIPEEQUAL)
 948                         MAYBE('|', T_PIPEPIPE)
 949                         ELSE('|')
 950                 case ':':
 951                         MAYBE_PROLOG
 952                         MAYBE('>', T_COLONGREATER)
 953                         ELSE(':')
 954                 case '=':
 955                         MAYBE_PROLOG
 956                         MAYBE('=', T_EQUALEQUAL)
 957                         ELSE('=')
 958                 case '#':
 959                         MAYBE_PROLOG
 960                         MAYBE('#', T_HASHHASH)
 961                         ELSE('#')
 962
 963                 case '?':
 964                 case '[':
 965                 case ']':
 966                 case '(':
 967                 case ')':
 968                 case '{':
 969                 case '}':
 970                 case '~':
 971                 case ';':
 972                 case ',':
 973                 case '\\':
 974                         lexer_token.type = c;
 975                         next_char();
 976                         return;
 977
 978                 case EOF:
 979                         lexer_token.type = T_EOF;
 980                         return;
 981
 982                 default:
 983                         next_char();
 984                         error_prefix();
 985                         fprintf(stderr, "unknown character '%c' found\n", c);
 986                         lexer_token.type = T_ERROR;
 987                         return;
 988                 }
 989         }
 990 }
 991
 992 void lexer_next_token(void)
 993 {
 994         lexer_next_preprocessing_token();
 995         if(lexer_token.type != '\n')
 996                 return;
 997
 998 newline_found:
 999         do {
1000                 lexer_next_preprocessing_token();
1001         } while(lexer_token.type == '\n');
1002
1003         if(lexer_token.type == '#') {
1004                 parse_preprocessor_directive();
1005                 goto newline_found;
1006         }
1007 }
1008
1009 void init_lexer(void)
1010 {
1011         strset_init(&stringset);
1012 }
1013
1014 void lexer_open_stream(FILE *stream, const char *input_name)
1015 {
1016         input                                  = stream;
1017         lexer_token.source_position.linenr     = 1;
1018         lexer_token.source_position.input_name = input_name;
1019
1020         next_char();
1021 }
1022
1023 void exit_lexer(void)
1024 {
1025         strset_destroy(&stringset);
1026 }
1027
1028 static __attribute__((unused))
1029 void dbg_pos(const source_position_t source_position)
1030 {
1031         fprintf(stdout, "%s:%d\n", source_position.input_name,
1032                 source_position.linenr);
1033         fflush(stdout);
1034 }