nsz Git - cparser/blob - lexer.c

   1 #include <config.h>
   2
   3 #include "lexer.h"
   4 #include "token_t.h"
   5 #include "symbol_table_t.h"
   6 #include "adt/error.h"
   7 #include "adt/strset.h"
   8 #include "adt/util.h"
   9
  10 #include <assert.h>
  11 #include <errno.h>
  12 #include <string.h>
  13 #include <ctype.h>
  14
  15 //#define DEBUG_CHARS
  16 #define MAX_PUTBACK 3
  17
  18 static int         c;
  19 token_t            lexer_token;
  20 static FILE       *input;
  21 static char        buf[1024 + MAX_PUTBACK];
  22 static const char *bufend;
  23 static const char *bufpos;
  24 static strset_t    stringset;
  25 //static FILE      **input_stack;
  26 //static char      **buf_stack;
  27
  28 static void error_prefix_at(const char *input_name, unsigned linenr)
  29 {
  30         fprintf(stderr, "%s:%u: Error: ", input_name, linenr);
  31 }
  32
  33 static void error_prefix(void)
  34 {
  35         error_prefix_at(lexer_token.source_position.input_name,
  36                         lexer_token.source_position.linenr);
  37 }
  38
  39 static void parse_error(const char *msg)
  40 {
  41         error_prefix();
  42         fprintf(stderr, "%s\n", msg);
  43 }
  44
  45 static inline void next_real_char(void)
  46 {
  47         bufpos++;
  48         if(bufpos >= bufend) {
  49                 size_t s = fread(buf + MAX_PUTBACK, 1, sizeof(buf) - MAX_PUTBACK,
  50                                  input);
  51                 if(s == 0) {
  52                         c = EOF;
  53                         return;
  54                 }
  55                 bufpos = buf + MAX_PUTBACK;
  56                 bufend = buf + MAX_PUTBACK + s;
  57         }
  58         c = *(bufpos);
  59 }
  60
  61 static inline void put_back(int pc)
  62 {
  63         char *p = (char*) bufpos - 1;
  64         bufpos--;
  65         assert(p >= buf);
  66         *p = pc;
  67
  68 #ifdef DEBUG_CHARS
  69         printf("putback '%c'\n", pc);
  70 #endif
  71 }
  72
  73 static inline void next_char(void);
  74
  75 #define MATCH_NEWLINE(code)                   \
  76         case '\r':                                \
  77                 next_char();                          \
  78                 if(c == '\n') {                       \
  79                         next_char();                      \
  80                 }                                     \
  81                 lexer_token.source_position.linenr++; \
  82                 code;                                 \
  83         case '\n':                                \
  84                 next_char();                          \
  85                 lexer_token.source_position.linenr++; \
  86                 code;
  87
  88 static void maybe_concat_lines(void)
  89 {
  90         next_char();
  91         switch(c) {
  92         MATCH_NEWLINE(return;)
  93
  94         default:
  95                 break;
  96         }
  97
  98         put_back(c);
  99         c = '\\';
 100 }
 101
 102 static inline void next_char(void)
 103 {
 104         next_real_char();
 105
 106         /* filter trigraphs */
 107         if(UNLIKELY(c == '\\')) {
 108                 maybe_concat_lines();
 109                 goto end_of_next_char;
 110         }
 111
 112         if(LIKELY(c != '?'))
 113                 goto end_of_next_char;
 114
 115         next_real_char();
 116         if(LIKELY(c != '?')) {
 117                 put_back(c);
 118                 c = '?';
 119                 goto end_of_next_char;
 120         }
 121
 122         next_real_char();
 123         switch(c) {
 124         case '=': c = '#'; break;
 125         case '(': c = '['; break;
 126         case '/': c = '\\'; maybe_concat_lines(); break;
 127         case ')': c = ']'; break;
 128         case '\'': c = '^'; break;
 129         case '<': c = '{'; break;
 130         case '!': c = '|'; break;
 131         case '>': c = '}'; break;
 132         case '-': c = '~'; break;
 133         default:
 134                 put_back('?');
 135                 put_back(c);
 136                 c = '?';
 137                 break;
 138         }
 139
 140 end_of_next_char:
 141 #ifdef DEBUG_CHARS
 142         printf("nchar '%c'\n", c);
 143 #else
 144         ;
 145 #endif
 146 }
 147
 148 #define SYMBOL_CHARS  \
 149         case 'a':         \
 150         case 'b':         \
 151         case 'c':         \
 152         case 'd':         \
 153         case 'e':         \
 154         case 'f':         \
 155         case 'g':         \
 156         case 'h':         \
 157         case 'i':         \
 158         case 'j':         \
 159         case 'k':         \
 160         case 'l':         \
 161         case 'm':         \
 162         case 'n':         \
 163         case 'o':         \
 164         case 'p':         \
 165         case 'q':         \
 166         case 'r':         \
 167         case 's':         \
 168         case 't':         \
 169         case 'u':         \
 170         case 'v':         \
 171         case 'w':         \
 172         case 'x':         \
 173         case 'y':         \
 174         case 'z':         \
 175         case 'A':         \
 176         case 'B':         \
 177         case 'C':         \
 178         case 'D':         \
 179         case 'E':         \
 180         case 'F':         \
 181         case 'G':         \
 182         case 'H':         \
 183         case 'I':         \
 184         case 'J':         \
 185         case 'K':         \
 186         case 'L':         \
 187         case 'M':         \
 188         case 'N':         \
 189         case 'O':         \
 190         case 'P':         \
 191         case 'Q':         \
 192         case 'R':         \
 193         case 'S':         \
 194         case 'T':         \
 195         case 'U':         \
 196         case 'V':         \
 197         case 'W':         \
 198         case 'X':         \
 199         case 'Y':         \
 200         case 'Z':         \
 201         case '_':
 202
 203 #define DIGITS        \
 204         case '0':         \
 205         case '1':         \
 206         case '2':         \
 207         case '3':         \
 208         case '4':         \
 209         case '5':         \
 210         case '6':         \
 211         case '7':         \
 212         case '8':         \
 213         case '9':
 214
 215 static void parse_symbol(void)
 216 {
 217         symbol_t *symbol;
 218         char     *string;
 219
 220         obstack_1grow(&symbol_obstack, c);
 221         next_char();
 222
 223         while(1) {
 224                 switch(c) {
 225                 DIGITS
 226                 SYMBOL_CHARS
 227                         obstack_1grow(&symbol_obstack, c);
 228                         next_char();
 229                         break;
 230
 231                 default:
 232                         goto end_symbol;
 233                 }
 234         }
 235
 236 end_symbol:
 237         obstack_1grow(&symbol_obstack, '\0');
 238
 239         string = obstack_finish(&symbol_obstack);
 240         symbol = symbol_table_insert(string);
 241
 242         lexer_token.type     = symbol->ID;
 243         lexer_token.v.symbol = symbol;
 244
 245         if(symbol->string != string) {
 246                 obstack_free(&symbol_obstack, string);
 247         }
 248 }
 249
 250 static void parse_integer_suffix(void)
 251 {
 252         if(c == 'U' || c == 'U') {
 253                 /* TODO do something with the suffixes... */
 254                 next_char();
 255                 if(c == 'L' || c == 'l') {
 256                         next_char();
 257                         if(c == 'L' || c == 'l') {
 258                                 next_char();
 259                         }
 260                 }
 261         } else if(c == 'l' || c == 'L') {
 262                 next_char();
 263                 if(c == 'l' || c == 'L') {
 264                         next_char();
 265                         if(c == 'u' || c == 'U') {
 266                                 next_char();
 267                         }
 268                 } else if(c == 'u' || c == 'U') {
 269                         next_char();
 270                 }
 271         }
 272 }
 273
 274 static void parse_number_hex(void)
 275 {
 276         assert(c == 'x' || c == 'X');
 277         next_char();
 278
 279         if (!isdigit(c) &&
 280                 !('A' <= c && c <= 'F') &&
 281                 !('a' <= c && c <= 'f')) {
 282                 parse_error("premature end of hex number literal");
 283                 lexer_token.type = T_ERROR;
 284                 return;
 285         }
 286
 287         int value = 0;
 288         while(1) {
 289                 if (isdigit(c)) {
 290                         value = 16 * value + c - '0';
 291                 } else if ('A' <= c && c <= 'F') {
 292                         value = 16 * value + c - 'A' + 10;
 293                 } else if ('a' <= c && c <= 'f') {
 294                         value = 16 * value + c - 'a' + 10;
 295                 } else {
 296                         parse_integer_suffix();
 297
 298                         lexer_token.type       = T_INTEGER;
 299                         lexer_token.v.intvalue = value;
 300                         return;
 301                 }
 302                 next_char();
 303         }
 304
 305         if(c == '.' || c == 'p' || c == 'P') {
 306                 next_char();
 307                 panic("Hex floating point numbers not implemented yet");
 308         }
 309 }
 310
 311 static void parse_number_oct(void)
 312 {
 313         int value = 0;
 314         while(c >= '0' && c <= '7') {
 315                 value = 8 * value + c - '0';
 316                 next_char();
 317         }
 318         if (c == '8' || c == '9') {
 319                 parse_error("invalid octal number");
 320                 lexer_token.type = T_ERROR;
 321                 return;
 322         }
 323
 324         lexer_token.type       = T_INTEGER;
 325         lexer_token.v.intvalue = value;
 326
 327         parse_integer_suffix();
 328 }
 329
 330 static void parse_floatingpoint_exponent(long double value)
 331 {
 332         unsigned int expo = 0;
 333         long double  factor = 10.;
 334
 335         if(c == '-') {
 336                 next_char();
 337                 factor = 0.1;
 338         } else if(c == '+') {
 339                 next_char();
 340         }
 341
 342         while(c >= '0' && c <= '9') {
 343                 expo = 10 * expo + (c - '0');
 344                 next_char();
 345         }
 346
 347         while(1) {
 348                 if(expo & 1)
 349                         value *= factor;
 350                 expo >>= 1;
 351                 if(expo == 0)
 352                         break;
 353                 factor *= factor;
 354         }
 355
 356         lexer_token.type         = T_FLOATINGPOINT;
 357         lexer_token.v.floatvalue = value;
 358 }
 359
 360 static void parse_floatingpoint_fract(int integer_part)
 361 {
 362         long double value  = integer_part;
 363         long double factor = 1.;
 364
 365         while(c >= '0' && c <= '9') {
 366                 factor *= 0.1;
 367                 value  += (c - '0') * factor;
 368                 next_char();
 369         }
 370
 371         if(c == 'e' || c == 'E') {
 372                 next_char();
 373                 parse_floatingpoint_exponent(value);
 374                 return;
 375         }
 376
 377         lexer_token.type         = T_FLOATINGPOINT;
 378         lexer_token.v.floatvalue = value;
 379 }
 380
 381 static void parse_number_dec(void)
 382 {
 383         int value = 0;
 384
 385         while(isdigit(c)) {
 386                 value = 10 * value + c - '0';
 387                 next_char();
 388         }
 389
 390         if(c == '.') {
 391                 next_char();
 392                 parse_floatingpoint_fract(value);
 393                 return;
 394         }
 395         if(c == 'e' || c == 'E') {
 396                 next_char();
 397                 parse_floatingpoint_exponent(value);
 398                 return;
 399         }
 400         parse_integer_suffix();
 401
 402         lexer_token.type       = T_INTEGER;
 403         lexer_token.v.intvalue = value;
 404 }
 405
 406 static void parse_number(void)
 407 {
 408         if (c == '0') {
 409                 next_char();
 410                 switch (c) {
 411                         case 'X':
 412                         case 'x':
 413                                 parse_number_hex();
 414                                 break;
 415                         case '0':
 416                         case '1':
 417                         case '2':
 418                         case '3':
 419                         case '4':
 420                         case '5':
 421                         case '6':
 422                         case '7':
 423                                 parse_number_oct();
 424                                 break;
 425                         case '.':
 426                                 next_char();
 427                                 parse_floatingpoint_fract(0);
 428                                 break;
 429                         case 'e':
 430                         case 'E':
 431                                 parse_floatingpoint_exponent(0);
 432                                 break;
 433                         case '8':
 434                         case '9':
 435                                 next_char();
 436                                 parse_error("invalid octal number");
 437                                 lexer_token.type = T_ERROR;
 438                                 return;
 439                         default:
 440                                 put_back(c);
 441                                 c = '0';
 442                                 parse_number_dec();
 443                                 return;
 444                 }
 445         } else {
 446                 parse_number_dec();
 447         }
 448 }
 449
 450 static int parse_octal_sequence(void)
 451 {
 452         int value = 0;
 453         while(1) {
 454                 if(c < '0' || c > '7')
 455                         break;
 456                 value = 8 * value + c - '0';
 457                 next_char();
 458         }
 459
 460         return value;
 461 }
 462
 463 static int parse_hex_sequence(void)
 464 {
 465         int value = 0;
 466         while(1) {
 467                 if (c >= '0' && c <= '9') {
 468                         value = 16 * value + c - '0';
 469                 } else if ('A' <= c && c <= 'F') {
 470                         value = 16 * value + c - 'A' + 10;
 471                 } else if ('a' <= c && c <= 'f') {
 472                         value = 16 * value + c - 'a' + 10;
 473                 } else {
 474                         break;
 475                 }
 476                 next_char();
 477         }
 478
 479         return value;
 480 }
 481
 482 static int parse_escape_sequence(void)
 483 {
 484         while(1) {
 485                 int ec = c;
 486                 next_char();
 487
 488                 switch(ec) {
 489                 case '"':  return '"';
 490                 case '\'': return'\'';
 491                 case '\\': return '\\';
 492                 case '?': return '\?';
 493                 case 'a': return '\a';
 494                 case 'b': return '\b';
 495                 case 'f': return '\f';
 496                 case 'n': return '\n';
 497                 case 'r': return '\r';
 498                 case 't': return '\t';
 499                 case 'v': return '\v';
 500                 case 'x':
 501                         return parse_hex_sequence();
 502                 case '0':
 503                 case '1':
 504                 case '2':
 505                 case '3':
 506                 case '4':
 507                 case '5':
 508                 case '6':
 509                 case '7':
 510                         return parse_octal_sequence();
 511                 case EOF:
 512                         parse_error("reached end of file while parsing escape sequence");
 513                         return EOF;
 514                 default:
 515                         parse_error("unknown escape sequence");
 516                         return EOF;
 517                 }
 518         }
 519 }
 520
 521 const char *concat_strings(const char *s1, const char *s2)
 522 {
 523         size_t  len1   = strlen(s1);
 524         size_t  len2   = strlen(s2);
 525
 526         char   *concat = obstack_alloc(&symbol_obstack, len1 + len2 + 1);
 527         memcpy(concat, s1, len1);
 528         memcpy(concat + len1, s2, len2 + 1);
 529
 530         const char *result = strset_insert(&stringset, concat);
 531         if(result != concat) {
 532                 obstack_free(&symbol_obstack, concat);
 533         }
 534
 535         return result;
 536 }
 537
 538 static void parse_string_literal(void)
 539 {
 540         unsigned    start_linenr = lexer_token.source_position.linenr;
 541         char       *string;
 542         const char *result;
 543
 544         assert(c == '"');
 545         next_char();
 546
 547         while(1) {
 548                 switch(c) {
 549                 case '\\':
 550                         next_char();
 551                         int ec = parse_escape_sequence();
 552                         obstack_1grow(&symbol_obstack, ec);
 553                         break;
 554
 555                 case EOF:
 556                         error_prefix_at(lexer_token.source_position.input_name,
 557                                         start_linenr);
 558                         fprintf(stderr, "string has no end\n");
 559                         lexer_token.type = T_ERROR;
 560                         return;
 561
 562                 case '"':
 563                         next_char();
 564                         goto end_of_string;
 565
 566                 default:
 567                         obstack_1grow(&symbol_obstack, c);
 568                         next_char();
 569                         break;
 570                 }
 571         }
 572
 573 end_of_string:
 574
 575         /* TODO: concatenate multiple strings separated by whitespace... */
 576
 577         /* add finishing 0 to the string */
 578         obstack_1grow(&symbol_obstack, '\0');
 579         string = obstack_finish(&symbol_obstack);
 580
 581         /* check if there is already a copy of the string */
 582         result = strset_insert(&stringset, string);
 583         if(result != string) {
 584                 obstack_free(&symbol_obstack, string);
 585         }
 586
 587         lexer_token.type     = T_STRING_LITERAL;
 588         lexer_token.v.string = result;
 589 }
 590
 591 static void parse_character_constant(void)
 592 {
 593         assert(c == '\'');
 594         next_char();
 595
 596         int found_char = 0;
 597         while(1) {
 598                 switch(c) {
 599                 case '\\':
 600                         next_char();
 601                         found_char = parse_escape_sequence();
 602                         break;
 603
 604                 MATCH_NEWLINE(
 605                         parse_error("newline while parsing character constant");
 606                         break;
 607                 )
 608
 609                 case '\'':
 610                         next_char();
 611                         goto end_of_char_constant;
 612
 613                 case EOF:
 614                         parse_error("EOF while parsing character constant");
 615                         lexer_token.type = T_ERROR;
 616                         return;
 617
 618                 default:
 619                         if(found_char != 0) {
 620                                 parse_error("more than 1 characters in character "
 621                                             "constant");
 622                                 goto end_of_char_constant;
 623                         } else {
 624                                 found_char = c;
 625                                 next_char();
 626                         }
 627                         break;
 628                 }
 629         }
 630
 631 end_of_char_constant:
 632         lexer_token.type       = T_INTEGER;
 633         lexer_token.v.intvalue = found_char;
 634 }
 635
 636 static void skip_multiline_comment(void)
 637 {
 638         unsigned start_linenr = lexer_token.source_position.linenr;
 639
 640         while(1) {
 641                 switch(c) {
 642                 case '*':
 643                         next_char();
 644                         if(c == '/') {
 645                                 next_char();
 646                                 return;
 647                         }
 648                         break;
 649
 650                 MATCH_NEWLINE(break;)
 651
 652                 case EOF:
 653                         error_prefix_at(lexer_token.source_position.input_name,
 654                                         start_linenr);
 655                         fprintf(stderr, "at end of file while looking for comment end\n");
 656                         return;
 657
 658                 default:
 659                         next_char();
 660                         break;
 661                 }
 662         }
 663 }
 664
 665 static void skip_line_comment(void)
 666 {
 667         while(1) {
 668                 switch(c) {
 669                 case EOF:
 670                         return;
 671
 672                 case '\n':
 673                 case '\r':
 674                         return;
 675
 676                 default:
 677                         next_char();
 678                         break;
 679                 }
 680         }
 681 }
 682
 683 static token_t pp_token;
 684
 685 static inline void next_pp_token(void)
 686 {
 687         lexer_next_preprocessing_token();
 688         pp_token = lexer_token;
 689 }
 690
 691 static void eat_until_newline(void)
 692 {
 693         while(pp_token.type != '\n' && pp_token.type != T_EOF) {
 694                 next_pp_token();
 695         }
 696 }
 697
 698 static void error_directive(void)
 699 {
 700         error_prefix();
 701         fprintf(stderr, "#error directive: \n");
 702
 703         /* parse pp-tokens until new-line */
 704 }
 705
 706 static void define_directive(void)
 707 {
 708         lexer_next_preprocessing_token();
 709         if(lexer_token.type != T_IDENTIFIER) {
 710                 parse_error("expected identifier after #define\n");
 711                 eat_until_newline();
 712         }
 713 }
 714
 715 static void ifdef_directive(int is_ifndef)
 716 {
 717         (void) is_ifndef;
 718         lexer_next_preprocessing_token();
 719         //expect_identifier();
 720         //extect_newline();
 721 }
 722
 723 static void endif_directive(void)
 724 {
 725         //expect_newline();
 726 }
 727
 728 static void parse_line_directive(void)
 729 {
 730         if(pp_token.type != T_INTEGER) {
 731                 parse_error("expected integer");
 732         } else {
 733                 lexer_token.source_position.linenr = pp_token.v.intvalue - 1;
 734                 next_pp_token();
 735         }
 736         if(pp_token.type == T_STRING_LITERAL) {
 737                 lexer_token.source_position.input_name = pp_token.v.string;
 738                 next_pp_token();
 739         }
 740
 741         eat_until_newline();
 742 }
 743
 744 static void parse_preprocessor_identifier(void)
 745 {
 746         assert(pp_token.type == T_IDENTIFIER);
 747         symbol_t *symbol = pp_token.v.symbol;
 748
 749         switch(symbol->pp_ID) {
 750         case TP_include:
 751                 printf("include - enable header name parsing!\n");
 752                 break;
 753         case TP_define:
 754                 define_directive();
 755                 break;
 756         case TP_ifdef:
 757                 ifdef_directive(0);
 758                 break;
 759         case TP_ifndef:
 760                 ifdef_directive(1);
 761                 break;
 762         case TP_endif:
 763                 endif_directive();
 764                 break;
 765         case TP_line:
 766                 next_pp_token();
 767                 parse_line_directive();
 768                 break;
 769         case TP_if:
 770         case TP_else:
 771         case TP_elif:
 772         case TP_undef:
 773         case TP_error:
 774                 error_directive();
 775                 break;
 776         case TP_pragma:
 777                 break;
 778         }
 779 }
 780
 781 static void parse_preprocessor_directive()
 782 {
 783         next_pp_token();
 784
 785         switch(pp_token.type) {
 786         case T_IDENTIFIER:
 787                 parse_preprocessor_identifier();
 788                 break;
 789         case T_INTEGER:
 790                 parse_line_directive();
 791                 break;
 792         default:
 793                 parse_error("invalid preprocessor directive");
 794                 eat_until_newline();
 795                 break;
 796         }
 797 }
 798
 799 #define MAYBE_PROLOG                                       \
 800                         next_char();                                   \
 801                         while(1) {                                     \
 802                                 switch(c) {
 803
 804 #define MAYBE(ch, set_type)                                \
 805                                 case ch:                                   \
 806                                         next_char();                           \
 807                                         lexer_token.type = set_type;           \
 808                                         return;
 809
 810 #define ELSE_CODE(code)                                    \
 811                                 default:                                   \
 812                                         code;                                  \
 813                                 }                                          \
 814                         } /* end of while(1) */                        \
 815                         break;
 816
 817 #define ELSE(set_type)                                     \
 818                 ELSE_CODE(                                         \
 819                         lexer_token.type = set_type;                   \
 820                         return;                                        \
 821                 )
 822
 823 void lexer_next_preprocessing_token(void)
 824 {
 825         while(1) {
 826                 switch(c) {
 827                 case ' ':
 828                 case '\t':
 829                         next_char();
 830                         break;
 831
 832                 MATCH_NEWLINE(
 833                         lexer_token.type = '\n';
 834                         return;
 835                 )
 836
 837                 SYMBOL_CHARS
 838                         parse_symbol();
 839                         return;
 840
 841                 DIGITS
 842                         parse_number();
 843                         return;
 844
 845                 case '"':
 846                         parse_string_literal();
 847                         return;
 848
 849                 case '\'':
 850                         parse_character_constant();
 851                         return;
 852
 853                 case '.':
 854                         MAYBE_PROLOG
 855                                 case '.':
 856                                         MAYBE_PROLOG
 857                                         MAYBE('.', T_DOTDOTDOT)
 858                                         ELSE_CODE(
 859                                                 put_back(c);
 860                                                 c = '.';
 861                                                 lexer_token.type = '.';
 862                                                 return;
 863                                         )
 864                         ELSE('.')
 865                 case '&':
 866                         MAYBE_PROLOG
 867                         MAYBE('&', T_ANDAND)
 868                         MAYBE('=', T_ANDEQUAL)
 869                         ELSE('&')
 870                 case '*':
 871                         MAYBE_PROLOG
 872                         MAYBE('=', T_ASTERISKEQUAL)
 873                         ELSE('*')
 874                 case '+':
 875                         MAYBE_PROLOG
 876                         MAYBE('+', T_PLUSPLUS)
 877                         MAYBE('=', T_PLUSEQUAL)
 878                         ELSE('+')
 879                 case '-':
 880                         MAYBE_PROLOG
 881                         MAYBE('>', T_MINUSGREATER)
 882                         MAYBE('-', T_MINUSMINUS)
 883                         MAYBE('=', T_MINUSEQUAL)
 884                         ELSE('-')
 885                 case '!':
 886                         MAYBE_PROLOG
 887                         MAYBE('=', T_EXCLAMATIONMARKEQUAL)
 888                         ELSE('!')
 889                 case '/':
 890                         MAYBE_PROLOG
 891                         MAYBE('=', T_SLASHEQUAL)
 892                                 case '*':
 893                                         next_char();
 894                                         skip_multiline_comment();
 895                                         lexer_next_preprocessing_token();
 896                                         return;
 897                                 case '/':
 898                                         next_char();
 899                                         skip_line_comment();
 900                                         lexer_next_preprocessing_token();
 901                                         return;
 902                         ELSE('/')
 903                 case '%':
 904                         MAYBE_PROLOG
 905                         MAYBE('>', T_PERCENTGREATER)
 906                         MAYBE('=', T_PERCENTEQUAL)
 907                                 case ':':
 908                                         MAYBE_PROLOG
 909                                                 case '%':
 910                                                         MAYBE_PROLOG
 911                                                         MAYBE(':', T_PERCENTCOLONPERCENTCOLON)
 912                                                         ELSE_CODE(
 913                                                                 put_back(c);
 914                                                                 c = '%';
 915                                                                 lexer_token.type = T_PERCENTCOLON;
 916                                                                 return;
 917                                                         )
 918                                         ELSE(T_PERCENTCOLON)
 919                         ELSE('%')
 920                 case '<':
 921                         MAYBE_PROLOG
 922                         MAYBE(':', T_LESSCOLON)
 923                         MAYBE('%', T_LESSPERCENT)
 924                         MAYBE('=', T_LESSEQUAL)
 925                                 case '<':
 926                                         MAYBE_PROLOG
 927                                         MAYBE('=', T_LESSLESSEQUAL)
 928                                         ELSE(T_LESSLESS)
 929                         ELSE('<')
 930                 case '>':
 931                         MAYBE_PROLOG
 932                         MAYBE('=', T_GREATEREQUAL)
 933                                 case '>':
 934                                         MAYBE_PROLOG
 935                                         MAYBE('=', T_GREATERGREATEREQUAL)
 936                                         ELSE(T_GREATERGREATER)
 937                         ELSE('>')
 938                 case '^':
 939                         MAYBE_PROLOG
 940                         MAYBE('=', T_CARETEQUAL)
 941                         ELSE('^')
 942                 case '|':
 943                         MAYBE_PROLOG
 944                         MAYBE('=', T_PIPEEQUAL)
 945                         MAYBE('|', T_PIPEPIPE)
 946                         ELSE('|')
 947                 case ':':
 948                         MAYBE_PROLOG
 949                         MAYBE('>', T_COLONGREATER)
 950                         ELSE(':')
 951                 case '=':
 952                         MAYBE_PROLOG
 953                         MAYBE('=', T_EQUALEQUAL)
 954                         ELSE('=')
 955                 case '#':
 956                         MAYBE_PROLOG
 957                         MAYBE('#', T_HASHHASH)
 958                         ELSE('#')
 959
 960                 case '?':
 961                 case '[':
 962                 case ']':
 963                 case '(':
 964                 case ')':
 965                 case '{':
 966                 case '}':
 967                 case '~':
 968                 case ';':
 969                 case ',':
 970                 case '\\':
 971                         lexer_token.type = c;
 972                         next_char();
 973                         return;
 974
 975                 case EOF:
 976                         lexer_token.type = T_EOF;
 977                         return;
 978
 979                 default:
 980                         next_char();
 981                         error_prefix();
 982                         fprintf(stderr, "unknown character '%c' found\n", c);
 983                         lexer_token.type = T_ERROR;
 984                         return;
 985                 }
 986         }
 987 }
 988
 989 void lexer_next_token(void)
 990 {
 991         lexer_next_preprocessing_token();
 992         if(lexer_token.type != '\n')
 993                 return;
 994
 995 newline_found:
 996         do {
 997                 lexer_next_preprocessing_token();
 998         } while(lexer_token.type == '\n');
 999
1000         if(lexer_token.type == '#') {
1001                 parse_preprocessor_directive();
1002                 goto newline_found;
1003         }
1004 }
1005
1006 void init_lexer(void)
1007 {
1008         strset_init(&stringset);
1009 }
1010
1011 void lexer_open_stream(FILE *stream, const char *input_name)
1012 {
1013         input                                  = stream;
1014         lexer_token.source_position.linenr     = 1;
1015         lexer_token.source_position.input_name = input_name;
1016
1017         next_char();
1018 }
1019
1020 void exit_lexer(void)
1021 {
1022         strset_destroy(&stringset);
1023 }
1024
1025 static __attribute__((unused))
1026 void dbg_pos(const source_position_t source_position)
1027 {
1028         fprintf(stdout, "%s:%d\n", source_position.input_name,
1029                 source_position.linenr);
1030         fflush(stdout);
1031 }