nsz Git - cparser/blob - lexer.c

   1 #include <config.h>
   2
   3 #include "lexer.h"
   4 #include "token_t.h"
   5 #include "symbol_table_t.h"
   6 #include "adt/error.h"
   7 #include "adt/strset.h"
   8 #include "adt/util.h"
   9
  10 #include <assert.h>
  11 #include <errno.h>
  12 #include <string.h>
  13 #include <stdbool.h>
  14 #include <ctype.h>
  15
  16 //#define DEBUG_CHARS
/* number of bytes reserved at the start of buf for characters that are
 * pushed back with put_back() */
#define MAX_PUTBACK 3

/* the current input character (EOF once the input is exhausted) */
static int         c;
/* the token most recently produced by the lexer */
token_t            lexer_token;
/* symbol for the identifier "L" (set up in lexer_open_stream), used to
 * recognise wide string literals */
symbol_t          *symbol_L;
/* stream the characters are read from */
static FILE       *input;
/* read buffer; fread() fills it starting at buf + MAX_PUTBACK */
static char        buf[1024 + MAX_PUTBACK];
/* one past the last byte that was read into buf */
static const char *bufend;
/* position of the current character inside buf */
static const char *bufpos;
/* set holding the contents of string literals (see strset_insert calls) */
static strset_t    stringset;
  27
  28 static void error_prefix_at(const char *input_name, unsigned linenr)
  29 {
  30         fprintf(stderr, "%s:%u: Error: ", input_name, linenr);
  31 }
  32
  33 static void error_prefix(void)
  34 {
  35         error_prefix_at(lexer_token.source_position.input_name,
  36                         lexer_token.source_position.linenr);
  37 }
  38
  39 static void parse_error(const char *msg)
  40 {
  41         error_prefix();
  42         fprintf(stderr, "%s\n", msg);
  43 }
  44
  45 static inline void next_real_char(void)
  46 {
  47         bufpos++;
  48         if(bufpos >= bufend) {
  49                 size_t s = fread(buf + MAX_PUTBACK, 1, sizeof(buf) - MAX_PUTBACK,
  50                                  input);
  51                 if(s == 0) {
  52                         c = EOF;
  53                         return;
  54                 }
  55                 bufpos = buf + MAX_PUTBACK;
  56                 bufend = buf + MAX_PUTBACK + s;
  57         }
  58         c = *(bufpos);
  59 }
  60
  61 static inline void put_back(int pc)
  62 {
  63         assert(bufpos >= buf);
  64         //assert(bufpos < buf+MAX_PUTBACK || *bufpos == pc);
  65
  66         char *p = buf + (bufpos - buf);
  67         *p = pc;
  68
  69         /* going backwards in the buffer is legal as long as it's not more often
  70          * than MAX_PUTBACK */
  71         bufpos--;
  72
  73 #ifdef DEBUG_CHARS
  74         printf("putback '%c'\n", pc);
  75 #endif
  76 }
  77
/* forward declaration: the macros and maybe_concat_lines() below use it
 * before its definition */
static inline void next_char(void);

/*
 * Expands to the `case '\r':` and `case '\n':` labels of a switch over c.
 * Both consume the newline ("\r\n" counts as a single newline), increment
 * the line number of lexer_token and then execute `code`. `code` must end
 * the case itself (e.g. with break or return), otherwise the '\r' case
 * falls through into the '\n' case.
 */
#define MATCH_NEWLINE(code)                   \
        case '\r':                                \
                next_char();                          \
                if(c == '\n') {                       \
                        next_char();                      \
                }                                     \
                lexer_token.source_position.linenr++; \
                code;                                 \
        case '\n':                                \
                next_char();                          \
                lexer_token.source_position.linenr++; \
                code;

/* asserts that the current character is c_type and advances past it */
#define eat(c_type)  do { assert(c == c_type); next_char(); } while(0)
  94
  95 static void maybe_concat_lines(void)
  96 {
  97         eat('\\');
  98
  99         switch(c) {
 100         MATCH_NEWLINE(return;)
 101
 102         default:
 103                 break;
 104         }
 105
 106         put_back(c);
 107         c = '\\';
 108 }
 109
 110 static inline void next_char(void)
 111 {
 112         next_real_char();
 113
 114         /* filter trigraphs */
 115         if(UNLIKELY(c == '\\')) {
 116                 maybe_concat_lines();
 117                 goto end_of_next_char;
 118         }
 119
 120         if(LIKELY(c != '?'))
 121                 goto end_of_next_char;
 122
 123         next_real_char();
 124         if(LIKELY(c != '?')) {
 125                 put_back(c);
 126                 c = '?';
 127                 goto end_of_next_char;
 128         }
 129
 130         next_real_char();
 131         switch(c) {
 132         case '=': c = '#'; break;
 133         case '(': c = '['; break;
 134         case '/': c = '\\'; maybe_concat_lines(); break;
 135         case ')': c = ']'; break;
 136         case '\'': c = '^'; break;
 137         case '<': c = '{'; break;
 138         case '!': c = '|'; break;
 139         case '>': c = '}'; break;
 140         case '-': c = '~'; break;
 141         default:
 142                 put_back('?');
 143                 put_back(c);
 144                 c = '?';
 145                 break;
 146         }
 147
 148 end_of_next_char:;
 149 #ifdef DEBUG_CHARS
 150         printf("nchar '%c'\n", c);
 151 #endif
 152 }
 153
/*
 * Case labels for every character that may start an identifier: the ASCII
 * letters and the underscore. Meant to be placed inside a switch over a
 * character; the statements following the macro form the case body.
 */
#define SYMBOL_CHARS  \
        case 'a':         \
        case 'b':         \
        case 'c':         \
        case 'd':         \
        case 'e':         \
        case 'f':         \
        case 'g':         \
        case 'h':         \
        case 'i':         \
        case 'j':         \
        case 'k':         \
        case 'l':         \
        case 'm':         \
        case 'n':         \
        case 'o':         \
        case 'p':         \
        case 'q':         \
        case 'r':         \
        case 's':         \
        case 't':         \
        case 'u':         \
        case 'v':         \
        case 'w':         \
        case 'x':         \
        case 'y':         \
        case 'z':         \
        case 'A':         \
        case 'B':         \
        case 'C':         \
        case 'D':         \
        case 'E':         \
        case 'F':         \
        case 'G':         \
        case 'H':         \
        case 'I':         \
        case 'J':         \
        case 'K':         \
        case 'L':         \
        case 'M':         \
        case 'N':         \
        case 'O':         \
        case 'P':         \
        case 'Q':         \
        case 'R':         \
        case 'S':         \
        case 'T':         \
        case 'U':         \
        case 'V':         \
        case 'W':         \
        case 'X':         \
        case 'Y':         \
        case 'Z':         \
        case '_':

/* Case labels for the decimal digits '0' to '9'; used like SYMBOL_CHARS. */
#define DIGITS        \
        case '0':         \
        case '1':         \
        case '2':         \
        case '3':         \
        case '4':         \
        case '5':         \
        case '6':         \
        case '7':         \
        case '8':         \
        case '9':
 220
 221 static void parse_symbol(void)
 222 {
 223         symbol_t *symbol;
 224         char     *string;
 225
 226         obstack_1grow(&symbol_obstack, c);
 227         next_char();
 228
 229         while(1) {
 230                 switch(c) {
 231                 DIGITS
 232                 SYMBOL_CHARS
 233                         obstack_1grow(&symbol_obstack, c);
 234                         next_char();
 235                         break;
 236
 237                 default:
 238                         goto end_symbol;
 239                 }
 240         }
 241
 242 end_symbol:
 243         obstack_1grow(&symbol_obstack, '\0');
 244
 245         string = obstack_finish(&symbol_obstack);
 246         symbol = symbol_table_insert(string);
 247
 248         lexer_token.type     = symbol->ID;
 249         lexer_token.v.symbol = symbol;
 250
 251         if(symbol->string != string) {
 252                 obstack_free(&symbol_obstack, string);
 253         }
 254 }
 255
 256 static void parse_integer_suffix(void)
 257 {
 258         if(c == 'U' || c == 'U') {
 259                 /* TODO do something with the suffixes... */
 260                 next_char();
 261                 if(c == 'L' || c == 'l') {
 262                         next_char();
 263                         if(c == 'L' || c == 'l') {
 264                                 next_char();
 265                         }
 266                 }
 267         } else if(c == 'l' || c == 'L') {
 268                 next_char();
 269                 if(c == 'l' || c == 'L') {
 270                         next_char();
 271                         if(c == 'u' || c == 'U') {
 272                                 next_char();
 273                         }
 274                 } else if(c == 'u' || c == 'U') {
 275                         next_char();
 276                 }
 277         }
 278 }
 279
 280 static void parse_floating_suffix(void)
 281 {
 282         switch(c) {
 283         /* TODO: do something usefull with the suffixes... */
 284         case 'f':
 285         case 'F':
 286         case 'l':
 287         case 'L':
 288                 next_char();
 289                 break;
 290         default:
 291                 break;
 292         }
 293 }
 294
 295 static inline bool is_hex_digit(int c)
 296 {
 297         return (c >= '0' && c <= '9')
 298                         || (c >= 'a' && c <= 'z')
 299                         || (c >= 'A' && c <= 'Z');
 300 }
 301
 302 static void parse_number_hex(void)
 303 {
 304         assert(c == 'x' || c == 'X');
 305         next_char();
 306
 307         while(is_hex_digit(c)) {
 308                 obstack_1grow(&symbol_obstack, c);
 309                 next_char();
 310         }
 311         obstack_1grow(&symbol_obstack, '\0');
 312         char *string = obstack_finish(&symbol_obstack);
 313
 314         if(c == '.' || c == 'p' || c == 'P') {
 315                 next_char();
 316                 panic("Hex floating point numbers not implemented yet");
 317         }
 318         if(*string == '\0') {
 319                 parse_error("invalid hex number");
 320                 lexer_token.type = T_ERROR;
 321         }
 322
 323         char *endptr;
 324         lexer_token.type       = T_INTEGER;
 325         lexer_token.v.intvalue = strtoll(string, &endptr, 16);
 326         if(*endptr != '\0') {
 327                 parse_error("hex number literal too long");
 328         }
 329
 330         obstack_free(&symbol_obstack, string);
 331 }
 332
 333 static inline bool is_octal_digit(int chr)
 334 {
 335         return '0' <= chr && chr <= '7';
 336 }
 337
 338 static void parse_number_oct(void)
 339 {
 340         while(is_octal_digit(c)) {
 341                 obstack_1grow(&symbol_obstack, c);
 342                 next_char();
 343         }
 344         obstack_1grow(&symbol_obstack, '\0');
 345         char *string = obstack_finish(&symbol_obstack);
 346
 347         char *endptr;
 348         lexer_token.type       = T_INTEGER;
 349         lexer_token.v.intvalue = strtoll(string, &endptr, 8);
 350         if(*endptr != '\0') {
 351                 parse_error("octal number literal too long");
 352         }
 353
 354         obstack_free(&symbol_obstack, string);
 355         parse_integer_suffix();
 356 }
 357
 358 static void parse_number_dec(void)
 359 {
 360         bool is_float = false;
 361         while(isdigit(c)) {
 362                 obstack_1grow(&symbol_obstack, c);
 363                 next_char();
 364         }
 365
 366         if(c == '.') {
 367                 obstack_1grow(&symbol_obstack, '.');
 368                 next_char();
 369
 370                 while(isdigit(c)) {
 371                         obstack_1grow(&symbol_obstack, c);
 372                         next_char();
 373                 }
 374                 is_float = true;
 375         }
 376         if(c == 'e' || c == 'E') {
 377                 obstack_1grow(&symbol_obstack, 'e');
 378                 next_char();
 379
 380                 if(c == '-' || c == '+') {
 381                         obstack_1grow(&symbol_obstack, c);
 382                         next_char();
 383                 }
 384
 385                 while(isdigit(c)) {
 386                         obstack_1grow(&symbol_obstack, c);
 387                         next_char();
 388                 }
 389                 is_float = true;
 390         }
 391
 392         obstack_1grow(&symbol_obstack, '\0');
 393         char *string = obstack_finish(&symbol_obstack);
 394
 395         char *endptr;
 396         if(is_float) {
 397                 lexer_token.type         = T_FLOATINGPOINT;
 398                 lexer_token.v.floatvalue = strtold(string, &endptr);
 399
 400                 if(*endptr != '\0') {
 401                         parse_error("invalid number literal");
 402                 }
 403
 404                 parse_floating_suffix();
 405         } else {
 406                 lexer_token.type       = T_INTEGER;
 407                 lexer_token.v.intvalue = strtoll(string, &endptr, 10);
 408
 409                 if(*endptr != '\0') {
 410                         parse_error("invalid number literal");
 411                 }
 412
 413                 parse_integer_suffix();
 414         }
 415         obstack_free(&symbol_obstack, string);
 416 }
 417
 418 static void parse_number(void)
 419 {
 420         if (c == '0') {
 421                 next_char();
 422                 switch (c) {
 423                         case 'X':
 424                         case 'x':
 425                                 parse_number_hex();
 426                                 break;
 427                         case '0':
 428                         case '1':
 429                         case '2':
 430                         case '3':
 431                         case '4':
 432                         case '5':
 433                         case '6':
 434                         case '7':
 435                                 parse_number_oct();
 436                                 break;
 437                         case '8':
 438                         case '9':
 439                                 next_char();
 440                                 parse_error("invalid octal number");
 441                                 lexer_token.type = T_ERROR;
 442                                 return;
 443                         case '.':
 444                         case 'e':
 445                         case 'E':
 446                         default:
 447                                 obstack_1grow(&symbol_obstack, '0');
 448                                 parse_number_dec();
 449                                 return;
 450                 }
 451         } else {
 452                 parse_number_dec();
 453         }
 454 }
 455
 456 static int parse_octal_sequence(const int first_digit)
 457 {
 458         assert(is_octal_digit(first_digit));
 459         int value = first_digit - '0';
 460         if (!is_octal_digit(c)) return value;
 461         value = 8 * value + c - '0';
 462         next_char();
 463         if (!is_octal_digit(c)) return value;
 464         value = 8 * value + c - '0';
 465         next_char();
 466         return value;
 467 }
 468
 469 static int parse_hex_sequence(void)
 470 {
 471         int value = 0;
 472         while(1) {
 473                 if (c >= '0' && c <= '9') {
 474                         value = 16 * value + c - '0';
 475                 } else if ('A' <= c && c <= 'F') {
 476                         value = 16 * value + c - 'A' + 10;
 477                 } else if ('a' <= c && c <= 'f') {
 478                         value = 16 * value + c - 'a' + 10;
 479                 } else {
 480                         break;
 481                 }
 482                 next_char();
 483         }
 484
 485         return value;
 486 }
 487
 488 static int parse_escape_sequence(void)
 489 {
 490         eat('\\');
 491
 492         int ec = c;
 493         next_char();
 494
 495         switch(ec) {
 496         case '"':  return '"';
 497         case '\'': return '\'';
 498         case '\\': return '\\';
 499         case '?': return '\?';
 500         case 'a': return '\a';
 501         case 'b': return '\b';
 502         case 'f': return '\f';
 503         case 'n': return '\n';
 504         case 'r': return '\r';
 505         case 't': return '\t';
 506         case 'v': return '\v';
 507         case 'x':
 508                 return parse_hex_sequence();
 509         case '0':
 510         case '1':
 511         case '2':
 512         case '3':
 513         case '4':
 514         case '5':
 515         case '6':
 516         case '7':
 517                 return parse_octal_sequence(ec);
 518         case EOF:
 519                 parse_error("reached end of file while parsing escape sequence");
 520                 return EOF;
 521         default:
 522                 parse_error("unknown escape sequence");
 523                 return EOF;
 524         }
 525 }
 526
 527 const char *concat_strings(const char *s1, const char *s2)
 528 {
 529         size_t  len1   = strlen(s1);
 530         size_t  len2   = strlen(s2);
 531
 532         char   *concat = obstack_alloc(&symbol_obstack, len1 + len2 + 1);
 533         memcpy(concat, s1, len1);
 534         memcpy(concat + len1, s2, len2 + 1);
 535
 536         const char *result = strset_insert(&stringset, concat);
 537         if(result != concat) {
 538                 obstack_free(&symbol_obstack, concat);
 539         }
 540
 541         return result;
 542 }
 543
 544 static void parse_string_literal(void)
 545 {
 546         unsigned    start_linenr = lexer_token.source_position.linenr;
 547         char       *string;
 548         const char *result;
 549
 550         assert(c == '"');
 551         next_char();
 552
 553         int tc;
 554         while(1) {
 555                 switch(c) {
 556                 case '\\':
 557                         tc = parse_escape_sequence();
 558                         obstack_1grow(&symbol_obstack, tc);
 559                         break;
 560
 561                 case EOF:
 562                         error_prefix_at(lexer_token.source_position.input_name,
 563                                         start_linenr);
 564                         fprintf(stderr, "string has no end\n");
 565                         lexer_token.type = T_ERROR;
 566                         return;
 567
 568                 case '"':
 569                         next_char();
 570                         goto end_of_string;
 571
 572                 default:
 573                         obstack_1grow(&symbol_obstack, c);
 574                         next_char();
 575                         break;
 576                 }
 577         }
 578
 579 end_of_string:
 580
 581         /* TODO: concatenate multiple strings separated by whitespace... */
 582
 583         /* add finishing 0 to the string */
 584         obstack_1grow(&symbol_obstack, '\0');
 585         string = obstack_finish(&symbol_obstack);
 586
 587         /* check if there is already a copy of the string */
 588         result = strset_insert(&stringset, string);
 589         if(result != string) {
 590                 obstack_free(&symbol_obstack, string);
 591         }
 592
 593         lexer_token.type     = T_STRING_LITERAL;
 594         lexer_token.v.string = result;
 595 }
 596
 597 static void parse_character_constant(void)
 598 {
 599         eat('\'');
 600
 601         int found_char = 0;
 602         while(1) {
 603                 switch(c) {
 604                 case '\\':
 605                         found_char = parse_escape_sequence();
 606                         break;
 607
 608                 MATCH_NEWLINE(
 609                         parse_error("newline while parsing character constant");
 610                         break;
 611                 )
 612
 613                 case '\'':
 614                         next_char();
 615                         goto end_of_char_constant;
 616
 617                 case EOF:
 618                         parse_error("EOF while parsing character constant");
 619                         lexer_token.type = T_ERROR;
 620                         return;
 621
 622                 default:
 623                         if(found_char != 0) {
 624                                 parse_error("more than 1 characters in character "
 625                                             "constant");
 626                                 goto end_of_char_constant;
 627                         } else {
 628                                 found_char = c;
 629                                 next_char();
 630                         }
 631                         break;
 632                 }
 633         }
 634
 635 end_of_char_constant:
 636         lexer_token.type       = T_INTEGER;
 637         lexer_token.v.intvalue = found_char;
 638 }
 639
 640 static void skip_multiline_comment(void)
 641 {
 642         unsigned start_linenr = lexer_token.source_position.linenr;
 643
 644         while(1) {
 645                 switch(c) {
 646                 case '*':
 647                         next_char();
 648                         if(c == '/') {
 649                                 next_char();
 650                                 return;
 651                         }
 652                         break;
 653
 654                 MATCH_NEWLINE(break;)
 655
 656                 case EOF:
 657                         error_prefix_at(lexer_token.source_position.input_name,
 658                                         start_linenr);
 659                         fprintf(stderr, "at end of file while looking for comment end\n");
 660                         return;
 661
 662                 default:
 663                         next_char();
 664                         break;
 665                 }
 666         }
 667 }
 668
 669 static void skip_line_comment(void)
 670 {
 671         while(1) {
 672                 switch(c) {
 673                 case EOF:
 674                         return;
 675
 676                 case '\n':
 677                 case '\r':
 678                         return;
 679
 680                 default:
 681                         next_char();
 682                         break;
 683                 }
 684         }
 685 }
 686
 687 static token_t pp_token;
 688
 689 static inline void next_pp_token(void)
 690 {
 691         lexer_next_preprocessing_token();
 692         pp_token = lexer_token;
 693 }
 694
 695 static void eat_until_newline(void)
 696 {
 697         while(pp_token.type != '\n' && pp_token.type != T_EOF) {
 698                 next_pp_token();
 699         }
 700 }
 701
 702 static void error_directive(void)
 703 {
 704         error_prefix();
 705         fprintf(stderr, "#error directive: \n");
 706
 707         /* parse pp-tokens until new-line */
 708 }
 709
 710 static void define_directive(void)
 711 {
 712         lexer_next_preprocessing_token();
 713         if(lexer_token.type != T_IDENTIFIER) {
 714                 parse_error("expected identifier after #define\n");
 715                 eat_until_newline();
 716         }
 717 }
 718
 719 static void ifdef_directive(int is_ifndef)
 720 {
 721         (void) is_ifndef;
 722         lexer_next_preprocessing_token();
 723         //expect_identifier();
 724         //extect_newline();
 725 }
 726
 727 static void endif_directive(void)
 728 {
 729         //expect_newline();
 730 }
 731
 732 static void parse_line_directive(void)
 733 {
 734         if(pp_token.type != T_INTEGER) {
 735                 parse_error("expected integer");
 736         } else {
 737                 lexer_token.source_position.linenr = pp_token.v.intvalue - 1;
 738                 next_pp_token();
 739         }
 740         if(pp_token.type == T_STRING_LITERAL) {
 741                 lexer_token.source_position.input_name = pp_token.v.string;
 742                 next_pp_token();
 743         }
 744
 745         eat_until_newline();
 746 }
 747
 748 static void parse_preprocessor_identifier(void)
 749 {
 750         assert(pp_token.type == T_IDENTIFIER);
 751         symbol_t *symbol = pp_token.v.symbol;
 752
 753         switch(symbol->pp_ID) {
 754         case TP_include:
 755                 printf("include - enable header name parsing!\n");
 756                 break;
 757         case TP_define:
 758                 define_directive();
 759                 break;
 760         case TP_ifdef:
 761                 ifdef_directive(0);
 762                 break;
 763         case TP_ifndef:
 764                 ifdef_directive(1);
 765                 break;
 766         case TP_endif:
 767                 endif_directive();
 768                 break;
 769         case TP_line:
 770                 next_pp_token();
 771                 parse_line_directive();
 772                 break;
 773         case TP_if:
 774         case TP_else:
 775         case TP_elif:
 776         case TP_undef:
 777         case TP_error:
 778                 error_directive();
 779                 break;
 780         case TP_pragma:
 781                 break;
 782         }
 783 }
 784
 785 static void parse_preprocessor_directive(void)
 786 {
 787         next_pp_token();
 788
 789         switch(pp_token.type) {
 790         case T_IDENTIFIER:
 791                 parse_preprocessor_identifier();
 792                 break;
 793         case T_INTEGER:
 794                 parse_line_directive();
 795                 break;
 796         default:
 797                 parse_error("invalid preprocessor directive");
 798                 eat_until_newline();
 799                 break;
 800         }
 801 }
 802
/*
 * Helper macros for lexing punctuators that consist of several characters.
 * They are used inside the switch of lexer_next_preprocessing_token() and
 * have to be combined as MAYBE_PROLOG, any number of MAYBE(...) or
 * hand-written cases, and finally ELSE(...) or ELSE_CODE(...).
 *
 * MAYBE_PROLOG consumes the current character and opens a loop with a
 * switch over the character that follows it.
 */
#define MAYBE_PROLOG                                       \
                        next_char();                                   \
                        while(1) {                                     \
                                switch(c) {

/* if the following character is ch: consume it and return a token of type
 * set_type */
#define MAYBE(ch, set_type)                                \
                                case ch:                                   \
                                        next_char();                           \
                                        lexer_token.type = set_type;           \
                                        return;

/* default case: runs `code`, then closes the switch and the loop opened by
 * MAYBE_PROLOG */
#define ELSE_CODE(code)                                    \
                                default:                                   \
                                        code;                                  \
                                }                                          \
                        } /* end of while(1) */                        \
                        break;

/* default case: return a token of type set_type without consuming the
 * following character */
#define ELSE(set_type)                                     \
                ELSE_CODE(                                         \
                        lexer_token.type = set_type;                   \
                        return;                                        \
                )
 826
 827 void lexer_next_preprocessing_token(void)
 828 {
 829         while(1) {
 830                 switch(c) {
 831                 case ' ':
 832                 case '\t':
 833                         next_char();
 834                         break;
 835
 836                 MATCH_NEWLINE(
 837                         lexer_token.type = '\n';
 838                         return;
 839                 )
 840
 841                 SYMBOL_CHARS
 842                         parse_symbol();
 843                         /* might be a wide string ( L"string" ) */
 844                         if(c == '"' && (lexer_token.type == T_IDENTIFIER &&
 845                            lexer_token.v.symbol == symbol_L)) {
 846                                 parse_string_literal();
 847                                 return;
 848                         }
 849                         return;
 850
 851                 DIGITS
 852                         parse_number();
 853                         return;
 854
 855                 case '"':
 856                         parse_string_literal();
 857                         return;
 858
 859                 case '\'':
 860                         parse_character_constant();
 861                         return;
 862
 863                 case '.':
 864                         MAYBE_PROLOG
 865                                 case '.':
 866                                         MAYBE_PROLOG
 867                                         MAYBE('.', T_DOTDOTDOT)
 868                                         ELSE_CODE(
 869                                                 put_back(c);
 870                                                 c = '.';
 871                                                 lexer_token.type = '.';
 872                                                 return;
 873                                         )
 874                         ELSE('.')
 875                 case '&':
 876                         MAYBE_PROLOG
 877                         MAYBE('&', T_ANDAND)
 878                         MAYBE('=', T_ANDEQUAL)
 879                         ELSE('&')
 880                 case '*':
 881                         MAYBE_PROLOG
 882                         MAYBE('=', T_ASTERISKEQUAL)
 883                         ELSE('*')
 884                 case '+':
 885                         MAYBE_PROLOG
 886                         MAYBE('+', T_PLUSPLUS)
 887                         MAYBE('=', T_PLUSEQUAL)
 888                         ELSE('+')
 889                 case '-':
 890                         MAYBE_PROLOG
 891                         MAYBE('>', T_MINUSGREATER)
 892                         MAYBE('-', T_MINUSMINUS)
 893                         MAYBE('=', T_MINUSEQUAL)
 894                         ELSE('-')
 895                 case '!':
 896                         MAYBE_PROLOG
 897                         MAYBE('=', T_EXCLAMATIONMARKEQUAL)
 898                         ELSE('!')
 899                 case '/':
 900                         MAYBE_PROLOG
 901                         MAYBE('=', T_SLASHEQUAL)
 902                                 case '*':
 903                                         next_char();
 904                                         skip_multiline_comment();
 905                                         lexer_next_preprocessing_token();
 906                                         return;
 907                                 case '/':
 908                                         next_char();
 909                                         skip_line_comment();
 910                                         lexer_next_preprocessing_token();
 911                                         return;
 912                         ELSE('/')
 913                 case '%':
 914                         MAYBE_PROLOG
 915                         MAYBE('>', T_PERCENTGREATER)
 916                         MAYBE('=', T_PERCENTEQUAL)
 917                                 case ':':
 918                                         MAYBE_PROLOG
 919                                                 case '%':
 920                                                         MAYBE_PROLOG
 921                                                         MAYBE(':', T_PERCENTCOLONPERCENTCOLON)
 922                                                         ELSE_CODE(
 923                                                                 put_back(c);
 924                                                                 c = '%';
 925                                                                 lexer_token.type = T_PERCENTCOLON;
 926                                                                 return;
 927                                                         )
 928                                         ELSE(T_PERCENTCOLON)
 929                         ELSE('%')
 930                 case '<':
 931                         MAYBE_PROLOG
 932                         MAYBE(':', T_LESSCOLON)
 933                         MAYBE('%', T_LESSPERCENT)
 934                         MAYBE('=', T_LESSEQUAL)
 935                                 case '<':
 936                                         MAYBE_PROLOG
 937                                         MAYBE('=', T_LESSLESSEQUAL)
 938                                         ELSE(T_LESSLESS)
 939                         ELSE('<')
 940                 case '>':
 941                         MAYBE_PROLOG
 942                         MAYBE('=', T_GREATEREQUAL)
 943                                 case '>':
 944                                         MAYBE_PROLOG
 945                                         MAYBE('=', T_GREATERGREATEREQUAL)
 946                                         ELSE(T_GREATERGREATER)
 947                         ELSE('>')
 948                 case '^':
 949                         MAYBE_PROLOG
 950                         MAYBE('=', T_CARETEQUAL)
 951                         ELSE('^')
 952                 case '|':
 953                         MAYBE_PROLOG
 954                         MAYBE('=', T_PIPEEQUAL)
 955                         MAYBE('|', T_PIPEPIPE)
 956                         ELSE('|')
 957                 case ':':
 958                         MAYBE_PROLOG
 959                         MAYBE('>', T_COLONGREATER)
 960                         ELSE(':')
 961                 case '=':
 962                         MAYBE_PROLOG
 963                         MAYBE('=', T_EQUALEQUAL)
 964                         ELSE('=')
 965                 case '#':
 966                         MAYBE_PROLOG
 967                         MAYBE('#', T_HASHHASH)
 968                         ELSE('#')
 969
 970                 case '?':
 971                 case '[':
 972                 case ']':
 973                 case '(':
 974                 case ')':
 975                 case '{':
 976                 case '}':
 977                 case '~':
 978                 case ';':
 979                 case ',':
 980                 case '\\':
 981                         lexer_token.type = c;
 982                         next_char();
 983                         return;
 984
 985                 case EOF:
 986                         lexer_token.type = T_EOF;
 987                         return;
 988
 989                 default:
 990                         next_char();
 991                         error_prefix();
 992                         fprintf(stderr, "unknown character '%c' found\n", c);
 993                         lexer_token.type = T_ERROR;
 994                         return;
 995                 }
 996         }
 997 }
 998
 999 void lexer_next_token(void)
1000 {
1001         lexer_next_preprocessing_token();
1002         if(lexer_token.type != '\n')
1003                 return;
1004
1005 newline_found:
1006         do {
1007                 lexer_next_preprocessing_token();
1008         } while(lexer_token.type == '\n');
1009
1010         if(lexer_token.type == '#') {
1011                 parse_preprocessor_directive();
1012                 goto newline_found;
1013         }
1014 }
1015
1016 void init_lexer(void)
1017 {
1018         strset_init(&stringset);
1019 }
1020
1021 void lexer_open_stream(FILE *stream, const char *input_name)
1022 {
1023         input                                  = stream;
1024         lexer_token.source_position.linenr     = 0;
1025         lexer_token.source_position.input_name = input_name;
1026
1027         symbol_L = symbol_table_insert("L");
1028
1029         /* place a virtual \n at the beginning so the lexer knows that we're
1030          * at the beginning of a line */
1031         c = '\n';
1032 }
1033
1034 void exit_lexer(void)
1035 {
1036         strset_destroy(&stringset);
1037 }
1038
1039 static __attribute__((unused))
1040 void dbg_pos(const source_position_t source_position)
1041 {
1042         fprintf(stdout, "%s:%d\n", source_position.input_name,
1043                 source_position.linenr);
1044         fflush(stdout);
1045 }