nsz Git - cparser/blob - lexer.c

   1 #include <config.h>
   2
   3 #include "lexer.h"
   4 #include "token_t.h"
   5 #include "symbol_table_t.h"
   6 #include "adt/error.h"
   7 #include "adt/strset.h"
   8 #include "adt/util.h"
   9
  10 #include <assert.h>
  11 #include <errno.h>
  12 #include <string.h>
  13 #include <ctype.h>
  14
  15 //#define DEBUG_CHARS
/* number of bytes kept free in front of the read area of buf, so that
 * put_back() can step backwards even directly after the buffer was refilled */
#define MAX_PUTBACK 3

/* the character the lexer is currently looking at; EOF once the input is
 * exhausted */
static int         c;
/* the token the lexer functions fill in */
token_t            lexer_token;
/* identifiers are compared against this symbol to detect the L prefix of a
 * wide string literal */
symbol_t          *symbol_L;
/* the stream the buffer is refilled from */
static FILE       *input;
/* read buffer; data read from input starts at buf + MAX_PUTBACK */
static char        buf[1024 + MAX_PUTBACK];
/* one past the last byte read into buf */
static const char *bufend;
/* position in buf of the character that was read last */
static const char *bufpos;
/* set of string literals, used to share identical strings */
static strset_t    stringset;
  26
  27 static void error_prefix_at(const char *input_name, unsigned linenr)
  28 {
  29         fprintf(stderr, "%s:%u: Error: ", input_name, linenr);
  30 }
  31
  32 static void error_prefix(void)
  33 {
  34         error_prefix_at(lexer_token.source_position.input_name,
  35                         lexer_token.source_position.linenr);
  36 }
  37
  38 static void parse_error(const char *msg)
  39 {
  40         error_prefix();
  41         fprintf(stderr, "%s\n", msg);
  42 }
  43
  44 static inline void next_real_char(void)
  45 {
  46         bufpos++;
  47         if(bufpos >= bufend) {
  48                 size_t s = fread(buf + MAX_PUTBACK, 1, sizeof(buf) - MAX_PUTBACK,
  49                                  input);
  50                 if(s == 0) {
  51                         c = EOF;
  52                         return;
  53                 }
  54                 bufpos = buf + MAX_PUTBACK;
  55                 bufend = buf + MAX_PUTBACK + s;
  56         }
  57         c = *(bufpos);
  58 }
  59
  60 static inline void put_back(int pc)
  61 {
  62         assert(bufpos >= buf);
  63         assert(bufpos < buf+MAX_PUTBACK || *bufpos == pc);
  64
  65         char *p = buf + (bufpos - buf);
  66         *p = pc;
  67
  68         /* going backwards in the buffer is legal as long as it's not more often
  69          * than MAX_PUTBACK */
  70         bufpos--;
  71
  72 #ifdef DEBUG_CHARS
  73         printf("putback '%c'\n", pc);
  74 #endif
  75 }
  76
  77 static inline void next_char(void);
  78
/**
 * Expands to the case labels '\r' and '\n' for a switch over c. Each case
 * consumes one line ending ("\r\n" is consumed as a single one), increments
 * the line number stored in lexer_token and then runs `code`.
 * `code` has to leave the case on its own (break, return, ...); otherwise
 * the '\r' case falls through into the '\n' case.
 */
#define MATCH_NEWLINE(code)                   \
        case '\r':                                \
                next_char();                          \
                if(c == '\n') {                       \
                        next_char();                      \
                }                                     \
                lexer_token.source_position.linenr++; \
                code;                                 \
        case '\n':                                \
                next_char();                          \
                lexer_token.source_position.linenr++; \
                code;
  91
  92 #define eat(c_type)  do { assert(c == c_type); next_char(); } while(0)
  93
  94 static void maybe_concat_lines(void)
  95 {
  96         eat('\\');
  97
  98         switch(c) {
  99         MATCH_NEWLINE(return;)
 100
 101         default:
 102                 break;
 103         }
 104
 105         put_back(c);
 106         c = '\\';
 107 }
 108
/**
 * Reads the next character into c.
 *
 * Currently this only calls next_real_char(): the code that would join lines
 * ending in a backslash and translate trigraphs is compiled out with #if 0.
 */
static inline void next_char(void)
{
        next_real_char();

#if 0
        /* filter trigraphs */
        if(UNLIKELY(c == '\\')) {
                maybe_concat_lines();
                goto end_of_next_char;
        }

        if(LIKELY(c != '?'))
                goto end_of_next_char;

        next_real_char();
        if(LIKELY(c != '?')) {
                put_back(c);
                c = '?';
                goto end_of_next_char;
        }

        next_real_char();
        switch(c) {
        case '=': c = '#'; break;
        case '(': c = '['; break;
        case '/': c = '\\'; maybe_concat_lines(); break;
        case ')': c = ']'; break;
        case '\'': c = '^'; break;
        case '<': c = '{'; break;
        case '!': c = '|'; break;
        case '>': c = '}'; break;
        case '-': c = '~'; break;
        default:
                put_back('?');
                put_back(c);
                c = '?';
                break;
        }

end_of_next_char:
#endif
        /* maybe_concat_lines() is only called from the disabled block above;
         * presumably this reference is here to avoid an unused-function
         * warning */
        (void) maybe_concat_lines;
#ifdef DEBUG_CHARS
        printf("nchar '%c'\n", c);
#else
        ;
#endif
}
 157
/**
 * Case labels for the ASCII letters and the underscore, to be placed in a
 * switch over c. The statements that follow the macro become the body of all
 * these cases.
 */
#define SYMBOL_CHARS  \
        case 'a':         \
        case 'b':         \
        case 'c':         \
        case 'd':         \
        case 'e':         \
        case 'f':         \
        case 'g':         \
        case 'h':         \
        case 'i':         \
        case 'j':         \
        case 'k':         \
        case 'l':         \
        case 'm':         \
        case 'n':         \
        case 'o':         \
        case 'p':         \
        case 'q':         \
        case 'r':         \
        case 's':         \
        case 't':         \
        case 'u':         \
        case 'v':         \
        case 'w':         \
        case 'x':         \
        case 'y':         \
        case 'z':         \
        case 'A':         \
        case 'B':         \
        case 'C':         \
        case 'D':         \
        case 'E':         \
        case 'F':         \
        case 'G':         \
        case 'H':         \
        case 'I':         \
        case 'J':         \
        case 'K':         \
        case 'L':         \
        case 'M':         \
        case 'N':         \
        case 'O':         \
        case 'P':         \
        case 'Q':         \
        case 'R':         \
        case 'S':         \
        case 'T':         \
        case 'U':         \
        case 'V':         \
        case 'W':         \
        case 'X':         \
        case 'Y':         \
        case 'Z':         \
        case '_':
 212
/**
 * Case labels for the decimal digits '0' to '9', to be placed in a switch
 * over c. The statements that follow the macro become the body of all these
 * cases.
 */
#define DIGITS        \
        case '0':         \
        case '1':         \
        case '2':         \
        case '3':         \
        case '4':         \
        case '5':         \
        case '6':         \
        case '7':         \
        case '8':         \
        case '9':
 224
 225 static void parse_symbol(void)
 226 {
 227         symbol_t *symbol;
 228         char     *string;
 229
 230         obstack_1grow(&symbol_obstack, c);
 231         next_char();
 232
 233         while(1) {
 234                 switch(c) {
 235                 DIGITS
 236                 SYMBOL_CHARS
 237                         obstack_1grow(&symbol_obstack, c);
 238                         next_char();
 239                         break;
 240
 241                 default:
 242                         goto end_symbol;
 243                 }
 244         }
 245
 246 end_symbol:
 247         obstack_1grow(&symbol_obstack, '\0');
 248
 249         string = obstack_finish(&symbol_obstack);
 250         symbol = symbol_table_insert(string);
 251
 252         lexer_token.type     = symbol->ID;
 253         lexer_token.v.symbol = symbol;
 254
 255         if(symbol->string != string) {
 256                 obstack_free(&symbol_obstack, string);
 257         }
 258 }
 259
 260 static void parse_integer_suffix(void)
 261 {
 262         if(c == 'U' || c == 'U') {
 263                 /* TODO do something with the suffixes... */
 264                 next_char();
 265                 if(c == 'L' || c == 'l') {
 266                         next_char();
 267                         if(c == 'L' || c == 'l') {
 268                                 next_char();
 269                         }
 270                 }
 271         } else if(c == 'l' || c == 'L') {
 272                 next_char();
 273                 if(c == 'l' || c == 'L') {
 274                         next_char();
 275                         if(c == 'u' || c == 'U') {
 276                                 next_char();
 277                         }
 278                 } else if(c == 'u' || c == 'U') {
 279                         next_char();
 280                 }
 281         }
 282 }
 283
 284 static void parse_floating_suffix(void)
 285 {
 286         switch(c) {
 287         /* TODO: do something usefull with the suffixes... */
 288         case 'f':
 289         case 'F':
 290         case 'l':
 291         case 'L':
 292                 next_char();
 293                 break;
 294         default:
 295                 break;
 296         }
 297 }
 298
 299 static void parse_number_hex(void)
 300 {
 301         assert(c == 'x' || c == 'X');
 302         next_char();
 303
 304         if (!isdigit(c) &&
 305                 !('A' <= c && c <= 'F') &&
 306                 !('a' <= c && c <= 'f')) {
 307                 parse_error("premature end of hex number literal");
 308                 lexer_token.type = T_ERROR;
 309                 return;
 310         }
 311
 312         int value = 0;
 313         while(1) {
 314                 if (isdigit(c)) {
 315                         value = 16 * value + c - '0';
 316                 } else if ('A' <= c && c <= 'F') {
 317                         value = 16 * value + c - 'A' + 10;
 318                 } else if ('a' <= c && c <= 'f') {
 319                         value = 16 * value + c - 'a' + 10;
 320                 } else {
 321                         parse_integer_suffix();
 322
 323                         lexer_token.type       = T_INTEGER;
 324                         lexer_token.v.intvalue = value;
 325                         return;
 326                 }
 327                 next_char();
 328         }
 329
 330         if(c == '.' || c == 'p' || c == 'P') {
 331                 next_char();
 332                 panic("Hex floating point numbers not implemented yet");
 333         }
 334 }
 335
 336 static void parse_number_oct(void)
 337 {
 338         int value = 0;
 339         while(c >= '0' && c <= '7') {
 340                 value = 8 * value + c - '0';
 341                 next_char();
 342         }
 343         if (c == '8' || c == '9') {
 344                 parse_error("invalid octal number");
 345                 lexer_token.type = T_ERROR;
 346                 return;
 347         }
 348
 349         lexer_token.type       = T_INTEGER;
 350         lexer_token.v.intvalue = value;
 351
 352         parse_integer_suffix();
 353 }
 354
 355 static void parse_floatingpoint_exponent(long double value)
 356 {
 357         unsigned int expo = 0;
 358         long double  factor = 10.;
 359
 360         if(c == '-') {
 361                 next_char();
 362                 factor = 0.1;
 363         } else if(c == '+') {
 364                 next_char();
 365         }
 366
 367         while(c >= '0' && c <= '9') {
 368                 expo = 10 * expo + (c - '0');
 369                 next_char();
 370         }
 371
 372         while(1) {
 373                 if(expo & 1)
 374                         value *= factor;
 375                 expo >>= 1;
 376                 if(expo == 0)
 377                         break;
 378                 factor *= factor;
 379         }
 380
 381         lexer_token.type         = T_FLOATINGPOINT;
 382         lexer_token.v.floatvalue = value;
 383
 384         parse_floating_suffix();
 385 }
 386
 387 static void parse_floatingpoint_fract(int integer_part)
 388 {
 389         long double value  = integer_part;
 390         long double factor = 1.;
 391
 392         while(c >= '0' && c <= '9') {
 393                 factor *= 0.1;
 394                 value  += (c - '0') * factor;
 395                 next_char();
 396         }
 397
 398         if(c == 'e' || c == 'E') {
 399                 next_char();
 400                 parse_floatingpoint_exponent(value);
 401                 return;
 402         }
 403
 404         lexer_token.type         = T_FLOATINGPOINT;
 405         lexer_token.v.floatvalue = value;
 406
 407         parse_floating_suffix();
 408 }
 409
 410 static void parse_number_dec(void)
 411 {
 412         int value = 0;
 413
 414         while(isdigit(c)) {
 415                 value = 10 * value + c - '0';
 416                 next_char();
 417         }
 418
 419         if(c == '.') {
 420                 next_char();
 421                 parse_floatingpoint_fract(value);
 422                 return;
 423         }
 424         if(c == 'e' || c == 'E') {
 425                 next_char();
 426                 parse_floatingpoint_exponent(value);
 427                 return;
 428         }
 429         parse_integer_suffix();
 430
 431         lexer_token.type       = T_INTEGER;
 432         lexer_token.v.intvalue = value;
 433 }
 434
 435 static void parse_number(void)
 436 {
 437         if (c == '0') {
 438                 next_char();
 439                 switch (c) {
 440                         case 'X':
 441                         case 'x':
 442                                 parse_number_hex();
 443                                 break;
 444                         case '0':
 445                         case '1':
 446                         case '2':
 447                         case '3':
 448                         case '4':
 449                         case '5':
 450                         case '6':
 451                         case '7':
 452                                 parse_number_oct();
 453                                 break;
 454                         case '.':
 455                                 next_char();
 456                                 parse_floatingpoint_fract(0);
 457                                 break;
 458                         case 'e':
 459                         case 'E':
 460                                 parse_floatingpoint_exponent(0);
 461                                 break;
 462                         case '8':
 463                         case '9':
 464                                 next_char();
 465                                 parse_error("invalid octal number");
 466                                 lexer_token.type = T_ERROR;
 467                                 return;
 468                         default:
 469                                 put_back(c);
 470                                 c = '0';
 471                                 parse_number_dec();
 472                                 return;
 473                 }
 474         } else {
 475                 parse_number_dec();
 476         }
 477 }
 478
 479 static int parse_octal_sequence(void)
 480 {
 481         int value = 0;
 482         while(1) {
 483                 if(c < '0' || c > '7')
 484                         break;
 485                 value = 8 * value + c - '0';
 486                 next_char();
 487         }
 488
 489         return value;
 490 }
 491
 492 static int parse_hex_sequence(void)
 493 {
 494         int value = 0;
 495         while(1) {
 496                 if (c >= '0' && c <= '9') {
 497                         value = 16 * value + c - '0';
 498                 } else if ('A' <= c && c <= 'F') {
 499                         value = 16 * value + c - 'A' + 10;
 500                 } else if ('a' <= c && c <= 'f') {
 501                         value = 16 * value + c - 'a' + 10;
 502                 } else {
 503                         break;
 504                 }
 505                 next_char();
 506         }
 507
 508         return value;
 509 }
 510
 511 static int parse_escape_sequence(void)
 512 {
 513         eat('\\');
 514
 515         int ec = c;
 516         next_char();
 517
 518         switch(ec) {
 519         case '"':  return '"';
 520         case '\'': return'\'';
 521         case '\\': return '\\';
 522         case '?': return '\?';
 523         case 'a': return '\a';
 524         case 'b': return '\b';
 525         case 'f': return '\f';
 526         case 'n': return '\n';
 527         case 'r': return '\r';
 528         case 't': return '\t';
 529         case 'v': return '\v';
 530         case 'x':
 531                 return parse_hex_sequence();
 532         case '0':
 533         case '1':
 534         case '2':
 535         case '3':
 536         case '4':
 537         case '5':
 538         case '6':
 539         case '7':
 540                 return parse_octal_sequence();
 541         case EOF:
 542                 parse_error("reached end of file while parsing escape sequence");
 543                 return EOF;
 544         default:
 545                 parse_error("unknown escape sequence");
 546                 return EOF;
 547         }
 548 }
 549
 550 const char *concat_strings(const char *s1, const char *s2)
 551 {
 552         size_t  len1   = strlen(s1);
 553         size_t  len2   = strlen(s2);
 554
 555         char   *concat = obstack_alloc(&symbol_obstack, len1 + len2 + 1);
 556         memcpy(concat, s1, len1);
 557         memcpy(concat + len1, s2, len2 + 1);
 558
 559         const char *result = strset_insert(&stringset, concat);
 560         if(result != concat) {
 561                 obstack_free(&symbol_obstack, concat);
 562         }
 563
 564         return result;
 565 }
 566
 567 static void parse_string_literal(void)
 568 {
 569         unsigned    start_linenr = lexer_token.source_position.linenr;
 570         char       *string;
 571         const char *result;
 572
 573         assert(c == '"');
 574         next_char();
 575
 576         int tc;
 577         while(1) {
 578                 switch(c) {
 579                 case '\\':
 580                         tc = parse_escape_sequence();
 581                         obstack_1grow(&symbol_obstack, tc);
 582                         break;
 583
 584                 case EOF:
 585                         error_prefix_at(lexer_token.source_position.input_name,
 586                                         start_linenr);
 587                         fprintf(stderr, "string has no end\n");
 588                         lexer_token.type = T_ERROR;
 589                         return;
 590
 591                 case '"':
 592                         next_char();
 593                         goto end_of_string;
 594
 595                 default:
 596                         obstack_1grow(&symbol_obstack, c);
 597                         next_char();
 598                         break;
 599                 }
 600         }
 601
 602 end_of_string:
 603
 604         /* TODO: concatenate multiple strings separated by whitespace... */
 605
 606         /* add finishing 0 to the string */
 607         obstack_1grow(&symbol_obstack, '\0');
 608         string = obstack_finish(&symbol_obstack);
 609
 610         /* check if there is already a copy of the string */
 611         result = strset_insert(&stringset, string);
 612         if(result != string) {
 613                 obstack_free(&symbol_obstack, string);
 614         }
 615
 616         lexer_token.type     = T_STRING_LITERAL;
 617         lexer_token.v.string = result;
 618 }
 619
 620 static void parse_character_constant(void)
 621 {
 622         eat('\'');
 623
 624         int found_char = 0;
 625         while(1) {
 626                 switch(c) {
 627                 case '\\':
 628                         found_char = parse_escape_sequence();
 629                         break;
 630
 631                 MATCH_NEWLINE(
 632                         parse_error("newline while parsing character constant");
 633                         break;
 634                 )
 635
 636                 case '\'':
 637                         next_char();
 638                         goto end_of_char_constant;
 639
 640                 case EOF:
 641                         parse_error("EOF while parsing character constant");
 642                         lexer_token.type = T_ERROR;
 643                         return;
 644
 645                 default:
 646                         if(found_char != 0) {
 647                                 parse_error("more than 1 characters in character "
 648                                             "constant");
 649                                 goto end_of_char_constant;
 650                         } else {
 651                                 found_char = c;
 652                                 next_char();
 653                         }
 654                         break;
 655                 }
 656         }
 657
 658 end_of_char_constant:
 659         lexer_token.type       = T_INTEGER;
 660         lexer_token.v.intvalue = found_char;
 661 }
 662
 663 static void skip_multiline_comment(void)
 664 {
 665         unsigned start_linenr = lexer_token.source_position.linenr;
 666
 667         while(1) {
 668                 switch(c) {
 669                 case '*':
 670                         next_char();
 671                         if(c == '/') {
 672                                 next_char();
 673                                 return;
 674                         }
 675                         break;
 676
 677                 MATCH_NEWLINE(break;)
 678
 679                 case EOF:
 680                         error_prefix_at(lexer_token.source_position.input_name,
 681                                         start_linenr);
 682                         fprintf(stderr, "at end of file while looking for comment end\n");
 683                         return;
 684
 685                 default:
 686                         next_char();
 687                         break;
 688                 }
 689         }
 690 }
 691
 692 static void skip_line_comment(void)
 693 {
 694         while(1) {
 695                 switch(c) {
 696                 case EOF:
 697                         return;
 698
 699                 case '\n':
 700                 case '\r':
 701                         return;
 702
 703                 default:
 704                         next_char();
 705                         break;
 706                 }
 707         }
 708 }
 709
 710 static token_t pp_token;
 711
 712 static inline void next_pp_token(void)
 713 {
 714         lexer_next_preprocessing_token();
 715         pp_token = lexer_token;
 716 }
 717
 718 static void eat_until_newline(void)
 719 {
 720         while(pp_token.type != '\n' && pp_token.type != T_EOF) {
 721                 next_pp_token();
 722         }
 723 }
 724
 725 static void error_directive(void)
 726 {
 727         error_prefix();
 728         fprintf(stderr, "#error directive: \n");
 729
 730         /* parse pp-tokens until new-line */
 731 }
 732
 733 static void define_directive(void)
 734 {
 735         lexer_next_preprocessing_token();
 736         if(lexer_token.type != T_IDENTIFIER) {
 737                 parse_error("expected identifier after #define\n");
 738                 eat_until_newline();
 739         }
 740 }
 741
 742 static void ifdef_directive(int is_ifndef)
 743 {
 744         (void) is_ifndef;
 745         lexer_next_preprocessing_token();
 746         //expect_identifier();
 747         //extect_newline();
 748 }
 749
/**
 * Handles an #endif directive. Currently a stub that does nothing.
 */
static void endif_directive(void)
{
        //expect_newline();
}
 754
 755 static void parse_line_directive(void)
 756 {
 757         if(pp_token.type != T_INTEGER) {
 758                 parse_error("expected integer");
 759         } else {
 760                 lexer_token.source_position.linenr = pp_token.v.intvalue - 1;
 761                 next_pp_token();
 762         }
 763         if(pp_token.type == T_STRING_LITERAL) {
 764                 lexer_token.source_position.input_name = pp_token.v.string;
 765                 next_pp_token();
 766         }
 767
 768         eat_until_newline();
 769 }
 770
 771 static void parse_preprocessor_identifier(void)
 772 {
 773         assert(pp_token.type == T_IDENTIFIER);
 774         symbol_t *symbol = pp_token.v.symbol;
 775
 776         switch(symbol->pp_ID) {
 777         case TP_include:
 778                 printf("include - enable header name parsing!\n");
 779                 break;
 780         case TP_define:
 781                 define_directive();
 782                 break;
 783         case TP_ifdef:
 784                 ifdef_directive(0);
 785                 break;
 786         case TP_ifndef:
 787                 ifdef_directive(1);
 788                 break;
 789         case TP_endif:
 790                 endif_directive();
 791                 break;
 792         case TP_line:
 793                 next_pp_token();
 794                 parse_line_directive();
 795                 break;
 796         case TP_if:
 797         case TP_else:
 798         case TP_elif:
 799         case TP_undef:
 800         case TP_error:
 801                 error_directive();
 802                 break;
 803         case TP_pragma:
 804                 break;
 805         }
 806 }
 807
 808 static void parse_preprocessor_directive(void)
 809 {
 810         next_pp_token();
 811
 812         switch(pp_token.type) {
 813         case T_IDENTIFIER:
 814                 parse_preprocessor_identifier();
 815                 break;
 816         case T_INTEGER:
 817                 parse_line_directive();
 818                 break;
 819         default:
 820                 parse_error("invalid preprocessor directive");
 821                 eat_until_newline();
 822                 break;
 823         }
 824 }
 825
/*
 * The following macros build the case of the lexer's switch for a token that
 * may consist of several characters. They are not balanced on their own:
 * MAYBE_PROLOG opens a loop and a switch, which a later ELSE or ELSE_CODE
 * closes again.
 */

/* Consumes the current character and opens a loop with a switch over the
 * character that follows it. */
#define MAYBE_PROLOG                                       \
                        next_char();                                   \
                        while(1) {                                     \
                                switch(c) {

/* Case for the character ch: consumes it, sets the token type to set_type
 * and returns from the enclosing function. */
#define MAYBE(ch, set_type)                                \
                                case ch:                                   \
                                        next_char();                           \
                                        lexer_token.type = set_type;           \
                                        return;

/* Default case running `code`; closes the switch and the loop opened by
 * MAYBE_PROLOG and ends with a break. */
#define ELSE_CODE(code)                                    \
                                default:                                   \
                                        code;                                  \
                                }                                          \
                        } /* end of while(1) */                        \
                        break;

/* ELSE_CODE variant that sets the token type to set_type and returns from
 * the enclosing function without consuming the current character. */
#define ELSE(set_type)                                     \
                ELSE_CODE(                                         \
                        lexer_token.type = set_type;                   \
                        return;                                        \
                )
 849
 850 void lexer_next_preprocessing_token(void)
 851 {
 852         while(1) {
 853                 switch(c) {
 854                 case ' ':
 855                 case '\t':
 856                         next_char();
 857                         break;
 858
 859                 MATCH_NEWLINE(
 860                         lexer_token.type = '\n';
 861                         return;
 862                 )
 863
 864                 SYMBOL_CHARS
 865                         parse_symbol();
 866                         /* might be a wide string ( L"string" ) */
 867                         if(c == '"' && (lexer_token.type == T_IDENTIFIER &&
 868                            lexer_token.v.symbol == symbol_L)) {
 869                                 parse_string_literal();
 870                                 return;
 871                         }
 872                         return;
 873
 874                 DIGITS
 875                         parse_number();
 876                         return;
 877
 878                 case '"':
 879                         parse_string_literal();
 880                         return;
 881
 882                 case '\'':
 883                         parse_character_constant();
 884                         return;
 885
 886                 case '.':
 887                         MAYBE_PROLOG
 888                                 case '.':
 889                                         MAYBE_PROLOG
 890                                         MAYBE('.', T_DOTDOTDOT)
 891                                         ELSE_CODE(
 892                                                 put_back(c);
 893                                                 c = '.';
 894                                                 lexer_token.type = '.';
 895                                                 return;
 896                                         )
 897                         ELSE('.')
 898                 case '&':
 899                         MAYBE_PROLOG
 900                         MAYBE('&', T_ANDAND)
 901                         MAYBE('=', T_ANDEQUAL)
 902                         ELSE('&')
 903                 case '*':
 904                         MAYBE_PROLOG
 905                         MAYBE('=', T_ASTERISKEQUAL)
 906                         ELSE('*')
 907                 case '+':
 908                         MAYBE_PROLOG
 909                         MAYBE('+', T_PLUSPLUS)
 910                         MAYBE('=', T_PLUSEQUAL)
 911                         ELSE('+')
 912                 case '-':
 913                         MAYBE_PROLOG
 914                         MAYBE('>', T_MINUSGREATER)
 915                         MAYBE('-', T_MINUSMINUS)
 916                         MAYBE('=', T_MINUSEQUAL)
 917                         ELSE('-')
 918                 case '!':
 919                         MAYBE_PROLOG
 920                         MAYBE('=', T_EXCLAMATIONMARKEQUAL)
 921                         ELSE('!')
 922                 case '/':
 923                         MAYBE_PROLOG
 924                         MAYBE('=', T_SLASHEQUAL)
 925                                 case '*':
 926                                         next_char();
 927                                         skip_multiline_comment();
 928                                         lexer_next_preprocessing_token();
 929                                         return;
 930                                 case '/':
 931                                         next_char();
 932                                         skip_line_comment();
 933                                         lexer_next_preprocessing_token();
 934                                         return;
 935                         ELSE('/')
 936                 case '%':
 937                         MAYBE_PROLOG
 938                         MAYBE('>', T_PERCENTGREATER)
 939                         MAYBE('=', T_PERCENTEQUAL)
 940                                 case ':':
 941                                         MAYBE_PROLOG
 942                                                 case '%':
 943                                                         MAYBE_PROLOG
 944                                                         MAYBE(':', T_PERCENTCOLONPERCENTCOLON)
 945                                                         ELSE_CODE(
 946                                                                 put_back(c);
 947                                                                 c = '%';
 948                                                                 lexer_token.type = T_PERCENTCOLON;
 949                                                                 return;
 950                                                         )
 951                                         ELSE(T_PERCENTCOLON)
 952                         ELSE('%')
 953                 case '<':
 954                         MAYBE_PROLOG
 955                         MAYBE(':', T_LESSCOLON)
 956                         MAYBE('%', T_LESSPERCENT)
 957                         MAYBE('=', T_LESSEQUAL)
 958                                 case '<':
 959                                         MAYBE_PROLOG
 960                                         MAYBE('=', T_LESSLESSEQUAL)
 961                                         ELSE(T_LESSLESS)
 962                         ELSE('<')
 963                 case '>':
 964                         MAYBE_PROLOG
 965                         MAYBE('=', T_GREATEREQUAL)
 966                                 case '>':
 967                                         MAYBE_PROLOG
 968                                         MAYBE('=', T_GREATERGREATEREQUAL)
 969                                         ELSE(T_GREATERGREATER)
 970                         ELSE('>')
 971                 case '^':
 972                         MAYBE_PROLOG
 973                         MAYBE('=', T_CARETEQUAL)
 974                         ELSE('^')
 975                 case '|':
 976                         MAYBE_PROLOG
 977                         MAYBE('=', T_PIPEEQUAL)
 978                         MAYBE('|', T_PIPEPIPE)
 979                         ELSE('|')
 980                 case ':':
 981                         MAYBE_PROLOG
 982                         MAYBE('>', T_COLONGREATER)
 983                         ELSE(':')
 984                 case '=':
 985                         MAYBE_PROLOG
 986                         MAYBE('=', T_EQUALEQUAL)
 987                         ELSE('=')
 988                 case '#':
 989                         MAYBE_PROLOG
 990                         MAYBE('#', T_HASHHASH)
 991                         ELSE('#')
 992
 993                 case '?':
 994                 case '[':
 995                 case ']':
 996                 case '(':
 997                 case ')':
 998                 case '{':
 999                 case '}':
1000                 case '~':
1001                 case ';':
1002                 case ',':
1003                 case '\\':
1004                         lexer_token.type = c;
1005                         next_char();
1006                         return;
1007
1008                 case EOF:
1009                         lexer_token.type = T_EOF;
1010                         return;
1011
1012                 default:
1013                         next_char();
1014                         error_prefix();
1015                         fprintf(stderr, "unknown character '%c' found\n", c);
1016                         lexer_token.type = T_ERROR;
1017                         return;
1018                 }
1019         }
1020 }
1021
1022 void lexer_next_token(void)
1023 {
1024         lexer_next_preprocessing_token();
1025         if(lexer_token.type != '\n')
1026                 return;
1027
1028 newline_found:
1029         do {
1030                 lexer_next_preprocessing_token();
1031         } while(lexer_token.type == '\n');
1032
1033         if(lexer_token.type == '#') {
1034                 parse_preprocessor_directive();
1035                 goto newline_found;
1036         }
1037 }
1038
1039 void init_lexer(void)
1040 {
1041         strset_init(&stringset);
1042 }
1043
1044 void lexer_open_stream(FILE *stream, const char *input_name)
1045 {
1046         input                                  = stream;
1047         lexer_token.source_position.linenr     = 0;
1048         lexer_token.source_position.input_name = input_name;
1049
1050         symbol_L = symbol_table_insert("L");
1051
1052         /* place a virtual \n at the beginning so the lexer knows that we're
1053          * at the beginning of a line */
1054         c = '\n';
1055 }
1056
1057 void exit_lexer(void)
1058 {
1059         strset_destroy(&stringset);
1060 }
1061
1062 static __attribute__((unused))
1063 void dbg_pos(const source_position_t source_position)
1064 {
1065         fprintf(stdout, "%s:%d\n", source_position.input_name,
1066                 source_position.linenr);
1067         fflush(stdout);
1068 }