nsz Git - cparser/blob - lexer.c

   1 #include <config.h>
   2
   3 #include "lexer.h"
   4 #include "token_t.h"
   5 #include "symbol_table_t.h"
   6 #include "adt/error.h"
   7 #include "adt/strset.h"
   8 #include "adt/util.h"
   9
  10 #include <assert.h>
  11 #include <errno.h>
  12 #include <string.h>
  13 #include <ctype.h>
  14
  15 //#define DEBUG_CHARS
  16 #define MAX_PUTBACK 3
  17
  18 static int         c;
  19 token_t            lexer_token;
  20 static FILE       *input;
  21 static char        buf[1024 + MAX_PUTBACK];
  22 static const char *bufend;
  23 static const char *bufpos;
  24 static strset_t    stringset;
  25
  26 static void error_prefix_at(const char *input_name, unsigned linenr)
  27 {
  28         fprintf(stderr, "%s:%u: Error: ", input_name, linenr);
  29 }
  30
  31 static void error_prefix(void)
  32 {
  33         error_prefix_at(lexer_token.source_position.input_name,
  34                         lexer_token.source_position.linenr);
  35 }
  36
  37 static void parse_error(const char *msg)
  38 {
  39         error_prefix();
  40         fprintf(stderr, "%s\n", msg);
  41 }
  42
  43 static inline void next_real_char(void)
  44 {
  45         bufpos++;
  46         if(bufpos >= bufend) {
  47                 size_t s = fread(buf + MAX_PUTBACK, 1, sizeof(buf) - MAX_PUTBACK,
  48                                  input);
  49                 if(s == 0) {
  50                         c = EOF;
  51                         return;
  52                 }
  53                 bufpos = buf + MAX_PUTBACK;
  54                 bufend = buf + MAX_PUTBACK + s;
  55         }
  56         c = *(bufpos);
  57 }
  58
  59 static inline void put_back(int pc)
  60 {
  61         assert(bufpos >= buf);
  62         assert(bufpos < buf+MAX_PUTBACK || *bufpos == pc);
  63
  64         char *p = buf + (bufpos - buf);
  65         *p = pc;
  66
  67         /* going backwards in the buffer is legal as long as it's not more often
  68          * than MAX_PUTBACK */
  69         bufpos--;
  70
  71 #ifdef DEBUG_CHARS
  72         printf("putback '%c'\n", pc);
  73 #endif
  74 }
  75
  76 static inline void next_char(void);
  77
  78 #define MATCH_NEWLINE(code)                   \
  79         case '\r':                                \
  80                 next_char();                          \
  81                 if(c == '\n') {                       \
  82                         next_char();                      \
  83                 }                                     \
  84                 lexer_token.source_position.linenr++; \
  85                 code;                                 \
  86         case '\n':                                \
  87                 next_char();                          \
  88                 lexer_token.source_position.linenr++; \
  89                 code;
  90
  91 static inline void eat(char c_type)
  92 {
  93         assert(c == c_type);
  94         next_char();
  95 }
  96
  97 static void maybe_concat_lines(void)
  98 {
  99         eat('\\');
 100
 101         switch(c) {
 102         MATCH_NEWLINE(return;)
 103
 104         default:
 105                 break;
 106         }
 107
 108         put_back(c);
 109         c = '\\';
 110 }
 111
 112 static inline void next_char(void)
 113 {
 114         next_real_char();
 115
 116         /* filter trigraphs */
 117         if(UNLIKELY(c == '\\')) {
 118                 maybe_concat_lines();
 119                 goto end_of_next_char;
 120         }
 121
 122         if(LIKELY(c != '?'))
 123                 goto end_of_next_char;
 124
 125         next_real_char();
 126         if(LIKELY(c != '?')) {
 127                 put_back(c);
 128                 c = '?';
 129                 goto end_of_next_char;
 130         }
 131
 132         next_real_char();
 133         switch(c) {
 134         case '=': c = '#'; break;
 135         case '(': c = '['; break;
 136         case '/': c = '\\'; maybe_concat_lines(); break;
 137         case ')': c = ']'; break;
 138         case '\'': c = '^'; break;
 139         case '<': c = '{'; break;
 140         case '!': c = '|'; break;
 141         case '>': c = '}'; break;
 142         case '-': c = '~'; break;
 143         default:
 144                 put_back('?');
 145                 put_back(c);
 146                 c = '?';
 147                 break;
 148         }
 149
 150 end_of_next_char:
 151 #ifdef DEBUG_CHARS
 152         printf("nchar '%c'\n", c);
 153 #else
 154         ;
 155 #endif
 156 }
 157
 158 #define SYMBOL_CHARS  \
 159         case 'a':         \
 160         case 'b':         \
 161         case 'c':         \
 162         case 'd':         \
 163         case 'e':         \
 164         case 'f':         \
 165         case 'g':         \
 166         case 'h':         \
 167         case 'i':         \
 168         case 'j':         \
 169         case 'k':         \
 170         case 'l':         \
 171         case 'm':         \
 172         case 'n':         \
 173         case 'o':         \
 174         case 'p':         \
 175         case 'q':         \
 176         case 'r':         \
 177         case 's':         \
 178         case 't':         \
 179         case 'u':         \
 180         case 'v':         \
 181         case 'w':         \
 182         case 'x':         \
 183         case 'y':         \
 184         case 'z':         \
 185         case 'A':         \
 186         case 'B':         \
 187         case 'C':         \
 188         case 'D':         \
 189         case 'E':         \
 190         case 'F':         \
 191         case 'G':         \
 192         case 'H':         \
 193         case 'I':         \
 194         case 'J':         \
 195         case 'K':         \
 196         case 'L':         \
 197         case 'M':         \
 198         case 'N':         \
 199         case 'O':         \
 200         case 'P':         \
 201         case 'Q':         \
 202         case 'R':         \
 203         case 'S':         \
 204         case 'T':         \
 205         case 'U':         \
 206         case 'V':         \
 207         case 'W':         \
 208         case 'X':         \
 209         case 'Y':         \
 210         case 'Z':         \
 211         case '_':
 212
 213 #define DIGITS        \
 214         case '0':         \
 215         case '1':         \
 216         case '2':         \
 217         case '3':         \
 218         case '4':         \
 219         case '5':         \
 220         case '6':         \
 221         case '7':         \
 222         case '8':         \
 223         case '9':
 224
 225 static void parse_symbol(void)
 226 {
 227         symbol_t *symbol;
 228         char     *string;
 229
 230         obstack_1grow(&symbol_obstack, c);
 231         next_char();
 232
 233         while(1) {
 234                 switch(c) {
 235                 DIGITS
 236                 SYMBOL_CHARS
 237                         obstack_1grow(&symbol_obstack, c);
 238                         next_char();
 239                         break;
 240
 241                 default:
 242                         goto end_symbol;
 243                 }
 244         }
 245
 246 end_symbol:
 247         obstack_1grow(&symbol_obstack, '\0');
 248
 249         string = obstack_finish(&symbol_obstack);
 250         symbol = symbol_table_insert(string);
 251
 252         lexer_token.type     = symbol->ID;
 253         lexer_token.v.symbol = symbol;
 254
 255         if(symbol->string != string) {
 256                 obstack_free(&symbol_obstack, string);
 257         }
 258 }
 259
 260 static void parse_integer_suffix(void)
 261 {
 262         if(c == 'U' || c == 'U') {
 263                 /* TODO do something with the suffixes... */
 264                 next_char();
 265                 if(c == 'L' || c == 'l') {
 266                         next_char();
 267                         if(c == 'L' || c == 'l') {
 268                                 next_char();
 269                         }
 270                 }
 271         } else if(c == 'l' || c == 'L') {
 272                 next_char();
 273                 if(c == 'l' || c == 'L') {
 274                         next_char();
 275                         if(c == 'u' || c == 'U') {
 276                                 next_char();
 277                         }
 278                 } else if(c == 'u' || c == 'U') {
 279                         next_char();
 280                 }
 281         }
 282 }
 283
 284 static void parse_number_hex(void)
 285 {
 286         assert(c == 'x' || c == 'X');
 287         next_char();
 288
 289         if (!isdigit(c) &&
 290                 !('A' <= c && c <= 'F') &&
 291                 !('a' <= c && c <= 'f')) {
 292                 parse_error("premature end of hex number literal");
 293                 lexer_token.type = T_ERROR;
 294                 return;
 295         }
 296
 297         int value = 0;
 298         while(1) {
 299                 if (isdigit(c)) {
 300                         value = 16 * value + c - '0';
 301                 } else if ('A' <= c && c <= 'F') {
 302                         value = 16 * value + c - 'A' + 10;
 303                 } else if ('a' <= c && c <= 'f') {
 304                         value = 16 * value + c - 'a' + 10;
 305                 } else {
 306                         parse_integer_suffix();
 307
 308                         lexer_token.type       = T_INTEGER;
 309                         lexer_token.v.intvalue = value;
 310                         return;
 311                 }
 312                 next_char();
 313         }
 314
 315         if(c == '.' || c == 'p' || c == 'P') {
 316                 next_char();
 317                 panic("Hex floating point numbers not implemented yet");
 318         }
 319 }
 320
 321 static void parse_number_oct(void)
 322 {
 323         int value = 0;
 324         while(c >= '0' && c <= '7') {
 325                 value = 8 * value + c - '0';
 326                 next_char();
 327         }
 328         if (c == '8' || c == '9') {
 329                 parse_error("invalid octal number");
 330                 lexer_token.type = T_ERROR;
 331                 return;
 332         }
 333
 334         lexer_token.type       = T_INTEGER;
 335         lexer_token.v.intvalue = value;
 336
 337         parse_integer_suffix();
 338 }
 339
 340 static void parse_floatingpoint_exponent(long double value)
 341 {
 342         unsigned int expo = 0;
 343         long double  factor = 10.;
 344
 345         if(c == '-') {
 346                 next_char();
 347                 factor = 0.1;
 348         } else if(c == '+') {
 349                 next_char();
 350         }
 351
 352         while(c >= '0' && c <= '9') {
 353                 expo = 10 * expo + (c - '0');
 354                 next_char();
 355         }
 356
 357         while(1) {
 358                 if(expo & 1)
 359                         value *= factor;
 360                 expo >>= 1;
 361                 if(expo == 0)
 362                         break;
 363                 factor *= factor;
 364         }
 365
 366         lexer_token.type         = T_FLOATINGPOINT;
 367         lexer_token.v.floatvalue = value;
 368 }
 369
 370 static void parse_floatingpoint_fract(int integer_part)
 371 {
 372         long double value  = integer_part;
 373         long double factor = 1.;
 374
 375         while(c >= '0' && c <= '9') {
 376                 factor *= 0.1;
 377                 value  += (c - '0') * factor;
 378                 next_char();
 379         }
 380
 381         if(c == 'e' || c == 'E') {
 382                 next_char();
 383                 parse_floatingpoint_exponent(value);
 384                 return;
 385         }
 386
 387         lexer_token.type         = T_FLOATINGPOINT;
 388         lexer_token.v.floatvalue = value;
 389 }
 390
 391 static void parse_number_dec(void)
 392 {
 393         int value = 0;
 394
 395         while(isdigit(c)) {
 396                 value = 10 * value + c - '0';
 397                 next_char();
 398         }
 399
 400         if(c == '.') {
 401                 next_char();
 402                 parse_floatingpoint_fract(value);
 403                 return;
 404         }
 405         if(c == 'e' || c == 'E') {
 406                 next_char();
 407                 parse_floatingpoint_exponent(value);
 408                 return;
 409         }
 410         parse_integer_suffix();
 411
 412         lexer_token.type       = T_INTEGER;
 413         lexer_token.v.intvalue = value;
 414 }
 415
 416 static void parse_number(void)
 417 {
 418         if (c == '0') {
 419                 next_char();
 420                 switch (c) {
 421                         case 'X':
 422                         case 'x':
 423                                 parse_number_hex();
 424                                 break;
 425                         case '0':
 426                         case '1':
 427                         case '2':
 428                         case '3':
 429                         case '4':
 430                         case '5':
 431                         case '6':
 432                         case '7':
 433                                 parse_number_oct();
 434                                 break;
 435                         case '.':
 436                                 next_char();
 437                                 parse_floatingpoint_fract(0);
 438                                 break;
 439                         case 'e':
 440                         case 'E':
 441                                 parse_floatingpoint_exponent(0);
 442                                 break;
 443                         case '8':
 444                         case '9':
 445                                 next_char();
 446                                 parse_error("invalid octal number");
 447                                 lexer_token.type = T_ERROR;
 448                                 return;
 449                         default:
 450                                 put_back(c);
 451                                 c = '0';
 452                                 parse_number_dec();
 453                                 return;
 454                 }
 455         } else {
 456                 parse_number_dec();
 457         }
 458 }
 459
 460 static int parse_octal_sequence(void)
 461 {
 462         int value = 0;
 463         while(1) {
 464                 if(c < '0' || c > '7')
 465                         break;
 466                 value = 8 * value + c - '0';
 467                 next_char();
 468         }
 469
 470         return value;
 471 }
 472
 473 static int parse_hex_sequence(void)
 474 {
 475         int value = 0;
 476         while(1) {
 477                 if (c >= '0' && c <= '9') {
 478                         value = 16 * value + c - '0';
 479                 } else if ('A' <= c && c <= 'F') {
 480                         value = 16 * value + c - 'A' + 10;
 481                 } else if ('a' <= c && c <= 'f') {
 482                         value = 16 * value + c - 'a' + 10;
 483                 } else {
 484                         break;
 485                 }
 486                 next_char();
 487         }
 488
 489         return value;
 490 }
 491
 492 static int parse_escape_sequence(void)
 493 {
 494         eat('\\');
 495
 496         int ec = c;
 497         next_char();
 498
 499         switch(ec) {
 500         case '"':  return '"';
 501         case '\'': return'\'';
 502         case '\\': return '\\';
 503         case '?': return '\?';
 504         case 'a': return '\a';
 505         case 'b': return '\b';
 506         case 'f': return '\f';
 507         case 'n': return '\n';
 508         case 'r': return '\r';
 509         case 't': return '\t';
 510         case 'v': return '\v';
 511         case 'x':
 512                 return parse_hex_sequence();
 513         case '0':
 514         case '1':
 515         case '2':
 516         case '3':
 517         case '4':
 518         case '5':
 519         case '6':
 520         case '7':
 521                 return parse_octal_sequence();
 522         case EOF:
 523                 parse_error("reached end of file while parsing escape sequence");
 524                 return EOF;
 525         default:
 526                 parse_error("unknown escape sequence");
 527                 return EOF;
 528         }
 529 }
 530
 531 const char *concat_strings(const char *s1, const char *s2)
 532 {
 533         size_t  len1   = strlen(s1);
 534         size_t  len2   = strlen(s2);
 535
 536         char   *concat = obstack_alloc(&symbol_obstack, len1 + len2 + 1);
 537         memcpy(concat, s1, len1);
 538         memcpy(concat + len1, s2, len2 + 1);
 539
 540         const char *result = strset_insert(&stringset, concat);
 541         if(result != concat) {
 542                 obstack_free(&symbol_obstack, concat);
 543         }
 544
 545         return result;
 546 }
 547
 548 static void parse_string_literal(void)
 549 {
 550         unsigned    start_linenr = lexer_token.source_position.linenr;
 551         char       *string;
 552         const char *result;
 553
 554         assert(c == '"');
 555         next_char();
 556
 557         int tc;
 558         while(1) {
 559                 switch(c) {
 560                 case '\\':
 561                         tc = parse_escape_sequence();
 562                         obstack_1grow(&symbol_obstack, tc);
 563                         break;
 564
 565                 case EOF:
 566                         error_prefix_at(lexer_token.source_position.input_name,
 567                                         start_linenr);
 568                         fprintf(stderr, "string has no end\n");
 569                         lexer_token.type = T_ERROR;
 570                         return;
 571
 572                 case '"':
 573                         next_char();
 574                         goto end_of_string;
 575
 576                 default:
 577                         obstack_1grow(&symbol_obstack, c);
 578                         next_char();
 579                         break;
 580                 }
 581         }
 582
 583 end_of_string:
 584
 585         /* TODO: concatenate multiple strings separated by whitespace... */
 586
 587         /* add finishing 0 to the string */
 588         obstack_1grow(&symbol_obstack, '\0');
 589         string = obstack_finish(&symbol_obstack);
 590
 591         /* check if there is already a copy of the string */
 592         result = strset_insert(&stringset, string);
 593         if(result != string) {
 594                 obstack_free(&symbol_obstack, string);
 595         }
 596
 597         lexer_token.type     = T_STRING_LITERAL;
 598         lexer_token.v.string = result;
 599 }
 600
 601 static void parse_character_constant(void)
 602 {
 603         eat('\'');
 604
 605         int found_char = 0;
 606         while(1) {
 607                 switch(c) {
 608                 case '\\':
 609                         found_char = parse_escape_sequence();
 610                         break;
 611
 612                 MATCH_NEWLINE(
 613                         parse_error("newline while parsing character constant");
 614                         break;
 615                 )
 616
 617                 case '\'':
 618                         next_char();
 619                         goto end_of_char_constant;
 620
 621                 case EOF:
 622                         parse_error("EOF while parsing character constant");
 623                         lexer_token.type = T_ERROR;
 624                         return;
 625
 626                 default:
 627                         if(found_char != 0) {
 628                                 parse_error("more than 1 characters in character "
 629                                             "constant");
 630                                 goto end_of_char_constant;
 631                         } else {
 632                                 found_char = c;
 633                                 next_char();
 634                         }
 635                         break;
 636                 }
 637         }
 638
 639 end_of_char_constant:
 640         lexer_token.type       = T_INTEGER;
 641         lexer_token.v.intvalue = found_char;
 642 }
 643
 644 static void skip_multiline_comment(void)
 645 {
 646         unsigned start_linenr = lexer_token.source_position.linenr;
 647
 648         while(1) {
 649                 switch(c) {
 650                 case '*':
 651                         next_char();
 652                         if(c == '/') {
 653                                 next_char();
 654                                 return;
 655                         }
 656                         break;
 657
 658                 MATCH_NEWLINE(break;)
 659
 660                 case EOF:
 661                         error_prefix_at(lexer_token.source_position.input_name,
 662                                         start_linenr);
 663                         fprintf(stderr, "at end of file while looking for comment end\n");
 664                         return;
 665
 666                 default:
 667                         next_char();
 668                         break;
 669                 }
 670         }
 671 }
 672
 673 static void skip_line_comment(void)
 674 {
 675         while(1) {
 676                 switch(c) {
 677                 case EOF:
 678                         return;
 679
 680                 case '\n':
 681                 case '\r':
 682                         return;
 683
 684                 default:
 685                         next_char();
 686                         break;
 687                 }
 688         }
 689 }
 690
 691 static token_t pp_token;
 692
 693 static inline void next_pp_token(void)
 694 {
 695         lexer_next_preprocessing_token();
 696         pp_token = lexer_token;
 697 }
 698
 699 static void eat_until_newline(void)
 700 {
 701         while(pp_token.type != '\n' && pp_token.type != T_EOF) {
 702                 next_pp_token();
 703         }
 704 }
 705
 706 static void error_directive(void)
 707 {
 708         error_prefix();
 709         fprintf(stderr, "#error directive: \n");
 710
 711         /* parse pp-tokens until new-line */
 712 }
 713
 714 static void define_directive(void)
 715 {
 716         lexer_next_preprocessing_token();
 717         if(lexer_token.type != T_IDENTIFIER) {
 718                 parse_error("expected identifier after #define\n");
 719                 eat_until_newline();
 720         }
 721 }
 722
 723 static void ifdef_directive(int is_ifndef)
 724 {
 725         (void) is_ifndef;
 726         lexer_next_preprocessing_token();
 727         //expect_identifier();
 728         //extect_newline();
 729 }
 730
 731 static void endif_directive(void)
 732 {
 733         //expect_newline();
 734 }
 735
 736 static void parse_line_directive(void)
 737 {
 738         if(pp_token.type != T_INTEGER) {
 739                 parse_error("expected integer");
 740         } else {
 741                 lexer_token.source_position.linenr = pp_token.v.intvalue - 1;
 742                 next_pp_token();
 743         }
 744         if(pp_token.type == T_STRING_LITERAL) {
 745                 lexer_token.source_position.input_name = pp_token.v.string;
 746                 next_pp_token();
 747         }
 748
 749         eat_until_newline();
 750 }
 751
 752 static void parse_preprocessor_identifier(void)
 753 {
 754         assert(pp_token.type == T_IDENTIFIER);
 755         symbol_t *symbol = pp_token.v.symbol;
 756
 757         switch(symbol->pp_ID) {
 758         case TP_include:
 759                 printf("include - enable header name parsing!\n");
 760                 break;
 761         case TP_define:
 762                 define_directive();
 763                 break;
 764         case TP_ifdef:
 765                 ifdef_directive(0);
 766                 break;
 767         case TP_ifndef:
 768                 ifdef_directive(1);
 769                 break;
 770         case TP_endif:
 771                 endif_directive();
 772                 break;
 773         case TP_line:
 774                 next_pp_token();
 775                 parse_line_directive();
 776                 break;
 777         case TP_if:
 778         case TP_else:
 779         case TP_elif:
 780         case TP_undef:
 781         case TP_error:
 782                 error_directive();
 783                 break;
 784         case TP_pragma:
 785                 break;
 786         }
 787 }
 788
 789 static void parse_preprocessor_directive()
 790 {
 791         next_pp_token();
 792
 793         switch(pp_token.type) {
 794         case T_IDENTIFIER:
 795                 parse_preprocessor_identifier();
 796                 break;
 797         case T_INTEGER:
 798                 parse_line_directive();
 799                 break;
 800         default:
 801                 parse_error("invalid preprocessor directive");
 802                 eat_until_newline();
 803                 break;
 804         }
 805 }
 806
 807 #define MAYBE_PROLOG                                       \
 808                         next_char();                                   \
 809                         while(1) {                                     \
 810                                 switch(c) {
 811
 812 #define MAYBE(ch, set_type)                                \
 813                                 case ch:                                   \
 814                                         next_char();                           \
 815                                         lexer_token.type = set_type;           \
 816                                         return;
 817
 818 #define ELSE_CODE(code)                                    \
 819                                 default:                                   \
 820                                         code;                                  \
 821                                 }                                          \
 822                         } /* end of while(1) */                        \
 823                         break;
 824
 825 #define ELSE(set_type)                                     \
 826                 ELSE_CODE(                                         \
 827                         lexer_token.type = set_type;                   \
 828                         return;                                        \
 829                 )
 830
 831 void lexer_next_preprocessing_token(void)
 832 {
 833         while(1) {
 834                 switch(c) {
 835                 case ' ':
 836                 case '\t':
 837                         next_char();
 838                         break;
 839
 840                 MATCH_NEWLINE(
 841                         lexer_token.type = '\n';
 842                         return;
 843                 )
 844
 845                 SYMBOL_CHARS
 846                         parse_symbol();
 847                         return;
 848
 849                 DIGITS
 850                         parse_number();
 851                         return;
 852
 853                 case '"':
 854                         parse_string_literal();
 855                         return;
 856
 857                 case '\'':
 858                         parse_character_constant();
 859                         return;
 860
 861                 case '.':
 862                         MAYBE_PROLOG
 863                                 case '.':
 864                                         MAYBE_PROLOG
 865                                         MAYBE('.', T_DOTDOTDOT)
 866                                         ELSE_CODE(
 867                                                 put_back(c);
 868                                                 c = '.';
 869                                                 lexer_token.type = '.';
 870                                                 return;
 871                                         )
 872                         ELSE('.')
 873                 case '&':
 874                         MAYBE_PROLOG
 875                         MAYBE('&', T_ANDAND)
 876                         MAYBE('=', T_ANDEQUAL)
 877                         ELSE('&')
 878                 case '*':
 879                         MAYBE_PROLOG
 880                         MAYBE('=', T_ASTERISKEQUAL)
 881                         ELSE('*')
 882                 case '+':
 883                         MAYBE_PROLOG
 884                         MAYBE('+', T_PLUSPLUS)
 885                         MAYBE('=', T_PLUSEQUAL)
 886                         ELSE('+')
 887                 case '-':
 888                         MAYBE_PROLOG
 889                         MAYBE('>', T_MINUSGREATER)
 890                         MAYBE('-', T_MINUSMINUS)
 891                         MAYBE('=', T_MINUSEQUAL)
 892                         ELSE('-')
 893                 case '!':
 894                         MAYBE_PROLOG
 895                         MAYBE('=', T_EXCLAMATIONMARKEQUAL)
 896                         ELSE('!')
 897                 case '/':
 898                         MAYBE_PROLOG
 899                         MAYBE('=', T_SLASHEQUAL)
 900                                 case '*':
 901                                         next_char();
 902                                         skip_multiline_comment();
 903                                         lexer_next_preprocessing_token();
 904                                         return;
 905                                 case '/':
 906                                         next_char();
 907                                         skip_line_comment();
 908                                         lexer_next_preprocessing_token();
 909                                         return;
 910                         ELSE('/')
 911                 case '%':
 912                         MAYBE_PROLOG
 913                         MAYBE('>', T_PERCENTGREATER)
 914                         MAYBE('=', T_PERCENTEQUAL)
 915                                 case ':':
 916                                         MAYBE_PROLOG
 917                                                 case '%':
 918                                                         MAYBE_PROLOG
 919                                                         MAYBE(':', T_PERCENTCOLONPERCENTCOLON)
 920                                                         ELSE_CODE(
 921                                                                 put_back(c);
 922                                                                 c = '%';
 923                                                                 lexer_token.type = T_PERCENTCOLON;
 924                                                                 return;
 925                                                         )
 926                                         ELSE(T_PERCENTCOLON)
 927                         ELSE('%')
 928                 case '<':
 929                         MAYBE_PROLOG
 930                         MAYBE(':', T_LESSCOLON)
 931                         MAYBE('%', T_LESSPERCENT)
 932                         MAYBE('=', T_LESSEQUAL)
 933                                 case '<':
 934                                         MAYBE_PROLOG
 935                                         MAYBE('=', T_LESSLESSEQUAL)
 936                                         ELSE(T_LESSLESS)
 937                         ELSE('<')
 938                 case '>':
 939                         MAYBE_PROLOG
 940                         MAYBE('=', T_GREATEREQUAL)
 941                                 case '>':
 942                                         MAYBE_PROLOG
 943                                         MAYBE('=', T_GREATERGREATEREQUAL)
 944                                         ELSE(T_GREATERGREATER)
 945                         ELSE('>')
 946                 case '^':
 947                         MAYBE_PROLOG
 948                         MAYBE('=', T_CARETEQUAL)
 949                         ELSE('^')
 950                 case '|':
 951                         MAYBE_PROLOG
 952                         MAYBE('=', T_PIPEEQUAL)
 953                         MAYBE('|', T_PIPEPIPE)
 954                         ELSE('|')
 955                 case ':':
 956                         MAYBE_PROLOG
 957                         MAYBE('>', T_COLONGREATER)
 958                         ELSE(':')
 959                 case '=':
 960                         MAYBE_PROLOG
 961                         MAYBE('=', T_EQUALEQUAL)
 962                         ELSE('=')
 963                 case '#':
 964                         MAYBE_PROLOG
 965                         MAYBE('#', T_HASHHASH)
 966                         ELSE('#')
 967
 968                 case '?':
 969                 case '[':
 970                 case ']':
 971                 case '(':
 972                 case ')':
 973                 case '{':
 974                 case '}':
 975                 case '~':
 976                 case ';':
 977                 case ',':
 978                 case '\\':
 979                         lexer_token.type = c;
 980                         next_char();
 981                         return;
 982
 983                 case EOF:
 984                         lexer_token.type = T_EOF;
 985                         return;
 986
 987                 default:
 988                         next_char();
 989                         error_prefix();
 990                         fprintf(stderr, "unknown character '%c' found\n", c);
 991                         lexer_token.type = T_ERROR;
 992                         return;
 993                 }
 994         }
 995 }
 996
 997 void lexer_next_token(void)
 998 {
 999         lexer_next_preprocessing_token();
1000         if(lexer_token.type != '\n')
1001                 return;
1002
1003 newline_found:
1004         do {
1005                 lexer_next_preprocessing_token();
1006         } while(lexer_token.type == '\n');
1007
1008         if(lexer_token.type == '#') {
1009                 parse_preprocessor_directive();
1010                 goto newline_found;
1011         }
1012 }
1013
1014 void init_lexer(void)
1015 {
1016         strset_init(&stringset);
1017 }
1018
1019 void lexer_open_stream(FILE *stream, const char *input_name)
1020 {
1021         input                                  = stream;
1022         lexer_token.source_position.linenr     = 1;
1023         lexer_token.source_position.input_name = input_name;
1024
1025         next_char();
1026 }
1027
1028 void exit_lexer(void)
1029 {
1030         strset_destroy(&stringset);
1031 }
1032
1033 static __attribute__((unused))
1034 void dbg_pos(const source_position_t source_position)
1035 {
1036         fprintf(stdout, "%s:%d\n", source_position.input_name,
1037                 source_position.linenr);
1038         fflush(stdout);
1039 }