nsz Git - cparser/blob - lexer.c

   1 #include <config.h>
   2
   3 #include "lexer.h"
   4 #include "token_t.h"
   5 #include "symbol_table_t.h"
   6 #include "adt/error.h"
   7 #include "adt/strset.h"
   8 #include "adt/util.h"
   9
  10 #include <assert.h>
  11 #include <errno.h>
  12 #include <string.h>
  13 #include <ctype.h>
  14
  15 #define DEBUG_CHARS
  16 #define MAX_PUTBACK 3
  17
  18 static int         c;
  19 token_t            lexer_token;
  20 static FILE       *input;
  21 static char        buf[1024 + MAX_PUTBACK];
  22 static const char *bufend;
  23 static const char *bufpos;
  24 static strset_t    stringset;
  25 //static FILE      **input_stack;
  26 //static char      **buf_stack;
  27
  28 static void error_prefix_at(const char *input_name, unsigned linenr)
  29 {
  30         fprintf(stderr, "%s:%u: Error: ", input_name, linenr);
  31 }
  32
  33 static void error_prefix(void)
  34 {
  35         error_prefix_at(lexer_token.source_position.input_name,
  36                         lexer_token.source_position.linenr);
  37 }
  38
  39 static void parse_error(const char *msg)
  40 {
  41         error_prefix();
  42         fprintf(stderr, "%s\n", msg);
  43 }
  44
  45 static inline void next_real_char(void)
  46 {
  47         bufpos++;
  48         if(bufpos >= bufend) {
  49                 size_t s = fread(buf + MAX_PUTBACK, 1, sizeof(buf) - MAX_PUTBACK,
  50                                  input);
  51                 if(s == 0) {
  52                         c = EOF;
  53                         return;
  54                 }
  55                 bufpos = buf + MAX_PUTBACK;
  56                 bufend = buf + MAX_PUTBACK + s;
  57         }
  58         c = *(bufpos);
  59 }
  60
  61 static inline void put_back(int pc)
  62 {
  63         char *p = (char*) bufpos - 1;
  64         bufpos--;
  65         assert(p >= buf);
  66         *p = pc;
  67
  68 #ifdef DEBUG_CHARS
  69         printf("putback '%c'\n", pc);
  70 #endif
  71 }
  72
  73 static inline void next_char(void);
  74
  75 #define MATCH_NEWLINE(code)                   \
  76         case '\r':                                \
  77                 next_char();                          \
  78                 if(c == '\n') {                       \
  79                         next_char();                      \
  80                 }                                     \
  81                 lexer_token.source_position.linenr++; \
  82                 code;                                 \
  83         case '\n':                                \
  84                 next_char();                          \
  85                 lexer_token.source_position.linenr++; \
  86                 code;
  87
  88 static inline void eat(char c_type)
  89 {
  90         assert(c == c_type);
  91         next_char();
  92 }
  93
  94 static void maybe_concat_lines(void)
  95 {
  96         eat('\\');
  97
  98         switch(c) {
  99         MATCH_NEWLINE(return;)
 100
 101         default:
 102                 break;
 103         }
 104
 105         put_back(c);
 106         c = '\\';
 107 }
 108
 109 static inline void next_char(void)
 110 {
 111         next_real_char();
 112
 113         /* filter trigraphs */
 114         if(UNLIKELY(c == '\\')) {
 115                 maybe_concat_lines();
 116                 goto end_of_next_char;
 117         }
 118
 119         if(LIKELY(c != '?'))
 120                 goto end_of_next_char;
 121
 122         next_real_char();
 123         if(LIKELY(c != '?')) {
 124                 put_back(c);
 125                 c = '?';
 126                 goto end_of_next_char;
 127         }
 128
 129         next_real_char();
 130         switch(c) {
 131         case '=': c = '#'; break;
 132         case '(': c = '['; break;
 133         case '/': c = '\\'; maybe_concat_lines(); break;
 134         case ')': c = ']'; break;
 135         case '\'': c = '^'; break;
 136         case '<': c = '{'; break;
 137         case '!': c = '|'; break;
 138         case '>': c = '}'; break;
 139         case '-': c = '~'; break;
 140         default:
 141                 put_back('?');
 142                 put_back(c);
 143                 c = '?';
 144                 break;
 145         }
 146
 147 end_of_next_char:
 148 #ifdef DEBUG_CHARS
 149         printf("nchar '%c'\n", c);
 150 #else
 151         ;
 152 #endif
 153 }
 154
 155 #define SYMBOL_CHARS  \
 156         case 'a':         \
 157         case 'b':         \
 158         case 'c':         \
 159         case 'd':         \
 160         case 'e':         \
 161         case 'f':         \
 162         case 'g':         \
 163         case 'h':         \
 164         case 'i':         \
 165         case 'j':         \
 166         case 'k':         \
 167         case 'l':         \
 168         case 'm':         \
 169         case 'n':         \
 170         case 'o':         \
 171         case 'p':         \
 172         case 'q':         \
 173         case 'r':         \
 174         case 's':         \
 175         case 't':         \
 176         case 'u':         \
 177         case 'v':         \
 178         case 'w':         \
 179         case 'x':         \
 180         case 'y':         \
 181         case 'z':         \
 182         case 'A':         \
 183         case 'B':         \
 184         case 'C':         \
 185         case 'D':         \
 186         case 'E':         \
 187         case 'F':         \
 188         case 'G':         \
 189         case 'H':         \
 190         case 'I':         \
 191         case 'J':         \
 192         case 'K':         \
 193         case 'L':         \
 194         case 'M':         \
 195         case 'N':         \
 196         case 'O':         \
 197         case 'P':         \
 198         case 'Q':         \
 199         case 'R':         \
 200         case 'S':         \
 201         case 'T':         \
 202         case 'U':         \
 203         case 'V':         \
 204         case 'W':         \
 205         case 'X':         \
 206         case 'Y':         \
 207         case 'Z':         \
 208         case '_':
 209
 210 #define DIGITS        \
 211         case '0':         \
 212         case '1':         \
 213         case '2':         \
 214         case '3':         \
 215         case '4':         \
 216         case '5':         \
 217         case '6':         \
 218         case '7':         \
 219         case '8':         \
 220         case '9':
 221
 222 static void parse_symbol(void)
 223 {
 224         symbol_t *symbol;
 225         char     *string;
 226
 227         obstack_1grow(&symbol_obstack, c);
 228         next_char();
 229
 230         while(1) {
 231                 switch(c) {
 232                 DIGITS
 233                 SYMBOL_CHARS
 234                         obstack_1grow(&symbol_obstack, c);
 235                         next_char();
 236                         break;
 237
 238                 default:
 239                         goto end_symbol;
 240                 }
 241         }
 242
 243 end_symbol:
 244         obstack_1grow(&symbol_obstack, '\0');
 245
 246         string = obstack_finish(&symbol_obstack);
 247         symbol = symbol_table_insert(string);
 248
 249         lexer_token.type     = symbol->ID;
 250         lexer_token.v.symbol = symbol;
 251
 252         if(symbol->string != string) {
 253                 obstack_free(&symbol_obstack, string);
 254         }
 255 }
 256
 257 static void parse_integer_suffix(void)
 258 {
 259         if(c == 'U' || c == 'U') {
 260                 /* TODO do something with the suffixes... */
 261                 next_char();
 262                 if(c == 'L' || c == 'l') {
 263                         next_char();
 264                         if(c == 'L' || c == 'l') {
 265                                 next_char();
 266                         }
 267                 }
 268         } else if(c == 'l' || c == 'L') {
 269                 next_char();
 270                 if(c == 'l' || c == 'L') {
 271                         next_char();
 272                         if(c == 'u' || c == 'U') {
 273                                 next_char();
 274                         }
 275                 } else if(c == 'u' || c == 'U') {
 276                         next_char();
 277                 }
 278         }
 279 }
 280
 281 static void parse_number_hex(void)
 282 {
 283         assert(c == 'x' || c == 'X');
 284         next_char();
 285
 286         if (!isdigit(c) &&
 287                 !('A' <= c && c <= 'F') &&
 288                 !('a' <= c && c <= 'f')) {
 289                 parse_error("premature end of hex number literal");
 290                 lexer_token.type = T_ERROR;
 291                 return;
 292         }
 293
 294         int value = 0;
 295         while(1) {
 296                 if (isdigit(c)) {
 297                         value = 16 * value + c - '0';
 298                 } else if ('A' <= c && c <= 'F') {
 299                         value = 16 * value + c - 'A' + 10;
 300                 } else if ('a' <= c && c <= 'f') {
 301                         value = 16 * value + c - 'a' + 10;
 302                 } else {
 303                         parse_integer_suffix();
 304
 305                         lexer_token.type       = T_INTEGER;
 306                         lexer_token.v.intvalue = value;
 307                         return;
 308                 }
 309                 next_char();
 310         }
 311
 312         if(c == '.' || c == 'p' || c == 'P') {
 313                 next_char();
 314                 panic("Hex floating point numbers not implemented yet");
 315         }
 316 }
 317
 318 static void parse_number_oct(void)
 319 {
 320         int value = 0;
 321         while(c >= '0' && c <= '7') {
 322                 value = 8 * value + c - '0';
 323                 next_char();
 324         }
 325         if (c == '8' || c == '9') {
 326                 parse_error("invalid octal number");
 327                 lexer_token.type = T_ERROR;
 328                 return;
 329         }
 330
 331         lexer_token.type       = T_INTEGER;
 332         lexer_token.v.intvalue = value;
 333
 334         parse_integer_suffix();
 335 }
 336
 337 static void parse_floatingpoint_exponent(long double value)
 338 {
 339         unsigned int expo = 0;
 340         long double  factor = 10.;
 341
 342         if(c == '-') {
 343                 next_char();
 344                 factor = 0.1;
 345         } else if(c == '+') {
 346                 next_char();
 347         }
 348
 349         while(c >= '0' && c <= '9') {
 350                 expo = 10 * expo + (c - '0');
 351                 next_char();
 352         }
 353
 354         while(1) {
 355                 if(expo & 1)
 356                         value *= factor;
 357                 expo >>= 1;
 358                 if(expo == 0)
 359                         break;
 360                 factor *= factor;
 361         }
 362
 363         lexer_token.type         = T_FLOATINGPOINT;
 364         lexer_token.v.floatvalue = value;
 365 }
 366
 367 static void parse_floatingpoint_fract(int integer_part)
 368 {
 369         long double value  = integer_part;
 370         long double factor = 1.;
 371
 372         while(c >= '0' && c <= '9') {
 373                 factor *= 0.1;
 374                 value  += (c - '0') * factor;
 375                 next_char();
 376         }
 377
 378         if(c == 'e' || c == 'E') {
 379                 next_char();
 380                 parse_floatingpoint_exponent(value);
 381                 return;
 382         }
 383
 384         lexer_token.type         = T_FLOATINGPOINT;
 385         lexer_token.v.floatvalue = value;
 386 }
 387
 388 static void parse_number_dec(void)
 389 {
 390         int value = 0;
 391
 392         while(isdigit(c)) {
 393                 value = 10 * value + c - '0';
 394                 next_char();
 395         }
 396
 397         if(c == '.') {
 398                 next_char();
 399                 parse_floatingpoint_fract(value);
 400                 return;
 401         }
 402         if(c == 'e' || c == 'E') {
 403                 next_char();
 404                 parse_floatingpoint_exponent(value);
 405                 return;
 406         }
 407         parse_integer_suffix();
 408
 409         lexer_token.type       = T_INTEGER;
 410         lexer_token.v.intvalue = value;
 411 }
 412
 413 static void parse_number(void)
 414 {
 415         if (c == '0') {
 416                 next_char();
 417                 switch (c) {
 418                         case 'X':
 419                         case 'x':
 420                                 parse_number_hex();
 421                                 break;
 422                         case '0':
 423                         case '1':
 424                         case '2':
 425                         case '3':
 426                         case '4':
 427                         case '5':
 428                         case '6':
 429                         case '7':
 430                                 parse_number_oct();
 431                                 break;
 432                         case '.':
 433                                 next_char();
 434                                 parse_floatingpoint_fract(0);
 435                                 break;
 436                         case 'e':
 437                         case 'E':
 438                                 parse_floatingpoint_exponent(0);
 439                                 break;
 440                         case '8':
 441                         case '9':
 442                                 next_char();
 443                                 parse_error("invalid octal number");
 444                                 lexer_token.type = T_ERROR;
 445                                 return;
 446                         default:
 447                                 put_back(c);
 448                                 c = '0';
 449                                 parse_number_dec();
 450                                 return;
 451                 }
 452         } else {
 453                 parse_number_dec();
 454         }
 455 }
 456
 457 static int parse_octal_sequence(void)
 458 {
 459         int value = 0;
 460         while(1) {
 461                 if(c < '0' || c > '7')
 462                         break;
 463                 value = 8 * value + c - '0';
 464                 next_char();
 465         }
 466
 467         return value;
 468 }
 469
 470 static int parse_hex_sequence(void)
 471 {
 472         int value = 0;
 473         while(1) {
 474                 if (c >= '0' && c <= '9') {
 475                         value = 16 * value + c - '0';
 476                 } else if ('A' <= c && c <= 'F') {
 477                         value = 16 * value + c - 'A' + 10;
 478                 } else if ('a' <= c && c <= 'f') {
 479                         value = 16 * value + c - 'a' + 10;
 480                 } else {
 481                         break;
 482                 }
 483                 next_char();
 484         }
 485
 486         return value;
 487 }
 488
 489 static int parse_escape_sequence(void)
 490 {
 491         eat('\\');
 492
 493         int ec = c;
 494         next_char();
 495
 496         switch(ec) {
 497         case '"':  return '"';
 498         case '\'': return'\'';
 499         case '\\': return '\\';
 500         case '?': return '\?';
 501         case 'a': return '\a';
 502         case 'b': return '\b';
 503         case 'f': return '\f';
 504         case 'n': return '\n';
 505         case 'r': return '\r';
 506         case 't': return '\t';
 507         case 'v': return '\v';
 508         case 'x':
 509                 return parse_hex_sequence();
 510         case '0':
 511         case '1':
 512         case '2':
 513         case '3':
 514         case '4':
 515         case '5':
 516         case '6':
 517         case '7':
 518                 return parse_octal_sequence();
 519         case EOF:
 520                 parse_error("reached end of file while parsing escape sequence");
 521                 return EOF;
 522         default:
 523                 parse_error("unknown escape sequence");
 524                 return EOF;
 525         }
 526 }
 527
 528 const char *concat_strings(const char *s1, const char *s2)
 529 {
 530         size_t  len1   = strlen(s1);
 531         size_t  len2   = strlen(s2);
 532
 533         char   *concat = obstack_alloc(&symbol_obstack, len1 + len2 + 1);
 534         memcpy(concat, s1, len1);
 535         memcpy(concat + len1, s2, len2 + 1);
 536
 537         const char *result = strset_insert(&stringset, concat);
 538         if(result != concat) {
 539                 obstack_free(&symbol_obstack, concat);
 540         }
 541
 542         return result;
 543 }
 544
 545 static void parse_string_literal(void)
 546 {
 547         unsigned    start_linenr = lexer_token.source_position.linenr;
 548         char       *string;
 549         const char *result;
 550
 551         assert(c == '"');
 552         next_char();
 553
 554         int tc;
 555         while(1) {
 556                 switch(c) {
 557                 case '\\':
 558                         tc = parse_escape_sequence();
 559                         obstack_1grow(&symbol_obstack, tc);
 560                         break;
 561
 562                 case EOF:
 563                         error_prefix_at(lexer_token.source_position.input_name,
 564                                         start_linenr);
 565                         fprintf(stderr, "string has no end\n");
 566                         lexer_token.type = T_ERROR;
 567                         return;
 568
 569                 case '"':
 570                         next_char();
 571                         goto end_of_string;
 572
 573                 default:
 574                         obstack_1grow(&symbol_obstack, c);
 575                         next_char();
 576                         break;
 577                 }
 578         }
 579
 580 end_of_string:
 581
 582         /* TODO: concatenate multiple strings separated by whitespace... */
 583
 584         /* add finishing 0 to the string */
 585         obstack_1grow(&symbol_obstack, '\0');
 586         string = obstack_finish(&symbol_obstack);
 587
 588         /* check if there is already a copy of the string */
 589         result = strset_insert(&stringset, string);
 590         if(result != string) {
 591                 obstack_free(&symbol_obstack, string);
 592         }
 593
 594         lexer_token.type     = T_STRING_LITERAL;
 595         lexer_token.v.string = result;
 596 }
 597
 598 static void parse_character_constant(void)
 599 {
 600         eat('\'');
 601
 602         int found_char = 0;
 603         while(1) {
 604                 switch(c) {
 605                 case '\\':
 606                         found_char = parse_escape_sequence();
 607                         break;
 608
 609                 MATCH_NEWLINE(
 610                         parse_error("newline while parsing character constant");
 611                         break;
 612                 )
 613
 614                 case '\'':
 615                         next_char();
 616                         goto end_of_char_constant;
 617
 618                 case EOF:
 619                         parse_error("EOF while parsing character constant");
 620                         lexer_token.type = T_ERROR;
 621                         return;
 622
 623                 default:
 624                         if(found_char != 0) {
 625                                 parse_error("more than 1 characters in character "
 626                                             "constant");
 627                                 goto end_of_char_constant;
 628                         } else {
 629                                 found_char = c;
 630                                 next_char();
 631                         }
 632                         break;
 633                 }
 634         }
 635
 636 end_of_char_constant:
 637         lexer_token.type       = T_INTEGER;
 638         lexer_token.v.intvalue = found_char;
 639 }
 640
 641 static void skip_multiline_comment(void)
 642 {
 643         unsigned start_linenr = lexer_token.source_position.linenr;
 644
 645         while(1) {
 646                 switch(c) {
 647                 case '*':
 648                         next_char();
 649                         if(c == '/') {
 650                                 next_char();
 651                                 return;
 652                         }
 653                         break;
 654
 655                 MATCH_NEWLINE(break;)
 656
 657                 case EOF:
 658                         error_prefix_at(lexer_token.source_position.input_name,
 659                                         start_linenr);
 660                         fprintf(stderr, "at end of file while looking for comment end\n");
 661                         return;
 662
 663                 default:
 664                         next_char();
 665                         break;
 666                 }
 667         }
 668 }
 669
 670 static void skip_line_comment(void)
 671 {
 672         while(1) {
 673                 switch(c) {
 674                 case EOF:
 675                         return;
 676
 677                 case '\n':
 678                 case '\r':
 679                         return;
 680
 681                 default:
 682                         next_char();
 683                         break;
 684                 }
 685         }
 686 }
 687
 688 static token_t pp_token;
 689
 690 static inline void next_pp_token(void)
 691 {
 692         lexer_next_preprocessing_token();
 693         pp_token = lexer_token;
 694 }
 695
 696 static void eat_until_newline(void)
 697 {
 698         while(pp_token.type != '\n' && pp_token.type != T_EOF) {
 699                 next_pp_token();
 700         }
 701 }
 702
 703 static void error_directive(void)
 704 {
 705         error_prefix();
 706         fprintf(stderr, "#error directive: \n");
 707
 708         /* parse pp-tokens until new-line */
 709 }
 710
 711 static void define_directive(void)
 712 {
 713         lexer_next_preprocessing_token();
 714         if(lexer_token.type != T_IDENTIFIER) {
 715                 parse_error("expected identifier after #define\n");
 716                 eat_until_newline();
 717         }
 718 }
 719
 720 static void ifdef_directive(int is_ifndef)
 721 {
 722         (void) is_ifndef;
 723         lexer_next_preprocessing_token();
 724         //expect_identifier();
 725         //extect_newline();
 726 }
 727
 728 static void endif_directive(void)
 729 {
 730         //expect_newline();
 731 }
 732
 733 static void parse_line_directive(void)
 734 {
 735         if(pp_token.type != T_INTEGER) {
 736                 parse_error("expected integer");
 737         } else {
 738                 lexer_token.source_position.linenr = pp_token.v.intvalue - 1;
 739                 next_pp_token();
 740         }
 741         if(pp_token.type == T_STRING_LITERAL) {
 742                 lexer_token.source_position.input_name = pp_token.v.string;
 743                 next_pp_token();
 744         }
 745
 746         eat_until_newline();
 747 }
 748
 749 static void parse_preprocessor_identifier(void)
 750 {
 751         assert(pp_token.type == T_IDENTIFIER);
 752         symbol_t *symbol = pp_token.v.symbol;
 753
 754         switch(symbol->pp_ID) {
 755         case TP_include:
 756                 printf("include - enable header name parsing!\n");
 757                 break;
 758         case TP_define:
 759                 define_directive();
 760                 break;
 761         case TP_ifdef:
 762                 ifdef_directive(0);
 763                 break;
 764         case TP_ifndef:
 765                 ifdef_directive(1);
 766                 break;
 767         case TP_endif:
 768                 endif_directive();
 769                 break;
 770         case TP_line:
 771                 next_pp_token();
 772                 parse_line_directive();
 773                 break;
 774         case TP_if:
 775         case TP_else:
 776         case TP_elif:
 777         case TP_undef:
 778         case TP_error:
 779                 error_directive();
 780                 break;
 781         case TP_pragma:
 782                 break;
 783         }
 784 }
 785
 786 static void parse_preprocessor_directive()
 787 {
 788         next_pp_token();
 789
 790         switch(pp_token.type) {
 791         case T_IDENTIFIER:
 792                 parse_preprocessor_identifier();
 793                 break;
 794         case T_INTEGER:
 795                 parse_line_directive();
 796                 break;
 797         default:
 798                 parse_error("invalid preprocessor directive");
 799                 eat_until_newline();
 800                 break;
 801         }
 802 }
 803
 804 #define MAYBE_PROLOG                                       \
 805                         next_char();                                   \
 806                         while(1) {                                     \
 807                                 switch(c) {
 808
 809 #define MAYBE(ch, set_type)                                \
 810                                 case ch:                                   \
 811                                         next_char();                           \
 812                                         lexer_token.type = set_type;           \
 813                                         return;
 814
 815 #define ELSE_CODE(code)                                    \
 816                                 default:                                   \
 817                                         code;                                  \
 818                                 }                                          \
 819                         } /* end of while(1) */                        \
 820                         break;
 821
 822 #define ELSE(set_type)                                     \
 823                 ELSE_CODE(                                         \
 824                         lexer_token.type = set_type;                   \
 825                         return;                                        \
 826                 )
 827
 828 void lexer_next_preprocessing_token(void)
 829 {
 830         while(1) {
 831                 switch(c) {
 832                 case ' ':
 833                 case '\t':
 834                         next_char();
 835                         break;
 836
 837                 MATCH_NEWLINE(
 838                         lexer_token.type = '\n';
 839                         return;
 840                 )
 841
 842                 SYMBOL_CHARS
 843                         parse_symbol();
 844                         return;
 845
 846                 DIGITS
 847                         parse_number();
 848                         return;
 849
 850                 case '"':
 851                         parse_string_literal();
 852                         return;
 853
 854                 case '\'':
 855                         parse_character_constant();
 856                         return;
 857
 858                 case '.':
 859                         MAYBE_PROLOG
 860                                 case '.':
 861                                         MAYBE_PROLOG
 862                                         MAYBE('.', T_DOTDOTDOT)
 863                                         ELSE_CODE(
 864                                                 put_back(c);
 865                                                 c = '.';
 866                                                 lexer_token.type = '.';
 867                                                 return;
 868                                         )
 869                         ELSE('.')
 870                 case '&':
 871                         MAYBE_PROLOG
 872                         MAYBE('&', T_ANDAND)
 873                         MAYBE('=', T_ANDEQUAL)
 874                         ELSE('&')
 875                 case '*':
 876                         MAYBE_PROLOG
 877                         MAYBE('=', T_ASTERISKEQUAL)
 878                         ELSE('*')
 879                 case '+':
 880                         MAYBE_PROLOG
 881                         MAYBE('+', T_PLUSPLUS)
 882                         MAYBE('=', T_PLUSEQUAL)
 883                         ELSE('+')
 884                 case '-':
 885                         MAYBE_PROLOG
 886                         MAYBE('>', T_MINUSGREATER)
 887                         MAYBE('-', T_MINUSMINUS)
 888                         MAYBE('=', T_MINUSEQUAL)
 889                         ELSE('-')
 890                 case '!':
 891                         MAYBE_PROLOG
 892                         MAYBE('=', T_EXCLAMATIONMARKEQUAL)
 893                         ELSE('!')
 894                 case '/':
 895                         MAYBE_PROLOG
 896                         MAYBE('=', T_SLASHEQUAL)
 897                                 case '*':
 898                                         next_char();
 899                                         skip_multiline_comment();
 900                                         lexer_next_preprocessing_token();
 901                                         return;
 902                                 case '/':
 903                                         next_char();
 904                                         skip_line_comment();
 905                                         lexer_next_preprocessing_token();
 906                                         return;
 907                         ELSE('/')
 908                 case '%':
 909                         MAYBE_PROLOG
 910                         MAYBE('>', T_PERCENTGREATER)
 911                         MAYBE('=', T_PERCENTEQUAL)
 912                                 case ':':
 913                                         MAYBE_PROLOG
 914                                                 case '%':
 915                                                         MAYBE_PROLOG
 916                                                         MAYBE(':', T_PERCENTCOLONPERCENTCOLON)
 917                                                         ELSE_CODE(
 918                                                                 put_back(c);
 919                                                                 c = '%';
 920                                                                 lexer_token.type = T_PERCENTCOLON;
 921                                                                 return;
 922                                                         )
 923                                         ELSE(T_PERCENTCOLON)
 924                         ELSE('%')
 925                 case '<':
 926                         MAYBE_PROLOG
 927                         MAYBE(':', T_LESSCOLON)
 928                         MAYBE('%', T_LESSPERCENT)
 929                         MAYBE('=', T_LESSEQUAL)
 930                                 case '<':
 931                                         MAYBE_PROLOG
 932                                         MAYBE('=', T_LESSLESSEQUAL)
 933                                         ELSE(T_LESSLESS)
 934                         ELSE('<')
 935                 case '>':
 936                         MAYBE_PROLOG
 937                         MAYBE('=', T_GREATEREQUAL)
 938                                 case '>':
 939                                         MAYBE_PROLOG
 940                                         MAYBE('=', T_GREATERGREATEREQUAL)
 941                                         ELSE(T_GREATERGREATER)
 942                         ELSE('>')
 943                 case '^':
 944                         MAYBE_PROLOG
 945                         MAYBE('=', T_CARETEQUAL)
 946                         ELSE('^')
 947                 case '|':
 948                         MAYBE_PROLOG
 949                         MAYBE('=', T_PIPEEQUAL)
 950                         MAYBE('|', T_PIPEPIPE)
 951                         ELSE('|')
 952                 case ':':
 953                         MAYBE_PROLOG
 954                         MAYBE('>', T_COLONGREATER)
 955                         ELSE(':')
 956                 case '=':
 957                         MAYBE_PROLOG
 958                         MAYBE('=', T_EQUALEQUAL)
 959                         ELSE('=')
 960                 case '#':
 961                         MAYBE_PROLOG
 962                         MAYBE('#', T_HASHHASH)
 963                         ELSE('#')
 964
 965                 case '?':
 966                 case '[':
 967                 case ']':
 968                 case '(':
 969                 case ')':
 970                 case '{':
 971                 case '}':
 972                 case '~':
 973                 case ';':
 974                 case ',':
 975                 case '\\':
 976                         lexer_token.type = c;
 977                         next_char();
 978                         return;
 979
 980                 case EOF:
 981                         lexer_token.type = T_EOF;
 982                         return;
 983
 984                 default:
 985                         next_char();
 986                         error_prefix();
 987                         fprintf(stderr, "unknown character '%c' found\n", c);
 988                         lexer_token.type = T_ERROR;
 989                         return;
 990                 }
 991         }
 992 }
 993
 994 void lexer_next_token(void)
 995 {
 996         lexer_next_preprocessing_token();
 997         if(lexer_token.type != '\n')
 998                 return;
 999
1000 newline_found:
1001         do {
1002                 lexer_next_preprocessing_token();
1003         } while(lexer_token.type == '\n');
1004
1005         if(lexer_token.type == '#') {
1006                 parse_preprocessor_directive();
1007                 goto newline_found;
1008         }
1009 }
1010
1011 void init_lexer(void)
1012 {
1013         strset_init(&stringset);
1014 }
1015
1016 void lexer_open_stream(FILE *stream, const char *input_name)
1017 {
1018         input                                  = stream;
1019         lexer_token.source_position.linenr     = 1;
1020         lexer_token.source_position.input_name = input_name;
1021
1022         next_char();
1023 }
1024
1025 void exit_lexer(void)
1026 {
1027         strset_destroy(&stringset);
1028 }
1029
1030 static __attribute__((unused))
1031 void dbg_pos(const source_position_t source_position)
1032 {
1033         fprintf(stdout, "%s:%d\n", source_position.input_name,
1034                 source_position.linenr);
1035         fflush(stdout);
1036 }