nsz Git - cparser/blob - lexer.c

   1 #include <config.h>
   2
   3 #include "lexer_t.h"
   4 #include "token_t.h"
   5 #include "symbol_table_t.h"
   6 #include "adt/error.h"
   7
   8 #include <assert.h>
   9 #include <errno.h>
  10 #include <string.h>
  11 #include <ctype.h>
  12
  13 //#define DEBUG_CHARS
  14 #define MAX_PUTBACK 3
  15
  16 static int               c;
  17 source_position_t source_position;
  18 static FILE             *input;
  19 static char              buf[1027];
  20 static const char       *bufend;
  21 static const char       *bufpos;
  22 static strset_t          stringset;
  23 //static FILE            **input_stack;
  24 //static char            **buf_stack;
  25
  26 static
  27 void error_prefix_at(const char *input_name, unsigned linenr)
  28 {
  29         fprintf(stderr, "%s:%d: Error: ", input_name, linenr);
  30 }
  31
  32 static
  33 void error_prefix()
  34 {
  35         error_prefix_at(source_position.input_name, source_position.linenr);
  36 }
  37
  38 static
  39 void parse_error(const char *msg)
  40 {
  41         error_prefix();
  42         fprintf(stderr, "%s\n", msg);
  43 }
  44
  45 static inline
  46 void next_char()
  47 {
  48         bufpos++;
  49         if(bufpos >= bufend) {
  50                 size_t s = fread(buf + MAX_PUTBACK, 1, sizeof(buf) - MAX_PUTBACK,
  51                                  input);
  52                 if(s == 0) {
  53                         c = EOF;
  54                         return;
  55                 }
  56                 bufpos = buf + MAX_PUTBACK;
  57                 bufend = buf + MAX_PUTBACK + s;
  58         }
  59         c = *(bufpos);
  60 #ifdef DEBUG_CHARS
  61         printf("nchar '%c'\n", c);
  62 #endif
  63 }
  64
  65 static inline
  66 void put_back(int pc)
  67 {
  68         char *p = (char*) bufpos - 1;
  69         bufpos--;
  70         assert(p >= buf);
  71         *p = pc;
  72
  73 #ifdef DEBUG_CHARS
  74         printf("putback '%c'\n", pc);
  75 #endif
  76 }
  77
  78
  79 static
  80 int replace_trigraph(void)
  81 {
  82 #define MATCH_TRIGRAPH(ch,replacement)           \
  83         case ch:                                     \
  84                 c = replacement;                         \
  85                 return 1;
  86
  87         switch(c) {
  88         MATCH_TRIGRAPH('=', '#')
  89         MATCH_TRIGRAPH('(', '[')
  90         MATCH_TRIGRAPH('/', '\\')
  91         MATCH_TRIGRAPH(')', ']')
  92         MATCH_TRIGRAPH('\'', '^')
  93         MATCH_TRIGRAPH('<', '{')
  94         MATCH_TRIGRAPH('!', '|')
  95         MATCH_TRIGRAPH('>', '}')
  96         MATCH_TRIGRAPH('-', '~')
  97         default:
  98                 break;
  99         }
 100
 101         return 0;
 102 }
 103
 104 #define SKIP_TRIGRAPHS(custom_putback, no_trigraph_code) \
 105         case '?':                                  \
 106                 next_char();                           \
 107                 if(c != '?') {                         \
 108                         custom_putback;                    \
 109                         put_back(c);                       \
 110                         c = '?';                           \
 111                         no_trigraph_code;                  \
 112                 }                                      \
 113                 next_char();                           \
 114                 if(replace_trigraph()) {               \
 115                         break;                             \
 116                 }                                      \
 117                 custom_putback;                        \
 118                 put_back('?');                         \
 119                 put_back(c);                           \
 120                 c = '?';                               \
 121                 no_trigraph_code;
 122
 123 #define EAT_NEWLINE(newline_code)              \
 124         if(c == '\r') {                            \
 125                 next_char();                           \
 126                 if(c == '\n')                          \
 127                         next_char();                       \
 128                 source_position.linenr++;              \
 129                 newline_code;                          \
 130         } else if(c == '\n') {                     \
 131                 next_char();                           \
 132                 source_position.linenr++;              \
 133                 newline_code;                          \
 134         }
 135
 136 static
 137 void parse_symbol(token_t *token)
 138 {
 139         symbol_t *symbol;
 140         char     *string;
 141
 142         obstack_1grow(&symbol_obstack, c);
 143         next_char();
 144
 145         while(1) {
 146                 switch(c) {
 147                 case '\\':
 148                         next_char();
 149                         EAT_NEWLINE(break;)
 150                         goto end_symbol;
 151
 152                 case 'A' ... 'Z':
 153                 case 'a' ... 'z':
 154                 case '_':
 155                         obstack_1grow(&symbol_obstack, c);
 156                         next_char();
 157                         break;
 158
 159                 case '?':
 160                         next_char();
 161                         if(c != '?') {
 162                                 put_back(c);
 163                                 c = '?';
 164                                 goto end_symbol;
 165                         }
 166                         next_char();
 167                         if(replace_trigraph())
 168                                 break;
 169                         put_back('?');
 170                         put_back(c);
 171                         c = '?';
 172                         goto end_symbol;
 173
 174                 default:
 175                         goto end_symbol;
 176                 }
 177         }
 178 end_symbol:
 179         obstack_1grow(&symbol_obstack, '\0');
 180
 181         string = obstack_finish(&symbol_obstack);
 182         symbol = symbol_table_insert(string);
 183
 184         if(symbol->ID > 0) {
 185                 token->type = symbol->ID;
 186         } else {
 187                 token->type = T_IDENTIFIER;
 188         }
 189         token->v.symbol = symbol;
 190
 191         if(symbol->string != string) {
 192                 obstack_free(&symbol_obstack, string);
 193         }
 194 }
 195
 196 static
 197 void parse_number_hex(token_t *token)
 198 {
 199         assert(c == 'x' || c == 'X');
 200         next_char();
 201
 202         if (!isdigit(c) &&
 203                 !('A' <= c && c <= 'F') &&
 204                 !('a' <= c && c <= 'f')) {
 205                 parse_error("premature end of hex number literal");
 206                 token->type = T_ERROR;
 207                 return;
 208         }
 209
 210         int value = 0;
 211         for(;;) {
 212                 if (isdigit(c)) {
 213                         value = 16 * value + c - '0';
 214                 } else if ('A' <= c && c <= 'F') {
 215                         value = 16 * value + c - 'A' + 10;
 216                 } else if ('a' <= c && c <= 'f') {
 217                         value = 16 * value + c - 'a' + 10;
 218                 } else {
 219                         token->type     = T_INTEGER;
 220                         token->v.intvalue = value;
 221                         return;
 222                 }
 223                 next_char();
 224         }
 225 }
 226
 227 static
 228 void parse_number_oct(token_t *token)
 229 {
 230         assert(c == 'o' || c == 'O');
 231         next_char();
 232
 233         int value = 0;
 234         for(;;) {
 235                 if ('0' <= c && c <= '7') {
 236                         value = 8 * value + c - '0';
 237                 } else {
 238                         token->type     = T_INTEGER;
 239                         token->v.intvalue = value;
 240                         return;
 241                 }
 242                 next_char();
 243         }
 244 }
 245
 246 static
 247 void parse_number_dec(token_t *token, int first_char)
 248 {
 249         int value = 0;
 250         if(first_char > 0) {
 251                 assert(first_char >= '0' && first_char <= '9');
 252                 value = first_char - '0';
 253         }
 254
 255         for(;;) {
 256                 if (isdigit(c)) {
 257                         value = 10 * value + c - '0';
 258                 } else {
 259                         token->type     = T_INTEGER;
 260                         token->v.intvalue = value;
 261                         return;
 262                 }
 263                 next_char();
 264         }
 265 }
 266
 267 static
 268 void parse_number(token_t *token)
 269 {
 270         // TODO check for overflow
 271         // TODO check for various invalid inputs sequences
 272
 273         if (c == '0') {
 274                 next_char();
 275                 switch (c) {
 276                         case 'X':
 277                         case 'x': parse_number_hex(token); break;
 278                         case 'o':
 279                         case 'O': parse_number_oct(token); break;
 280                         default:  parse_number_dec(token, '0');
 281                 }
 282         } else {
 283                 parse_number_dec(token, 0);
 284         }
 285 }
 286
 287 static
 288 int parse_escape_sequence()
 289 {
 290         while(1) {
 291                 int ec = c;
 292                 next_char();
 293
 294                 switch(ec) {
 295                 case '"': return '"';
 296                 case '\'': return'\'';
 297                 case '\\':
 298                         EAT_NEWLINE(break;)
 299                         return '\\';
 300                 case 'a': return '\a';
 301                 case 'b': return '\b';
 302                 case 'f': return '\f';
 303                 case 'n': return '\n';
 304                 case 'r': return '\r';
 305                 case 't': return '\t';
 306                 case 'v': return '\v';
 307                 case 'x': /* TODO parse hex number ... */
 308                         parse_error("hex escape sequences not implemented yet");
 309                         return EOF;
 310                 case 0 ... 8: /* TODO parse octal number ... */
 311                         parse_error("octal escape sequences not implemented yet");
 312                         return EOF;
 313                 case '?':
 314                         if(c != '?') {
 315                                 return '?';
 316                         }
 317                         /* might be a trigraph */
 318                         next_char();
 319                         if(replace_trigraph()) {
 320                                 break;
 321                         }
 322                         put_back(c);
 323                         c = '?';
 324                         return '?';
 325
 326                 case EOF:
 327                         parse_error("reached end of file while parsing escape sequence");
 328                         return EOF;
 329                 default:
 330                         parse_error("unknown escape sequence");
 331                         return EOF;
 332                 }
 333         }
 334 }
 335
 336 static
 337 void parse_string_literal(token_t *token)
 338 {
 339         unsigned    start_linenr = source_position.linenr;
 340         char       *string;
 341         const char *result;
 342
 343         assert(c == '"');
 344         next_char();
 345
 346         while(1) {
 347                 switch(c) {
 348                 SKIP_TRIGRAPHS(,
 349                         obstack_1grow(&symbol_obstack, '?');
 350                         next_char();
 351                         break;
 352                 )
 353
 354                 case '\\':
 355                         next_char();
 356                         EAT_NEWLINE(break;)
 357                         int ec = parse_escape_sequence();
 358                         obstack_1grow(&symbol_obstack, ec);
 359                         break;
 360
 361                 case EOF:
 362                         error_prefix_at(source_position.input_name, start_linenr);
 363                         fprintf(stderr, "string has no end\n");
 364                         token->type = T_ERROR;
 365                         return;
 366
 367                 case '"':
 368                         next_char();
 369                         goto end_of_string;
 370
 371                 default:
 372                         obstack_1grow(&symbol_obstack, c);
 373                         next_char();
 374                         break;
 375                 }
 376         }
 377
 378 end_of_string:
 379
 380         /* TODO: concatenate multiple strings separated by whitespace... */
 381
 382         /* add finishing 0 to the string */
 383         obstack_1grow(&symbol_obstack, '\0');
 384         string = obstack_finish(&symbol_obstack);
 385
 386         /* check if there is already a copy of the string */
 387         result = strset_insert(&stringset, string);
 388         if(result != string) {
 389                 obstack_free(&symbol_obstack, string);
 390         }
 391
 392         token->type     = T_STRING_LITERAL;
 393         token->v.string = result;
 394 }
 395
 396 #define MATCH_NEWLINE(code)                 \
 397         case '\r':                              \
 398                 next_char();                        \
 399                 if(c == '\n') {                     \
 400                         next_char();                    \
 401                 }                                   \
 402                 source_position.linenr++;           \
 403                 code;                               \
 404         case '\n':                              \
 405                 next_char();                        \
 406                 source_position.linenr++;           \
 407                 code;
 408
 409 static
 410 void parse_character_constant(token_t *token)
 411 {
 412         assert(c == '\'');
 413         next_char();
 414
 415         int found_char = 0;
 416         while(1) {
 417                 switch(c) {
 418                 SKIP_TRIGRAPHS(,
 419                         found_char = '?';
 420                         break;
 421                 )
 422
 423                 case '\\':
 424                         next_char();
 425                         EAT_NEWLINE(break;)
 426                         found_char = '\\';
 427                         break;
 428
 429                 MATCH_NEWLINE(
 430                         parse_error("newline while parsing character constant");
 431                         break;
 432                 )
 433
 434                 case '\'':
 435                         next_char();
 436                         goto end_of_char_constant;
 437
 438                 case EOF:
 439                         parse_error("EOF while parsing character constant");
 440                         token->type = T_ERROR;
 441                         return;
 442
 443                 default:
 444                         if(found_char != 0) {
 445                                 parse_error("more than 1 characters in character "
 446                                             "constant");
 447                                 goto end_of_char_constant;
 448                         } else {
 449                                 found_char = c;
 450                                 next_char();
 451                         }
 452                         break;
 453                 }
 454         }
 455
 456 end_of_char_constant:
 457         token->type       = T_INTEGER;
 458         token->v.intvalue = found_char;
 459 }
 460
 461 static
 462 void skip_multiline_comment(void)
 463 {
 464         unsigned start_linenr = source_position.linenr;
 465         int had_star = 0;
 466
 467         while(1) {
 468                 switch(c) {
 469                 case '*':
 470                         next_char();
 471                         had_star = 1;
 472                         break;
 473
 474                 case '/':
 475                         next_char();
 476                         if(had_star) {
 477                                 return;
 478                         }
 479                         had_star = 0;
 480                         break;
 481
 482                 case '\\':
 483                         next_char();
 484                         EAT_NEWLINE(break;)
 485                         had_star = 0;
 486                         break;
 487
 488                 case '?':
 489                         next_char();
 490                         if(c != '?') {
 491                                 had_star = 0;
 492                                 break;
 493                         }
 494                         next_char();
 495                         if(replace_trigraph())
 496                                 break;
 497                         put_back(c);
 498                         c = '?';
 499                         had_star = 0;
 500                         /* we don't put back the 2nd ? as the comment text is discarded
 501                          * anyway */
 502                         break;
 503
 504                 MATCH_NEWLINE(had_star = 0; break;)
 505
 506                 case EOF:
 507                         error_prefix_at(source_position.input_name, start_linenr);
 508                         fprintf(stderr, "at end of file while looking for comment end\n");
 509                         return;
 510                 default:
 511                         had_star = 0;
 512                         next_char();
 513                         break;
 514                 }
 515         }
 516 }
 517
 518 static
 519 void skip_line_comment(void)
 520 {
 521         while(1) {
 522                 switch(c) {
 523                 case '?':
 524                         next_char();
 525                         if(c != '?')
 526                                 break;
 527                         next_char();
 528                         if(replace_trigraph())
 529                                 break;
 530                         put_back('?');
 531                         /* we don't put back the 2nd ? as the comment text is discarded
 532                          * anyway */
 533                         break;
 534
 535                 case '\\':
 536                         next_char();
 537                         if(c == '\n') {
 538                                 next_char();
 539                                 source_position.linenr++;
 540                         }
 541                         break;
 542
 543                 case EOF:
 544                 case '\r':
 545                 case '\n':
 546                         return;
 547
 548                 default:
 549                         next_char();
 550                         break;
 551                 }
 552         }
 553 }
 554
 555 static
 556 void lexer_next_preprocessing_token(token_t *token);
 557
 558 static
 559 void eat_until_newline(void)
 560 {
 561         /* TODO */
 562 }
 563
 564 static
 565 void error_directive(void)
 566 {
 567         error_prefix();
 568         fprintf(stderr, "#error directive: \n");
 569
 570         /* parse pp-tokens until new-line */
 571 }
 572
 573 static
 574 void define_directive(void)
 575 {
 576         token_t temptoken;
 577
 578         lexer_next_preprocessing_token(&temptoken);
 579         if(temptoken.type != T_IDENTIFIER) {
 580                 parse_error("expected identifier after #define\n");
 581                 eat_until_newline();
 582         }
 583 }
 584
 585 static
 586 void ifdef_directive(int is_ifndef)
 587 {
 588         (void) is_ifndef;
 589         token_t temptoken;
 590         lexer_next_preprocessing_token(&temptoken);
 591         //expect_identifier();
 592         //extect_newline();
 593 }
 594
 595 static
 596 void endif_directive(void)
 597 {
 598         //expect_newline();
 599 }
 600
 601 static
 602 void found_preprocessor_identifier(symbol_t *symbol)
 603 {
 604         switch(symbol->pp_ID) {
 605         case TP_include:
 606                 printf("include - enable header name parsing!\n");
 607                 break;
 608         case TP_define:
 609                 define_directive();
 610                 break;
 611         case TP_ifdef:
 612                 ifdef_directive(0);
 613                 break;
 614         case TP_ifndef:
 615                 ifdef_directive(1);
 616                 break;
 617         case TP_endif:
 618                 endif_directive();
 619                 break;
 620         case TP_if:
 621         case TP_else:
 622         case TP_elif:
 623         case TP_undef:
 624         case TP_line:
 625         case TP_error:
 626                 error_directive();
 627                 break;
 628         case TP_pragma:
 629                 break;
 630         }
 631 }
 632
 633 static
 634 void parse_preprocessor_directive(token_t *result_token)
 635 {
 636         token_t temptoken;
 637
 638         (void) result_token;
 639         lexer_next_preprocessing_token(&temptoken);
 640         switch(temptoken.type) {
 641         case T_IDENTIFIER:
 642                 found_preprocessor_identifier(temptoken.v.symbol);
 643                 break;
 644         }
 645 }
 646
 647 #define MAYBE_PROLOG                                       \
 648                         next_char();                                   \
 649                         while(1) {                                     \
 650                                 switch(c) {
 651
 652 #define MAYBE(ch, set_type)                                \
 653                                 case ch:                                   \
 654                                         next_char();                           \
 655                                         token->type = set_type;                \
 656                                         return;
 657
 658 #define ELSE_CODE(code)                                    \
 659                                 SKIP_TRIGRAPHS(,                           \
 660                                         code;                                  \
 661                                 )                                          \
 662                                                                                                                    \
 663                                 case '\\':                                 \
 664                                         next_char();                           \
 665                                         EAT_NEWLINE(break;)                    \
 666                                         /* fallthrough */                      \
 667                                 default:                                   \
 668                                         code;                                  \
 669                                 }                                          \
 670                         } /* end of while(1) */                        \
 671                         break;
 672
 673 #define ELSE(set_type)                                     \
 674                 ELSE_CODE(                                         \
 675                         token->type = set_type;                        \
 676                         return;                                        \
 677                 )
 678
 679 static
 680 void eat_whitespace()
 681 {
 682         while(1) {
 683                 switch(c) {
 684                 case ' ':
 685                 case '\t':
 686                         next_char();
 687                         break;
 688
 689                 case '\r':
 690                 case '\n':
 691                         return;
 692
 693                 case '\\':
 694                         next_char();
 695                         if(c == '\n') {
 696                                 next_char();
 697                                 source_position.linenr++;
 698                                 break;
 699                         }
 700
 701                         put_back(c);
 702                         c = '\\';
 703                         return;
 704
 705                 SKIP_TRIGRAPHS(,
 706                         return;
 707                 )
 708
 709                 case '/':
 710                         next_char();
 711                         while(1) {
 712                                 switch(c) {
 713                                 case '*':
 714                                         next_char();
 715                                         skip_multiline_comment();
 716                                         eat_whitespace();
 717                                         return;
 718                                 case '/':
 719                                         next_char();
 720                                         skip_line_comment();
 721                                         eat_whitespace();
 722                                         return;
 723
 724                                 SKIP_TRIGRAPHS(
 725                                                 put_back('?');
 726                                         ,
 727                                                 c = '/';
 728                                                 return;
 729                                 )
 730
 731                                 case '\\':
 732                                         next_char();
 733                                         EAT_NEWLINE(break;)
 734                                         /* fallthrough */
 735                                 default:
 736                                         return;
 737                                 }
 738                         }
 739                         break;
 740
 741                 default:
 742                         return;
 743                 }
 744         }
 745 }
 746
 747 static
 748 void lexer_next_preprocessing_token(token_t *token)
 749 {
 750         while(1) {
 751                 switch(c) {
 752                 case ' ':
 753                 case '\t':
 754                         next_char();
 755                         break;
 756
 757                 MATCH_NEWLINE(
 758                         eat_whitespace();
 759                         if(c == '#') {
 760                                 next_char();
 761                                 parse_preprocessor_directive(token);
 762                                 return;
 763                         }
 764                         token->type = '\n';
 765                         return;
 766                 )
 767
 768                 case 'A' ... 'Z':
 769                 case 'a' ... 'z':
 770                 case '_':
 771                         parse_symbol(token);
 772                         return;
 773
 774                 case '0' ... '9':
 775                         parse_number(token);
 776                         return;
 777
 778                 case '"':
 779                         parse_string_literal(token);
 780                         return;
 781
 782                 case '\'':
 783                         parse_character_constant(token);
 784                         return;
 785
 786                 case '\\':
 787                         next_char();
 788                         if(c == '\n') {
 789                                 next_char();
 790                                 source_position.linenr++;
 791                                 break;
 792                         } else {
 793                                 parse_error("unexpected '\\' found");
 794                                 token->type = T_ERROR;
 795                         }
 796                         return;
 797
 798                 case '.':
 799                         MAYBE_PROLOG
 800                                 case '.':
 801                                         MAYBE_PROLOG
 802                                         MAYBE('.', T_DOTDOTDOT)
 803                                         ELSE_CODE(
 804                                                 put_back(c);
 805                                                 c = '.';
 806                                                 token->type = '.';
 807                                                 return;
 808                                         )
 809                         ELSE('.')
 810                 case '&':
 811                         MAYBE_PROLOG
 812                         MAYBE('&', T_ANDAND)
 813                         MAYBE('=', T_ANDEQUAL)
 814                         ELSE('&')
 815                 case '*':
 816                         MAYBE_PROLOG
 817                         MAYBE('=', T_ASTERISKEQUAL)
 818                         ELSE('*')
 819                 case '+':
 820                         MAYBE_PROLOG
 821                         MAYBE('+', T_PLUSPLUS)
 822                         MAYBE('=', T_PLUSEQUAL)
 823                         ELSE('+')
 824                 case '-':
 825                         MAYBE_PROLOG
 826                         MAYBE('-', T_MINUSMINUS)
 827                         MAYBE('=', T_MINUSEQUAL)
 828                         ELSE('-')
 829                 case '!':
 830                         MAYBE_PROLOG
 831                         MAYBE('=', T_EXCLAMATIONMARKEQUAL)
 832                         ELSE('!')
 833                 case '/':
 834                         MAYBE_PROLOG
 835                         MAYBE('=', T_SLASHEQUAL)
 836                                 case '*':
 837                                         next_char();
 838                                         skip_multiline_comment();
 839                                         lexer_next_preprocessing_token(token);
 840                                         return;
 841                                 case '/':
 842                                         next_char();
 843                                         skip_line_comment();
 844                                         lexer_next_preprocessing_token(token);
 845                                         return;
 846                         ELSE('/')
 847                 case '%':
 848                         MAYBE_PROLOG
 849                         MAYBE('>', T_PERCENTGREATER)
 850                         MAYBE('=', T_PERCENTEQUAL)
 851                                 case ':':
 852                                         MAYBE_PROLOG
 853                                                 case '%':
 854                                                         MAYBE_PROLOG
 855                                                         MAYBE(':', T_PERCENTCOLONPERCENTCOLON)
 856                                                         ELSE_CODE(
 857                                                                 put_back(c);
 858                                                                 c = '%';
 859                                                                 token->type = T_PERCENTCOLON;
 860                                                                 return;
 861                                                         )
 862                                         ELSE(T_PERCENTCOLON)
 863                         ELSE('%')
 864                 case '<':
 865                         MAYBE_PROLOG
 866                         MAYBE(':', T_LESSCOLON)
 867                         MAYBE('%', T_LESSPERCENT)
 868                                 case '<':
 869                                         MAYBE_PROLOG
 870                                         MAYBE('=', T_LESSLESSEQUAL)
 871                                         ELSE(T_LESSLESS)
 872                         ELSE('<')
 873                 case '>':
 874                         MAYBE_PROLOG
 875                                 case '>':
 876                                         MAYBE_PROLOG
 877                                         MAYBE('=', T_GREATERGREATEREQUAL)
 878                                         ELSE(T_GREATERGREATER)
 879                         ELSE('>')
 880                 case '^':
 881                         MAYBE_PROLOG
 882                         MAYBE('=', T_CARETEQUAL)
 883                         ELSE('^')
 884                 case '|':
 885                         MAYBE_PROLOG
 886                         MAYBE('=', T_PIPEEQUAL)
 887                         MAYBE('|', T_PIPEPIPE)
 888                         ELSE('|')
 889                 case ':':
 890                         MAYBE_PROLOG
 891                         MAYBE('>', T_COLONGREATER)
 892                         ELSE(':')
 893                 case '=':
 894                         MAYBE_PROLOG
 895                         MAYBE('=', T_EQUALEQUAL)
 896                         ELSE('=')
 897                 case '#':
 898                         MAYBE_PROLOG
 899                         MAYBE('#', T_HASHHASH)
 900                         ELSE('#')
 901
 902                 case '?':
 903                         next_char();
 904                         /* just a simple ? */
 905                         if(c != '?') {
 906                                 token->type = '?';
 907                                 return;
 908                         }
 909                         /* might be a trigraph */
 910                         next_char();
 911                         if(replace_trigraph()) {
 912                                 break;
 913                         }
 914                         put_back(c);
 915                         c = '?';
 916                         token->type = '?';
 917                         return;
 918
 919                 case '[':
 920                 case ']':
 921                 case '(':
 922                 case ')':
 923                 case '{':
 924                 case '}':
 925                 case '~':
 926                 case ';':
 927                 case ',':
 928                         token->type = c;
 929                         next_char();
 930                         return;
 931
 932                 case EOF:
 933                         token->type = T_EOF;
 934                         return;
 935
 936                 default:
 937                         next_char();
 938                         error_prefix();
 939                         fprintf(stderr, "unknown character '%c' found\n", c);
 940                         token->type = T_ERROR;
 941                         return;
 942                 }
 943         }
 944 }
 945
 946 void lexer_next_token(token_t *token)
 947 {
 948         do {
 949                 lexer_next_preprocessing_token(token);
 950         } while(token->type == '\n');
 951 }
 952
 953 void init_lexer(void)
 954 {
 955         strset_init(&stringset);
 956 }
 957
 958 void lexer_open_stream(FILE *stream, const char *input_name)
 959 {
 960         input                      = stream;
 961         source_position.linenr     = 0;
 962         source_position.input_name = input_name;
 963
 964         /* we place a virtual '\n' at the beginning so the lexer knows we're at the
 965          * beginning of a line */
 966         c = '\n';
 967 }
 968
 969 void exit_lexer(void)
 970 {
 971         strset_destroy(&stringset);
 972 }
 973
 974 static __attribute__((unused))
 975 void dbg_pos(const source_position_t source_position)
 976 {
 977         fprintf(stdout, "%s:%d\n", source_position.input_name, source_position.linenr);
 978         fflush(stdout);
 979 }