nsz Git - cparser/blob - lexer.c

   1 #include <config.h>
   2
   3 #include "lexer_t.h"
   4 #include "token_t.h"
   5 #include "symbol_table_t.h"
   6 #include "adt/error.h"
   7
   8 #include <assert.h>
   9 #include <errno.h>
  10 #include <string.h>
  11 #include <ctype.h>
  12
  13 //#define DEBUG_CHARS
  14 #define MAX_PUTBACK 3
  15
  16 static int               c;
  17 source_position_t source_position;
  18 static FILE             *input;
  19 static char              buf[1027];
  20 static const char       *bufend;
  21 static const char       *bufpos;
  22 static strset_t          stringset;
  23 //static FILE            **input_stack;
  24 //static char            **buf_stack;
  25
  26 static
  27 void error_prefix_at(const char *input_name, unsigned linenr)
  28 {
  29         fprintf(stderr, "%s:%d: Error: ", input_name, linenr);
  30 }
  31
  32 static
  33 void error_prefix()
  34 {
  35         error_prefix_at(source_position.input_name, source_position.linenr);
  36 }
  37
  38 static
  39 void parse_error(const char *msg)
  40 {
  41         error_prefix();
  42         fprintf(stderr, "%s\n", msg);
  43 }
  44
  45 static inline
  46 void next_char()
  47 {
  48         bufpos++;
  49         if(bufpos >= bufend) {
  50                 size_t s = fread(buf + MAX_PUTBACK, 1, sizeof(buf) - MAX_PUTBACK,
  51                                  input);
  52                 if(s == 0) {
  53                         c = EOF;
  54                         return;
  55                 }
  56                 bufpos = buf + MAX_PUTBACK;
  57                 bufend = buf + MAX_PUTBACK + s;
  58         }
  59         c = *(bufpos);
  60 #ifdef DEBUG_CHARS
  61         printf("nchar '%c'\n", c);
  62 #endif
  63 }
  64
  65 static inline
  66 void put_back(int pc)
  67 {
  68         char *p = (char*) bufpos - 1;
  69         bufpos--;
  70         assert(p >= buf);
  71         *p = pc;
  72
  73 #ifdef DEBUG_CHARS
  74         printf("putback '%c'\n", pc);
  75 #endif
  76 }
  77
  78
  79 static
  80 int replace_trigraph(void)
  81 {
  82 #define MATCH_TRIGRAPH(ch,replacement)           \
  83         case ch:                                     \
  84                 c = replacement;                         \
  85                 return 1;
  86
  87         switch(c) {
  88         MATCH_TRIGRAPH('=', '#')
  89         MATCH_TRIGRAPH('(', '[')
  90         MATCH_TRIGRAPH('/', '\\')
  91         MATCH_TRIGRAPH(')', ']')
  92         MATCH_TRIGRAPH('\'', '^')
  93         MATCH_TRIGRAPH('<', '{')
  94         MATCH_TRIGRAPH('!', '|')
  95         MATCH_TRIGRAPH('>', '}')
  96         MATCH_TRIGRAPH('-', '~')
  97         default:
  98                 break;
  99         }
 100
 101         return 0;
 102 }
 103
 104 #define SKIP_TRIGRAPHS(custom_putback, no_trigraph_code) \
 105         case '?':                                  \
 106                 next_char();                           \
 107                 if(c != '?') {                         \
 108                         custom_putback;                    \
 109                         put_back(c);                       \
 110                         c = '?';                           \
 111                         no_trigraph_code;                  \
 112                 }                                      \
 113                 next_char();                           \
 114                 if(replace_trigraph()) {               \
 115                         break;                             \
 116                 }                                      \
 117                 custom_putback;                        \
 118                 put_back('?');                         \
 119                 put_back(c);                           \
 120                 c = '?';                               \
 121                 no_trigraph_code;
 122
 123 #define EAT_NEWLINE(newline_code)              \
 124         if(c == '\r') {                            \
 125                 next_char();                           \
 126                 if(c == '\n')                          \
 127                         next_char();                       \
 128                 source_position.linenr++;              \
 129                 newline_code;                          \
 130         } else if(c == '\n') {                     \
 131                 next_char();                           \
 132                 source_position.linenr++;              \
 133                 newline_code;                          \
 134         }
 135
 136 static
 137 void parse_symbol(token_t *token)
 138 {
 139         symbol_t *symbol;
 140         char     *string;
 141
 142         obstack_1grow(&symbol_obstack, c);
 143         next_char();
 144
 145         while(1) {
 146                 switch(c) {
 147                 case '\\':
 148                         next_char();
 149                         EAT_NEWLINE(break;)
 150                         goto end_symbol;
 151
 152                 case 'A' ... 'Z':
 153                 case 'a' ... 'z':
 154                 case '_':
 155                         obstack_1grow(&symbol_obstack, c);
 156                         next_char();
 157                         break;
 158
 159                 case '?':
 160                         next_char();
 161                         if(c != '?') {
 162                                 put_back(c);
 163                                 c = '?';
 164                                 goto end_symbol;
 165                         }
 166                         next_char();
 167                         if(replace_trigraph())
 168                                 break;
 169                         put_back('?');
 170                         put_back(c);
 171                         c = '?';
 172                         goto end_symbol;
 173
 174                 default:
 175                         goto end_symbol;
 176                 }
 177         }
 178 end_symbol:
 179         obstack_1grow(&symbol_obstack, '\0');
 180
 181         string = obstack_finish(&symbol_obstack);
 182         symbol = symbol_table_insert(string);
 183
 184 #if 0
 185         if(symbol->ID > 0) {
 186                 token->type = symbol->ID;
 187         } else {
 188                 token->type = T_IDENTIFIER;
 189         }
 190 #endif
 191         token->type     = T_IDENTIFIER;
 192         token->v.symbol = symbol;
 193
 194         if(symbol->string != string) {
 195                 obstack_free(&symbol_obstack, string);
 196         }
 197 }
 198
 199 static
 200 void parse_number_hex(token_t *token)
 201 {
 202         assert(c == 'x' || c == 'X');
 203         next_char();
 204
 205         if (!isdigit(c) &&
 206                 !('A' <= c && c <= 'F') &&
 207                 !('a' <= c && c <= 'f')) {
 208                 parse_error("premature end of hex number literal");
 209                 token->type = T_ERROR;
 210                 return;
 211         }
 212
 213         int value = 0;
 214         for(;;) {
 215                 if (isdigit(c)) {
 216                         value = 16 * value + c - '0';
 217                 } else if ('A' <= c && c <= 'F') {
 218                         value = 16 * value + c - 'A' + 10;
 219                 } else if ('a' <= c && c <= 'f') {
 220                         value = 16 * value + c - 'a' + 10;
 221                 } else {
 222                         token->type     = T_INTEGER;
 223                         token->v.intvalue = value;
 224                         return;
 225                 }
 226                 next_char();
 227         }
 228 }
 229
 230 static
 231 void parse_number_oct(token_t *token)
 232 {
 233         assert(c == 'o' || c == 'O');
 234         next_char();
 235
 236         int value = 0;
 237         for(;;) {
 238                 if ('0' <= c && c <= '7') {
 239                         value = 8 * value + c - '0';
 240                 } else {
 241                         token->type     = T_INTEGER;
 242                         token->v.intvalue = value;
 243                         return;
 244                 }
 245                 next_char();
 246         }
 247 }
 248
 249 static
 250 void parse_number_dec(token_t *token, int first_char)
 251 {
 252         int value = 0;
 253         if(first_char > 0) {
 254                 assert(first_char >= '0' && first_char <= '9');
 255                 value = first_char - '0';
 256         }
 257
 258         for(;;) {
 259                 if (isdigit(c)) {
 260                         value = 10 * value + c - '0';
 261                 } else {
 262                         token->type     = T_INTEGER;
 263                         token->v.intvalue = value;
 264                         return;
 265                 }
 266                 next_char();
 267         }
 268 }
 269
 270 static
 271 void parse_number(token_t *token)
 272 {
 273         // TODO check for overflow
 274         // TODO check for various invalid inputs sequences
 275
 276         if (c == '0') {
 277                 next_char();
 278                 switch (c) {
 279                         case 'X':
 280                         case 'x': parse_number_hex(token); break;
 281                         case 'o':
 282                         case 'O': parse_number_oct(token); break;
 283                         default:  parse_number_dec(token, '0');
 284                 }
 285         } else {
 286                 parse_number_dec(token, 0);
 287         }
 288 }
 289
 290 static
 291 int parse_escape_sequence()
 292 {
 293         while(1) {
 294                 int ec = c;
 295                 next_char();
 296
 297                 switch(ec) {
 298                 case '"': return '"';
 299                 case '\'': return'\'';
 300                 case '\\':
 301                         EAT_NEWLINE(break;)
 302                         return '\\';
 303                 case 'a': return '\a';
 304                 case 'b': return '\b';
 305                 case 'f': return '\f';
 306                 case 'n': return '\n';
 307                 case 'r': return '\r';
 308                 case 't': return '\t';
 309                 case 'v': return '\v';
 310                 case 'x': /* TODO parse hex number ... */
 311                         parse_error("hex escape sequences not implemented yet");
 312                         return EOF;
 313                 case 0 ... 8: /* TODO parse octal number ... */
 314                         parse_error("octal escape sequences not implemented yet");
 315                         return EOF;
 316                 case '?':
 317                         if(c != '?') {
 318                                 return '?';
 319                         }
 320                         /* might be a trigraph */
 321                         next_char();
 322                         if(replace_trigraph()) {
 323                                 break;
 324                         }
 325                         put_back(c);
 326                         c = '?';
 327                         return '?';
 328
 329                 case EOF:
 330                         parse_error("reached end of file while parsing escape sequence");
 331                         return EOF;
 332                 default:
 333                         parse_error("unknown escape sequence");
 334                         return EOF;
 335                 }
 336         }
 337 }
 338
 339 static
 340 void parse_string_literal(token_t *token)
 341 {
 342         unsigned    start_linenr = source_position.linenr;
 343         char       *string;
 344         const char *result;
 345
 346         assert(c == '"');
 347         next_char();
 348
 349         while(1) {
 350                 switch(c) {
 351                 SKIP_TRIGRAPHS(,
 352                         obstack_1grow(&symbol_obstack, '?');
 353                         next_char();
 354                         break;
 355                 )
 356
 357                 case '\\':
 358                         next_char();
 359                         EAT_NEWLINE(break;)
 360                         int ec = parse_escape_sequence();
 361                         obstack_1grow(&symbol_obstack, ec);
 362                         break;
 363
 364                 case EOF:
 365                         error_prefix_at(source_position.input_name, start_linenr);
 366                         fprintf(stderr, "string has no end\n");
 367                         token->type = T_ERROR;
 368                         return;
 369
 370                 case '"':
 371                         next_char();
 372                         goto end_of_string;
 373
 374                 default:
 375                         obstack_1grow(&symbol_obstack, c);
 376                         next_char();
 377                         break;
 378                 }
 379         }
 380
 381 end_of_string:
 382
 383         /* TODO: concatenate multiple strings separated by whitespace... */
 384
 385         /* add finishing 0 to the string */
 386         obstack_1grow(&symbol_obstack, '\0');
 387         string = obstack_finish(&symbol_obstack);
 388
 389         /* check if there is already a copy of the string */
 390         result = strset_insert(&stringset, string);
 391         if(result != string) {
 392                 obstack_free(&symbol_obstack, string);
 393         }
 394
 395         token->type     = T_STRING_LITERAL;
 396         token->v.string = result;
 397 }
 398
 399 #define MATCH_NEWLINE(code)                 \
 400         case '\r':                              \
 401                 next_char();                        \
 402                 if(c == '\n') {                     \
 403                         next_char();                    \
 404                 }                                   \
 405                 source_position.linenr++;           \
 406                 code;                               \
 407         case '\n':                              \
 408                 next_char();                        \
 409                 source_position.linenr++;           \
 410                 code;
 411
 412 static
 413 void parse_character_constant(token_t *token)
 414 {
 415         assert(c == '\'');
 416         next_char();
 417
 418         int found_char = 0;
 419         while(1) {
 420                 switch(c) {
 421                 SKIP_TRIGRAPHS(,
 422                         found_char = '?';
 423                         break;
 424                 )
 425
 426                 case '\\':
 427                         next_char();
 428                         EAT_NEWLINE(break;)
 429                         found_char = '\\';
 430                         break;
 431
 432                 MATCH_NEWLINE(
 433                         parse_error("newline while parsing character constant");
 434                         break;
 435                 )
 436
 437                 case '\'':
 438                         next_char();
 439                         goto end_of_char_constant;
 440
 441                 case EOF:
 442                         parse_error("EOF while parsing character constant");
 443                         token->type = T_ERROR;
 444                         return;
 445
 446                 default:
 447                         if(found_char != 0) {
 448                                 parse_error("more than 1 characters in character "
 449                                             "constant");
 450                                 goto end_of_char_constant;
 451                         } else {
 452                                 found_char = c;
 453                                 next_char();
 454                         }
 455                         break;
 456                 }
 457         }
 458
 459 end_of_char_constant:
 460         token->type       = T_INTEGER;
 461         token->v.intvalue = found_char;
 462 }
 463
 464 static
 465 void skip_multiline_comment(void)
 466 {
 467         unsigned start_linenr = source_position.linenr;
 468         int had_star = 0;
 469
 470         while(1) {
 471                 switch(c) {
 472                 case '*':
 473                         next_char();
 474                         had_star = 1;
 475                         break;
 476
 477                 case '/':
 478                         next_char();
 479                         if(had_star) {
 480                                 return;
 481                         }
 482                         had_star = 0;
 483                         break;
 484
 485                 case '\\':
 486                         next_char();
 487                         EAT_NEWLINE(break;)
 488                         had_star = 0;
 489                         break;
 490
 491                 case '?':
 492                         next_char();
 493                         if(c != '?') {
 494                                 had_star = 0;
 495                                 break;
 496                         }
 497                         next_char();
 498                         if(replace_trigraph())
 499                                 break;
 500                         put_back(c);
 501                         c = '?';
 502                         had_star = 0;
 503                         /* we don't put back the 2nd ? as the comment text is discarded
 504                          * anyway */
 505                         break;
 506
 507                 MATCH_NEWLINE(had_star = 0; break;)
 508
 509                 case EOF:
 510                         error_prefix_at(source_position.input_name, start_linenr);
 511                         fprintf(stderr, "at end of file while looking for comment end\n");
 512                         return;
 513                 default:
 514                         had_star = 0;
 515                         next_char();
 516                         break;
 517                 }
 518         }
 519 }
 520
 521 static
 522 void skip_line_comment(void)
 523 {
 524         while(1) {
 525                 switch(c) {
 526                 case '?':
 527                         next_char();
 528                         if(c != '?')
 529                                 break;
 530                         next_char();
 531                         if(replace_trigraph())
 532                                 break;
 533                         put_back('?');
 534                         /* we don't put back the 2nd ? as the comment text is discarded
 535                          * anyway */
 536                         break;
 537
 538                 case '\\':
 539                         next_char();
 540                         if(c == '\n') {
 541                                 next_char();
 542                                 source_position.linenr++;
 543                         }
 544                         break;
 545
 546                 case EOF:
 547                 case '\r':
 548                 case '\n':
 549                         return;
 550
 551                 default:
 552                         next_char();
 553                         break;
 554                 }
 555         }
 556 }
 557
 558 static
 559 void lexer_next_preprocessing_token(token_t *token);
 560
 561 static
 562 void eat_until_newline(void)
 563 {
 564         /* TODO */
 565 }
 566
 567 static
 568 void error_directive(void)
 569 {
 570         error_prefix();
 571         fprintf(stderr, "#error directive: \n");
 572
 573         /* parse pp-tokens until new-line */
 574 }
 575
 576 static
 577 void define_directive(void)
 578 {
 579         token_t temptoken;
 580
 581         lexer_next_preprocessing_token(&temptoken);
 582         if(temptoken.type != T_IDENTIFIER) {
 583                 parse_error("expected identifier after #define\n");
 584                 eat_until_newline();
 585         }
 586 }
 587
 588 static
 589 void ifdef_directive(int is_ifndef)
 590 {
 591         (void) is_ifndef;
 592         token_t temptoken;
 593         lexer_next_preprocessing_token(&temptoken);
 594         //expect_identifier();
 595         //extect_newline();
 596 }
 597
 598 static
 599 void endif_directive(void)
 600 {
 601         //expect_newline();
 602 }
 603
 604 static
 605 void found_preprocessor_identifier(symbol_t *symbol)
 606 {
 607         switch(symbol->pp_ID) {
 608         case TP_include:
 609                 printf("include - enable header name parsing!\n");
 610                 break;
 611         case TP_define:
 612                 define_directive();
 613                 break;
 614         case TP_ifdef:
 615                 ifdef_directive(0);
 616                 break;
 617         case TP_ifndef:
 618                 ifdef_directive(1);
 619                 break;
 620         case TP_endif:
 621                 endif_directive();
 622                 break;
 623         case TP_if:
 624         case TP_else:
 625         case TP_elif:
 626         case TP_undef:
 627         case TP_line:
 628         case TP_error:
 629                 error_directive();
 630                 break;
 631         case TP_pragma:
 632                 break;
 633         }
 634 }
 635
 636 static
 637 void parse_preprocessor_directive(token_t *result_token)
 638 {
 639         token_t temptoken;
 640
 641         (void) result_token;
 642         lexer_next_preprocessing_token(&temptoken);
 643         switch(temptoken.type) {
 644         case T_IDENTIFIER:
 645                 found_preprocessor_identifier(temptoken.v.symbol);
 646                 break;
 647         }
 648 }
 649
 650 #define MAYBE_PROLOG                                       \
 651                         next_char();                                   \
 652                         while(1) {                                     \
 653                                 switch(c) {
 654
 655 #define MAYBE(ch, set_type)                                \
 656                                 case ch:                                   \
 657                                         next_char();                           \
 658                                         token->type = set_type;                \
 659                                         return;
 660
 661 #define ELSE_CODE(code)                                    \
 662                                 SKIP_TRIGRAPHS(,                           \
 663                                         code;                                  \
 664                                 )                                          \
 665                                                                                                                    \
 666                                 case '\\':                                 \
 667                                         next_char();                           \
 668                                         EAT_NEWLINE(break;)                    \
 669                                         /* fallthrough */                      \
 670                                 default:                                   \
 671                                         code;                                  \
 672                                 }                                          \
 673                         } /* end of while(1) */                        \
 674                         break;
 675
 676 #define ELSE(set_type)                                     \
 677                 ELSE_CODE(                                         \
 678                         token->type = set_type;                        \
 679                         return;                                        \
 680                 )
 681
 682 static
 683 void eat_whitespace()
 684 {
 685         while(1) {
 686                 switch(c) {
 687                 case ' ':
 688                 case '\t':
 689                         next_char();
 690                         break;
 691
 692                 case '\r':
 693                 case '\n':
 694                         return;
 695
 696                 case '\\':
 697                         next_char();
 698                         if(c == '\n') {
 699                                 next_char();
 700                                 source_position.linenr++;
 701                                 break;
 702                         }
 703
 704                         put_back(c);
 705                         c = '\\';
 706                         return;
 707
 708                 SKIP_TRIGRAPHS(,
 709                         return;
 710                 )
 711
 712                 case '/':
 713                         next_char();
 714                         while(1) {
 715                                 switch(c) {
 716                                 case '*':
 717                                         next_char();
 718                                         skip_multiline_comment();
 719                                         eat_whitespace();
 720                                         return;
 721                                 case '/':
 722                                         next_char();
 723                                         skip_line_comment();
 724                                         eat_whitespace();
 725                                         return;
 726
 727                                 SKIP_TRIGRAPHS(
 728                                                 put_back('?');
 729                                         ,
 730                                                 c = '/';
 731                                                 return;
 732                                 )
 733
 734                                 case '\\':
 735                                         next_char();
 736                                         EAT_NEWLINE(break;)
 737                                         /* fallthrough */
 738                                 default:
 739                                         return;
 740                                 }
 741                         }
 742                         break;
 743
 744                 default:
 745                         return;
 746                 }
 747         }
 748 }
 749
 750 static
 751 void lexer_next_preprocessing_token(token_t *token)
 752 {
 753         while(1) {
 754                 switch(c) {
 755                 case ' ':
 756                 case '\t':
 757                         next_char();
 758                         break;
 759
 760                 MATCH_NEWLINE(
 761                         eat_whitespace();
 762                         if(c == '#') {
 763                                 next_char();
 764                                 parse_preprocessor_directive(token);
 765                                 return;
 766                         }
 767                         token->type = '\n';
 768                         return;
 769                 )
 770
 771                 case 'A' ... 'Z':
 772                 case 'a' ... 'z':
 773                 case '_':
 774                         parse_symbol(token);
 775                         return;
 776
 777                 case '0' ... '9':
 778                         parse_number(token);
 779                         return;
 780
 781                 case '"':
 782                         parse_string_literal(token);
 783                         return;
 784
 785                 case '\'':
 786                         parse_character_constant(token);
 787                         return;
 788
 789                 case '\\':
 790                         next_char();
 791                         if(c == '\n') {
 792                                 next_char();
 793                                 source_position.linenr++;
 794                                 break;
 795                         } else {
 796                                 parse_error("unexpected '\\' found");
 797                                 token->type = T_ERROR;
 798                         }
 799                         return;
 800
 801                 case '.':
 802                         MAYBE_PROLOG
 803                                 case '.':
 804                                         MAYBE_PROLOG
 805                                         MAYBE('.', T_DOTDOTDOT)
 806                                         ELSE_CODE(
 807                                                 put_back(c);
 808                                                 c = '.';
 809                                                 token->type = '.';
 810                                                 return;
 811                                         )
 812                         ELSE('.')
 813                 case '&':
 814                         MAYBE_PROLOG
 815                         MAYBE('&', T_ANDAND)
 816                         MAYBE('=', T_ANDEQUAL)
 817                         ELSE('&')
 818                 case '*':
 819                         MAYBE_PROLOG
 820                         MAYBE('=', T_ASTERISKEQUAL)
 821                         ELSE('*')
 822                 case '+':
 823                         MAYBE_PROLOG
 824                         MAYBE('+', T_PLUSPLUS)
 825                         MAYBE('=', T_PLUSEQUAL)
 826                         ELSE('+')
 827                 case '-':
 828                         MAYBE_PROLOG
 829                         MAYBE('-', T_MINUSMINUS)
 830                         MAYBE('=', T_MINUSEQUAL)
 831                         ELSE('-')
 832                 case '!':
 833                         MAYBE_PROLOG
 834                         MAYBE('=', T_EXCLAMATIONMARKEQUAL)
 835                         ELSE('!')
 836                 case '/':
 837                         MAYBE_PROLOG
 838                         MAYBE('=', T_SLASHEQUAL)
 839                                 case '*':
 840                                         next_char();
 841                                         skip_multiline_comment();
 842                                         lexer_next_preprocessing_token(token);
 843                                         return;
 844                                 case '/':
 845                                         next_char();
 846                                         skip_line_comment();
 847                                         lexer_next_preprocessing_token(token);
 848                                         return;
 849                         ELSE('/')
 850                 case '%':
 851                         MAYBE_PROLOG
 852                         MAYBE('>', T_PERCENTGREATER)
 853                         MAYBE('=', T_PERCENTEQUAL)
 854                                 case ':':
 855                                         MAYBE_PROLOG
 856                                                 case '%':
 857                                                         MAYBE_PROLOG
 858                                                         MAYBE(':', T_PERCENTCOLONPERCENTCOLON)
 859                                                         ELSE_CODE(
 860                                                                 put_back(c);
 861                                                                 c = '%';
 862                                                                 token->type = T_PERCENTCOLON;
 863                                                                 return;
 864                                                         )
 865                                         ELSE(T_PERCENTCOLON)
 866                         ELSE('%')
 867                 case '<':
 868                         MAYBE_PROLOG
 869                         MAYBE(':', T_LESSCOLON)
 870                         MAYBE('%', T_LESSPERCENT)
 871                                 case '<':
 872                                         MAYBE_PROLOG
 873                                         MAYBE('=', T_LESSLESSEQUAL)
 874                                         ELSE(T_LESSLESS)
 875                         ELSE('<')
 876                 case '>':
 877                         MAYBE_PROLOG
 878                                 case '>':
 879                                         MAYBE_PROLOG
 880                                         MAYBE('=', T_GREATERGREATEREQUAL)
 881                                         ELSE(T_GREATERGREATER)
 882                         ELSE('>')
 883                 case '^':
 884                         MAYBE_PROLOG
 885                         MAYBE('=', T_CARETEQUAL)
 886                         ELSE('^')
 887                 case '|':
 888                         MAYBE_PROLOG
 889                         MAYBE('=', T_PIPEEQUAL)
 890                         MAYBE('|', T_PIPEPIPE)
 891                         ELSE('|')
 892                 case ':':
 893                         MAYBE_PROLOG
 894                         MAYBE('>', T_COLONGREATER)
 895                         ELSE(':')
 896                 case '=':
 897                         MAYBE_PROLOG
 898                         MAYBE('=', T_EQUALEQUAL)
 899                         ELSE('=')
 900                 case '#':
 901                         MAYBE_PROLOG
 902                         MAYBE('#', T_HASHHASH)
 903                         ELSE('#')
 904
 905                 case '?':
 906                         next_char();
 907                         /* just a simple ? */
 908                         if(c != '?') {
 909                                 token->type = '?';
 910                                 return;
 911                         }
 912                         /* might be a trigraph */
 913                         next_char();
 914                         if(replace_trigraph()) {
 915                                 break;
 916                         }
 917                         put_back(c);
 918                         c = '?';
 919                         token->type = '?';
 920                         return;
 921
 922                 case '[':
 923                 case ']':
 924                 case '(':
 925                 case ')':
 926                 case '{':
 927                 case '}':
 928                 case '~':
 929                 case ';':
 930                 case ',':
 931                         token->type = c;
 932                         next_char();
 933                         return;
 934
 935                 case EOF:
 936                         token->type = T_EOF;
 937                         return;
 938
 939                 default:
 940                         next_char();
 941                         error_prefix();
 942                         fprintf(stderr, "unknown character '%c' found\n", c);
 943                         token->type = T_ERROR;
 944                         return;
 945                 }
 946         }
 947 }
 948
 949 void lexer_next_token(token_t *token)
 950 {
 951         do {
 952                 lexer_next_preprocessing_token(token);
 953         } while(token->type == '\n');
 954 }
 955
 956 void init_lexer(void)
 957 {
 958         strset_init(&stringset);
 959 }
 960
 961 void lexer_open_stream(FILE *stream, const char *input_name)
 962 {
 963         input                      = stream;
 964         source_position.linenr     = 0;
 965         source_position.input_name = input_name;
 966
 967         /* we place a virtual '\n' at the beginning so the lexer knows we're at the
 968          * beginning of a line */
 969         c = '\n';
 970 }
 971
 972 void exit_lexer(void)
 973 {
 974         strset_destroy(&stringset);
 975 }
 976
 977 static __attribute__((unused))
 978 void dbg_pos(const source_position_t source_position)
 979 {
 980         fprintf(stdout, "%s:%d\n", source_position.input_name, source_position.linenr);
 981         fflush(stdout);
 982 }