nsz Git - cparser/blob - lexer.c

   1 #include <config.h>
   2
   3 #include "lexer.h"
   4 #include "token_t.h"
   5 #include "symbol_table_t.h"
   6 #include "adt/error.h"
   7 #include "adt/strset.h"
   8 #include "adt/util.h"
   9
  10 #include <assert.h>
  11 #include <errno.h>
  12 #include <string.h>
  13 #include <ctype.h>
  14
  15 //#define DEBUG_CHARS
  16 #define MAX_PUTBACK 3
  17
  18 static int         c;
  19 token_t            lexer_token;
  20 static FILE       *input;
  21 static char        buf[1024 + MAX_PUTBACK];
  22 static const char *bufend;
  23 static const char *bufpos;
  24 static strset_t    stringset;
  25 //static FILE      **input_stack;
  26 //static char      **buf_stack;
  27
  28 static void error_prefix_at(const char *input_name, unsigned linenr)
  29 {
  30         fprintf(stderr, "%s:%u: Error: ", input_name, linenr);
  31 }
  32
  33 static void error_prefix(void)
  34 {
  35         error_prefix_at(lexer_token.source_position.input_name,
  36                         lexer_token.source_position.linenr);
  37 }
  38
  39 static void parse_error(const char *msg)
  40 {
  41         error_prefix();
  42         fprintf(stderr, "%s\n", msg);
  43 }
  44
  45 static inline void next_real_char(void)
  46 {
  47         bufpos++;
  48         if(bufpos >= bufend) {
  49                 size_t s = fread(buf + MAX_PUTBACK, 1, sizeof(buf) - MAX_PUTBACK,
  50                                  input);
  51                 if(s == 0) {
  52                         c = EOF;
  53                         return;
  54                 }
  55                 bufpos = buf + MAX_PUTBACK;
  56                 bufend = buf + MAX_PUTBACK + s;
  57         }
  58         c = *(bufpos);
  59 }
  60
  61 static inline void put_back(int pc)
  62 {
  63         char *p = (char*) bufpos - 1;
  64         bufpos--;
  65         assert(p >= buf);
  66         *p = pc;
  67
  68 #ifdef DEBUG_CHARS
  69         printf("putback '%c'\n", pc);
  70 #endif
  71 }
  72
  73 static inline void next_char(void);
  74
  75 #define MATCH_NEWLINE(code)                   \
  76         case '\r':                                \
  77                 next_char();                          \
  78                 if(c == '\n') {                       \
  79                         next_char();                      \
  80                 }                                     \
  81                 lexer_token.source_position.linenr++; \
  82                 code;                                 \
  83         case '\n':                                \
  84                 next_char();                          \
  85                 lexer_token.source_position.linenr++; \
  86                 code;
  87
  88 static void maybe_concat_lines(void)
  89 {
  90         next_char();
  91         switch(c) {
  92         MATCH_NEWLINE(return;)
  93
  94         default:
  95                 break;
  96         }
  97
  98         put_back(c);
  99         c = '\\';
 100 }
 101
 102 static inline void next_char(void)
 103 {
 104         next_real_char();
 105
 106         /* filter trigraphs */
 107         if(UNLIKELY(c == '\\')) {
 108                 maybe_concat_lines();
 109                 goto end_of_next_char;
 110         }
 111
 112         if(LIKELY(c != '?'))
 113                 goto end_of_next_char;
 114
 115         next_real_char();
 116         if(LIKELY(c != '?')) {
 117                 put_back(c);
 118                 c = '?';
 119                 goto end_of_next_char;
 120         }
 121
 122         next_real_char();
 123         switch(c) {
 124         case '=': c = '#'; break;
 125         case '(': c = '['; break;
 126         case '/': c = '\\'; maybe_concat_lines(); break;
 127         case ')': c = ']'; break;
 128         case '\'': c = '^'; break;
 129         case '<': c = '{'; break;
 130         case '!': c = '|'; break;
 131         case '>': c = '}'; break;
 132         case '-': c = '~'; break;
 133         default:
 134                 put_back('?');
 135                 put_back(c);
 136                 c = '?';
 137                 break;
 138         }
 139
 140 end_of_next_char:
 141 #ifdef DEBUG_CHARS
 142         printf("nchar '%c'\n", c);
 143 #else
 144         ;
 145 #endif
 146 }
 147
 148 #define SYMBOL_CHARS  \
 149         case 'a':         \
 150         case 'b':         \
 151         case 'c':         \
 152         case 'd':         \
 153         case 'e':         \
 154         case 'f':         \
 155         case 'g':         \
 156         case 'h':         \
 157         case 'i':         \
 158         case 'j':         \
 159         case 'k':         \
 160         case 'l':         \
 161         case 'm':         \
 162         case 'n':         \
 163         case 'o':         \
 164         case 'p':         \
 165         case 'q':         \
 166         case 'r':         \
 167         case 's':         \
 168         case 't':         \
 169         case 'u':         \
 170         case 'v':         \
 171         case 'w':         \
 172         case 'x':         \
 173         case 'y':         \
 174         case 'z':         \
 175         case 'A':         \
 176         case 'B':         \
 177         case 'C':         \
 178         case 'D':         \
 179         case 'E':         \
 180         case 'F':         \
 181         case 'G':         \
 182         case 'H':         \
 183         case 'I':         \
 184         case 'J':         \
 185         case 'K':         \
 186         case 'L':         \
 187         case 'M':         \
 188         case 'N':         \
 189         case 'O':         \
 190         case 'P':         \
 191         case 'Q':         \
 192         case 'R':         \
 193         case 'S':         \
 194         case 'T':         \
 195         case 'U':         \
 196         case 'V':         \
 197         case 'W':         \
 198         case 'X':         \
 199         case 'Y':         \
 200         case 'Z':         \
 201         case '_':
 202
 203 #define DIGITS        \
 204         case '0':         \
 205         case '1':         \
 206         case '2':         \
 207         case '3':         \
 208         case '4':         \
 209         case '5':         \
 210         case '6':         \
 211         case '7':         \
 212         case '8':         \
 213         case '9':
 214
 215 static void parse_symbol(void)
 216 {
 217         symbol_t *symbol;
 218         char     *string;
 219
 220         obstack_1grow(&symbol_obstack, c);
 221         next_char();
 222
 223         while(1) {
 224                 switch(c) {
 225                 DIGITS
 226                 SYMBOL_CHARS
 227                         obstack_1grow(&symbol_obstack, c);
 228                         next_char();
 229                         break;
 230
 231                 default:
 232                         goto end_symbol;
 233                 }
 234         }
 235
 236 end_symbol:
 237         obstack_1grow(&symbol_obstack, '\0');
 238
 239         string = obstack_finish(&symbol_obstack);
 240         symbol = symbol_table_insert(string);
 241
 242         lexer_token.type     = symbol->ID;
 243         lexer_token.v.symbol = symbol;
 244
 245         if(symbol->string != string) {
 246                 obstack_free(&symbol_obstack, string);
 247         }
 248 }
 249
 250 static void parse_number_hex(void)
 251 {
 252         assert(c == 'x' || c == 'X');
 253         next_char();
 254
 255         if (!isdigit(c) &&
 256                 !('A' <= c && c <= 'F') &&
 257                 !('a' <= c && c <= 'f')) {
 258                 parse_error("premature end of hex number literal");
 259                 lexer_token.type = T_ERROR;
 260                 return;
 261         }
 262
 263         int value = 0;
 264         while(1) {
 265                 if (isdigit(c)) {
 266                         value = 16 * value + c - '0';
 267                 } else if ('A' <= c && c <= 'F') {
 268                         value = 16 * value + c - 'A' + 10;
 269                 } else if ('a' <= c && c <= 'f') {
 270                         value = 16 * value + c - 'a' + 10;
 271                 } else {
 272                         lexer_token.type     = T_INTEGER;
 273                         lexer_token.v.intvalue = value;
 274                         return;
 275                 }
 276                 next_char();
 277         }
 278 }
 279
 280 static void parse_number_oct(void)
 281 {
 282         int value = 0;
 283         while(1) {
 284                 if ('0' <= c && c <= '7') {
 285                         value = 8 * value + c - '0';
 286                 } else if (c == '8' || c == '9') {
 287                         parse_error("invalid octal number");
 288                         lexer_token.type = T_ERROR;
 289                         return;
 290                 } else {
 291                         lexer_token.type       = T_INTEGER;
 292                         lexer_token.v.intvalue = value;
 293                         return;
 294                 }
 295                 next_char();
 296         }
 297 }
 298
 299 static void parse_number_dec(void)
 300 {
 301         int value = 0;
 302
 303         for(;;) {
 304                 if (isdigit(c)) {
 305                         value = 10 * value + c - '0';
 306                 } else {
 307                         lexer_token.type       = T_INTEGER;
 308                         lexer_token.v.intvalue = value;
 309                         return;
 310                 }
 311                 next_char();
 312         }
 313 }
 314
 315 static void parse_integer_suffix(void)
 316 {
 317         if(c == 'U' || c == 'U') {
 318                 /* TODO do something with the suffixes... */
 319                 next_char();
 320                 if(c == 'L' || c == 'l') {
 321                         next_char();
 322                         if(c == 'L' || c == 'l') {
 323                                 next_char();
 324                         }
 325                 }
 326         } else if(c == 'l' || c == 'L') {
 327                 next_char();
 328                 if(c == 'l' || c == 'L') {
 329                         next_char();
 330                         if(c == 'u' || c == 'U') {
 331                                 next_char();
 332                         }
 333                 } else if(c == 'u' || c == 'U') {
 334                         next_char();
 335                 }
 336         }
 337 }
 338
 339 static void parse_number(void)
 340 {
 341         if (c == '0') {
 342                 next_char();
 343                 switch (c) {
 344                         case 'X':
 345                         case 'x': parse_number_hex(); break;
 346                         default:  parse_number_oct(); break;
 347                 }
 348         } else {
 349                 parse_number_dec();
 350         }
 351
 352         parse_integer_suffix();
 353 }
 354
 355 static int parse_octal_sequence(void)
 356 {
 357         int value = 0;
 358         while(1) {
 359                 if(c < '0' || c > '7')
 360                         break;
 361                 value = 8 * value + c - '0';
 362                 next_char();
 363         }
 364
 365         return value;
 366 }
 367
 368 static int parse_hex_sequence(void)
 369 {
 370         int value = 0;
 371         while(1) {
 372                 if (c >= '0' && c <= '9') {
 373                         value = 16 * value + c - '0';
 374                 } else if ('A' <= c && c <= 'F') {
 375                         value = 16 * value + c - 'A' + 10;
 376                 } else if ('a' <= c && c <= 'f') {
 377                         value = 16 * value + c - 'a' + 10;
 378                 } else {
 379                         break;
 380                 }
 381                 next_char();
 382         }
 383
 384         return value;
 385 }
 386
 387 static int parse_escape_sequence(void)
 388 {
 389         while(1) {
 390                 int ec = c;
 391                 next_char();
 392
 393                 switch(ec) {
 394                 case '"':  return '"';
 395                 case '\'': return'\'';
 396                 case '\\': return '\\';
 397                 case '?': return '\?';
 398                 case 'a': return '\a';
 399                 case 'b': return '\b';
 400                 case 'f': return '\f';
 401                 case 'n': return '\n';
 402                 case 'r': return '\r';
 403                 case 't': return '\t';
 404                 case 'v': return '\v';
 405                 case 'x':
 406                         return parse_hex_sequence();
 407                 case '0':
 408                 case '1':
 409                 case '2':
 410                 case '3':
 411                 case '4':
 412                 case '5':
 413                 case '6':
 414                 case '7':
 415                         return parse_octal_sequence();
 416                 case EOF:
 417                         parse_error("reached end of file while parsing escape sequence");
 418                         return EOF;
 419                 default:
 420                         parse_error("unknown escape sequence");
 421                         return EOF;
 422                 }
 423         }
 424 }
 425
 426 const char *concat_strings(const char *s1, const char *s2)
 427 {
 428         size_t  len1   = strlen(s1);
 429         size_t  len2   = strlen(s2);
 430
 431         char   *concat = obstack_alloc(&symbol_obstack, len1 + len2 + 1);
 432         memcpy(concat, s1, len1);
 433         memcpy(concat + len1, s2, len2 + 1);
 434
 435         const char *result = strset_insert(&stringset, concat);
 436         if(result != concat) {
 437                 obstack_free(&symbol_obstack, concat);
 438         }
 439
 440         return result;
 441 }
 442
 443 static void parse_string_literal(void)
 444 {
 445         unsigned    start_linenr = lexer_token.source_position.linenr;
 446         char       *string;
 447         const char *result;
 448
 449         assert(c == '"');
 450         next_char();
 451
 452         while(1) {
 453                 switch(c) {
 454                 case '\\':
 455                         next_char();
 456                         int ec = parse_escape_sequence();
 457                         obstack_1grow(&symbol_obstack, ec);
 458                         break;
 459
 460                 case EOF:
 461                         error_prefix_at(lexer_token.source_position.input_name,
 462                                         start_linenr);
 463                         fprintf(stderr, "string has no end\n");
 464                         lexer_token.type = T_ERROR;
 465                         return;
 466
 467                 case '"':
 468                         next_char();
 469                         goto end_of_string;
 470
 471                 default:
 472                         obstack_1grow(&symbol_obstack, c);
 473                         next_char();
 474                         break;
 475                 }
 476         }
 477
 478 end_of_string:
 479
 480         /* TODO: concatenate multiple strings separated by whitespace... */
 481
 482         /* add finishing 0 to the string */
 483         obstack_1grow(&symbol_obstack, '\0');
 484         string = obstack_finish(&symbol_obstack);
 485
 486         /* check if there is already a copy of the string */
 487         result = strset_insert(&stringset, string);
 488         if(result != string) {
 489                 obstack_free(&symbol_obstack, string);
 490         }
 491
 492         lexer_token.type     = T_STRING_LITERAL;
 493         lexer_token.v.string = result;
 494 }
 495
 496 static void parse_character_constant(void)
 497 {
 498         assert(c == '\'');
 499         next_char();
 500
 501         int found_char = 0;
 502         while(1) {
 503                 switch(c) {
 504                 case '\\':
 505                         next_char();
 506                         found_char = parse_escape_sequence();
 507                         break;
 508
 509                 MATCH_NEWLINE(
 510                         parse_error("newline while parsing character constant");
 511                         break;
 512                 )
 513
 514                 case '\'':
 515                         next_char();
 516                         goto end_of_char_constant;
 517
 518                 case EOF:
 519                         parse_error("EOF while parsing character constant");
 520                         lexer_token.type = T_ERROR;
 521                         return;
 522
 523                 default:
 524                         if(found_char != 0) {
 525                                 parse_error("more than 1 characters in character "
 526                                             "constant");
 527                                 goto end_of_char_constant;
 528                         } else {
 529                                 found_char = c;
 530                                 next_char();
 531                         }
 532                         break;
 533                 }
 534         }
 535
 536 end_of_char_constant:
 537         lexer_token.type       = T_INTEGER;
 538         lexer_token.v.intvalue = found_char;
 539 }
 540
 541 static void skip_multiline_comment(void)
 542 {
 543         unsigned start_linenr = lexer_token.source_position.linenr;
 544
 545         while(1) {
 546                 switch(c) {
 547                 case '*':
 548                         next_char();
 549                         if(c == '/') {
 550                                 next_char();
 551                                 return;
 552                         }
 553                         break;
 554
 555                 MATCH_NEWLINE(break;)
 556
 557                 case EOF:
 558                         error_prefix_at(lexer_token.source_position.input_name,
 559                                         start_linenr);
 560                         fprintf(stderr, "at end of file while looking for comment end\n");
 561                         return;
 562
 563                 default:
 564                         next_char();
 565                         break;
 566                 }
 567         }
 568 }
 569
 570 static void skip_line_comment(void)
 571 {
 572         while(1) {
 573                 switch(c) {
 574                 case EOF:
 575                         return;
 576
 577                 case '\n':
 578                 case '\r':
 579                         return;
 580
 581                 default:
 582                         next_char();
 583                         break;
 584                 }
 585         }
 586 }
 587
 588 static token_t pp_token;
 589
 590 static inline void next_pp_token(void)
 591 {
 592         lexer_next_preprocessing_token();
 593         pp_token = lexer_token;
 594 }
 595
 596 static void eat_until_newline(void)
 597 {
 598         while(pp_token.type != '\n' && pp_token.type != T_EOF) {
 599                 next_pp_token();
 600         }
 601 }
 602
 603 static void error_directive(void)
 604 {
 605         error_prefix();
 606         fprintf(stderr, "#error directive: \n");
 607
 608         /* parse pp-tokens until new-line */
 609 }
 610
 611 static void define_directive(void)
 612 {
 613         lexer_next_preprocessing_token();
 614         if(lexer_token.type != T_IDENTIFIER) {
 615                 parse_error("expected identifier after #define\n");
 616                 eat_until_newline();
 617         }
 618 }
 619
 620 static void ifdef_directive(int is_ifndef)
 621 {
 622         (void) is_ifndef;
 623         lexer_next_preprocessing_token();
 624         //expect_identifier();
 625         //extect_newline();
 626 }
 627
 628 static void endif_directive(void)
 629 {
 630         //expect_newline();
 631 }
 632
 633 static void parse_line_directive(void)
 634 {
 635         if(pp_token.type != T_INTEGER) {
 636                 parse_error("expected integer");
 637         } else {
 638                 lexer_token.source_position.linenr = pp_token.v.intvalue - 1;
 639                 next_pp_token();
 640         }
 641         if(pp_token.type == T_STRING_LITERAL) {
 642                 lexer_token.source_position.input_name = pp_token.v.string;
 643                 next_pp_token();
 644         }
 645
 646         eat_until_newline();
 647 }
 648
 649 static void parse_preprocessor_identifier(void)
 650 {
 651         assert(pp_token.type == T_IDENTIFIER);
 652         symbol_t *symbol = pp_token.v.symbol;
 653
 654         switch(symbol->pp_ID) {
 655         case TP_include:
 656                 printf("include - enable header name parsing!\n");
 657                 break;
 658         case TP_define:
 659                 define_directive();
 660                 break;
 661         case TP_ifdef:
 662                 ifdef_directive(0);
 663                 break;
 664         case TP_ifndef:
 665                 ifdef_directive(1);
 666                 break;
 667         case TP_endif:
 668                 endif_directive();
 669                 break;
 670         case TP_line:
 671                 next_pp_token();
 672                 parse_line_directive();
 673                 break;
 674         case TP_if:
 675         case TP_else:
 676         case TP_elif:
 677         case TP_undef:
 678         case TP_error:
 679                 error_directive();
 680                 break;
 681         case TP_pragma:
 682                 break;
 683         }
 684 }
 685
 686 static void parse_preprocessor_directive()
 687 {
 688         next_pp_token();
 689
 690         switch(pp_token.type) {
 691         case T_IDENTIFIER:
 692                 parse_preprocessor_identifier();
 693                 break;
 694         case T_INTEGER:
 695                 parse_line_directive();
 696                 break;
 697         default:
 698                 parse_error("invalid preprocessor directive");
 699                 eat_until_newline();
 700                 break;
 701         }
 702 }
 703
 704 #define MAYBE_PROLOG                                       \
 705                         next_char();                                   \
 706                         while(1) {                                     \
 707                                 switch(c) {
 708
 709 #define MAYBE(ch, set_type)                                \
 710                                 case ch:                                   \
 711                                         next_char();                           \
 712                                         lexer_token.type = set_type;           \
 713                                         return;
 714
 715 #define ELSE_CODE(code)                                    \
 716                                 default:                                   \
 717                                         code;                                  \
 718                                 }                                          \
 719                         } /* end of while(1) */                        \
 720                         break;
 721
 722 #define ELSE(set_type)                                     \
 723                 ELSE_CODE(                                         \
 724                         lexer_token.type = set_type;                   \
 725                         return;                                        \
 726                 )
 727
 728 void lexer_next_preprocessing_token(void)
 729 {
 730         while(1) {
 731                 switch(c) {
 732                 case ' ':
 733                 case '\t':
 734                         next_char();
 735                         break;
 736
 737                 MATCH_NEWLINE(
 738                         lexer_token.type = '\n';
 739                         return;
 740                 )
 741
 742                 SYMBOL_CHARS
 743                         parse_symbol();
 744                         return;
 745
 746                 DIGITS
 747                         parse_number();
 748                         return;
 749
 750                 case '"':
 751                         parse_string_literal();
 752                         return;
 753
 754                 case '\'':
 755                         parse_character_constant();
 756                         return;
 757
 758                 case '.':
 759                         MAYBE_PROLOG
 760                                 case '.':
 761                                         MAYBE_PROLOG
 762                                         MAYBE('.', T_DOTDOTDOT)
 763                                         ELSE_CODE(
 764                                                 put_back(c);
 765                                                 c = '.';
 766                                                 lexer_token.type = '.';
 767                                                 return;
 768                                         )
 769                         ELSE('.')
 770                 case '&':
 771                         MAYBE_PROLOG
 772                         MAYBE('&', T_ANDAND)
 773                         MAYBE('=', T_ANDEQUAL)
 774                         ELSE('&')
 775                 case '*':
 776                         MAYBE_PROLOG
 777                         MAYBE('=', T_ASTERISKEQUAL)
 778                         ELSE('*')
 779                 case '+':
 780                         MAYBE_PROLOG
 781                         MAYBE('+', T_PLUSPLUS)
 782                         MAYBE('=', T_PLUSEQUAL)
 783                         ELSE('+')
 784                 case '-':
 785                         MAYBE_PROLOG
 786                         MAYBE('>', T_MINUSGREATER)
 787                         MAYBE('-', T_MINUSMINUS)
 788                         MAYBE('=', T_MINUSEQUAL)
 789                         ELSE('-')
 790                 case '!':
 791                         MAYBE_PROLOG
 792                         MAYBE('=', T_EXCLAMATIONMARKEQUAL)
 793                         ELSE('!')
 794                 case '/':
 795                         MAYBE_PROLOG
 796                         MAYBE('=', T_SLASHEQUAL)
 797                                 case '*':
 798                                         next_char();
 799                                         skip_multiline_comment();
 800                                         lexer_next_preprocessing_token();
 801                                         return;
 802                                 case '/':
 803                                         next_char();
 804                                         skip_line_comment();
 805                                         lexer_next_preprocessing_token();
 806                                         return;
 807                         ELSE('/')
 808                 case '%':
 809                         MAYBE_PROLOG
 810                         MAYBE('>', T_PERCENTGREATER)
 811                         MAYBE('=', T_PERCENTEQUAL)
 812                                 case ':':
 813                                         MAYBE_PROLOG
 814                                                 case '%':
 815                                                         MAYBE_PROLOG
 816                                                         MAYBE(':', T_PERCENTCOLONPERCENTCOLON)
 817                                                         ELSE_CODE(
 818                                                                 put_back(c);
 819                                                                 c = '%';
 820                                                                 lexer_token.type = T_PERCENTCOLON;
 821                                                                 return;
 822                                                         )
 823                                         ELSE(T_PERCENTCOLON)
 824                         ELSE('%')
 825                 case '<':
 826                         MAYBE_PROLOG
 827                         MAYBE(':', T_LESSCOLON)
 828                         MAYBE('%', T_LESSPERCENT)
 829                         MAYBE('=', T_LESSEQUAL)
 830                                 case '<':
 831                                         MAYBE_PROLOG
 832                                         MAYBE('=', T_LESSLESSEQUAL)
 833                                         ELSE(T_LESSLESS)
 834                         ELSE('<')
 835                 case '>':
 836                         MAYBE_PROLOG
 837                         MAYBE('=', T_GREATEREQUAL)
 838                                 case '>':
 839                                         MAYBE_PROLOG
 840                                         MAYBE('=', T_GREATERGREATEREQUAL)
 841                                         ELSE(T_GREATERGREATER)
 842                         ELSE('>')
 843                 case '^':
 844                         MAYBE_PROLOG
 845                         MAYBE('=', T_CARETEQUAL)
 846                         ELSE('^')
 847                 case '|':
 848                         MAYBE_PROLOG
 849                         MAYBE('=', T_PIPEEQUAL)
 850                         MAYBE('|', T_PIPEPIPE)
 851                         ELSE('|')
 852                 case ':':
 853                         MAYBE_PROLOG
 854                         MAYBE('>', T_COLONGREATER)
 855                         ELSE(':')
 856                 case '=':
 857                         MAYBE_PROLOG
 858                         MAYBE('=', T_EQUALEQUAL)
 859                         ELSE('=')
 860                 case '#':
 861                         MAYBE_PROLOG
 862                         MAYBE('#', T_HASHHASH)
 863                         ELSE('#')
 864
 865                 case '?':
 866                 case '[':
 867                 case ']':
 868                 case '(':
 869                 case ')':
 870                 case '{':
 871                 case '}':
 872                 case '~':
 873                 case ';':
 874                 case ',':
 875                 case '\\':
 876                         lexer_token.type = c;
 877                         next_char();
 878                         return;
 879
 880                 case EOF:
 881                         lexer_token.type = T_EOF;
 882                         return;
 883
 884                 default:
 885                         next_char();
 886                         error_prefix();
 887                         fprintf(stderr, "unknown character '%c' found\n", c);
 888                         lexer_token.type = T_ERROR;
 889                         return;
 890                 }
 891         }
 892 }
 893
 894 void lexer_next_token(void)
 895 {
 896         lexer_next_preprocessing_token();
 897         if(lexer_token.type != '\n')
 898                 return;
 899
 900 newline_found:
 901         do {
 902                 lexer_next_preprocessing_token();
 903         } while(lexer_token.type == '\n');
 904
 905         if(lexer_token.type == '#') {
 906                 parse_preprocessor_directive();
 907                 goto newline_found;
 908         }
 909 }
 910
 911 void init_lexer(void)
 912 {
 913         strset_init(&stringset);
 914 }
 915
 916 void lexer_open_stream(FILE *stream, const char *input_name)
 917 {
 918         input                                  = stream;
 919         lexer_token.source_position.linenr     = 1;
 920         lexer_token.source_position.input_name = input_name;
 921
 922         next_char();
 923 }
 924
 925 void exit_lexer(void)
 926 {
 927         strset_destroy(&stringset);
 928 }
 929
 930 static __attribute__((unused))
 931 void dbg_pos(const source_position_t source_position)
 932 {
 933         fprintf(stdout, "%s:%d\n", source_position.input_name,
 934                 source_position.linenr);
 935         fflush(stdout);
 936 }