Remove unused macro.
[cparser] / lexer.c
1 /*
2  * This file is part of cparser.
3  * Copyright (C) 2007-2009 Matthias Braun <matze@braunis.de>
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License
7  * as published by the Free Software Foundation; either version 2
8  * of the License, or (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
18  * 02111-1307, USA.
19  */
20 #include <config.h>
21
22 #include "diagnostic.h"
23 #include "lexer.h"
24 #include "symbol_t.h"
25 #include "token_t.h"
26 #include "symbol_table_t.h"
27 #include "adt/error.h"
28 #include "adt/strset.h"
29 #include "adt/util.h"
30 #include "types.h"
31 #include "type_t.h"
32 #include "target_architecture.h"
33 #include "parser.h"
34 #include "warning.h"
35 #include "lang_features.h"
36
37 #include <assert.h>
38 #include <errno.h>
39 #include <string.h>
40 #include <stdbool.h>
41 #include <ctype.h>
42
43 #ifndef _WIN32
44 #include <strings.h>
45 #endif
46
47 //#define DEBUG_CHARS
48 #define MAX_PUTBACK 3
49 #define BUF_SIZE    1024
50
51 static utf32             c;
52 static source_position_t lexer_pos;
53 token_t                  lexer_token;
54 static symbol_t         *symbol_L;
55 static FILE             *input;
56 static utf32             buf[BUF_SIZE + MAX_PUTBACK];
57 static const utf32      *bufend;
58 static const utf32      *bufpos;
59 static strset_t          stringset;
60 bool                     allow_dollar_in_symbol = true;
61
62 /**
63  * Prints a parse error message at the current token.
64  *
65  * @param msg   the error message
66  */
67 static void parse_error(const char *msg)
68 {
69         errorf(&lexer_pos, "%s", msg);
70 }
71
72 /**
73  * Prints an internal error message at the current token.
74  *
75  * @param msg   the error message
76  */
77 static NORETURN internal_error(const char *msg)
78 {
79         internal_errorf(&lexer_pos, "%s", msg);
80 }
81
82 static size_t read_block(unsigned char *const read_buf, size_t const n)
83 {
84         size_t const s = fread(read_buf, 1, n, input);
85         if (s == 0) {
86                 /* on OS/X ferror appears to return true on eof as well when running
87                  * the application in gdb... */
88                 if (!feof(input) && ferror(input))
89                         parse_error("read from input failed");
90                 buf[MAX_PUTBACK] = EOF;
91                 bufpos           = buf + MAX_PUTBACK;
92                 bufend           = buf + MAX_PUTBACK + 1;
93         }
94         return s;
95 }
96
97 static void decode_iso_8859_1(void)
98 {
99         unsigned char read_buf[BUF_SIZE];
100         size_t const s = read_block(read_buf, sizeof(read_buf));
101         if (s == 0)
102                 return;
103
104         unsigned char const *src = read_buf;
105         unsigned char const *end = read_buf + s;
106         utf32               *dst = buf + MAX_PUTBACK;
107         while (src != end)
108                 *dst++ = *src++;
109
110         bufpos = buf + MAX_PUTBACK;
111         bufend = dst;
112 }
113
114 static void decode_iso_8859_15(void)
115 {
116         unsigned char read_buf[BUF_SIZE];
117         size_t const s = read_block(read_buf, sizeof(read_buf));
118         if (s == 0)
119                 return;
120
121         unsigned char const *src = read_buf;
122         unsigned char const *end = read_buf + s;
123         utf32               *dst = buf + MAX_PUTBACK;
124         while (src != end) {
125                 utf32 tc = *src++;
126                 switch (tc) {
127                         case 0xA4: tc = 0x20AC; break; // €
128                         case 0xA6: tc = 0x0160; break; // Š
129                         case 0xA8: tc = 0x0161; break; // š
130                         case 0xB4: tc = 0x017D; break; // Ž
131                         case 0xB8: tc = 0x017E; break; // ž
132                         case 0xBC: tc = 0x0152; break; // Œ
133                         case 0xBD: tc = 0x0153; break; // œ
134                         case 0xBE: tc = 0x0178; break; // Ÿ
135                 }
136                 *dst++ = tc;
137         }
138
139         bufpos = buf + MAX_PUTBACK;
140         bufend = dst;
141 }
142
143 static void decode_utf8(void)
144 {
145         static utf32  part_decoded_min_code;
146         static utf32  part_decoded_char;
147         static size_t part_decoded_rest_len;
148
149         do {
150                 unsigned char read_buf[BUF_SIZE];
151                 size_t const s = read_block(read_buf, sizeof(read_buf));
152                 if (s == 0) {
153                         if (part_decoded_rest_len > 0)
154                                 parse_error("incomplete input char at end of input");
155                         return;
156                 }
157
158                 unsigned char const *src = read_buf;
159                 unsigned char const *end = read_buf + s;
160                 utf32               *dst = buf + MAX_PUTBACK;
161                 utf32                decoded;
162                 utf32                min_code;
163
164                 if (part_decoded_rest_len != 0) {
165                         min_code              = part_decoded_min_code;
166                         decoded               = part_decoded_char;
167                         size_t const rest_len = part_decoded_rest_len;
168                         part_decoded_rest_len = 0;
169                         switch (rest_len) {
170                                 case 4:  goto realign;
171                                 case 3:  goto three_more;
172                                 case 2:  goto two_more;
173                                 default: goto one_more;
174                         }
175                 }
176
177                 while (src != end) {
178                         if ((*src & 0x80) == 0) {
179                                 decoded = *src++;
180                         } else if ((*src & 0xE0) == 0xC0) {
181                                 min_code = 0x80;
182                                 decoded  = *src++ & 0x1F;
183 one_more:
184                                 if (src == end) {
185                                         part_decoded_min_code = min_code;
186                                         part_decoded_char     = decoded;
187                                         part_decoded_rest_len = 1;
188                                         break;
189                                 }
190                                 if ((*src & 0xC0) == 0x80) {
191                                         decoded = (decoded << 6) | (*src++ & 0x3F);
192                                 } else {
193                                         goto invalid_char;
194                                 }
195                                 if (decoded < min_code                      ||
196                                                 decoded > 0x10FFFF                      ||
197                                                 (0xD800 <= decoded && decoded < 0xE000) || // high/low surrogates
198                                                 (0xFDD0 <= decoded && decoded < 0xFDF0) || // noncharacters
199                                                 (decoded & 0xFFFE) == 0xFFFE) {            // noncharacters
200                                         parse_error("invalid byte sequence in input");
201                                 }
202                         } else if ((*src & 0xF0) == 0xE0) {
203                                 min_code = 0x800;
204                                 decoded  = *src++ & 0x0F;
205 two_more:
206                                 if (src == end) {
207                                         part_decoded_min_code = min_code;
208                                         part_decoded_char     = decoded;
209                                         part_decoded_rest_len = 2;
210                                         break;
211                                 }
212                                 if ((*src & 0xC0) == 0x80) {
213                                         decoded = (decoded << 6) | (*src++ & 0x3F);
214                                 } else {
215                                         goto invalid_char;
216                                 }
217                                 goto one_more;
218                         } else if ((*src & 0xF8) == 0xF0) {
219                                 min_code = 0x10000;
220                                 decoded  = *src++ & 0x07;
221 three_more:
222                                 if (src == end) {
223                                         part_decoded_min_code = min_code;
224                                         part_decoded_char     = decoded;
225                                         part_decoded_rest_len = 3;
226                                         break;
227                                 }
228                                 if ((*src & 0xC0) == 0x80) {
229                                         decoded = (decoded << 6) | (*src++ & 0x3F);
230                                 } else {
231                                         goto invalid_char;
232                                 }
233                                 goto two_more;
234                         } else {
235 invalid_char:
236                                 parse_error("invalid byte sequence in input");
237 realign:
238                                 do {
239                                         ++src;
240                                         if (src == end) {
241                                                 part_decoded_rest_len = 4;
242                                                 break;
243                                         }
244                                 } while ((*src & 0xC0) == 0x80 || (*src & 0xF8) == 0xF8);
245                                 continue;
246                         }
247                         *dst++ = decoded;
248                 }
249
250                 bufpos = buf + MAX_PUTBACK;
251                 bufend = dst;
252         } while (bufpos == bufend);
253 }
254
255 static void decode_windows_1252(void)
256 {
257         unsigned char read_buf[BUF_SIZE];
258         size_t const s = read_block(read_buf, sizeof(read_buf));
259         if (s == 0)
260                 return;
261
262         unsigned char const *src = read_buf;
263         unsigned char const *end = read_buf + s;
264         utf32               *dst = buf + MAX_PUTBACK;
265         while (src != end) {
266                 utf32 tc = *src++;
267                 switch (tc) {
268                         case 0x80: tc = 0x20AC; break; // €
269                         case 0x82: tc = 0x201A; break; // ‚
270                         case 0x83: tc = 0x0192; break; // ƒ
271                         case 0x84: tc = 0x201E; break; // „
272                         case 0x85: tc = 0x2026; break; // …
273                         case 0x86: tc = 0x2020; break; // †
274                         case 0x87: tc = 0x2021; break; // ‡
275                         case 0x88: tc = 0x02C6; break; // ˆ
276                         case 0x89: tc = 0x2030; break; // ‰
277                         case 0x8A: tc = 0x0160; break; // Š
278                         case 0x8B: tc = 0x2039; break; // ‹
279                         case 0x8C: tc = 0x0152; break; // Œ
280                         case 0x8E: tc = 0x017D; break; // Ž
281                         case 0x91: tc = 0x2018; break; // ‘
282                         case 0x92: tc = 0x2019; break; // ’
283                         case 0x93: tc = 0x201C; break; // “
284                         case 0x94: tc = 0x201D; break; // ”
285                         case 0x95: tc = 0x2022; break; // •
286                         case 0x96: tc = 0x2013; break; // –
287                         case 0x97: tc = 0x2014; break; // —
288                         case 0x98: tc = 0x02DC; break; // ˜
289                         case 0x99: tc = 0x2122; break; // ™
290                         case 0x9A: tc = 0x0161; break; // š
291                         case 0x9B: tc = 0x203A; break; // ›
292                         case 0x9C: tc = 0x0153; break; // œ
293                         case 0x9E: tc = 0x017E; break; // ž
294                         case 0x9F: tc = 0x0178; break; // Ÿ
295                 }
296                 *dst++ = tc;
297         }
298
299         bufpos = buf + MAX_PUTBACK;
300         bufend = dst;
301 }
302
303 typedef void (*decoder_t)(void);
304
305 static decoder_t decoder = decode_utf8;
306
307 typedef struct named_decoder_t {
308         char const *name;
309         decoder_t   decoder;
310 } named_decoder_t;
311
312 static named_decoder_t const decoders[] = {
313         { "CP819",           decode_iso_8859_1   }, // offical alias
314         { "IBM819",          decode_iso_8859_1   }, // offical alias
315         { "ISO-8859-1",      decode_iso_8859_1   }, // offical alias
316         { "ISO-8859-15",     decode_iso_8859_15  }, // offical name
317         { "ISO8859-1",       decode_iso_8859_1   },
318         { "ISO8859-15",      decode_iso_8859_15  },
319         { "ISO_8859-1",      decode_iso_8859_1   }, // offical alias
320         { "ISO_8859-15",     decode_iso_8859_15  }, // offical alias
321         { "ISO_8859-1:1987", decode_iso_8859_1   }, // offical name
322         { "Latin-9",         decode_iso_8859_15  }, // offical alias
323         { "UTF-8",           decode_utf8         }, // offical name
324         { "csISOLatin1",     decode_iso_8859_1   }, // offical alias
325         { "cp1252",          decode_windows_1252 },
326         { "iso-ir-100",      decode_iso_8859_1   }, // offical alias
327         { "l1",              decode_iso_8859_1   }, // offical alias
328         { "latin1",          decode_iso_8859_1   }, // offical alias
329         { "windows-1252",    decode_windows_1252 }, // official name
330
331         { NULL,              NULL                }
332 };
333
/**
 * Case-insensitive string comparison.
 * strcasecmp is not part of C99 so we need our own implementation here.
 *
 * @param s1  first NUL-terminated string
 * @param s2  second NUL-terminated string
 * @return 0 if the strings are equal ignoring case, otherwise a negative or
 *         positive value according to the order of the first differing
 *         characters after lower-casing
 */
static int my_strcasecmp(const char *s1, const char *s2)
{
        for (;;) {
                /* tolower() is only defined for unsigned char values (and
                 * EOF), so plain char must be converted first */
                int const c1 = tolower((unsigned char)*s1++);
                int const c2 = tolower((unsigned char)*s2++);
                if (c1 != c2)
                        return c1 - c2;
                if (c1 == '\0')
                        return 0;
        }
}
343
344 void select_input_encoding(char const* const encoding)
345 {
346         for (named_decoder_t const *i = decoders; i->name != NULL; ++i) {
347                 if (my_strcasecmp(encoding, i->name) != 0)
348                         continue;
349                 decoder = i->decoder;
350                 return;
351         }
352         fprintf(stderr, "error: input encoding \"%s\" not supported\n", encoding);
353 }
354
355 static inline void next_real_char(void)
356 {
357         assert(bufpos <= bufend);
358         if (bufpos >= bufend) {
359                 if (input == NULL) {
360                         c = EOF;
361                         return;
362                 }
363                 decoder();
364         }
365         c = *bufpos++;
366         ++lexer_pos.colno;
367 }
368
369 /**
370  * Put a character back into the buffer.
371  *
372  * @param pc  the character to put back
373  */
374 static inline void put_back(utf32 const pc)
375 {
376         assert(bufpos > buf);
377         *(--bufpos - buf + buf) = pc;
378         --lexer_pos.colno;
379
380 #ifdef DEBUG_CHARS
381         printf("putback '%lc'\n", pc);
382 #endif
383 }
384
static inline void next_char(void);

/**
 * Case labels for use inside a switch over c. They match "\r", "\r\n" and
 * "\n", consume the line break, advance the line counter, reset the column
 * and then execute @p code.
 */
#define MATCH_NEWLINE(code)  \
        case '\r':               \
                next_char();         \
                if (c == '\n') {     \
        case '\n':               \
                        next_char();     \
                }                    \
                lexer_pos.lineno++;  \
                lexer_pos.colno = 1; \
                code

/**
 * Asserts that the current character is @p c_type and advances to the next
 * one. The argument is parenthesized so that any expression can be passed.
 */
#define eat(c_type) (assert(c == (c_type)), next_char())
399
400 static void maybe_concat_lines(void)
401 {
402         eat('\\');
403
404         switch (c) {
405         MATCH_NEWLINE(return;)
406
407         default:
408                 break;
409         }
410
411         put_back(c);
412         c = '\\';
413 }
414
415 /**
416  * Set c to the next input character, ie.
417  * after expanding trigraphs.
418  */
419 static inline void next_char(void)
420 {
421         next_real_char();
422
423         /* filter trigraphs */
424         if (UNLIKELY(c == '\\')) {
425                 maybe_concat_lines();
426                 goto end_of_next_char;
427         }
428
429         if (LIKELY(c != '?'))
430                 goto end_of_next_char;
431
432         next_real_char();
433         if (LIKELY(c != '?')) {
434                 put_back(c);
435                 c = '?';
436                 goto end_of_next_char;
437         }
438
439         next_real_char();
440         switch (c) {
441         case '=': c = '#'; break;
442         case '(': c = '['; break;
443         case '/': c = '\\'; maybe_concat_lines(); break;
444         case ')': c = ']'; break;
445         case '\'': c = '^'; break;
446         case '<': c = '{'; break;
447         case '!': c = '|'; break;
448         case '>': c = '}'; break;
449         case '-': c = '~'; break;
450         default:
451                 put_back(c);
452                 put_back('?');
453                 c = '?';
454                 break;
455         }
456
457 end_of_next_char:;
458 #ifdef DEBUG_CHARS
459         printf("nchar '%c'\n", c);
460 #endif
461 }
462
/**
 * Case labels for all characters that may start a symbol: letters, '_' and,
 * if allow_dollar_in_symbol is set, '$'. The enclosing function has to
 * provide a label "dollar_sign", which is jumped to for a '$' that is not
 * allowed.
 */
#define SYMBOL_CHARS  \
        case '$': if (!allow_dollar_in_symbol) goto dollar_sign; \
        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': \
        case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': \
        case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': \
        case 'v': case 'w': case 'x': case 'y': case 'z':                     \
        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': \
        case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': \
        case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': \
        case 'V': case 'W': case 'X': case 'Y': case 'Z':                     \
        case '_':
518
/** Case labels for the decimal digits '0' to '9'. */
#define DIGITS \
        case '0': case '1': case '2': case '3': case '4': \
        case '5': case '6': case '7': case '8': case '9':
530
531 /**
532  * Read a symbol from the input and build
533  * the lexer_token.
534  */
535 static void parse_symbol(void)
536 {
537         obstack_1grow(&symbol_obstack, (char) c);
538         next_char();
539
540         while (true) {
541                 switch (c) {
542                 DIGITS
543                 SYMBOL_CHARS
544                         obstack_1grow(&symbol_obstack, (char) c);
545                         next_char();
546                         break;
547
548                 default:
549 dollar_sign:
550                         goto end_symbol;
551                 }
552         }
553
554 end_symbol:
555         obstack_1grow(&symbol_obstack, '\0');
556
557         char     *string = obstack_finish(&symbol_obstack);
558         symbol_t *symbol = symbol_table_insert(string);
559
560         lexer_token.type   = symbol->ID;
561         lexer_token.symbol = symbol;
562
563         if (symbol->string != string) {
564                 obstack_free(&symbol_obstack, string);
565         }
566 }
567
568 /**
569  * parse suffixes like 'LU' or 'f' after numbers
570  */
571 static void parse_number_suffix(void)
572 {
573         assert(obstack_object_size(&symbol_obstack) == 0);
574         while (true) {
575                 switch (c) {
576                 SYMBOL_CHARS
577                         obstack_1grow(&symbol_obstack, (char) c);
578                         next_char();
579                         break;
580                 default:
581                 dollar_sign:
582                         goto finish_suffix;
583                 }
584         }
585 finish_suffix:
586         if (obstack_object_size(&symbol_obstack) == 0) {
587                 lexer_token.symbol = NULL;
588                 return;
589         }
590
591         obstack_1grow(&symbol_obstack, '\0');
592         char     *string = obstack_finish(&symbol_obstack);
593         symbol_t *symbol = symbol_table_insert(string);
594
595         if (symbol->string != string) {
596                 obstack_free(&symbol_obstack, string);
597         }
598         lexer_token.symbol = symbol;
599 }
600
601 static string_t identify_string(char *string, size_t len)
602 {
603         /* TODO hash */
604 #if 0
605         const char *result = strset_insert(&stringset, concat);
606         if (result != concat) {
607                 obstack_free(&symbol_obstack, concat);
608         }
609 #else
610         const char *result = string;
611 #endif
612         return (string_t) {result, len};
613 }
614
615 /**
616  * Parses a hex number including hex floats and set the
617  * lexer_token.
618  */
619 static void parse_number_hex(void)
620 {
621         bool is_float   = false;
622         bool has_digits = false;
623
624         assert(obstack_object_size(&symbol_obstack) == 0);
625         while (isxdigit(c)) {
626                 has_digits = true;
627                 obstack_1grow(&symbol_obstack, (char) c);
628                 next_char();
629         }
630
631         if (c == '.') {
632                 is_float = true;
633                 obstack_1grow(&symbol_obstack, (char) c);
634                 next_char();
635
636                 while (isxdigit(c)) {
637                         has_digits = true;
638                         obstack_1grow(&symbol_obstack, (char) c);
639                         next_char();
640                 }
641         }
642         if (c == 'p' || c == 'P') {
643                 is_float = true;
644                 obstack_1grow(&symbol_obstack, (char) c);
645                 next_char();
646
647                 if (c == '-' || c == '+') {
648                         obstack_1grow(&symbol_obstack, (char) c);
649                         next_char();
650                 }
651
652                 while (isxdigit(c)) {
653                         obstack_1grow(&symbol_obstack, (char) c);
654                         next_char();
655                 }
656         } else if (is_float) {
657                 errorf(&lexer_token.source_position,
658                        "hexadecimal floatingpoint constant requires an exponent");
659         }
660         obstack_1grow(&symbol_obstack, '\0');
661
662         size_t  size   = obstack_object_size(&symbol_obstack) - 1;
663         char   *string = obstack_finish(&symbol_obstack);
664         lexer_token.literal = identify_string(string, size);
665
666         lexer_token.type    =
667                 is_float ? T_FLOATINGPOINT_HEXADECIMAL : T_INTEGER_HEXADECIMAL;
668
669         if (!has_digits) {
670                 errorf(&lexer_token.source_position, "invalid number literal '0x%S'",
671                        &lexer_token.literal);
672                 lexer_token.literal.begin = "0";
673                 lexer_token.literal.size  = 1;
674         }
675
676         parse_number_suffix();
677 }
678
679 /**
680  * Returns true if the given char is a octal digit.
681  *
682  * @param char  the character to check
683  */
684 static bool is_octal_digit(utf32 chr)
685 {
686         return '0' <= chr && chr <= '7';
687 }
688
689 /**
690  * Parses a number and sets the lexer_token.
691  */
692 static void parse_number(void)
693 {
694         bool is_float   = false;
695         bool has_digits = false;
696
697         assert(obstack_object_size(&symbol_obstack) == 0);
698         if (c == '0') {
699                 next_char();
700                 if (c == 'x' || c == 'X') {
701                         next_char();
702                         parse_number_hex();
703                         return;
704                 } else {
705                         has_digits = true;
706                 }
707                 obstack_1grow(&symbol_obstack, '0');
708         }
709
710         while (isdigit(c)) {
711                 has_digits = true;
712                 obstack_1grow(&symbol_obstack, (char) c);
713                 next_char();
714         }
715
716         if (c == '.') {
717                 is_float = true;
718                 obstack_1grow(&symbol_obstack, '.');
719                 next_char();
720
721                 while (isdigit(c)) {
722                         has_digits = true;
723                         obstack_1grow(&symbol_obstack, (char) c);
724                         next_char();
725                 }
726         }
727         if (c == 'e' || c == 'E') {
728                 is_float = true;
729                 obstack_1grow(&symbol_obstack, 'e');
730                 next_char();
731
732                 if (c == '-' || c == '+') {
733                         obstack_1grow(&symbol_obstack, (char) c);
734                         next_char();
735                 }
736
737                 while (isdigit(c)) {
738                         obstack_1grow(&symbol_obstack, (char) c);
739                         next_char();
740                 }
741         }
742
743         obstack_1grow(&symbol_obstack, '\0');
744         size_t  size   = obstack_object_size(&symbol_obstack) - 1;
745         char   *string = obstack_finish(&symbol_obstack);
746         lexer_token.literal = identify_string(string, size);
747
748         /* is it an octal number? */
749         if (is_float) {
750                 lexer_token.type = T_FLOATINGPOINT;
751         } else if (string[0] == '0') {
752                 lexer_token.type = T_INTEGER_OCTAL;
753
754                 /* check for invalid octal digits */
755                 for (size_t i= 0; i < size; ++i) {
756                         char t = string[i];
757                         if (t >= '8')
758                                 errorf(&lexer_token.source_position,
759                                        "invalid digit '%c' in octal number", t);
760                 }
761         } else {
762                 lexer_token.type = T_INTEGER;
763         }
764
765         if (!has_digits) {
766                 errorf(&lexer_token.source_position, "invalid number literal '%S'",
767                        &lexer_token.literal);
768         }
769
770         parse_number_suffix();
771 }
772
773 /**
774  * Returns the value of a digit.
775  * The only portable way to do it ...
776  */
777 static int digit_value(utf32 const digit)
778 {
779         switch (digit) {
780         case '0': return 0;
781         case '1': return 1;
782         case '2': return 2;
783         case '3': return 3;
784         case '4': return 4;
785         case '5': return 5;
786         case '6': return 6;
787         case '7': return 7;
788         case '8': return 8;
789         case '9': return 9;
790         case 'a':
791         case 'A': return 10;
792         case 'b':
793         case 'B': return 11;
794         case 'c':
795         case 'C': return 12;
796         case 'd':
797         case 'D': return 13;
798         case 'e':
799         case 'E': return 14;
800         case 'f':
801         case 'F': return 15;
802         default:
803                 internal_error("wrong character given");
804         }
805 }
806
807 /**
808  * Parses an octal character sequence.
809  *
810  * @param first_digit  the already read first digit
811  */
812 static utf32 parse_octal_sequence(utf32 const first_digit)
813 {
814         assert(is_octal_digit(first_digit));
815         utf32 value = digit_value(first_digit);
816         if (!is_octal_digit(c)) return value;
817         value = 8 * value + digit_value(c);
818         next_char();
819         if (!is_octal_digit(c)) return value;
820         value = 8 * value + digit_value(c);
821         next_char();
822         return value;
823 }
824
825 /**
826  * Parses a hex character sequence.
827  */
828 static utf32 parse_hex_sequence(void)
829 {
830         utf32 value = 0;
831         while (isxdigit(c)) {
832                 value = 16 * value + digit_value(c);
833                 next_char();
834         }
835         return value;
836 }
837
838 /**
839  * Parse an escape sequence.
840  */
841 static utf32 parse_escape_sequence(void)
842 {
843         eat('\\');
844
845         utf32 const ec = c;
846         next_char();
847
848         switch (ec) {
849         case '"':  return '"';
850         case '\'': return '\'';
851         case '\\': return '\\';
852         case '?': return '\?';
853         case 'a': return '\a';
854         case 'b': return '\b';
855         case 'f': return '\f';
856         case 'n': return '\n';
857         case 'r': return '\r';
858         case 't': return '\t';
859         case 'v': return '\v';
860         case 'x':
861                 return parse_hex_sequence();
862         case '0':
863         case '1':
864         case '2':
865         case '3':
866         case '4':
867         case '5':
868         case '6':
869         case '7':
870                 return parse_octal_sequence(ec);
871         case EOF:
872                 parse_error("reached end of file while parsing escape sequence");
873                 return EOF;
874         /* \E is not documented, but handled, by GCC.  It is acceptable according
875          * to §6.11.4, whereas \e is not. */
876         case 'E':
877         case 'e':
878                 if (c_mode & _GNUC)
879                         return 27;   /* hopefully 27 is ALWAYS the code for ESCAPE */
880                 break;
881         case 'u':
882         case 'U':
883                 parse_error("universal character parsing not implemented yet");
884                 return EOF;
885         default:
886                 break;
887         }
888         /* §6.4.4.4:8 footnote 64 */
889         parse_error("unknown escape sequence");
890         return EOF;
891 }
892
893 /**
894  * Concatenate two strings.
895  */
896 string_t concat_strings(const string_t *const s1, const string_t *const s2)
897 {
898         const size_t len1 = s1->size - 1;
899         const size_t len2 = s2->size - 1;
900
901         char *const concat = obstack_alloc(&symbol_obstack, len1 + len2 + 1);
902         memcpy(concat, s1->begin, len1);
903         memcpy(concat + len1, s2->begin, len2 + 1);
904
905         return identify_string(concat, len1 + len2 + 1);
906 }
907
908 string_t make_string(const char *string)
909 {
910         size_t      len   = strlen(string) + 1;
911         char *const space = obstack_alloc(&symbol_obstack, len);
912         memcpy(space, string, len);
913
914         return identify_string(space, len);
915 }
916
917 static void grow_symbol(utf32 const tc)
918 {
919         struct obstack *const o  = &symbol_obstack;
920         if (tc < 0x80U) {
921                 obstack_1grow(o, tc);
922         } else if (tc < 0x800) {
923                 obstack_1grow(o, 0xC0 | (tc >> 6));
924                 obstack_1grow(o, 0x80 | (tc & 0x3F));
925         } else if (tc < 0x10000) {
926                 obstack_1grow(o, 0xE0 | ( tc >> 12));
927                 obstack_1grow(o, 0x80 | ((tc >>  6) & 0x3F));
928                 obstack_1grow(o, 0x80 | ( tc        & 0x3F));
929         } else {
930                 obstack_1grow(o, 0xF0 | ( tc >> 18));
931                 obstack_1grow(o, 0x80 | ((tc >> 12) & 0x3F));
932                 obstack_1grow(o, 0x80 | ((tc >>  6) & 0x3F));
933                 obstack_1grow(o, 0x80 | ( tc        & 0x3F));
934         }
935 }
936
937 /**
938  * Parse a string literal and set lexer_token.
939  */
940 static void parse_string_literal(void)
941 {
942         eat('"');
943
944         while (true) {
945                 switch (c) {
946                 case '\\': {
947                         utf32 const tc = parse_escape_sequence();
948                         if (tc >= 0x100) {
949                                 warningf(WARN_OTHER, &lexer_pos, "escape sequence out of range");
950                         }
951                         obstack_1grow(&symbol_obstack, tc);
952                         break;
953                 }
954
955                 case EOF: {
956                         errorf(&lexer_token.source_position, "string has no end");
957                         lexer_token.type = T_ERROR;
958                         return;
959                 }
960
961                 case '"':
962                         next_char();
963                         goto end_of_string;
964
965                 default:
966                         grow_symbol(c);
967                         next_char();
968                         break;
969                 }
970         }
971
972 end_of_string:
973
974         /* TODO: concatenate multiple strings separated by whitespace... */
975
976         /* add finishing 0 to the string */
977         obstack_1grow(&symbol_obstack, '\0');
978         const size_t  size   = (size_t)obstack_object_size(&symbol_obstack);
979         char         *string = obstack_finish(&symbol_obstack);
980
981         lexer_token.type    = T_STRING_LITERAL;
982         lexer_token.literal = identify_string(string, size);
983 }
984
985 /**
986  * Parse a wide character constant and set lexer_token.
987  */
988 static void parse_wide_character_constant(void)
989 {
990         eat('\'');
991
992         while (true) {
993                 switch (c) {
994                 case '\\': {
995                         const utf32 tc = parse_escape_sequence();
996                         grow_symbol(tc);
997                         break;
998                 }
999
1000                 MATCH_NEWLINE(
1001                         parse_error("newline while parsing character constant");
1002                         break;
1003                 )
1004
1005                 case '\'':
1006                         next_char();
1007                         goto end_of_wide_char_constant;
1008
1009                 case EOF: {
1010                         errorf(&lexer_token.source_position, "EOF while parsing character constant");
1011                         lexer_token.type = T_ERROR;
1012                         return;
1013                 }
1014
1015                 default:
1016                         grow_symbol(c);
1017                         next_char();
1018                         break;
1019                 }
1020         }
1021
1022 end_of_wide_char_constant:;
1023         obstack_1grow(&symbol_obstack, '\0');
1024         size_t  size   = (size_t) obstack_object_size(&symbol_obstack) - 1;
1025         char   *string = obstack_finish(&symbol_obstack);
1026
1027         lexer_token.type     = T_WIDE_CHARACTER_CONSTANT;
1028         lexer_token.literal  = identify_string(string, size);
1029
1030         if (size == 0) {
1031                 errorf(&lexer_token.source_position, "empty character constant");
1032         }
1033 }
1034
1035 /**
1036  * Parse a wide string literal and set lexer_token.
1037  */
1038 static void parse_wide_string_literal(void)
1039 {
1040         parse_string_literal();
1041         if (lexer_token.type == T_STRING_LITERAL)
1042                 lexer_token.type = T_WIDE_STRING_LITERAL;
1043 }
1044
1045 /**
1046  * Parse a character constant and set lexer_token.
1047  */
1048 static void parse_character_constant(void)
1049 {
1050         eat('\'');
1051
1052         while (true) {
1053                 switch (c) {
1054                 case '\\': {
1055                         utf32 const tc = parse_escape_sequence();
1056                         if (tc >= 0x100) {
1057                                 warningf(WARN_OTHER, &lexer_pos, "escape sequence out of range");
1058                         }
1059                         obstack_1grow(&symbol_obstack, tc);
1060                         break;
1061                 }
1062
1063                 MATCH_NEWLINE(
1064                         parse_error("newline while parsing character constant");
1065                         break;
1066                 )
1067
1068                 case '\'':
1069                         next_char();
1070                         goto end_of_char_constant;
1071
1072                 case EOF: {
1073                         errorf(&lexer_token.source_position, "EOF while parsing character constant");
1074                         lexer_token.type = T_ERROR;
1075                         return;
1076                 }
1077
1078                 default:
1079                         grow_symbol(c);
1080                         next_char();
1081                         break;
1082
1083                 }
1084         }
1085
1086 end_of_char_constant:;
1087         obstack_1grow(&symbol_obstack, '\0');
1088         const size_t        size   = (size_t)obstack_object_size(&symbol_obstack)-1;
1089         char         *const string = obstack_finish(&symbol_obstack);
1090
1091         lexer_token.type    = T_CHARACTER_CONSTANT;
1092         lexer_token.literal = identify_string(string, size);
1093
1094         if (size == 0) {
1095                 errorf(&lexer_token.source_position, "empty character constant");
1096         }
1097 }
1098
1099 /**
1100  * Skip a multiline comment.
1101  */
1102 static void skip_multiline_comment(void)
1103 {
1104         while (true) {
1105                 switch (c) {
1106                 case '/':
1107                         next_char();
1108                         if (c == '*') {
1109                                 /* nested comment, warn here */
1110                                 warningf(WARN_COMMENT, &lexer_pos, "'/*' within comment");
1111                         }
1112                         break;
1113                 case '*':
1114                         next_char();
1115                         if (c == '/') {
1116                                 next_char();
1117                                 return;
1118                         }
1119                         break;
1120
1121                 MATCH_NEWLINE(break;)
1122
1123                 case EOF: {
1124                         errorf(&lexer_token.source_position, "at end of file while looking for comment end");
1125                         return;
1126                 }
1127
1128                 default:
1129                         next_char();
1130                         break;
1131                 }
1132         }
1133 }
1134
1135 /**
1136  * Skip a single line comment.
1137  */
1138 static void skip_line_comment(void)
1139 {
1140         while (true) {
1141                 switch (c) {
1142                 case EOF:
1143                         return;
1144
1145                 case '\n':
1146                 case '\r':
1147                         return;
1148
1149                 case '\\':
1150                         next_char();
1151                         if (c == '\n' || c == '\r') {
1152                                 warningf(WARN_COMMENT, &lexer_pos, "multi-line comment");
1153                                 return;
1154                         }
1155                         break;
1156
1157                 default:
1158                         next_char();
1159                         break;
1160                 }
1161         }
1162 }
1163
/**
 * The current preprocessor token: a copy of lexer_token taken by
 * next_pp_token() and inspected by the directive handlers.
 */
static token_t pp_token;
1166
/**
 * Read the next preprocessor token.
 *
 * Advances the lexer by one preprocessing token and stores a copy of the
 * resulting lexer_token in pp_token.
 */
static inline void next_pp_token(void)
{
        lexer_next_preprocessing_token();
        pp_token = lexer_token;
}
1175
1176 /**
1177  * Eat all preprocessor tokens until newline.
1178  */
1179 static void eat_until_newline(void)
1180 {
1181         while (pp_token.type != '\n' && pp_token.type != T_EOF) {
1182                 next_pp_token();
1183         }
1184 }
1185
1186 /**
1187  * Handle the define directive.
1188  */
1189 static void define_directive(void)
1190 {
1191         lexer_next_preprocessing_token();
1192         if (lexer_token.type != T_IDENTIFIER) {
1193                 parse_error("expected identifier after #define\n");
1194                 eat_until_newline();
1195         }
1196 }
1197
/**
 * Handle the ifdef and ifndef directives.
 *
 * Conditional inclusion is not implemented: the token following the directive
 * name is read and nothing else happens.
 *
 * @param is_ifndef  1 when called for ifndef, 0 when called for ifdef (see
 *                   parse_preprocessor_identifier()); currently ignored
 */
static void ifdef_directive(int is_ifndef)
{
        (void) is_ifndef;
        lexer_next_preprocessing_token();
        //expect_identifier();
        //expect_newline();
}
1208
/**
 * Handle the endif directive.
 *
 * Currently a placeholder: no token is consumed and no state is changed.
 */
static void endif_directive(void)
{
        //expect_newline();
}
1216
1217 /**
1218  * Parse the line directive.
1219  */
1220 static void parse_line_directive(void)
1221 {
1222         if (pp_token.type != T_INTEGER) {
1223                 parse_error("expected integer");
1224         } else {
1225                 /* use offset -1 as this is about the next line */
1226                 lexer_pos.lineno = atoi(pp_token.literal.begin) - 1;
1227                 next_pp_token();
1228         }
1229         if (pp_token.type == T_STRING_LITERAL) {
1230                 lexer_pos.input_name = pp_token.literal.begin;
1231                 next_pp_token();
1232         }
1233
1234         eat_until_newline();
1235 }
1236
/**
 * The kinds of "#pragma STDC" recognised by parse_pragma().
 */
typedef enum stdc_pragma_kind_t {
        STDC_UNKNOWN          = 0,  /**< no (or an unrecognised) STDC pragma name */
        STDC_FP_CONTRACT      = 1,
        STDC_FENV_ACCESS      = 2,
        STDC_CX_LIMITED_RANGE = 3
} stdc_pragma_kind_t;
1246
/**
 * The argument values a "#pragma STDC" may carry, as recognised by
 * parse_pragma().
 */
typedef enum stdc_pragma_value_kind_t {
        STDC_VALUE_UNKNOWN = 0,  /**< no (or an unrecognised) argument */
        STDC_VALUE_ON      = 1,
        STDC_VALUE_OFF     = 2,
        STDC_VALUE_DEFAULT = 3
} stdc_pragma_value_kind_t;
1256
1257 /**
1258  * Parse a pragma directive.
1259  */
1260 static void parse_pragma(void)
1261 {
1262         bool unknown_pragma = true;
1263
1264         next_pp_token();
1265         if (pp_token.symbol->pp_ID == TP_STDC) {
1266                 stdc_pragma_kind_t kind = STDC_UNKNOWN;
1267                 /* a STDC pragma */
1268                 if (c_mode & _C99) {
1269                         next_pp_token();
1270
1271                         switch (pp_token.symbol->pp_ID) {
1272                         case TP_FP_CONTRACT:
1273                                 kind = STDC_FP_CONTRACT;
1274                                 break;
1275                         case TP_FENV_ACCESS:
1276                                 kind = STDC_FENV_ACCESS;
1277                                 break;
1278                         case TP_CX_LIMITED_RANGE:
1279                                 kind = STDC_CX_LIMITED_RANGE;
1280                                 break;
1281                         default:
1282                                 break;
1283                         }
1284                         if (kind != STDC_UNKNOWN) {
1285                                 stdc_pragma_value_kind_t value = STDC_VALUE_UNKNOWN;
1286                                 next_pp_token();
1287                                 switch (pp_token.symbol->pp_ID) {
1288                                 case TP_ON:
1289                                         value = STDC_VALUE_ON;
1290                                         break;
1291                                 case TP_OFF:
1292                                         value = STDC_VALUE_OFF;
1293                                         break;
1294                                 case TP_DEFAULT:
1295                                         value = STDC_VALUE_DEFAULT;
1296                                         break;
1297                                 default:
1298                                         break;
1299                                 }
1300                                 if (value != STDC_VALUE_UNKNOWN) {
1301                                         unknown_pragma = false;
1302                                 } else {
1303                                         errorf(&pp_token.source_position, "bad STDC pragma argument");
1304                                 }
1305                         }
1306                 }
1307         } else {
1308                 unknown_pragma = true;
1309         }
1310         eat_until_newline();
1311         if (unknown_pragma) {
1312                 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.source_position, "encountered unknown #pragma");
1313         }
1314 }
1315
1316 /**
1317  * Parse a preprocessor non-null directive.
1318  */
1319 static void parse_preprocessor_identifier(void)
1320 {
1321         assert(pp_token.type == T_IDENTIFIER);
1322         symbol_t *symbol = pp_token.symbol;
1323
1324         switch (symbol->pp_ID) {
1325         case TP_include:
1326                 printf("include - enable header name parsing!\n");
1327                 break;
1328         case TP_define:
1329                 define_directive();
1330                 break;
1331         case TP_ifdef:
1332                 ifdef_directive(0);
1333                 break;
1334         case TP_ifndef:
1335                 ifdef_directive(1);
1336                 break;
1337         case TP_endif:
1338                 endif_directive();
1339                 break;
1340         case TP_line:
1341                 next_pp_token();
1342                 parse_line_directive();
1343                 break;
1344         case TP_if:
1345         case TP_else:
1346         case TP_elif:
1347         case TP_undef:
1348         case TP_error:
1349                 /* TODO; output the rest of the line */
1350                 parse_error("#error directive: ");
1351                 break;
1352         case TP_pragma:
1353                 parse_pragma();
1354                 break;
1355         }
1356 }
1357
1358 /**
1359  * Parse a preprocessor directive.
1360  */
1361 static void parse_preprocessor_directive(void)
1362 {
1363         next_pp_token();
1364
1365         switch (pp_token.type) {
1366         case T_IDENTIFIER:
1367                 parse_preprocessor_identifier();
1368                 break;
1369         case T_INTEGER:
1370                 parse_line_directive();
1371                 break;
1372         case '\n':
1373                 /* NULL directive, see §6.10.7 */
1374                 break;
1375         default:
1376                 parse_error("invalid preprocessor directive");
1377                 eat_until_newline();
1378                 break;
1379         }
1380 }
1381
/*
 * Helper macros for lexing punctuators that share a prefix.  They expand to
 * unbalanced pieces of one "while (true) { switch (c) { ... } }" construct:
 * MAYBE_PROLOG opens it and ELSE or ELSE_CODE closes it, with any number of
 * MAYBE / MAYBE_MODE / hand-written cases in between.
 */

/* Consume the current character and open the loop and switch on the
 * character that follows it. */
#define MAYBE_PROLOG                                       \
                        next_char();                                   \
                        while (true) {                                 \
                                switch (c) {

/* If the current character is ch: consume it, set lexer_token.type to
 * set_type and return from the enclosing function. */
#define MAYBE(ch, set_type)                                \
                                case ch:                                   \
                                        next_char();                           \
                                        lexer_token.type = set_type;           \
                                        return;

/* Like MAYBE, but only when c_mode has one of the bits in mode set.
 * Otherwise control falls through into whatever case follows, so this must
 * be the last case before ELSE / ELSE_CODE. */
#define MAYBE_MODE(ch, set_type, mode)                     \
                                case ch:                                   \
                                        if (c_mode & mode) {                   \
                                                next_char();                       \
                                                lexer_token.type = set_type;       \
                                                return;                            \
                                        }                                      \
                                        /* fallthrough */

/* Default case: run code, return from the enclosing function, and close the
 * switch and the loop opened by MAYBE_PROLOG. */
#define ELSE_CODE(code)                                    \
                                default:                                   \
                                        code                                   \
                                        return;                                \
                                }                                          \
                        } /* end of while (true) */                    \

/* ELSE_CODE whose code simply sets lexer_token.type to set_type. */
#define ELSE(set_type)                                     \
                ELSE_CODE(                                         \
                        lexer_token.type = set_type;                   \
                )
1414
/**
 * Read the next preprocessing token into lexer_token.
 *
 * Spaces and tabs are skipped.  Unlike lexer_next_token(), line ends are
 * reported as tokens of type '\n'.  Comments are skipped by lexing the token
 * that follows them.  The end of the input yields T_EOF; a character that
 * starts no token is reported as an error and yields T_ERROR.
 *
 * lexer_token.source_position is set from lexer_pos before each attempt to
 * recognise a token.
 */
void lexer_next_preprocessing_token(void)
{
        while (true) {
                lexer_token.source_position = lexer_pos;

                switch (c) {
                case ' ':
                case '\t':
                        next_char();
                        break;

                MATCH_NEWLINE(
                        lexer_token.type = '\n';
                        return;
                )

                SYMBOL_CHARS
                        parse_symbol();
                        /* might be a wide string ( L"string" ) */
                        if (lexer_token.symbol == symbol_L) {
                                switch (c) {
                                        case '"':  parse_wide_string_literal();     break;
                                        case '\'': parse_wide_character_constant(); break;
                                }
                        }
                        return;

                DIGITS
                        parse_number();
                        return;

                case '"':
                        parse_string_literal();
                        return;

                case '\'':
                        parse_character_constant();
                        return;

                /* '.' may start a number (".5"), be part of "..." or stand alone */
                case '.':
                        MAYBE_PROLOG
                                DIGITS
                                        /* restore the '.' as current character so that
                                         * parse_number() sees the whole literal */
                                        put_back(c);
                                        c = '.';
                                        parse_number();
                                        return;

                                case '.':
                                        /* ".." followed by something other than '.':
                                         * emit a single '.' and re-lex the second one */
                                        MAYBE_PROLOG
                                        MAYBE('.', T_DOTDOTDOT)
                                        ELSE_CODE(
                                                put_back(c);
                                                c = '.';
                                                lexer_token.type = '.';
                                        )
                        ELSE('.')
                case '&':
                        MAYBE_PROLOG
                        MAYBE('&', T_ANDAND)
                        MAYBE('=', T_ANDEQUAL)
                        ELSE('&')
                case '*':
                        MAYBE_PROLOG
                        MAYBE('=', T_ASTERISKEQUAL)
                        ELSE('*')
                case '+':
                        MAYBE_PROLOG
                        MAYBE('+', T_PLUSPLUS)
                        MAYBE('=', T_PLUSEQUAL)
                        ELSE('+')
                case '-':
                        MAYBE_PROLOG
                        MAYBE('>', T_MINUSGREATER)
                        MAYBE('-', T_MINUSMINUS)
                        MAYBE('=', T_MINUSEQUAL)
                        ELSE('-')
                case '!':
                        MAYBE_PROLOG
                        MAYBE('=', T_EXCLAMATIONMARKEQUAL)
                        ELSE('!')
                /* '/' is either an operator or the start of a comment; after a
                 * comment the token following it is lexed by a recursive call */
                case '/':
                        MAYBE_PROLOG
                        MAYBE('=', T_SLASHEQUAL)
                                case '*':
                                        next_char();
                                        skip_multiline_comment();
                                        lexer_next_preprocessing_token();
                                        return;
                                case '/':
                                        next_char();
                                        skip_line_comment();
                                        lexer_next_preprocessing_token();
                                        return;
                        ELSE('/')
                /* '%' also introduces the digraphs "%>" for '}', "%:" for '#' and
                 * "%:%:" for T_HASHHASH */
                case '%':
                        MAYBE_PROLOG
                        MAYBE('>', '}')
                        MAYBE('=', T_PERCENTEQUAL)
                                case ':':
                                        MAYBE_PROLOG
                                                case '%':
                                                        MAYBE_PROLOG
                                                        MAYBE(':', T_HASHHASH)
                                                        ELSE_CODE(
                                                                put_back(c);
                                                                c = '%';
                                                                lexer_token.type = '#';
                                                        )
                                        ELSE('#')
                        ELSE('%')
                /* '<' also introduces the digraphs "<:" for '[' and "<%" for '{' */
                case '<':
                        MAYBE_PROLOG
                        MAYBE(':', '[')
                        MAYBE('%', '{')
                        MAYBE('=', T_LESSEQUAL)
                                case '<':
                                        MAYBE_PROLOG
                                        MAYBE('=', T_LESSLESSEQUAL)
                                        ELSE(T_LESSLESS)
                        ELSE('<')
                case '>':
                        MAYBE_PROLOG
                        MAYBE('=', T_GREATEREQUAL)
                                case '>':
                                        MAYBE_PROLOG
                                        MAYBE('=', T_GREATERGREATEREQUAL)
                                        ELSE(T_GREATERGREATER)
                        ELSE('>')
                case '^':
                        MAYBE_PROLOG
                        MAYBE('=', T_CARETEQUAL)
                        ELSE('^')
                case '|':
                        MAYBE_PROLOG
                        MAYBE('=', T_PIPEEQUAL)
                        MAYBE('|', T_PIPEPIPE)
                        ELSE('|')
                /* ":>" is the digraph for ']'; "::" is only a token when c_mode
                 * has the _CXX bit set */
                case ':':
                        MAYBE_PROLOG
                        MAYBE('>', ']')
                        MAYBE_MODE(':', T_COLONCOLON, _CXX)
                        ELSE(':')
                case '=':
                        MAYBE_PROLOG
                        MAYBE('=', T_EQUALEQUAL)
                        ELSE('=')
                case '#':
                        MAYBE_PROLOG
                        MAYBE('#', T_HASHHASH)
                        ELSE('#')

                /* single-character tokens: the character is its own token type */
                case '?':
                case '[':
                case ']':
                case '(':
                case ')':
                case '{':
                case '}':
                case '~':
                case ';':
                case ',':
                case '\\':
                        lexer_token.type = c;
                        next_char();
                        return;

                case EOF:
                        lexer_token.type = T_EOF;
                        return;

                default:
                /* NOTE(review): no goto to this label is visible in this function's
                 * text; presumably it is targeted from inside SYMBOL_CHARS - confirm */
dollar_sign:
                        errorf(&lexer_pos, "unknown character '%c' found", c);
                        next_char();
                        lexer_token.type = T_ERROR;
                        return;
                }
        }
}
1594
1595 void lexer_next_token(void)
1596 {
1597         lexer_next_preprocessing_token();
1598
1599         while (lexer_token.type == '\n') {
1600 newline_found:
1601                 lexer_next_preprocessing_token();
1602         }
1603
1604         if (lexer_token.type == '#') {
1605                 parse_preprocessor_directive();
1606                 goto newline_found;
1607         }
1608 }
1609
/**
 * Initialise the lexer: set up the string set and obtain the symbol for "L",
 * which lexer_next_preprocessing_token() compares against to recognise the
 * prefix of wide string literals and wide character constants.
 */
void init_lexer(void)
{
        strset_init(&stringset);
        symbol_L = symbol_table_insert("L");
}
1615
1616 void lexer_open_stream(FILE *stream, const char *input_name)
1617 {
1618         input                = stream;
1619         lexer_pos.lineno     = 0;
1620         lexer_pos.colno      = 0;
1621         lexer_pos.input_name = input_name;
1622
1623         bufpos = NULL;
1624         bufend = NULL;
1625
1626         /* place a virtual \n at the beginning so the lexer knows that we're
1627          * at the beginning of a line */
1628         c = '\n';
1629 }
1630
/**
 * Make the lexer read from a memory buffer.
 *
 * Not implemented yet: the input stream is cleared, the position is reset,
 * and panic() is called.  The disabled branch sketches the intended setup.
 *
 * @param buffer      start of the text (currently unused)
 * @param len         length of the text (currently unused)
 * @param input_name  name stored in lexer_pos.input_name
 */
void lexer_open_buffer(const char *buffer, size_t len, const char *input_name)
{
        input                = NULL;
        lexer_pos.lineno     = 0;
        lexer_pos.colno      = 0;
        lexer_pos.input_name = input_name;

#if 0 // TODO
        bufpos = buffer;
        bufend = buffer + len;

        /* place a virtual \n at the beginning so the lexer knows that we're
         * at the beginning of a line */
        c = '\n';
#else
        /* silence unused-parameter warnings while this is unimplemented */
        (void)buffer;
        (void)len;
        panic("builtin lexing not done yet");
#endif
}
1651
/**
 * Shut the lexer down by destroying the string set that init_lexer() set up.
 */
void exit_lexer(void)
{
        strset_destroy(&stringset);
}
1656
1657 static __attribute__((unused))
1658 void dbg_pos(const source_position_t source_position)
1659 {
1660         fprintf(stdout, "%s:%u:%u\n", source_position.input_name,
1661                 source_position.lineno, source_position.colno);
1662         fflush(stdout);
1663 }