2 * This file is part of cparser.
3 * Copyright (C) 2012 Matthias Braun <matze@braunis.de>
13 #include "preprocessor.h"
17 #include "adt/error.h"
18 #include "adt/strutil.h"
19 #include "adt/strset.h"
20 #include "lang_features.h"
21 #include "diagnostic.h"
22 #include "string_rep.h"
26 #define INCLUDE_LIMIT 199 /* 199 is for gcc "compatibility" */
28 typedef struct saved_token_t {
33 typedef struct whitespace_info_t {
34 /** current token had whitespace in front of it */
36 /** current token is at the beginning of a line.
37 * => a "#" at line begin starts a preprocessing directive. */
39 /** number of spaces before the first token in a line */
40 unsigned whitespace_at_line_begin;
43 struct pp_definition_t {
46 pp_definition_t *parent_expansion;
48 whitespace_info_t expand_info;
50 bool is_expanding : 1;
51 bool has_parameters : 1;
52 bool is_parameter : 1;
53 pp_definition_t *function_definition;
55 pp_definition_t *parameters;
59 saved_token_t *token_list;
62 typedef struct pp_conditional_t pp_conditional_t;
63 struct pp_conditional_t {
67 /** conditional in skip mode (then+else gets skipped) */
69 pp_conditional_t *parent;
72 typedef struct pp_input_t pp_input_t;
77 utf32 buf[1024+MAX_PUTBACK];
83 searchpath_entry_t *path;
86 struct searchpath_entry_t {
88 searchpath_entry_t *next;
92 static pp_input_t input;
94 static pp_input_t *input_stack;
95 static unsigned n_inputs;
96 static struct obstack input_obstack;
98 static pp_conditional_t *conditional_stack;
101 bool allow_dollar_in_symbol = true;
102 static bool resolve_escape_sequences = true;
103 static bool error_on_unknown_chars = true;
104 static bool skip_mode;
106 static struct obstack pp_obstack;
107 static struct obstack config_obstack;
108 static const char *printed_input_name = NULL;
109 static position_t expansion_pos;
110 static pp_definition_t *current_expansion = NULL;
111 static pp_definition_t *current_call = NULL;
112 static pp_definition_t *current_argument = NULL;
113 static pp_definition_t *argument_expanding = NULL;
114 static unsigned argument_brace_count;
115 static strset_t stringset;
116 static token_kind_t last_token;
118 struct searchpath_t {
119 searchpath_entry_t *first;
120 searchpath_entry_t **anchor;
/* The four include search paths. Each list starts out empty (first == NULL)
 * with its anchor pointing at its own `first` member. The third initializer
 * is false for the bracket/quote paths and true for the system/after paths;
 * the member it initializes is not visible here (presumably it marks system
 * header directories -- confirm against the searchpath_t declaration). */
124 searchpath_t bracket_searchpath = { NULL, &bracket_searchpath.first, false };
125 searchpath_t quote_searchpath = { NULL, &quote_searchpath.first, false };
126 searchpath_t system_searchpath = { NULL, &system_searchpath.first, true };
127 searchpath_t after_searchpath = { NULL, &after_searchpath.first, true };
129 static whitespace_info_t next_info; /* valid if had_whitespace is true */
130 static whitespace_info_t info;
132 static inline void next_char(void);
133 static void next_input_token(void);
134 static void print_line_directive(const position_t *pos, const char *add);
136 static symbol_t *symbol_colongreater;
137 static symbol_t *symbol_lesscolon;
138 static symbol_t *symbol_lesspercent;
139 static symbol_t *symbol_percentcolon;
140 static symbol_t *symbol_percentcolonpercentcolon;
141 static symbol_t *symbol_percentgreater;
143 static symbol_t *symbol_L;
144 static symbol_t *symbol_U;
145 static symbol_t *symbol_u;
146 static symbol_t *symbol_u8;
/**
 * Interns the spellings of the six digraphs and of the four string/character
 * encoding prefixes with symbol_table_insert() and caches the resulting
 * symbols in the file-scope symbol_* variables.
 */
148 static void init_symbols(void)
150 symbol_colongreater = symbol_table_insert(":>");
151 symbol_lesscolon = symbol_table_insert("<:");
152 symbol_lesspercent = symbol_table_insert("<%");
153 symbol_percentcolon = symbol_table_insert("%:");
154 symbol_percentcolonpercentcolon = symbol_table_insert("%:%:");
155 symbol_percentgreater = symbol_table_insert("%>");
/* encoding prefixes, looked up again by identify_encoding_prefix() */
157 symbol_L = symbol_table_insert("L");
158 symbol_U = symbol_table_insert("U");
159 symbol_u = symbol_table_insert("u");
160 symbol_u8 = symbol_table_insert("u8");
/**
 * Makes the given stream the current preprocessor input.
 *
 * Wraps the stream with input_from_stream(), resets the output line counter,
 * sets the position to line 1 of @p filename, records the system-header flag
 * and prints a line directive for the new input.
 *
 * @param file             stream to read from
 * @param filename         name stored as input.pos.input_name
 * @param path             not used in the lines visible here (presumably
 *                         stored with the input -- confirm)
 * @param is_system_header stored as input.pos.is_system_header
 */
163 void switch_pp_input(FILE *const file, char const *const filename, searchpath_entry_t *const path, bool const is_system_header)
166 input.input = input_from_stream(file, NULL);
169 input.output_line = 0;
170 input.pos.input_name = filename;
171 input.pos.lineno = 1;
172 input.pos.is_system_header = is_system_header;
175 /* indicate that we're at a new input */
/* the "1" flag is only passed when another input is already on input_stack */
176 print_line_directive(&input.pos, input_stack != NULL ? "1" : NULL);
178 /* place a virtual '\n' so we realize we're at line begin */
179 input.pos.lineno = 0;
/**
 * Releases the decoder of the current input with input_free() and fetches the
 * underlying stream from input.file.
 *
 * The return statement is not visible here; presumably the fetched FILE is
 * returned without being closed, since next_input_token() passes the result
 * of this function to fclose().
 */
183 FILE *close_pp_input(void)
185 input_free(input.input);
187 FILE* const file = input.file;
/**
 * Saves a copy of the current input state on input_obstack and links it as
 * the new head of input_stack.
 */
199 static void push_input(void)
201 pp_input_t *const saved_input = obstack_copy(&input_obstack, &input, sizeof(input));
203 /* adjust buffer positions */
/* bufpos/bufend are expressed relative to input.buf, so the copied pointers
 * must be re-based onto the copy's own buf */
204 if (input.bufpos != NULL)
205 saved_input->bufpos = saved_input->buf + (input.bufpos - input.buf);
206 if (input.bufend != NULL)
207 saved_input->bufend = saved_input->buf + (input.bufend - input.buf);
209 saved_input->parent = input_stack;
210 input_stack = saved_input;
/**
 * Restores the most recently pushed input state into `input`, removes it
 * from input_stack and releases its storage on input_obstack.
 * Requires a non-empty input stack (asserted).
 */
214 static void pop_restore_input(void)
216 assert(n_inputs > 0);
217 assert(input_stack != NULL);
219 pp_input_t *saved_input = input_stack;
221 memcpy(&input, saved_input, sizeof(input));
224 /* adjust buffer positions */
/* mirror of push_input(): re-base the pointers onto input.buf */
225 if (saved_input->bufpos != NULL)
226 input.bufpos = input.buf + (saved_input->bufpos - saved_input->buf);
227 if (saved_input->bufend != NULL)
228 input.bufend = input.buf + (saved_input->bufend - saved_input->buf);
230 input_stack = saved_input->parent;
231 obstack_free(&input_obstack, saved_input);
236 * Prints a parse error message at the current token.
238 * @param msg the error message
240 static void parse_error(const char *msg)
242 errorf(&pp_token.base.pos, "%s", msg);
/**
 * Loads the next decoded character into input.c, refilling input.buf through
 * decode() once the buffered characters are used up.
 */
245 static inline void next_real_char(void)
247 assert(input.bufpos <= input.bufend);
248 if (input.bufpos >= input.bufend) {
/* decode behind the first MAX_PUTBACK slots; presumably they are kept free
 * so that put_back() always has room in front of bufpos */
249 size_t const n = decode(input.input, input.buf + MAX_PUTBACK, lengthof(input.buf) - MAX_PUTBACK);
254 input.bufpos = input.buf + MAX_PUTBACK;
255 input.bufend = input.bufpos + n;
257 input.c = *input.bufpos++;
262 * Put a character back into the buffer.
264 * @param pc the character to put back
266 static inline void put_back(utf32 const pc)
/* there must be a free slot in front of the read position */
268 assert(input.bufpos > input.buf);
/* Store the full utf32 value. Narrowing it to char first would corrupt any
 * character that does not fit into a char before it is written back into
 * the utf32 buffer.
 * NOTE(review): the "- input.buf + input.buf" round trip presumably exists
 * to obtain a writable pointer from a pointer-to-const bufpos -- confirm
 * against the pp_input_t declaration before simplifying it. */
269 *(--input.bufpos - input.buf + input.buf) = pc;
276 if (input.c == '\n') { \
280 ++input.pos.lineno; \
281 input.pos.colno = 1; \
283 newline // Let it look like an ordinary case label.
285 #define eat(c_type) (assert(input.c == c_type), next_char())
287 static void maybe_concat_lines(void)
293 info.whitespace_at_line_begin = 0;
305 * Set input.c to the next input character, i.e. the character obtained
306 * after replacing trigraphs and joining backslash-continued lines.
308 static inline void next_char(void)
312 /* filter trigraphs and concatenated lines */
/* a backslash may start a line continuation */
313 if (UNLIKELY(input.c == '\\')) {
314 maybe_concat_lines();
315 goto end_of_next_char;
/* anything but '?' cannot start a trigraph */
318 if (LIKELY(input.c != '?'))
319 goto end_of_next_char;
/* a trigraph needs a second '?' */
322 if (LIKELY(input.c != '?')) {
325 goto end_of_next_char;
/* third trigraph character -> replacement character */
330 case '=': input.c = '#'; break;
331 case '(': input.c = '['; break;
/* the trigraph for backslash can itself start a line continuation */
332 case '/': input.c = '\\'; maybe_concat_lines(); break;
333 case ')': input.c = ']'; break;
334 case '\'': input.c = '^'; break;
335 case '<': input.c = '{'; break;
336 case '!': input.c = '|'; break;
337 case '>': input.c = '}'; break;
338 case '-': input.c = '~'; break;
/* NOTE(review): looks like debug output; its enclosing condition is not
 * visible here */
348 printf("nchar '%c'\n", input.c);
355 * Returns true if the given char is an octal digit.
357 * @param chr the character to check
359 static inline bool is_octal_digit(int chr)
377 * Returns the value of a digit.
378 * The only portable way to do it ...
380 static int digit_value(int digit)
406 panic("wrong character given");
411 * Parses an octal character sequence.
413 * @param first_digit the already read first digit
415 static utf32 parse_octal_sequence(const utf32 first_digit)
417 assert(is_octal_digit(first_digit));
418 utf32 value = digit_value(first_digit);
/* optional second digit: stop at the first non-octal character */
419 if (!is_octal_digit(input.c)) return value;
420 value = 8 * value + digit_value(input.c);
/* optional third digit: no further digits are examined by the visible code */
422 if (!is_octal_digit(input.c)) return value;
423 value = 8 * value + digit_value(input.c);
430 * Parses a hex character sequence.
432 static utf32 parse_hex_sequence(void)
/* Accumulates hex digits for as long as isxdigit() accepts input.c; the loop
 * itself does not bound the number of digits.
 * NOTE(review): input.c is a utf32. The C standard only defines isxdigit()
 * for EOF and values representable as unsigned char -- confirm that larger
 * values cannot reach this call or guard it. */
435 while (isxdigit(input.c)) {
436 value = 16 * value + digit_value(input.c);
/**
 * Checks a code point against two conditions. The results returned for them
 * are not visible here; presumably both make the value invalid as a
 * universal character name (they match the constraints in C11 6.4.3).
 */
442 static bool is_universal_char_valid(utf32 const v)
/* below 0xA0, except 0x24 ('$'), 0x40 ('@') and 0x60 ('`') */
445 if (v < 0xA0U && v != 0x24 && v != 0x40 && v != 0x60)
/* UTF-16 surrogate range */
447 if (0xD800 <= v && v <= 0xDFFF)
/**
 * Reads the hex digits of a universal character name and accumulates their
 * value.
 *
 * @param n_digits number of digits expected; callers in this file pass 4
 *                 for \u and 8 for \U
 */
452 static utf32 parse_universal_char(unsigned const n_digits)
455 for (unsigned k = n_digits; k != 0; --k) {
456 if (isxdigit(input.c)) {
457 v = 16 * v + digit_value(input.c);
/* when escapes are kept unresolved the digits are also copied verbatim */
458 if (!resolve_escape_sequences)
459 obstack_1grow(&symbol_obstack, input.c);
/* fewer digits than requested */
463 "short universal character name, expected %u more digits",
468 if (!is_universal_char_valid(v)) {
470 "\\%c%0*X is not a valid universal character name",
471 n_digits == 4 ? 'u' : 'U', (int)n_digits, v);
476 static bool is_universal_char_valid_identifier_c99(utf32 const v)
478 static const utf32 single_chars[] = {
479 0x00AA, 0x00BA, 0x0386, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0,
480 0x1F59, 0x1F5B, 0x1F5D, 0x05BF, 0x09B2, 0x0A02, 0x0A5E, 0x0A74,
481 0x0A8D, 0x0AD0, 0x0AE0, 0x0B9C, 0x0CDE, 0x0E84, 0x0E8A, 0x0E8D,
482 0x0EA5, 0x0EA7, 0x0EC6, 0x0F00, 0x0F35, 0x0F37, 0x0F39, 0x0F97,
483 0x0FB9, 0x00B5, 0x00B7, 0x02BB, 0x037A, 0x0559, 0x093D, 0x0B3D,
484 0x1FBE, 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128
487 static const utf32 ranges[][2] = {
488 {0x00C0, 0x00D6}, {0x00D8, 0x00F6}, {0x00F8, 0x01F5}, {0x01FA, 0x0217},
489 {0x0250, 0x02A8}, {0x1E00, 0x1E9B}, {0x1EA0, 0x1EF9}, {0x0388, 0x038A},
490 {0x038E, 0x03A1}, {0x03A3, 0x03CE}, {0x03D0, 0x03D6}, {0x03E2, 0x03F3},
491 {0x1F00, 0x1F15}, {0x1F18, 0x1F1D}, {0x1F20, 0x1F45}, {0x1F48, 0x1F4D},
492 {0x1F50, 0x1F57}, {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4}, {0x1FB6, 0x1FBC},
493 {0x1FC2, 0x1FC4}, {0x1FC6, 0x1FCC}, {0x1FD0, 0x1FD3}, {0x1FD6, 0x1FDB},
494 {0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4}, {0x1FF6, 0x1FFC}, {0x0401, 0x040C},
495 {0x040E, 0x044F}, {0x0451, 0x045C}, {0x045E, 0x0481}, {0x0490, 0x04C4},
496 {0x04C7, 0x04C8}, {0x04CB, 0x04CC}, {0x04D0, 0x04EB}, {0x04EE, 0x04F5},
497 {0x04F8, 0x04F9}, {0x0531, 0x0556}, {0x0561, 0x0587}, {0x05B0, 0x05B9},
498 {0x05BB, 0x05BD}, {0x05C1, 0x05C2}, {0x05D0, 0x05EA}, {0x05F0, 0x05F2},
499 {0x0621, 0x063A}, {0x0640, 0x0652}, {0x0670, 0x06B7}, {0x06BA, 0x06BE},
500 {0x06C0, 0x06CE}, {0x06D0, 0x06DC}, {0x06E5, 0x06E8}, {0x06EA, 0x06ED},
501 {0x0901, 0x0903}, {0x0905, 0x0939}, {0x093E, 0x094D}, {0x0950, 0x0952},
502 {0x0958, 0x0963}, {0x0981, 0x0983}, {0x0985, 0x098C}, {0x098F, 0x0990},
503 {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B6, 0x09B9}, {0x09BE, 0x09C4},
504 {0x09C7, 0x09C8}, {0x09CB, 0x09CD}, {0x09DC, 0x09DD}, {0x09DF, 0x09E3},
505 {0x09F0, 0x09F1}, {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28},
506 {0x0A2A, 0x0A30}, {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, {0x0A38, 0x0A39},
507 {0x0A3E, 0x0A42}, {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A59, 0x0A5C},
508 {0x0A81, 0x0A83}, {0x0A85, 0x0A8B}, {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8},
509 {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3}, {0x0AB5, 0x0AB9}, {0x0ABD, 0x0AC5},
510 {0x0AC7, 0x0AC9}, {0x0ACB, 0x0ACD}, {0x0B01, 0x0B03}, {0x0B05, 0x0B0C},
511 {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, {0x0B32, 0x0B33},
512 {0x0B36, 0x0B39}, {0x0B3E, 0x0B43}, {0x0B47, 0x0B48}, {0x0B4B, 0x0B4D},
513 {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B61}, {0x0B82, 0x0B83}, {0x0B85, 0x0B8A},
514 {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A}, {0x0B9E, 0x0B9F},
515 {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA}, {0x0BAE, 0x0BB5}, {0x0BB7, 0x0BB9},
516 {0x0BBE, 0x0BC2}, {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD}, {0x0C01, 0x0C03},
517 {0x0C05, 0x0C0C}, {0x0C0E, 0x0C10}, {0x0C12, 0x0C28}, {0x0C2A, 0x0C33},
518 {0x0C35, 0x0C39}, {0x0C3E, 0x0C44}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D},
519 {0x0C60, 0x0C61}, {0x0C82, 0x0C83}, {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90},
520 {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3}, {0x0CB5, 0x0CB9}, {0x0CBE, 0x0CC4},
521 {0x0CC6, 0x0CC8}, {0x0CCA, 0x0CCD}, {0x0CE0, 0x0CE1}, {0x0D02, 0x0D03},
522 {0x0D05, 0x0D0C}, {0x0D0E, 0x0D10}, {0x0D12, 0x0D28}, {0x0D2A, 0x0D39},
523 {0x0D3E, 0x0D43}, {0x0D46, 0x0D48}, {0x0D4A, 0x0D4D}, {0x0D60, 0x0D61},
524 {0x0E01, 0x0E3A}, {0x0E40, 0x0E5B}, {0x0E81, 0x0E82}, {0x0E87, 0x0E88},
525 {0x0E94, 0x0E97}, {0x0E99, 0x0E9F}, {0x0EA1, 0x0EA3}, {0x0EAA, 0x0EAB},
526 {0x0EAD, 0x0EAE}, {0x0EB0, 0x0EB9}, {0x0EBB, 0x0EBD}, {0x0EC0, 0x0EC4},
527 {0x0EC8, 0x0ECD}, {0x0EDC, 0x0EDD}, {0x0F18, 0x0F19}, {0x0F3E, 0x0F47},
528 {0x0F49, 0x0F69}, {0x0F71, 0x0F84}, {0x0F86, 0x0F8B}, {0x0F90, 0x0F95},
529 {0x0F99, 0x0FAD}, {0x0FB1, 0x0FB7}, {0x10A0, 0x10C5}, {0x10D0, 0x10F6},
530 {0x3041, 0x3093}, {0x309B, 0x309C}, {0x30A1, 0x30F6}, {0x30FB, 0x30FC},
531 {0x3105, 0x312C}, {0x4E00, 0x9FA5}, {0xAC00, 0xD7A3}, {0x0660, 0x0669},
532 {0x06F0, 0x06F9}, {0x0966, 0x096F}, {0x09E6, 0x09EF}, {0x0A66, 0x0A6F},
533 {0x0AE6, 0x0AEF}, {0x0B66, 0x0B6F}, {0x0BE7, 0x0BEF}, {0x0C66, 0x0C6F},
534 {0x0CE6, 0x0CEF}, {0x0D66, 0x0D6F}, {0x0E50, 0x0E59}, {0x0ED0, 0x0ED9},
535 {0x0F20, 0x0F33}, {0x02B0, 0x02B8}, {0x02BD, 0x02C1}, {0x02D0, 0x02D1},
536 {0x02E0, 0x02E4}, {0x203F, 0x2040}, {0x210A, 0x2113}, {0x2118, 0x211D},
537 {0x212A, 0x2131}, {0x2133, 0x2138}, {0x2160, 0x2182}, {0x3005, 0x3007},
540 for (size_t i = 0; i < sizeof(ranges)/sizeof(ranges[0]); ++i) {
541 if (ranges[i][0] <= v && v <= ranges[i][1])
544 for (size_t i = 0; i < sizeof(single_chars)/sizeof(single_chars[0]); ++i) {
545 if (v == single_chars[i])
551 static bool is_universal_char_valid_identifier_c11(utf32 const v)
554 if ( v == 0x000A8) return true;
555 if ( v == 0x000AA) return true;
556 if ( v == 0x000AD) return true;
557 if ( v == 0x000AF) return true;
558 if (0x000B2 <= v && v <= 0x000B5) return true;
559 if (0x000B7 <= v && v <= 0x000BA) return true;
560 if (0x000BC <= v && v <= 0x000BE) return true;
561 if (0x000C0 <= v && v <= 0x000D6) return true;
562 if (0x000D8 <= v && v <= 0x000F6) return true;
563 if (0x000F8 <= v && v <= 0x000FF) return true;
564 if (0x00100 <= v && v <= 0x0167F) return true;
565 if (0x01681 <= v && v <= 0x0180D) return true;
566 if (0x0180F <= v && v <= 0x01FFF) return true;
567 if (0x0200B <= v && v <= 0x0200D) return true;
568 if (0x0202A <= v && v <= 0x0202E) return true;
569 if (0x0203F <= v && v <= 0x02040) return true;
570 if ( v == 0x02054) return true;
571 if (0x02060 <= v && v <= 0x0206F) return true;
572 if (0x02070 <= v && v <= 0x0218F) return true;
573 if (0x02460 <= v && v <= 0x024FF) return true;
574 if (0x02776 <= v && v <= 0x02793) return true;
575 if (0x02C00 <= v && v <= 0x02DFF) return true;
576 if (0x02E80 <= v && v <= 0x02FFF) return true;
577 if (0x03004 <= v && v <= 0x03007) return true;
578 if (0x03021 <= v && v <= 0x0302F) return true;
579 if (0x03031 <= v && v <= 0x0303F) return true;
580 if (0x03040 <= v && v <= 0x0D7FF) return true;
581 if (0x0F900 <= v && v <= 0x0FD3D) return true;
582 if (0x0FD40 <= v && v <= 0x0FDCF) return true;
583 if (0x0FDF0 <= v && v <= 0x0FE44) return true;
584 if (0x0FE47 <= v && v <= 0x0FFFD) return true;
585 if (0x10000 <= v && v <= 0x1FFFD) return true;
586 if (0x20000 <= v && v <= 0x2FFFD) return true;
587 if (0x30000 <= v && v <= 0x3FFFD) return true;
588 if (0x40000 <= v && v <= 0x4FFFD) return true;
589 if (0x50000 <= v && v <= 0x5FFFD) return true;
590 if (0x60000 <= v && v <= 0x6FFFD) return true;
591 if (0x70000 <= v && v <= 0x7FFFD) return true;
592 if (0x80000 <= v && v <= 0x8FFFD) return true;
593 if (0x90000 <= v && v <= 0x9FFFD) return true;
594 if (0xA0000 <= v && v <= 0xAFFFD) return true;
595 if (0xB0000 <= v && v <= 0xBFFFD) return true;
596 if (0xC0000 <= v && v <= 0xCFFFD) return true;
597 if (0xD0000 <= v && v <= 0xDFFFD) return true;
598 if (0xE0000 <= v && v <= 0xEFFFD) return true;
602 static bool is_universal_char_valid_identifier(utf32 const v)
605 return is_universal_char_valid_identifier_c11(v);
606 return is_universal_char_valid_identifier_c99(v);
/**
 * Reports whether @p v lies in one of four code point ranges that may not
 * begin an identifier. The ranges are only consulted in C11 mode; what is
 * returned outside C11 mode is not visible here.
 */
609 static bool is_universal_char_invalid_identifier_start(utf32 const v)
611 if (! (c_mode & _C11))
615 if (0x0300 <= v && v <= 0x036F) return true;
616 if (0x1DC0 <= v && v <= 0x1DFF) return true;
617 if (0x20D0 <= v && v <= 0x20FF) return true;
618 if (0xFE20 <= v && v <= 0xFE2F) return true;
623 * Parse an escape sequence.
625 static utf32 parse_escape_sequence(void)
/* ec is the character that selects the kind of escape */
629 utf32 const ec = input.c;
/* simple escapes map directly to their character value */
633 case '"': return '"';
634 case '\'': return '\'';
635 case '\\': return '\\';
636 case '?': return '\?';
637 case 'a': return '\a';
638 case 'b': return '\b';
639 case 'f': return '\f';
640 case 'n': return '\n';
641 case 'r': return '\r';
642 case 't': return '\t';
643 case 'v': return '\v';
/* hexadecimal escape (the case label is not visible here) */
645 return parse_hex_sequence();
/* octal escape: ec is passed on as the first digit */
654 return parse_octal_sequence(ec);
656 parse_error("reached end of file while parsing escape sequence");
658 /* \E is not documented, but handled, by GCC. It is acceptable according
659 * to §6.11.4, whereas \e is not. */
663 return 27; /* hopefully 27 is ALWAYS the code for ESCAPE */
/* universal character names: 8 hex digits after \U, 4 after \u */
666 case 'U': return parse_universal_char(8);
667 case 'u': return parse_universal_char(4);
672 /* §6.4.4.4:8 footnote 64 */
673 parse_error("unknown escape sequence");
/**
 * Inserts @p string into the file-scope stringset.
 *
 * If strset_insert() hands back a different pointer (presumably an equal
 * string that was inserted earlier), the new copy is released from
 * symbol_obstack again.
 *
 * @param string string allocated on symbol_obstack
 */
677 static const char *identify_string(char *string)
679 const char *result = strset_insert(&stringset, string);
680 if (result != string) {
681 obstack_free(&symbol_obstack, string);
/**
 * Turns the object under construction on symbol_obstack into a string_t.
 *
 * Appends a terminating NUL, finishes the obstack object and passes it
 * through identify_string(). The recorded length does not include the NUL.
 *
 * @param enc encoding stored in the resulting string_t
 */
686 static string_t sym_make_string(string_encoding_t const enc)
688 obstack_1grow(&symbol_obstack, '\0');
689 size_t const len = obstack_object_size(&symbol_obstack) - 1;
690 char *const string = obstack_finish(&symbol_obstack);
691 char const *const result = identify_string(string);
692 return (string_t){ result, len, enc };
/**
 * Builds a string_t with STRING_ENCODING_CHAR from a NUL-terminated C string
 * by copying its characters onto symbol_obstack and finishing them with
 * sym_make_string().
 */
695 string_t make_string(char const *const string)
697 obstack_grow(&symbol_obstack, string, strlen(string));
698 return sym_make_string(STRING_ENCODING_CHAR);
/**
 * Returns the upper limit associated with a string encoding: 0xFF for char,
 * 0xFFFF for char16 and 0xFFFFFFFF for char32, UTF-8 and wide strings.
 * parse_string() fetches this limit before decoding escape sequences.
 * Panics for any other encoding value.
 */
701 static utf32 get_string_encoding_limit(string_encoding_t const enc)
704 case STRING_ENCODING_CHAR: return 0xFF;
705 case STRING_ENCODING_CHAR16: return 0xFFFF;
706 case STRING_ENCODING_CHAR32: return 0xFFFFFFFF;
707 case STRING_ENCODING_UTF8: return 0xFFFFFFFF;
708 case STRING_ENCODING_WIDE: return 0xFFFFFFFF; // FIXME depends on settings
710 panic("invalid string encoding");
/**
 * Lexes the body of a string literal or character constant, collecting its
 * contents on symbol_obstack, and stores the result in pp_token.
 *
 * @param delimiter closing character to look for
 * @param kind      token kind assigned to pp_token
 * @param enc       encoding recorded in the resulting string
 * @param context   description used in the newline/EOF error messages
 */
713 static void parse_string(utf32 const delimiter, token_kind_t const kind,
714 string_encoding_t const enc,
715 char const *const context)
719 utf32 const limit = get_string_encoding_limit(enc);
723 if (resolve_escape_sequences) {
724 utf32 const tc = parse_escape_sequence();
/* the guarding condition is not visible here; presumably tc > limit */
726 warningf(WARN_OTHER, &pp_token.base.pos,
727 "escape sequence out of range");
/* plain char strings take the value as a single byte, other encodings
 * store it UTF-8 encoded */
729 if (enc == STRING_ENCODING_CHAR) {
730 obstack_1grow(&symbol_obstack, tc);
732 obstack_grow_utf8(&symbol_obstack, tc);
/* presumably the branch for unresolved escapes: characters are copied raw */
735 obstack_1grow(&symbol_obstack, (char)input.c);
737 obstack_1grow(&symbol_obstack, (char)input.c);
744 errorf(&pp_token.base.pos, "newline while parsing %s", context);
748 errorf(&pp_token.base.pos, "EOF while parsing %s", context);
752 if (input.c == delimiter) {
756 obstack_grow_utf8(&symbol_obstack, input.c);
764 pp_token.kind = kind;
765 pp_token.literal.string = sym_make_string(enc);
/**
 * Lexes a string literal delimited by '"' into pp_token (T_STRING_LITERAL).
 *
 * @param enc encoding of the literal
 */
768 static void parse_string_literal(string_encoding_t const enc)
770 parse_string('"', T_STRING_LITERAL, enc, "string literal");
/**
 * Lexes a character constant delimited by '\'' into pp_token
 * (T_CHARACTER_CONSTANT) and reports an error if it is empty.
 *
 * @param enc encoding of the constant
 */
773 static void parse_character_constant(string_encoding_t const enc)
775 parse_string('\'', T_CHARACTER_CONSTANT, enc, "character constant");
776 if (pp_token.literal.string.size == 0) {
777 parse_error("empty character constant");
781 #define SYMBOL_CASES_WITHOUT_E_P \
782 '$': if (!allow_dollar_in_symbol) goto dollar_sign; \
833 #define SYMBOL_CASES \
834 SYMBOL_CASES_WITHOUT_E_P: \
840 #define DIGIT_CASES \
/**
 * Begins expanding @p definition: links it above the current expansion,
 * rewinds its read position and makes it the current expansion.
 */
852 static void start_expanding(pp_definition_t *definition)
854 definition->parent_expansion = current_expansion;
855 definition->expand_pos = 0;
856 definition->is_expanding = true;
/* the first replacement token takes over the whitespace flag currently
 * recorded in info */
857 if (definition->list_len > 0) {
858 definition->token_list[0].had_whitespace
859 = info.had_whitespace;
861 current_expansion = definition;
/**
 * Ends the expansion of @p definition, which must be the current expansion
 * (asserted), and makes its parent the current expansion again.
 */
864 static void finished_expanding(pp_definition_t *definition)
866 assert(definition->is_expanding);
867 pp_definition_t *parent = definition->parent_expansion;
868 definition->parent_expansion = NULL;
869 definition->is_expanding = false;
871 /* stop further expanding once we expanded a parameter used in a
873 if (definition == argument_expanding)
874 argument_expanding = NULL;
876 assert(current_expansion == definition);
877 current_expansion = parent;
/**
 * Appends a string or character literal in source form to @p obst:
 * encoding prefix, delimiter, contents, delimiter.
 *
 * @param obst      obstack to append to
 * @param string    literal to print
 * @param delimiter text emitted before and after the contents
 */
880 static void grow_string_escaped(struct obstack *obst, const string_t *string, char const *delimiter)
882 char const *prefix = get_string_encoding_prefix(string->encoding);
883 obstack_printf(obst, "%s%s", prefix, delimiter);
884 size_t size = string->size;
885 const char *str = string->begin;
/* with resolved escapes the contents are copied unchanged */
886 if (resolve_escape_sequences) {
887 obstack_grow(obst, str, size);
/* presumably the else branch (not visible here): each backslash and double
 * quote gets a backslash in front of it */
889 for (size_t i = 0; i < size; ++i) {
890 const char c = str[i];
891 if (c == '\\' || c == '"')
892 obstack_1grow(obst, '\\');
893 obstack_1grow(obst, c);
896 obstack_printf(obst, "%s", delimiter);
/**
 * Appends the spelling of @p token to @p obst.
 */
899 static void grow_token(struct obstack *obst, const token_t *token)
901 switch (token->kind) {
/* literal text is copied as is (the case label is not visible here) */
903 obstack_grow(obst, token->literal.string.begin, token->literal.string.size);
906 case T_STRING_LITERAL: {
/* the quote itself is emitted with a backslash when escapes are unresolved */
907 char const *const delimiter = resolve_escape_sequences ? "\"" : "\\\"";
908 grow_string_escaped(obst, &token->literal.string, delimiter);
912 case T_CHARACTER_CONSTANT:
913 grow_string_escaped(obst, &token->literal.string, "'");
/* remaining tokens are spelled by their symbol's string */
918 const char *str = token->base.symbol->string;
919 size_t len = strlen(str);
920 obstack_grow(obst, str, len);
/**
 * Concatenates the spellings of all tokens of @p definition on
 * symbol_obstack and stores the result in pp_token as a T_STRING_LITERAL
 * with STRING_ENCODING_CHAR. No object may be under construction on
 * symbol_obstack when this is called (asserted).
 */
926 static void stringify(const pp_definition_t *definition)
928 assert(obstack_object_size(&symbol_obstack) == 0);
930 size_t list_len = definition->list_len;
931 for (size_t p = 0; p < list_len; ++p) {
932 const saved_token_t *saved = &definition->token_list[p];
/* one space stands for any whitespace between tokens; whitespace before
 * the first token is dropped */
933 if (p > 0 && saved->had_whitespace)
934 obstack_1grow(&symbol_obstack, ' ');
935 grow_token(&symbol_obstack, &saved->token);
937 pp_token.kind = T_STRING_LITERAL;
938 pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
/**
 * Makes pp_token a punctuator of the given kind, using the symbol registered
 * for that kind in token_symbols.
 */
941 static inline void set_punctuator(token_kind_t const kind)
943 pp_token.kind = kind;
944 pp_token.base.symbol = token_symbols[kind];
/**
 * Makes pp_token a token of the given kind but with an explicitly supplied
 * symbol instead of the one from token_symbols. The callers in this file
 * pass the digraph symbols created by init_symbols().
 */
947 static inline void set_digraph(token_kind_t const kind, symbol_t *const symbol)
949 pp_token.kind = kind;
950 pp_token.base.symbol = symbol;
954 * returns next final token from a preprocessor macro expansion
/* Loads the next token of the current macro expansion into pp_token.
 * The returned values are not visible in this excerpt. */
956 static bool expand_next(void)
/* no expansion in progress */
958 if (current_expansion == NULL)
962 size_t pos = current_expansion->expand_pos;
/* replacement list used up: leave this expansion */
963 if (pos >= current_expansion->list_len) {
964 finished_expanding(current_expansion);
965 /* it was the outermost expansion, parse pptoken normally */
966 if (current_expansion == NULL) {
971 const saved_token_t *saved = &current_expansion->token_list[pos++];
972 pp_token = saved->token;
/* a '#' directly followed by a macro parameter is singled out; what is done
 * with the parameter is not visible here (presumably stringification) */
973 if (pp_token.kind == '#') {
974 if (pos < current_expansion->list_len) {
975 const saved_token_t *next = &current_expansion->token_list[pos];
976 if (next->token.kind == T_MACRO_PARAMETER) {
977 pp_definition_t *def = next->token.macro_parameter.def;
978 assert(def != NULL && def->is_parameter);
/* the whitespace flag of the saved token is taken over for every token
 * except the first one of the expansion */
985 if (current_expansion->expand_pos > 0)
986 info.had_whitespace = saved->had_whitespace;
987 current_expansion->expand_pos = pos;
/* the token is reported at expansion_pos instead of its saved position */
988 pp_token.base.pos = expansion_pos;
994 * Returns the next token kind found when continuing the current expansions
995 * without starting new sub-expansions.
997 static token_kind_t peek_expansion(void)
/* walk from the innermost expansion outwards; the first expansion that still
 * has an unread token decides the result */
999 for (pp_definition_t *e = current_expansion; e; e = e->parent_expansion) {
1000 if (e->expand_pos < e->list_len)
1001 return e->token_list[e->expand_pos].token.kind;
1006 static void skip_line_comment(void)
1008 info.had_whitespace = true;
/**
 * Skips a multi-line comment. The comment counts as whitespace for the
 * following token, and an error is reported at the comment's start position
 * if the input ends before the comment does.
 */
1025 static void skip_multiline_comment(void)
1027 info.had_whitespace = true;
/* remembered for the end-of-file diagnostic below */
1029 position_t const start_pos = input.pos;
1034 if (input.c == '*') {
1035 /* TODO: nested comment, warn here */
1040 if (input.c == '/') {
/* if the current input line differs from the recorded output line, the
 * current column becomes the whitespace count for the line */
1041 if (input.pos.lineno != input.output_line)
1042 info.whitespace_at_line_begin = input.pos.colno;
1052 errorf(&start_pos, "at end of file while looking for comment end");
1062 static bool skip_till_newline(bool stop_at_non_whitespace)
1074 if (input.c == '/') {
1076 skip_line_comment();
1078 } else if (input.c == '*') {
1080 skip_multiline_comment();
1092 if (stop_at_non_whitespace)
1101 static void skip_whitespace(void)
1107 ++info.whitespace_at_line_begin;
1108 info.had_whitespace = true;
1113 info.at_line_begin = true;
1114 info.had_whitespace = true;
1115 info.whitespace_at_line_begin = 0;
1120 if (input.c == '/') {
1122 skip_line_comment();
1124 } else if (input.c == '*') {
1126 skip_multiline_comment();
1140 static inline void eat_pp(pp_token_kind_t const kind)
1142 assert(pp_token.base.symbol->pp_ID == kind);
1147 static inline void eat_token(token_kind_t const kind)
1149 assert(pp_token.kind == kind);
/**
 * Maps an identifier symbol to the string encoding it selects when used as
 * a literal prefix.
 *
 * @param sym symbol of the identifier in front of the literal
 * @return STRING_ENCODING_WIDE for L; in C11 mode also the encodings for
 *         U, u and u8; STRING_ENCODING_CHAR if @p sym is no prefix
 */
1154 static string_encoding_t identify_encoding_prefix(symbol_t *const sym)
1156 if (sym == symbol_L) return STRING_ENCODING_WIDE;
1157 if (c_mode & _C11) {
1158 if (sym == symbol_U) return STRING_ENCODING_CHAR32;
1159 if (sym == symbol_u) return STRING_ENCODING_CHAR16;
1160 if (sym == symbol_u8) return STRING_ENCODING_UTF8;
1162 return STRING_ENCODING_CHAR;
/* Lexes an identifier, including universal character names, into pp_token.
 * An identifier that is an encoding prefix and is directly followed by a
 * quote is lexed as a prefixed string literal or character constant. */
1165 static void parse_symbol(void)
1167 assert(obstack_object_size(&symbol_obstack) == 0);
1172 obstack_1grow(&symbol_obstack, (char) input.c);
/* universal character names: n is the number of hex digits that follow */
1181 case 'U': n = 8; goto universal;
1182 case 'u': n = 4; goto universal;
/* with unresolved escapes the escape is kept in its source spelling */
1184 if (!resolve_escape_sequences) {
1185 obstack_1grow(&symbol_obstack, '\\');
1186 obstack_1grow(&symbol_obstack, input.c);
1189 utf32 const v = parse_universal_char(n);
1190 if (!is_universal_char_valid_identifier(v)) {
1191 if (is_universal_char_valid(v)) {
1193 "universal character \\%c%0*X is not valid in an identifier",
1194 n == 4 ? 'u' : 'U', (int)n, v);
/* NOTE(review): with unresolved escapes the obstack already holds the
 * backslash and the u/U at this point, so the size test below looks like it
 * can only succeed when escapes are resolved -- confirm that is intended */
1196 } else if (obstack_object_size(&symbol_obstack) == 0 && is_universal_char_invalid_identifier_start(v)) {
1198 "universal character \\%c%0*X is not valid as start of an identifier",
1199 n == 4 ? 'u' : 'U', (int)n, v);
1200 } else if (resolve_escape_sequences) {
1201 obstack_grow_utf8(&symbol_obstack, v);
1219 obstack_1grow(&symbol_obstack, '\0');
1220 char *string = obstack_finish(&symbol_obstack);
1222 symbol_t *symbol = symbol_table_insert(string);
1224 /* Might be a prefixed string or character constant: L/U/u/u8"string". */
1225 if (input.c == '"') {
1226 string_encoding_t const enc = identify_encoding_prefix(symbol);
1227 if (enc != STRING_ENCODING_CHAR) {
1228 parse_string_literal(enc);
1231 } else if (input.c == '\'') {
1232 string_encoding_t const enc = identify_encoding_prefix(symbol);
1233 if (enc != STRING_ENCODING_CHAR) {
1234 if (enc == STRING_ENCODING_UTF8) {
1235 errorf(&pp_token.base.pos,
1236 "'u8' is not a valid encoding for a character constant");
1238 parse_character_constant(enc);
1243 pp_token.kind = symbol->ID;
1244 pp_token.base.symbol = symbol;
1246 /* we can free the memory from symbol obstack if we already had an entry in
1247 * the symbol table */
1248 if (symbol->string != string) {
1249 obstack_free(&symbol_obstack, string);
/**
 * Lexes a preprocessing number: its characters are collected on
 * symbol_obstack and stored in pp_token as a T_NUMBER literal with
 * STRING_ENCODING_CHAR.
 */
1253 static void parse_number(void)
1255 obstack_1grow(&symbol_obstack, (char) input.c);
1262 case SYMBOL_CASES_WITHOUT_E_P:
1263 obstack_1grow(&symbol_obstack, (char) input.c);
/* presumably the exponent characters excluded from the case list above
 * (the case labels are not visible here) */
1271 obstack_1grow(&symbol_obstack, (char) input.c);
/* a sign at this point is taken as part of the number */
1273 if (input.c == '+' || input.c == '-') {
1274 obstack_1grow(&symbol_obstack, (char) input.c);
1286 pp_token.kind = T_NUMBER;
1287 pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
1290 #define MAYBE_PROLOG \
1294 #define MAYBE(ch, kind) \
1297 set_punctuator(kind); \
1300 #define MAYBE_DIGRAPH(ch, kind, symbol) \
1303 set_digraph(kind, symbol); \
1306 #define ELSE_CODE(code) \
1311 #define ELSE(kind) ELSE_CODE(set_punctuator(kind); return;)
1313 /** Identifies the next preprocessing token contained in the
1314 * input stream and stores it in pp_token. No macro expansion is performed. */
1315 static void next_input_token(void)
1317 if (next_info.had_whitespace) {
1319 next_info.had_whitespace = false;
1321 info.at_line_begin = false;
1322 info.had_whitespace = false;
1325 pp_token.base.pos = input.pos;
1326 pp_token.base.symbol = NULL;
1331 info.whitespace_at_line_begin++;
1332 info.had_whitespace = true;
1337 info.at_line_begin = true;
1338 info.had_whitespace = true;
1339 info.whitespace_at_line_begin = 0;
1351 parse_string_literal(STRING_ENCODING_CHAR);
1355 parse_character_constant(STRING_ENCODING_CHAR);
1377 MAYBE('.', T_DOTDOTDOT)
1381 set_punctuator('.');
1387 MAYBE('&', T_ANDAND)
1388 MAYBE('=', T_ANDEQUAL)
1392 MAYBE('=', T_ASTERISKEQUAL)
1396 MAYBE('+', T_PLUSPLUS)
1397 MAYBE('=', T_PLUSEQUAL)
1401 MAYBE('>', T_MINUSGREATER)
1402 MAYBE('-', T_MINUSMINUS)
1403 MAYBE('=', T_MINUSEQUAL)
1407 MAYBE('=', T_EXCLAMATIONMARKEQUAL)
1411 MAYBE('=', T_SLASHEQUAL)
1414 skip_multiline_comment();
1418 skip_line_comment();
1423 MAYBE_DIGRAPH('>', '}', symbol_percentgreater)
1424 MAYBE('=', T_PERCENTEQUAL)
1429 MAYBE_DIGRAPH(':', T_HASHHASH, symbol_percentcolonpercentcolon)
1433 goto digraph_percentcolon;
1436 digraph_percentcolon:
1437 set_digraph('#', symbol_percentcolon);
1443 MAYBE_DIGRAPH(':', '[', symbol_lesscolon)
1444 MAYBE_DIGRAPH('%', '{', symbol_lesspercent)
1445 MAYBE('=', T_LESSEQUAL)
1448 MAYBE('=', T_LESSLESSEQUAL)
1453 MAYBE('=', T_GREATEREQUAL)
1456 MAYBE('=', T_GREATERGREATEREQUAL)
1457 ELSE(T_GREATERGREATER)
1461 MAYBE('=', T_CARETEQUAL)
1465 MAYBE('=', T_PIPEEQUAL)
1466 MAYBE('|', T_PIPEPIPE)
1470 MAYBE_DIGRAPH('>', ']', symbol_colongreater)
1472 if (c_mode & _CXX) {
1474 set_punctuator(T_COLONCOLON);
1481 MAYBE('=', T_EQUALEQUAL)
1485 MAYBE('#', T_HASHHASH)
1498 set_punctuator(input.c);
1503 if (input_stack != NULL) {
1504 fclose(close_pp_input());
1505 pop_restore_input();
1508 if (input.c == (utf32)EOF)
1510 print_line_directive(&input.pos, "2");
1513 info.at_line_begin = true;
1514 set_punctuator(T_EOF);
1520 int next_c = input.c;
1523 if (next_c == 'U' || next_c == 'u') {
1530 if (error_on_unknown_chars) {
1531 errorf(&pp_token.base.pos, "unknown character '%lc' found", input.c);
1535 assert(obstack_object_size(&symbol_obstack) == 0);
1536 obstack_grow_utf8(&symbol_obstack, input.c);
1537 obstack_1grow(&symbol_obstack, '\0');
1538 char *const string = obstack_finish(&symbol_obstack);
1539 symbol_t *const symbol = symbol_table_insert(string);
1540 if (symbol->string != string)
1541 obstack_free(&symbol_obstack, string);
1543 pp_token.kind = T_UNKNOWN_CHAR;
1544 pp_token.base.symbol = symbol;
/* Writes the characters of a NUL-terminated string to `out`, replacing the
 * characters listed below with their C escape sequences. The opening and
 * closing quotes are not visible in this excerpt. */
1551 static void print_quoted_string(const char *const string)
1554 for (const char *c = string; *c != 0; ++c) {
1556 case '"': fputs("\\\"", out); break;
1557 case '\\': fputs("\\\\", out); break;
1558 case '\a': fputs("\\a", out); break;
1559 case '\b': fputs("\\b", out); break;
1560 case '\f': fputs("\\f", out); break;
1561 case '\n': fputs("\\n", out); break;
1562 case '\r': fputs("\\r", out); break;
1563 case '\t': fputs("\\t", out); break;
1564 case '\v': fputs("\\v", out); break;
1565 case '\?': fputs("\\?", out); break;
/* Octal escape for the remaining characters the enclosing condition selects
 * (not visible here). Convert through unsigned char first: where char is
 * signed, a byte >= 0x80 would otherwise be sign-extended and printed as an
 * eleven-digit octal number instead of three digits. */
1568 fprintf(out, "\\%03o", (unsigned)(unsigned char)*c);
/**
 * Prints a line directive of the form `# <lineno> <quoted input name>` to
 * `out` and records the position as the current output position.
 *
 * @param pos position to announce
 * @param add optional extra text; its use is not visible here (callers pass
 *            "1", "2" or NULL)
 */
1578 static void print_line_directive(const position_t *pos, const char *add)
1583 fprintf(out, "# %u ", pos->lineno);
1584 print_quoted_string(pos->input_name);
1589 if (pos->is_system_header) {
1593 printed_input_name = pos->input_name;
/* stored as one less than the announced line number */
1594 input.output_line = pos->lineno-1;
/**
 * Brings the output line in sync with the line of pp_token and reproduces
 * the whitespace at the beginning of the line.
 *
 * The returned values are not visible here; emit_pp_token() only considers
 * printing a separator when this function returns false.
 */
1597 static bool emit_newlines(void)
/* number of lines the output is behind the token's line */
1602 unsigned delta = pp_token.base.pos.lineno - input.output_line;
/* the condition selecting a line directive over the loop below is not
 * visible here */
1608 print_line_directive(&pp_token.base.pos, NULL);
1611 for (unsigned i = 0; i < delta; ++i) {
1615 input.output_line = pp_token.base.pos.lineno;
1617 unsigned whitespace = info.whitespace_at_line_begin;
1618 /* make sure there is at least 1 whitespace before a (macro-expanded)
1619 * '#' at line begin. I'm not sure why this is good, but gcc does it. */
1620 if (pp_token.kind == '#' && whitespace == 0)
1622 for (unsigned i = 0; i < whitespace; ++i)
/**
 * Configures the lexer for one of two modes. The condition selecting between
 * them is not visible here (presumably whether @p output is non-NULL --
 * confirm).
 */
1628 void set_preprocessor_output(FILE *output)
/* first mode: unknown characters are not errors, escapes stay unresolved */
1632 error_on_unknown_chars = false;
1633 resolve_escape_sequences = false;
/* second mode: unknown characters are errors, escapes are resolved */
1635 error_on_unknown_chars = true;
1636 resolve_escape_sequences = true;
/**
 * Writes the spelling of pp_token to `out` and remembers its kind in
 * last_token.
 */
1640 void emit_pp_token(void)
/* when emit_newlines() returns false, a separator is considered if the
 * token had whitespace in front or would paste with the previous token
 * (the statement printing it is not visible here) */
1642 if (!emit_newlines() &&
1643 (info.had_whitespace || tokens_would_paste(last_token, pp_token.kind)))
1646 switch (pp_token.kind) {
1648 fputs(pp_token.literal.string.begin, out);
1651 case T_STRING_LITERAL:
1652 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1654 fputs(pp_token.literal.string.begin, out);
1658 case T_CHARACTER_CONSTANT:
1659 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1661 fputs(pp_token.literal.string.begin, out);
/* macro parameters must have been substituted before output */
1665 case T_MACRO_PARAMETER:
1666 panic("macro parameter not expanded");
1669 fputs(pp_token.base.symbol->string, out);
1672 last_token = pp_token.kind;
/**
 * Skips the remainder of the current directive: loops until
 * info.at_line_begin is set. NOTE(review): the loop body is elided from this
 * listing; presumably it fetches the next token.
 */
1675 static void eat_pp_directive(void)
1677 while (!info.at_line_begin) {
/**
 * Compares two strings that carry an explicit size: the sizes are compared
 * first, then the characters are walked pairwise. NOTE(review): the return
 * statements and the loop body are elided from this listing.
 */
1682 static bool strings_equal(const string_t *string1, const string_t *string2)
1684 size_t size = string1->size;
1685 if (size != string2->size)
1688 const char *c1 = string1->begin;
1689 const char *c2 = string2->begin;
1690 for (size_t i = 0; i < size; ++i, ++c1, ++c2) {
/**
 * Tells whether two tokens are equal; used by pp_definitions_equal() to
 * compare replacement lists. Tokens of different kind are handled up front;
 * character and string literals are compared by content, macro parameters
 * by the symbol of the parameter definition they refer to, and the remaining
 * visible case by symbol identity.
 */
1697 static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
1699 if (token1->kind != token2->kind)
1702 switch (token1->kind) {
1704 case T_CHARACTER_CONSTANT:
1705 case T_STRING_LITERAL:
1706 return strings_equal(&token1->literal.string, &token2->literal.string);
1708 case T_MACRO_PARAMETER:
1709 return token1->macro_parameter.def->symbol
1710 == token2->macro_parameter.def->symbol;
1713 return token1->base.symbol == token2->base.symbol;
/**
 * Compares the replacement lists of two macro definitions: same number of
 * tokens, pairwise equal tokens and identical had_whitespace flags.
 *
 * NOTE(review): nothing visible here compares has_parameters, is_variadic or
 * the parameter lists; check whether the elided lines do. Otherwise a
 * redefinition such as `f(a,b) a` after `f(a) a` would be treated as equal.
 */
1717 static bool pp_definitions_equal(const pp_definition_t *definition1,
1718 const pp_definition_t *definition2)
1720 if (definition1->list_len != definition2->list_len)
1723 size_t len = definition1->list_len;
1724 const saved_token_t *t1 = definition1->token_list;
1725 const saved_token_t *t2 = definition2->token_list;
1726 for (size_t i = 0; i < len; ++i, ++t1, ++t2) {
1727 if (!pp_tokens_equal(&t1->token, &t2->token))
1729 if (t1->had_whitespace != t2->had_whitespace)
1735 static void missing_macro_param_error(void)
1737 errorf(&pp_token.base.pos, "'#' is not followed by a macro parameter");
/**
 * Checks whether the current token may serve as a macro name for the
 * directive named by context (called with "#define" and "#undef").
 *
 * Reports an error when the line already ended, when the token is not an
 * identifier, or when the pp_ID of its symbol marks a name that cannot be
 * used as macro name. NOTE(review): the return statements and case labels
 * are elided from this listing; both callers test the result with `!`.
 */
1740 static bool is_defineable_token(char const *const context)
1742 if (info.at_line_begin) {
1743 errorf(&pp_token.base.pos, "unexpected end of line after %s", context);
1746 symbol_t *const symbol = pp_token.base.symbol;
1750 if (pp_token.kind != T_IDENTIFIER) {
1751 switch (symbol->string[0]) {
1758 errorf(&pp_token.base.pos, "expected identifier after %s, got %K",
1759 context, &pp_token);
1764 /* TODO turn this into a flag in pp_def. */
1765 switch (symbol->pp_ID) {
1768 errorf(&pp_token.base.pos, "%K cannot be used as macro name in %s",
1769 &pp_token, context);
/**
 * Parses a #define directive and installs the resulting macro definition.
 *
 * Steps visible in this listing:
 *  - validate the macro name and allocate a zeroed pp_definition_t on
 *    pp_obstack,
 *  - if '(' follows the name without whitespace, parse the parameter list
 *    (identifiers and '...') and temporarily bind each parameter symbol to
 *    its parameter definition,
 *  - collect the replacement list as saved_token_t entries, turning
 *    identifiers that name a parameter into T_MACRO_PARAMETER tokens and
 *    checking that '#' is followed by a parameter,
 *  - restore the previous bindings of the parameter symbols,
 *  - warn when an existing definition differs, then store the definition in
 *    symbol->pp_definition.
 *
 * NOTE(review): this listing elides lines (see the gaps in the embedded line
 * numbers), among them the error exits. The tokens `¶meter` and `¶m` below
 * look like `&parameter` and `&param` damaged by HTML-entity decoding
 * (`&para` turned into a pilcrow); restore them before compiling.
 */
1777 static void parse_define_directive(void)
1785 assert(obstack_object_size(&pp_obstack) == 0);
1787 if (!is_defineable_token("#define"))
1789 symbol_t *const symbol = pp_token.base.symbol;
1791 pp_definition_t *new_definition
1792 = obstack_alloc(&pp_obstack, sizeof(new_definition[0]));
1793 memset(new_definition, 0, sizeof(new_definition[0]));
1794 new_definition->symbol = symbol;
1795 new_definition->pos = input.pos;
1797 /* this is probably the only place where spaces are significant in the
1798 * lexer (except for the fact that they separate tokens). #define b(x)
1799 * is something else than #define b (x) */
1800 if (input.c == '(') {
1805 switch (pp_token.kind) {
1807 new_definition->is_variadic = true;
1808 eat_token(T_DOTDOTDOT);
1809 if (pp_token.kind != ')') {
1811 "'...' not at end of macro argument list");
1816 case T_IDENTIFIER: {
1817 pp_definition_t parameter;
1818 memset(¶meter, 0, sizeof(parameter));
1819 parameter.pos = pp_token.base.pos;
1820 parameter.symbol = pp_token.base.symbol;
1821 parameter.is_parameter = true;
1822 obstack_grow(&pp_obstack, ¶meter, sizeof(parameter));
1823 eat_token(T_IDENTIFIER);
1825 if (pp_token.kind == ',') {
1830 if (pp_token.kind != ')') {
1831 errorf(&pp_token.base.pos,
1832 "expected ',' or ')' after identifier, got %K",
1841 goto finish_argument_list;
1844 errorf(&pp_token.base.pos,
1845 "expected identifier, '...' or ')' in #define argument list, got %K",
/* All parameters were grown on pp_obstack: finish the array and bind every
 * parameter symbol to its entry so the replacement list can recognize it. */
1851 finish_argument_list:
1852 new_definition->has_parameters = true;
1853 size_t size = obstack_object_size(&pp_obstack);
1854 new_definition->n_parameters
1855 = size / sizeof(new_definition->parameters[0]);
1856 new_definition->parameters = obstack_finish(&pp_obstack);
1857 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1858 pp_definition_t *const param = &new_definition->parameters[i];
1859 symbol_t *const param_sym = param->symbol;
1860 pp_definition_t *const previous = param_sym->pp_definition;
/* a symbol already bound to this definition was listed twice */
1861 if (previous != NULL
1862 && previous->function_definition == new_definition) {
1863 errorf(¶m->pos, "duplicate macro parameter '%Y'", param_sym);
1864 param->symbol = sym_anonymous;
/* parent_expansion temporarily holds the binding that is shadowed */
1867 param->parent_expansion = previous;
1868 param->function_definition = new_definition;
1869 param_sym->pp_definition = param;
1875 /* construct token list */
1876 assert(obstack_object_size(&pp_obstack) == 0);
1877 bool next_must_be_param = false;
1878 while (!info.at_line_begin) {
1879 if (pp_token.kind == T_IDENTIFIER) {
1880 pp_definition_t *const definition = pp_token.base.symbol->pp_definition;
1881 if (definition != NULL
1882 && definition->function_definition == new_definition) {
1883 pp_token.kind = T_MACRO_PARAMETER;
1884 pp_token.macro_parameter.def = definition;
1887 if (next_must_be_param && pp_token.kind != T_MACRO_PARAMETER) {
1888 missing_macro_param_error();
1890 saved_token_t saved_token;
1891 saved_token.token = pp_token;
1892 saved_token.had_whitespace = info.had_whitespace;
1893 obstack_grow(&pp_obstack, &saved_token, sizeof(saved_token));
1895 = new_definition->has_parameters && pp_token.kind == '#';
/* a '#' as last token of the replacement list has no parameter after it */
1898 if (next_must_be_param)
1899 missing_macro_param_error();
1901 new_definition->list_len = obstack_object_size(&pp_obstack)
1902 / sizeof(new_definition->token_list[0]);
1903 new_definition->token_list = obstack_finish(&pp_obstack);
/* undo the temporary bindings of the parameter symbols */
1905 if (new_definition->has_parameters) {
1906 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1907 pp_definition_t *const param = &new_definition->parameters[i];
1908 symbol_t *const param_sym = param->symbol;
1909 if (param_sym == sym_anonymous)
1911 assert(param_sym->pp_definition == param);
1912 assert(param->function_definition == new_definition);
1913 param_sym->pp_definition = param->parent_expansion;
1914 param->parent_expansion = NULL;
/* compare against an already existing definition of the same name */
1918 pp_definition_t *old_definition = symbol->pp_definition;
1919 if (old_definition != NULL) {
1920 if (!pp_definitions_equal(old_definition, new_definition)) {
1921 warningf(WARN_OTHER, &input.pos,
1922 "multiple definition of macro '%Y' (first defined %P)",
1923 symbol, &old_definition->pos);
1925 /* reuse the old definition */
1926 obstack_free(&pp_obstack, new_definition);
1927 new_definition = old_definition;
1931 symbol->pp_definition = new_definition;
/* discard whatever is still being grown on pp_obstack; NOTE(review): the
 * label in front of this cleanup is elided, presumably the error exit */
1935 if (obstack_object_size(&pp_obstack) > 0) {
1936 char *ptr = obstack_finish(&pp_obstack);
1937 obstack_free(&pp_obstack, ptr);
/**
 * Parses an #undef directive: after validating the macro name, drops the
 * definition attached to the symbol and warns about tokens that follow the
 * name on the same line.
 */
1942 static void parse_undef_directive(void)
1950 if (!is_defineable_token("#undef")) {
1955 pp_token.base.symbol->pp_definition = NULL;
1958 if (!info.at_line_begin) {
1959 warningf(WARN_OTHER, &input.pos, "extra tokens at end of #undef directive");
/**
 * Reads the header name of an #include directive and reports through
 * system_include whether the angle-bracket form (true) or the double-quote
 * form (false) was used.
 *
 * Two paths are visible: the name is read character by character from the
 * input up to the closing delimiter, or ordinary preprocessing tokens are
 * fetched and either a string literal is taken as the name or the tokens
 * between '<' and '>' are concatenated (one space where a token had
 * whitespace in front of it). The finished name is terminated, passed
 * through identify_string() and pp_token.base.pos is reset to the position
 * recorded at the start.
 *
 * NOTE(review): return statements are elided; parse_include_directive()
 * checks the result against NULL. The closing line of the original comment
 * that follows is also missing from this listing.
 */
1964 /** behind an #include we can have the special headername lexems.
1965 * They're only allowed behind an #include so they're not recognized
1966 * by the normal next_preprocessing_token. We handle them as a special
1968 static const char *parse_headername(bool *system_include)
1970 if (info.at_line_begin) {
1971 parse_error("expected headername after #include");
1975 /* check whether we have a "... or <... headername */
1976 position_t pos = input.pos;
1980 case '<': delimiter = '>'; *system_include = true; goto parse_name;
1981 case '"': delimiter = '"'; *system_include = false; goto parse_name;
1983 assert(obstack_object_size(&symbol_obstack) == 0);
1990 char *dummy = obstack_finish(&symbol_obstack);
1991 obstack_free(&symbol_obstack, dummy);
1993 errorf(&pp_token.base.pos,
1994 "header name without closing '%c'", (char)delimiter);
1998 if (input.c == delimiter) {
2000 goto finish_headername;
2002 obstack_1grow(&symbol_obstack, (char)input.c);
2008 /* we should never be here */
/* second path: the header name is taken from preprocessing tokens */
2012 next_preprocessing_token();
2013 if (info.at_line_begin) {
2014 /* TODO: if we are already in the new line then we parsed more than
2015 * wanted. We reuse the token, but could produce following errors
2016 * misbehaviours... */
2017 goto error_invalid_input;
2019 if (pp_token.kind == T_STRING_LITERAL) {
2020 *system_include = false;
2021 return pp_token.literal.string.begin;
2022 } else if (pp_token.kind == '<') {
2023 *system_include = true;
2024 assert(obstack_object_size(&pp_obstack) == 0);
2026 next_preprocessing_token();
2027 if (info.at_line_begin) {
2028 /* TODO: we shouldn't have parsed/expanded something on the
2029 * next line yet... */
2030 char *dummy = obstack_finish(&pp_obstack);
2031 obstack_free(&pp_obstack, dummy);
2032 goto error_invalid_input;
2034 if (pp_token.kind == '>')
2037 saved_token_t saved;
2038 saved.token = pp_token;
2039 saved.had_whitespace = info.had_whitespace;
2040 obstack_grow(&pp_obstack, &saved, sizeof(saved));
/* spell the collected tokens out again to form the header name */
2042 size_t size = obstack_object_size(&pp_obstack);
2043 assert(size % sizeof(saved_token_t) == 0);
2044 size_t n_tokens = size / sizeof(saved_token_t);
2045 saved_token_t *tokens = obstack_finish(&pp_obstack);
2046 assert(obstack_object_size(&symbol_obstack) == 0);
2047 for (size_t i = 0; i < n_tokens; ++i) {
2048 const saved_token_t *saved = &tokens[i];
2049 if (i > 0 && saved->had_whitespace)
2050 obstack_1grow(&symbol_obstack, ' ');
2051 grow_token(&symbol_obstack, &saved->token);
2053 obstack_free(&pp_obstack, tokens);
2054 goto finish_headername;
2056 error_invalid_input:
2058 char *dummy = obstack_finish(&symbol_obstack);
2059 obstack_free(&symbol_obstack, dummy);
2062 errorf(&pp_token.base.pos,
2063 "expected \"FILENAME\" or <FILENAME> after #include");
2069 obstack_1grow(&symbol_obstack, '\0');
2070 char *const headername = obstack_finish(&symbol_obstack);
2071 const char *identified = identify_string(headername);
2072 pp_token.base.pos = pos;
/**
 * Tries to open headername and make it the current input.
 *
 * One visible start of the search continues behind the search path entry of
 * the current input (input.path->next) and otherwise picks the bracket or
 * quote search path; NOTE(review): its guard is elided, presumably
 * include_next. For quote includes the directory of the current input file
 * is tried first. Then every remaining search path entry is combined with
 * headername and opened; a file that opens is installed with
 * switch_pp_input().
 *
 * NOTE(review): the return statements and several conditions are elided.
 * `path[len-1]` reads before the start of the string when a search path
 * entry is the empty string (len == 0); append_env_paths() substitutes "."
 * for empty entries, but append_include_path() has other callers.
 */
2076 static bool do_include(bool const bracket_include, bool const include_next, char const *const headername)
2078 size_t const headername_len = strlen(headername);
2079 searchpath_entry_t *entry;
2081 entry = input.path ? input.path->next
2082 : bracket_include ? bracket_searchpath.first
2083 : quote_searchpath.first;
2085 if (!bracket_include) {
2086 /* put dirname of current input on obstack */
2087 const char *filename = input.pos.input_name;
2088 const char *last_slash = strrchr(filename, '/');
2089 const char *full_name;
2090 if (last_slash != NULL) {
2091 size_t len = last_slash - filename;
/* len + 1 keeps the trailing '/' of the directory part */
2092 obstack_grow(&symbol_obstack, filename, len + 1);
2093 obstack_grow0(&symbol_obstack, headername, headername_len);
2094 char *complete_path = obstack_finish(&symbol_obstack);
2095 full_name = identify_string(complete_path);
2097 full_name = headername;
2100 FILE *file = fopen(full_name, "r");
2102 switch_pp_input(file, full_name, NULL, false);
2105 entry = quote_searchpath.first;
2107 entry = bracket_searchpath.first;
2111 assert(obstack_object_size(&symbol_obstack) == 0);
2112 /* check searchpath */
2113 for (; entry; entry = entry->next) {
2114 const char *path = entry->path;
2115 size_t len = strlen(path);
2116 obstack_grow(&symbol_obstack, path, len);
2117 if (path[len-1] != '/')
2118 obstack_1grow(&symbol_obstack, '/');
/* headername_len+1 copies the terminating NUL as well */
2119 obstack_grow(&symbol_obstack, headername, headername_len+1);
2121 char *complete_path = obstack_finish(&symbol_obstack);
2122 FILE *file = fopen(complete_path, "r");
2124 const char *filename = identify_string(complete_path);
2125 switch_pp_input(file, filename, entry, entry->is_system_path);
2128 obstack_free(&symbol_obstack, complete_path);
/**
 * Handles #include and #include_next (include_next == true).
 *
 * Reads the header name, warns about trailing tokens, rejects the include
 * when n_inputs exceeds INCLUDE_LIMIT, resets the whitespace info to the
 * state of a line begin and calls do_include(). The visible failure path
 * reports strerror(errno) and calls pop_restore_input().
 *
 * NOTE(review): the early returns and the code between the visible
 * statements are elided from this listing.
 */
2135 static void parse_include_directive(bool const include_next)
2142 /* do not eat the TP_include, since it would already parse the next token
2143 * which needs special handling here. */
2144 skip_till_newline(true);
2145 bool system_include;
2146 const char *headername = parse_headername(&system_include);
2147 if (headername == NULL) {
2152 bool had_nonwhitespace = skip_till_newline(false);
2153 if (had_nonwhitespace) {
2154 warningf(WARN_OTHER, &input.pos,
2155 "extra tokens at end of #include directive");
2158 if (n_inputs > INCLUDE_LIMIT) {
2159 errorf(&pp_token.base.pos, "#include nested too deeply");
/* the included file starts at a line begin without pending whitespace */
2166 info.whitespace_at_line_begin = 0;
2167 info.had_whitespace = false;
2168 info.at_line_begin = true;
2171 bool res = do_include(system_include, include_next, headername);
2175 errorf(&pp_token.base.pos, "failed including '%s': %s", headername, strerror(errno));
2176 pop_restore_input();
/**
 * Allocates a zero-initialized conditional on pp_obstack and pushes it onto
 * conditional_stack. NOTE(review): the return statement is elided from this
 * listing; callers use the result as the newly pushed conditional.
 */
2180 static pp_conditional_t *push_conditional(void)
2182 pp_conditional_t *conditional
2183 = obstack_alloc(&pp_obstack, sizeof(*conditional));
2184 memset(conditional, 0, sizeof(*conditional));
2186 conditional->parent = conditional_stack;
2187 conditional_stack = conditional;
2192 static void pop_conditional(void)
2194 assert(conditional_stack != NULL);
2195 conditional_stack = conditional_stack->parent;
/**
 * Reports an error for every conditional still on conditional_stack, with a
 * different message for conditionals that already reached their #else.
 * NOTE(review): the statement advancing the loop is elided from this
 * listing (presumably pop_conditional()).
 */
2198 void check_unclosed_conditionals(void)
2200 while (conditional_stack != NULL) {
2201 pp_conditional_t *conditional = conditional_stack;
2203 if (conditional->in_else) {
2204 errorf(&conditional->pos, "unterminated #else");
2206 errorf(&conditional->pos, "unterminated condition");
/**
 * Handles #ifdef (is_ifdef == true) and #ifndef (is_ifdef == false).
 *
 * One visible path pushes a conditional marked `skip`; NOTE(review): its
 * guard is elided, presumably it is taken while already in skip mode.
 * Otherwise the condition is whether the named symbol has a macro
 * definition, compared with is_ifdef; a missing identifier and trailing
 * tokens are reported as errors. The result is stored in a newly pushed
 * conditional.
 */
2212 static void parse_ifdef_ifndef_directive(bool const is_ifdef)
2215 eat_pp(is_ifdef ? TP_ifdef : TP_ifndef);
2219 pp_conditional_t *conditional = push_conditional();
2220 conditional->pos = pp_token.base.pos;
2221 conditional->skip = true;
2225 if (pp_token.kind != T_IDENTIFIER || info.at_line_begin) {
2226 errorf(&pp_token.base.pos, "expected identifier after #%s, got %K",
2227 is_ifdef ? "ifdef" : "ifndef", &pp_token);
2230 /* just take the true case in the hope to avoid further errors */
2233 /* evaluate whether we are in true or false case */
2234 condition = (bool)pp_token.base.symbol->pp_definition == is_ifdef;
2235 eat_token(T_IDENTIFIER);
2237 if (!info.at_line_begin) {
2238 errorf(&pp_token.base.pos, "extra tokens at end of #%s",
2239 is_ifdef ? "ifdef" : "ifndef");
2244 pp_conditional_t *conditional = push_conditional();
2245 conditional->pos = pp_token.base.pos;
2246 conditional->condition = condition;
/**
 * Handles #else: warns about trailing tokens, rejects an #else without an
 * open conditional or a second #else for the same conditional, then marks
 * the conditional as being in its else part and moves its position to the
 * #else. Unless the whole conditional is skipped, skip_mode becomes the
 * value of the original condition, i.e. the else part is skipped exactly
 * when the condition was true.
 */
2253 static void parse_else_directive(void)
2257 if (!info.at_line_begin) {
2259 warningf(WARN_OTHER, &pp_token.base.pos, "extra tokens at end of #else");
2264 pp_conditional_t *conditional = conditional_stack;
2265 if (conditional == NULL) {
2266 errorf(&pp_token.base.pos, "#else without prior #if");
2270 if (conditional->in_else) {
2271 errorf(&pp_token.base.pos,
2272 "#else after #else (condition started %P)",
2278 conditional->in_else = true;
2279 if (!conditional->skip) {
2280 skip_mode = conditional->condition;
2282 conditional->pos = pp_token.base.pos;
/**
 * Handles #endif: warns about trailing tokens and rejects an #endif without
 * an open conditional. NOTE(review): the statements executed for a
 * conditional that is not skipped, and the removal of the conditional from
 * the stack, are elided from this listing.
 */
2285 static void parse_endif_directive(void)
2289 if (!info.at_line_begin) {
2291 warningf(WARN_OTHER, &pp_token.base.pos, "extra tokens at end of #endif");
2296 pp_conditional_t *conditional = conditional_stack;
2297 if (conditional == NULL) {
2298 errorf(&pp_token.base.pos, "#endif without prior #if");
2302 if (!conditional->skip) {
/** Identifies which "#pragma STDC" switch was named; parse_pragma_directive()
 * uses STDC_UNKNOWN for anything it does not recognize. NOTE(review): only
 * the last enumerator is visible in this listing. */
2308 typedef enum stdc_pragma_kind_t {
2312 STDC_CX_LIMITED_RANGE
2313 } stdc_pragma_kind_t;
/** Argument of a "#pragma STDC" switch as mapped by parse_pragma_directive()
 * from ON, OFF and DEFAULT, with a separate value for anything else.
 * NOTE(review): the enumerators are elided from this listing. */
2315 typedef enum stdc_pragma_value_kind_t {
2320 } stdc_pragma_value_kind_t;
/**
 * Handles #pragma. Only pragmas whose first identifier is STDC are examined
 * further, and only when c_mode has the _C99 bit set: the switch is
 * FP_CONTRACT, FENV_ACCESS or CX_LIMITED_RANGE and its value ON, OFF or
 * DEFAULT. A bad value is reported as an error; a pragma that stays
 * unrecognized produces a WARN_UNKNOWN_PRAGMAS warning.
 *
 * NOTE(review): nothing visible in this listing stores or acts on the parsed
 * switch and value.
 */
2322 static void parse_pragma_directive(void)
2330 if (pp_token.kind != T_IDENTIFIER) {
2331 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.pos,
2332 "expected identifier after #pragma");
2337 stdc_pragma_kind_t kind = STDC_UNKNOWN;
2338 if (pp_token.base.symbol->pp_ID == TP_STDC && c_mode & _C99) {
2342 switch (pp_token.base.symbol->pp_ID) {
2343 case TP_FP_CONTRACT: kind = STDC_FP_CONTRACT; break;
2344 case TP_FENV_ACCESS: kind = STDC_FENV_ACCESS; break;
2345 case TP_CX_LIMITED_RANGE: kind = STDC_CX_LIMITED_RANGE; break;
2348 if (kind != STDC_UNKNOWN) {
2350 stdc_pragma_value_kind_t value;
2351 switch (pp_token.base.symbol->pp_ID) {
2352 case TP_ON: value = STDC_VALUE_ON; break;
2353 case TP_OFF: value = STDC_VALUE_OFF; break;
2354 case TP_DEFAULT: value = STDC_VALUE_DEFAULT; break;
2355 default: value = STDC_VALUE_UNKNOWN; break;
/* a known switch with a bad value is demoted to an unknown pragma */
2357 if (value == STDC_VALUE_UNKNOWN) {
2358 kind = STDC_UNKNOWN;
2359 errorf(&pp_token.base.pos, "bad STDC pragma argument");
2364 if (kind == STDC_UNKNOWN) {
2365 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.pos,
2366 "encountered unknown #pragma");
/**
 * Handles a line directive: a line number, optionally followed by a file
 * name and by the numeric flags the gcc preprocessor writes.
 *
 * Sets input.pos.lineno to one less than the given number (the directive
 * describes the following line), moves input.output_line away from it to
 * force output of the line, replaces input.pos.input_name when a string
 * literal with plain char encoding follows, and sets is_system_header when
 * the flag "3" is present.
 *
 * NOTE(review): strtol() is called with base 0, so "010" or "0x10" are read
 * as octal/hexadecimal although the directive takes a decimal digit
 * sequence. Several lines, including the opening and closing of the flags
 * comment, are elided from this listing.
 */
2370 static void parse_line_directive(void)
2372 if (pp_token.kind != T_NUMBER) {
2374 parse_error("expected integer");
2377 long const line = strtol(pp_token.literal.string.begin, &end, 0);
2379 /* use offset -1 as this is about the next line */
2380 input.pos.lineno = line - 1;
2381 /* force output of line */
2382 input.output_line = input.pos.lineno - 20;
2385 errorf(&input.pos, "'%S' is not a valid line number",
2386 &pp_token.literal.string);
2390 if (info.at_line_begin)
2393 if (pp_token.kind == T_STRING_LITERAL
2394 && pp_token.literal.string.encoding == STRING_ENCODING_CHAR) {
2395 input.pos.input_name = pp_token.literal.string.begin;
2396 input.pos.is_system_header = false;
2399 /* attempt to parse numeric flags as outputted by gcc preprocessor */
2400 while (!info.at_line_begin && pp_token.kind == T_NUMBER) {
2402 * 1 - indicates start of a new file
2403 * 2 - indicates return from a file
2404 * 3 - indicates system header
2405 * 4 - indicates implicit extern "C" in C++ mode
2407 * currently we're only interested in "3"
2409 if (streq(pp_token.literal.string.begin, "3")) {
2410 input.pos.is_system_header = true;
/**
 * Handles #error: concatenates the spelling of the remaining tokens of the
 * line on pp_obstack (a single space where a token had whitespace in front
 * of it, string and character literals with encoding prefix and
 * delimiters), reports the text as an error at the position captured before
 * the loop and frees the text again. Escape sequence resolution is switched
 * off while the tokens are read and restored afterwards.
 */
2419 static void parse_error_directive(void)
2426 bool const old_resolve_escape_sequences = resolve_escape_sequences;
2427 resolve_escape_sequences = false;
2429 position_t const pos = pp_token.base.pos;
/* no separator in front of the first piece of text */
2431 if (info.had_whitespace && obstack_object_size(&pp_obstack) != 0)
2432 obstack_1grow(&pp_obstack, ' ');
2434 switch (pp_token.kind) {
2436 string_t const *const str = &pp_token.literal.string;
2437 obstack_grow(&pp_obstack, str->begin, str->size);
2443 case T_STRING_LITERAL: delim = '"'; goto string;
2444 case T_CHARACTER_CONSTANT: delim = '\''; goto string;
2446 string_t const *const str = &pp_token.literal.string;
2447 char const *const enc = get_string_encoding_prefix(str->encoding);
2448 obstack_printf(&pp_obstack, "%s%c%s%c", enc, delim, str->begin, delim);
2453 char const *const str = pp_token.base.symbol->string;
2454 obstack_grow(&pp_obstack, str, strlen(str));
2460 } while (!info.at_line_begin);
2462 resolve_escape_sequences = old_resolve_escape_sequences;
2464 obstack_1grow(&pp_obstack, '\0');
2465 char *const str = obstack_finish(&pp_obstack);
2466 errorf(&pos, "#%s", str);
2467 obstack_free(&pp_obstack, str);
/**
 * Dispatches a preprocessing directive. A line that ends right after the
 * '#' is an empty directive; a token with a symbol is dispatched on the
 * pp_ID of that symbol; a number starts a line directive; the remaining
 * visible path reports an invalid directive. On exit the input is expected
 * to be at a line begin (asserted).
 */
2470 static void parse_preprocessing_directive(void)
2474 if (info.at_line_begin) {
2475 /* empty directive */
2479 if (pp_token.base.symbol) {
2480 switch (pp_token.base.symbol->pp_ID) {
2481 case TP_define: parse_define_directive(); break;
2482 case TP_else: parse_else_directive(); break;
2483 case TP_endif: parse_endif_directive(); break;
2484 case TP_error: parse_error_directive(); break;
2485 case TP_ifdef: parse_ifdef_ifndef_directive(true); break;
2486 case TP_ifndef: parse_ifdef_ifndef_directive(false); break;
2487 case TP_include: parse_include_directive(false); break;
2488 case TP_include_next: parse_include_directive(true); break;
2489 case TP_line: next_input_token(); goto line_directive;
2490 case TP_pragma: parse_pragma_directive(); break;
2491 case TP_undef: parse_undef_directive(); break;
2494 } else if (pp_token.kind == T_NUMBER) {
2496 parse_line_directive();
2500 errorf(&pp_token.base.pos, "invalid preprocessing directive #%K", &pp_token);
2505 assert(info.at_line_begin);
/**
 * Closes the token list collected for the macro argument currently being
 * read: the tokens grown on pp_obstack become current_argument->token_list
 * and list_len their count. The case of no current argument is tested up
 * front (the statement taken for it is elided from this listing).
 */
2508 static void finish_current_argument(void)
2510 if (current_argument == NULL)
2512 size_t size = obstack_object_size(&pp_obstack);
2513 current_argument->list_len = size/sizeof(current_argument->token_list[0]);
2514 current_argument->token_list = obstack_finish(&pp_obstack);
/**
 * Produces the next preprocessed token in pp_token.
 *
 * Visible in this listing: tokens come from a running expansion
 * (expand_next()) or otherwise from the input, where directives are
 * executed and the loop repeats while skip_mode is set. Outside of
 * argument collection a symbol with a definition that is not already being
 * expanded starts an expansion; macros with parameters only do so when the
 * next token is '('. While the arguments of a call are collected
 * (current_call != NULL), parentheses are counted, ',' at nesting level 0
 * switches to the next parameter, the closing ')' starts the expansion of
 * the call, and tokens are appended to the current argument.
 *
 * NOTE(review): many lines are elided (see the gaps in the embedded line
 * numbers). The tokens `¤t_call` below look like `&current_call`
 * damaged by HTML-entity decoding (`&curren` turned into a currency sign);
 * restore them before compiling.
 */
2517 void next_preprocessing_token(void)
2520 if (!expand_next()) {
2523 while (pp_token.kind == '#' && info.at_line_begin) {
2524 parse_preprocessing_directive();
2526 } while (skip_mode && pp_token.kind != T_EOF);
2529 const token_kind_t kind = pp_token.kind;
2530 if (current_call == NULL || argument_expanding != NULL) {
2531 symbol_t *const symbol = pp_token.base.symbol;
2533 if (kind == T_MACRO_PARAMETER) {
2534 assert(current_expansion != NULL);
2535 start_expanding(pp_token.macro_parameter.def);
2539 pp_definition_t *const pp_definition = symbol->pp_definition;
2540 if (pp_definition != NULL && !pp_definition->is_expanding) {
2541 if (pp_definition->has_parameters) {
2543 /* check if next token is a '(' */
2544 whitespace_info_t old_info = info;
2545 token_kind_t next_token = peek_expansion();
2546 if (next_token == T_EOF) {
2547 info.at_line_begin = false;
2548 info.had_whitespace = false;
2550 if (input.c == '(') {
2555 if (next_token == '(') {
/* expansion_pos is only set for the outermost expansion */
2556 if (current_expansion == NULL)
2557 expansion_pos = pp_token.base.pos;
2558 next_preprocessing_token();
2559 assert(pp_token.kind == '(');
2561 pp_definition->parent_expansion = current_expansion;
2562 current_call = pp_definition;
2563 current_call->expand_pos = 0;
2564 current_call->expand_info = old_info;
2565 if (current_call->n_parameters > 0) {
2566 current_argument = ¤t_call->parameters[0];
2567 assert(argument_brace_count == 0);
2571 /* skip_whitespaces() skipped newlines and whitespace,
2572 * remember results for next token */
2578 if (current_expansion == NULL)
2579 expansion_pos = pp_token.base.pos;
2580 start_expanding(pp_definition);
2587 if (current_call != NULL) {
2588 /* current_call != NULL */
2590 ++argument_brace_count;
2591 } else if (kind == ')') {
2592 if (argument_brace_count > 0) {
2593 --argument_brace_count;
2595 finish_current_argument();
2596 assert(kind == ')');
2597 start_expanding(current_call);
2598 info = current_call->expand_info;
2599 current_call = NULL;
2600 current_argument = NULL;
2603 } else if (kind == ',' && argument_brace_count == 0) {
2604 finish_current_argument();
/* during argument collection expand_pos is the index of the parameter */
2605 current_call->expand_pos++;
2606 if (current_call->expand_pos >= current_call->n_parameters) {
2607 errorf(&pp_token.base.pos,
2608 "too many arguments passed for macro '%Y'",
2609 current_call->symbol);
2610 current_argument = NULL;
2613 = ¤t_call->parameters[current_call->expand_pos];
2616 } else if (kind == T_MACRO_PARAMETER) {
2617 /* parameters have to be fully expanded before being used as
2618 * parameters for another macro-call */
2619 assert(current_expansion != NULL);
2620 pp_definition_t *argument = pp_token.macro_parameter.def;
2621 argument_expanding = argument;
2622 start_expanding(argument);
2624 } else if (kind == T_EOF) {
2625 errorf(&expansion_pos,
2626 "reached end of file while parsing arguments for '%Y'",
2627 current_call->symbol);
/* record the token as part of the argument being collected */
2630 if (current_argument != NULL) {
2631 saved_token_t saved;
2632 saved.token = pp_token;
2633 saved.had_whitespace = info.had_whitespace;
2634 obstack_grow(&pp_obstack, &saved, sizeof(saved));
/**
 * Appends a new entry to the search path list `paths`. The entry is
 * allocated with OALLOCZ on config_obstack, takes is_system_path from the
 * list and is linked in through paths->anchor, which afterwards points at
 * the next field of the new entry.
 *
 * NOTE(review): the line storing `path` in the entry is elided from this
 * listing; presumably the pointer is kept as is, not copied.
 */
2640 void append_include_path(searchpath_t *paths, const char *path)
2642 searchpath_entry_t *entry = OALLOCZ(&config_obstack, searchpath_entry_t);
2644 entry->is_system_path = paths->is_system_path;
2646 *paths->anchor = entry;
2647 paths->anchor = &entry->next;
/**
 * Appends the directories listed in the environment variable envvar to
 * paths. The value is scanned up to each ':' or the end of the string; one
 * visible branch adds "." (gcc compatibility, apparently for an empty
 * component), the other copies the component to config_obstack and adds the
 * copy. An unset or empty variable adds nothing.
 */
2650 static void append_env_paths(searchpath_t *paths, const char *envvar)
2652 const char *val = getenv(envvar);
2653 if (val != NULL && *val != '\0') {
2654 const char *begin = val;
2658 while (*c != '\0' && *c != ':')
2661 size_t len = c-begin;
2663 /* use "." for gcc compatibility (Matze: I would expect that
2664 * nothing happens for an empty entry...) */
2665 append_include_path(paths, ".");
2667 char *const string = obstack_copy0(&config_obstack, begin, len);
2668 append_include_path(paths, string);
2675 } while (*c != '\0');
2679 static void append_searchpath(searchpath_t *path, const searchpath_t *append)
2681 *path->anchor = append->first;
2684 static void setup_include_path(void)
2686 /* built-in paths */
2687 append_include_path(&system_searchpath, "/usr/include");
2689 /* parse environment variable */
2690 append_env_paths(&bracket_searchpath, "CPATH");
2691 append_env_paths(&system_searchpath,
2692 c_mode & _CXX ? "CPLUS_INCLUDE_PATH" : "C_INCLUDE_PATH");
2694 /* append system search path to bracket searchpath */
2695 append_searchpath(&system_searchpath, &after_searchpath);
2696 append_searchpath(&bracket_searchpath, &system_searchpath);
2697 append_searchpath("e_searchpath, &bracket_searchpath);
2700 static void input_error(unsigned const delta_lines, unsigned const delta_cols, char const *const message)
2702 position_t pos = pp_token.base.pos;
2703 pos.lineno += delta_lines;
2704 pos.colno += delta_cols;
2705 errorf(&pos, "%s", message);
2708 void init_include_paths(void)
2710 obstack_init(&config_obstack);
/**
 * Initializes the preprocessor state visible in this listing: pp_obstack,
 * input_obstack, the string set, the include search paths and the input
 * error callback. NOTE(review): lines are elided from this listing, so
 * further initialization steps may exist.
 */
2713 void init_preprocessor(void)
2717 obstack_init(&pp_obstack);
2718 obstack_init(&input_obstack);
2719 strset_init(&stringset);
2721 setup_include_path();
2723 set_input_error_callback(input_error);
2726 void exit_preprocessor(void)
2728 obstack_free(&input_obstack, NULL);
2729 obstack_free(&pp_obstack, NULL);
2730 obstack_free(&config_obstack, NULL);
2732 strset_destroy(&stringset);
/**
 * Stand-alone driver: preprocesses one input file and writes the result to
 * the file given with -o. NOTE(review): the default used when -o is absent
 * is elided from this listing, presumably stdout.
 *
 * Options handled: -I <dir> adds a bracket search path, -E is accepted, -o
 * selects the output, any other argument starting with '-' is reported as
 * unknown; a second input file is reported as unsupported.
 *
 * NOTE(review): `argv[++i]` after -I is not checked against argc, so a
 * trailing -I passes argv[argc] (NULL) to append_include_path(). The end of
 * the function may lie beyond this listing.
 */
2735 int pptest_main(int argc, char **argv);
2736 int pptest_main(int argc, char **argv)
2738 init_symbol_table();
2739 init_include_paths();
2740 init_preprocessor();
/* keep unknown characters and escape sequences untouched in the output */
2743 error_on_unknown_chars = false;
2744 resolve_escape_sequences = false;
2746 /* simplistic commandline parser */
2747 const char *filename = NULL;
2748 const char *output = NULL;
2749 for (int i = 1; i < argc; ++i) {
2750 const char *opt = argv[i];
2751 if (streq(opt, "-I")) {
2752 append_include_path(&bracket_searchpath, argv[++i]);
2754 } else if (streq(opt, "-E")) {
2756 } else if (streq(opt, "-o")) {
2759 } else if (opt[0] == '-') {
2760 fprintf(stderr, "Unknown option '%s'\n", opt);
2762 if (filename != NULL)
2763 fprintf(stderr, "Multiple inputs not supported\n");
2767 if (filename == NULL) {
2768 fprintf(stderr, "No input specified\n");
2772 if (output == NULL) {
2775 out = fopen(output, "w");
2777 fprintf(stderr, "Couldn't open output '%s'\n", output);
2782 /* just here for gcc compatibility */
2783 fprintf(out, "# 1 \"%s\"\n", filename);
2784 fprintf(out, "# 1 \"<built-in>\"\n");
2785 fprintf(out, "# 1 \"<command-line>\"\n");
2787 FILE *file = fopen(filename, "r");
2789 fprintf(stderr, "Couldn't open input '%s'\n", filename);
2792 switch_pp_input(file, filename, NULL, false);
2795 next_preprocessing_token();
2796 if (pp_token.kind == T_EOF)
2802 check_unclosed_conditionals();
2803 fclose(close_pp_input());
2808 exit_preprocessor();
2809 exit_symbol_table();