2 * This file is part of cparser.
3 * Copyright (C) 2012 Matthias Braun <matze@braunis.de>
13 #include "preprocessor.h"
17 #include "adt/error.h"
18 #include "adt/strutil.h"
19 #include "adt/strset.h"
20 #include "lang_features.h"
21 #include "diagnostic.h"
22 #include "string_rep.h"
26 #define INCLUDE_LIMIT 199 /* 199 is for gcc "compatibility" */
28 typedef struct saved_token_t {
33 typedef struct whitespace_info_t {
34 /** current token had whitespace in front of it */
36 /** current token is at the beginning of a line.
37 * => a "#" at line begin starts a preprocessing directive. */
39 /** number of spaces before the first token in a line */
40 unsigned whitespace_at_line_begin;
43 struct pp_definition_t {
46 pp_definition_t *parent_expansion;
48 whitespace_info_t expand_info;
50 bool is_expanding : 1;
51 bool has_parameters : 1;
52 bool is_parameter : 1;
53 pp_definition_t *function_definition;
55 pp_definition_t *parameters;
59 saved_token_t *token_list;
62 typedef struct pp_conditional_t pp_conditional_t;
63 struct pp_conditional_t {
67 /** conditional in skip mode (then+else gets skipped) */
69 pp_conditional_t *parent;
72 typedef struct pp_input_t pp_input_t;
77 utf32 buf[1024+MAX_PUTBACK];
83 searchpath_entry_t *path;
86 struct searchpath_entry_t {
88 searchpath_entry_t *next;
92 static pp_input_t input;
94 static pp_input_t *input_stack;
95 static unsigned n_inputs;
96 static struct obstack input_obstack;
98 static pp_conditional_t *conditional_stack;
101 bool allow_dollar_in_symbol = true;
102 static bool resolve_escape_sequences = true;
103 static bool error_on_unknown_chars = true;
104 static bool skip_mode;
106 static struct obstack pp_obstack;
107 static struct obstack config_obstack;
108 static const char *printed_input_name = NULL;
109 static position_t expansion_pos;
110 static pp_definition_t *current_expansion = NULL;
111 static pp_definition_t *current_call = NULL;
112 static pp_definition_t *current_argument = NULL;
113 static pp_definition_t *argument_expanding = NULL;
114 static unsigned argument_brace_count;
115 static strset_t stringset;
116 static token_kind_t last_token;
118 struct searchpath_t {
119 searchpath_entry_t *first;
120 searchpath_entry_t **anchor;
/* The four include search path lists. Each one starts out empty
 * (first == NULL) with its anchor pointing at its own `first` field.
 * The third initializer is a boolean flag that is set only for the system
 * and "after" lists. */
124 searchpath_t bracket_searchpath = { NULL, &bracket_searchpath.first, false };
125 searchpath_t quote_searchpath = { NULL, &quote_searchpath.first, false };
126 searchpath_t system_searchpath = { NULL, &system_searchpath.first, true };
127 searchpath_t after_searchpath = { NULL, &after_searchpath.first, true };
129 static whitespace_info_t next_info; /* valid if had_whitespace is true */
130 static whitespace_info_t info;
132 static inline void next_char(void);
133 static void next_input_token(void);
134 static void print_line_directive(const position_t *pos, const char *add);
136 static symbol_t *symbol_colongreater;
137 static symbol_t *symbol_lesscolon;
138 static symbol_t *symbol_lesspercent;
139 static symbol_t *symbol_percentcolon;
140 static symbol_t *symbol_percentcolonpercentcolon;
141 static symbol_t *symbol_percentgreater;
143 static symbol_t *symbol_L;
144 static symbol_t *symbol_U;
145 static symbol_t *symbol_u;
146 static symbol_t *symbol_u8;
/* Enters the digraph spellings and the literal encoding prefixes into the
 * symbol table and caches the returned symbols in the file-scope
 * symbol_* variables. */
148 static void init_symbols(void)
/* alternative (digraph) spellings of punctuators */
150 symbol_colongreater = symbol_table_insert(":>");
151 symbol_lesscolon = symbol_table_insert("<:");
152 symbol_lesspercent = symbol_table_insert("<%");
153 symbol_percentcolon = symbol_table_insert("%:");
154 symbol_percentcolonpercentcolon = symbol_table_insert("%:%:");
155 symbol_percentgreater = symbol_table_insert("%>");
/* identifiers that act as encoding prefixes of string literals and
 * character constants */
157 symbol_L = symbol_table_insert("L");
158 symbol_U = symbol_table_insert("U");
159 symbol_u = symbol_table_insert("u");
160 symbol_u8 = symbol_table_insert("u8");
163 void switch_pp_input(FILE *const file, char const *const filename, searchpath_entry_t *const path, bool const is_system_header)
166 input.input = input_from_stream(file, NULL);
169 input.output_line = 0;
170 input.pos.input_name = filename;
171 input.pos.lineno = 1;
172 input.pos.is_system_header = is_system_header;
175 /* indicate that we're at a new input */
176 print_line_directive(&input.pos, input_stack != NULL ? "1" : NULL);
178 /* place a virtual '\n' so we realize we're at line begin */
179 input.pos.lineno = 0;
183 FILE *close_pp_input(void)
185 input_free(input.input);
187 FILE* const file = input.file;
/* Saves the current input state: a copy of `input` is allocated on
 * input_obstack and linked in as the new head of input_stack. */
199 static void push_input(void)
201 pp_input_t *const saved_input = obstack_copy(&input_obstack, &input, sizeof(input));
203 /* adjust buffer positions */
/* The copied bufpos/bufend still point into input.buf; rebase them to the
 * same offsets inside the copy's own buf. */
204 if (input.bufpos != NULL)
205 saved_input->bufpos = saved_input->buf + (input.bufpos - input.buf);
206 if (input.bufend != NULL)
207 saved_input->bufend = saved_input->buf + (input.bufend - input.buf);
/* push the copy onto the stack of suspended inputs */
209 saved_input->parent = input_stack;
210 input_stack = saved_input;
/* Restores `input` from the entry on top of input_stack, unlinks that entry
 * and releases it from input_obstack. The stack must not be empty. */
214 static void pop_restore_input(void)
216 assert(n_inputs > 0);
217 assert(input_stack != NULL);
219 pp_input_t *saved_input = input_stack;
221 memcpy(&input, saved_input, sizeof(input));
224 /* adjust buffer positions */
/* The copied bufpos/bufend point into saved_input->buf; rebase them to the
 * same offsets inside input.buf. */
225 if (saved_input->bufpos != NULL)
226 input.bufpos = input.buf + (saved_input->bufpos - saved_input->buf);
227 if (saved_input->bufend != NULL)
228 input.bufend = input.buf + (saved_input->bufend - saved_input->buf);
/* pop the entry and give its memory back to the obstack */
230 input_stack = saved_input->parent;
231 obstack_free(&input_obstack, saved_input);
236 * Prints a parse error message at the current token.
238 * @param msg the error message
240 static void parse_error(const char *msg)
242 errorf(&pp_token.base.pos, "%s", msg);
245 static inline void next_real_char(void)
247 assert(input.bufpos <= input.bufend);
248 if (input.bufpos >= input.bufend) {
249 size_t const n = decode(input.input, input.buf + MAX_PUTBACK, lengthof(input.buf) - MAX_PUTBACK);
254 input.bufpos = input.buf + MAX_PUTBACK;
255 input.bufend = input.bufpos + n;
257 input.c = *input.bufpos++;
262 * Put a character back into the buffer.
264 * @param pc the character to put back
266 static inline void put_back(utf32 const pc)
/* There must be room in front of the read position. */
268 assert(input.bufpos > input.buf);
/* The buffer holds utf32 elements, so store the code point unchanged.
 * Narrowing it to char first would truncate (and, with a signed char,
 * sign-extend) any value above 0x7F. */
269 *--input.bufpos = pc;
276 if (input.c == '\n') { \
280 ++input.pos.lineno; \
281 input.pos.colno = 1; \
283 newline // Let it look like an ordinary case label.
285 #define eat(c_type) (assert(input.c == c_type), next_char())
287 static void maybe_concat_lines(void)
293 info.whitespace_at_line_begin = 0;
305 * Set input.c to the next input character, i.e.
306 * after expanding trigraphs.
308 static inline void next_char(void)
312 /* filter trigraphs and concatenated lines */
313 if (UNLIKELY(input.c == '\\')) {
314 maybe_concat_lines();
315 goto end_of_next_char;
318 if (LIKELY(input.c != '?'))
319 goto end_of_next_char;
322 if (LIKELY(input.c != '?')) {
325 goto end_of_next_char;
330 case '=': input.c = '#'; break;
331 case '(': input.c = '['; break;
332 case '/': input.c = '\\'; maybe_concat_lines(); break;
333 case ')': input.c = ']'; break;
334 case '\'': input.c = '^'; break;
335 case '<': input.c = '{'; break;
336 case '!': input.c = '|'; break;
337 case '>': input.c = '}'; break;
338 case '-': input.c = '~'; break;
348 printf("nchar '%c'\n", input.c);
355 * Returns true if the given char is an octal digit.
357 * @param chr the character to check
359 static inline bool is_octal_digit(int chr)
377 * Returns the value of a digit.
378 * The only portable way to do it ...
380 static int digit_value(int digit)
406 panic("wrong character given");
411 * Parses an octal character sequence.
413 * @param first_digit the already read first digit
415 static utf32 parse_octal_sequence(const utf32 first_digit)
417 assert(is_octal_digit(first_digit));
418 utf32 value = digit_value(first_digit);
419 if (!is_octal_digit(input.c)) return value;
420 value = 8 * value + digit_value(input.c);
422 if (!is_octal_digit(input.c)) return value;
423 value = 8 * value + digit_value(input.c);
430 * Parses a hex character sequence.
432 static utf32 parse_hex_sequence(void)
435 while (isxdigit(input.c)) {
436 value = 16 * value + digit_value(input.c);
442 static bool is_universal_char_valid(utf32 const v)
445 if (v < 0xA0U && v != 0x24 && v != 0x40 && v != 0x60)
447 if (0xD800 <= v && v <= 0xDFFF)
452 static utf32 parse_universal_char(unsigned const n_digits)
455 for (unsigned k = n_digits; k != 0; --k) {
456 if (isxdigit(input.c)) {
457 v = 16 * v + digit_value(input.c);
458 if (!resolve_escape_sequences)
459 obstack_1grow(&symbol_obstack, input.c);
463 "short universal character name, expected %u more digits",
468 if (!is_universal_char_valid(v)) {
470 "\\%c%0*X is not a valid universal character name",
471 n_digits == 4 ? 'u' : 'U', (int)n_digits, v);
476 static bool is_universal_char_valid_identifier_c99(utf32 const v)
478 static const utf32 single_chars[] = {
479 0x00AA, 0x00BA, 0x0386, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0,
480 0x1F59, 0x1F5B, 0x1F5D, 0x05BF, 0x09B2, 0x0A02, 0x0A5E, 0x0A74,
481 0x0A8D, 0x0AD0, 0x0AE0, 0x0B9C, 0x0CDE, 0x0E84, 0x0E8A, 0x0E8D,
482 0x0EA5, 0x0EA7, 0x0EC6, 0x0F00, 0x0F35, 0x0F37, 0x0F39, 0x0F97,
483 0x0FB9, 0x00B5, 0x00B7, 0x02BB, 0x037A, 0x0559, 0x093D, 0x0B3D,
484 0x1FBE, 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128
487 static const utf32 ranges[][2] = {
488 {0x00C0, 0x00D6}, {0x00D8, 0x00F6}, {0x00F8, 0x01F5}, {0x01FA, 0x0217},
489 {0x0250, 0x02A8}, {0x1E00, 0x1E9B}, {0x1EA0, 0x1EF9}, {0x0388, 0x038A},
490 {0x038E, 0x03A1}, {0x03A3, 0x03CE}, {0x03D0, 0x03D6}, {0x03E2, 0x03F3},
491 {0x1F00, 0x1F15}, {0x1F18, 0x1F1D}, {0x1F20, 0x1F45}, {0x1F48, 0x1F4D},
492 {0x1F50, 0x1F57}, {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4}, {0x1FB6, 0x1FBC},
493 {0x1FC2, 0x1FC4}, {0x1FC6, 0x1FCC}, {0x1FD0, 0x1FD3}, {0x1FD6, 0x1FDB},
494 {0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4}, {0x1FF6, 0x1FFC}, {0x0401, 0x040C},
495 {0x040E, 0x044F}, {0x0451, 0x045C}, {0x045E, 0x0481}, {0x0490, 0x04C4},
496 {0x04C7, 0x04C8}, {0x04CB, 0x04CC}, {0x04D0, 0x04EB}, {0x04EE, 0x04F5},
497 {0x04F8, 0x04F9}, {0x0531, 0x0556}, {0x0561, 0x0587}, {0x05B0, 0x05B9},
498 {0x05BB, 0x05BD}, {0x05C1, 0x05C2}, {0x05D0, 0x05EA}, {0x05F0, 0x05F2},
499 {0x0621, 0x063A}, {0x0640, 0x0652}, {0x0670, 0x06B7}, {0x06BA, 0x06BE},
500 {0x06C0, 0x06CE}, {0x06D0, 0x06DC}, {0x06E5, 0x06E8}, {0x06EA, 0x06ED},
501 {0x0901, 0x0903}, {0x0905, 0x0939}, {0x093E, 0x094D}, {0x0950, 0x0952},
502 {0x0958, 0x0963}, {0x0981, 0x0983}, {0x0985, 0x098C}, {0x098F, 0x0990},
503 {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B6, 0x09B9}, {0x09BE, 0x09C4},
504 {0x09C7, 0x09C8}, {0x09CB, 0x09CD}, {0x09DC, 0x09DD}, {0x09DF, 0x09E3},
505 {0x09F0, 0x09F1}, {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28},
506 {0x0A2A, 0x0A30}, {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, {0x0A38, 0x0A39},
507 {0x0A3E, 0x0A42}, {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A59, 0x0A5C},
508 {0x0A81, 0x0A83}, {0x0A85, 0x0A8B}, {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8},
509 {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3}, {0x0AB5, 0x0AB9}, {0x0ABD, 0x0AC5},
510 {0x0AC7, 0x0AC9}, {0x0ACB, 0x0ACD}, {0x0B01, 0x0B03}, {0x0B05, 0x0B0C},
511 {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, {0x0B32, 0x0B33},
512 {0x0B36, 0x0B39}, {0x0B3E, 0x0B43}, {0x0B47, 0x0B48}, {0x0B4B, 0x0B4D},
513 {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B61}, {0x0B82, 0x0B83}, {0x0B85, 0x0B8A},
514 {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A}, {0x0B9E, 0x0B9F},
515 {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA}, {0x0BAE, 0x0BB5}, {0x0BB7, 0x0BB9},
516 {0x0BBE, 0x0BC2}, {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD}, {0x0C01, 0x0C03},
517 {0x0C05, 0x0C0C}, {0x0C0E, 0x0C10}, {0x0C12, 0x0C28}, {0x0C2A, 0x0C33},
518 {0x0C35, 0x0C39}, {0x0C3E, 0x0C44}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D},
519 {0x0C60, 0x0C61}, {0x0C82, 0x0C83}, {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90},
520 {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3}, {0x0CB5, 0x0CB9}, {0x0CBE, 0x0CC4},
521 {0x0CC6, 0x0CC8}, {0x0CCA, 0x0CCD}, {0x0CE0, 0x0CE1}, {0x0D02, 0x0D03},
522 {0x0D05, 0x0D0C}, {0x0D0E, 0x0D10}, {0x0D12, 0x0D28}, {0x0D2A, 0x0D39},
523 {0x0D3E, 0x0D43}, {0x0D46, 0x0D48}, {0x0D4A, 0x0D4D}, {0x0D60, 0x0D61},
524 {0x0E01, 0x0E3A}, {0x0E40, 0x0E5B}, {0x0E81, 0x0E82}, {0x0E87, 0x0E88},
525 {0x0E94, 0x0E97}, {0x0E99, 0x0E9F}, {0x0EA1, 0x0EA3}, {0x0EAA, 0x0EAB},
526 {0x0EAD, 0x0EAE}, {0x0EB0, 0x0EB9}, {0x0EBB, 0x0EBD}, {0x0EC0, 0x0EC4},
527 {0x0EC8, 0x0ECD}, {0x0EDC, 0x0EDD}, {0x0F18, 0x0F19}, {0x0F3E, 0x0F47},
528 {0x0F49, 0x0F69}, {0x0F71, 0x0F84}, {0x0F86, 0x0F8B}, {0x0F90, 0x0F95},
529 {0x0F99, 0x0FAD}, {0x0FB1, 0x0FB7}, {0x10A0, 0x10C5}, {0x10D0, 0x10F6},
530 {0x3041, 0x3093}, {0x309B, 0x309C}, {0x30A1, 0x30F6}, {0x30FB, 0x30FC},
531 {0x3105, 0x312C}, {0x4E00, 0x9FA5}, {0xAC00, 0xD7A3}, {0x0660, 0x0669},
532 {0x06F0, 0x06F9}, {0x0966, 0x096F}, {0x09E6, 0x09EF}, {0x0A66, 0x0A6F},
533 {0x0AE6, 0x0AEF}, {0x0B66, 0x0B6F}, {0x0BE7, 0x0BEF}, {0x0C66, 0x0C6F},
534 {0x0CE6, 0x0CEF}, {0x0D66, 0x0D6F}, {0x0E50, 0x0E59}, {0x0ED0, 0x0ED9},
535 {0x0F20, 0x0F33}, {0x02B0, 0x02B8}, {0x02BD, 0x02C1}, {0x02D0, 0x02D1},
536 {0x02E0, 0x02E4}, {0x203F, 0x2040}, {0x210A, 0x2113}, {0x2118, 0x211D},
537 {0x212A, 0x2131}, {0x2133, 0x2138}, {0x2160, 0x2182}, {0x3005, 0x3007},
540 for (size_t i = 0; i < sizeof(ranges)/sizeof(ranges[0]); ++i) {
541 if (ranges[i][0] <= v && v <= ranges[i][1])
544 for (size_t i = 0; i < sizeof(single_chars)/sizeof(single_chars[0]); ++i) {
545 if (v == single_chars[i])
551 static bool is_universal_char_valid_identifier_c11(utf32 const v)
554 if ( v == 0x000A8) return true;
555 if ( v == 0x000AA) return true;
556 if ( v == 0x000AD) return true;
557 if ( v == 0x000AF) return true;
558 if (0x000B2 <= v && v <= 0x000B5) return true;
559 if (0x000B7 <= v && v <= 0x000BA) return true;
560 if (0x000BC <= v && v <= 0x000BE) return true;
561 if (0x000C0 <= v && v <= 0x000D6) return true;
562 if (0x000D8 <= v && v <= 0x000F6) return true;
563 if (0x000F8 <= v && v <= 0x000FF) return true;
564 if (0x00100 <= v && v <= 0x0167F) return true;
565 if (0x01681 <= v && v <= 0x0180D) return true;
566 if (0x0180F <= v && v <= 0x01FFF) return true;
567 if (0x0200B <= v && v <= 0x0200D) return true;
568 if (0x0202A <= v && v <= 0x0202E) return true;
569 if (0x0203F <= v && v <= 0x02040) return true;
570 if ( v == 0x02054) return true;
571 if (0x02060 <= v && v <= 0x0206F) return true;
572 if (0x02070 <= v && v <= 0x0218F) return true;
573 if (0x02460 <= v && v <= 0x024FF) return true;
574 if (0x02776 <= v && v <= 0x02793) return true;
575 if (0x02C00 <= v && v <= 0x02DFF) return true;
576 if (0x02E80 <= v && v <= 0x02FFF) return true;
577 if (0x03004 <= v && v <= 0x03007) return true;
578 if (0x03021 <= v && v <= 0x0302F) return true;
579 if (0x03031 <= v && v <= 0x0303F) return true;
580 if (0x03040 <= v && v <= 0x0D7FF) return true;
581 if (0x0F900 <= v && v <= 0x0FD3D) return true;
582 if (0x0FD40 <= v && v <= 0x0FDCF) return true;
583 if (0x0FDF0 <= v && v <= 0x0FE44) return true;
584 if (0x0FE47 <= v && v <= 0x0FFFD) return true;
585 if (0x10000 <= v && v <= 0x1FFFD) return true;
586 if (0x20000 <= v && v <= 0x2FFFD) return true;
587 if (0x30000 <= v && v <= 0x3FFFD) return true;
588 if (0x40000 <= v && v <= 0x4FFFD) return true;
589 if (0x50000 <= v && v <= 0x5FFFD) return true;
590 if (0x60000 <= v && v <= 0x6FFFD) return true;
591 if (0x70000 <= v && v <= 0x7FFFD) return true;
592 if (0x80000 <= v && v <= 0x8FFFD) return true;
593 if (0x90000 <= v && v <= 0x9FFFD) return true;
594 if (0xA0000 <= v && v <= 0xAFFFD) return true;
595 if (0xB0000 <= v && v <= 0xBFFFD) return true;
596 if (0xC0000 <= v && v <= 0xCFFFD) return true;
597 if (0xD0000 <= v && v <= 0xDFFFD) return true;
598 if (0xE0000 <= v && v <= 0xEFFFD) return true;
602 static bool is_universal_char_valid_identifier(utf32 const v)
605 return is_universal_char_valid_identifier_c11(v);
606 return is_universal_char_valid_identifier_c99(v);
609 static bool is_universal_char_invalid_identifier_start(utf32 const v)
611 if (! (c_mode & _C11))
615 if (0x0300 <= v && v <= 0x036F) return true;
616 if (0x1DC0 <= v && v <= 0x1DFF) return true;
617 if (0x20D0 <= v && v <= 0x20FF) return true;
618 if (0xFE20 <= v && v <= 0xFE2F) return true;
623 * Parse an escape sequence.
625 static utf32 parse_escape_sequence(void)
629 utf32 const ec = input.c;
633 case '"': return '"';
634 case '\'': return '\'';
635 case '\\': return '\\';
636 case '?': return '\?';
637 case 'a': return '\a';
638 case 'b': return '\b';
639 case 'f': return '\f';
640 case 'n': return '\n';
641 case 'r': return '\r';
642 case 't': return '\t';
643 case 'v': return '\v';
645 return parse_hex_sequence();
654 return parse_octal_sequence(ec);
656 parse_error("reached end of file while parsing escape sequence");
658 /* \E is not documented, but handled, by GCC. It is acceptable according
659 * to §6.11.4, whereas \e is not. */
663 return 27; /* hopefully 27 is ALWAYS the code for ESCAPE */
666 case 'U': return parse_universal_char(8);
667 case 'u': return parse_universal_char(4);
672 /* §6.4.4.4:8 footnote 64 */
673 parse_error("unknown escape sequence");
677 static const char *identify_string(char *string)
679 const char *result = strset_insert(&stringset, string);
680 if (result != string) {
681 obstack_free(&symbol_obstack, string);
686 static string_t sym_make_string(string_encoding_t const enc)
688 obstack_1grow(&symbol_obstack, '\0');
689 size_t const len = obstack_object_size(&symbol_obstack) - 1;
690 char *const string = obstack_finish(&symbol_obstack);
691 char const *const result = identify_string(string);
692 return (string_t){ result, len, enc };
695 string_t make_string(char const *const string)
697 obstack_grow(&symbol_obstack, string, strlen(string));
698 return sym_make_string(STRING_ENCODING_CHAR);
701 static utf32 get_string_encoding_limit(string_encoding_t const enc)
704 case STRING_ENCODING_CHAR: return 0xFF;
705 case STRING_ENCODING_CHAR16: return 0xFFFF;
706 case STRING_ENCODING_CHAR32: return 0xFFFFFFFF;
707 case STRING_ENCODING_UTF8: return 0xFFFFFFFF;
708 case STRING_ENCODING_WIDE: return 0xFFFFFFFF; // FIXME depends on settings
710 panic("invalid string encoding");
713 static void parse_string(utf32 const delimiter, token_kind_t const kind,
714 string_encoding_t const enc,
715 char const *const context)
719 utf32 const limit = get_string_encoding_limit(enc);
723 if (resolve_escape_sequences) {
724 utf32 const tc = parse_escape_sequence();
726 warningf(WARN_OTHER, &pp_token.base.pos,
727 "escape sequence out of range");
729 if (enc == STRING_ENCODING_CHAR) {
730 obstack_1grow(&symbol_obstack, tc);
732 obstack_grow_utf8(&symbol_obstack, tc);
735 obstack_1grow(&symbol_obstack, (char)input.c);
737 obstack_1grow(&symbol_obstack, (char)input.c);
744 errorf(&pp_token.base.pos, "newline while parsing %s", context);
748 errorf(&pp_token.base.pos, "EOF while parsing %s", context);
752 if (input.c == delimiter) {
756 obstack_grow_utf8(&symbol_obstack, input.c);
764 pp_token.kind = kind;
765 pp_token.literal.string = sym_make_string(enc);
768 static void parse_string_literal(string_encoding_t const enc)
770 parse_string('"', T_STRING_LITERAL, enc, "string literal");
773 static void parse_character_constant(string_encoding_t const enc)
775 parse_string('\'', T_CHARACTER_CONSTANT, enc, "character constant");
776 if (pp_token.literal.string.size == 0) {
777 parse_error("empty character constant");
781 #define SYMBOL_CASES_WITHOUT_E_P \
782 '$': if (!allow_dollar_in_symbol) goto dollar_sign; \
833 #define SYMBOL_CASES \
834 SYMBOL_CASES_WITHOUT_E_P: \
840 #define DIGIT_CASES \
852 static void start_expanding(pp_definition_t *definition)
854 definition->parent_expansion = current_expansion;
855 definition->expand_pos = 0;
856 definition->is_expanding = true;
857 if (definition->list_len > 0) {
858 definition->token_list[0].had_whitespace
859 = info.had_whitespace;
861 current_expansion = definition;
864 static void finished_expanding(pp_definition_t *definition)
866 assert(definition->is_expanding);
867 pp_definition_t *parent = definition->parent_expansion;
868 definition->parent_expansion = NULL;
869 definition->is_expanding = false;
871 /* stop further expanding once we expanded a parameter used in a
873 if (definition == argument_expanding)
874 argument_expanding = NULL;
876 assert(current_expansion == definition);
877 current_expansion = parent;
880 static void grow_string_escaped(struct obstack *obst, const string_t *string, char const *delimiter)
882 char const *prefix = get_string_encoding_prefix(string->encoding);
883 obstack_printf(obst, "%s%s", prefix, delimiter);
884 size_t size = string->size;
885 const char *str = string->begin;
886 if (resolve_escape_sequences) {
887 obstack_grow(obst, str, size);
889 for (size_t i = 0; i < size; ++i) {
890 const char c = str[i];
891 if (c == '\\' || c == '"')
892 obstack_1grow(obst, '\\');
893 obstack_1grow(obst, c);
896 obstack_printf(obst, "%s", delimiter);
899 static void grow_token(struct obstack *obst, const token_t *token)
901 switch (token->kind) {
903 obstack_grow(obst, token->literal.string.begin, token->literal.string.size);
906 case T_STRING_LITERAL: {
907 char const *const delimiter = resolve_escape_sequences ? "\"" : "\\\"";
908 grow_string_escaped(obst, &token->literal.string, delimiter);
912 case T_CHARACTER_CONSTANT:
913 grow_string_escaped(obst, &token->literal.string, "'");
918 const char *str = token->base.symbol->string;
919 size_t len = strlen(str);
920 obstack_grow(obst, str, len);
926 static void stringify(const pp_definition_t *definition)
928 assert(obstack_object_size(&symbol_obstack) == 0);
930 size_t list_len = definition->list_len;
931 for (size_t p = 0; p < list_len; ++p) {
932 const saved_token_t *saved = &definition->token_list[p];
933 if (p > 0 && saved->had_whitespace)
934 obstack_1grow(&symbol_obstack, ' ');
935 grow_token(&symbol_obstack, &saved->token);
937 pp_token.kind = T_STRING_LITERAL;
938 pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
941 static inline void set_punctuator(token_kind_t const kind)
943 pp_token.kind = kind;
944 pp_token.base.symbol = token_symbols[kind];
947 static inline void set_digraph(token_kind_t const kind, symbol_t *const symbol)
949 pp_token.kind = kind;
950 pp_token.base.symbol = symbol;
954 * returns next final token from a preprocessor macro expansion
956 static bool expand_next(void)
/* nothing to do when no macro is being expanded */
958 if (current_expansion == NULL)
962 size_t pos = current_expansion->expand_pos;
/* token list of the innermost expansion is used up: leave it */
963 if (pos >= current_expansion->list_len) {
964 finished_expanding(current_expansion);
965 /* it was the outermost expansion, parse pptoken normally */
966 if (current_expansion == NULL) {
/* fetch the next saved token of the current expansion and advance */
971 const saved_token_t *saved = &current_expansion->token_list[pos++];
972 pp_token = saved->token;
/* a '#' directly followed by a macro parameter is the stringify operator */
973 if (pp_token.kind == '#') {
974 if (pos < current_expansion->list_len) {
975 const saved_token_t *next = &current_expansion->token_list[pos];
976 if (next->token.kind == T_MACRO_PARAMETER) {
977 pp_definition_t *def = next->token.macro_parameter.def;
978 assert(def != NULL && def->is_parameter);
/* the whitespace flag of the first token was already set up by
 * start_expanding(); only later tokens take it from the saved token */
985 if (current_expansion->expand_pos > 0)
986 info.had_whitespace = saved->had_whitespace;
987 current_expansion->expand_pos = pos;
/* expanded tokens are reported at the position stored in expansion_pos */
988 pp_token.base.pos = expansion_pos;
994 * Returns the next token kind found when continuing the current expansions
995 * without starting new sub-expansions.
997 static token_kind_t peek_expansion(void)
999 for (pp_definition_t *e = current_expansion; e; e = e->parent_expansion) {
1000 if (e->expand_pos < e->list_len)
1001 return e->token_list[e->expand_pos].token.kind;
1006 static void skip_line_comment(void)
1008 info.had_whitespace = true;
1025 static void skip_multiline_comment(void)
1027 info.had_whitespace = true;
1029 position_t const start_pos = input.pos;
1034 if (input.c == '*') {
1035 /* TODO: nested comment, warn here */
1040 if (input.c == '/') {
1041 if (input.pos.lineno != input.output_line)
1042 info.whitespace_at_line_begin = input.pos.colno;
1052 errorf(&start_pos, "at end of file while looking for comment end");
1062 static bool skip_till_newline(bool stop_at_non_whitespace)
1074 if (input.c == '/') {
1076 skip_line_comment();
1078 } else if (input.c == '*') {
1080 skip_multiline_comment();
1092 if (stop_at_non_whitespace)
1101 static void skip_whitespace(void)
1107 ++info.whitespace_at_line_begin;
1108 info.had_whitespace = true;
1113 info.at_line_begin = true;
1114 info.had_whitespace = true;
1115 info.whitespace_at_line_begin = 0;
1120 if (input.c == '/') {
1122 skip_line_comment();
1124 } else if (input.c == '*') {
1126 skip_multiline_comment();
1140 static inline void eat_pp(pp_token_kind_t const kind)
1142 assert(pp_token.base.symbol->pp_ID == kind);
1147 static inline void eat_token(token_kind_t const kind)
1149 assert(pp_token.kind == kind);
/* Maps an identifier symbol to the string encoding it selects when used as
 * a literal prefix. Returns STRING_ENCODING_CHAR when the symbol is not a
 * recognized prefix. */
1154 static string_encoding_t identify_encoding_prefix(symbol_t *const sym)
/* L is accepted regardless of the language mode */
1156 if (sym == symbol_L) return STRING_ENCODING_WIDE;
/* U, u and u8 are only recognized when c_mode has the _C11 bit set */
1157 if (c_mode & _C11) {
1158 if (sym == symbol_U) return STRING_ENCODING_CHAR32;
1159 if (sym == symbol_u) return STRING_ENCODING_CHAR16;
1160 if (sym == symbol_u8) return STRING_ENCODING_UTF8;
1162 return STRING_ENCODING_CHAR;
1165 static void parse_symbol(void)
/* the identifier is assembled on symbol_obstack, which must be empty */
1167 assert(obstack_object_size(&symbol_obstack) == 0);
1172 obstack_1grow(&symbol_obstack, (char) input.c);
/* universal character names: \U takes 8 hex digits, \u takes 4 */
1181 case 'U': n = 8; goto universal;
1182 case 'u': n = 4; goto universal;
/* when escape sequences are not resolved, keep the original spelling */
1184 if (!resolve_escape_sequences) {
1185 obstack_1grow(&symbol_obstack, '\\');
1186 obstack_1grow(&symbol_obstack, input.c);
1189 utf32 const v = parse_universal_char(n);
1190 if (!is_universal_char_valid_identifier(v)) {
1191 if (is_universal_char_valid(v)) {
1193 "universal character \\%c%0*X is not valid in an identifier",
1194 n == 4 ? 'u' : 'U', (int)n, v);
1196 } else if (obstack_object_size(&symbol_obstack) == 0 && is_universal_char_invalid_identifier_start(v)) {
1198 "universal character \\%c%0*X is not valid as start of an identifier",
1199 n == 4 ? 'u' : 'U', (int)n, v);
1200 } else if (resolve_escape_sequences) {
1201 obstack_grow_utf8(&symbol_obstack, v);
/* terminate the spelling and look it up in the symbol table */
1219 obstack_1grow(&symbol_obstack, '\0');
1220 char *string = obstack_finish(&symbol_obstack);
1222 symbol_t *symbol = symbol_table_insert(string);
1224 /* Might be a prefixed string or character constant: L/U/u/u8"string". */
1225 if (input.c == '"') {
1226 string_encoding_t const enc = identify_encoding_prefix(symbol);
1227 if (enc != STRING_ENCODING_CHAR) {
1228 parse_string_literal(enc);
1231 } else if (input.c == '\'') {
1232 string_encoding_t const enc = identify_encoding_prefix(symbol);
1233 if (enc != STRING_ENCODING_CHAR) {
/* u8 is diagnosed as a prefix of a character constant */
1234 if (enc == STRING_ENCODING_UTF8) {
1235 errorf(&pp_token.base.pos,
1236 "'u8' is not a valid encoding for a character constant");
1238 parse_character_constant(enc);
1243 pp_token.kind = symbol->ID;
1244 pp_token.base.symbol = symbol;
1246 /* we can free the memory from symbol obstack if we already had an entry in
1247 * the symbol table */
1248 if (symbol->string != string) {
1249 obstack_free(&symbol_obstack, string);
1253 static void parse_number(void)
1255 obstack_1grow(&symbol_obstack, (char) input.c);
1262 case SYMBOL_CASES_WITHOUT_E_P:
1263 obstack_1grow(&symbol_obstack, (char) input.c);
1271 obstack_1grow(&symbol_obstack, (char) input.c);
1273 if (input.c == '+' || input.c == '-') {
1274 obstack_1grow(&symbol_obstack, (char) input.c);
1286 pp_token.kind = T_NUMBER;
1287 pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
1290 #define MAYBE_PROLOG \
1294 #define MAYBE(ch, kind) \
1297 set_punctuator(kind); \
1300 #define MAYBE_DIGRAPH(ch, kind, symbol) \
1303 set_digraph(kind, symbol); \
1306 #define ELSE_CODE(code) \
1311 #define ELSE(kind) ELSE_CODE(set_punctuator(kind); return;)
1313 /** identifies and returns the next preprocessing token contained in the
1314 * input stream. No macro expansion is performed. */
1315 static void next_input_token(void)
1317 if (next_info.had_whitespace) {
1319 next_info.had_whitespace = false;
1321 info.at_line_begin = false;
1322 info.had_whitespace = false;
1325 pp_token.base.pos = input.pos;
1326 pp_token.base.symbol = NULL;
1331 info.whitespace_at_line_begin++;
1332 info.had_whitespace = true;
1337 info.at_line_begin = true;
1338 info.had_whitespace = true;
1339 info.whitespace_at_line_begin = 0;
1351 parse_string_literal(STRING_ENCODING_CHAR);
1355 parse_character_constant(STRING_ENCODING_CHAR);
1377 MAYBE('.', T_DOTDOTDOT)
1381 set_punctuator('.');
1387 MAYBE('&', T_ANDAND)
1388 MAYBE('=', T_ANDEQUAL)
1392 MAYBE('=', T_ASTERISKEQUAL)
1396 MAYBE('+', T_PLUSPLUS)
1397 MAYBE('=', T_PLUSEQUAL)
1401 MAYBE('>', T_MINUSGREATER)
1402 MAYBE('-', T_MINUSMINUS)
1403 MAYBE('=', T_MINUSEQUAL)
1407 MAYBE('=', T_EXCLAMATIONMARKEQUAL)
1411 MAYBE('=', T_SLASHEQUAL)
1414 skip_multiline_comment();
1418 skip_line_comment();
1423 MAYBE_DIGRAPH('>', '}', symbol_percentgreater)
1424 MAYBE('=', T_PERCENTEQUAL)
1429 MAYBE_DIGRAPH(':', T_HASHHASH, symbol_percentcolonpercentcolon)
1433 goto digraph_percentcolon;
1436 digraph_percentcolon:
1437 set_digraph('#', symbol_percentcolon);
1443 MAYBE_DIGRAPH(':', '[', symbol_lesscolon)
1444 MAYBE_DIGRAPH('%', '{', symbol_lesspercent)
1445 MAYBE('=', T_LESSEQUAL)
1448 MAYBE('=', T_LESSLESSEQUAL)
1453 MAYBE('=', T_GREATEREQUAL)
1456 MAYBE('=', T_GREATERGREATEREQUAL)
1457 ELSE(T_GREATERGREATER)
1461 MAYBE('=', T_CARETEQUAL)
1465 MAYBE('=', T_PIPEEQUAL)
1466 MAYBE('|', T_PIPEPIPE)
1470 MAYBE_DIGRAPH('>', ']', symbol_colongreater)
1472 if (c_mode & _CXX) {
1474 set_punctuator(T_COLONCOLON);
1481 MAYBE('=', T_EQUALEQUAL)
1485 MAYBE('#', T_HASHHASH)
1498 set_punctuator(input.c);
1503 if (input_stack != NULL) {
1504 fclose(close_pp_input());
1505 pop_restore_input();
1508 if (input.c == (utf32)EOF)
1510 print_line_directive(&input.pos, "2");
1513 info.at_line_begin = true;
1514 set_punctuator(T_EOF);
1520 int next_c = input.c;
1523 if (next_c == 'U' || next_c == 'u') {
1530 if (error_on_unknown_chars) {
1531 errorf(&pp_token.base.pos, "unknown character '%lc' found", input.c);
1535 assert(obstack_object_size(&symbol_obstack) == 0);
1536 obstack_grow_utf8(&symbol_obstack, input.c);
1537 obstack_1grow(&symbol_obstack, '\0');
1538 char *const string = obstack_finish(&symbol_obstack);
1539 symbol_t *const symbol = symbol_table_insert(string);
1540 if (symbol->string != string)
1541 obstack_free(&symbol_obstack, string);
1543 pp_token.kind = T_UNKNOWN_CHAR;
1544 pp_token.base.symbol = symbol;
1551 static void print_quoted_string(const char *const string)
/* walk the string byte by byte and write an escaped form to `out` */
1554 for (const char *c = string; *c != 0; ++c) {
/* characters that have a simple escape sequence */
1556 case '"': fputs("\\\"", out); break;
1557 case '\\': fputs("\\\\", out); break;
1558 case '\a': fputs("\\a", out); break;
1559 case '\b': fputs("\\b", out); break;
1560 case '\f': fputs("\\f", out); break;
1561 case '\n': fputs("\\n", out); break;
1562 case '\r': fputs("\\r", out); break;
1563 case '\t': fputs("\\t", out); break;
1564 case '\v': fputs("\\v", out); break;
1565 case '\?': fputs("\\?", out); break;
/* Octal escape: convert through unsigned char first. Where plain char is
 * signed, a byte >= 0x80 would otherwise sign-extend and be printed as an
 * 11-digit octal number instead of a three-digit escape. */
1568 fprintf(out, "\\%03o", (unsigned)(unsigned char)*c);
1578 static void print_line_directive(const position_t *pos, const char *add)
1583 fprintf(out, "# %u ", pos->lineno);
1584 print_quoted_string(pos->input_name);
1589 if (pos->is_system_header) {
1593 printed_input_name = pos->input_name;
1594 input.output_line = pos->lineno-1;
1597 static bool emit_newlines(void)
1602 unsigned delta = pp_token.base.pos.lineno - input.output_line;
1608 print_line_directive(&pp_token.base.pos, NULL);
1611 for (unsigned i = 0; i < delta; ++i) {
1615 input.output_line = pp_token.base.pos.lineno;
1617 unsigned whitespace = info.whitespace_at_line_begin;
1618 /* make sure there is at least 1 whitespace before a (macro-expanded)
1619 * '#' at line begin. I'm not sure why this is good, but gcc does it. */
1620 if (pp_token.kind == '#' && whitespace == 0)
1622 for (unsigned i = 0; i < whitespace; ++i)
1628 void set_preprocessor_output(FILE *output)
1632 error_on_unknown_chars = false;
1633 resolve_escape_sequences = false;
1635 error_on_unknown_chars = true;
1636 resolve_escape_sequences = true;
/**
 * Print the current token `pp_token` to `out`.
 *
 * String literals and character constants are written as their encoding
 * prefix followed by the literal text. Tokens without literal text are
 * written via their symbol string. Meeting a T_MACRO_PARAMETER here is an
 * internal error. `last_token` records the kind that was printed so the next
 * call can detect accidental token pasting.
 */
1640 void emit_pp_token(void)
/* no line break was emitted: check whether the token must be separated from
 * its predecessor (it had whitespace, or both would paste together) */
1642 if (!emit_newlines() &&
1643 (info.had_whitespace || tokens_would_paste(last_token, pp_token.kind)))
1646 switch (pp_token.kind) {
1648 fputs(pp_token.literal.string.begin, out);
1651 case T_STRING_LITERAL:
1652 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1654 fputs(pp_token.literal.string.begin, out);
1658 case T_CHARACTER_CONSTANT:
1659 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1661 fputs(pp_token.literal.string.begin, out);
1665 case T_MACRO_PARAMETER:
1666 panic("macro parameter not expanded");
1669 fputs(pp_token.base.symbol->string, out);
1672 last_token = pp_token.kind;
/**
 * Discard the remainder of the current directive: loops until
 * `info.at_line_begin` is set, i.e. until the current token starts a new line.
 */
1675 static void eat_pp_directive(void)
1677 while (!info.at_line_begin) {
/**
 * Compare two string_t values for equality.
 *
 * The sizes are compared first; only strings of the same size are walked
 * element by element over `size` characters starting at `begin`.
 *
 * @param string1  first string
 * @param string2  second string
 * @return presumably true iff size and contents match (NOTE(review): confirm
 *         against the return statements)
 */
1682 static bool strings_equal(const string_t *string1, const string_t *string2)
1684 size_t size = string1->size;
1685 if (size != string2->size)
1688 const char *c1 = string1->begin;
1689 const char *c2 = string2->begin;
1690 for (size_t i = 0; i < size; ++i, ++c1, ++c2) {
/**
 * Compare two preprocessing tokens.
 *
 * Tokens of different kind are handled up front. Character constants and
 * string literals are compared through their literal strings
 * (strings_equal()), macro parameters through the symbol of the parameter
 * definition they refer to, and the last case through `base.symbol`.
 */
1697 static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
1699 if (token1->kind != token2->kind)
1702 switch (token1->kind) {
1704 case T_CHARACTER_CONSTANT:
1705 case T_STRING_LITERAL:
1706 return strings_equal(&token1->literal.string, &token2->literal.string);
1708 case T_MACRO_PARAMETER:
1709 return token1->macro_parameter.def->symbol
1710 == token2->macro_parameter.def->symbol;
1713 return token1->base.symbol == token2->base.symbol;
/**
 * Compare the replacement lists of two macro definitions.
 *
 * The lists must have the same length, and corresponding saved tokens must
 * both compare equal (pp_tokens_equal()) and agree on whether whitespace
 * preceded them. parse_define_directive() uses this to decide whether a
 * redefinition deserves a warning.
 */
1717 static bool pp_definitions_equal(const pp_definition_t *definition1,
1718 const pp_definition_t *definition2)
1720 if (definition1->list_len != definition2->list_len)
1723 size_t len = definition1->list_len;
1724 const saved_token_t *t1 = definition1->token_list;
1725 const saved_token_t *t2 = definition2->token_list;
1726 for (size_t i = 0; i < len; ++i, ++t1, ++t2) {
1727 if (!pp_tokens_equal(&t1->token, &t2->token))
1729 if (t1->had_whitespace != t2->had_whitespace)
/**
 * Report, at the position of the current token, that a '#' in a macro body
 * is not followed by a macro parameter.
 */
1735 static void missing_macro_param_error(void)
1737 errorf(&pp_token.base.pos, "'#' is not followed by a macro parameter");
/**
 * Check whether the current token may serve as a macro name in a directive.
 *
 * Errors are reported when the line ended before a name was seen, when the
 * token is not an identifier (decided further by the first character of its
 * symbol string), or when the symbol's `pp_ID` marks a name that cannot be
 * used as a macro name.
 *
 * @param context  directive name used in the error messages, e.g. "#define"
 * @return NOTE(review): presumably true if the token is acceptable; callers
 *         abandon the directive on false -- confirm
 */
1740 static bool is_defineable_token(char const *const context)
1742 if (info.at_line_begin) {
1743 errorf(&pp_token.base.pos, "unexpected end of line after %s", context);
1746 symbol_t *const symbol = pp_token.base.symbol;
1750 if (pp_token.kind != T_IDENTIFIER) {
1751 switch (symbol->string[0]) {
1758 errorf(&pp_token.base.pos, "expected identifier after %s, got %K",
1759 context, &pp_token);
1764 /* TODO turn this into a flag in pp_def. */
1765 switch (symbol->pp_ID) {
1768 errorf(&pp_token.base.pos, "%K cannot be used as macro name in %s",
1769 &pp_token, context);
/**
 * Parse a #define directive and install the resulting macro definition on
 * the macro's symbol.
 *
 * Steps visible in this function:
 *  - the macro name is validated with is_defineable_token();
 *  - a '(' directly after the name (no whitespace) starts a parameter list;
 *    parameters are collected on pp_obstack and temporarily installed as the
 *    pp_definition of their symbols so the body can recognise them;
 *  - the replacement list is stored as saved_token_t entries, with
 *    identifiers naming a parameter rewritten to T_MACRO_PARAMETER; in a
 *    function-like macro a '#' must be followed by a parameter;
 *  - the temporary parameter definitions are removed again;
 *  - a redefinition that differs from the old one produces a warning, an
 *    identical one reuses the old definition.
 */
1777 static void parse_define_directive(void)
1785 assert(obstack_object_size(&pp_obstack) == 0);
1787 if (!is_defineable_token("#define"))
1789 symbol_t *const symbol = pp_token.base.symbol;
1791 pp_definition_t *new_definition
1792 = obstack_alloc(&pp_obstack, sizeof(new_definition[0]));
1793 memset(new_definition, 0, sizeof(new_definition[0]));
1794 new_definition->symbol = symbol;
1795 new_definition->pos = input.pos;
1797 /* this is probably the only place where spaces are significant in the
1798 * lexer (except for the fact that they separate tokens). #define b(x)
1799 * is something else than #define b (x) */
1800 if (input.c == '(') {
1805 switch (pp_token.kind) {
1807 new_definition->is_variadic = true;
1808 eat_token(T_DOTDOTDOT);
1809 if (pp_token.kind != ')') {
1811 "'...' not at end of macro argument list");
1816 case T_IDENTIFIER: {
/* build the parameter in a local and append a copy to the growing
 * parameter array on pp_obstack */
1817 pp_definition_t parameter;
1818 memset(&parameter, 0, sizeof(parameter));
1819 parameter.pos = pp_token.base.pos;
1820 parameter.symbol = pp_token.base.symbol;
1821 parameter.is_parameter = true;
1822 obstack_grow(&pp_obstack, &parameter, sizeof(parameter));
1823 eat_token(T_IDENTIFIER);
1825 if (pp_token.kind == ',') {
1830 if (pp_token.kind != ')') {
1831 errorf(&pp_token.base.pos,
1832 "expected ',' or ')' after identifier, got %K",
1841 goto finish_argument_list;
1844 errorf(&pp_token.base.pos,
1845 "expected identifier, '...' or ')' in #define argument list, got %K",
1851 finish_argument_list:
1852 new_definition->has_parameters = true;
1853 size_t size = obstack_object_size(&pp_obstack);
1854 new_definition->n_parameters
1855 = size / sizeof(new_definition->parameters[0]);
1856 new_definition->parameters = obstack_finish(&pp_obstack);
/* install each parameter as the definition of its symbol; the previous
 * definition is parked in parent_expansion and restored further below */
1857 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1858 pp_definition_t *param = &new_definition->parameters[i];
1859 symbol_t *symbol = param->symbol;
1860 pp_definition_t *previous = symbol->pp_definition;
1861 if (previous != NULL
1862 && previous->function_definition == new_definition) {
1863 errorf(&param->pos, "duplicate macro parameter '%Y'", symbol);
1864 param->symbol = sym_anonymous;
1867 param->parent_expansion = previous;
1868 param->function_definition = new_definition;
1869 symbol->pp_definition = param;
1875 /* construct token list */
1876 assert(obstack_object_size(&pp_obstack) == 0);
1877 bool next_must_be_param = false;
1878 while (!info.at_line_begin) {
1879 if (pp_token.kind == T_IDENTIFIER) {
1880 const symbol_t *symbol = pp_token.base.symbol;
1881 pp_definition_t *definition = symbol->pp_definition;
1882 if (definition != NULL
1883 && definition->function_definition == new_definition) {
1884 pp_token.kind = T_MACRO_PARAMETER;
1885 pp_token.macro_parameter.def = definition;
1888 if (next_must_be_param && pp_token.kind != T_MACRO_PARAMETER) {
1889 missing_macro_param_error();
1891 saved_token_t saved_token;
1892 saved_token.token = pp_token;
1893 saved_token.had_whitespace = info.had_whitespace;
1894 obstack_grow(&pp_obstack, &saved_token, sizeof(saved_token));
1896 = new_definition->has_parameters && pp_token.kind == '#';
1899 if (next_must_be_param)
1900 missing_macro_param_error();
1902 new_definition->list_len = obstack_object_size(&pp_obstack)
1903 / sizeof(new_definition->token_list[0]);
1904 new_definition->token_list = obstack_finish(&pp_obstack);
/* undo the temporary parameter definitions installed above */
1906 if (new_definition->has_parameters) {
1907 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1908 pp_definition_t *param = &new_definition->parameters[i];
1909 symbol_t *symbol = param->symbol;
1910 if (symbol == sym_anonymous)
1912 assert(symbol->pp_definition == param);
1913 assert(param->function_definition == new_definition);
1914 symbol->pp_definition = param->parent_expansion;
1915 param->parent_expansion = NULL;
1919 pp_definition_t *old_definition = symbol->pp_definition;
1920 if (old_definition != NULL) {
1921 if (!pp_definitions_equal(old_definition, new_definition)) {
1922 warningf(WARN_OTHER, &input.pos,
1923 "multiple definition of macro '%Y' (first defined %P)",
1924 symbol, &old_definition->pos);
1926 /* reuse the old definition */
1927 obstack_free(&pp_obstack, new_definition);
1928 new_definition = old_definition;
1932 symbol->pp_definition = new_definition;
/* drop a partially grown object so pp_obstack is left without one */
1936 if (obstack_object_size(&pp_obstack) > 0) {
1937 char *ptr = obstack_finish(&pp_obstack);
1938 obstack_free(&pp_obstack, ptr);
/**
 * Parse an #undef directive: after validating the name with
 * is_defineable_token(), the symbol's macro definition is cleared. Tokens
 * remaining on the line produce a warning.
 */
1943 static void parse_undef_directive(void)
1951 if (!is_defineable_token("#undef")) {
1956 pp_token.base.symbol->pp_definition = NULL;
1959 if (!info.at_line_begin) {
1960 warningf(WARN_OTHER, &input.pos, "extra tokens at end of #undef directive");
/**
 * Read the header name that follows an include directive.
 *
 * Two spellings are handled:
 *  - a name written directly in the input, delimited by double quotes or by
 *    angle brackets; its characters are collected on symbol_obstack until
 *    the closing delimiter is seen;
 *  - anything else is fetched through next_preprocessing_token() and must
 *    yield either a string literal or a '<' ... '>' token sequence, which is
 *    spelled out again onto symbol_obstack (a single space is inserted
 *    before tokens that had whitespace in front of them).
 *
 * The finished name is interned with identify_string() and pp_token's
 * position is reset to the position where the name started.
 *
 * @param system_include  set to true for the angle bracket form and to false
 *                        for the quoted form
 * @return the header name; NOTE(review): the error paths presumably return
 *         NULL, which parse_include_directive() checks for -- confirm
 */
1965 /** behind an #include we can have the special headername lexems.
1966 * They're only allowed behind an #include so they're not recognized
1967 * by the normal next_preprocessing_token. We handle them as a special
1969 static const char *parse_headername(bool *system_include)
1971 if (info.at_line_begin) {
1972 parse_error("expected headername after #include");
1976 /* check whether we have a "... or <... headername */
1977 position_t pos = input.pos;
1981 case '<': delimiter = '>'; *system_include = true; goto parse_name;
1982 case '"': delimiter = '"'; *system_include = false; goto parse_name;
1984 assert(obstack_object_size(&symbol_obstack) == 0);
/* discard the partially collected name before reporting the error */
1991 char *dummy = obstack_finish(&symbol_obstack);
1992 obstack_free(&symbol_obstack, dummy);
1994 errorf(&pp_token.base.pos,
1995 "header name without closing '%c'", (char)delimiter);
1999 if (input.c == delimiter) {
2001 goto finish_headername;
2003 obstack_1grow(&symbol_obstack, (char)input.c);
2009 /* we should never be here */
2013 next_preprocessing_token();
2014 if (info.at_line_begin) {
2015 /* TODO: if we are already in the new line then we parsed more than
2016 * wanted. We reuse the token, but could produce following errors
2017 * misbehaviours... */
2018 goto error_invalid_input;
2020 if (pp_token.kind == T_STRING_LITERAL) {
2021 *system_include = false;
2022 return pp_token.literal.string.begin;
2023 } else if (pp_token.kind == '<') {
2024 *system_include = true;
2025 assert(obstack_object_size(&pp_obstack) == 0);
2027 next_preprocessing_token();
2028 if (info.at_line_begin) {
2029 /* TODO: we shouldn't have parsed/expanded something on the
2030 * next line yet... */
2031 char *dummy = obstack_finish(&pp_obstack);
2032 obstack_free(&pp_obstack, dummy);
2033 goto error_invalid_input;
2035 if (pp_token.kind == '>')
/* remember the token; the name is spelled out once '>' has been found */
2038 saved_token_t saved;
2039 saved.token = pp_token;
2040 saved.had_whitespace = info.had_whitespace;
2041 obstack_grow(&pp_obstack, &saved, sizeof(saved));
2043 size_t size = obstack_object_size(&pp_obstack);
2044 assert(size % sizeof(saved_token_t) == 0);
2045 size_t n_tokens = size / sizeof(saved_token_t);
2046 saved_token_t *tokens = obstack_finish(&pp_obstack);
2047 assert(obstack_object_size(&symbol_obstack) == 0);
2048 for (size_t i = 0; i < n_tokens; ++i) {
2049 const saved_token_t *saved = &tokens[i];
2050 if (i > 0 && saved->had_whitespace)
2051 obstack_1grow(&symbol_obstack, ' ');
2052 grow_token(&symbol_obstack, &saved->token);
2054 obstack_free(&pp_obstack, tokens);
2055 goto finish_headername;
2057 error_invalid_input:
2059 char *dummy = obstack_finish(&symbol_obstack);
2060 obstack_free(&symbol_obstack, dummy);
2063 errorf(&pp_token.base.pos,
2064 "expected \"FILENAME\" or <FILENAME> after #include");
2070 obstack_1grow(&symbol_obstack, '\0');
2071 char *const headername = obstack_finish(&symbol_obstack);
2072 const char *identified = identify_string(headername);
2073 pp_token.base.pos = pos;
/**
 * Locate @p headername and switch the preprocessor input to it.
 *
 * For a quote include the directory of the current input file (everything up
 * to the last '/' of input.pos.input_name) is tried first. After that the
 * search path entries are tried in order; each candidate is assembled on
 * symbol_obstack as "<entry path>/<headername>" and released again if it
 * cannot be opened.
 *
 * @param bracket_include  true for the angle bracket form of the directive
 * @param include_next     NOTE(review): presumably selects the branch that
 *                         continues after the current input's search path
 *                         entry -- confirm
 * @param headername       header name as written in the directive
 * @return NOTE(review): presumably true once a file was opened and switched
 *         to; parse_include_directive() reports a failure otherwise
 */
2077 static bool do_include(bool const bracket_include, bool const include_next, char const *const headername)
2079 size_t const headername_len = strlen(headername);
2080 searchpath_entry_t *entry;
2082 entry = input.path ? input.path->next
2083 : bracket_include ? bracket_searchpath.first
2084 : quote_searchpath.first;
2086 if (!bracket_include) {
2087 /* put dirname of current input on obstack */
2088 const char *filename = input.pos.input_name;
2089 const char *last_slash = strrchr(filename, '/');
2090 const char *full_name;
2091 if (last_slash != NULL) {
2092 size_t len = last_slash - filename;
2093 obstack_grow(&symbol_obstack, filename, len + 1);
2094 obstack_grow0(&symbol_obstack, headername, headername_len);
2095 char *complete_path = obstack_finish(&symbol_obstack);
2096 full_name = identify_string(complete_path);
2098 full_name = headername;
2101 FILE *file = fopen(full_name, "r");
2103 switch_pp_input(file, full_name, NULL, false);
2106 entry = quote_searchpath.first;
2108 entry = bracket_searchpath.first;
2112 assert(obstack_object_size(&symbol_obstack) == 0);
2113 /* check searchpath */
2114 for (; entry; entry = entry->next) {
2115 const char *path = entry->path;
2116 size_t len = strlen(path);
2117 obstack_grow(&symbol_obstack, path, len);
/* len > 0: an empty search path entry must not read path[-1]; it resolves
 * the header relative to the current directory instead */
2118 if (len > 0 && path[len-1] != '/')
2119 obstack_1grow(&symbol_obstack, '/');
2120 obstack_grow(&symbol_obstack, headername, headername_len+1);
2122 char *complete_path = obstack_finish(&symbol_obstack);
2123 FILE *file = fopen(complete_path, "r");
2125 const char *filename = identify_string(complete_path);
2126 switch_pp_input(file, filename, entry, entry->is_system_path);
2129 obstack_free(&symbol_obstack, complete_path);
/**
 * Parse an include directive and enter the included file.
 *
 * The header name is read with parse_headername(); leftover tokens on the
 * line produce a warning. Nesting deeper than INCLUDE_LIMIT inputs is an
 * error. Before the new input is entered the whitespace state is reset to
 * "at line begin". If do_include() fails, an error naming the header is
 * reported and the previous input is restored.
 *
 * @param include_next  forwarded to do_include()
 */
2136 static void parse_include_directive(bool const include_next)
2143 /* do not eat the TP_include, since it would already parse the next token
2144 * which needs special handling here. */
2145 skip_till_newline(true);
2146 bool system_include;
2147 const char *headername = parse_headername(&system_include);
2148 if (headername == NULL) {
2153 bool had_nonwhitespace = skip_till_newline(false);
2154 if (had_nonwhitespace) {
2155 warningf(WARN_OTHER, &input.pos,
2156 "extra tokens at end of #include directive");
2159 if (n_inputs > INCLUDE_LIMIT) {
2160 errorf(&pp_token.base.pos, "#include nested too deeply");
/* the included file starts on a fresh line */
2167 info.whitespace_at_line_begin = 0;
2168 info.had_whitespace = false;
2169 info.at_line_begin = true;
2172 bool res = do_include(system_include, include_next, headername);
/* NOTE(review): errno is read after do_include() has returned; confirm no
 * call in between can overwrite the fopen() failure code */
2176 errorf(&pp_token.base.pos, "failed including '%s': %s", headername, strerror(errno));
2177 pop_restore_input();
/**
 * Allocate a zero-initialised conditional record on pp_obstack and push it
 * onto `conditional_stack` (the previous top becomes its parent).
 *
 * @return presumably the new record; callers fill in pos/skip/condition
 */
2181 static pp_conditional_t *push_conditional(void)
2183 pp_conditional_t *conditional
2184 = obstack_alloc(&pp_obstack, sizeof(*conditional));
2185 memset(conditional, 0, sizeof(*conditional));
2187 conditional->parent = conditional_stack;
2188 conditional_stack = conditional;
/**
 * Remove the innermost conditional from `conditional_stack`.
 * The stack must not be empty (asserted).
 */
2193 static void pop_conditional(void)
2195 assert(conditional_stack != NULL);
2196 conditional_stack = conditional_stack->parent;
/**
 * Report every conditional that is still open: an error is issued at the
 * recorded position of each entry on `conditional_stack`, worded differently
 * depending on whether its #else part had already been entered.
 */
2199 void check_unclosed_conditionals(void)
2201 while (conditional_stack != NULL) {
2202 pp_conditional_t *conditional = conditional_stack;
2204 if (conditional->in_else) {
2205 errorf(&conditional->pos, "unterminated #else");
2207 errorf(&conditional->pos, "unterminated condition");
/**
 * Parse #ifdef (@p is_ifdef true) or #ifndef (@p is_ifdef false).
 *
 * One path pushes a conditional marked `skip` without evaluating anything
 * (NOTE(review): presumably taken when the directive is met while already
 * skipping -- confirm). Otherwise the condition is true exactly when
 * "the identifier names a defined macro" equals @p is_ifdef. A missing
 * identifier and extra tokens are reported as errors. The evaluated
 * condition is stored in a newly pushed conditional.
 */
2213 static void parse_ifdef_ifndef_directive(bool const is_ifdef)
2216 eat_pp(is_ifdef ? TP_ifdef : TP_ifndef);
2220 pp_conditional_t *conditional = push_conditional();
2221 conditional->pos = pp_token.base.pos;
2222 conditional->skip = true;
2226 if (pp_token.kind != T_IDENTIFIER || info.at_line_begin) {
2227 errorf(&pp_token.base.pos, "expected identifier after #%s, got %K",
2228 is_ifdef ? "ifdef" : "ifndef", &pp_token);
2231 /* just take the true case in the hope to avoid further errors */
2234 /* evaluate whether we are in true or false case */
2235 condition = (bool)pp_token.base.symbol->pp_definition == is_ifdef;
2236 eat_token(T_IDENTIFIER);
2238 if (!info.at_line_begin) {
2239 errorf(&pp_token.base.pos, "extra tokens at end of #%s",
2240 is_ifdef ? "ifdef" : "ifndef");
2245 pp_conditional_t *conditional = push_conditional();
2246 conditional->pos = pp_token.base.pos;
2247 conditional->condition = condition;
/**
 * Parse an #else directive.
 *
 * Extra tokens produce a warning. An #else without an open conditional, or a
 * second #else for the same conditional, is an error. Otherwise the
 * conditional is marked as being in its else part; unless the whole
 * conditional is skipped, skip mode becomes the value of the original
 * condition (the else part is skipped exactly when the condition held). The
 * recorded position moves to this #else.
 */
2254 static void parse_else_directive(void)
2258 if (!info.at_line_begin) {
2260 warningf(WARN_OTHER, &pp_token.base.pos, "extra tokens at end of #else");
2265 pp_conditional_t *conditional = conditional_stack;
2266 if (conditional == NULL) {
2267 errorf(&pp_token.base.pos, "#else without prior #if");
2271 if (conditional->in_else) {
2272 errorf(&pp_token.base.pos,
2273 "#else after #else (condition started %P)",
2279 conditional->in_else = true;
2280 if (!conditional->skip) {
2281 skip_mode = conditional->condition;
2283 conditional->pos = pp_token.base.pos;
/**
 * Parse an #endif directive.
 *
 * Extra tokens produce a warning; an #endif without an open conditional is
 * an error. Conditionals that were not pushed in skip state get additional
 * handling (NOTE(review): presumably skip mode is switched off again --
 * confirm).
 */
2286 static void parse_endif_directive(void)
2290 if (!info.at_line_begin) {
2292 warningf(WARN_OTHER, &pp_token.base.pos, "extra tokens at end of #endif");
2297 pp_conditional_t *conditional = conditional_stack;
2298 if (conditional == NULL) {
2299 errorf(&pp_token.base.pos, "#endif without prior #if");
2303 if (!conditional->skip) {
/** Which "#pragma STDC" switch was named; parse_pragma_directive() uses
 * STDC_UNKNOWN for anything it does not recognise. */
2309 typedef enum stdc_pragma_kind_t {
2313 STDC_CX_LIMITED_RANGE
2314 } stdc_pragma_kind_t;
/** Argument given to a "#pragma STDC" switch; parse_pragma_directive() maps
 * ON, OFF and DEFAULT and uses STDC_VALUE_UNKNOWN for anything else. */
2316 typedef enum stdc_pragma_value_kind_t {
2321 } stdc_pragma_value_kind_t;
/**
 * Parse a #pragma directive.
 *
 * A pragma that does not start with an identifier produces a warning. STDC
 * pragmas are only recognised when c_mode includes _C99: the switch name
 * (FP_CONTRACT, FENV_ACCESS, CX_LIMITED_RANGE) and its argument (ON, OFF,
 * DEFAULT) are mapped to enum values, and an unrecognised argument is an
 * error. Anything that ends up as STDC_UNKNOWN produces an "unknown #pragma"
 * warning.
 */
2323 static void parse_pragma_directive(void)
2331 if (pp_token.kind != T_IDENTIFIER) {
2332 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.pos,
2333 "expected identifier after #pragma");
2338 stdc_pragma_kind_t kind = STDC_UNKNOWN;
2339 if (pp_token.base.symbol->pp_ID == TP_STDC && c_mode & _C99) {
2343 switch (pp_token.base.symbol->pp_ID) {
2344 case TP_FP_CONTRACT: kind = STDC_FP_CONTRACT; break;
2345 case TP_FENV_ACCESS: kind = STDC_FENV_ACCESS; break;
2346 case TP_CX_LIMITED_RANGE: kind = STDC_CX_LIMITED_RANGE; break;
2349 if (kind != STDC_UNKNOWN) {
2351 stdc_pragma_value_kind_t value;
2352 switch (pp_token.base.symbol->pp_ID) {
2353 case TP_ON: value = STDC_VALUE_ON; break;
2354 case TP_OFF: value = STDC_VALUE_OFF; break;
2355 case TP_DEFAULT: value = STDC_VALUE_DEFAULT; break;
2356 default: value = STDC_VALUE_UNKNOWN; break;
2358 if (value == STDC_VALUE_UNKNOWN) {
2359 kind = STDC_UNKNOWN;
2360 errorf(&pp_token.base.pos, "bad STDC pragma argument");
2365 if (kind == STDC_UNKNOWN) {
2366 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.pos,
2367 "encountered unknown #pragma");
/**
 * Parse the operands of a line directive (also reached for the
 * "# <number>" form, see parse_preprocessing_directive()).
 *
 * The line number is converted with strtol(); the input position is set one
 * line lower because the directive describes the following line, and
 * `input.output_line` is moved far enough away to force a line marker in the
 * output. An optional plain string literal replaces the input name and
 * clears the system header flag. Trailing numeric flags as written by gcc
 * are scanned, and flag "3" marks the input as a system header.
 */
2371 static void parse_line_directive(void)
2373 if (pp_token.kind != T_NUMBER) {
2375 parse_error("expected integer");
/* NOTE(review): base 0 also accepts octal and hexadecimal spellings;
 * confirm that this is intended for line numbers */
2378 long const line = strtol(pp_token.literal.string.begin, &end, 0);
2380 /* use offset -1 as this is about the next line */
2381 input.pos.lineno = line - 1;
2382 /* force output of line */
2383 input.output_line = input.pos.lineno - 20;
2386 errorf(&input.pos, "'%S' is not a valid line number",
2387 &pp_token.literal.string);
2391 if (info.at_line_begin)
2394 if (pp_token.kind == T_STRING_LITERAL
2395 && pp_token.literal.string.encoding == STRING_ENCODING_CHAR) {
2396 input.pos.input_name = pp_token.literal.string.begin;
2397 input.pos.is_system_header = false;
2400 /* attempt to parse numeric flags as outputted by gcc preprocessor */
2401 while (!info.at_line_begin && pp_token.kind == T_NUMBER) {
2403 * 1 - indicates start of a new file
2404 * 2 - indicates return from a file
2405 * 3 - indicates system header
2406 * 4 - indicates implicit extern "C" in C++ mode
2408 * currently we're only interested in "3"
2410 if (streq(pp_token.literal.string.begin, "3")) {
2411 input.pos.is_system_header = true;
/**
 * Parse an #error directive and report its text as an error.
 *
 * Escape sequence resolution is switched off while the line is read and
 * restored afterwards. The tokens of the line are spelled out again on
 * pp_obstack: a space is inserted before a token that had whitespace in
 * front of it (except at the very start), and string literals and character
 * constants get their encoding prefix and delimiters back. The assembled
 * text is reported at the position recorded at the start and then released.
 */
2420 static void parse_error_directive(void)
2427 bool const old_resolve_escape_sequences = resolve_escape_sequences;
2428 resolve_escape_sequences = false;
2430 position_t const pos = pp_token.base.pos;
2432 if (info.had_whitespace && obstack_object_size(&pp_obstack) != 0)
2433 obstack_1grow(&pp_obstack, ' ');
2435 switch (pp_token.kind) {
2437 string_t const *const str = &pp_token.literal.string;
2438 obstack_grow(&pp_obstack, str->begin, str->size);
2444 case T_STRING_LITERAL: delim = '"'; goto string;
2445 case T_CHARACTER_CONSTANT: delim = '\''; goto string;
2447 string_t const *const str = &pp_token.literal.string;
2448 char const *const enc = get_string_encoding_prefix(str->encoding);
2449 obstack_printf(&pp_obstack, "%s%c%s%c", enc, delim, str->begin, delim);
2454 char const *const str = pp_token.base.symbol->string;
2455 obstack_grow(&pp_obstack, str, strlen(str));
2461 } while (!info.at_line_begin);
2463 resolve_escape_sequences = old_resolve_escape_sequences;
2465 obstack_1grow(&pp_obstack, '\0');
2466 char *const str = obstack_finish(&pp_obstack);
2467 errorf(&pos, "#%s", str);
2468 obstack_free(&pp_obstack, str);
/**
 * Dispatch one preprocessing directive.
 *
 * A '#' alone on its line is an empty directive. If the token after '#' has
 * a symbol, its `pp_ID` selects the handler; a bare number is treated as a
 * line directive; anything else is reported as an invalid directive. On
 * exit the lexer must be at the beginning of a line (asserted).
 */
2471 static void parse_preprocessing_directive(void)
2475 if (info.at_line_begin) {
2476 /* empty directive */
2480 if (pp_token.base.symbol) {
2481 switch (pp_token.base.symbol->pp_ID) {
2482 case TP_define: parse_define_directive(); break;
2483 case TP_else: parse_else_directive(); break;
2484 case TP_endif: parse_endif_directive(); break;
2485 case TP_error: parse_error_directive(); break;
2486 case TP_ifdef: parse_ifdef_ifndef_directive(true); break;
2487 case TP_ifndef: parse_ifdef_ifndef_directive(false); break;
2488 case TP_include: parse_include_directive(false); break;
2489 case TP_include_next: parse_include_directive(true); break;
2490 case TP_line: next_input_token(); goto line_directive;
2491 case TP_pragma: parse_pragma_directive(); break;
2492 case TP_undef: parse_undef_directive(); break;
2495 } else if (pp_token.kind == T_NUMBER) {
2497 parse_line_directive();
2501 errorf(&pp_token.base.pos, "invalid preprocessing directive #%K", &pp_token);
2506 assert(info.at_line_begin);
/**
 * Close the macro argument that is currently being collected: the tokens
 * grown on pp_obstack become the token list of `current_argument`, with
 * `list_len` derived from the object size. A NULL `current_argument` is
 * checked for first.
 */
2509 static void finish_current_argument(void)
2511 if (current_argument == NULL)
2513 size_t size = obstack_object_size(&pp_obstack);
2514 current_argument->list_len = size/sizeof(current_argument->token_list[0]);
2515 current_argument->token_list = obstack_finish(&pp_obstack);
/**
 * Advance `pp_token` to the next preprocessing token, handling directives
 * and macro expansion on the way.
 *
 *  - If expand_next() does not deliver a token, directives introduced by a
 *    '#' at line begin are processed; this repeats while skip mode is active
 *    and the end of the file has not been reached.
 *  - Unless the arguments of a macro call are being collected (or while an
 *    argument itself is being expanded), macro parameters and defined macros
 *    that are not already expanding start to expand. A macro with parameters
 *    is only called when the next token is '('.
 *  - While the arguments of a call are collected (`current_call` is set),
 *    nested parentheses are counted, a ',' outside nested parentheses moves
 *    on to the next parameter, the closing ')' starts the expansion of the
 *    call, and tokens are appended to the current argument on pp_obstack.
 */
2518 void next_preprocessing_token(void)
2521 if (!expand_next()) {
2524 while (pp_token.kind == '#' && info.at_line_begin) {
2525 parse_preprocessing_directive();
2527 } while (skip_mode && pp_token.kind != T_EOF);
2530 const token_kind_t kind = pp_token.kind;
2531 if (current_call == NULL || argument_expanding != NULL) {
2532 symbol_t *const symbol = pp_token.base.symbol;
2534 if (kind == T_MACRO_PARAMETER) {
2535 assert(current_expansion != NULL);
2536 start_expanding(pp_token.macro_parameter.def);
2540 pp_definition_t *const pp_definition = symbol->pp_definition;
2541 if (pp_definition != NULL && !pp_definition->is_expanding) {
2542 if (pp_definition->has_parameters) {
2544 /* check if next token is a '(' */
2545 whitespace_info_t old_info = info;
2546 token_kind_t next_token = peek_expansion();
2547 if (next_token == T_EOF) {
2548 info.at_line_begin = false;
2549 info.had_whitespace = false;
2551 if (input.c == '(') {
2556 if (next_token == '(') {
2557 if (current_expansion == NULL)
2558 expansion_pos = pp_token.base.pos;
2559 next_preprocessing_token();
2560 assert(pp_token.kind == '(');
2562 pp_definition->parent_expansion = current_expansion;
2563 current_call = pp_definition;
2564 current_call->expand_pos = 0;
2565 current_call->expand_info = old_info;
/* start collecting tokens for the first parameter, if there is one */
2566 if (current_call->n_parameters > 0) {
2567 current_argument = &current_call->parameters[0];
2568 assert(argument_brace_count == 0);
2572 /* skip_whitespaces() skipped newlines and whitespace,
2573 * remember results for next token */
2579 if (current_expansion == NULL)
2580 expansion_pos = pp_token.base.pos;
2581 start_expanding(pp_definition);
2588 if (current_call != NULL) {
2589 /* current_call != NULL */
2591 ++argument_brace_count;
2592 } else if (kind == ')') {
2593 if (argument_brace_count > 0) {
2594 --argument_brace_count;
2596 finish_current_argument();
2597 assert(kind == ')');
2598 start_expanding(current_call);
2599 info = current_call->expand_info;
2600 current_call = NULL;
2601 current_argument = NULL;
2604 } else if (kind == ',' && argument_brace_count == 0) {
2605 finish_current_argument();
2606 current_call->expand_pos++;
2607 if (current_call->expand_pos >= current_call->n_parameters) {
2608 errorf(&pp_token.base.pos,
2609 "too many arguments passed for macro '%Y'",
2610 current_call->symbol);
2611 current_argument = NULL;
2614 = &current_call->parameters[current_call->expand_pos];
2617 } else if (kind == T_MACRO_PARAMETER) {
2618 /* parameters have to be fully expanded before being used as
2619 * parameters for another macro-call */
2620 assert(current_expansion != NULL);
2621 pp_definition_t *argument = pp_token.macro_parameter.def;
2622 argument_expanding = argument;
2623 start_expanding(argument);
2625 } else if (kind == T_EOF) {
2626 errorf(&expansion_pos,
2627 "reached end of file while parsing arguments for '%Y'",
2628 current_call->symbol);
/* an ordinary token inside an argument list: append it to the argument */
2631 if (current_argument != NULL) {
2632 saved_token_t saved;
2633 saved.token = pp_token;
2634 saved.had_whitespace = info.had_whitespace;
2635 obstack_grow(&pp_obstack, &saved, sizeof(saved));
/**
 * Append a directory to the end of a search path list.
 *
 * A zero-initialised entry is allocated on config_obstack, inherits the
 * list's `is_system_path` flag and is linked in through the list's anchor,
 * which then points at the new entry's `next` field.
 *
 * @param paths  search path list to extend
 * @param path   directory name (NOTE(review): presumably stored by pointer,
 *               so it must outlive the list -- confirm)
 */
2641 void append_include_path(searchpath_t *paths, const char *path)
2643 searchpath_entry_t *entry = OALLOCZ(&config_obstack, searchpath_entry_t);
2645 entry->is_system_path = paths->is_system_path;
2647 *paths->anchor = entry;
2648 paths->anchor = &entry->next;
/**
 * Append the directories listed in an environment variable to a search path.
 *
 * The value is split at ':' characters. An empty component is added as ".",
 * every other component is copied to config_obstack and added as is. An
 * unset or empty variable adds nothing.
 *
 * @param paths   search path list to extend
 * @param envvar  name of the environment variable to read
 */
2651 static void append_env_paths(searchpath_t *paths, const char *envvar)
2653 const char *val = getenv(envvar);
2654 if (val != NULL && *val != '\0') {
2655 const char *begin = val;
2659 while (*c != '\0' && *c != ':')
2662 size_t len = c-begin;
2664 /* use "." for gcc compatibility (Matze: I would expect that
2665 * nothing happens for an empty entry...) */
2666 append_include_path(paths, ".");
2668 char *const string = obstack_copy0(&config_obstack, begin, len);
2669 append_include_path(paths, string);
2676 } while (*c != '\0');
/**
 * Chain the list @p append behind @p path by storing its first entry at
 * @p path's anchor. The entries are shared, not copied.
 */
2680 static void append_searchpath(searchpath_t *path, const searchpath_t *append)
2682 *path->anchor = append->first;
/**
 * Assemble the include search paths.
 *
 * "/usr/include" is added to the system path, CPATH extends the bracket
 * path, and CPLUS_INCLUDE_PATH or C_INCLUDE_PATH (depending on whether
 * c_mode has _CXX set) extends the system path. The lists are then chained
 * so a lookup starting in the quote path continues through the bracket,
 * system and after paths.
 */
2685 static void setup_include_path(void)
2687 /* built-in paths */
2688 append_include_path(&system_searchpath, "/usr/include");
2690 /* parse environment variable */
2691 append_env_paths(&bracket_searchpath, "CPATH");
2692 append_env_paths(&system_searchpath,
2693 c_mode & _CXX ? "CPLUS_INCLUDE_PATH" : "C_INCLUDE_PATH");
2695 /* append system search path to bracket searchpath */
2696 append_searchpath(&system_searchpath, &after_searchpath);
2697 append_searchpath(&bracket_searchpath, &system_searchpath);
2698 append_searchpath(&quote_searchpath, &bracket_searchpath);
/**
 * Error callback for the input layer (registered in init_preprocessor()).
 *
 * The error is reported at the position of the current token shifted by the
 * given offsets.
 *
 * @param delta_lines  lines to add to the current token's line number
 * @param delta_cols   columns to add to the current token's column number
 * @param message      text to report, printed verbatim
 */
2701 static void input_error(unsigned const delta_lines, unsigned const delta_cols, char const *const message)
2703 position_t pos = pp_token.base.pos;
2704 pos.lineno += delta_lines;
2705 pos.colno += delta_cols;
2706 errorf(&pos, "%s", message);
/**
 * Initialise config_obstack, the storage used for search path entries
 * (see append_include_path()).
 */
2709 void init_include_paths(void)
2711 obstack_init(&config_obstack);
/**
 * Initialise the preprocessor: sets up pp_obstack, input_obstack and the
 * string set, assembles the include search paths and registers input_error()
 * as the input error callback.
 */
2714 void init_preprocessor(void)
2718 obstack_init(&pp_obstack);
2719 obstack_init(&input_obstack);
2720 strset_init(&stringset);
2722 setup_include_path();
2724 set_input_error_callback(input_error);
/**
 * Release the preprocessor's storage: input_obstack, pp_obstack and
 * config_obstack are freed completely and the string set is destroyed.
 */
2727 void exit_preprocessor(void)
2729 obstack_free(&input_obstack, NULL);
2730 obstack_free(&pp_obstack, NULL);
2731 obstack_free(&config_obstack, NULL);
2733 strset_destroy(&stringset);
/**
 * Entry point of a stand-alone preprocessor driver.
 *
 * Initialises the symbol table, include paths and preprocessor, switches off
 * unknown-character errors and escape sequence resolution, parses a small
 * set of options (-I, -E, -o), writes gcc-style leading line markers, then
 * pulls tokens with next_preprocessing_token() until T_EOF. Afterwards
 * unclosed conditionals are reported, the input is closed and the
 * preprocessor and symbol table are shut down.
 *
 * @param argc  number of command line arguments
 * @param argv  command line arguments
 */
2736 int pptest_main(int argc, char **argv);
2737 int pptest_main(int argc, char **argv)
2739 init_symbol_table();
2740 init_include_paths();
2741 init_preprocessor();
2744 error_on_unknown_chars = false;
2745 resolve_escape_sequences = false;
2747 /* simplistic commandline parser */
2748 const char *filename = NULL;
2749 const char *output = NULL;
2750 for (int i = 1; i < argc; ++i) {
2751 const char *opt = argv[i];
2752 if (streq(opt, "-I")) {
/* NOTE(review): if "-I" is the last argument, argv[++i] is NULL here;
 * confirm whether that case needs a diagnostic */
2753 append_include_path(&bracket_searchpath, argv[++i]);
2755 } else if (streq(opt, "-E")) {
2757 } else if (streq(opt, "-o")) {
2760 } else if (opt[0] == '-') {
2761 fprintf(stderr, "Unknown option '%s'\n", opt);
2763 if (filename != NULL)
2764 fprintf(stderr, "Multiple inputs not supported\n");
2768 if (filename == NULL) {
2769 fprintf(stderr, "No input specified\n");
2773 if (output == NULL) {
2776 out = fopen(output, "w");
2778 fprintf(stderr, "Couldn't open output '%s'\n", output);
2783 /* just here for gcc compatibility */
2784 fprintf(out, "# 1 \"%s\"\n", filename);
2785 fprintf(out, "# 1 \"<built-in>\"\n");
2786 fprintf(out, "# 1 \"<command-line>\"\n");
2788 FILE *file = fopen(filename, "r");
2790 fprintf(stderr, "Couldn't open input '%s'\n", filename);
2793 switch_pp_input(file, filename, NULL, false);
2796 next_preprocessing_token();
2797 if (pp_token.kind == T_EOF)
2803 check_unclosed_conditionals();
2804 fclose(close_pp_input());
2809 exit_preprocessor();
2810 exit_symbol_table();