9 #include "preprocessor.h"
13 #include "adt/error.h"
14 #include "adt/strutil.h"
15 #include "adt/strset.h"
16 #include "lang_features.h"
17 #include "diagnostic.h"
18 #include "string_rep.h"
22 #define INCLUDE_LIMIT 199 /* 199 is for gcc "compatibility" */
24 typedef struct saved_token_t {
29 typedef struct whitespace_info_t {
30 /** current token had whitespace in front of it */
32 /** current token is at the beginning of a line.
33 * => a "#" at line begin starts a preprocessing directive. */
35 /** number of spaces before the first token in a line */
36 unsigned whitespace_at_line_begin;
39 struct pp_definition_t {
41 source_position_t source_position;
42 pp_definition_t *parent_expansion;
44 whitespace_info_t expand_info;
46 bool is_expanding : 1;
47 bool has_parameters : 1;
48 bool is_parameter : 1;
49 pp_definition_t *function_definition;
51 pp_definition_t *parameters;
55 saved_token_t *token_list;
58 typedef struct pp_conditional_t pp_conditional_t;
59 struct pp_conditional_t {
60 source_position_t source_position;
63 /** conditional in skip mode (then+else gets skipped) */
65 pp_conditional_t *parent;
68 typedef struct pp_input_t pp_input_t;
73 utf32 buf[1024+MAX_PUTBACK];
76 source_position_t position;
79 searchpath_entry_t *path;
82 struct searchpath_entry_t {
84 searchpath_entry_t *next;
88 static pp_input_t input;
90 static pp_input_t *input_stack;
91 static unsigned n_inputs;
92 static struct obstack input_obstack;
94 static pp_conditional_t *conditional_stack;
97 bool allow_dollar_in_symbol = true;
98 static bool resolve_escape_sequences = true;
99 static bool error_on_unknown_chars = true;
100 static bool skip_mode;
102 static struct obstack pp_obstack;
103 static struct obstack config_obstack;
104 static const char *printed_input_name = NULL;
105 static source_position_t expansion_pos;
106 static pp_definition_t *current_expansion = NULL;
107 static pp_definition_t *current_call = NULL;
108 static pp_definition_t *current_argument = NULL;
109 static pp_definition_t *argument_expanding = NULL;
110 static unsigned argument_brace_count;
111 static strset_t stringset;
112 static token_kind_t last_token;
114 struct searchpath_t {
115 searchpath_entry_t *first;
116 searchpath_entry_t **anchor;
/* The four include search paths. Each one starts out empty: `first` is NULL
 * and `anchor` holds the address of the path's own `first` member. The third
 * member (its declaration is not visible in this extract) is false for the
 * bracket/quote paths and true for the system/after paths. */
120 searchpath_t bracket_searchpath = { NULL, &bracket_searchpath.first, false };
121 searchpath_t quote_searchpath = { NULL, &quote_searchpath.first, false };
122 searchpath_t system_searchpath = { NULL, &system_searchpath.first, true };
123 searchpath_t after_searchpath = { NULL, &after_searchpath.first, true };
125 static whitespace_info_t next_info; /* valid if had_whitespace is true */
126 static whitespace_info_t info;
128 static inline void next_char(void);
129 static void next_input_token(void);
130 static void print_line_directive(const source_position_t *pos, const char *add);
132 static symbol_t *symbol_colongreater;
133 static symbol_t *symbol_lesscolon;
134 static symbol_t *symbol_lesspercent;
135 static symbol_t *symbol_percentcolon;
136 static symbol_t *symbol_percentcolonpercentcolon;
137 static symbol_t *symbol_percentgreater;
139 static symbol_t *symbol_L;
140 static symbol_t *symbol_U;
141 static symbol_t *symbol_u;
142 static symbol_t *symbol_u8;
/**
 * Inserts the spellings of the digraphs (":>", "<:", "<%", "%:", "%:%:",
 * "%>") and of the literal prefixes ("L", "U", "u", "u8") into the symbol
 * table and stores the returned symbols in the file-level symbol_* variables.
 */
144 static void init_symbols(void)
146 symbol_colongreater = symbol_table_insert(":>");
147 symbol_lesscolon = symbol_table_insert("<:");
148 symbol_lesspercent = symbol_table_insert("<%");
149 symbol_percentcolon = symbol_table_insert("%:");
150 symbol_percentcolonpercentcolon = symbol_table_insert("%:%:");
151 symbol_percentgreater = symbol_table_insert("%>");
/* prefixes recognised by identify_encoding_prefix() */
153 symbol_L = symbol_table_insert("L");
154 symbol_U = symbol_table_insert("U");
155 symbol_u = symbol_table_insert("u");
156 symbol_u8 = symbol_table_insert("u8");
/**
 * Makes @p file the current preprocessor input.
 *
 * A decoding input is created from the stream, the position is reset to
 * line 1 of @p filename and a line directive is printed for it.
 *
 * @param file              stream to read from
 * @param filename          name recorded as input.position.input_name
 * @param path              search path entry (its use is not visible in
 *                          this extract)
 * @param is_system_header  recorded in input.position.is_system_header
 */
159 void switch_pp_input(FILE *const file, char const *const filename, searchpath_entry_t *const path, bool const is_system_header)
162 input.input = input_from_stream(file, NULL);
165 input.output_line = 0;
166 input.position.input_name = filename;
167 input.position.lineno = 1;
168 input.position.is_system_header = is_system_header;
171 /* indicate that we're at a new input */
/* the "1" flag is only passed when a saved input exists on input_stack */
172 print_line_directive(&input.position, input_stack != NULL ? "1" : NULL);
174 /* place a virtual '\n' so we realize we're at line begin */
175 input.position.lineno = 0;
179 FILE *close_pp_input(void)
181 input_free(input.input);
183 FILE* const file = input.file;
/**
 * Saves the current input state: `input` is copied onto input_obstack and the
 * copy is pushed onto input_stack.
 */
195 static void push_input(void)
197 pp_input_t *const saved_input = obstack_copy(&input_obstack, &input, sizeof(input));
199 /* adjust buffer positions */
/* bufpos/bufend point into input.buf; the copy must point into its own buf
 * at the same offsets */
200 if (input.bufpos != NULL)
201 saved_input->bufpos = saved_input->buf + (input.bufpos - input.buf);
202 if (input.bufend != NULL)
203 saved_input->bufend = saved_input->buf + (input.bufend - input.buf);
/* link the copy in as the new top of the stack */
205 saved_input->parent = input_stack;
206 input_stack = saved_input;
/**
 * Restores `input` from the top entry of input_stack, pops that entry and
 * releases it from input_obstack. The stack must not be empty (asserted).
 */
210 static void pop_restore_input(void)
212 assert(n_inputs > 0);
213 assert(input_stack != NULL);
215 pp_input_t *saved_input = input_stack;
217 memcpy(&input, saved_input, sizeof(input));
220 /* adjust buffer positions */
/* after the memcpy the pointers still refer to the saved copy's buffer;
 * rebase them onto input.buf at the same offsets */
221 if (saved_input->bufpos != NULL)
222 input.bufpos = input.buf + (saved_input->bufpos - saved_input->buf);
223 if (saved_input->bufend != NULL)
224 input.bufend = input.buf + (saved_input->bufend - saved_input->buf);
226 input_stack = saved_input->parent;
227 obstack_free(&input_obstack, saved_input);
232 * Prints a parse error message at the current token.
234 * @param msg the error message
236 static void parse_error(const char *msg)
238 errorf(&pp_token.base.source_position, "%s", msg);
/**
 * Loads the next decoded character into input.c and advances the column
 * counter. When the buffer is exhausted it is refilled with decode() first.
 */
241 static inline void next_real_char(void)
243 assert(input.bufpos <= input.bufend);
244 if (input.bufpos >= input.bufend) {
/* decode behind the first MAX_PUTBACK slots, which stay free in front of
 * the data (presumably as room for put_back()) */
245 size_t const n = decode(input.input, input.buf + MAX_PUTBACK, lengthof(input.buf) - MAX_PUTBACK);
/* NOTE(review): the lines between decode() and the pointer reset are not
 * visible in this extract */
250 input.bufpos = input.buf + MAX_PUTBACK;
251 input.bufend = input.bufpos + n;
253 input.c = *input.bufpos++;
254 ++input.position.colno;
258 * Put a character back into the buffer.
260 * @param pc the character to put back
262 static inline void put_back(utf32 const pc)
/* there must be at least one free slot in front of the read position */
264 assert(input.bufpos > input.buf);
/* Store the character at full utf32 width: input.buf is an array of utf32,
 * so narrowing the value through char first would corrupt any put-back code
 * point that does not fit into a char. */
265 *--input.bufpos = pc;
/* undo the column advance made when the character was read */
266 --input.position.colno;
272 if (input.c == '\n') { \
276 ++input.position.lineno; \
277 input.position.colno = 1; \
279 newline // Let it look like an ordinary case label.
281 #define eat(c_type) (assert(input.c == c_type), next_char())
283 static void maybe_concat_lines(void)
289 info.whitespace_at_line_begin = 0;
301 * Set c to the next input character, ie.
302 * after expanding trigraphs.
304 static inline void next_char(void)
308 /* filter trigraphs and concatenated lines */
309 if (UNLIKELY(input.c == '\\')) {
310 maybe_concat_lines();
311 goto end_of_next_char;
314 if (LIKELY(input.c != '?'))
315 goto end_of_next_char;
318 if (LIKELY(input.c != '?')) {
321 goto end_of_next_char;
326 case '=': input.c = '#'; break;
327 case '(': input.c = '['; break;
328 case '/': input.c = '\\'; maybe_concat_lines(); break;
329 case ')': input.c = ']'; break;
330 case '\'': input.c = '^'; break;
331 case '<': input.c = '{'; break;
332 case '!': input.c = '|'; break;
333 case '>': input.c = '}'; break;
334 case '-': input.c = '~'; break;
344 printf("nchar '%c'\n", input.c);
351 * Returns true if the given char is a octal digit.
353 * @param char the character to check
355 static inline bool is_octal_digit(int chr)
373 * Returns the value of a digit.
374 * The only portable way to do it ...
376 static int digit_value(int digit)
402 panic("wrong character given");
407 * Parses an octal character sequence.
409 * @param first_digit the already read first digit
411 static utf32 parse_octal_sequence(const utf32 first_digit)
/* the caller passes the first digit, which must be an octal digit */
413 assert(is_octal_digit(first_digit));
414 utf32 value = digit_value(first_digit);
/* at most two further octal digits are folded in (three in total); a
 * non-octal character in input.c ends the sequence early.
 * NOTE(review): the lines that advance the input between the digits are not
 * visible in this extract. */
415 if (!is_octal_digit(input.c)) return value;
416 value = 8 * value + digit_value(input.c);
418 if (!is_octal_digit(input.c)) return value;
419 value = 8 * value + digit_value(input.c);
426 * Parses a hex character sequence.
428 static utf32 parse_hex_sequence(void)
/* input.c is a utf32 code point; isxdigit() is only defined for values
 * representable as unsigned char (or EOF), so anything outside ASCII is
 * rejected before the call. All hexadecimal digits are ASCII. */
431 while (input.c < 0x80 && isxdigit(input.c)) {
/* accumulate one more hex digit */
432 value = 16 * value + digit_value(input.c);
438 static bool is_universal_char_valid(utf32 const v)
441 if (v < 0xA0U && v != 0x24 && v != 0x40 && v != 0x60)
443 if (0xD800 <= v && v <= 0xDFFF)
/**
 * Parses the hexadecimal digits of a universal character name.
 *
 * Reports an error if fewer than @p n_digits hex digits follow or if the
 * value is rejected by is_universal_char_valid().
 *
 * @param n_digits  number of hex digits expected; the diagnostic prints 'u'
 *                  for 4 and 'U' otherwise
 */
448 static utf32 parse_universal_char(unsigned const n_digits)
451 for (unsigned k = n_digits; k != 0; --k) {
/* input.c is a utf32 code point; isxdigit() is only defined for values
 * representable as unsigned char (or EOF), so anything outside ASCII is
 * rejected before the call. All hexadecimal digits are ASCII. */
452 if (input.c < 0x80 && isxdigit(input.c)) {
453 v = 16 * v + digit_value(input.c);
/* when escape sequences are kept verbatim, the digit itself is copied */
454 if (!resolve_escape_sequences)
455 obstack_1grow(&symbol_obstack, input.c);
458 errorf(&input.position,
459 "short universal character name, expected %u more digits",
464 if (!is_universal_char_valid(v)) {
465 errorf(&input.position,
466 "\\%c%0*X is not a valid universal character name",
467 n_digits == 4 ? 'u' : 'U', (int)n_digits, v);
472 static bool is_universal_char_valid_identifier_c99(utf32 const v)
474 static const utf32 single_chars[] = {
475 0x00AA, 0x00BA, 0x0386, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0,
476 0x1F59, 0x1F5B, 0x1F5D, 0x05BF, 0x09B2, 0x0A02, 0x0A5E, 0x0A74,
477 0x0A8D, 0x0AD0, 0x0AE0, 0x0B9C, 0x0CDE, 0x0E84, 0x0E8A, 0x0E8D,
478 0x0EA5, 0x0EA7, 0x0EC6, 0x0F00, 0x0F35, 0x0F37, 0x0F39, 0x0F97,
479 0x0FB9, 0x00B5, 0x00B7, 0x02BB, 0x037A, 0x0559, 0x093D, 0x0B3D,
480 0x1FBE, 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128
483 static const utf32 ranges[][2] = {
484 {0x00C0, 0x00D6}, {0x00D8, 0x00F6}, {0x00F8, 0x01F5}, {0x01FA, 0x0217},
485 {0x0250, 0x02A8}, {0x1E00, 0x1E9B}, {0x1EA0, 0x1EF9}, {0x0388, 0x038A},
486 {0x038E, 0x03A1}, {0x03A3, 0x03CE}, {0x03D0, 0x03D6}, {0x03E2, 0x03F3},
487 {0x1F00, 0x1F15}, {0x1F18, 0x1F1D}, {0x1F20, 0x1F45}, {0x1F48, 0x1F4D},
488 {0x1F50, 0x1F57}, {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4}, {0x1FB6, 0x1FBC},
489 {0x1FC2, 0x1FC4}, {0x1FC6, 0x1FCC}, {0x1FD0, 0x1FD3}, {0x1FD6, 0x1FDB},
490 {0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4}, {0x1FF6, 0x1FFC}, {0x0401, 0x040C},
491 {0x040E, 0x044F}, {0x0451, 0x045C}, {0x045E, 0x0481}, {0x0490, 0x04C4},
492 {0x04C7, 0x04C8}, {0x04CB, 0x04CC}, {0x04D0, 0x04EB}, {0x04EE, 0x04F5},
493 {0x04F8, 0x04F9}, {0x0531, 0x0556}, {0x0561, 0x0587}, {0x05B0, 0x05B9},
494 {0x05BB, 0x05BD}, {0x05C1, 0x05C2}, {0x05D0, 0x05EA}, {0x05F0, 0x05F2},
495 {0x0621, 0x063A}, {0x0640, 0x0652}, {0x0670, 0x06B7}, {0x06BA, 0x06BE},
496 {0x06C0, 0x06CE}, {0x06D0, 0x06DC}, {0x06E5, 0x06E8}, {0x06EA, 0x06ED},
497 {0x0901, 0x0903}, {0x0905, 0x0939}, {0x093E, 0x094D}, {0x0950, 0x0952},
498 {0x0958, 0x0963}, {0x0981, 0x0983}, {0x0985, 0x098C}, {0x098F, 0x0990},
499 {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B6, 0x09B9}, {0x09BE, 0x09C4},
500 {0x09C7, 0x09C8}, {0x09CB, 0x09CD}, {0x09DC, 0x09DD}, {0x09DF, 0x09E3},
501 {0x09F0, 0x09F1}, {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28},
502 {0x0A2A, 0x0A30}, {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, {0x0A38, 0x0A39},
503 {0x0A3E, 0x0A42}, {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A59, 0x0A5C},
504 {0x0A81, 0x0A83}, {0x0A85, 0x0A8B}, {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8},
505 {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3}, {0x0AB5, 0x0AB9}, {0x0ABD, 0x0AC5},
506 {0x0AC7, 0x0AC9}, {0x0ACB, 0x0ACD}, {0x0B01, 0x0B03}, {0x0B05, 0x0B0C},
507 {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, {0x0B32, 0x0B33},
508 {0x0B36, 0x0B39}, {0x0B3E, 0x0B43}, {0x0B47, 0x0B48}, {0x0B4B, 0x0B4D},
509 {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B61}, {0x0B82, 0x0B83}, {0x0B85, 0x0B8A},
510 {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A}, {0x0B9E, 0x0B9F},
511 {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA}, {0x0BAE, 0x0BB5}, {0x0BB7, 0x0BB9},
512 {0x0BBE, 0x0BC2}, {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD}, {0x0C01, 0x0C03},
513 {0x0C05, 0x0C0C}, {0x0C0E, 0x0C10}, {0x0C12, 0x0C28}, {0x0C2A, 0x0C33},
514 {0x0C35, 0x0C39}, {0x0C3E, 0x0C44}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D},
515 {0x0C60, 0x0C61}, {0x0C82, 0x0C83}, {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90},
516 {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3}, {0x0CB5, 0x0CB9}, {0x0CBE, 0x0CC4},
517 {0x0CC6, 0x0CC8}, {0x0CCA, 0x0CCD}, {0x0CE0, 0x0CE1}, {0x0D02, 0x0D03},
518 {0x0D05, 0x0D0C}, {0x0D0E, 0x0D10}, {0x0D12, 0x0D28}, {0x0D2A, 0x0D39},
519 {0x0D3E, 0x0D43}, {0x0D46, 0x0D48}, {0x0D4A, 0x0D4D}, {0x0D60, 0x0D61},
520 {0x0E01, 0x0E3A}, {0x0E40, 0x0E5B}, {0x0E81, 0x0E82}, {0x0E87, 0x0E88},
521 {0x0E94, 0x0E97}, {0x0E99, 0x0E9F}, {0x0EA1, 0x0EA3}, {0x0EAA, 0x0EAB},
522 {0x0EAD, 0x0EAE}, {0x0EB0, 0x0EB9}, {0x0EBB, 0x0EBD}, {0x0EC0, 0x0EC4},
523 {0x0EC8, 0x0ECD}, {0x0EDC, 0x0EDD}, {0x0F18, 0x0F19}, {0x0F3E, 0x0F47},
524 {0x0F49, 0x0F69}, {0x0F71, 0x0F84}, {0x0F86, 0x0F8B}, {0x0F90, 0x0F95},
525 {0x0F99, 0x0FAD}, {0x0FB1, 0x0FB7}, {0x10A0, 0x10C5}, {0x10D0, 0x10F6},
526 {0x3041, 0x3093}, {0x309B, 0x309C}, {0x30A1, 0x30F6}, {0x30FB, 0x30FC},
527 {0x3105, 0x312C}, {0x4E00, 0x9FA5}, {0xAC00, 0xD7A3}, {0x0660, 0x0669},
528 {0x06F0, 0x06F9}, {0x0966, 0x096F}, {0x09E6, 0x09EF}, {0x0A66, 0x0A6F},
529 {0x0AE6, 0x0AEF}, {0x0B66, 0x0B6F}, {0x0BE7, 0x0BEF}, {0x0C66, 0x0C6F},
530 {0x0CE6, 0x0CEF}, {0x0D66, 0x0D6F}, {0x0E50, 0x0E59}, {0x0ED0, 0x0ED9},
531 {0x0F20, 0x0F33}, {0x02B0, 0x02B8}, {0x02BD, 0x02C1}, {0x02D0, 0x02D1},
532 {0x02E0, 0x02E4}, {0x203F, 0x2040}, {0x210A, 0x2113}, {0x2118, 0x211D},
533 {0x212A, 0x2131}, {0x2133, 0x2138}, {0x2160, 0x2182}, {0x3005, 0x3007},
536 for (size_t i = 0; i < sizeof(ranges)/sizeof(ranges[0]); ++i) {
537 if (ranges[i][0] <= v && v <= ranges[i][1])
540 for (size_t i = 0; i < sizeof(single_chars)/sizeof(single_chars[0]); ++i) {
541 if (v == single_chars[i])
547 static bool is_universal_char_valid_identifier_c11(utf32 const v)
550 if ( v == 0x000A8) return true;
551 if ( v == 0x000AA) return true;
552 if ( v == 0x000AD) return true;
553 if ( v == 0x000AF) return true;
554 if (0x000B2 <= v && v <= 0x000B5) return true;
555 if (0x000B7 <= v && v <= 0x000BA) return true;
556 if (0x000BC <= v && v <= 0x000BE) return true;
557 if (0x000C0 <= v && v <= 0x000D6) return true;
558 if (0x000D8 <= v && v <= 0x000F6) return true;
559 if (0x000F8 <= v && v <= 0x000FF) return true;
560 if (0x00100 <= v && v <= 0x0167F) return true;
561 if (0x01681 <= v && v <= 0x0180D) return true;
562 if (0x0180F <= v && v <= 0x01FFF) return true;
563 if (0x0200B <= v && v <= 0x0200D) return true;
564 if (0x0202A <= v && v <= 0x0202E) return true;
565 if (0x0203F <= v && v <= 0x02040) return true;
566 if ( v == 0x02054) return true;
567 if (0x02060 <= v && v <= 0x0206F) return true;
568 if (0x02070 <= v && v <= 0x0218F) return true;
569 if (0x02460 <= v && v <= 0x024FF) return true;
570 if (0x02776 <= v && v <= 0x02793) return true;
571 if (0x02C00 <= v && v <= 0x02DFF) return true;
572 if (0x02E80 <= v && v <= 0x02FFF) return true;
573 if (0x03004 <= v && v <= 0x03007) return true;
574 if (0x03021 <= v && v <= 0x0302F) return true;
575 if (0x03031 <= v && v <= 0x0303F) return true;
576 if (0x03040 <= v && v <= 0x0D7FF) return true;
577 if (0x0F900 <= v && v <= 0x0FD3D) return true;
578 if (0x0FD40 <= v && v <= 0x0FDCF) return true;
579 if (0x0FDF0 <= v && v <= 0x0FE44) return true;
580 if (0x0FE47 <= v && v <= 0x0FFFD) return true;
581 if (0x10000 <= v && v <= 0x1FFFD) return true;
582 if (0x20000 <= v && v <= 0x2FFFD) return true;
583 if (0x30000 <= v && v <= 0x3FFFD) return true;
584 if (0x40000 <= v && v <= 0x4FFFD) return true;
585 if (0x50000 <= v && v <= 0x5FFFD) return true;
586 if (0x60000 <= v && v <= 0x6FFFD) return true;
587 if (0x70000 <= v && v <= 0x7FFFD) return true;
588 if (0x80000 <= v && v <= 0x8FFFD) return true;
589 if (0x90000 <= v && v <= 0x9FFFD) return true;
590 if (0xA0000 <= v && v <= 0xAFFFD) return true;
591 if (0xB0000 <= v && v <= 0xBFFFD) return true;
592 if (0xC0000 <= v && v <= 0xCFFFD) return true;
593 if (0xD0000 <= v && v <= 0xDFFFD) return true;
594 if (0xE0000 <= v && v <= 0xEFFFD) return true;
598 static bool is_universal_char_valid_identifier(utf32 const v)
601 return is_universal_char_valid_identifier_c11(v);
602 return is_universal_char_valid_identifier_c99(v);
/**
 * Tells whether @p v may not be the first character of an identifier.
 *
 * Returns true for the ranges 0300-036F, 1DC0-1DFF, 20D0-20FF and FE20-FE2F
 * (the ranges C11 Annex D.2 lists as disallowed initially).
 * NOTE(review): the result for modes without _C11 and the fall-through
 * result are not visible in this extract.
 */
605 static bool is_universal_char_invalid_identifier_start(utf32 const v)
607 if (! (c_mode & _C11))
611 if (0x0300 <= v && v <= 0x036F) return true;
612 if (0x1DC0 <= v && v <= 0x1DFF) return true;
613 if (0x20D0 <= v && v <= 0x20FF) return true;
614 if (0xFE20 <= v && v <= 0xFE2F) return true;
619 * Parse an escape sequence.
621 static utf32 parse_escape_sequence(void)
625 utf32 const ec = input.c;
629 case '"': return '"';
630 case '\'': return '\'';
631 case '\\': return '\\';
632 case '?': return '\?';
633 case 'a': return '\a';
634 case 'b': return '\b';
635 case 'f': return '\f';
636 case 'n': return '\n';
637 case 'r': return '\r';
638 case 't': return '\t';
639 case 'v': return '\v';
641 return parse_hex_sequence();
650 return parse_octal_sequence(ec);
652 parse_error("reached end of file while parsing escape sequence");
654 /* \E is not documented, but handled, by GCC. It is acceptable according
655 * to §6.11.4, whereas \e is not. */
659 return 27; /* hopefully 27 is ALWAYS the code for ESCAPE */
662 case 'U': return parse_universal_char(8);
663 case 'u': return parse_universal_char(4);
668 /* §6.4.4.4:8 footnote 64 */
669 parse_error("unknown escape sequence");
/**
 * Inserts @p string into the file-level string set.
 *
 * @param string  a string that was finished on symbol_obstack
 * NOTE(review): the return statement is not visible in this extract;
 * sym_make_string() uses the result as the string's address.
 */
673 static const char *identify_string(char *string)
675 const char *result = strset_insert(&stringset, string);
/* the set handed back a different pointer than the one passed in, so the
 * passed copy is released from the obstack again */
676 if (result != string) {
677 obstack_free(&symbol_obstack, string);
/**
 * Turns the object currently being grown on symbol_obstack into a string_t.
 *
 * @param enc  encoding stored in the result
 * @return     string_t holding the address returned by identify_string(),
 *             the length without the terminator, and @p enc
 */
682 static string_t sym_make_string(string_encoding_t const enc)
/* terminate the string; the terminator is not counted in len */
684 obstack_1grow(&symbol_obstack, '\0');
685 size_t const len = obstack_object_size(&symbol_obstack) - 1;
686 char *const string = obstack_finish(&symbol_obstack);
687 char const *const result = identify_string(string);
688 return (string_t){ result, len, enc };
/**
 * Creates a string_t with STRING_ENCODING_CHAR from a NUL-terminated C string
 * by copying its characters onto symbol_obstack.
 *
 * @param string  NUL-terminated input; its length is taken with strlen()
 */
691 string_t make_string(char const *const string)
693 obstack_grow(&symbol_obstack, string, strlen(string));
694 return sym_make_string(STRING_ENCODING_CHAR);
/**
 * Returns the upper limit associated with a string encoding: 0xFF for char,
 * 0xFFFF for char16 and 0xFFFFFFFF for char32, UTF-8 and wide strings.
 * Panics on any other encoding value.
 */
697 static utf32 get_string_encoding_limit(string_encoding_t const enc)
700 case STRING_ENCODING_CHAR: return 0xFF;
701 case STRING_ENCODING_CHAR16: return 0xFFFF;
702 case STRING_ENCODING_CHAR32: return 0xFFFFFFFF;
703 case STRING_ENCODING_UTF8: return 0xFFFFFFFF;
704 case STRING_ENCODING_WIDE: return 0xFFFFFFFF; // FIXME depends on settings
706 panic("invalid string encoding");
/**
 * Parses a string literal or character constant into pp_token.
 *
 * The characters are collected on symbol_obstack and turned into the token's
 * string with sym_make_string().
 *
 * @param delimiter  the closing delimiter character
 * @param kind       token kind stored in pp_token.kind
 * @param enc        encoding of the literal
 * @param context    description used in the "newline while parsing" and
 *                   "EOF while parsing" diagnostics
 */
709 static void parse_string(utf32 const delimiter, token_kind_t const kind,
710 string_encoding_t const enc,
711 char const *const context)
/* remembered so that the EOF diagnostic can name the line the literal
 * started on */
713 const unsigned start_linenr = input.position.lineno;
717 utf32 const limit = get_string_encoding_limit(enc);
721 if (resolve_escape_sequences) {
722 utf32 const tc = parse_escape_sequence();
724 warningf(WARN_OTHER, &pp_token.base.source_position, "escape sequence out of range");
/* plain char strings store the value as a single byte, all other
 * encodings store it UTF-8 encoded */
726 if (enc == STRING_ENCODING_CHAR) {
727 obstack_1grow(&symbol_obstack, tc);
729 obstack_grow_utf8(&symbol_obstack, tc);
732 obstack_1grow(&symbol_obstack, (char)input.c);
734 obstack_1grow(&symbol_obstack, (char)input.c);
741 errorf(&pp_token.base.source_position, "newline while parsing %s", context);
/* Start from the token's full position so that every member (colno,
 * is_system_header, ...) has a defined value before the struct is handed
 * to errorf(); only the line number is replaced. */
745 source_position_t source_position = pp_token.base.source_position;
747 source_position.lineno = start_linenr;
748 errorf(&source_position, "EOF while parsing %s", context);
753 if (input.c == delimiter) {
757 obstack_grow_utf8(&symbol_obstack, input.c);
765 pp_token.kind = kind;
766 pp_token.literal.string = sym_make_string(enc);
769 static void parse_string_literal(string_encoding_t const enc)
771 parse_string('"', T_STRING_LITERAL, enc, "string literal");
/**
 * Parses a character constant (delimited by ') into pp_token and reports an
 * error if the resulting string is empty.
 *
 * @param enc  encoding of the constant
 */
774 static void parse_character_constant(string_encoding_t const enc)
776 parse_string('\'', T_CHARACTER_CONSTANT, enc, "character constant");
777 if (pp_token.literal.string.size == 0) {
778 parse_error("empty character constant");
782 #define SYMBOL_CASES_WITHOUT_E_P \
783 '$': if (!allow_dollar_in_symbol) goto dollar_sign; \
834 #define SYMBOL_CASES \
835 SYMBOL_CASES_WITHOUT_E_P: \
841 #define DIGIT_CASES \
/**
 * Begins the expansion of @p definition: it is linked to the expansion that
 * is currently in progress, its read position is reset, it is marked as
 * expanding and it becomes current_expansion.
 */
853 static void start_expanding(pp_definition_t *definition)
855 definition->parent_expansion = current_expansion;
856 definition->expand_pos = 0;
857 definition->is_expanding = true;
/* the first token of the expansion takes over the whitespace flag that is
 * current in `info` when the expansion starts */
858 if (definition->list_len > 0) {
859 definition->token_list[0].had_whitespace
860 = info.had_whitespace;
862 current_expansion = definition;
/**
 * Ends the expansion of @p definition, which must be the current expansion
 * and marked as expanding (both asserted). The definition is unlinked from
 * its parent, is_expanding is cleared, argument_expanding is reset if it
 * referred to this definition, and the parent becomes current_expansion.
 */
865 static void finished_expanding(pp_definition_t *definition)
867 assert(definition->is_expanding);
868 pp_definition_t *parent = definition->parent_expansion;
869 definition->parent_expansion = NULL;
870 definition->is_expanding = false;
872 /* stop further expanding once we expanded a parameter used in a
874 if (definition == argument_expanding)
875 argument_expanding = NULL;
877 assert(current_expansion == definition);
878 current_expansion = parent;
/**
 * Appends a string or character literal to @p obst: encoding prefix, opening
 * delimiter, the characters, closing delimiter.
 *
 * @param obst       obstack to append to
 * @param string     the literal's contents and encoding
 * @param delimiter  text written before and after the contents
 */
881 static void grow_string_escaped(struct obstack *obst, const string_t *string, char const *delimiter)
883 char const *prefix = get_string_encoding_prefix(string->encoding);
884 obstack_printf(obst, "%s%s", prefix, delimiter);
885 size_t size = string->size;
886 const char *str = string->begin;
/* with resolved escape sequences the bytes are copied unchanged */
887 if (resolve_escape_sequences) {
888 obstack_grow(obst, str, size);
/* NOTE(review): presumably the else branch (the separating line is not
 * visible): every backslash and double quote gets a backslash in front */
890 for (size_t i = 0; i < size; ++i) {
891 const char c = str[i];
892 if (c == '\\' || c == '"')
893 obstack_1grow(obst, '\\');
894 obstack_1grow(obst, c);
897 obstack_printf(obst, "%s", delimiter);
900 static void grow_token(struct obstack *obst, const token_t *token)
902 switch (token->kind) {
904 obstack_grow(obst, token->literal.string.begin, token->literal.string.size);
907 case T_STRING_LITERAL: {
908 char const *const delimiter = resolve_escape_sequences ? "\"" : "\\\"";
909 grow_string_escaped(obst, &token->literal.string, delimiter);
913 case T_CHARACTER_CONSTANT:
914 grow_string_escaped(obst, &token->literal.string, "'");
919 const char *str = token->base.symbol->string;
920 size_t len = strlen(str);
921 obstack_grow(obst, str, len);
/**
 * Builds a T_STRING_LITERAL token in pp_token from the token list of
 * @p definition. symbol_obstack must not have an object under construction
 * (asserted).
 */
927 static void stringify(const pp_definition_t *definition)
929 assert(obstack_object_size(&symbol_obstack) == 0);
931 size_t list_len = definition->list_len;
932 for (size_t p = 0; p < list_len; ++p) {
933 const saved_token_t *saved = &definition->token_list[p];
/* whitespace in front of a token becomes one space, except in front of
 * the very first token */
934 if (p > 0 && saved->had_whitespace)
935 obstack_1grow(&symbol_obstack, ' ');
936 grow_token(&symbol_obstack, &saved->token);
938 pp_token.kind = T_STRING_LITERAL;
939 pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
/**
 * Makes pp_token a token of the given kind and takes its symbol from
 * token_symbols[kind].
 */
942 static inline void set_punctuator(token_kind_t const kind)
944 pp_token.kind = kind;
945 pp_token.base.symbol = token_symbols[kind];
/**
 * Makes pp_token a token of the given kind, but with an explicitly supplied
 * symbol instead of the one from token_symbols. The callers visible in this
 * file pass one of the digraph symbols, e.g. symbol_percentcolon for '#'.
 */
948 static inline void set_digraph(token_kind_t const kind, symbol_t *const symbol)
950 pp_token.kind = kind;
951 pp_token.base.symbol = symbol;
955 * returns next final token from a preprocessor macro expansion
957 static bool expand_next(void)
/* nothing to do when no macro expansion is in progress.
 * NOTE(review): the return statements are not visible in this extract. */
959 if (current_expansion == NULL)
963 size_t pos = current_expansion->expand_pos;
/* the current expansion has no tokens left: finish it, which makes its
 * parent expansion the current one */
964 if (pos >= current_expansion->list_len) {
965 finished_expanding(current_expansion);
966 /* it was the outermost expansion, parse pptoken normally */
967 if (current_expansion == NULL) {
/* fetch the next saved token and step past it */
972 const saved_token_t *saved = &current_expansion->token_list[pos++];
973 pp_token = saved->token;
/* a '#' directly followed by a macro parameter is handled specially
 * (presumably stringification; the handling lines are not visible here) */
974 if (pp_token.kind == '#') {
975 if (pos < current_expansion->list_len) {
976 const saved_token_t *next = &current_expansion->token_list[pos];
977 if (next->token.kind == T_MACRO_PARAMETER) {
978 pp_definition_t *def = next->token.macro_parameter.def;
979 assert(def != NULL && def->is_parameter);
/* every token but the first of an expansion takes its whitespace flag from
 * the saved token */
986 if (current_expansion->expand_pos > 0)
987 info.had_whitespace = saved->had_whitespace;
988 current_expansion->expand_pos = pos;
/* expanded tokens are all reported at expansion_pos */
989 pp_token.base.source_position = expansion_pos;
995 * Returns the next token kind found when continuing the current expansions
996 * without starting new sub-expansions.
998 static token_kind_t peek_expansion(void)
/* walk from the innermost expansion outwards; the first one that still has
 * tokens left supplies the answer.
 * NOTE(review): the result when no expansion has tokens left is not visible
 * in this extract. */
1000 for (pp_definition_t *e = current_expansion; e; e = e->parent_expansion) {
1001 if (e->expand_pos < e->list_len)
1002 return e->token_list[e->expand_pos].token.kind;
1007 static void skip_line_comment(void)
1009 info.had_whitespace = true;
/**
 * Skips a multi-line comment. The comment counts as whitespace in front of
 * the next token; reaching the end of the file inside the comment is
 * reported as an error at the line on which the comment started.
 */
1026 static void skip_multiline_comment(void)
1028 info.had_whitespace = true;
/* remembered for the end-of-file diagnostic */
1030 unsigned start_linenr = input.position.lineno;
1035 if (input.c == '*') {
1036 /* TODO: nested comment, warn here */
1041 if (input.c == '/') {
/* the comment ended on a line other than the recorded output line: take
 * the current column as the whitespace at line begin */
1042 if (input.position.lineno != input.output_line)
1043 info.whitespace_at_line_begin = input.position.colno;
/* Start from the token's full position so that every member (colno,
 * is_system_header, ...) has a defined value before the struct is handed
 * to errorf(); only the line number is replaced. */
1053 source_position_t source_position = pp_token.base.source_position;
1055 source_position.lineno = start_linenr;
1056 errorf(&source_position, "at end of file while looking for comment end");
1067 static bool skip_till_newline(bool stop_at_non_whitespace)
1079 if (input.c == '/') {
1081 skip_line_comment();
1083 } else if (input.c == '*') {
1085 skip_multiline_comment();
1097 if (stop_at_non_whitespace)
1106 static void skip_whitespace(void)
1112 ++info.whitespace_at_line_begin;
1113 info.had_whitespace = true;
1118 info.at_line_begin = true;
1119 info.had_whitespace = true;
1120 info.whitespace_at_line_begin = 0;
1125 if (input.c == '/') {
1127 skip_line_comment();
1129 } else if (input.c == '*') {
1131 skip_multiline_comment();
1145 static inline void eat_pp(pp_token_kind_t const kind)
1147 assert(pp_token.base.symbol->pp_ID == kind);
1152 static inline void eat_token(token_kind_t const kind)
1154 assert(pp_token.kind == kind);
/**
 * Maps an identifier symbol to the string encoding it selects when used as a
 * literal prefix.
 *
 * "L" always selects the wide encoding; "U", "u" and "u8" select char32,
 * char16 and UTF-8 only when c_mode has _C11 set.
 *
 * @return the selected encoding, or STRING_ENCODING_CHAR if @p sym is not a
 *         recognised prefix
 */
1159 static string_encoding_t identify_encoding_prefix(symbol_t *const sym)
1161 if (sym == symbol_L) return STRING_ENCODING_WIDE;
1162 if (c_mode & _C11) {
1163 if (sym == symbol_U) return STRING_ENCODING_CHAR32;
1164 if (sym == symbol_u) return STRING_ENCODING_CHAR16;
1165 if (sym == symbol_u8) return STRING_ENCODING_UTF8;
1167 return STRING_ENCODING_CHAR;
/**
 * Parses an identifier into pp_token.
 *
 * The spelling is collected on symbol_obstack (which must be empty on entry)
 * and inserted into the symbol table. If the identifier is directly followed
 * by a quote and is an encoding prefix, a prefixed string literal or
 * character constant is parsed instead.
 */
1170 static void parse_symbol(void)
1172 assert(obstack_object_size(&symbol_obstack) == 0);
1177 obstack_1grow(&symbol_obstack, (char) input.c);
/* universal character names: \U takes 8 hex digits, \u takes 4 */
1186 case 'U': n = 8; goto universal;
1187 case 'u': n = 4; goto universal;
/* when escape sequences are kept verbatim, the backslash and the letter
 * are copied into the spelling */
1189 if (!resolve_escape_sequences) {
1190 obstack_1grow(&symbol_obstack, '\\');
1191 obstack_1grow(&symbol_obstack, input.c);
1194 utf32 const v = parse_universal_char(n);
1195 if (!is_universal_char_valid_identifier(v)) {
/* only report here if the value is a valid universal character at all;
 * parse_universal_char() already reported invalid ones */
1196 if (is_universal_char_valid(v)) {
1197 errorf(&input.position,
1198 "universal character \\%c%0*X is not valid in an identifier",
1199 n == 4 ? 'u' : 'U', (int)n, v);
1201 } else if (obstack_object_size(&symbol_obstack) == 0 && is_universal_char_invalid_identifier_start(v)) {
1202 errorf(&input.position,
1203 "universal character \\%c%0*X is not valid as start of an identifier",
1204 n == 4 ? 'u' : 'U', (int)n, v);
1205 } else if (resolve_escape_sequences) {
1206 obstack_grow_utf8(&symbol_obstack, v);
1224 obstack_1grow(&symbol_obstack, '\0');
1225 char *string = obstack_finish(&symbol_obstack);
1227 symbol_t *symbol = symbol_table_insert(string);
1229 /* Might be a prefixed string or character constant: L/U/u/u8"string". */
1230 if (input.c == '"') {
1231 string_encoding_t const enc = identify_encoding_prefix(symbol);
1232 if (enc != STRING_ENCODING_CHAR) {
1233 parse_string_literal(enc);
1236 } else if (input.c == '\'') {
1237 string_encoding_t const enc = identify_encoding_prefix(symbol);
1238 if (enc != STRING_ENCODING_CHAR) {
1239 if (enc == STRING_ENCODING_UTF8) {
1240 errorf(&pp_token.base.source_position, "'u8' is not a valid encoding for a character constant");
1242 parse_character_constant(enc);
1247 pp_token.kind = symbol->ID;
1248 pp_token.base.symbol = symbol;
1250 /* we can free the memory from symbol obstack if we already had an entry in
1251 * the symbol table */
1252 if (symbol->string != string) {
1253 obstack_free(&symbol_obstack, string);
1257 static void parse_number(void)
1259 obstack_1grow(&symbol_obstack, (char) input.c);
1266 case SYMBOL_CASES_WITHOUT_E_P:
1267 obstack_1grow(&symbol_obstack, (char) input.c);
1275 obstack_1grow(&symbol_obstack, (char) input.c);
1277 if (input.c == '+' || input.c == '-') {
1278 obstack_1grow(&symbol_obstack, (char) input.c);
1290 pp_token.kind = T_NUMBER;
1291 pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
1294 #define MAYBE_PROLOG \
1298 #define MAYBE(ch, kind) \
1301 set_punctuator(kind); \
1304 #define MAYBE_DIGRAPH(ch, kind, symbol) \
1307 set_digraph(kind, symbol); \
1310 #define ELSE_CODE(code) \
1315 #define ELSE(kind) ELSE_CODE(set_punctuator(kind); return;)
1317 /** identifies and returns the next preprocessing token contained in the
1318 * input stream. No macro expansion is performed. */
1319 static void next_input_token(void)
1321 if (next_info.had_whitespace) {
1323 next_info.had_whitespace = false;
1325 info.at_line_begin = false;
1326 info.had_whitespace = false;
1329 pp_token.base.source_position = input.position;
1330 pp_token.base.symbol = NULL;
1335 info.whitespace_at_line_begin++;
1336 info.had_whitespace = true;
1341 info.at_line_begin = true;
1342 info.had_whitespace = true;
1343 info.whitespace_at_line_begin = 0;
1355 parse_string_literal(STRING_ENCODING_CHAR);
1359 parse_character_constant(STRING_ENCODING_CHAR);
1381 MAYBE('.', T_DOTDOTDOT)
1385 set_punctuator('.');
1391 MAYBE('&', T_ANDAND)
1392 MAYBE('=', T_ANDEQUAL)
1396 MAYBE('=', T_ASTERISKEQUAL)
1400 MAYBE('+', T_PLUSPLUS)
1401 MAYBE('=', T_PLUSEQUAL)
1405 MAYBE('>', T_MINUSGREATER)
1406 MAYBE('-', T_MINUSMINUS)
1407 MAYBE('=', T_MINUSEQUAL)
1411 MAYBE('=', T_EXCLAMATIONMARKEQUAL)
1415 MAYBE('=', T_SLASHEQUAL)
1418 skip_multiline_comment();
1422 skip_line_comment();
1427 MAYBE_DIGRAPH('>', '}', symbol_percentgreater)
1428 MAYBE('=', T_PERCENTEQUAL)
1433 MAYBE_DIGRAPH(':', T_HASHHASH, symbol_percentcolonpercentcolon)
1437 goto digraph_percentcolon;
1440 digraph_percentcolon:
1441 set_digraph('#', symbol_percentcolon);
1447 MAYBE_DIGRAPH(':', '[', symbol_lesscolon)
1448 MAYBE_DIGRAPH('%', '{', symbol_lesspercent)
1449 MAYBE('=', T_LESSEQUAL)
1452 MAYBE('=', T_LESSLESSEQUAL)
1457 MAYBE('=', T_GREATEREQUAL)
1460 MAYBE('=', T_GREATERGREATEREQUAL)
1461 ELSE(T_GREATERGREATER)
1465 MAYBE('=', T_CARETEQUAL)
1469 MAYBE('=', T_PIPEEQUAL)
1470 MAYBE('|', T_PIPEPIPE)
1474 MAYBE_DIGRAPH('>', ']', symbol_colongreater)
1476 if (c_mode & _CXX) {
1478 set_punctuator(T_COLONCOLON);
1485 MAYBE('=', T_EQUALEQUAL)
1489 MAYBE('#', T_HASHHASH)
1502 set_punctuator(input.c);
1507 if (input_stack != NULL) {
1508 fclose(close_pp_input());
1509 pop_restore_input();
1512 if (input.c == (utf32)EOF)
1513 --input.position.lineno;
1514 print_line_directive(&input.position, "2");
1517 info.at_line_begin = true;
1518 set_punctuator(T_EOF);
1524 int next_c = input.c;
1527 if (next_c == 'U' || next_c == 'u') {
1534 if (error_on_unknown_chars) {
1535 errorf(&pp_token.base.source_position, "unknown character '%lc' found", input.c);
1539 assert(obstack_object_size(&symbol_obstack) == 0);
1540 obstack_grow_utf8(&symbol_obstack, input.c);
1541 obstack_1grow(&symbol_obstack, '\0');
1542 char *const string = obstack_finish(&symbol_obstack);
1543 symbol_t *const symbol = symbol_table_insert(string);
1544 if (symbol->string != string)
1545 obstack_free(&symbol_obstack, string);
1547 pp_token.kind = T_UNKNOWN_CHAR;
1548 pp_token.base.symbol = symbol;
/**
 * Writes @p string to `out` with the characters that have a simple escape
 * sequence replaced by that sequence; other characters handled by the last
 * branch are written as a three-digit octal escape.
 *
 * @param string  NUL-terminated string to print
 */
1555 static void print_quoted_string(const char *const string)
1558 for (const char *c = string; *c != 0; ++c) {
1560 case '"': fputs("\\\"", out); break;
1561 case '\\': fputs("\\\\", out); break;
1562 case '\a': fputs("\\a", out); break;
1563 case '\b': fputs("\\b", out); break;
1564 case '\f': fputs("\\f", out); break;
1565 case '\n': fputs("\\n", out); break;
1566 case '\r': fputs("\\r", out); break;
1567 case '\t': fputs("\\t", out); break;
1568 case '\v': fputs("\\v", out); break;
1569 case '\?': fputs("\\?", out); break;
/* Go through unsigned char first: where plain char is signed, a byte
 * >= 0x80 would otherwise be sign-extended and printed as an 11-digit
 * octal number instead of a 3-digit escape. */
1572 fprintf(out, "\\%03o", (unsigned)(unsigned char)*c);
/**
 * Writes a line directive of the form `# <lineno> "<input name>"` to `out`
 * and records what was printed.
 *
 * @param pos  position whose line number and input name are printed
 * @param add  NOTE(review): its handling is not visible in this extract;
 *             callers in this file pass "1", "2" or NULL
 */
1582 static void print_line_directive(const source_position_t *pos, const char *add)
1587 fprintf(out, "# %u ", pos->lineno);
1588 print_quoted_string(pos->input_name);
1593 if (pos->is_system_header) {
/* remember the printed name and set the output line to the line before
 * pos->lineno */
1597 printed_input_name = pos->input_name;
1598 input.output_line = pos->lineno-1;
/**
 * Brings the output line up to the line of the current token (pp_token),
 * using a line directive or a run of 'delta' loop iterations, then handles
 * the whitespace at the beginning of the line.
 *
 * NOTE(review): emit_pp_token() only considers inserting a separator when
 * this returns false, so the return value presumably means "a new output
 * line was started" -- confirm.
 */
1601 static bool emit_newlines(void)
/* number of lines the output lags behind the current token */
1606 unsigned delta = pp_token.base.source_position.lineno - input.output_line;
1612 print_line_directive(&pp_token.base.source_position, NULL);
1615 for (unsigned i = 0; i < delta; ++i) {
1619 input.output_line = pp_token.base.source_position.lineno;
1621 unsigned whitespace = info.whitespace_at_line_begin;
1622 /* make sure there is at least 1 whitespace before a (macro-expanded)
1623 * '#' at line begin. I'm not sure why this is good, but gcc does it. */
1624 if (pp_token.kind == '#' && whitespace == 0)
1626 for (unsigned i = 0; i < whitespace; ++i)
/**
 * Configures the lexer flags error_on_unknown_chars and
 * resolve_escape_sequences together: one path clears both, the other sets
 * both.
 *
 * @param output  output stream; presumably a non-NULL stream selects the
 *                "both flags off" mode -- TODO confirm
 */
1632 void set_preprocessor_output(FILE *output)
1636 error_on_unknown_chars = false;
1637 resolve_escape_sequences = false;
1639 error_on_unknown_chars = true;
1640 resolve_escape_sequences = true;
/**
 * Writes the current token (pp_token) to 'out' and remembers its kind in
 * last_token.
 */
1644 void emit_pp_token(void)
/* when no new line was started, a separator may be needed: either the
 * token had whitespace in front of it or it would paste with its
 * predecessor */
1646 if (!emit_newlines() &&
1647 (info.had_whitespace || tokens_would_paste(last_token, pp_token.kind)))
1650 switch (pp_token.kind) {
1652 fputs(pp_token.literal.string.begin, out);
/* string and character literals get their encoding prefix first */
1655 case T_STRING_LITERAL:
1656 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1658 fputs(pp_token.literal.string.begin, out);
1662 case T_CHARACTER_CONSTANT:
1663 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1665 fputs(pp_token.literal.string.begin, out);
/* macro parameters must have been replaced before tokens are emitted */
1669 case T_MACRO_PARAMETER:
1670 panic("macro parameter not expanded");
/* symbol-based tokens are written using their symbol string */
1673 fputs(pp_token.base.symbol->string, out);
1676 last_token = pp_token.kind;
/**
 * Skips the rest of the current directive: loops until
 * info.at_line_begin is set again.
 */
1679 static void eat_pp_directive(void)
1681 while (!info.at_line_begin) {
/**
 * Compares two string_t values: the sizes are compared first, then both
 * buffers are walked in parallel for 'size' characters.
 */
1686 static bool strings_equal(const string_t *string1, const string_t *string2)
1688 size_t size = string1->size;
/* the lengths are checked before any character is looked at */
1689 if (size != string2->size)
1692 const char *c1 = string1->begin;
1693 const char *c2 = string2->begin;
1694 for (size_t i = 0; i < size; ++i, ++c1, ++c2) {
/**
 * Compares two preprocessing tokens. The kinds are compared first; the
 * rest of the comparison depends on the kind.
 */
1701 static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
1703 if (token1->kind != token2->kind)
1706 switch (token1->kind) {
/* literals: compare the literal text */
1708 case T_CHARACTER_CONSTANT:
1709 case T_STRING_LITERAL:
1710 return strings_equal(&token1->literal.string, &token2->literal.string);
/* macro parameters: compare the symbols of the parameter definitions */
1712 case T_MACRO_PARAMETER:
1713 return token1->macro_parameter.def->symbol
1714 == token2->macro_parameter.def->symbol;
/* symbol-based tokens: compare the symbol pointers */
1717 return token1->base.symbol == token2->base.symbol;
/**
 * Compares the replacement lists of two macro definitions: the list
 * lengths are compared first, then every saved token pair is compared with
 * pp_tokens_equal() together with the tokens' had_whitespace flags.
 */
1721 static bool pp_definitions_equal(const pp_definition_t *definition1,
1722 const pp_definition_t *definition2)
1724 if (definition1->list_len != definition2->list_len)
1727 size_t len = definition1->list_len;
1728 const saved_token_t *t1 = definition1->token_list;
1729 const saved_token_t *t2 = definition2->token_list;
1730 for (size_t i = 0; i < len; ++i, ++t1, ++t2) {
1731 if (!pp_tokens_equal(&t1->token, &t2->token))
/* whitespace in front of a token is part of the comparison */
1733 if (t1->had_whitespace != t2->had_whitespace)
/**
 * Reports, at the position of the current token, that a '#' is not
 * followed by a macro parameter.
 */
1739 static void missing_macro_param_error(void)
1741 errorf(&pp_token.base.source_position,
1742 "'#' is not followed by a macro parameter");
/**
 * Checks the current token (pp_token) as the name operand of a directive
 * and reports an error when it is unsuitable.
 *
 * @param context  directive name used in the error messages (callers in
 *                 this file pass "#define" and "#undef")
 */
1745 static bool is_defineable_token(char const *const context)
/* the directive ended before a name was given */
1747 if (info.at_line_begin) {
1748 errorf(&pp_token.base.source_position, "unexpected end of line after %s", context);
1751 symbol_t *const symbol = pp_token.base.symbol;
/* tokens that are not plain identifiers are examined through the first
 * character of their symbol string */
1755 if (pp_token.kind != T_IDENTIFIER) {
1756 switch (symbol->string[0]) {
1763 errorf(&pp_token.base.source_position, "expected identifier after %s, got %K", context, &pp_token);
1768 /* TODO turn this into a flag in pp_def. */
/* some preprocessor identifiers must not be used as macro names */
1769 switch (symbol->pp_ID) {
1772 errorf(&pp_token.base.source_position, "%K cannot be used as macro name in %s", &pp_token, context);
/**
 * Parses a #define directive: the macro name, an optional parameter list
 * and the replacement token list. The new definition is built on
 * pp_obstack and finally stored in the pp_definition field of the macro
 * name's symbol.
 */
1780 static void parse_define_directive(void)
1788 assert(obstack_object_size(&pp_obstack) == 0);
1790 if (!is_defineable_token("#define"))
1792 symbol_t *const symbol = pp_token.base.symbol;
/* allocate and zero the definition record */
1794 pp_definition_t *new_definition
1795 = obstack_alloc(&pp_obstack, sizeof(new_definition[0]));
1796 memset(new_definition, 0, sizeof(new_definition[0]));
1797 new_definition->symbol = symbol;
1798 new_definition->source_position = input.position;
1800 /* this is probably the only place where spaces are significant in the
1801 * lexer (except for the fact that they separate tokens). #define b(x)
1802 * is something else than #define b (x) */
1803 if (input.c == '(') {
1808 switch (pp_token.kind) {
/* '...' marks a variadic macro and has to be the last entry */
1810 new_definition->is_variadic = true;
1811 eat_token(T_DOTDOTDOT);
1812 if (pp_token.kind != ')') {
1813 errorf(&input.position,
1814 "'...' not at end of macro argument list");
/* a named parameter: build its record in a local and append a copy to the
 * object growing on pp_obstack */
1819 case T_IDENTIFIER: {
1820 pp_definition_t parameter;
1821 memset(&parameter, 0, sizeof(parameter));
1822 parameter.source_position = pp_token.base.source_position;
1823 parameter.symbol = pp_token.base.symbol;
1824 parameter.is_parameter = true;
1825 obstack_grow(&pp_obstack, &parameter, sizeof(parameter));
1826 eat_token(T_IDENTIFIER);
1828 if (pp_token.kind == ',') {
1833 if (pp_token.kind != ')') {
1834 errorf(&pp_token.base.source_position,
1835 "expected ',' or ')' after identifier, got %K",
1844 goto finish_argument_list;
1847 errorf(&pp_token.base.source_position,
1848 "expected identifier, '...' or ')' in #define argument list, got %K",
1854 finish_argument_list:
/* the records grown on pp_obstack become the parameter array */
1855 new_definition->has_parameters = true;
1856 size_t size = obstack_object_size(&pp_obstack);
1857 new_definition->n_parameters
1858 = size / sizeof(new_definition->parameters[0]);
1859 new_definition->parameters = obstack_finish(&pp_obstack);
/* Temporarily install every parameter as the definition of its symbol; the
 * previous definition is kept in parent_expansion. A symbol whose current
 * definition already belongs to this macro is a duplicate parameter. */
1860 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1861 pp_definition_t *param = &new_definition->parameters[i];
1862 symbol_t *symbol = param->symbol;
1863 pp_definition_t *previous = symbol->pp_definition;
1864 if (previous != NULL
1865 && previous->function_definition == new_definition) {
1866 errorf(&param->source_position,
1867 "duplicate macro parameter '%Y'", symbol);
1868 param->symbol = sym_anonymous;
1871 param->parent_expansion = previous;
1872 param->function_definition = new_definition;
1873 symbol->pp_definition = param;
1879 /* construct token list */
1880 assert(obstack_object_size(&pp_obstack) == 0);
1881 bool next_must_be_param = false;
1882 while (!info.at_line_begin) {
/* identifiers naming a parameter of this macro become T_MACRO_PARAMETER */
1883 if (pp_token.kind == T_IDENTIFIER) {
1884 const symbol_t *symbol = pp_token.base.symbol;
1885 pp_definition_t *definition = symbol->pp_definition;
1886 if (definition != NULL
1887 && definition->function_definition == new_definition) {
1888 pp_token.kind = T_MACRO_PARAMETER;
1889 pp_token.macro_parameter.def = definition;
1892 if (next_must_be_param && pp_token.kind != T_MACRO_PARAMETER) {
1893 missing_macro_param_error();
/* append the token together with its whitespace flag */
1895 saved_token_t saved_token;
1896 saved_token.token = pp_token;
1897 saved_token.had_whitespace = info.had_whitespace;
1898 obstack_grow(&pp_obstack, &saved_token, sizeof(saved_token));
/* in a macro with parameters a '#' requires a parameter right after it */
1900 = new_definition->has_parameters && pp_token.kind == '#';
1903 if (next_must_be_param)
1904 missing_macro_param_error();
1906 new_definition->list_len = obstack_object_size(&pp_obstack)
1907 / sizeof(new_definition->token_list[0]);
1908 new_definition->token_list = obstack_finish(&pp_obstack);
/* undo the temporary parameter definitions installed above */
1910 if (new_definition->has_parameters) {
1911 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1912 pp_definition_t *param = &new_definition->parameters[i];
1913 symbol_t *symbol = param->symbol;
1914 if (symbol == sym_anonymous)
1916 assert(symbol->pp_definition == param);
1917 assert(param->function_definition == new_definition);
1918 symbol->pp_definition = param->parent_expansion;
1919 param->parent_expansion = NULL;
/* redefinition: warn when the replacement lists differ */
1923 pp_definition_t *old_definition = symbol->pp_definition;
1924 if (old_definition != NULL) {
1925 if (!pp_definitions_equal(old_definition, new_definition)) {
1926 warningf(WARN_OTHER, &input.position, "multiple definition of macro '%Y' (first defined %P)", symbol, &old_definition->source_position);
1928 /* reuse the old definition */
1929 obstack_free(&pp_obstack, new_definition);
1930 new_definition = old_definition;
1934 symbol->pp_definition = new_definition;
/* drop a partially grown object left on the obstack */
1938 if (obstack_object_size(&pp_obstack) > 0) {
1939 char *ptr = obstack_finish(&pp_obstack);
1940 obstack_free(&pp_obstack, ptr);
/**
 * Parses an #undef directive: clears the pp_definition of the named symbol
 * and warns about extra tokens behind the name.
 */
1945 static void parse_undef_directive(void)
1953 if (!is_defineable_token("#undef")) {
/* forget the macro definition attached to the symbol */
1958 pp_token.base.symbol->pp_definition = NULL;
1961 if (!info.at_line_begin) {
1962 warningf(WARN_OTHER, &input.position, "extra tokens at end of #undef directive");
1967 /** Behind an #include we can have the special headername lexemes.
1968 * They're only allowed behind an #include so they're not recognized
1969 * by the normal next_preprocessing_token. We handle them as a special
 * case here. */
/**
 * Reads the header name operand of an #include directive.
 *
 * Two forms are handled: a name that starts directly with '<' or '"' is
 * read character by character up to the matching delimiter; otherwise
 * preprocessing tokens are read and the name is taken from a string
 * literal or assembled from the tokens between '<' and '>'.
 *
 * @param system_include  set to true for the <...> form and to false for
 *                        the "..." form
 */
1971 static const char *parse_headername(bool *system_include)
1973 if (info.at_line_begin) {
1974 parse_error("expected headername after #include");
1978 /* check whether we have a "... or <... headername */
1979 source_position_t position = input.position;
1983 case '<': delimiter = '>'; *system_include = true; goto parse_name;
1984 case '"': delimiter = '"'; *system_include = false; goto parse_name;
1986 assert(obstack_object_size(&symbol_obstack) == 0);
/* error path: throw away the partially collected name */
1993 char *dummy = obstack_finish(&symbol_obstack);
1994 obstack_free(&symbol_obstack, dummy);
1996 errorf(&pp_token.base.source_position,
1997 "header name without closing '%c'", (char)delimiter);
2001 if (input.c == delimiter) {
2003 goto finish_headername;
/* ordinary character: append it to the name */
2005 obstack_1grow(&symbol_obstack, (char)input.c);
2011 /* we should never be here */
/* token-based form: the name comes from preprocessing tokens */
2015 next_preprocessing_token();
2016 if (info.at_line_begin) {
2017 /* TODO: if we are already in the new line then we parsed more than
2018 * wanted. We reuse the token, but this could produce follow-up errors
2019 * or misbehaviour... */
2020 goto error_invalid_input;
2022 if (pp_token.kind == T_STRING_LITERAL) {
2023 *system_include = false;
2024 return pp_token.literal.string.begin;
2025 } else if (pp_token.kind == '<') {
2026 *system_include = true;
2027 assert(obstack_object_size(&pp_obstack) == 0);
2029 next_preprocessing_token();
2030 if (info.at_line_begin) {
2031 /* TODO: we shouldn't have parsed/expanded something on the
2032 * next line yet... */
2033 char *dummy = obstack_finish(&pp_obstack);
2034 obstack_free(&pp_obstack, dummy);
2035 goto error_invalid_input;
2037 if (pp_token.kind == '>')
/* collect the token together with its whitespace flag */
2040 saved_token_t saved;
2041 saved.token = pp_token;
2042 saved.had_whitespace = info.had_whitespace;
2043 obstack_grow(&pp_obstack, &saved, sizeof(saved));
/* turn the collected tokens back into text; a single space is inserted
 * where a token other than the first had whitespace in front of it */
2045 size_t size = obstack_object_size(&pp_obstack);
2046 assert(size % sizeof(saved_token_t) == 0);
2047 size_t n_tokens = size / sizeof(saved_token_t);
2048 saved_token_t *tokens = obstack_finish(&pp_obstack);
2049 assert(obstack_object_size(&symbol_obstack) == 0);
2050 for (size_t i = 0; i < n_tokens; ++i) {
2051 const saved_token_t *saved = &tokens[i];
2052 if (i > 0 && saved->had_whitespace)
2053 obstack_1grow(&symbol_obstack, ' ');
2054 grow_token(&symbol_obstack, &saved->token);
2056 obstack_free(&pp_obstack, tokens);
2057 goto finish_headername;
2059 error_invalid_input:
2061 char *dummy = obstack_finish(&symbol_obstack);
2062 obstack_free(&symbol_obstack, dummy);
2065 errorf(&pp_token.base.source_position,
2066 "expected \"FILENAME\" or <FILENAME> after #include");
/* terminate the collected name and pass it through identify_string();
 * the token position is set to where the header name started */
2072 obstack_1grow(&symbol_obstack, '\0');
2073 char *const headername = obstack_finish(&symbol_obstack);
2074 const char *identified = identify_string(headername);
2075 pp_token.base.source_position = position;
/**
 * Locates and opens the file for an #include and switches the
 * preprocessor input to it.
 *
 * @param bracket_include  true for the <...> form; for the "..." form the
 *                         directory of the current input is tried before
 *                         the search path
 * @param include_next     presumably selects continuing the search behind
 *                         the entry the current input was found in
 *                         (input.path->next) -- TODO confirm
 * @param headername       name of the header as written in the directive
 */
2079 static bool do_include(bool const bracket_include, bool const include_next, char const *const headername)
2081 size_t const headername_len = strlen(headername);
2082 searchpath_entry_t *entry;
/* continue behind the current input's search path entry when it has one,
 * otherwise start at the head of the matching search path */
2084 entry = input.path ? input.path->next
2085 : bracket_include ? bracket_searchpath.first
2086 : quote_searchpath.first;
2088 if (!bracket_include) {
2089 /* put dirname of current input on obstack */
2090 const char *filename = input.position.input_name;
2091 const char *last_slash = strrchr(filename, '/');
2092 const char *full_name;
2093 if (last_slash != NULL) {
/* len + 1 keeps the trailing '/' of the directory part */
2094 size_t len = last_slash - filename;
2095 obstack_grow(&symbol_obstack, filename, len + 1);
2096 obstack_grow0(&symbol_obstack, headername, headername_len);
2097 char *complete_path = obstack_finish(&symbol_obstack);
2098 full_name = identify_string(complete_path);
/* no directory part in the current input name: use the name as given */
2100 full_name = headername;
2103 FILE *file = fopen(full_name, "r");
2105 switch_pp_input(file, full_name, NULL, false);
2108 entry = quote_searchpath.first;
2110 entry = bracket_searchpath.first;
2114 assert(obstack_object_size(&symbol_obstack) == 0);
2115 /* check searchpath */
2116 for (; entry; entry = entry->next) {
2117 const char *path = entry->path;
2118 size_t len = strlen(path);
2119 obstack_grow(&symbol_obstack, path, len);
/* NOTE(review): path[len-1] reads in front of the string when an entry
 * is the empty string -- confirm that empty entries cannot get here */
2120 if (path[len-1] != '/')
2121 obstack_1grow(&symbol_obstack, '/');
/* headername_len+1 copies the terminating '\0' as well */
2122 obstack_grow(&symbol_obstack, headername, headername_len+1);
2124 char *complete_path = obstack_finish(&symbol_obstack);
2125 FILE *file = fopen(complete_path, "r");
2127 const char *filename = identify_string(complete_path);
2128 switch_pp_input(file, filename, entry, entry->is_system_path);
/* release the candidate path again */
2131 obstack_free(&symbol_obstack, complete_path);
/**
 * Handles #include and #include_next: reads the header name, checks the
 * rest of the line and the nesting depth, then includes the file through
 * do_include().
 *
 * @param include_next  passed on to do_include()
 */
2138 static void parse_include_directive(bool const include_next)
2145 /* do not eat the TP_include, since it would already parse the next token
2146 * which needs special handling here. */
2147 skip_till_newline(true);
2148 bool system_include;
2149 const char *headername = parse_headername(&system_include);
2150 if (headername == NULL) {
2155 bool had_nonwhitespace = skip_till_newline(false);
2156 if (had_nonwhitespace) {
2157 warningf(WARN_OTHER, &input.position,
2158 "extra tokens at end of #include directive");
/* limit the include nesting depth */
2161 if (n_inputs > INCLUDE_LIMIT) {
2162 errorf(&pp_token.base.source_position, "#include nested too deeply");
/* reset the whitespace state to "beginning of a line" */
2169 info.whitespace_at_line_begin = 0;
2170 info.had_whitespace = false;
2171 info.at_line_begin = true;
2174 bool res = do_include(system_include, include_next, headername);
/* NOTE(review): errno is read here, after do_include() has returned --
 * confirm that it still holds the value of the failed open */
2178 errorf(&pp_token.base.source_position, "failed including '%s': %s", headername, strerror(errno));
2179 pop_restore_input();
/**
 * Allocates a zeroed pp_conditional_t on pp_obstack and makes it the new
 * top of conditional_stack; the previous top becomes its parent.
 */
2183 static pp_conditional_t *push_conditional(void)
2185 pp_conditional_t *conditional
2186 = obstack_alloc(&pp_obstack, sizeof(*conditional));
2187 memset(conditional, 0, sizeof(*conditional));
2189 conditional->parent = conditional_stack;
2190 conditional_stack = conditional;
/**
 * Removes the top entry of conditional_stack by making its parent the new
 * top. The stack must not be empty.
 */
2195 static void pop_conditional(void)
2197 assert(conditional_stack != NULL);
2198 conditional_stack = conditional_stack->parent;
/**
 * Reports an error for every conditional still on conditional_stack, at
 * the position stored in the conditional. The message depends on whether
 * the conditional is in its #else part.
 */
2201 void check_unclosed_conditionals(void)
2203 while (conditional_stack != NULL) {
2204 pp_conditional_t *conditional = conditional_stack;
2206 if (conditional->in_else) {
2207 errorf(&conditional->source_position, "unterminated #else");
2209 errorf(&conditional->source_position, "unterminated condition");
/**
 * Handles #ifdef and #ifndef.
 *
 * @param is_ifdef  true for #ifdef, false for #ifndef
 */
2215 static void parse_ifdef_ifndef_directive(bool const is_ifdef)
2218 eat_pp(is_ifdef ? TP_ifdef : TP_ifndef);
/* one path pushes a conditional marked 'skip' without evaluating anything;
 * presumably taken while already in skip mode -- TODO confirm */
2222 pp_conditional_t *conditional = push_conditional();
2223 conditional->source_position = pp_token.base.source_position;
2224 conditional->skip = true;
2228 if (pp_token.kind != T_IDENTIFIER || info.at_line_begin) {
2229 errorf(&pp_token.base.source_position,
2230 "expected identifier after #%s, got %K",
2231 is_ifdef ? "ifdef" : "ifndef", &pp_token);
2234 /* just take the true case in the hope to avoid further errors */
2237 /* evaluate whether we are in true or false case */
/* #ifdef is true when the symbol has a definition, #ifndef when it has
 * none */
2238 condition = (bool)pp_token.base.symbol->pp_definition == is_ifdef;
2239 eat_token(T_IDENTIFIER);
2241 if (!info.at_line_begin) {
2242 errorf(&pp_token.base.source_position,
2243 "extra tokens at end of #%s",
2244 is_ifdef ? "ifdef" : "ifndef");
/* record the evaluated condition on the conditional stack */
2249 pp_conditional_t *conditional = push_conditional();
2250 conditional->source_position = pp_token.base.source_position;
2251 conditional->condition = condition;
/**
 * Handles #else: checks that a matching conditional exists and has not
 * seen an #else yet, then marks the innermost conditional as being in its
 * else part.
 */
2258 static void parse_else_directive(void)
2262 if (!info.at_line_begin) {
2264 warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #else");
2269 pp_conditional_t *conditional = conditional_stack;
2270 if (conditional == NULL) {
2271 errorf(&pp_token.base.source_position, "#else without prior #if");
2275 if (conditional->in_else) {
2276 errorf(&pp_token.base.source_position,
2277 "#else after #else (condition started %P)",
2278 &conditional->source_position);
2283 conditional->in_else = true;
/* unless the whole conditional is skipped, the else part is skipped
 * exactly when the condition was true */
2284 if (!conditional->skip) {
2285 skip_mode = conditional->condition;
/* from now on the conditional reports the position of its #else */
2287 conditional->source_position = pp_token.base.source_position;
/**
 * Handles #endif: warns about extra tokens and reports an #endif that has
 * no open conditional.
 */
2290 static void parse_endif_directive(void)
2294 if (!info.at_line_begin) {
2296 warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #endif");
2301 pp_conditional_t *conditional = conditional_stack;
2302 if (conditional == NULL) {
2303 errorf(&pp_token.base.source_position, "#endif without prior #if");
/* conditionals marked 'skip' are treated differently from evaluated ones */
2307 if (!conditional->skip) {
/** Kinds of "#pragma STDC" directives, as used by
 * parse_pragma_directive(). */
2313 typedef enum stdc_pragma_kind_t {
2317 STDC_CX_LIMITED_RANGE
2318 } stdc_pragma_kind_t;
/** Argument values of a "#pragma STDC" directive, as used by
 * parse_pragma_directive(). */
2320 typedef enum stdc_pragma_value_kind_t {
2325 } stdc_pragma_value_kind_t;
/**
 * Handles #pragma. "STDC" pragmas are recognized in C99 mode and
 * classified by kind and value; a pragma that is not recognized produces
 * a WARN_UNKNOWN_PRAGMAS warning.
 */
2327 static void parse_pragma_directive(void)
2335 if (pp_token.kind != T_IDENTIFIER) {
2336 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
2337 "expected identifier after #pragma");
2342 stdc_pragma_kind_t kind = STDC_UNKNOWN;
2343 if (pp_token.base.symbol->pp_ID == TP_STDC && c_mode & _C99) {
/* which STDC pragma is it? */
2347 switch (pp_token.base.symbol->pp_ID) {
2348 case TP_FP_CONTRACT: kind = STDC_FP_CONTRACT; break;
2349 case TP_FENV_ACCESS: kind = STDC_FENV_ACCESS; break;
2350 case TP_CX_LIMITED_RANGE: kind = STDC_CX_LIMITED_RANGE; break;
2353 if (kind != STDC_UNKNOWN) {
/* the argument has to be ON, OFF or DEFAULT */
2355 stdc_pragma_value_kind_t value;
2356 switch (pp_token.base.symbol->pp_ID) {
2357 case TP_ON: value = STDC_VALUE_ON; break;
2358 case TP_OFF: value = STDC_VALUE_OFF; break;
2359 case TP_DEFAULT: value = STDC_VALUE_DEFAULT; break;
2360 default: value = STDC_VALUE_UNKNOWN; break;
2362 if (value == STDC_VALUE_UNKNOWN) {
2363 kind = STDC_UNKNOWN;
2364 errorf(&pp_token.base.source_position, "bad STDC pragma argument");
2369 if (kind == STDC_UNKNOWN) {
2370 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
2371 "encountered unknown #pragma");
/**
 * Handles a line directive: a line number, optionally followed by a file
 * name string and by numeric flags in the style of gcc's preprocessor
 * output. Updates input.position accordingly.
 */
2375 static void parse_line_directive(void)
2377 if (pp_token.kind != T_NUMBER) {
2379 parse_error("expected integer");
/* base 0: strtol() determines the base from the prefix of the number */
/* NOTE(review): no range check on the long result is visible before it is
 * stored in lineno -- confirm */
2382 long const line = strtol(pp_token.literal.string.begin, &end, 0);
2384 /* use offset -1 as this is about the next line */
2385 input.position.lineno = line - 1;
2386 /* force output of line */
2387 input.output_line = input.position.lineno - 20;
2390 errorf(&input.position, "'%S' is not a valid line number",
2391 &pp_token.literal.string);
2395 if (info.at_line_begin)
/* optional file name: only plain (char encoded) string literals count */
2398 if (pp_token.kind == T_STRING_LITERAL
2399 && pp_token.literal.string.encoding == STRING_ENCODING_CHAR) {
2400 input.position.input_name = pp_token.literal.string.begin;
2401 input.position.is_system_header = false;
2404 /* attempt to parse numeric flags as outputted by gcc preprocessor */
2405 while (!info.at_line_begin && pp_token.kind == T_NUMBER) {
2407 * 1 - indicates start of a new file
2408 * 2 - indicates return from a file
2409 * 3 - indicates system header
2410 * 4 - indicates implicit extern "C" in C++ mode
2412 * currently we're only interested in "3"
2414 if (streq(pp_token.literal.string.begin, "3")) {
2415 input.position.is_system_header = true;
/**
 * Handles #error: collects the text of the tokens on the rest of the line
 * on pp_obstack and reports it as an error at the position where the
 * directive's text started.
 */
2424 static void parse_error_directive(void)
/* escape sequences stay unresolved while the message is collected; the
 * previous setting is restored afterwards */
2431 bool const old_resolve_escape_sequences = resolve_escape_sequences;
2432 resolve_escape_sequences = false;
2434 source_position_t const pos = pp_token.base.source_position;
/* separate tokens by one space where the input had whitespace, but not in
 * front of the first token */
2436 if (info.had_whitespace && obstack_object_size(&pp_obstack) != 0)
2437 obstack_1grow(&pp_obstack, ' ');
2439 switch (pp_token.kind) {
2441 string_t const *const str = &pp_token.literal.string;
2442 obstack_grow(&pp_obstack, str->begin, str->size);
/* string and character literals are written with their encoding prefix
 * and their delimiters */
2448 case T_STRING_LITERAL: delim = '"'; goto string;
2449 case T_CHARACTER_CONSTANT: delim = '\''; goto string;
2451 string_t const *const str = &pp_token.literal.string;
2452 char const *const enc = get_string_encoding_prefix(str->encoding);
2453 obstack_printf(&pp_obstack, "%s%c%s%c", enc, delim, str->begin, delim);
/* symbol-based tokens contribute their symbol string */
2458 char const *const str = pp_token.base.symbol->string;
2459 obstack_grow(&pp_obstack, str, strlen(str));
2465 } while (!info.at_line_begin);
2467 resolve_escape_sequences = old_resolve_escape_sequences;
/* terminate the message, report it and release the buffer */
2469 obstack_1grow(&pp_obstack, '\0');
2470 char *const str = obstack_finish(&pp_obstack);
2471 errorf(&pos, "#%s", str);
2472 obstack_free(&pp_obstack, str);
/**
 * Dispatches a preprocessing directive to its handler, based on the pp_ID
 * of the directive name. A directive starting with a number is treated as
 * a line directive. On exit the lexer is at the beginning of a line.
 */
2475 static void parse_preprocessing_directive(void)
2479 if (info.at_line_begin) {
2480 /* empty directive */
2484 if (pp_token.base.symbol) {
2485 switch (pp_token.base.symbol->pp_ID) {
2486 case TP_define: parse_define_directive(); break;
2487 case TP_else: parse_else_directive(); break;
2488 case TP_endif: parse_endif_directive(); break;
2489 case TP_error: parse_error_directive(); break;
2490 case TP_ifdef: parse_ifdef_ifndef_directive(true); break;
2491 case TP_ifndef: parse_ifdef_ifndef_directive(false); break;
2492 case TP_include: parse_include_directive(false); break;
2493 case TP_include_next: parse_include_directive(true); break;
2494 case TP_line: next_input_token(); goto line_directive;
2495 case TP_pragma: parse_pragma_directive(); break;
2496 case TP_undef: parse_undef_directive(); break;
/* "# <number> ..." is handled like #line */
2499 } else if (pp_token.kind == T_NUMBER) {
2501 parse_line_directive();
2505 errorf(&pp_token.base.source_position, "invalid preprocessing directive #%K", &pp_token);
2510 assert(info.at_line_begin);
/**
 * Closes the macro argument that is currently being collected: the saved
 * tokens grown on pp_obstack become the token list of current_argument.
 */
2513 static void finish_current_argument(void)
2515 if (current_argument == NULL)
2517 size_t size = obstack_object_size(&pp_obstack);
2518 current_argument->list_len = size/sizeof(current_argument->token_list[0]);
2519 current_argument->token_list = obstack_finish(&pp_obstack);
/**
 * Advances pp_token to the next preprocessing token. Directives at the
 * beginning of a line are processed on the way, macros are expanded, and
 * while a function-like macro call is being read its arguments are
 * collected on pp_obstack.
 */
2522 void next_preprocessing_token(void)
2525 if (!expand_next()) {
/* a '#' at the beginning of a line starts a directive */
2528 while (pp_token.kind == '#' && info.at_line_begin) {
2529 parse_preprocessing_directive();
/* in skip mode tokens are dropped until the end of the input */
2531 } while (skip_mode && pp_token.kind != T_EOF);
2534 const token_kind_t kind = pp_token.kind;
/* macro expansion happens outside of macro calls and while an argument is
 * being expanded */
2535 if (current_call == NULL || argument_expanding != NULL) {
2536 symbol_t *const symbol = pp_token.base.symbol;
2538 if (kind == T_MACRO_PARAMETER) {
2539 assert(current_expansion != NULL);
2540 start_expanding(pp_token.macro_parameter.def);
/* a definition that is already being expanded is not expanded again */
2544 pp_definition_t *const pp_definition = symbol->pp_definition;
2545 if (pp_definition != NULL && !pp_definition->is_expanding) {
2546 if (pp_definition->has_parameters) {
2548 /* check if next token is a '(' */
2549 whitespace_info_t old_info = info;
2550 token_kind_t next_token = peek_expansion();
2551 if (next_token == T_EOF) {
2552 info.at_line_begin = false;
2553 info.had_whitespace = false;
2555 if (input.c == '(') {
2560 if (next_token == '(') {
/* remember where the outermost expansion started */
2561 if (current_expansion == NULL)
2562 expansion_pos = pp_token.base.source_position;
2563 next_preprocessing_token();
2564 assert(pp_token.kind == '(');
/* start collecting the arguments of the call */
2566 pp_definition->parent_expansion = current_expansion;
2567 current_call = pp_definition;
2568 current_call->expand_pos = 0;
2569 current_call->expand_info = old_info;
2570 if (current_call->n_parameters > 0) {
2571 current_argument = &current_call->parameters[0];
2572 assert(argument_brace_count == 0);
2576 /* skip_whitespaces() skipped newlines and whitespace,
2577 * remember results for next token */
/* object-like macro: expand right away */
2583 if (current_expansion == NULL)
2584 expansion_pos = pp_token.base.source_position;
2585 start_expanding(pp_definition);
2592 if (current_call != NULL) {
2593 /* current_call != NULL */
/* argument_brace_count tracks the parenthesis nesting inside the argument
 * list */
2595 ++argument_brace_count;
2596 } else if (kind == ')') {
2597 if (argument_brace_count > 0) {
2598 --argument_brace_count;
/* the ')' closing the call: finish the last argument and expand the
 * macro */
2600 finish_current_argument();
2601 assert(kind == ')');
2602 start_expanding(current_call);
2603 info = current_call->expand_info;
2604 current_call = NULL;
2605 current_argument = NULL;
/* a ',' outside of nested parentheses separates two arguments */
2608 } else if (kind == ',' && argument_brace_count == 0) {
2609 finish_current_argument();
2610 current_call->expand_pos++;
2611 if (current_call->expand_pos >= current_call->n_parameters) {
2612 errorf(&pp_token.base.source_position,
2613 "too many arguments passed for macro '%Y'",
2614 current_call->symbol);
2615 current_argument = NULL;
2618 = &current_call->parameters[current_call->expand_pos];
2621 } else if (kind == T_MACRO_PARAMETER) {
2622 /* parameters have to be fully expanded before being used as
2623 * parameters for another macro-call */
2624 assert(current_expansion != NULL);
2625 pp_definition_t *argument = pp_token.macro_parameter.def;
2626 argument_expanding = argument;
2627 start_expanding(argument);
2629 } else if (kind == T_EOF) {
2630 errorf(&expansion_pos,
2631 "reached end of file while parsing arguments for '%Y'",
2632 current_call->symbol);
/* append the token to the argument that is being collected */
2635 if (current_argument != NULL) {
2636 saved_token_t saved;
2637 saved.token = pp_token;
2638 saved.had_whitespace = info.had_whitespace;
2639 obstack_grow(&pp_obstack, &saved, sizeof(saved));
/**
 * Appends a new entry to the search path list @p paths. The entry is
 * allocated zero-initialized on config_obstack, takes over the
 * is_system_path flag of the list, and is linked in through the list's
 * anchor pointer.
 *
 * @param paths  search path list to extend
 * @param path   directory for the new entry
 */
2645 void append_include_path(searchpath_t *paths, const char *path)
2647 searchpath_entry_t *entry = OALLOCZ(&config_obstack, searchpath_entry_t);
2649 entry->is_system_path = paths->is_system_path;
/* link the entry at the end and move the anchor to its next pointer */
2651 *paths->anchor = entry;
2652 paths->anchor = &entry->next;
/**
 * Appends the directories listed in the environment variable @p envvar to
 * @p paths. The value is a ':' separated list; nothing is appended when
 * the variable is unset or empty.
 */
2655 static void append_env_paths(searchpath_t *paths, const char *envvar)
2657 const char *val = getenv(envvar);
2658 if (val != NULL && *val != '\0') {
2659 const char *begin = val;
/* find the end of the current list entry */
2663 while (*c != '\0' && *c != ':')
2666 size_t len = c-begin;
2668 /* use "." for gcc compatibility (Matze: I would expect that
2669 * nothing happens for an empty entry...) */
2670 append_include_path(paths, ".");
/* copy the entry as a '\0' terminated string onto config_obstack */
2672 char *const string = obstack_copy0(&config_obstack, begin, len);
2673 append_include_path(paths, string);
2680 } while(*c != '\0');
/**
 * Chains the list @p append behind @p path by storing the first entry of
 * @p append through the anchor of @p path. The anchor of @p path itself is
 * left unchanged.
 */
2684 static void append_searchpath(searchpath_t *path, const searchpath_t *append)
2686 *path->anchor = append->first;
/**
 * Builds the include search paths: adds the built-in system directory and
 * the directories from the environment, then chains the lists so that the
 * quote path continues into the bracket path, the bracket path into the
 * system path and the system path into the "after" path.
 */
2689 static void setup_include_path(void)
2691 /* built-in paths */
2692 append_include_path(&system_searchpath, "/usr/include");
2694 /* parse environment variable */
2695 append_env_paths(&bracket_searchpath, "CPATH");
2696 append_env_paths(&system_searchpath,
2697 c_mode & _CXX ? "CPLUS_INCLUDE_PATH" : "C_INCLUDE_PATH");
2699 /* append system search path to bracket searchpath */
2700 append_searchpath(&system_searchpath, &after_searchpath);
2701 append_searchpath(&bracket_searchpath, &system_searchpath);
2702 append_searchpath(&quote_searchpath, &bracket_searchpath);
/**
 * Reports @p message as an error at a position relative to the current
 * token: the line and column of the token's position are advanced by
 * @p delta_lines and @p delta_cols. init_preprocessor() installs this
 * function through set_input_error_callback().
 */
2705 static void input_error(unsigned const delta_lines, unsigned const delta_cols, char const *const message)
2707 source_position_t pos = pp_token.base.source_position;
2708 pos.lineno += delta_lines;
2709 pos.colno += delta_cols;
2710 errorf(&pos, "%s", message);
/**
 * Initializes config_obstack, the obstack that search path entries are
 * allocated on.
 */
2713 void init_include_paths(void)
2715 obstack_init(&config_obstack);
/**
 * Initializes the preprocessor state: the obstacks, the string set, the
 * include search paths and the input error callback.
 */
2718 void init_preprocessor(void)
2722 obstack_init(&pp_obstack);
2723 obstack_init(&input_obstack);
2724 strset_init(&stringset);
2726 setup_include_path();
/* errors found by the input layer are reported through input_error() */
2728 set_input_error_callback(input_error);
/**
 * Releases the preprocessor state: frees the three obstacks completely
 * and destroys the string set.
 */
2731 void exit_preprocessor(void)
2733 obstack_free(&input_obstack, NULL);
2734 obstack_free(&pp_obstack, NULL);
2735 obstack_free(&config_obstack, NULL);
2737 strset_destroy(&stringset);
/**
 * Entry point of the preprocessor test driver: parses a small set of
 * command line options (-I, -E, -o and one input file), preprocesses the
 * input and shuts the preprocessor down again.
 */
2740 int pptest_main(int argc, char **argv);
2741 int pptest_main(int argc, char **argv)
2743 init_symbol_table();
2744 init_include_paths();
2745 init_preprocessor();
/* leave unknown characters and escape sequences alone */
2748 error_on_unknown_chars = false;
2749 resolve_escape_sequences = false;
2751 /* simplistic commandline parser */
2752 const char *filename = NULL;
2753 const char *output = NULL;
2754 for (int i = 1; i < argc; ++i) {
2755 const char *opt = argv[i];
2756 if (streq(opt, "-I")) {
/* NOTE(review): argv[++i] is used without checking that another argument
 * follows; with "-I" as the last argument this passes argv[argc] (NULL)
 * -- confirm */
2757 append_include_path(&bracket_searchpath, argv[++i]);
2759 } else if (streq(opt, "-E")) {
2761 } else if (streq(opt, "-o")) {
2764 } else if (opt[0] == '-') {
2765 fprintf(stderr, "Unknown option '%s'\n", opt);
2767 if (filename != NULL)
2768 fprintf(stderr, "Multiple inputs not supported\n");
2772 if (filename == NULL) {
2773 fprintf(stderr, "No input specified\n");
2777 if (output == NULL) {
2780 out = fopen(output, "w");
2782 fprintf(stderr, "Couldn't open output '%s'\n", output);
2787 /* just here for gcc compatibility */
2788 fprintf(out, "# 1 \"%s\"\n", filename);
2789 fprintf(out, "# 1 \"<built-in>\"\n");
2790 fprintf(out, "# 1 \"<command-line>\"\n");
2792 FILE *file = fopen(filename, "r");
2794 fprintf(stderr, "Couldn't open input '%s'\n", filename);
2797 switch_pp_input(file, filename, NULL, false);
/* token loop: runs until T_EOF is reached */
2800 next_preprocessing_token();
2801 if (pp_token.kind == T_EOF)
2807 check_unclosed_conditionals();
2808 fclose(close_pp_input());
2813 exit_preprocessor();
2814 exit_symbol_table();