9 #include "preprocessor.h"
13 #include "adt/error.h"
14 #include "adt/strutil.h"
15 #include "adt/strset.h"
16 #include "lang_features.h"
17 #include "diagnostic.h"
18 #include "string_rep.h"
22 #define INCLUDE_LIMIT 199 /* 199 is for gcc "compatibility" */
24 typedef struct saved_token_t {
29 typedef struct whitespace_info_t {
30 /** current token had whitespace in front of it */
32 /** current token is at the beginning of a line.
33 * => a "#" at line begin starts a preprocessing directive. */
35 /** number of spaces before the first token in a line */
36 unsigned whitespace_at_line_begin;
39 struct pp_definition_t {
41 source_position_t source_position;
42 pp_definition_t *parent_expansion;
44 whitespace_info_t expand_info;
46 bool is_expanding : 1;
47 bool has_parameters : 1;
48 bool is_parameter : 1;
49 pp_definition_t *function_definition;
51 pp_definition_t *parameters;
55 saved_token_t *token_list;
58 typedef struct pp_conditional_t pp_conditional_t;
59 struct pp_conditional_t {
60 source_position_t source_position;
63 /** conditional in skip mode (then+else gets skipped) */
65 pp_conditional_t *parent;
68 typedef struct pp_input_t pp_input_t;
73 utf32 buf[1024+MAX_PUTBACK];
76 source_position_t position;
79 searchpath_entry_t *path;
82 struct searchpath_entry_t {
84 searchpath_entry_t *next;
88 static pp_input_t input;
90 static pp_input_t *input_stack;
91 static unsigned n_inputs;
92 static struct obstack input_obstack;
94 static pp_conditional_t *conditional_stack;
97 bool allow_dollar_in_symbol = true;
98 static bool resolve_escape_sequences = true;
99 static bool error_on_unknown_chars = true;
100 static bool skip_mode;
102 static struct obstack pp_obstack;
103 static struct obstack config_obstack;
104 static const char *printed_input_name = NULL;
105 static source_position_t expansion_pos;
106 static pp_definition_t *current_expansion = NULL;
107 static pp_definition_t *current_call = NULL;
108 static pp_definition_t *current_argument = NULL;
109 static pp_definition_t *argument_expanding = NULL;
110 static unsigned argument_brace_count;
111 static strset_t stringset;
112 static token_kind_t last_token;
114 struct searchpath_t {
115 searchpath_entry_t *first;
116 searchpath_entry_t **anchor;
120 searchpath_t bracket_searchpath = { NULL, &bracket_searchpath.first, false };
121 searchpath_t quote_searchpath = { NULL, "e_searchpath.first, false };
122 searchpath_t system_searchpath = { NULL, &system_searchpath.first, true };
123 searchpath_t after_searchpath = { NULL, &after_searchpath.first, true };
125 static whitespace_info_t next_info; /* valid if had_whitespace is true */
126 static whitespace_info_t info;
128 static inline void next_char(void);
129 static void next_input_token(void);
130 static void print_line_directive(const source_position_t *pos, const char *add);
132 static symbol_t *symbol_colongreater;
133 static symbol_t *symbol_lesscolon;
134 static symbol_t *symbol_lesspercent;
135 static symbol_t *symbol_percentcolon;
136 static symbol_t *symbol_percentcolonpercentcolon;
137 static symbol_t *symbol_percentgreater;
139 static symbol_t *symbol_L;
140 static symbol_t *symbol_U;
141 static symbol_t *symbol_u;
142 static symbol_t *symbol_u8;
144 static void init_symbols(void)
146 symbol_colongreater = symbol_table_insert(":>");
147 symbol_lesscolon = symbol_table_insert("<:");
148 symbol_lesspercent = symbol_table_insert("<%");
149 symbol_percentcolon = symbol_table_insert("%:");
150 symbol_percentcolonpercentcolon = symbol_table_insert("%:%:");
151 symbol_percentgreater = symbol_table_insert("%>");
153 symbol_L = symbol_table_insert("L");
154 symbol_U = symbol_table_insert("U");
155 symbol_u = symbol_table_insert("u");
156 symbol_u8 = symbol_table_insert("u8");
159 void switch_pp_input(FILE *const file, char const *const filename, searchpath_entry_t *const path, bool const is_system_header)
162 input.input = input_from_stream(file, NULL);
165 input.output_line = 0;
166 input.position.input_name = filename;
167 input.position.lineno = 1;
168 input.position.is_system_header = is_system_header;
171 /* indicate that we're at a new input */
172 print_line_directive(&input.position, input_stack != NULL ? "1" : NULL);
174 /* place a virtual '\n' so we realize we're at line begin */
175 input.position.lineno = 0;
179 FILE *close_pp_input(void)
181 input_free(input.input);
183 FILE* const file = input.file;
/* Saves the current input state: `input` is copied onto input_obstack and the
 * copy becomes the new head of the input_stack list. */
195 static void push_input(void)
197 pp_input_t *const saved_input = obstack_copy(&input_obstack, &input, sizeof(input));
/* The copied bufpos/bufend still point into input.buf; rebase them onto the
 * copy's own buf, keeping the same offsets. */
199 /* adjust buffer positions */
200 if (input.bufpos != NULL)
201 saved_input->bufpos = saved_input->buf + (input.bufpos - input.buf);
202 if (input.bufend != NULL)
203 saved_input->bufend = saved_input->buf + (input.bufend - input.buf);
/* link the copy in front of the previously saved inputs */
205 saved_input->parent = input_stack;
206 input_stack = saved_input;
/* Restores the most recently saved input state into `input` and removes it
 * from the input_stack list. Requires a non-empty stack (asserted). */
210 static void pop_restore_input(void)
212 assert(n_inputs > 0);
213 assert(input_stack != NULL);
215 pp_input_t *saved_input = input_stack;
/* Bulk copy; the buffer pointers copied here still refer to saved_input->buf
 * and are rebased onto input.buf below. */
217 memcpy(&input, saved_input, sizeof(input));
220 /* adjust buffer positions */
221 if (saved_input->bufpos != NULL)
222 input.bufpos = input.buf + (saved_input->bufpos - saved_input->buf);
223 if (saved_input->bufend != NULL)
224 input.bufend = input.buf + (saved_input->bufend - saved_input->buf);
/* unlink the saved state and release its storage on input_obstack */
226 input_stack = saved_input->parent;
227 obstack_free(&input_obstack, saved_input);
232 * Prints a parse error message at the current token.
234 * @param msg the error message
236 static void parse_error(const char *msg)
238 errorf(&pp_token.base.source_position, "%s", msg);
/* Loads the next buffered character into input.c and advances the column
 * counter; when the buffer is used up, more characters are decoded from
 * input.input first. */
241 static inline void next_real_char(void)
243 assert(input.bufpos <= input.bufend);
244 if (input.bufpos >= input.bufend) {
/* Decode a new batch behind the first MAX_PUTBACK slots of buf.
 * NOTE(review): presumably those slots are kept free for put_back() - confirm. */
245 size_t const n = decode(input.input, input.buf + MAX_PUTBACK, lengthof(input.buf) - MAX_PUTBACK);
/* the freshly decoded range is [bufpos, bufend) */
250 input.bufpos = input.buf + MAX_PUTBACK;
251 input.bufend = input.bufpos + n;
253 input.c = *input.bufpos++;
254 ++input.position.colno;
258 * Put a character back into the buffer.
260 * @param pc the character to put back
262 static inline void put_back(utf32 const pc)
264 assert(input.bufpos > input.buf);
265 *(--input.bufpos - input.buf + input.buf) = (char) pc;
266 --input.position.colno;
272 if (input.c == '\n') { \
276 ++input.position.lineno; \
277 input.position.colno = 1; \
279 newline // Let it look like an ordinary case label.
281 #define eat(c_type) (assert(input.c == c_type), next_char())
283 static void maybe_concat_lines(void)
289 info.whitespace_at_line_begin = 0;
301 * Set c to the next input character, i.e. the character seen
302 * after expanding trigraphs and joining continued lines.
304 static inline void next_char(void)
308 /* filter trigraphs and concatenated lines */
309 if (UNLIKELY(input.c == '\\')) {
310 maybe_concat_lines();
311 goto end_of_next_char;
314 if (LIKELY(input.c != '?'))
315 goto end_of_next_char;
318 if (LIKELY(input.c != '?')) {
321 goto end_of_next_char;
326 case '=': input.c = '#'; break;
327 case '(': input.c = '['; break;
328 case '/': input.c = '\\'; maybe_concat_lines(); break;
329 case ')': input.c = ']'; break;
330 case '\'': input.c = '^'; break;
331 case '<': input.c = '{'; break;
332 case '!': input.c = '|'; break;
333 case '>': input.c = '}'; break;
334 case '-': input.c = '~'; break;
344 printf("nchar '%c'\n", input.c);
351 * Returns true if the given char is an octal digit.
353 * @param chr the character to check
355 static inline bool is_octal_digit(int chr)
373 * Returns the value of a digit.
374 * The only portable way to do it ...
376 static int digit_value(int digit)
402 panic("wrong character given");
407 * Parses an octal character sequence.
409 * @param first_digit the already read first digit
411 static utf32 parse_octal_sequence(const utf32 first_digit)
413 assert(is_octal_digit(first_digit));
414 utf32 value = digit_value(first_digit);
415 if (!is_octal_digit(input.c)) return value;
416 value = 8 * value + digit_value(input.c);
418 if (!is_octal_digit(input.c)) return value;
419 value = 8 * value + digit_value(input.c);
426 * Parses a hex character sequence.
428 static utf32 parse_hex_sequence(void)
431 while (isxdigit(input.c)) {
432 value = 16 * value + digit_value(input.c);
438 static bool is_universal_char_valid(utf32 const v)
441 if (v < 0xA0U && v != 0x24 && v != 0x40 && v != 0x60)
443 if (0xD800 <= v && v <= 0xDFFF)
448 static utf32 parse_universal_char(unsigned const n_digits)
451 for (unsigned k = n_digits; k != 0; --k) {
452 if (isxdigit(input.c)) {
453 v = 16 * v + digit_value(input.c);
454 if (!resolve_escape_sequences)
455 obstack_1grow(&symbol_obstack, input.c);
458 errorf(&input.position,
459 "short universal character name, expected %u more digits",
464 if (!is_universal_char_valid(v)) {
465 errorf(&input.position,
466 "\\%c%0*X is not a valid universal character name",
467 n_digits == 4 ? 'u' : 'U', (int)n_digits, v);
472 static bool is_universal_char_valid_identifier_c99(utf32 const v)
474 static const utf32 single_chars[] = {
475 0x00AA, 0x00BA, 0x0386, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0,
476 0x1F59, 0x1F5B, 0x1F5D, 0x05BF, 0x09B2, 0x0A02, 0x0A5E, 0x0A74,
477 0x0A8D, 0x0AD0, 0x0AE0, 0x0B9C, 0x0CDE, 0x0E84, 0x0E8A, 0x0E8D,
478 0x0EA5, 0x0EA7, 0x0EC6, 0x0F00, 0x0F35, 0x0F37, 0x0F39, 0x0F97,
479 0x0FB9, 0x00B5, 0x00B7, 0x02BB, 0x037A, 0x0559, 0x093D, 0x0B3D,
480 0x1FBE, 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128
483 static const utf32 ranges[][2] = {
484 {0x00C0, 0x00D6}, {0x00D8, 0x00F6}, {0x00F8, 0x01F5}, {0x01FA, 0x0217},
485 {0x0250, 0x02A8}, {0x1E00, 0x1E9B}, {0x1EA0, 0x1EF9}, {0x0388, 0x038A},
486 {0x038E, 0x03A1}, {0x03A3, 0x03CE}, {0x03D0, 0x03D6}, {0x03E2, 0x03F3},
487 {0x1F00, 0x1F15}, {0x1F18, 0x1F1D}, {0x1F20, 0x1F45}, {0x1F48, 0x1F4D},
488 {0x1F50, 0x1F57}, {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4}, {0x1FB6, 0x1FBC},
489 {0x1FC2, 0x1FC4}, {0x1FC6, 0x1FCC}, {0x1FD0, 0x1FD3}, {0x1FD6, 0x1FDB},
490 {0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4}, {0x1FF6, 0x1FFC}, {0x0401, 0x040C},
491 {0x040E, 0x044F}, {0x0451, 0x045C}, {0x045E, 0x0481}, {0x0490, 0x04C4},
492 {0x04C7, 0x04C8}, {0x04CB, 0x04CC}, {0x04D0, 0x04EB}, {0x04EE, 0x04F5},
493 {0x04F8, 0x04F9}, {0x0531, 0x0556}, {0x0561, 0x0587}, {0x05B0, 0x05B9},
494 {0x05BB, 0x05BD}, {0x05C1, 0x05C2}, {0x05D0, 0x05EA}, {0x05F0, 0x05F2},
495 {0x0621, 0x063A}, {0x0640, 0x0652}, {0x0670, 0x06B7}, {0x06BA, 0x06BE},
496 {0x06C0, 0x06CE}, {0x06D0, 0x06DC}, {0x06E5, 0x06E8}, {0x06EA, 0x06ED},
497 {0x0901, 0x0903}, {0x0905, 0x0939}, {0x093E, 0x094D}, {0x0950, 0x0952},
498 {0x0958, 0x0963}, {0x0981, 0x0983}, {0x0985, 0x098C}, {0x098F, 0x0990},
499 {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B6, 0x09B9}, {0x09BE, 0x09C4},
500 {0x09C7, 0x09C8}, {0x09CB, 0x09CD}, {0x09DC, 0x09DD}, {0x09DF, 0x09E3},
501 {0x09F0, 0x09F1}, {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28},
502 {0x0A2A, 0x0A30}, {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, {0x0A38, 0x0A39},
503 {0x0A3E, 0x0A42}, {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A59, 0x0A5C},
504 {0x0A81, 0x0A83}, {0x0A85, 0x0A8B}, {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8},
505 {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3}, {0x0AB5, 0x0AB9}, {0x0ABD, 0x0AC5},
506 {0x0AC7, 0x0AC9}, {0x0ACB, 0x0ACD}, {0x0B01, 0x0B03}, {0x0B05, 0x0B0C},
507 {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, {0x0B32, 0x0B33},
508 {0x0B36, 0x0B39}, {0x0B3E, 0x0B43}, {0x0B47, 0x0B48}, {0x0B4B, 0x0B4D},
509 {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B61}, {0x0B82, 0x0B83}, {0x0B85, 0x0B8A},
510 {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A}, {0x0B9E, 0x0B9F},
511 {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA}, {0x0BAE, 0x0BB5}, {0x0BB7, 0x0BB9},
512 {0x0BBE, 0x0BC2}, {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD}, {0x0C01, 0x0C03},
513 {0x0C05, 0x0C0C}, {0x0C0E, 0x0C10}, {0x0C12, 0x0C28}, {0x0C2A, 0x0C33},
514 {0x0C35, 0x0C39}, {0x0C3E, 0x0C44}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D},
515 {0x0C60, 0x0C61}, {0x0C82, 0x0C83}, {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90},
516 {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3}, {0x0CB5, 0x0CB9}, {0x0CBE, 0x0CC4},
517 {0x0CC6, 0x0CC8}, {0x0CCA, 0x0CCD}, {0x0CE0, 0x0CE1}, {0x0D02, 0x0D03},
518 {0x0D05, 0x0D0C}, {0x0D0E, 0x0D10}, {0x0D12, 0x0D28}, {0x0D2A, 0x0D39},
519 {0x0D3E, 0x0D43}, {0x0D46, 0x0D48}, {0x0D4A, 0x0D4D}, {0x0D60, 0x0D61},
520 {0x0E01, 0x0E3A}, {0x0E40, 0x0E5B}, {0x0E81, 0x0E82}, {0x0E87, 0x0E88},
521 {0x0E94, 0x0E97}, {0x0E99, 0x0E9F}, {0x0EA1, 0x0EA3}, {0x0EAA, 0x0EAB},
522 {0x0EAD, 0x0EAE}, {0x0EB0, 0x0EB9}, {0x0EBB, 0x0EBD}, {0x0EC0, 0x0EC4},
523 {0x0EC8, 0x0ECD}, {0x0EDC, 0x0EDD}, {0x0F18, 0x0F19}, {0x0F3E, 0x0F47},
524 {0x0F49, 0x0F69}, {0x0F71, 0x0F84}, {0x0F86, 0x0F8B}, {0x0F90, 0x0F95},
525 {0x0F99, 0x0FAD}, {0x0FB1, 0x0FB7}, {0x10A0, 0x10C5}, {0x10D0, 0x10F6},
526 {0x3041, 0x3093}, {0x309B, 0x309C}, {0x30A1, 0x30F6}, {0x30FB, 0x30FC},
527 {0x3105, 0x312C}, {0x4E00, 0x9FA5}, {0xAC00, 0xD7A3}, {0x0660, 0x0669},
528 {0x06F0, 0x06F9}, {0x0966, 0x096F}, {0x09E6, 0x09EF}, {0x0A66, 0x0A6F},
529 {0x0AE6, 0x0AEF}, {0x0B66, 0x0B6F}, {0x0BE7, 0x0BEF}, {0x0C66, 0x0C6F},
530 {0x0CE6, 0x0CEF}, {0x0D66, 0x0D6F}, {0x0E50, 0x0E59}, {0x0ED0, 0x0ED9},
531 {0x0F20, 0x0F33}, {0x02B0, 0x02B8}, {0x02BD, 0x02C1}, {0x02D0, 0x02D1},
532 {0x02E0, 0x02E4}, {0x203F, 0x2040}, {0x210A, 0x2113}, {0x2118, 0x211D},
533 {0x212A, 0x2131}, {0x2133, 0x2138}, {0x2160, 0x2182}, {0x3005, 0x3007},
536 for (size_t i = 0; i < sizeof(ranges)/sizeof(ranges[0]); ++i) {
537 if (ranges[i][0] <= v && v <= ranges[i][1])
540 for (size_t i = 0; i < sizeof(single_chars)/sizeof(single_chars[0]); ++i) {
541 if (v == single_chars[i])
547 static bool is_universal_char_valid_identifier_c11(utf32 const v)
550 if ( v == 0x000A8) return true;
551 if ( v == 0x000AA) return true;
552 if ( v == 0x000AD) return true;
553 if ( v == 0x000AF) return true;
554 if (0x000B2 <= v && v <= 0x000B5) return true;
555 if (0x000B7 <= v && v <= 0x000BA) return true;
556 if (0x000BC <= v && v <= 0x000BE) return true;
557 if (0x000C0 <= v && v <= 0x000D6) return true;
558 if (0x000D8 <= v && v <= 0x000F6) return true;
559 if (0x000F8 <= v && v <= 0x000FF) return true;
560 if (0x00100 <= v && v <= 0x0167F) return true;
561 if (0x01681 <= v && v <= 0x0180D) return true;
562 if (0x0180F <= v && v <= 0x01FFF) return true;
563 if (0x0200B <= v && v <= 0x0200D) return true;
564 if (0x0202A <= v && v <= 0x0202E) return true;
565 if (0x0203F <= v && v <= 0x02040) return true;
566 if ( v == 0x02054) return true;
567 if (0x02060 <= v && v <= 0x0206F) return true;
568 if (0x02070 <= v && v <= 0x0218F) return true;
569 if (0x02460 <= v && v <= 0x024FF) return true;
570 if (0x02776 <= v && v <= 0x02793) return true;
571 if (0x02C00 <= v && v <= 0x02DFF) return true;
572 if (0x02E80 <= v && v <= 0x02FFF) return true;
573 if (0x03004 <= v && v <= 0x03007) return true;
574 if (0x03021 <= v && v <= 0x0302F) return true;
575 if (0x03031 <= v && v <= 0x0303F) return true;
576 if (0x03040 <= v && v <= 0x0D7FF) return true;
577 if (0x0F900 <= v && v <= 0x0FD3D) return true;
578 if (0x0FD40 <= v && v <= 0x0FDCF) return true;
579 if (0x0FDF0 <= v && v <= 0x0FE44) return true;
580 if (0x0FE47 <= v && v <= 0x0FFFD) return true;
581 if (0x10000 <= v && v <= 0x1FFFD) return true;
582 if (0x20000 <= v && v <= 0x2FFFD) return true;
583 if (0x30000 <= v && v <= 0x3FFFD) return true;
584 if (0x40000 <= v && v <= 0x4FFFD) return true;
585 if (0x50000 <= v && v <= 0x5FFFD) return true;
586 if (0x60000 <= v && v <= 0x6FFFD) return true;
587 if (0x70000 <= v && v <= 0x7FFFD) return true;
588 if (0x80000 <= v && v <= 0x8FFFD) return true;
589 if (0x90000 <= v && v <= 0x9FFFD) return true;
590 if (0xA0000 <= v && v <= 0xAFFFD) return true;
591 if (0xB0000 <= v && v <= 0xBFFFD) return true;
592 if (0xC0000 <= v && v <= 0xCFFFD) return true;
593 if (0xD0000 <= v && v <= 0xDFFFD) return true;
594 if (0xE0000 <= v && v <= 0xEFFFD) return true;
598 static bool is_universal_char_valid_identifier(utf32 const v)
601 return is_universal_char_valid_identifier_c11(v);
602 return is_universal_char_valid_identifier_c99(v);
605 static bool is_universal_char_invalid_identifier_start(utf32 const v)
607 if (! (c_mode & _C11))
611 if (0x0300 <= v && v <= 0x036F) return true;
612 if (0x1DC0 <= v && v <= 0x1DFF) return true;
613 if (0x20D0 <= v && v <= 0x20FF) return true;
614 if (0xFE20 <= v && v <= 0xFE2F) return true;
619 * Parse an escape sequence.
621 static utf32 parse_escape_sequence(void)
625 utf32 const ec = input.c;
629 case '"': return '"';
630 case '\'': return '\'';
631 case '\\': return '\\';
632 case '?': return '\?';
633 case 'a': return '\a';
634 case 'b': return '\b';
635 case 'f': return '\f';
636 case 'n': return '\n';
637 case 'r': return '\r';
638 case 't': return '\t';
639 case 'v': return '\v';
641 return parse_hex_sequence();
650 return parse_octal_sequence(ec);
652 parse_error("reached end of file while parsing escape sequence");
654 /* \E is not documented, but handled, by GCC. It is acceptable according
655 * to §6.11.4, whereas \e is not. */
659 return 27; /* hopefully 27 is ALWAYS the code for ESCAPE */
662 case 'U': return parse_universal_char(8);
663 case 'u': return parse_universal_char(4);
668 /* §6.4.4.4:8 footnote 64 */
669 parse_error("unknown escape sequence");
673 static const char *identify_string(char *string)
675 const char *result = strset_insert(&stringset, string);
676 if (result != string) {
677 obstack_free(&symbol_obstack, string);
682 static string_t sym_make_string(string_encoding_t const enc)
684 obstack_1grow(&symbol_obstack, '\0');
685 size_t const len = obstack_object_size(&symbol_obstack) - 1;
686 char *const string = obstack_finish(&symbol_obstack);
687 char const *const result = identify_string(string);
688 return (string_t){ result, len, enc };
691 string_t make_string(char const *const string)
693 obstack_grow(&symbol_obstack, string, strlen(string));
694 return sym_make_string(STRING_ENCODING_CHAR);
697 static utf32 get_string_encoding_limit(string_encoding_t const enc)
700 case STRING_ENCODING_CHAR: return 0xFF;
701 case STRING_ENCODING_CHAR16: return 0xFFFF;
702 case STRING_ENCODING_CHAR32: return 0xFFFFFFFF;
703 case STRING_ENCODING_UTF8: return 0xFFFFFFFF;
704 case STRING_ENCODING_WIDE: return 0xFFFFFFFF; // FIXME depends on settings
706 panic("invalid string encoding");
709 static void parse_string(utf32 const delimiter, token_kind_t const kind,
710 string_encoding_t const enc,
711 char const *const context)
715 utf32 const limit = get_string_encoding_limit(enc);
719 if (resolve_escape_sequences) {
720 utf32 const tc = parse_escape_sequence();
722 warningf(WARN_OTHER, &pp_token.base.source_position, "escape sequence out of range");
724 if (enc == STRING_ENCODING_CHAR) {
725 obstack_1grow(&symbol_obstack, tc);
727 obstack_grow_utf8(&symbol_obstack, tc);
730 obstack_1grow(&symbol_obstack, (char)input.c);
732 obstack_1grow(&symbol_obstack, (char)input.c);
739 errorf(&pp_token.base.source_position, "newline while parsing %s", context);
743 errorf(&pp_token.base.source_position, "EOF while parsing %s", context);
747 if (input.c == delimiter) {
751 obstack_grow_utf8(&symbol_obstack, input.c);
759 pp_token.kind = kind;
760 pp_token.literal.string = sym_make_string(enc);
763 static void parse_string_literal(string_encoding_t const enc)
765 parse_string('"', T_STRING_LITERAL, enc, "string literal");
768 static void parse_character_constant(string_encoding_t const enc)
770 parse_string('\'', T_CHARACTER_CONSTANT, enc, "character constant");
771 if (pp_token.literal.string.size == 0) {
772 parse_error("empty character constant");
776 #define SYMBOL_CASES_WITHOUT_E_P \
777 '$': if (!allow_dollar_in_symbol) goto dollar_sign; \
828 #define SYMBOL_CASES \
829 SYMBOL_CASES_WITHOUT_E_P: \
835 #define DIGIT_CASES \
847 static void start_expanding(pp_definition_t *definition)
849 definition->parent_expansion = current_expansion;
850 definition->expand_pos = 0;
851 definition->is_expanding = true;
852 if (definition->list_len > 0) {
853 definition->token_list[0].had_whitespace
854 = info.had_whitespace;
856 current_expansion = definition;
859 static void finished_expanding(pp_definition_t *definition)
861 assert(definition->is_expanding);
862 pp_definition_t *parent = definition->parent_expansion;
863 definition->parent_expansion = NULL;
864 definition->is_expanding = false;
866 /* stop further expanding once we expanded a parameter used in a
868 if (definition == argument_expanding)
869 argument_expanding = NULL;
871 assert(current_expansion == definition);
872 current_expansion = parent;
875 static void grow_string_escaped(struct obstack *obst, const string_t *string, char const *delimiter)
877 char const *prefix = get_string_encoding_prefix(string->encoding);
878 obstack_printf(obst, "%s%s", prefix, delimiter);
879 size_t size = string->size;
880 const char *str = string->begin;
881 if (resolve_escape_sequences) {
882 obstack_grow(obst, str, size);
884 for (size_t i = 0; i < size; ++i) {
885 const char c = str[i];
886 if (c == '\\' || c == '"')
887 obstack_1grow(obst, '\\');
888 obstack_1grow(obst, c);
891 obstack_printf(obst, "%s", delimiter);
894 static void grow_token(struct obstack *obst, const token_t *token)
896 switch (token->kind) {
898 obstack_grow(obst, token->literal.string.begin, token->literal.string.size);
901 case T_STRING_LITERAL: {
902 char const *const delimiter = resolve_escape_sequences ? "\"" : "\\\"";
903 grow_string_escaped(obst, &token->literal.string, delimiter);
907 case T_CHARACTER_CONSTANT:
908 grow_string_escaped(obst, &token->literal.string, "'");
913 const char *str = token->base.symbol->string;
914 size_t len = strlen(str);
915 obstack_grow(obst, str, len);
921 static void stringify(const pp_definition_t *definition)
923 assert(obstack_object_size(&symbol_obstack) == 0);
925 size_t list_len = definition->list_len;
926 for (size_t p = 0; p < list_len; ++p) {
927 const saved_token_t *saved = &definition->token_list[p];
928 if (p > 0 && saved->had_whitespace)
929 obstack_1grow(&symbol_obstack, ' ');
930 grow_token(&symbol_obstack, &saved->token);
932 pp_token.kind = T_STRING_LITERAL;
933 pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
936 static inline void set_punctuator(token_kind_t const kind)
938 pp_token.kind = kind;
939 pp_token.base.symbol = token_symbols[kind];
942 static inline void set_digraph(token_kind_t const kind, symbol_t *const symbol)
944 pp_token.kind = kind;
945 pp_token.base.symbol = symbol;
949 * returns next final token from a preprocessor macro expansion
951 static bool expand_next(void)
953 if (current_expansion == NULL)
957 size_t pos = current_expansion->expand_pos;
958 if (pos >= current_expansion->list_len) {
959 finished_expanding(current_expansion);
960 /* it was the outermost expansion, parse pptoken normally */
961 if (current_expansion == NULL) {
966 const saved_token_t *saved = ¤t_expansion->token_list[pos++];
967 pp_token = saved->token;
968 if (pp_token.kind == '#') {
969 if (pos < current_expansion->list_len) {
970 const saved_token_t *next = ¤t_expansion->token_list[pos];
971 if (next->token.kind == T_MACRO_PARAMETER) {
972 pp_definition_t *def = next->token.macro_parameter.def;
973 assert(def != NULL && def->is_parameter);
980 if (current_expansion->expand_pos > 0)
981 info.had_whitespace = saved->had_whitespace;
982 current_expansion->expand_pos = pos;
983 pp_token.base.source_position = expansion_pos;
989 * Returns the next token kind found when continuing the current expansions
990 * without starting new sub-expansions.
992 static token_kind_t peek_expansion(void)
994 for (pp_definition_t *e = current_expansion; e; e = e->parent_expansion) {
995 if (e->expand_pos < e->list_len)
996 return e->token_list[e->expand_pos].token.kind;
1001 static void skip_line_comment(void)
1003 info.had_whitespace = true;
1020 static void skip_multiline_comment(void)
1022 info.had_whitespace = true;
1024 source_position_t const start_pos = input.position;
1029 if (input.c == '*') {
1030 /* TODO: nested comment, warn here */
1035 if (input.c == '/') {
1036 if (input.position.lineno != input.output_line)
1037 info.whitespace_at_line_begin = input.position.colno;
1047 errorf(&start_pos, "at end of file while looking for comment end");
1057 static bool skip_till_newline(bool stop_at_non_whitespace)
1069 if (input.c == '/') {
1071 skip_line_comment();
1073 } else if (input.c == '*') {
1075 skip_multiline_comment();
1087 if (stop_at_non_whitespace)
1096 static void skip_whitespace(void)
1102 ++info.whitespace_at_line_begin;
1103 info.had_whitespace = true;
1108 info.at_line_begin = true;
1109 info.had_whitespace = true;
1110 info.whitespace_at_line_begin = 0;
1115 if (input.c == '/') {
1117 skip_line_comment();
1119 } else if (input.c == '*') {
1121 skip_multiline_comment();
1135 static inline void eat_pp(pp_token_kind_t const kind)
1137 assert(pp_token.base.symbol->pp_ID == kind);
1142 static inline void eat_token(token_kind_t const kind)
1144 assert(pp_token.kind == kind);
1149 static string_encoding_t identify_encoding_prefix(symbol_t *const sym)
1151 if (sym == symbol_L) return STRING_ENCODING_WIDE;
1152 if (c_mode & _C11) {
1153 if (sym == symbol_U) return STRING_ENCODING_CHAR32;
1154 if (sym == symbol_u) return STRING_ENCODING_CHAR16;
1155 if (sym == symbol_u8) return STRING_ENCODING_UTF8;
1157 return STRING_ENCODING_CHAR;
1160 static void parse_symbol(void)
1162 assert(obstack_object_size(&symbol_obstack) == 0);
1167 obstack_1grow(&symbol_obstack, (char) input.c);
1176 case 'U': n = 8; goto universal;
1177 case 'u': n = 4; goto universal;
1179 if (!resolve_escape_sequences) {
1180 obstack_1grow(&symbol_obstack, '\\');
1181 obstack_1grow(&symbol_obstack, input.c);
1184 utf32 const v = parse_universal_char(n);
1185 if (!is_universal_char_valid_identifier(v)) {
1186 if (is_universal_char_valid(v)) {
1187 errorf(&input.position,
1188 "universal character \\%c%0*X is not valid in an identifier",
1189 n == 4 ? 'u' : 'U', (int)n, v);
1191 } else if (obstack_object_size(&symbol_obstack) == 0 && is_universal_char_invalid_identifier_start(v)) {
1192 errorf(&input.position,
1193 "universal character \\%c%0*X is not valid as start of an identifier",
1194 n == 4 ? 'u' : 'U', (int)n, v);
1195 } else if (resolve_escape_sequences) {
1196 obstack_grow_utf8(&symbol_obstack, v);
1214 obstack_1grow(&symbol_obstack, '\0');
1215 char *string = obstack_finish(&symbol_obstack);
1217 symbol_t *symbol = symbol_table_insert(string);
1219 /* Might be a prefixed string or character constant: L/U/u/u8"string". */
1220 if (input.c == '"') {
1221 string_encoding_t const enc = identify_encoding_prefix(symbol);
1222 if (enc != STRING_ENCODING_CHAR) {
1223 parse_string_literal(enc);
1226 } else if (input.c == '\'') {
1227 string_encoding_t const enc = identify_encoding_prefix(symbol);
1228 if (enc != STRING_ENCODING_CHAR) {
1229 if (enc == STRING_ENCODING_UTF8) {
1230 errorf(&pp_token.base.source_position, "'u8' is not a valid encoding for a chracter constant");
1232 parse_character_constant(enc);
1237 pp_token.kind = symbol->ID;
1238 pp_token.base.symbol = symbol;
1240 /* we can free the memory from symbol obstack if we already had an entry in
1241 * the symbol table */
1242 if (symbol->string != string) {
1243 obstack_free(&symbol_obstack, string);
1247 static void parse_number(void)
1249 obstack_1grow(&symbol_obstack, (char) input.c);
1256 case SYMBOL_CASES_WITHOUT_E_P:
1257 obstack_1grow(&symbol_obstack, (char) input.c);
1265 obstack_1grow(&symbol_obstack, (char) input.c);
1267 if (input.c == '+' || input.c == '-') {
1268 obstack_1grow(&symbol_obstack, (char) input.c);
1280 pp_token.kind = T_NUMBER;
1281 pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
1284 #define MAYBE_PROLOG \
1288 #define MAYBE(ch, kind) \
1291 set_punctuator(kind); \
1294 #define MAYBE_DIGRAPH(ch, kind, symbol) \
1297 set_digraph(kind, symbol); \
1300 #define ELSE_CODE(code) \
1305 #define ELSE(kind) ELSE_CODE(set_punctuator(kind); return;)
1307 /** identifies and returns the next preprocessing token contained in the
1308 * input stream. No macro expansion is performed. */
1309 static void next_input_token(void)
1311 if (next_info.had_whitespace) {
1313 next_info.had_whitespace = false;
1315 info.at_line_begin = false;
1316 info.had_whitespace = false;
1319 pp_token.base.source_position = input.position;
1320 pp_token.base.symbol = NULL;
1325 info.whitespace_at_line_begin++;
1326 info.had_whitespace = true;
1331 info.at_line_begin = true;
1332 info.had_whitespace = true;
1333 info.whitespace_at_line_begin = 0;
1345 parse_string_literal(STRING_ENCODING_CHAR);
1349 parse_character_constant(STRING_ENCODING_CHAR);
1371 MAYBE('.', T_DOTDOTDOT)
1375 set_punctuator('.');
1381 MAYBE('&', T_ANDAND)
1382 MAYBE('=', T_ANDEQUAL)
1386 MAYBE('=', T_ASTERISKEQUAL)
1390 MAYBE('+', T_PLUSPLUS)
1391 MAYBE('=', T_PLUSEQUAL)
1395 MAYBE('>', T_MINUSGREATER)
1396 MAYBE('-', T_MINUSMINUS)
1397 MAYBE('=', T_MINUSEQUAL)
1401 MAYBE('=', T_EXCLAMATIONMARKEQUAL)
1405 MAYBE('=', T_SLASHEQUAL)
1408 skip_multiline_comment();
1412 skip_line_comment();
1417 MAYBE_DIGRAPH('>', '}', symbol_percentgreater)
1418 MAYBE('=', T_PERCENTEQUAL)
1423 MAYBE_DIGRAPH(':', T_HASHHASH, symbol_percentcolonpercentcolon)
1427 goto digraph_percentcolon;
1430 digraph_percentcolon:
1431 set_digraph('#', symbol_percentcolon);
1437 MAYBE_DIGRAPH(':', '[', symbol_lesscolon)
1438 MAYBE_DIGRAPH('%', '{', symbol_lesspercent)
1439 MAYBE('=', T_LESSEQUAL)
1442 MAYBE('=', T_LESSLESSEQUAL)
1447 MAYBE('=', T_GREATEREQUAL)
1450 MAYBE('=', T_GREATERGREATEREQUAL)
1451 ELSE(T_GREATERGREATER)
1455 MAYBE('=', T_CARETEQUAL)
1459 MAYBE('=', T_PIPEEQUAL)
1460 MAYBE('|', T_PIPEPIPE)
1464 MAYBE_DIGRAPH('>', ']', symbol_colongreater)
1466 if (c_mode & _CXX) {
1468 set_punctuator(T_COLONCOLON);
1475 MAYBE('=', T_EQUALEQUAL)
1479 MAYBE('#', T_HASHHASH)
1492 set_punctuator(input.c);
1497 if (input_stack != NULL) {
1498 fclose(close_pp_input());
1499 pop_restore_input();
1502 if (input.c == (utf32)EOF)
1503 --input.position.lineno;
1504 print_line_directive(&input.position, "2");
1507 info.at_line_begin = true;
1508 set_punctuator(T_EOF);
1514 int next_c = input.c;
1517 if (next_c == 'U' || next_c == 'u') {
1524 if (error_on_unknown_chars) {
1525 errorf(&pp_token.base.source_position, "unknown character '%lc' found", input.c);
1529 assert(obstack_object_size(&symbol_obstack) == 0);
1530 obstack_grow_utf8(&symbol_obstack, input.c);
1531 obstack_1grow(&symbol_obstack, '\0');
1532 char *const string = obstack_finish(&symbol_obstack);
1533 symbol_t *const symbol = symbol_table_insert(string);
1534 if (symbol->string != string)
1535 obstack_free(&symbol_obstack, string);
1537 pp_token.kind = T_UNKNOWN_CHAR;
1538 pp_token.base.symbol = symbol;
1545 static void print_quoted_string(const char *const string)
1548 for (const char *c = string; *c != 0; ++c) {
1550 case '"': fputs("\\\"", out); break;
1551 case '\\': fputs("\\\\", out); break;
1552 case '\a': fputs("\\a", out); break;
1553 case '\b': fputs("\\b", out); break;
1554 case '\f': fputs("\\f", out); break;
1555 case '\n': fputs("\\n", out); break;
1556 case '\r': fputs("\\r", out); break;
1557 case '\t': fputs("\\t", out); break;
1558 case '\v': fputs("\\v", out); break;
1559 case '\?': fputs("\\?", out); break;
1562 fprintf(out, "\\%03o", (unsigned)*c);
1572 static void print_line_directive(const source_position_t *pos, const char *add)
1577 fprintf(out, "# %u ", pos->lineno);
1578 print_quoted_string(pos->input_name);
1583 if (pos->is_system_header) {
1587 printed_input_name = pos->input_name;
1588 input.output_line = pos->lineno-1;
1591 static bool emit_newlines(void)
1596 unsigned delta = pp_token.base.source_position.lineno - input.output_line;
1602 print_line_directive(&pp_token.base.source_position, NULL);
1605 for (unsigned i = 0; i < delta; ++i) {
1609 input.output_line = pp_token.base.source_position.lineno;
1611 unsigned whitespace = info.whitespace_at_line_begin;
1612 /* make sure there is at least 1 whitespace before a (macro-expanded)
1613 * '#' at line begin. I'm not sure why this is good, but gcc does it. */
1614 if (pp_token.kind == '#' && whitespace == 0)
1616 for (unsigned i = 0; i < whitespace; ++i)
/**
 * Configure the preprocessor for writing to @p output and adjust the two
 * lexer flags accordingly.
 *
 * One branch clears error_on_unknown_chars and resolve_escape_sequences,
 * the other sets both.
 * NOTE(review): neither the branch condition nor the use of @p output is
 * visible in this excerpt - presumably a non-NULL stream selects the
 * cleared flags; confirm.
 *
 * @param output  stream handed in by the caller
 */
1622 void set_preprocessor_output(FILE *output)
1626 error_on_unknown_chars = false;
1627 resolve_escape_sequences = false;
1629 error_on_unknown_chars = true;
1630 resolve_escape_sequences = true;
/**
 * Print the current token (pp_token) to `out`.
 *
 * When emit_newlines() returns false, the condition below additionally
 * checks whether the token had leading whitespace or would paste with the
 * previously printed token. String and character literals are preceded by
 * their encoding prefix; the last visible case prints the symbol string of
 * the token. Meeting a T_MACRO_PARAMETER here is an internal error.
 */
1634 void emit_pp_token(void)
1636 if (!emit_newlines() &&
1637 (info.had_whitespace || tokens_would_paste(last_token, pp_token.kind)))
1640 switch (pp_token.kind) {
1642 fputs(pp_token.literal.string.begin, out);
1645 case T_STRING_LITERAL:
1646 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1648 fputs(pp_token.literal.string.begin, out);
1652 case T_CHARACTER_CONSTANT:
1653 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1655 fputs(pp_token.literal.string.begin, out);
1659 case T_MACRO_PARAMETER:
1660 panic("macro parameter not expanded");
1663 fputs(pp_token.base.symbol->string, out);
/* remembered for the tokens_would_paste() check of the next token */
1666 last_token = pp_token.kind;
/**
 * Skip the remainder of the current directive: loops until
 * info.at_line_begin is set.
 * NOTE(review): the loop body is not visible in this excerpt - presumably
 * it fetches the next token; confirm.
 */
1669 static void eat_pp_directive(void)
1671 while (!info.at_line_begin) {
/**
 * Compare two string_t values.
 *
 * The sizes are compared first; afterwards both buffers are walked in
 * lock-step for `size` bytes.
 *
 * @param string1  first string
 * @param string2  second string
 * @return NOTE(review): the return statements are not visible in this
 *         excerpt - presumably true iff size and contents match; confirm.
 */
1676 static bool strings_equal(const string_t *string1, const string_t *string2)
1678 size_t size = string1->size;
1679 if (size != string2->size)
1682 const char *c1 = string1->begin;
1683 const char *c2 = string2->begin;
1684 for (size_t i = 0; i < size; ++i, ++c1, ++c2) {
/**
 * Compare two preprocessing tokens.
 *
 * The kinds are checked first. Character constants and string literals
 * are then compared through strings_equal() on their literal text, macro
 * parameters by the symbol of the definition they refer to, and the last
 * visible case by the token symbol.
 *
 * @param token1  first token
 * @param token2  second token
 */
1691 static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
1693 if (token1->kind != token2->kind)
1696 switch (token1->kind) {
1698 case T_CHARACTER_CONSTANT:
1699 case T_STRING_LITERAL:
1700 return strings_equal(&token1->literal.string, &token2->literal.string);
1702 case T_MACRO_PARAMETER:
1703 return token1->macro_parameter.def->symbol
1704 == token2->macro_parameter.def->symbol;
1707 return token1->base.symbol == token2->base.symbol;
/**
 * Compare the replacement lists of two macro definitions.
 *
 * The list lengths are checked first; then the token lists are walked in
 * parallel, checking each token pair with pp_tokens_equal() as well as the
 * had_whitespace flag of both entries.
 * NOTE(review): nothing in the visible lines compares the parameter lists
 * themselves.
 *
 * @param definition1  first definition
 * @param definition2  second definition
 */
1711 static bool pp_definitions_equal(const pp_definition_t *definition1,
1712 const pp_definition_t *definition2)
1714 if (definition1->list_len != definition2->list_len)
1717 size_t len = definition1->list_len;
1718 const saved_token_t *t1 = definition1->token_list;
1719 const saved_token_t *t2 = definition2->token_list;
1720 for (size_t i = 0; i < len; ++i, ++t1, ++t2) {
1721 if (!pp_tokens_equal(&t1->token, &t2->token))
1723 if (t1->had_whitespace != t2->had_whitespace)
/**
 * Report, at the position of the current token, that a '#' in a macro
 * body is not followed by a macro parameter.
 */
1729 static void missing_macro_param_error(void)
1731 errorf(&pp_token.base.source_position,
1732 "'#' is not followed by a macro parameter");
/**
 * Check whether the current token may be used as a macro name.
 *
 * Errors are reported when the directive line already ended, when the
 * token is not an identifier, and when the pp_ID of the symbol is one of
 * the cases checked by the second switch.
 *
 * @param context  directive name used in the diagnostics
 *                 (callers in this file pass "#define" and "#undef")
 * @return NOTE(review): the return statements are not visible in this
 *         excerpt; callers treat false as "do not define/undefine".
 */
1735 static bool is_defineable_token(char const *const context)
1737 if (info.at_line_begin) {
1738 errorf(&pp_token.base.source_position, "unexpected end of line after %s", context);
1741 symbol_t *const symbol = pp_token.base.symbol;
1745 if (pp_token.kind != T_IDENTIFIER) {
1746 switch (symbol->string[0]) {
1753 errorf(&pp_token.base.source_position, "expected identifier after %s, got %K", context, &pp_token);
1758 /* TODO turn this into a flag in pp_def. */
1759 switch (symbol->pp_ID) {
1762 errorf(&pp_token.base.source_position, "%K cannot be used as macro name in %s", &pp_token, context);
/**
 * Parse a #define directive and install the resulting macro definition.
 *
 * Steps visible in this function:
 *  - validate the macro name with is_defineable_token("#define"),
 *  - allocate a zeroed pp_definition_t on pp_obstack,
 *  - if a '(' follows the name directly, collect the parameter list
 *    (identifiers and an optional trailing '...') on pp_obstack and hook
 *    every parameter into its symbol for the duration of the body parse,
 *  - collect the replacement list, turning identifiers that name a
 *    parameter into T_MACRO_PARAMETER tokens and checking that '#' is
 *    followed by a parameter in function-like macros,
 *  - restore the symbols of the parameters,
 *  - warn about a conflicting earlier definition, or reuse the earlier one
 *    and free the new one.
 *
 * Fix: the three address-of expressions on the local `parameter` and on
 * `param` had been corrupted by an HTML-entity decoding step; they are
 * spelled with '&' again.
 */
1770 static void parse_define_directive(void)
1778 assert(obstack_object_size(&pp_obstack) == 0);
1780 if (!is_defineable_token("#define"))
1782 symbol_t *const symbol = pp_token.base.symbol;
1784 pp_definition_t *new_definition
1785 = obstack_alloc(&pp_obstack, sizeof(new_definition[0]));
1786 memset(new_definition, 0, sizeof(new_definition[0]));
1787 new_definition->symbol = symbol;
1788 new_definition->source_position = input.position;
1790 /* this is probably the only place where spaces are significant in the
1791 * lexer (except for the fact that they separate tokens). #define b(x)
1792 * is something else than #define b (x) */
1793 if (input.c == '(') {
1798 switch (pp_token.kind) {
1800 new_definition->is_variadic = true;
1801 eat_token(T_DOTDOTDOT);
1802 if (pp_token.kind != ')') {
1803 errorf(&input.position,
1804 "'...' not at end of macro argument list");
1809 case T_IDENTIFIER: {
/* build the parameter on the stack, then append a copy to the growing
 * parameter array on pp_obstack */
1810 pp_definition_t parameter;
1811 memset(&parameter, 0, sizeof(parameter));
1812 parameter.source_position = pp_token.base.source_position;
1813 parameter.symbol = pp_token.base.symbol;
1814 parameter.is_parameter = true;
1815 obstack_grow(&pp_obstack, &parameter, sizeof(parameter));
1816 eat_token(T_IDENTIFIER);
1818 if (pp_token.kind == ',') {
1823 if (pp_token.kind != ')') {
1824 errorf(&pp_token.base.source_position,
1825 "expected ',' or ')' after identifier, got %K",
1834 goto finish_argument_list;
1837 errorf(&pp_token.base.source_position,
1838 "expected identifier, '...' or ')' in #define argument list, got %K",
1844 finish_argument_list:
1845 new_definition->has_parameters = true;
1846 size_t size = obstack_object_size(&pp_obstack);
1847 new_definition->n_parameters
1848 = size / sizeof(new_definition->parameters[0]);
1849 new_definition->parameters = obstack_finish(&pp_obstack);
/* temporarily make each parameter the pp_definition of its symbol; the
 * previous definition is parked in parent_expansion */
1850 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1851 pp_definition_t *param = &new_definition->parameters[i];
1852 symbol_t *symbol = param->symbol;
1853 pp_definition_t *previous = symbol->pp_definition;
1854 if (previous != NULL
1855 && previous->function_definition == new_definition) {
1856 errorf(&param->source_position,
1857 "duplicate macro parameter '%Y'", symbol);
1858 param->symbol = sym_anonymous;
1861 param->parent_expansion = previous;
1862 param->function_definition = new_definition;
1863 symbol->pp_definition = param;
1869 /* construct token list */
1870 assert(obstack_object_size(&pp_obstack) == 0);
1871 bool next_must_be_param = false;
1872 while (!info.at_line_begin) {
1873 if (pp_token.kind == T_IDENTIFIER) {
1874 const symbol_t *symbol = pp_token.base.symbol;
1875 pp_definition_t *definition = symbol->pp_definition;
1876 if (definition != NULL
1877 && definition->function_definition == new_definition) {
1878 pp_token.kind = T_MACRO_PARAMETER;
1879 pp_token.macro_parameter.def = definition;
1882 if (next_must_be_param && pp_token.kind != T_MACRO_PARAMETER) {
1883 missing_macro_param_error();
1885 saved_token_t saved_token;
1886 saved_token.token = pp_token;
1887 saved_token.had_whitespace = info.had_whitespace;
1888 obstack_grow(&pp_obstack, &saved_token, sizeof(saved_token));
1890 = new_definition->has_parameters && pp_token.kind == '#';
1893 if (next_must_be_param)
1894 missing_macro_param_error();
1896 new_definition->list_len = obstack_object_size(&pp_obstack)
1897 / sizeof(new_definition->token_list[0]);
1898 new_definition->token_list = obstack_finish(&pp_obstack);
/* undo the temporary symbol -> parameter hook-up from above */
1900 if (new_definition->has_parameters) {
1901 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1902 pp_definition_t *param = &new_definition->parameters[i];
1903 symbol_t *symbol = param->symbol;
1904 if (symbol == sym_anonymous)
1906 assert(symbol->pp_definition == param);
1907 assert(param->function_definition == new_definition);
1908 symbol->pp_definition = param->parent_expansion;
1909 param->parent_expansion = NULL;
1913 pp_definition_t *old_definition = symbol->pp_definition;
1914 if (old_definition != NULL) {
1915 if (!pp_definitions_equal(old_definition, new_definition)) {
1916 warningf(WARN_OTHER, &input.position, "multiple definition of macro '%Y' (first defined %P)", symbol, &old_definition->source_position);
1918 /* reuse the old definition */
1919 obstack_free(&pp_obstack, new_definition);
1920 new_definition = old_definition;
1924 symbol->pp_definition = new_definition;
/* drop a half-built object still growing on pp_obstack */
1928 if (obstack_object_size(&pp_obstack) > 0) {
1929 char *ptr = obstack_finish(&pp_obstack);
1930 obstack_free(&pp_obstack, ptr);
/**
 * Parse an #undef directive.
 *
 * The macro name is validated with is_defineable_token("#undef"); the
 * pp_definition of the named symbol is then cleared, and tokens following
 * the name on the same line produce a warning.
 */
1935 static void parse_undef_directive(void)
1943 if (!is_defineable_token("#undef")) {
1948 pp_token.base.symbol->pp_definition = NULL;
1951 if (!info.at_line_begin) {
1952 warningf(WARN_OTHER, &input.position, "extra tokens at end of #undef directive");
1957 /** behind an #include we can have the special headername lexemes.
1958 * They're only allowed behind an #include so they're not recognized
1959 * by the normal next_preprocessing_token. We handle them as a special
1961 static const char *parse_headername(bool *system_include)
1963 if (info.at_line_begin) {
1964 parse_error("expected headername after #include");
1968 /* check whether we have a "... or <... headername */
/* *system_include is set to true for a '<' name and to false for a '"'
 * name, both in the raw scan below and in the token-based fallback */
1969 source_position_t position = input.position;
1973 case '<': delimiter = '>'; *system_include = true; goto parse_name;
1974 case '"': delimiter = '"'; *system_include = false; goto parse_name;
/* raw scan: characters are appended to symbol_obstack until the closing
 * delimiter is met */
1976 assert(obstack_object_size(&symbol_obstack) == 0);
1983 char *dummy = obstack_finish(&symbol_obstack);
1984 obstack_free(&symbol_obstack, dummy);
1986 errorf(&pp_token.base.source_position,
1987 "header name without closing '%c'", (char)delimiter);
1991 if (input.c == delimiter) {
1993 goto finish_headername;
1995 obstack_1grow(&symbol_obstack, (char)input.c);
2001 /* we should never be here */
/* fallback: fetch regular preprocessing tokens and accept either a string
 * literal or a '<' ... '>' token sequence */
2005 next_preprocessing_token();
2006 if (info.at_line_begin) {
2007 /* TODO: if we are already in the new line then we parsed more than
2008 * wanted. We reuse the token, but could produce follow-up errors
2009 * or misbehaviour... */
2010 goto error_invalid_input;
2012 if (pp_token.kind == T_STRING_LITERAL) {
2013 *system_include = false;
2014 return pp_token.literal.string.begin;
2015 } else if (pp_token.kind == '<') {
2016 *system_include = true;
2017 assert(obstack_object_size(&pp_obstack) == 0);
2019 next_preprocessing_token();
2020 if (info.at_line_begin) {
2021 /* TODO: we shouldn't have parsed/expanded something on the
2022 * next line yet... */
2023 char *dummy = obstack_finish(&pp_obstack);
2024 obstack_free(&pp_obstack, dummy);
2025 goto error_invalid_input;
2027 if (pp_token.kind == '>')
/* remember the token together with its whitespace flag */
2030 saved_token_t saved;
2031 saved.token = pp_token;
2032 saved.had_whitespace = info.had_whitespace;
2033 obstack_grow(&pp_obstack, &saved, sizeof(saved));
/* turn the collected tokens back into text on symbol_obstack; a blank is
 * inserted before every token but the first that had leading whitespace */
2035 size_t size = obstack_object_size(&pp_obstack);
2036 assert(size % sizeof(saved_token_t) == 0);
2037 size_t n_tokens = size / sizeof(saved_token_t);
2038 saved_token_t *tokens = obstack_finish(&pp_obstack);
2039 assert(obstack_object_size(&symbol_obstack) == 0);
2040 for (size_t i = 0; i < n_tokens; ++i) {
2041 const saved_token_t *saved = &tokens[i];
2042 if (i > 0 && saved->had_whitespace)
2043 obstack_1grow(&symbol_obstack, ' ');
2044 grow_token(&symbol_obstack, &saved->token);
2046 obstack_free(&pp_obstack, tokens);
2047 goto finish_headername;
2049 error_invalid_input:
2051 char *dummy = obstack_finish(&symbol_obstack);
2052 obstack_free(&symbol_obstack, dummy);
2055 errorf(&pp_token.base.source_position,
2056 "expected \"FILENAME\" or <FILENAME> after #include");
/* success path: terminate the collected name and run it through
 * identify_string(); the token position is reset to where the name began */
2062 obstack_1grow(&symbol_obstack, '\0');
2063 char *const headername = obstack_finish(&symbol_obstack);
2064 const char *identified = identify_string(headername);
2065 pp_token.base.source_position = position;
/**
 * Locate @p headername and make it the current preprocessor input.
 *
 * Visible lookup strategy:
 *  - the first assignment to `entry` continues after the search path entry
 *    of the current input when there is one, otherwise it starts at the
 *    bracket or the quote search path,
 *  - for a quote include the directory of the current input file is tried
 *    first (the name is used as is when the current input name contains no
 *    '/'),
 *  - afterwards every remaining search path entry is tried in order; the
 *    first file that can be opened is passed to switch_pp_input().
 *
 * Fix: an empty search path entry no longer triggers an out-of-bounds read
 * of path[len-1]; no '/' is inserted for it, so the header name is looked
 * up as given.
 *
 * @param bracket_include  true for #include <...>, false for "..."
 * @param include_next     true for #include_next
 * @param headername       NUL-terminated header name
 * @return NOTE(review): the return statements are not visible in this
 *         excerpt; the caller reports "failed including" on a false result.
 */
2069 static bool do_include(bool const bracket_include, bool const include_next, char const *const headername)
2071 size_t const headername_len = strlen(headername);
2072 searchpath_entry_t *entry;
2074 entry = input.path ? input.path->next
2075 : bracket_include ? bracket_searchpath.first
2076 : quote_searchpath.first;
2078 if (!bracket_include) {
2079 /* put dirname of current input on obstack */
2080 const char *filename = input.position.input_name;
2081 const char *last_slash = strrchr(filename, '/');
2082 const char *full_name;
2083 if (last_slash != NULL) {
/* len + 1 keeps the trailing '/' of the directory part */
2084 size_t len = last_slash - filename;
2085 obstack_grow(&symbol_obstack, filename, len + 1);
2086 obstack_grow0(&symbol_obstack, headername, headername_len);
2087 char *complete_path = obstack_finish(&symbol_obstack);
2088 full_name = identify_string(complete_path);
2090 full_name = headername;
2093 FILE *file = fopen(full_name, "r");
2095 switch_pp_input(file, full_name, NULL, false);
2098 entry = quote_searchpath.first;
2100 entry = bracket_searchpath.first;
2104 assert(obstack_object_size(&symbol_obstack) == 0);
2105 /* check searchpath */
2106 for (; entry; entry = entry->next) {
2107 const char *path = entry->path;
2108 size_t len = strlen(path);
2109 obstack_grow(&symbol_obstack, path, len);
/* an empty entry has no last character to inspect */
2110 if (len > 0 && path[len-1] != '/')
2111 obstack_1grow(&symbol_obstack, '/');
/* headername_len+1 copies the terminating NUL as well */
2112 obstack_grow(&symbol_obstack, headername, headername_len+1);
2114 char *complete_path = obstack_finish(&symbol_obstack);
2115 FILE *file = fopen(complete_path, "r");
2117 const char *filename = identify_string(complete_path);
2118 switch_pp_input(file, filename, entry, entry->is_system_path);
/* candidate could not be opened: release the path string again */
2121 obstack_free(&symbol_obstack, complete_path);
/**
 * Parse an #include or #include_next directive.
 *
 * The header name is read with parse_headername(); trailing tokens cause a
 * warning. The nesting depth is checked against INCLUDE_LIMIT, the
 * whitespace info is reset to "start of line" and do_include() performs
 * the lookup. On failure an error naming the header and strerror(errno)
 * is reported and the saved input is restored.
 *
 * @param include_next  true when called for #include_next
 */
2128 static void parse_include_directive(bool const include_next)
2135 /* do not eat the TP_include, since it would already parse the next token
2136 * which needs special handling here. */
2137 skip_till_newline(true);
2138 bool system_include;
2139 const char *headername = parse_headername(&system_include);
2140 if (headername == NULL) {
2145 bool had_nonwhitespace = skip_till_newline(false);
2146 if (had_nonwhitespace) {
2147 warningf(WARN_OTHER, &input.position,
2148 "extra tokens at end of #include directive");
2151 if (n_inputs > INCLUDE_LIMIT) {
2152 errorf(&pp_token.base.source_position, "#include nested too deeply");
/* the included file starts on a fresh line */
2159 info.whitespace_at_line_begin = 0;
2160 info.had_whitespace = false;
2161 info.at_line_begin = true;
2164 bool res = do_include(system_include, include_next, headername);
2168 errorf(&pp_token.base.source_position, "failed including '%s': %s", headername, strerror(errno));
2169 pop_restore_input();
/**
 * Allocate a zero-initialised conditional on pp_obstack and push it onto
 * conditional_stack.
 *
 * @return NOTE(review): the return statement is not visible in this
 *         excerpt - callers use the result as the new top entry.
 */
2173 static pp_conditional_t *push_conditional(void)
2175 pp_conditional_t *conditional
2176 = obstack_alloc(&pp_obstack, sizeof(*conditional));
2177 memset(conditional, 0, sizeof(*conditional));
2179 conditional->parent = conditional_stack;
2180 conditional_stack = conditional;
/**
 * Remove the innermost conditional from conditional_stack.
 * The stack must not be empty (asserted).
 */
2185 static void pop_conditional(void)
2187 assert(conditional_stack != NULL);
2188 conditional_stack = conditional_stack->parent;
/**
 * Report every conditional that is still open on conditional_stack.
 *
 * For each remaining entry an error is issued at the stored source
 * position: "unterminated #else" when the entry is in its else part,
 * "unterminated condition" otherwise.
 */
2191 void check_unclosed_conditionals(void)
2193 while (conditional_stack != NULL) {
2194 pp_conditional_t *conditional = conditional_stack;
2196 if (conditional->in_else) {
2197 errorf(&conditional->source_position, "unterminated #else");
2199 errorf(&conditional->source_position, "unterminated condition");
/**
 * Parse an #ifdef or #ifndef directive and push the matching conditional.
 *
 * Two pushes are visible: one marks the new conditional as `skip`
 * (NOTE(review): presumably taken while already in skip mode - the guard
 * is not visible in this excerpt), the other stores the evaluated
 * condition. A missing identifier and trailing tokens are reported as
 * errors.
 *
 * @param is_ifdef  true for #ifdef, false for #ifndef
 */
2205 static void parse_ifdef_ifndef_directive(bool const is_ifdef)
2208 eat_pp(is_ifdef ? TP_ifdef : TP_ifndef);
2212 pp_conditional_t *conditional = push_conditional();
2213 conditional->source_position = pp_token.base.source_position;
2214 conditional->skip = true;
2218 if (pp_token.kind != T_IDENTIFIER || info.at_line_begin) {
2219 errorf(&pp_token.base.source_position,
2220 "expected identifier after #%s, got %K",
2221 is_ifdef ? "ifdef" : "ifndef", &pp_token);
2224 /* just take the true case in the hope to avoid further errors */
2227 /* evaluate whether we are in true or false case */
/* "symbol has a definition" must equal is_ifdef for the branch to be taken */
2228 condition = (bool)pp_token.base.symbol->pp_definition == is_ifdef;
2229 eat_token(T_IDENTIFIER);
2231 if (!info.at_line_begin) {
2232 errorf(&pp_token.base.source_position,
2233 "extra tokens at end of #%s",
2234 is_ifdef ? "ifdef" : "ifndef");
2239 pp_conditional_t *conditional = push_conditional();
2240 conditional->source_position = pp_token.base.source_position;
2241 conditional->condition = condition;
/**
 * Parse an #else directive.
 *
 * Trailing tokens produce a warning. An #else without an open conditional
 * and a second #else for the same conditional are errors. Otherwise the
 * conditional is marked as being in its else part; unless the whole
 * conditional is skipped, skip_mode becomes the stored condition (the else
 * part is skipped exactly when the if part was taken). The recorded
 * position is moved to this #else.
 */
2248 static void parse_else_directive(void)
2252 if (!info.at_line_begin) {
2254 warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #else");
2259 pp_conditional_t *conditional = conditional_stack;
2260 if (conditional == NULL) {
2261 errorf(&pp_token.base.source_position, "#else without prior #if");
2265 if (conditional->in_else) {
2266 errorf(&pp_token.base.source_position,
2267 "#else after #else (condition started %P)",
2268 &conditional->source_position);
2273 conditional->in_else = true;
2274 if (!conditional->skip) {
2275 skip_mode = conditional->condition;
2277 conditional->source_position = pp_token.base.source_position;
/**
 * Parse an #endif directive.
 *
 * Trailing tokens produce a warning; an #endif without an open
 * conditional is an error. The visible code then distinguishes
 * conditionals that are not marked `skip`.
 * NOTE(review): the body of that branch and the pop of the conditional
 * are not visible in this excerpt.
 */
2280 static void parse_endif_directive(void)
2284 if (!info.at_line_begin) {
2286 warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #endif");
2291 pp_conditional_t *conditional = conditional_stack;
2292 if (conditional == NULL) {
2293 errorf(&pp_token.base.source_position, "#endif without prior #if");
2297 if (!conditional->skip) {
/** Kinds of "#pragma STDC" distinguished by parse_pragma_directive(). */
2303 typedef enum stdc_pragma_kind_t {
2307 STDC_CX_LIMITED_RANGE
2308 } stdc_pragma_kind_t;
/** Argument values of a "#pragma STDC" as classified by
 * parse_pragma_directive(). */
2310 typedef enum stdc_pragma_value_kind_t {
2315 } stdc_pragma_value_kind_t;
/**
 * Parse a #pragma directive.
 *
 * Only "#pragma STDC" in C99 mode is examined further: the pragma name is
 * mapped to a stdc_pragma_kind_t and its argument to a
 * stdc_pragma_value_kind_t. An unrecognised argument is an error; any
 * pragma whose kind stays STDC_UNKNOWN produces a WARN_UNKNOWN_PRAGMAS
 * warning.
 * NOTE(review): no use of the recognised kind/value pair is visible in
 * this excerpt.
 */
2317 static void parse_pragma_directive(void)
2325 if (pp_token.kind != T_IDENTIFIER) {
2326 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
2327 "expected identifier after #pragma");
2332 stdc_pragma_kind_t kind = STDC_UNKNOWN;
2333 if (pp_token.base.symbol->pp_ID == TP_STDC && c_mode & _C99) {
2337 switch (pp_token.base.symbol->pp_ID) {
2338 case TP_FP_CONTRACT: kind = STDC_FP_CONTRACT; break;
2339 case TP_FENV_ACCESS: kind = STDC_FENV_ACCESS; break;
2340 case TP_CX_LIMITED_RANGE: kind = STDC_CX_LIMITED_RANGE; break;
2343 if (kind != STDC_UNKNOWN) {
2345 stdc_pragma_value_kind_t value;
2346 switch (pp_token.base.symbol->pp_ID) {
2347 case TP_ON: value = STDC_VALUE_ON; break;
2348 case TP_OFF: value = STDC_VALUE_OFF; break;
2349 case TP_DEFAULT: value = STDC_VALUE_DEFAULT; break;
2350 default: value = STDC_VALUE_UNKNOWN; break;
2352 if (value == STDC_VALUE_UNKNOWN) {
2353 kind = STDC_UNKNOWN;
2354 errorf(&pp_token.base.source_position, "bad STDC pragma argument");
2359 if (kind == STDC_UNKNOWN) {
2360 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
2361 "encountered unknown #pragma");
/**
 * Parse the operands of a line directive ("#line N ..." or "# N ...").
 *
 * The current token must be a number; it is converted with strtol()
 * (base 0) and becomes the line of the following source line. An optional
 * plain string literal replaces the input name and clears the
 * system-header flag. Trailing numeric flags are scanned and only "3"
 * (system header) has an effect.
 */
2365 static void parse_line_directive(void)
2367 if (pp_token.kind != T_NUMBER) {
2369 parse_error("expected integer");
2372 long const line = strtol(pp_token.literal.string.begin, &end, 0);
2374 /* use offset -1 as this is about the next line */
2375 input.position.lineno = line - 1;
2376 /* force output of line */
2377 input.output_line = input.position.lineno - 20;
2380 errorf(&input.position, "'%S' is not a valid line number",
2381 &pp_token.literal.string);
2385 if (info.at_line_begin)
2388 if (pp_token.kind == T_STRING_LITERAL
2389 && pp_token.literal.string.encoding == STRING_ENCODING_CHAR) {
2390 input.position.input_name = pp_token.literal.string.begin;
2391 input.position.is_system_header = false;
2394 /* attempt to parse numeric flags as emitted by the gcc preprocessor */
2395 while (!info.at_line_begin && pp_token.kind == T_NUMBER) {
2397 * 1 - indicates start of a new file
2398 * 2 - indicates return from a file
2399 * 3 - indicates system header
2400 * 4 - indicates implicit extern "C" in C++ mode
2402 * currently we're only interested in "3"
2404 if (streq(pp_token.literal.string.begin, "3")) {
2405 input.position.is_system_header = true;
/**
 * Parse an #error directive and report its text as an error.
 *
 * Escape sequence resolution is switched off while the tokens of the
 * directive are read and restored afterwards. The tokens are spelled back
 * into pp_obstack (a single blank where a token had leading whitespace,
 * literals with encoding prefix and delimiters); the resulting string is
 * reported with a leading '#' at the position saved in `pos` and then
 * freed again.
 */
2414 static void parse_error_directive(void)
2421 bool const old_resolve_escape_sequences = resolve_escape_sequences;
2422 resolve_escape_sequences = false;
2424 source_position_t const pos = pp_token.base.source_position;
/* no separator in front of the very first token */
2426 if (info.had_whitespace && obstack_object_size(&pp_obstack) != 0)
2427 obstack_1grow(&pp_obstack, ' ');
2429 switch (pp_token.kind) {
2431 string_t const *const str = &pp_token.literal.string;
2432 obstack_grow(&pp_obstack, str->begin, str->size);
2438 case T_STRING_LITERAL: delim = '"'; goto string;
2439 case T_CHARACTER_CONSTANT: delim = '\''; goto string;
2441 string_t const *const str = &pp_token.literal.string;
2442 char const *const enc = get_string_encoding_prefix(str->encoding);
2443 obstack_printf(&pp_obstack, "%s%c%s%c", enc, delim, str->begin, delim);
2448 char const *const str = pp_token.base.symbol->string;
2449 obstack_grow(&pp_obstack, str, strlen(str));
2455 } while (!info.at_line_begin);
2457 resolve_escape_sequences = old_resolve_escape_sequences;
2459 obstack_1grow(&pp_obstack, '\0');
2460 char *const str = obstack_finish(&pp_obstack);
2461 errorf(&pos, "#%s", str);
2462 obstack_free(&pp_obstack, str);
/**
 * Dispatch a preprocessing directive to its parser.
 *
 * An immediately following line start means an empty directive. If the
 * token has a symbol, its pp_ID selects the handler; a bare number is
 * treated as a line directive. The remaining visible case reports
 * "invalid preprocessing directive". On exit the input is expected to be
 * at the start of a line (asserted).
 */
2465 static void parse_preprocessing_directive(void)
2469 if (info.at_line_begin) {
2470 /* empty directive */
2474 if (pp_token.base.symbol) {
2475 switch (pp_token.base.symbol->pp_ID) {
2476 case TP_define: parse_define_directive(); break;
2477 case TP_else: parse_else_directive(); break;
2478 case TP_endif: parse_endif_directive(); break;
2479 case TP_error: parse_error_directive(); break;
2480 case TP_ifdef: parse_ifdef_ifndef_directive(true); break;
2481 case TP_ifndef: parse_ifdef_ifndef_directive(false); break;
2482 case TP_include: parse_include_directive(false); break;
2483 case TP_include_next: parse_include_directive(true); break;
2484 case TP_line: next_input_token(); goto line_directive;
2485 case TP_pragma: parse_pragma_directive(); break;
2486 case TP_undef: parse_undef_directive(); break;
2489 } else if (pp_token.kind == T_NUMBER) {
2491 parse_line_directive();
2495 errorf(&pp_token.base.source_position, "invalid preprocessing directive #%K", &pp_token);
2500 assert(info.at_line_begin);
/**
 * Close the macro argument that is currently being collected.
 *
 * Does nothing special when there is no current argument (the early exit
 * itself is not visible in this excerpt). Otherwise the tokens grown on
 * pp_obstack become the token list of current_argument.
 */
2503 static void finish_current_argument(void)
2505 if (current_argument == NULL)
2507 size_t size = obstack_object_size(&pp_obstack);
2508 current_argument->list_len = size/sizeof(current_argument->token_list[0]);
2509 current_argument->token_list = obstack_finish(&pp_obstack);
/**
 * Produce the next fully preprocessed token in pp_token.
 *
 * Visible behaviour:
 *  - tokens come from the running macro expansion if expand_next()
 *    succeeds; the other path handles directives ('#' at line begin) and
 *    repeats while skip_mode is active and the input is not at T_EOF,
 *  - outside of argument collection (or while an argument is being
 *    expanded) macro parameters and identifiers with a definition that is
 *    not already expanding start an expansion; for function-like macros
 *    this only happens when the next token is '(' and then switches to
 *    argument collection (current_call / current_argument),
 *  - while collecting arguments, parentheses are counted, ',' at nesting
 *    level 0 moves on to the next parameter, the closing ')' starts the
 *    expansion of the call, and every other token is appended to the
 *    current argument on pp_obstack.
 *
 * Fix: the two address-of expressions on current_call->parameters[...]
 * had been corrupted by an HTML-entity decoding step; they are spelled
 * with '&' again.
 */
2512 void next_preprocessing_token(void)
2515 if (!expand_next()) {
2518 while (pp_token.kind == '#' && info.at_line_begin) {
2519 parse_preprocessing_directive();
2521 } while (skip_mode && pp_token.kind != T_EOF);
2524 const token_kind_t kind = pp_token.kind;
2525 if (current_call == NULL || argument_expanding != NULL) {
2526 symbol_t *const symbol = pp_token.base.symbol;
2528 if (kind == T_MACRO_PARAMETER) {
2529 assert(current_expansion != NULL);
2530 start_expanding(pp_token.macro_parameter.def);
2534 pp_definition_t *const pp_definition = symbol->pp_definition;
2535 if (pp_definition != NULL && !pp_definition->is_expanding) {
2536 if (pp_definition->has_parameters) {
2538 /* check if next token is a '(' */
2539 whitespace_info_t old_info = info;
2540 token_kind_t next_token = peek_expansion();
2541 if (next_token == T_EOF) {
2542 info.at_line_begin = false;
2543 info.had_whitespace = false;
2545 if (input.c == '(') {
2550 if (next_token == '(') {
/* remember where the outermost expansion started */
2551 if (current_expansion == NULL)
2552 expansion_pos = pp_token.base.source_position;
2553 next_preprocessing_token();
2554 assert(pp_token.kind == '(');
2556 pp_definition->parent_expansion = current_expansion;
2557 current_call = pp_definition;
2558 current_call->expand_pos = 0;
2559 current_call->expand_info = old_info;
2560 if (current_call->n_parameters > 0) {
2561 current_argument = &current_call->parameters[0];
2562 assert(argument_brace_count == 0);
2566 /* skip_whitespaces() skipped newlines and whitespace,
2567 * remember results for next token */
2573 if (current_expansion == NULL)
2574 expansion_pos = pp_token.base.source_position;
2575 start_expanding(pp_definition);
2582 if (current_call != NULL) {
2583 /* current_call != NULL */
2585 ++argument_brace_count;
2586 } else if (kind == ')') {
2587 if (argument_brace_count > 0) {
2588 --argument_brace_count;
/* closing parenthesis of the call itself: expand it */
2590 finish_current_argument();
2591 assert(kind == ')');
2592 start_expanding(current_call);
2593 info = current_call->expand_info;
2594 current_call = NULL;
2595 current_argument = NULL;
2598 } else if (kind == ',' && argument_brace_count == 0) {
2599 finish_current_argument();
2600 current_call->expand_pos++;
2601 if (current_call->expand_pos >= current_call->n_parameters) {
2602 errorf(&pp_token.base.source_position,
2603 "too many arguments passed for macro '%Y'",
2604 current_call->symbol);
2605 current_argument = NULL;
2608 = &current_call->parameters[current_call->expand_pos];
2611 } else if (kind == T_MACRO_PARAMETER) {
2612 /* parameters have to be fully expanded before being used as
2613 * parameters for another macro-call */
2614 assert(current_expansion != NULL);
2615 pp_definition_t *argument = pp_token.macro_parameter.def;
2616 argument_expanding = argument;
2617 start_expanding(argument);
2619 } else if (kind == T_EOF) {
2620 errorf(&expansion_pos,
2621 "reached end of file while parsing arguments for '%Y'",
2622 current_call->symbol);
/* any other token belongs to the argument that is being collected */
2625 if (current_argument != NULL) {
2626 saved_token_t saved;
2627 saved.token = pp_token;
2628 saved.had_whitespace = info.had_whitespace;
2629 obstack_grow(&pp_obstack, &saved, sizeof(saved));
/**
 * Append a new entry to the search path list @p paths.
 *
 * The entry is allocated zeroed on config_obstack, inherits the
 * is_system_path flag of the list and is linked in through the anchor
 * pointer of the list, which then moves on to the `next` field of the new
 * entry.
 * NOTE(review): the line storing @p path in the entry is not visible in
 * this excerpt.
 *
 * @param paths  list to extend
 * @param path   directory name of the new entry
 */
2635 void append_include_path(searchpath_t *paths, const char *path)
2637 searchpath_entry_t *entry = OALLOCZ(&config_obstack, searchpath_entry_t);
2639 entry->is_system_path = paths->is_system_path;
2641 *paths->anchor = entry;
2642 paths->anchor = &entry->next;
/**
 * Append the directories listed in an environment variable to @p paths.
 *
 * The value is split at ':'; every piece is copied to config_obstack and
 * appended with append_include_path(). The literal "." is appended in the
 * special case described by the comment below. An unset or empty variable
 * adds nothing.
 *
 * @param paths   list to extend
 * @param envvar  name of the environment variable to read
 */
2645 static void append_env_paths(searchpath_t *paths, const char *envvar)
2647 const char *val = getenv(envvar);
2648 if (val != NULL && *val != '\0') {
2649 const char *begin = val;
/* advance to the end of the current piece */
2653 while (*c != '\0' && *c != ':')
2656 size_t len = c-begin;
2658 /* use "." for gcc compatibility (Matze: I would expect that
2659 * nothing happens for an empty entry...) */
2660 append_include_path(paths, ".");
2662 char *const string = obstack_copy0(&config_obstack, begin, len);
2663 append_include_path(paths, string);
2670 } while(*c != '\0');
/**
 * Chain the list @p append behind the list @p path by storing its first
 * entry through the anchor pointer of @p path. The entries are shared,
 * not copied.
 *
 * @param path    list that is extended
 * @param append  list whose entries follow afterwards
 */
2674 static void append_searchpath(searchpath_t *path, const searchpath_t *append)
2676 *path->anchor = append->first;
/**
 * Build the include search paths.
 *
 * "/usr/include" is added to the system path, CPATH extends the bracket
 * path and C_INCLUDE_PATH (CPLUS_INCLUDE_PATH in C++ mode) extends the
 * system path. Finally the lists are chained so that a lookup walks
 * quote -> bracket -> system -> after.
 *
 * Fix: the first argument of the last append_searchpath() call had been
 * corrupted by an HTML-entity decoding step; it is `&quote_searchpath`
 * again.
 */
2679 static void setup_include_path(void)
2681 /* built-in paths */
2682 append_include_path(&system_searchpath, "/usr/include");
2684 /* parse environment variable */
2685 append_env_paths(&bracket_searchpath, "CPATH");
2686 append_env_paths(&system_searchpath,
2687 c_mode & _CXX ? "CPLUS_INCLUDE_PATH" : "C_INCLUDE_PATH");
2689 /* append system search path to bracket searchpath */
2690 append_searchpath(&system_searchpath, &after_searchpath);
2691 append_searchpath(&bracket_searchpath, &system_searchpath);
2692 append_searchpath(&quote_searchpath, &bracket_searchpath);
/**
 * Report @p message as an error at a position relative to the current
 * token. Registered as input error callback in init_preprocessor().
 *
 * @param delta_lines  lines to add to the line of the current token
 * @param delta_cols   columns to add to the column of the current token
 * @param message      text of the error
 */
2695 static void input_error(unsigned const delta_lines, unsigned const delta_cols, char const *const message)
2697 source_position_t pos = pp_token.base.source_position;
2698 pos.lineno += delta_lines;
2699 pos.colno += delta_cols;
2700 errorf(&pos, "%s", message);
/**
 * Initialise config_obstack, on which the search path entries are
 * allocated (see append_include_path()).
 */
2703 void init_include_paths(void)
2705 obstack_init(&config_obstack);
/**
 * Initialise the preprocessor: set up pp_obstack, input_obstack and the
 * string set, build the include search paths and register input_error()
 * as the callback for input errors.
 * config_obstack is used by setup_include_path() but not initialised
 * here; see init_include_paths().
 */
2708 void init_preprocessor(void)
2712 obstack_init(&pp_obstack);
2713 obstack_init(&input_obstack);
2714 strset_init(&stringset);
2716 setup_include_path();
2718 set_input_error_callback(input_error);
/**
 * Release the resources of the preprocessor: the three obstacks are freed
 * completely and the string set is destroyed.
 */
2721 void exit_preprocessor(void)
2723 obstack_free(&input_obstack, NULL);
2724 obstack_free(&pp_obstack, NULL);
2725 obstack_free(&config_obstack, NULL);
2727 strset_destroy(&stringset);
/**
 * Stand-alone driver that preprocesses one input file.
 *
 * Options handled in the visible code: "-I" (add a bracket include
 * directory), "-E", "-o", any other "-..." (reported as unknown); a plain
 * argument is the input file. Three gcc-style line markers are written
 * first, then tokens are fetched until T_EOF; afterwards unclosed
 * conditionals are reported and the input, the preprocessor and the
 * symbol table are shut down.
 *
 * @param argc  number of command line arguments
 * @param argv  command line arguments
 */
2730 int pptest_main(int argc, char **argv);
2731 int pptest_main(int argc, char **argv)
2733 init_symbol_table();
2734 init_include_paths();
2735 init_preprocessor();
2738 error_on_unknown_chars = false;
2739 resolve_escape_sequences = false;
2741 /* simplistic commandline parser */
2742 const char *filename = NULL;
2743 const char *output = NULL;
2744 for (int i = 1; i < argc; ++i) {
2745 const char *opt = argv[i];
2746 if (streq(opt, "-I")) {
/* NOTE(review): when "-I" is the last argument, argv[++i] is the
 * terminating NULL pointer - confirm that this cannot reach strlen() */
2747 append_include_path(&bracket_searchpath, argv[++i]);
2749 } else if (streq(opt, "-E")) {
2751 } else if (streq(opt, "-o")) {
2754 } else if (opt[0] == '-') {
2755 fprintf(stderr, "Unknown option '%s'\n", opt);
2757 if (filename != NULL)
2758 fprintf(stderr, "Multiple inputs not supported\n");
2762 if (filename == NULL) {
2763 fprintf(stderr, "No input specified\n");
2767 if (output == NULL) {
2770 out = fopen(output, "w");
2772 fprintf(stderr, "Couldn't open output '%s'\n", output);
2777 /* just here for gcc compatibility */
2778 fprintf(out, "# 1 \"%s\"\n", filename);
2779 fprintf(out, "# 1 \"<built-in>\"\n");
2780 fprintf(out, "# 1 \"<command-line>\"\n");
2782 FILE *file = fopen(filename, "r");
2784 fprintf(stderr, "Couldn't open input '%s'\n", filename);
2787 switch_pp_input(file, filename, NULL, false);
2790 next_preprocessing_token();
2791 if (pp_token.kind == T_EOF)
2797 check_unclosed_conditionals();
2798 fclose(close_pp_input());
2803 exit_preprocessor();
2804 exit_symbol_table();