9 #include "preprocessor.h"
13 #include "adt/error.h"
14 #include "adt/strutil.h"
15 #include "adt/strset.h"
16 #include "lang_features.h"
17 #include "diagnostic.h"
18 #include "string_rep.h"
/* Numeric limit related to #include handling. Its use site is not visible in
 * this excerpt; presumably the maximum include nesting depth (compare
 * n_inputs / input_stack) -- TODO confirm where it is checked. */
22 #define INCLUDE_LIMIT 199 /* 199 is for gcc "compatibility" */
/* One entry of a macro's stored token list (see pp_definition_t.token_list).
 * Code in this file reads the members `token` and `had_whitespace`; the
 * member declarations themselves are not visible in this excerpt. */
24 typedef struct saved_token_t {
/* Whitespace and line-position state associated with a token. The file keeps
 * two instances, `info` (current token) and `next_info`. Members referenced
 * elsewhere in this file: had_whitespace, at_line_begin,
 * whitespace_at_line_begin. */
29 typedef struct whitespace_info_t {
30 /** current token had whitespace in front of it */
32 /** current token is at the beginning of a line.
33 * => a "#" at line begin starts a preprocessing directive. */
35 /** number of spaces before the first token in a line */
36 unsigned whitespace_at_line_begin;
/* A preprocessor definition. Judging by the flags below, the same record type
 * is used for macros and for macro parameters (is_parameter). While a
 * definition is being expanded it is linked into a stack through
 * parent_expansion (see start_expanding / finished_expanding). Further
 * members used in this file (symbol, list_len, expand_pos) are not visible in
 * this excerpt. */
39 struct pp_definition_t {
41 source_position_t source_position;
/* enclosing expansion while this definition is being expanded */
42 pp_definition_t *parent_expansion;
44 whitespace_info_t expand_info;
46 bool is_expanding : 1;
47 bool has_parameters : 1;
48 bool is_parameter : 1;
49 pp_definition_t *function_definition;
51 pp_definition_t *parameters;
/* stored tokens; indexed by expand_pos during expansion */
55 saved_token_t *token_list;
/* State of one conditional-inclusion block. Entries are chained through
 * `parent`; the file-scope `conditional_stack` points at such an entry.
 * Only part of the member list is visible in this excerpt. */
58 typedef struct pp_conditional_t pp_conditional_t;
59 struct pp_conditional_t {
60 source_position_t source_position;
63 /** conditional in skip mode (then+else gets skipped) */
65 pp_conditional_t *parent;
/* State of one input source being lexed. Members used in this file but not
 * visible here include file, input, c, bufpos, bufend, output_line and
 * parent. */
68 typedef struct pp_input_t pp_input_t;
/* decoded characters; next_real_char() fills it starting at offset
 * MAX_PUTBACK, leaving the front free for put_back() */
73 utf32 buf[1024+MAX_PUTBACK];
76 source_position_t position;
79 searchpath_entry_t *path;
/* Node of a singly linked list of search path entries (linked via `next`).
 * Other members are not visible in this excerpt. */
82 struct searchpath_entry_t {
84 searchpath_entry_t *next;
/* File-scope preprocessor state. */

/* the input currently being read */
88 static pp_input_t input;
/* saved enclosing inputs (see push_input / pop_restore_input) */
90 static pp_input_t *input_stack;
91 static unsigned n_inputs;
/* backing storage for the entries of input_stack */
92 static struct obstack input_obstack;
94 static pp_conditional_t *conditional_stack;
97 bool allow_dollar_in_symbol = true;
/* both flags are switched by set_preprocessor_output() */
98 static bool resolve_escape_sequences = true;
99 static bool error_on_unknown_chars = true;
100 static bool skip_mode;
102 static struct obstack pp_obstack;
103 static struct obstack config_obstack;
/* input name of the last printed line directive (set in print_line_directive) */
104 static const char *printed_input_name = NULL;
/* source position given to every token produced by expand_next() */
105 static source_position_t expansion_pos;
/* innermost definition currently being expanded, NULL if none */
106 static pp_definition_t *current_expansion = NULL;
107 static pp_definition_t *current_call = NULL;
108 static pp_definition_t *current_argument = NULL;
/* cleared in finished_expanding() when that definition finishes */
109 static pp_definition_t *argument_expanding = NULL;
110 static unsigned argument_brace_count;
/* set of unique strings used by identify_string() */
111 static strset_t stringset;
/* kind of the last token written by emit_pp_token() */
112 static token_kind_t last_token;
/* A list of search path entries. `first` is the list head; `anchor` is a
 * pointer to a list link and is initialised to &first by the definitions of
 * the global search paths (presumably the append position -- TODO confirm).
 * The initialisers supply a third, boolean member that is not visible in this
 * excerpt. */
114 struct searchpath_t {
115 searchpath_entry_t *first;
116 searchpath_entry_t **anchor;
/* The four global search paths. Each starts empty (first == NULL) with its
 * anchor pointing at its own `first` member. The third initialiser sets a
 * boolean member whose declaration is not visible in this excerpt
 * (presumably "is system path" -- TODO confirm).
 * Fix: the quote_searchpath initialiser had been corrupted to a stray quote
 * character; it must take the address of quote_searchpath.first like its
 * siblings. */
120 searchpath_t bracket_searchpath = { NULL, &bracket_searchpath.first, false };
121 searchpath_t quote_searchpath = { NULL, &quote_searchpath.first, false };
122 searchpath_t system_searchpath = { NULL, &system_searchpath.first, true };
123 searchpath_t after_searchpath = { NULL, &after_searchpath.first, true };
/* whitespace state for the upcoming token and for the current token */
125 static whitespace_info_t next_info; /* valid if had_whitespace is true */
126 static whitespace_info_t info;

/* forward declarations */
128 static inline void next_char(void);
129 static void next_input_token(void);
130 static void print_line_directive(const source_position_t *pos, const char *add);

/* symbols for the digraph spellings; filled in by init_symbols() */
132 static symbol_t *symbol_colongreater;
133 static symbol_t *symbol_lesscolon;
134 static symbol_t *symbol_lesspercent;
135 static symbol_t *symbol_percentcolon;
136 static symbol_t *symbol_percentcolonpercentcolon;
137 static symbol_t *symbol_percentgreater;

/* symbols for the string/character encoding prefixes; see
 * identify_encoding_prefix() */
139 static symbol_t *symbol_L;
140 static symbol_t *symbol_U;
141 static symbol_t *symbol_u;
142 static symbol_t *symbol_u8;
/* Interns the digraph spellings and the encoding prefixes in the symbol table
 * and caches the resulting symbols in the file-scope symbol_* variables. */
144 static void init_symbols(void)
146 symbol_colongreater = symbol_table_insert(":>");
147 symbol_lesscolon = symbol_table_insert("<:");
148 symbol_lesspercent = symbol_table_insert("<%");
149 symbol_percentcolon = symbol_table_insert("%:");
150 symbol_percentcolonpercentcolon = symbol_table_insert("%:%:");
151 symbol_percentgreater = symbol_table_insert("%>");
153 symbol_L = symbol_table_insert("L");
154 symbol_U = symbol_table_insert("U");
155 symbol_u = symbol_table_insert("u");
156 symbol_u8 = symbol_table_insert("u8");
/* Makes `file` the current preprocessor input.
 *
 * Visible effects: wraps the stream with input_from_stream(), resets
 * input.output_line, sets the position to line 1 of `filename` with the given
 * system-header flag, and prints a line directive (flag "1" when an enclosing
 * input exists on input_stack, no flag otherwise). The line number is then
 * set to 0 as part of the "virtual newline" setup.
 *
 * @param file              stream to read from
 * @param filename          name stored as input.position.input_name
 * @param path              search path entry; its use is not visible in this
 *                          excerpt
 * @param is_system_header  stored in input.position.is_system_header
 */
159 void switch_pp_input(FILE *const file, char const *const filename, searchpath_entry_t *const path, bool const is_system_header)
162 input.input = input_from_stream(file, NULL);
165 input.output_line = 0;
166 input.position.input_name = filename;
167 input.position.lineno = 1;
168 input.position.is_system_header = is_system_header;
171 /* indicate that we're at a new input */
172 print_line_directive(&input.position, input_stack != NULL ? "1" : NULL);
174 /* place a virtual '\n' so we realize we're at line begin */
175 input.position.lineno = 0;
/* Releases the decoder of the current input via input_free() and fetches the
 * underlying FILE* from input.file. The rest of the body is not visible in
 * this excerpt; the caller in next_input_token() passes the return value to
 * fclose(), so the stream is presumably returned unclosed -- TODO confirm. */
179 FILE *close_pp_input(void)
181 input_free(input.input);
183 FILE* const file = input.file;
/* Saves the current input state on input_stack.
 * The whole `input` struct is copied onto input_obstack. Because bufpos and
 * bufend point into the struct's own buf array, they are rebased so that the
 * copy's pointers refer to the copy's buffer. */
195 static void push_input(void)
197 pp_input_t *const saved_input = obstack_copy(&input_obstack, &input, sizeof(input));
199 /* adjust buffer positions */
200 if (input.bufpos != NULL)
201 saved_input->bufpos = saved_input->buf + (input.bufpos - input.buf);
202 if (input.bufend != NULL)
203 saved_input->bufend = saved_input->buf + (input.bufend - input.buf);
/* link the copy in as the new top of the stack */
205 saved_input->parent = input_stack;
206 input_stack = saved_input;
/* Restores the most recently pushed input state into `input` and removes it
 * from input_stack. Mirrors push_input(): after the raw copy, bufpos and
 * bufend are rebased to point into input.buf. Requires a non-empty stack. */
210 static void pop_restore_input(void)
212 assert(n_inputs > 0);
213 assert(input_stack != NULL);
215 pp_input_t *saved_input = input_stack;
217 memcpy(&input, saved_input, sizeof(input));
220 /* adjust buffer positions */
221 if (saved_input->bufpos != NULL)
222 input.bufpos = input.buf + (saved_input->bufpos - saved_input->buf);
223 if (saved_input->bufend != NULL)
224 input.bufend = input.buf + (saved_input->bufend - saved_input->buf);
/* unlink first, then release the saved copy */
226 input_stack = saved_input->parent;
227 obstack_free(&input_obstack, saved_input);
232 * Prints a parse error message at the current token.
234 * @param msg the error message
/* Reports `msg` through errorf() at the source position of the current token
 * (pp_token). The message is passed as a "%s" argument, so it is never
 * interpreted as a format string. */
236 static void parse_error(const char *msg)
238 errorf(&pp_token.base.source_position, "%s", msg);
/* Loads the next decoded character into input.c and advances the column
 * counter. No trigraph or line-splice handling happens here (next_char()
 * does that).
 * When the buffer is exhausted it is refilled with decode(), writing from
 * offset MAX_PUTBACK so the front of the buffer stays free for put_back().
 * Handling of an empty refill is not visible in this excerpt. */
241 static inline void next_real_char(void)
243 assert(input.bufpos <= input.bufend);
244 if (input.bufpos >= input.bufend) {
245 size_t const n = decode(input.input, input.buf + MAX_PUTBACK, lengthof(input.buf) - MAX_PUTBACK);
250 input.bufpos = input.buf + MAX_PUTBACK;
251 input.bufend = input.bufpos + n;
253 input.c = *input.bufpos++;
254 ++input.position.colno;
258 * Put a character back into the buffer.
260 * @param pc the character to put back
/* Puts the character `pc` back in front of the read position and moves the
 * column counter back by one. Requires free room before input.bufpos.
 *
 * Fix: the value was narrowed with a (char) cast before being stored into the
 * utf32 buffer. That truncates code points above 0xFF and, where char is
 * signed, sign-extends bytes >= 0x80 into huge utf32 values. The character is
 * now stored unchanged.
 *
 * The store goes through `input.buf + offset` exactly as before; presumably
 * input.bufpos is const-qualified (its declaration is not visible here), so
 * the indirection is kept. */
262 static inline void put_back(utf32 const pc)
264 assert(input.bufpos > input.buf);
265 *(--input.bufpos - input.buf + input.buf) = pc;
266 --input.position.colno;
272 if (input.c == '\n') { \
276 ++input.position.lineno; \
277 input.position.colno = 1; \
279 newline // Let it look like an ordinary case label.
/* Consumes the current character: asserts that input.c equals c_type, then
 * advances with next_char(). The check vanishes when assert() is compiled
 * out. */
281 #define eat(c_type) (assert(input.c == c_type), next_char())
/* Called by next_char() after a backslash (or the trigraph for one) has been
 * read. Going by the name and call sites it handles backslash-newline line
 * splicing; only the reset of info.whitespace_at_line_begin is visible in
 * this excerpt. */
283 static void maybe_concat_lines(void)
289 info.whitespace_at_line_begin = 0;
301 * Set input.c to the next input character, i.e. the character seen
302 * after expanding trigraphs.
/* Advances input.c to the next logical character.
 * A backslash is handed to maybe_concat_lines(). A '?' may start a trigraph:
 * if the following character is also '?', the third character is mapped to
 * its replacement by the switch below. The '??/' trigraph yields a backslash
 * and therefore also goes through maybe_concat_lines().
 * Several lines of the body (reads, put-backs, the debug guard around the
 * printf) are not visible in this excerpt. */
304 static inline void next_char(void)
308 /* filter trigraphs and concatenated lines */
309 if (UNLIKELY(input.c == '\\')) {
310 maybe_concat_lines();
311 goto end_of_next_char;
314 if (LIKELY(input.c != '?'))
315 goto end_of_next_char;
318 if (LIKELY(input.c != '?')) {
321 goto end_of_next_char;
/* trigraph replacement table: third character -> resulting character */
326 case '=': input.c = '#'; break;
327 case '(': input.c = '['; break;
328 case '/': input.c = '\\'; maybe_concat_lines(); break;
329 case ')': input.c = ']'; break;
330 case '\'': input.c = '^'; break;
331 case '<': input.c = '{'; break;
332 case '!': input.c = '|'; break;
333 case '>': input.c = '}'; break;
334 case '-': input.c = '~'; break;
344 printf("nchar '%c'\n", input.c);
351 * Returns true if the given char is an octal digit.
353 * @param chr the character to check
/* Predicate used by the octal escape parser to decide whether `chr` is an
 * octal digit. The body is not visible in this excerpt. */
355 static inline bool is_octal_digit(int chr)
373 * Returns the value of a digit.
374 * The only portable way to do it ...
/* Maps a digit character to its numeric value. Callers in this file use it
 * for octal and hexadecimal digits. A character that is not handled ends in
 * panic(), so callers must validate the character first. The mapping itself
 * is not visible in this excerpt. */
376 static int digit_value(int digit)
402 panic("wrong character given");
407 * Parses an octal character sequence.
409 * @param first_digit the already read first digit
/* Computes the value of an octal escape sequence. `first_digit` has already
 * been consumed by the caller and must be an octal digit. Up to two further
 * octal digits are taken from the input; the function returns early as soon
 * as the current character is not an octal digit. The advance between the
 * digits and the final return are not visible in this excerpt. */
411 static utf32 parse_octal_sequence(const utf32 first_digit)
413 assert(is_octal_digit(first_digit));
414 utf32 value = digit_value(first_digit);
415 if (!is_octal_digit(input.c)) return value;
416 value = 8 * value + digit_value(input.c);
418 if (!is_octal_digit(input.c)) return value;
419 value = 8 * value + digit_value(input.c);
426 * Parses a hex character sequence.
/* Accumulates the value of a hexadecimal escape sequence from the input, one
 * hex digit at a time, for as long as the current character is a hex digit.
 * Initialisation, the advance and the return are not visible in this excerpt.
 *
 * Fix: input.c is a decoded utf32 code point and may exceed the unsigned char
 * range. Passing such a value to isxdigit() is undefined behaviour, so the
 * character is first checked to be ASCII. Hex digits are all ASCII, so
 * accepted input is unchanged. */
428 static utf32 parse_hex_sequence(void)
431 while (input.c < 0x80 && isxdigit(input.c)) {
432 value = 16 * value + digit_value(input.c);
/* Checks whether `v` may be written as a universal character name.
 * Two conditions are visible: values below 0xA0 other than 0x24 ('$'),
 * 0x40 ('@') and 0x60 ('`'), and values in the surrogate range
 * 0xD800..0xDFFF. The outcome of each branch is not visible in this excerpt;
 * presumably both reject the value -- TODO confirm. */
438 static bool is_universal_char_valid(utf32 const v)
441 if (v < 0xA0U && v != 0x24 && v != 0x40 && v != 0x60)
443 if (0xD800 <= v && v <= 0xDFFF)
/* Parses the hex digits of a universal character name (\u or \U).
 *
 * Reads up to n_digits hexadecimal digits and accumulates them into the code
 * point. When escape sequences are not resolved, each digit is also appended
 * verbatim to symbol_obstack. Too few digits, or a value rejected by
 * is_universal_char_valid(), is reported with errorf(). Parts of the body
 * (advance, loop exit, return) are not visible in this excerpt.
 *
 * Fix: input.c is a decoded utf32 code point and may exceed the unsigned char
 * range, for which isxdigit() has undefined behaviour. The character is now
 * checked to be ASCII first; hex digits are all ASCII, so accepted input is
 * unchanged.
 *
 * @param n_digits  number of hex digits expected (callers pass 4 or 8)
 */
448 static utf32 parse_universal_char(unsigned const n_digits)
451 for (unsigned k = n_digits; k != 0; --k) {
452 if (input.c < 0x80 && isxdigit(input.c)) {
453 v = 16 * v + digit_value(input.c);
454 if (!resolve_escape_sequences)
455 obstack_1grow(&symbol_obstack, input.c);
458 errorf(&input.position,
459 "short universal character name, expected %u more digits",
464 if (!is_universal_char_valid(v)) {
465 errorf(&input.position,
466 "\\%c%0*X is not a valid universal character name",
467 n_digits == 4 ? 'u' : 'U', (int)n_digits, v);
/* Table-driven check used by is_universal_char_valid_identifier(): is the code
 * point `v` listed as allowed in identifiers (presumably per C99 Annex D --
 * TODO confirm)?
 * `v` is compared against a list of inclusive [low, high] ranges and a list of
 * single code points, both by linear scan. The tables are not sorted. The
 * statements executed on a match and the final return are not visible in this
 * excerpt. */
472 static bool is_universal_char_valid_identifier_c99(utf32 const v)
/* individual code points */
474 static const utf32 single_chars[] = {
475 0x00AA, 0x00BA, 0x0386, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0,
476 0x1F59, 0x1F5B, 0x1F5D, 0x05BF, 0x09B2, 0x0A02, 0x0A5E, 0x0A74,
477 0x0A8D, 0x0AD0, 0x0AE0, 0x0B9C, 0x0CDE, 0x0E84, 0x0E8A, 0x0E8D,
478 0x0EA5, 0x0EA7, 0x0EC6, 0x0F00, 0x0F35, 0x0F37, 0x0F39, 0x0F97,
479 0x0FB9, 0x00B5, 0x00B7, 0x02BB, 0x037A, 0x0559, 0x093D, 0x0B3D,
480 0x1FBE, 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128
/* inclusive ranges: {first, last} */
483 static const utf32 ranges[][2] = {
484 {0x00C0, 0x00D6}, {0x00D8, 0x00F6}, {0x00F8, 0x01F5}, {0x01FA, 0x0217},
485 {0x0250, 0x02A8}, {0x1E00, 0x1E9B}, {0x1EA0, 0x1EF9}, {0x0388, 0x038A},
486 {0x038E, 0x03A1}, {0x03A3, 0x03CE}, {0x03D0, 0x03D6}, {0x03E2, 0x03F3},
487 {0x1F00, 0x1F15}, {0x1F18, 0x1F1D}, {0x1F20, 0x1F45}, {0x1F48, 0x1F4D},
488 {0x1F50, 0x1F57}, {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4}, {0x1FB6, 0x1FBC},
489 {0x1FC2, 0x1FC4}, {0x1FC6, 0x1FCC}, {0x1FD0, 0x1FD3}, {0x1FD6, 0x1FDB},
490 {0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4}, {0x1FF6, 0x1FFC}, {0x0401, 0x040C},
491 {0x040E, 0x044F}, {0x0451, 0x045C}, {0x045E, 0x0481}, {0x0490, 0x04C4},
492 {0x04C7, 0x04C8}, {0x04CB, 0x04CC}, {0x04D0, 0x04EB}, {0x04EE, 0x04F5},
493 {0x04F8, 0x04F9}, {0x0531, 0x0556}, {0x0561, 0x0587}, {0x05B0, 0x05B9},
494 {0x05BB, 0x05BD}, {0x05C1, 0x05C2}, {0x05D0, 0x05EA}, {0x05F0, 0x05F2},
495 {0x0621, 0x063A}, {0x0640, 0x0652}, {0x0670, 0x06B7}, {0x06BA, 0x06BE},
496 {0x06C0, 0x06CE}, {0x06D0, 0x06DC}, {0x06E5, 0x06E8}, {0x06EA, 0x06ED},
497 {0x0901, 0x0903}, {0x0905, 0x0939}, {0x093E, 0x094D}, {0x0950, 0x0952},
498 {0x0958, 0x0963}, {0x0981, 0x0983}, {0x0985, 0x098C}, {0x098F, 0x0990},
499 {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B6, 0x09B9}, {0x09BE, 0x09C4},
500 {0x09C7, 0x09C8}, {0x09CB, 0x09CD}, {0x09DC, 0x09DD}, {0x09DF, 0x09E3},
501 {0x09F0, 0x09F1}, {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28},
502 {0x0A2A, 0x0A30}, {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, {0x0A38, 0x0A39},
503 {0x0A3E, 0x0A42}, {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A59, 0x0A5C},
504 {0x0A81, 0x0A83}, {0x0A85, 0x0A8B}, {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8},
505 {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3}, {0x0AB5, 0x0AB9}, {0x0ABD, 0x0AC5},
506 {0x0AC7, 0x0AC9}, {0x0ACB, 0x0ACD}, {0x0B01, 0x0B03}, {0x0B05, 0x0B0C},
507 {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, {0x0B32, 0x0B33},
508 {0x0B36, 0x0B39}, {0x0B3E, 0x0B43}, {0x0B47, 0x0B48}, {0x0B4B, 0x0B4D},
509 {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B61}, {0x0B82, 0x0B83}, {0x0B85, 0x0B8A},
510 {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A}, {0x0B9E, 0x0B9F},
511 {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA}, {0x0BAE, 0x0BB5}, {0x0BB7, 0x0BB9},
512 {0x0BBE, 0x0BC2}, {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD}, {0x0C01, 0x0C03},
513 {0x0C05, 0x0C0C}, {0x0C0E, 0x0C10}, {0x0C12, 0x0C28}, {0x0C2A, 0x0C33},
514 {0x0C35, 0x0C39}, {0x0C3E, 0x0C44}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D},
515 {0x0C60, 0x0C61}, {0x0C82, 0x0C83}, {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90},
516 {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3}, {0x0CB5, 0x0CB9}, {0x0CBE, 0x0CC4},
517 {0x0CC6, 0x0CC8}, {0x0CCA, 0x0CCD}, {0x0CE0, 0x0CE1}, {0x0D02, 0x0D03},
518 {0x0D05, 0x0D0C}, {0x0D0E, 0x0D10}, {0x0D12, 0x0D28}, {0x0D2A, 0x0D39},
519 {0x0D3E, 0x0D43}, {0x0D46, 0x0D48}, {0x0D4A, 0x0D4D}, {0x0D60, 0x0D61},
520 {0x0E01, 0x0E3A}, {0x0E40, 0x0E5B}, {0x0E81, 0x0E82}, {0x0E87, 0x0E88},
521 {0x0E94, 0x0E97}, {0x0E99, 0x0E9F}, {0x0EA1, 0x0EA3}, {0x0EAA, 0x0EAB},
522 {0x0EAD, 0x0EAE}, {0x0EB0, 0x0EB9}, {0x0EBB, 0x0EBD}, {0x0EC0, 0x0EC4},
523 {0x0EC8, 0x0ECD}, {0x0EDC, 0x0EDD}, {0x0F18, 0x0F19}, {0x0F3E, 0x0F47},
524 {0x0F49, 0x0F69}, {0x0F71, 0x0F84}, {0x0F86, 0x0F8B}, {0x0F90, 0x0F95},
525 {0x0F99, 0x0FAD}, {0x0FB1, 0x0FB7}, {0x10A0, 0x10C5}, {0x10D0, 0x10F6},
526 {0x3041, 0x3093}, {0x309B, 0x309C}, {0x30A1, 0x30F6}, {0x30FB, 0x30FC},
527 {0x3105, 0x312C}, {0x4E00, 0x9FA5}, {0xAC00, 0xD7A3}, {0x0660, 0x0669},
528 {0x06F0, 0x06F9}, {0x0966, 0x096F}, {0x09E6, 0x09EF}, {0x0A66, 0x0A6F},
529 {0x0AE6, 0x0AEF}, {0x0B66, 0x0B6F}, {0x0BE7, 0x0BEF}, {0x0C66, 0x0C6F},
530 {0x0CE6, 0x0CEF}, {0x0D66, 0x0D6F}, {0x0E50, 0x0E59}, {0x0ED0, 0x0ED9},
531 {0x0F20, 0x0F33}, {0x02B0, 0x02B8}, {0x02BD, 0x02C1}, {0x02D0, 0x02D1},
532 {0x02E0, 0x02E4}, {0x203F, 0x2040}, {0x210A, 0x2113}, {0x2118, 0x211D},
533 {0x212A, 0x2131}, {0x2133, 0x2138}, {0x2160, 0x2182}, {0x3005, 0x3007},
/* range lookup */
536 for (size_t i = 0; i < sizeof(ranges)/sizeof(ranges[0]); ++i) {
537 if (ranges[i][0] <= v && v <= ranges[i][1])
/* single code point lookup */
540 for (size_t i = 0; i < sizeof(single_chars)/sizeof(single_chars[0]); ++i) {
541 if (v == single_chars[i])
/* Check used by is_universal_char_valid_identifier(): returns true when the
 * code point `v` lies in one of the ranges listed below (presumably the set
 * C11 Annex D allows in identifiers -- TODO confirm). The ranges are in
 * ascending order and inclusive at both ends. The result for values matching
 * none of them is not visible in this excerpt. */
547 static bool is_universal_char_valid_identifier_c11(utf32 const v)
550 if ( v == 0x000A8) return true;
551 if ( v == 0x000AA) return true;
552 if ( v == 0x000AD) return true;
553 if ( v == 0x000AF) return true;
554 if (0x000B2 <= v && v <= 0x000B5) return true;
555 if (0x000B7 <= v && v <= 0x000BA) return true;
556 if (0x000BC <= v && v <= 0x000BE) return true;
557 if (0x000C0 <= v && v <= 0x000D6) return true;
558 if (0x000D8 <= v && v <= 0x000F6) return true;
559 if (0x000F8 <= v && v <= 0x000FF) return true;
560 if (0x00100 <= v && v <= 0x0167F) return true;
561 if (0x01681 <= v && v <= 0x0180D) return true;
562 if (0x0180F <= v && v <= 0x01FFF) return true;
563 if (0x0200B <= v && v <= 0x0200D) return true;
564 if (0x0202A <= v && v <= 0x0202E) return true;
565 if (0x0203F <= v && v <= 0x02040) return true;
566 if ( v == 0x02054) return true;
567 if (0x02060 <= v && v <= 0x0206F) return true;
568 if (0x02070 <= v && v <= 0x0218F) return true;
569 if (0x02460 <= v && v <= 0x024FF) return true;
570 if (0x02776 <= v && v <= 0x02793) return true;
571 if (0x02C00 <= v && v <= 0x02DFF) return true;
572 if (0x02E80 <= v && v <= 0x02FFF) return true;
573 if (0x03004 <= v && v <= 0x03007) return true;
574 if (0x03021 <= v && v <= 0x0302F) return true;
575 if (0x03031 <= v && v <= 0x0303F) return true;
576 if (0x03040 <= v && v <= 0x0D7FF) return true;
577 if (0x0F900 <= v && v <= 0x0FD3D) return true;
578 if (0x0FD40 <= v && v <= 0x0FDCF) return true;
579 if (0x0FDF0 <= v && v <= 0x0FE44) return true;
580 if (0x0FE47 <= v && v <= 0x0FFFD) return true;
/* supplementary planes 1..14, each without its last two code points */
581 if (0x10000 <= v && v <= 0x1FFFD) return true;
582 if (0x20000 <= v && v <= 0x2FFFD) return true;
583 if (0x30000 <= v && v <= 0x3FFFD) return true;
584 if (0x40000 <= v && v <= 0x4FFFD) return true;
585 if (0x50000 <= v && v <= 0x5FFFD) return true;
586 if (0x60000 <= v && v <= 0x6FFFD) return true;
587 if (0x70000 <= v && v <= 0x7FFFD) return true;
588 if (0x80000 <= v && v <= 0x8FFFD) return true;
589 if (0x90000 <= v && v <= 0x9FFFD) return true;
590 if (0xA0000 <= v && v <= 0xAFFFD) return true;
591 if (0xB0000 <= v && v <= 0xBFFFD) return true;
592 if (0xC0000 <= v && v <= 0xCFFFD) return true;
593 if (0xD0000 <= v && v <= 0xDFFFD) return true;
594 if (0xE0000 <= v && v <= 0xEFFFD) return true;
/* Decides whether code point `v` is allowed inside an identifier by
 * delegating to either the C11 or the C99 rule set. The condition selecting
 * between the two returns is not visible in this excerpt; presumably it tests
 * c_mode for _C11, as is_universal_char_invalid_identifier_start() does --
 * TODO confirm. */
598 static bool is_universal_char_valid_identifier(utf32 const v)
601 return is_universal_char_valid_identifier_c11(v);
602 return is_universal_char_valid_identifier_c99(v);
/* Returns true when code point `v` must not appear as the first character of
 * an identifier. The four ranges below are checked; what happens when the
 * language mode lacks _C11 (the statement under the first `if`) and the final
 * return are not visible in this excerpt. */
605 static bool is_universal_char_invalid_identifier_start(utf32 const v)
607 if (! (c_mode & _C11))
611 if (0x0300 <= v && v <= 0x036F) return true;
612 if (0x1DC0 <= v && v <= 0x1DFF) return true;
613 if (0x20D0 <= v && v <= 0x20FF) return true;
614 if (0xFE20 <= v && v <= 0xFE2F) return true;
619 * Parse an escape sequence.
/* Parses the part of an escape sequence that follows the backslash and
 * returns the character value it denotes.
 * Handles the simple escapes, hexadecimal and octal sequences, universal
 * character names (\u with 4 digits, \U with 8) and an escape yielding 27
 * (ESC). End of input and unknown escapes are reported through parse_error().
 * The case labels for several branches and the value returned after an error
 * are not visible in this excerpt. */
621 static utf32 parse_escape_sequence(void)
/* remember the escape character; octal parsing needs it as first digit */
625 utf32 const ec = input.c;
629 case '"': return '"';
630 case '\'': return '\'';
631 case '\\': return '\\';
632 case '?': return '\?';
633 case 'a': return '\a';
634 case 'b': return '\b';
635 case 'f': return '\f';
636 case 'n': return '\n';
637 case 'r': return '\r';
638 case 't': return '\t';
639 case 'v': return '\v';
641 return parse_hex_sequence();
650 return parse_octal_sequence(ec);
652 parse_error("reached end of file while parsing escape sequence");
654 /* \E is not documented, but handled, by GCC. It is acceptable according
655 * to §6.11.4, whereas \e is not. */
659 return 27; /* hopefully 27 is ALWAYS the code for ESCAPE */
662 case 'U': return parse_universal_char(8);
663 case 'u': return parse_universal_char(4);
668 /* §6.4.4.4:8 footnote 64 */
669 parse_error("unknown escape sequence");
/* Inserts `string` into the global string set and gets back the set's copy.
 * If an equal string was already present (the result differs from the
 * argument), the newly built `string` is released from symbol_obstack again.
 * `string` must therefore be the most recent symbol_obstack object. The
 * return statement is not visible in this excerpt. */
673 static const char *identify_string(char *string)
675 const char *result = strset_insert(&stringset, string);
676 if (result != string) {
677 obstack_free(&symbol_obstack, string);
/* Turns the bytes accumulated so far on symbol_obstack into a string_t.
 * A terminating NUL is appended; the reported length excludes it. The
 * finished object is deduplicated through identify_string().
 *
 * @param enc  encoding recorded in the resulting string_t
 */
682 static string_t sym_make_string(string_encoding_t const enc)
684 obstack_1grow(&symbol_obstack, '\0');
685 size_t const len = obstack_object_size(&symbol_obstack) - 1;
686 char *const string = obstack_finish(&symbol_obstack);
687 char const *const result = identify_string(string);
688 return (string_t){ result, len, enc };
/* Builds a deduplicated string_t with STRING_ENCODING_CHAR from the
 * NUL-terminated C string `string`. The bytes are copied onto symbol_obstack
 * first, so the argument is not retained. */
691 string_t make_string(char const *const string)
693 obstack_grow(&symbol_obstack, string, strlen(string));
694 return sym_make_string(STRING_ENCODING_CHAR);
/* Returns the largest character value for the given string encoding.
 * parse_string() keeps the result as `limit` next to its "escape sequence
 * out of range" warning. An unknown encoding ends in panic(). */
697 static utf32 get_string_encoding_limit(string_encoding_t const enc)
700 case STRING_ENCODING_CHAR: return 0xFF;
701 case STRING_ENCODING_CHAR16: return 0xFFFF;
702 case STRING_ENCODING_CHAR32: return 0xFFFFFFFF;
703 case STRING_ENCODING_UTF8: return 0xFFFFFFFF;
704 case STRING_ENCODING_WIDE: return 0xFFFFFFFF; // FIXME depends on settings
706 panic("invalid string encoding");
/* Lexes a string literal or character constant up to the closing `delimiter`
 * and stores the result in pp_token.
 *
 * With resolve_escape_sequences set, escape sequences are decoded: the value
 * is stored as a single byte for STRING_ENCODING_CHAR and UTF-8 encoded
 * otherwise. Without it, the source characters are copied through verbatim.
 * A newline or end of file inside the literal is reported with errorf(); the
 * EOF message is positioned at the line where the literal started. Large
 * parts of the control flow are not visible in this excerpt.
 *
 * @param delimiter  terminating character (callers pass '"' or '\'')
 * @param kind       token kind stored in pp_token.kind
 * @param enc        encoding of the literal
 * @param context    human-readable name used in diagnostics
 */
709 static void parse_string(utf32 const delimiter, token_kind_t const kind,
710 string_encoding_t const enc,
711 char const *const context)
713 const unsigned start_linenr = input.position.lineno;
717 utf32 const limit = get_string_encoding_limit(enc);
721 if (resolve_escape_sequences) {
722 utf32 const tc = parse_escape_sequence();
724 warningf(WARN_OTHER, &pp_token.base.source_position, "escape sequence out of range");
726 if (enc == STRING_ENCODING_CHAR) {
727 obstack_1grow(&symbol_obstack, tc);
729 obstack_grow_utf8(&symbol_obstack, tc);
732 obstack_1grow(&symbol_obstack, (char)input.c);
734 obstack_1grow(&symbol_obstack, (char)input.c);
741 errorf(&pp_token.base.source_position, "newline while parsing %s", context);
745 source_position_t source_position;
746 source_position.input_name = pp_token.base.source_position.input_name;
747 source_position.lineno = start_linenr;
748 errorf(&source_position, "EOF while parsing %s", context);
753 if (input.c == delimiter) {
757 obstack_grow_utf8(&symbol_obstack, input.c);
765 pp_token.kind = kind;
766 pp_token.literal.string = sym_make_string(enc);
/* Lexes a double-quoted string literal with encoding `enc` into pp_token
 * (kind T_STRING_LITERAL). */
769 static void parse_string_literal(string_encoding_t const enc)
771 parse_string('"', T_STRING_LITERAL, enc, "string literal");
/* Lexes a single-quoted character constant with encoding `enc` into pp_token
 * (kind T_CHARACTER_CONSTANT) and reports an error if nothing was between the
 * quotes. */
774 static void parse_character_constant(string_encoding_t const enc)
776 parse_string('\'', T_CHARACTER_CONSTANT, enc, "character constant");
777 if (pp_token.literal.string.size == 0) {
778 parse_error("empty character constant");
/* Case-label helper macros, used as `case SYMBOL_CASES_WITHOUT_E_P:` (see
 * parse_number). SYMBOL_CASES_WITHOUT_E_P begins with '$', which jumps to the
 * dollar_sign label when allow_dollar_in_symbol is off; SYMBOL_CASES extends
 * it. DIGIT_CASES is defined alongside. Most of each macro body is not
 * visible in this excerpt. */
782 #define SYMBOL_CASES_WITHOUT_E_P \
783 '$': if (!allow_dollar_in_symbol) goto dollar_sign; \
834 #define SYMBOL_CASES \
835 SYMBOL_CASES_WITHOUT_E_P: \
841 #define DIGIT_CASES \
/* Begins expanding `definition`: it becomes the new current_expansion with
 * the previous one as its parent, its read position is reset to 0 and it is
 * flagged as expanding. The first stored token (if any) inherits the
 * whitespace flag of the token that triggered the expansion. */
853 static void start_expanding(pp_definition_t *definition)
855 definition->parent_expansion = current_expansion;
856 definition->expand_pos = 0;
857 definition->is_expanding = true;
858 if (definition->list_len > 0) {
859 definition->token_list[0].had_whitespace
860 = info.had_whitespace;
862 current_expansion = definition;
/* Ends the expansion of `definition`, which must be the current (innermost)
 * expansion: clears its expanding state, resets argument_expanding if it
 * referred to this definition, and makes the parent the current expansion
 * again. */
865 static void finished_expanding(pp_definition_t *definition)
867 assert(definition->is_expanding);
868 pp_definition_t *parent = definition->parent_expansion;
869 definition->parent_expansion = NULL;
870 definition->is_expanding = false;
872 /* stop further expanding once we expanded a parameter used in a
874 if (definition == argument_expanding)
875 argument_expanding = NULL;
877 assert(current_expansion == definition);
878 current_expansion = parent;
/* Appends the source spelling of a string/character literal to `obst`:
 * encoding prefix, opening delimiter, contents, closing delimiter.
 * With resolve_escape_sequences set, the contents are copied unchanged.
 * Otherwise every backslash and double quote is preceded by a backslash.
 *
 * @param obst       obstack to append to
 * @param string     literal contents and encoding
 * @param delimiter  text emitted before and after the contents
 */
881 static void grow_string_escaped(struct obstack *obst, const string_t *string, char const *delimiter)
883 char const *prefix = get_string_encoding_prefix(string->encoding);
884 obstack_printf(obst, "%s%s", prefix, delimiter);
885 size_t size = string->size;
886 const char *str = string->begin;
887 if (resolve_escape_sequences) {
888 obstack_grow(obst, str, size);
890 for (size_t i = 0; i < size; ++i) {
891 const char c = str[i];
892 if (c == '\\' || c == '"')
893 obstack_1grow(obst, '\\');
894 obstack_1grow(obst, c);
897 obstack_printf(obst, "%s", delimiter);
/* Appends the spelling of `token` to `obst` (used by stringify()).
 * String literals and character constants go through grow_string_escaped().
 * For string literals the delimiter itself is a backslash-escaped quote when
 * escape sequences are not resolved. One branch copies
 * literal.string verbatim and another copies the token's symbol text; their
 * case labels are not visible in this excerpt. */
900 static void grow_token(struct obstack *obst, const token_t *token)
902 switch (token->kind) {
904 obstack_grow(obst, token->literal.string.begin, token->literal.string.size);
907 case T_STRING_LITERAL: {
908 char const *const delimiter = resolve_escape_sequences ? "\"" : "\\\"";
909 grow_string_escaped(obst, &token->literal.string, delimiter);
913 case T_CHARACTER_CONSTANT:
914 grow_string_escaped(obst, &token->literal.string, "'");
919 const char *str = token->base.symbol->string;
920 size_t len = strlen(str);
921 obstack_grow(obst, str, len);
/* Produces a string-literal token (in pp_token) from the tokens of
 * `definition`. Token spellings are concatenated; a single space is inserted
 * before every token except the first that had whitespace in front of it.
 * symbol_obstack must be empty on entry. */
927 static void stringify(const pp_definition_t *definition)
929 assert(obstack_object_size(&symbol_obstack) == 0);
931 size_t list_len = definition->list_len;
932 for (size_t p = 0; p < list_len; ++p) {
933 const saved_token_t *saved = &definition->token_list[p];
934 if (p > 0 && saved->had_whitespace)
935 obstack_1grow(&symbol_obstack, ' ');
936 grow_token(&symbol_obstack, &saved->token);
938 pp_token.kind = T_STRING_LITERAL;
939 pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
/* Makes pp_token a punctuator of the given kind, using the standard symbol
 * for that kind from token_symbols[]. */
942 static inline void set_punctuator(token_kind_t const kind)
944 pp_token.kind = kind;
945 pp_token.base.symbol = token_symbols[kind];
/* Makes pp_token a punctuator of the given kind but with an explicitly
 * supplied symbol, so that the digraph spelling is kept instead of the
 * standard one. */
948 static inline void set_digraph(token_kind_t const kind, symbol_t *const symbol)
950 pp_token.kind = kind;
951 pp_token.base.symbol = symbol;
955 * returns next final token from a preprocessor macro expansion
/* Fetches the next token from the running macro expansion into pp_token.
 *
 * Nothing is produced when no expansion is active. When the current expansion
 * is exhausted it is finished and, if it was the outermost one, tokens are
 * taken from the input again. A '#' followed by a macro parameter is
 * recognised as the stringify operator (the handling itself is not visible in
 * this excerpt). The token's whitespace flag is taken from the stored token,
 * except for the first token of an expansion, and its source position is set
 * to expansion_pos. The return statements are not visible in this excerpt.
 *
 * Fix: the two token_list accesses had lost their address-of operator and the
 * start of the identifier to an encoding error; they are restored to
 * `&current_expansion->token_list[...]`. */
957 static bool expand_next(void)
959 if (current_expansion == NULL)
963 size_t pos = current_expansion->expand_pos;
964 if (pos >= current_expansion->list_len) {
965 finished_expanding(current_expansion);
966 /* it was the outermost expansion, parse pptoken normally */
967 if (current_expansion == NULL) {
972 const saved_token_t *saved = &current_expansion->token_list[pos++];
973 pp_token = saved->token;
974 if (pp_token.kind == '#') {
975 if (pos < current_expansion->list_len) {
976 const saved_token_t *next = &current_expansion->token_list[pos];
977 if (next->token.kind == T_MACRO_PARAMETER) {
978 pp_definition_t *def = next->token.macro_parameter.def;
979 assert(def != NULL && def->is_parameter);
/* the first token keeps the whitespace flag set by start_expanding() */
986 if (current_expansion->expand_pos > 0)
987 info.had_whitespace = saved->had_whitespace;
988 current_expansion->expand_pos = pos;
989 pp_token.base.source_position = expansion_pos;
995 * Returns the next token kind found when continuing the current expansions
996 * without starting new sub-expansions.
/* Looks ahead without consuming anything: walks from the innermost expansion
 * outwards and returns the kind of the first token still pending in any of
 * them. The value returned when every expansion is exhausted is not visible
 * in this excerpt. */
998 static token_kind_t peek_expansion(void)
1000 for (pp_definition_t *e = current_expansion; e; e = e->parent_expansion) {
1001 if (e->expand_pos < e->list_len)
1002 return e->token_list[e->expand_pos].token.kind;
/* Skips a line comment. The comment counts as whitespace in front of the next
 * token (info.had_whitespace is set). The scanning loop is not visible in
 * this excerpt. */
1007 static void skip_line_comment(void)
1009 info.had_whitespace = true;
/* Skips a block comment. The comment counts as whitespace in front of the
 * next token. When the closing '/' is found on a line other than
 * input.output_line, the current column is recorded as the line's leading
 * whitespace. Reaching end of file first is reported with errorf() at the
 * line where the comment started. The scanning loop is only partly visible
 * in this excerpt. */
1026 static void skip_multiline_comment(void)
1028 info.had_whitespace = true;
/* remembered for the end-of-file diagnostic */
1030 unsigned start_linenr = input.position.lineno;
1035 if (input.c == '*') {
1036 /* TODO: nested comment, warn here */
1041 if (input.c == '/') {
1042 if (input.position.lineno != input.output_line)
1043 info.whitespace_at_line_begin = input.position.colno;
1053 source_position_t source_position;
1054 source_position.input_name = pp_token.base.source_position.input_name;
1055 source_position.lineno = start_linenr;
1056 errorf(&source_position, "at end of file while looking for comment end");
/* Skips input up to the end of the current line, treating comments as
 * skippable. With `stop_at_non_whitespace` set the function reacts to the
 * first non-whitespace character (presumably by stopping early -- TODO
 * confirm). The meaning of the return value is not visible in this excerpt. */
1067 static bool skip_till_newline(bool stop_at_non_whitespace)
1079 if (input.c == '/') {
1081 skip_line_comment();
1083 } else if (input.c == '*') {
1085 skip_multiline_comment();
1097 if (stop_at_non_whitespace)
/* Skips whitespace, newlines and comments while keeping `info` up to date:
 * one branch counts leading whitespace and sets had_whitespace, another
 * marks the start of a new line and resets the leading-whitespace count.
 * The case labels selecting these branches are not visible in this
 * excerpt. */
1106 static void skip_whitespace(void)
1112 ++info.whitespace_at_line_begin;
1113 info.had_whitespace = true;
1118 info.at_line_begin = true;
1119 info.had_whitespace = true;
1120 info.whitespace_at_line_begin = 0;
1125 if (input.c == '/') {
1127 skip_line_comment();
1129 } else if (input.c == '*') {
1131 skip_multiline_comment();
/* Consumes the current token, which must be the preprocessor keyword `kind`
 * (checked via the symbol's pp_ID; debug builds only). The advance to the
 * next token is not visible in this excerpt. */
1145 static inline void eat_pp(pp_token_kind_t const kind)
1147 assert(pp_token.base.symbol->pp_ID == kind);
/* Consumes the current token, which must have kind `kind` (checked in debug
 * builds only). The advance to the next token is not visible in this
 * excerpt. */
1152 static inline void eat_token(token_kind_t const kind)
1154 assert(pp_token.kind == kind);
/* Maps an identifier symbol to the string encoding it selects when used as a
 * literal prefix. "L" always selects the wide encoding. "U", "u" and "u8"
 * are only recognised when c_mode includes _C11. Any other symbol yields
 * STRING_ENCODING_CHAR, which callers treat as "not a prefix". */
1159 static string_encoding_t identify_encoding_prefix(symbol_t *const sym)
1161 if (sym == symbol_L) return STRING_ENCODING_WIDE;
1162 if (c_mode & _C11) {
1163 if (sym == symbol_U) return STRING_ENCODING_CHAR32;
1164 if (sym == symbol_u) return STRING_ENCODING_CHAR16;
1165 if (sym == symbol_u8) return STRING_ENCODING_UTF8;
1167 return STRING_ENCODING_CHAR;
/* Lexes an identifier into pp_token.
 *
 * Identifier characters are collected on symbol_obstack (which must be empty
 * on entry). Universal character names are validated and either UTF-8
 * encoded (escape sequences resolved) or copied through as written. The
 * finished spelling is interned in the symbol table.
 *
 * If the identifier is directly followed by a quote and is an encoding
 * prefix (L, U, u, u8), a prefixed string literal or character constant is
 * lexed instead; u8 is diagnosed for character constants. Otherwise pp_token
 * becomes the symbol's token. Parts of the control flow are not visible in
 * this excerpt.
 *
 * Fix: corrected the misspelling "chracter" in the u8 diagnostic. */
1170 static void parse_symbol(void)
1172 assert(obstack_object_size(&symbol_obstack) == 0);
1177 obstack_1grow(&symbol_obstack, (char) input.c);
1186 case 'U': n = 8; goto universal;
1187 case 'u': n = 4; goto universal;
1189 if (!resolve_escape_sequences) {
1190 obstack_1grow(&symbol_obstack, '\\');
1191 obstack_1grow(&symbol_obstack, input.c);
1194 utf32 const v = parse_universal_char(n);
1195 if (!is_universal_char_valid_identifier(v)) {
1196 if (is_universal_char_valid(v)) {
1197 errorf(&input.position,
1198 "universal character \\%c%0*X is not valid in an identifier",
1199 n == 4 ? 'u' : 'U', (int)n, v);
1201 } else if (obstack_object_size(&symbol_obstack) == 0 && is_universal_char_invalid_identifier_start(v)) {
1202 errorf(&input.position,
1203 "universal character \\%c%0*X is not valid as start of an identifier",
1204 n == 4 ? 'u' : 'U', (int)n, v);
1205 } else if (resolve_escape_sequences) {
1206 obstack_grow_utf8(&symbol_obstack, v);
1224 obstack_1grow(&symbol_obstack, '\0');
1225 char *string = obstack_finish(&symbol_obstack);
1227 symbol_t *symbol = symbol_table_insert(string);
1229 /* Might be a prefixed string or character constant: L/U/u/u8"string". */
1230 if (input.c == '"') {
1231 string_encoding_t const enc = identify_encoding_prefix(symbol);
1232 if (enc != STRING_ENCODING_CHAR) {
1233 parse_string_literal(enc);
1236 } else if (input.c == '\'') {
1237 string_encoding_t const enc = identify_encoding_prefix(symbol);
1238 if (enc != STRING_ENCODING_CHAR) {
1239 if (enc == STRING_ENCODING_UTF8) {
1240 errorf(&pp_token.base.source_position, "'u8' is not a valid encoding for a character constant");
1242 parse_character_constant(enc);
1247 pp_token.kind = symbol->ID;
1248 pp_token.base.symbol = symbol;
1250 /* we can free the memory from symbol obstack if we already had an entry in
1251 * the symbol table */
1252 if (symbol->string != string) {
1253 obstack_free(&symbol_obstack, string);
/* Lexes a preprocessing number into pp_token (kind T_NUMBER). The spelling is
 * collected character by character on symbol_obstack and kept as text; no
 * numeric conversion happens here. One branch additionally accepts a '+' or
 * '-' directly after the preceding character (presumably an exponent marker,
 * going by the name SYMBOL_CASES_WITHOUT_E_P -- TODO confirm). The loop
 * structure is not visible in this excerpt. */
1257 static void parse_number(void)
1259 obstack_1grow(&symbol_obstack, (char) input.c);
1266 case SYMBOL_CASES_WITHOUT_E_P:
1267 obstack_1grow(&symbol_obstack, (char) input.c);
1275 obstack_1grow(&symbol_obstack, (char) input.c);
1277 if (input.c == '+' || input.c == '-') {
1278 obstack_1grow(&symbol_obstack, (char) input.c);
1290 pp_token.kind = T_NUMBER;
1291 pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
/* Helper macros for next_input_token() to recognise multi-character
 * punctuators. MAYBE(ch, kind) sets the punctuator `kind`; presumably when
 * the next character is `ch` -- the test itself is not visible in this
 * excerpt. MAYBE_DIGRAPH does the same with an explicit digraph symbol.
 * ELSE(kind) is the fallback that sets `kind` and returns. Most of each
 * macro body is not visible here. */
1294 #define MAYBE_PROLOG \
1298 #define MAYBE(ch, kind) \
1301 set_punctuator(kind); \
1304 #define MAYBE_DIGRAPH(ch, kind, symbol) \
1307 set_digraph(kind, symbol); \
1310 #define ELSE_CODE(code) \
1315 #define ELSE(kind) ELSE_CODE(set_punctuator(kind); return;)
1317 /** identifies and returns the next preprocessing token contained in the
1318 * input stream. No macro expansion is performed. */
/* Lexer entry point: reads one preprocessing token from the current input
 * into pp_token and updates `info` (whitespace / line-begin state).
 *
 * Visible behaviour:
 *  - whitespace state prepared in next_info is consumed, otherwise the flags
 *    are reset;
 *  - the token's position is the input position at its start;
 *  - whitespace and newlines update `info`; comments are skipped;
 *  - string literals and character constants without prefix use
 *    STRING_ENCODING_CHAR;
 *  - multi-character punctuators and digraphs go through MAYBE /
 *    MAYBE_DIGRAPH; '::' is only formed when c_mode includes _CXX;
 *  - at end of input, a nested input is closed and the enclosing one is
 *    restored, followed by a line directive with flag "2"; T_EOF is produced
 *    at line begin;
 *  - an unrecognised character is optionally reported
 *    (error_on_unknown_chars) and turned into a T_UNKNOWN_CHAR token whose
 *    symbol is the UTF-8 spelling of the character.
 * The switch statement and its case labels are not visible in this
 * excerpt. */
1319 static void next_input_token(void)
1321 if (next_info.had_whitespace) {
1323 next_info.had_whitespace = false;
1325 info.at_line_begin = false;
1326 info.had_whitespace = false;
1329 pp_token.base.source_position = input.position;
1330 pp_token.base.symbol = NULL;
1335 info.whitespace_at_line_begin++;
1336 info.had_whitespace = true;
1341 info.at_line_begin = true;
1342 info.had_whitespace = true;
1343 info.whitespace_at_line_begin = 0;
1355 parse_string_literal(STRING_ENCODING_CHAR);
1359 parse_character_constant(STRING_ENCODING_CHAR);
1381 MAYBE('.', T_DOTDOTDOT)
1385 set_punctuator('.');
1391 MAYBE('&', T_ANDAND)
1392 MAYBE('=', T_ANDEQUAL)
1396 MAYBE('=', T_ASTERISKEQUAL)
1400 MAYBE('+', T_PLUSPLUS)
1401 MAYBE('=', T_PLUSEQUAL)
1405 MAYBE('>', T_MINUSGREATER)
1406 MAYBE('-', T_MINUSMINUS)
1407 MAYBE('=', T_MINUSEQUAL)
1411 MAYBE('=', T_EXCLAMATIONMARKEQUAL)
1415 MAYBE('=', T_SLASHEQUAL)
1418 skip_multiline_comment();
1422 skip_line_comment();
1427 MAYBE_DIGRAPH('>', '}', symbol_percentgreater)
1428 MAYBE('=', T_PERCENTEQUAL)
1433 MAYBE_DIGRAPH(':', T_HASHHASH, symbol_percentcolonpercentcolon)
1437 goto digraph_percentcolon;
1440 digraph_percentcolon:
1441 set_digraph('#', symbol_percentcolon);
1447 MAYBE_DIGRAPH(':', '[', symbol_lesscolon)
1448 MAYBE_DIGRAPH('%', '{', symbol_lesspercent)
1449 MAYBE('=', T_LESSEQUAL)
1452 MAYBE('=', T_LESSLESSEQUAL)
1457 MAYBE('=', T_GREATEREQUAL)
1460 MAYBE('=', T_GREATERGREATEREQUAL)
1461 ELSE(T_GREATERGREATER)
1465 MAYBE('=', T_CARETEQUAL)
1469 MAYBE('=', T_PIPEEQUAL)
1470 MAYBE('|', T_PIPEPIPE)
1474 MAYBE_DIGRAPH('>', ']', symbol_colongreater)
1476 if (c_mode & _CXX) {
1478 set_punctuator(T_COLONCOLON);
1485 MAYBE('=', T_EQUALEQUAL)
1489 MAYBE('#', T_HASHHASH)
1502 set_punctuator(input.c);
/* end of an included input: close it and continue with the enclosing one */
1507 if (input_stack != NULL) {
1508 fclose(close_pp_input());
1509 pop_restore_input();
1512 if (input.c == (utf32)EOF)
1513 --input.position.lineno;
1514 print_line_directive(&input.position, "2");
1517 info.at_line_begin = true;
1518 set_punctuator(T_EOF);
1524 int next_c = input.c;
1527 if (next_c == 'U' || next_c == 'u') {
1534 if (error_on_unknown_chars) {
1535 errorf(&pp_token.base.source_position,
1536 "unknown character '%lc' found\n", input.c);
/* build a symbol from the UTF-8 spelling of the unknown character */
1540 assert(obstack_object_size(&symbol_obstack) == 0);
1541 obstack_grow_utf8(&symbol_obstack, input.c);
1542 obstack_1grow(&symbol_obstack, '\0');
1543 char *const string = obstack_finish(&symbol_obstack);
1544 symbol_t *const symbol = symbol_table_insert(string);
1545 if (symbol->string != string)
1546 obstack_free(&symbol_obstack, string);
1548 pp_token.kind = T_UNKNOWN_CHAR;
1549 pp_token.base.symbol = symbol;
/* Writes `string` to the output stream `out` with C escape sequences: the
 * characters listed below get their symbolic escape, and one further branch
 * prints a three-digit octal escape (its condition is not visible in this
 * excerpt; presumably it covers non-printable characters). The surrounding
 * quotes and the pass-through of ordinary characters are not visible either.
 *
 * Fix: the octal escape converted the char straight to unsigned. Where char
 * is signed, bytes >= 0x80 are negative and were sign-extended, printing an
 * eleven-digit escape such as \37777777703 instead of \303. The byte is now
 * converted through unsigned char first. */
1556 static void print_quoted_string(const char *const string)
1559 for (const char *c = string; *c != 0; ++c) {
1561 case '"': fputs("\\\"", out); break;
1562 case '\\': fputs("\\\\", out); break;
1563 case '\a': fputs("\\a", out); break;
1564 case '\b': fputs("\\b", out); break;
1565 case '\f': fputs("\\f", out); break;
1566 case '\n': fputs("\\n", out); break;
1567 case '\r': fputs("\\r", out); break;
1568 case '\t': fputs("\\t", out); break;
1569 case '\v': fputs("\\v", out); break;
1570 case '\?': fputs("\\?", out); break;
1573 fprintf(out, "\\%03o", (unsigned)(unsigned char)*c);
/* Emits a line directive of the form `# <lineno> "<input name>"` to `out`
 * and records what was printed: printed_input_name is updated and
 * input.output_line is set to the line before `pos`.
 *
 * @param pos  position to announce
 * @param add  optional flag text (callers pass "1", "2" or NULL); how it and
 *             the system-header flag are printed is not visible in this
 *             excerpt
 */
1583 static void print_line_directive(const source_position_t *pos, const char *add)
1588 fprintf(out, "# %u ", pos->lineno);
1589 print_quoted_string(pos->input_name);
1594 if (pos->is_system_header) {
1598 printed_input_name = pos->input_name;
1599 input.output_line = pos->lineno-1;
/* Brings the output line in sync with the line of the current token. The
 * distance to input.output_line is computed; one branch prints a line
 * directive and another loops once per missing line (presumably writing a
 * newline each time; the loop body and the condition choosing between the
 * two are not visible in this excerpt). Afterwards a loop runs once per
 * leading-whitespace column of the line, with at least one iteration before
 * a '#' token.
 * The return value is tested by emit_pp_token(); a true result suppresses
 * its separator condition. */
1602 static bool emit_newlines(void)
1607 unsigned delta = pp_token.base.source_position.lineno - input.output_line;
1613 print_line_directive(&pp_token.base.source_position, NULL);
1616 for (unsigned i = 0; i < delta; ++i) {
1620 input.output_line = pp_token.base.source_position.lineno;
1622 unsigned whitespace = info.whitespace_at_line_begin;
1623 /* make sure there is at least 1 whitespace before a (macro-expanded)
1624 * '#' at line begin. I'm not sure why this is good, but gcc does it. */
1625 if (pp_token.kind == '#' && whitespace == 0)
1627 for (unsigned i = 0; i < whitespace; ++i)
/**
 * Configure the preprocessor's output stream.
 * The two lexer flags error_on_unknown_chars and resolve_escape_sequences are
 * always switched together: both off in one branch, both on in the other.
 * NOTE(review): the branch condition is elided here — presumably whether
 * @p output is non-NULL; confirm.
 */
1633 void set_preprocessor_output(FILE *output)
1637 error_on_unknown_chars = false;
1638 resolve_escape_sequences = false;
1640 error_on_unknown_chars = true;
1641 resolve_escape_sequences = true;
/**
 * Print the current token (pp_token) to 'out'.
 * String and character literals are preceded by their encoding prefix; other
 * tokens fall back to the spelling stored in their symbol. The token kind is
 * remembered in last_token for the next call.
 */
1645 void emit_pp_token(void)
/* separator handling: only relevant when emit_newlines() returned false, and
 * then only if the token had leading whitespace or would otherwise paste
 * together with the previously emitted token */
1647 if (!emit_newlines() &&
1648 (info.had_whitespace || tokens_would_paste(last_token, pp_token.kind)))
1651 switch (pp_token.kind) {
1653 fputs(pp_token.literal.string.begin, out);
1656 case T_STRING_LITERAL:
1657 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1659 fputs(pp_token.literal.string.begin, out);
1663 case T_CHARACTER_CONSTANT:
1664 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1666 fputs(pp_token.literal.string.begin, out);
/* macro parameters must have been replaced before a token is emitted */
1670 case T_MACRO_PARAMETER:
1671 panic("macro parameter not expanded");
1674 fputs(pp_token.base.symbol->string, out);
1677 last_token = pp_token.kind;
/**
 * Skip the rest of the current directive: loops until info.at_line_begin is
 * set, i.e. until the first token of a following line has been reached.
 */
1680 static void eat_pp_directive(void)
1682 while (!info.at_line_begin) {
/**
 * Equality test for two string_t values.
 * The sizes are compared first; afterwards both buffers are walked in
 * parallel for 'size' characters.
 */
1687 static bool strings_equal(const string_t *string1, const string_t *string2)
1689 size_t size = string1->size;
1690 if (size != string2->size)
1693 const char *c1 = string1->begin;
1694 const char *c2 = string2->begin;
1695 for (size_t i = 0; i < size; ++i, ++c1, ++c2) {
/**
 * Equality test for two preprocessing tokens.
 * Tokens of different kind never match. Literal tokens are compared by their
 * string contents, macro parameters by the symbol of the parameter they refer
 * to, and the remaining tokens by symbol identity.
 * NOTE(review): the visible literal comparison looks only at the string
 * contents, not at the encoding — confirm this is intended.
 */
1702 static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
1704 if (token1->kind != token2->kind)
1707 switch (token1->kind) {
1709 case T_CHARACTER_CONSTANT:
1710 case T_STRING_LITERAL:
1711 return strings_equal(&token1->literal.string, &token2->literal.string);
1713 case T_MACRO_PARAMETER:
1714 return token1->macro_parameter.def->symbol
1715 == token2->macro_parameter.def->symbol;
1718 return token1->base.symbol == token2->base.symbol;
/**
 * Equality test for the replacement lists of two macro definitions.
 * Both lists must have the same length, and every token pair must match both
 * in pp_tokens_equal() and in its had_whitespace flag.
 * NOTE(review): the visible lines compare only the token lists, not the
 * parameter lists themselves.
 */
1722 static bool pp_definitions_equal(const pp_definition_t *definition1,
1723 const pp_definition_t *definition2)
1725 if (definition1->list_len != definition2->list_len)
1728 size_t len = definition1->list_len;
1729 const saved_token_t *t1 = definition1->token_list;
1730 const saved_token_t *t2 = definition2->token_list;
1731 for (size_t i = 0; i < len; ++i, ++t1, ++t2) {
1732 if (!pp_tokens_equal(&t1->token, &t2->token))
1734 if (t1->had_whitespace != t2->had_whitespace)
/**
 * Report, at the position of the current token, that a '#' in a macro body is
 * not followed by a macro parameter.
 */
1740 static void missing_macro_param_error(void)
1742 errorf(&pp_token.base.source_position,
1743 "'#' is not followed by a macro parameter");
/**
 * Check whether the current token may serve as a macro name and report an
 * error otherwise.
 *
 * @param context  directive name used in the diagnostics (callers pass
 *                 "#define" or "#undef")
 */
1746 static bool is_defineable_token(char const *const context)
/* already on the next line: the directive had no operand at all */
1748 if (info.at_line_begin) {
1749 errorf(&pp_token.base.source_position, "unexpected end of line after %s", context);
1752 symbol_t *const symbol = pp_token.base.symbol;
1756 if (pp_token.kind != T_IDENTIFIER) {
1757 switch (symbol->string[0]) {
1764 errorf(&pp_token.base.source_position, "expected identifier after %s, got %K", context, &pp_token);
1769 /* TODO turn this into a flag in pp_def. */
1770 switch (symbol->pp_ID) {
1773 errorf(&pp_token.base.source_position, "%K cannot be used as macro name in %s", &pp_token, context);
/**
 * Parse a '#define' directive.
 *
 * Steps visible here:
 *  - validate the macro name with is_defineable_token("#define");
 *  - allocate a zeroed definition on pp_obstack;
 *  - if a '(' follows the name without whitespace, parse the parameter list
 *    and temporarily bind each parameter to its symbol;
 *  - collect the replacement tokens until the next line starts, turning
 *    identifiers that name a parameter into T_MACRO_PARAMETER tokens;
 *  - undo the temporary parameter bindings;
 *  - install the definition on the symbol, warning when an existing
 *    definition differs.
 *
 * Fix: the address-of expressions '&parameter' and '&param' had been mangled
 * into a pilcrow sign by an HTML entity decoder ('&para' -> U+00B6).
 */
1781 static void parse_define_directive(void)
1789 assert(obstack_object_size(&pp_obstack) == 0);
1791 if (!is_defineable_token("#define"))
1793 symbol_t *const symbol = pp_token.base.symbol;
1795 pp_definition_t *new_definition
1796 = obstack_alloc(&pp_obstack, sizeof(new_definition[0]));
1797 memset(new_definition, 0, sizeof(new_definition[0]));
1798 new_definition->symbol = symbol;
1799 new_definition->source_position = input.position;
1801 /* this is probably the only place where spaces are significant in the
1802 * lexer (except for the fact that they separate tokens). #define b(x)
1803 * is something else than #define b (x) */
1804 if (input.c == '(') {
1809 switch (pp_token.kind) {
1811 new_definition->is_variadic = true;
1812 eat_token(T_DOTDOTDOT);
1813 if (pp_token.kind != ')') {
1814 errorf(&input.position,
1815 "'...' not at end of macro argument list");
1820 case T_IDENTIFIER: {
/* parameters are accumulated as an array of pp_definition_t on pp_obstack */
1821 pp_definition_t parameter;
1822 memset(&parameter, 0, sizeof(parameter));
1823 parameter.source_position = pp_token.base.source_position;
1824 parameter.symbol = pp_token.base.symbol;
1825 parameter.is_parameter = true;
1826 obstack_grow(&pp_obstack, &parameter, sizeof(parameter));
1827 eat_token(T_IDENTIFIER);
1829 if (pp_token.kind == ',') {
1834 if (pp_token.kind != ')') {
1835 errorf(&pp_token.base.source_position,
1836 "expected ',' or ')' after identifier, got %K",
1845 goto finish_argument_list;
1848 errorf(&pp_token.base.source_position,
1849 "expected identifier, '...' or ')' in #define argument list, got %K",
1855 finish_argument_list:
1856 new_definition->has_parameters = true;
1857 size_t size = obstack_object_size(&pp_obstack);
1858 new_definition->n_parameters
1859 = size / sizeof(new_definition->parameters[0]);
1860 new_definition->parameters = obstack_finish(&pp_obstack);
/* Bind every parameter to its symbol for the duration of the body parse.
 * A symbol already bound to a parameter of this very definition is a
 * duplicate; it is neutralised by renaming it to sym_anonymous. */
1861 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1862 pp_definition_t *param = &new_definition->parameters[i];
1863 symbol_t *symbol = param->symbol;
1864 pp_definition_t *previous = symbol->pp_definition;
1865 if (previous != NULL
1866 && previous->function_definition == new_definition) {
1867 errorf(&param->source_position,
1868 "duplicate macro parameter '%Y'", symbol);
1869 param->symbol = sym_anonymous;
/* parent_expansion temporarily stores the shadowed definition so it can be
 * restored after the body has been read */
1872 param->parent_expansion = previous;
1873 param->function_definition = new_definition;
1874 symbol->pp_definition = param;
1880 /* construct token list */
1881 assert(obstack_object_size(&pp_obstack) == 0);
1882 bool next_must_be_param = false;
1883 while (!info.at_line_begin) {
1884 if (pp_token.kind == T_IDENTIFIER) {
1885 const symbol_t *symbol = pp_token.base.symbol;
1886 pp_definition_t *definition = symbol->pp_definition;
1887 if (definition != NULL
1888 && definition->function_definition == new_definition) {
1889 pp_token.kind = T_MACRO_PARAMETER;
1890 pp_token.macro_parameter.def = definition;
1893 if (next_must_be_param && pp_token.kind != T_MACRO_PARAMETER) {
1894 missing_macro_param_error();
1896 saved_token_t saved_token;
1897 saved_token.token = pp_token;
1898 saved_token.had_whitespace = info.had_whitespace;
1899 obstack_grow(&pp_obstack, &saved_token, sizeof(saved_token));
/* in a function-like macro a '#' must be followed by a parameter */
1901 = new_definition->has_parameters && pp_token.kind == '#';
1904 if (next_must_be_param)
1905 missing_macro_param_error();
1907 new_definition->list_len = obstack_object_size(&pp_obstack)
1908 / sizeof(new_definition->token_list[0]);
1909 new_definition->token_list = obstack_finish(&pp_obstack);
/* restore the symbol bindings that the parameters shadowed */
1911 if (new_definition->has_parameters) {
1912 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1913 pp_definition_t *param = &new_definition->parameters[i];
1914 symbol_t *symbol = param->symbol;
1915 if (symbol == sym_anonymous)
1917 assert(symbol->pp_definition == param);
1918 assert(param->function_definition == new_definition);
1919 symbol->pp_definition = param->parent_expansion;
1920 param->parent_expansion = NULL;
1924 pp_definition_t *old_definition = symbol->pp_definition;
1925 if (old_definition != NULL) {
1926 if (!pp_definitions_equal(old_definition, new_definition)) {
1927 warningf(WARN_OTHER, &input.position, "multiple definition of macro '%Y' (first defined %P)", symbol, &old_definition->source_position);
1929 /* reuse the old definition */
1930 obstack_free(&pp_obstack, new_definition);
1931 new_definition = old_definition;
1935 symbol->pp_definition = new_definition;
/* discard any object still being grown on pp_obstack */
1939 if (obstack_object_size(&pp_obstack) > 0) {
1940 char *ptr = obstack_finish(&pp_obstack);
1941 obstack_free(&pp_obstack, ptr);
/**
 * Parse an '#undef' directive: after validating the macro name, clear the
 * symbol's pp_definition and warn about anything left on the line.
 */
1946 static void parse_undef_directive(void)
1954 if (!is_defineable_token("#undef")) {
1959 pp_token.base.symbol->pp_definition = NULL;
1962 if (!info.at_line_begin) {
1963 warningf(WARN_OTHER, &input.position, "extra tokens at end of #undef directive");
1968 /** behind an #include we can have the special headername lexemes.
1969 * They're only allowed behind an #include so they're not recognized
1970 * by the normal next_preprocessing_token. We handle them as a special
1972 static const char *parse_headername(bool *system_include)
1974 if (info.at_line_begin) {
1975 parse_error("expected headername after #include");
1979 /* check whether we have a "... or <... headername */
1980 source_position_t position = input.position;
1984 case '<': delimiter = '>'; *system_include = true; goto parse_name;
1985 case '"': delimiter = '"'; *system_include = false; goto parse_name;
1987 assert(obstack_object_size(&symbol_obstack) == 0);
/* abandon the partially collected name before reporting the error */
1994 char *dummy = obstack_finish(&symbol_obstack);
1995 obstack_free(&symbol_obstack, dummy);
1997 errorf(&pp_token.base.source_position,
1998 "header name without closing '%c'", (char)delimiter);
2002 if (input.c == delimiter) {
2004 goto finish_headername;
2006 obstack_1grow(&symbol_obstack, (char)input.c);
2012 /* we should never be here */
/* Path using regular tokens from next_preprocessing_token(): either a string
 * literal or a '<' ... '>' token sequence is accepted. */
2016 next_preprocessing_token();
2017 if (info.at_line_begin) {
2018 /* TODO: if we are already in the new line then we parsed more than
2019 * wanted. We reuse the token, but could produce following errors
2020 * misbehaviours... */
2021 goto error_invalid_input;
2023 if (pp_token.kind == T_STRING_LITERAL) {
2024 *system_include = false;
2025 return pp_token.literal.string.begin;
2026 } else if (pp_token.kind == '<') {
2027 *system_include = true;
2028 assert(obstack_object_size(&pp_obstack) == 0);
2030 next_preprocessing_token();
2031 if (info.at_line_begin) {
2032 /* TODO: we shouldn't have parsed/expanded something on the
2033 * next line yet... */
2034 char *dummy = obstack_finish(&pp_obstack);
2035 obstack_free(&pp_obstack, dummy);
2036 goto error_invalid_input;
2038 if (pp_token.kind == '>')
/* collect the tokens between '<' and '>' on pp_obstack */
2041 saved_token_t saved;
2042 saved.token = pp_token;
2043 saved.had_whitespace = info.had_whitespace;
2044 obstack_grow(&pp_obstack, &saved, sizeof(saved));
/* Re-assemble the collected tokens into one name on symbol_obstack; a single
 * space is inserted before every token (except the first) that had leading
 * whitespace. */
2046 size_t size = obstack_object_size(&pp_obstack);
2047 assert(size % sizeof(saved_token_t) == 0);
2048 size_t n_tokens = size / sizeof(saved_token_t);
2049 saved_token_t *tokens = obstack_finish(&pp_obstack);
2050 assert(obstack_object_size(&symbol_obstack) == 0);
2051 for (size_t i = 0; i < n_tokens; ++i) {
2052 const saved_token_t *saved = &tokens[i];
2053 if (i > 0 && saved->had_whitespace)
2054 obstack_1grow(&symbol_obstack, ' ');
2055 grow_token(&symbol_obstack, &saved->token);
2057 obstack_free(&pp_obstack, tokens);
2058 goto finish_headername;
2060 error_invalid_input:
2062 char *dummy = obstack_finish(&symbol_obstack);
2063 obstack_free(&symbol_obstack, dummy);
2066 errorf(&pp_token.base.source_position,
2067 "expected \"FILENAME\" or <FILENAME> after #include");
/* terminate the collected name and pass it to identify_string(); the token
 * position is reset to where the header name started */
2073 obstack_1grow(&symbol_obstack, '\0');
2074 char *const headername = obstack_finish(&symbol_obstack);
2075 const char *identified = identify_string(headername);
2076 pp_token.base.source_position = position;
/**
 * Locate the header @p headername, open it and switch the preprocessor input
 * to it.
 *
 * For a quote include the directory of the current input file is tried
 * first; afterwards the entries of the selected search path list are tried in
 * order.
 *
 * @param bracket_include  true for the <...> form, false for "..."
 * @param include_next     NOTE(review): its test is elided here; presumably it
 *                         makes the search continue after the entry the
 *                         current input was found in (input.path->next)
 * @param headername       header name as written in the directive
 *
 * Fix: an empty search path entry no longer reads path[-1] when deciding
 * whether a '/' separator has to be appended.
 */
2080 static bool do_include(bool const bracket_include, bool const include_next, char const *const headername)
2082 size_t const headername_len = strlen(headername);
2083 searchpath_entry_t *entry;
2085 entry = input.path ? input.path->next
2086 : bracket_include ? bracket_searchpath.first
2087 : quote_searchpath.first;
2089 if (!bracket_include) {
2090 /* put dirname of current input on obstack */
2091 const char *filename = input.position.input_name;
2092 const char *last_slash = strrchr(filename, '/');
2093 const char *full_name;
2094 if (last_slash != NULL) {
/* len + 1 keeps the trailing '/' of the directory part */
2095 size_t len = last_slash - filename;
2096 obstack_grow(&symbol_obstack, filename, len + 1);
2097 obstack_grow0(&symbol_obstack, headername, headername_len);
2098 char *complete_path = obstack_finish(&symbol_obstack);
2099 full_name = identify_string(complete_path);
2101 full_name = headername;
2104 FILE *file = fopen(full_name, "r");
2106 switch_pp_input(file, full_name, NULL, false);
2109 entry = quote_searchpath.first;
2111 entry = bracket_searchpath.first;
2115 assert(obstack_object_size(&symbol_obstack) == 0);
2116 /* check searchpath */
2117 for (; entry; entry = entry->next) {
2118 const char *path = entry->path;
2119 size_t len = strlen(path);
2120 obstack_grow(&symbol_obstack, path, len);
/* len > 0 guard: for an empty entry path[len-1] would be an out-of-bounds
 * read; such an entry gets no separator, so the header is looked up relative
 * to the working directory */
2121 if (len > 0 && path[len-1] != '/')
2122 obstack_1grow(&symbol_obstack, '/');
/* headername_len+1 also copies the terminating NUL */
2123 obstack_grow(&symbol_obstack, headername, headername_len+1);
2125 char *complete_path = obstack_finish(&symbol_obstack);
2126 FILE *file = fopen(complete_path, "r");
2128 const char *filename = identify_string(complete_path);
2129 switch_pp_input(file, filename, entry, entry->is_system_path);
2132 obstack_free(&symbol_obstack, complete_path);
/**
 * Parse an '#include' / '#include_next' directive and enter the named file.
 *
 * @param include_next  forwarded unchanged to do_include()
 */
2139 static void parse_include_directive(bool const include_next)
2146 /* do not eat the TP_include, since it would already parse the next token
2147 * which needs special handling here. */
2148 skip_till_newline(true);
2149 bool system_include;
2150 const char *headername = parse_headername(&system_include);
2151 if (headername == NULL) {
2156 bool had_nonwhitespace = skip_till_newline(false);
2157 if (had_nonwhitespace) {
2158 warningf(WARN_OTHER, &input.position,
2159 "extra tokens at end of #include directive");
/* guard against runaway recursive inclusion */
2162 if (n_inputs > INCLUDE_LIMIT) {
2163 errorf(&pp_token.base.source_position, "#include nested too deeply");
/* the new input starts at the beginning of a line */
2170 info.whitespace_at_line_begin = 0;
2171 info.had_whitespace = false;
2172 info.at_line_begin = true;
2175 bool res = do_include(system_include, include_next, headername);
2179 errorf(&pp_token.base.source_position, "failed including '%s': %s", headername, strerror(errno));
2180 pop_restore_input();
/**
 * Allocate a zero-initialised conditional on pp_obstack and push it on top of
 * conditional_stack, linking the previous top as its parent.
 */
2184 static pp_conditional_t *push_conditional(void)
2186 pp_conditional_t *conditional
2187 = obstack_alloc(&pp_obstack, sizeof(*conditional));
2188 memset(conditional, 0, sizeof(*conditional));
2190 conditional->parent = conditional_stack;
2191 conditional_stack = conditional;
/**
 * Remove the top entry of conditional_stack. The stack must not be empty.
 */
2196 static void pop_conditional(void)
2198 assert(conditional_stack != NULL);
2199 conditional_stack = conditional_stack->parent;
/**
 * Report an error for every conditional still open on conditional_stack,
 * using its recorded source position. The message distinguishes conditionals
 * that have already seen their #else.
 */
2202 void check_unclosed_conditionals(void)
2204 while (conditional_stack != NULL) {
2205 pp_conditional_t *conditional = conditional_stack;
2207 if (conditional->in_else) {
2208 errorf(&conditional->source_position, "unterminated #else");
2210 errorf(&conditional->source_position, "unterminated condition");
/**
 * Parse an '#ifdef' or '#ifndef' directive and push a matching conditional.
 *
 * @param is_ifdef  true for #ifdef, false for #ifndef
 */
2216 static void parse_ifdef_ifndef_directive(bool const is_ifdef)
2219 eat_pp(is_ifdef ? TP_ifdef : TP_ifndef);
/* first push site: the conditional is marked 'skip', i.e. both its branches
 * get skipped.
 * NOTE(review): the enclosing condition is elided — presumably taken when
 * already in skip mode; confirm. */
2223 pp_conditional_t *conditional = push_conditional();
2224 conditional->source_position = pp_token.base.source_position;
2225 conditional->skip = true;
2229 if (pp_token.kind != T_IDENTIFIER || info.at_line_begin) {
2230 errorf(&pp_token.base.source_position,
2231 "expected identifier after #%s, got %K",
2232 is_ifdef ? "ifdef" : "ifndef", &pp_token);
2235 /* just take the true case in the hope to avoid further errors */
2238 /* evaluate whether we are in true or false case */
/* a macro counts as defined when the symbol's pp_definition is non-NULL */
2239 condition = (bool)pp_token.base.symbol->pp_definition == is_ifdef;
2240 eat_token(T_IDENTIFIER);
2242 if (!info.at_line_begin) {
2243 errorf(&pp_token.base.source_position,
2244 "extra tokens at end of #%s",
2245 is_ifdef ? "ifdef" : "ifndef");
2250 pp_conditional_t *conditional = push_conditional();
2251 conditional->source_position = pp_token.base.source_position;
2252 conditional->condition = condition;
/**
 * Parse an '#else' directive.
 * Diagnoses trailing tokens, an #else without an open conditional and a second
 * #else for the same conditional; otherwise flips the innermost conditional
 * into its else branch.
 */
2259 static void parse_else_directive(void)
2263 if (!info.at_line_begin) {
2265 warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #else");
2270 pp_conditional_t *conditional = conditional_stack;
2271 if (conditional == NULL) {
2272 errorf(&pp_token.base.source_position, "#else without prior #if");
2276 if (conditional->in_else) {
2277 errorf(&pp_token.base.source_position,
2278 "#else after #else (condition started %P)",
2279 &conditional->source_position);
2284 conditional->in_else = true;
/* unless the whole conditional is skipped, the else branch is skipped exactly
 * when the condition was true */
2285 if (!conditional->skip) {
2286 skip_mode = conditional->condition;
/* from now on diagnostics for this conditional point at the #else */
2288 conditional->source_position = pp_token.base.source_position;
/**
 * Parse an '#endif' directive.
 * Diagnoses trailing tokens and an #endif without an open conditional.
 * NOTE(review): the body of the final '!conditional->skip' branch and the pop
 * of the conditional are elided here.
 */
2291 static void parse_endif_directive(void)
2295 if (!info.at_line_begin) {
2297 warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #endif");
2302 pp_conditional_t *conditional = conditional_stack;
2303 if (conditional == NULL) {
2304 errorf(&pp_token.base.source_position, "#endif without prior #if");
2308 if (!conditional->skip) {
/** Which '#pragma STDC' switch was recognised by parse_pragma_directive(). */
2314 typedef enum stdc_pragma_kind_t {
2318 STDC_CX_LIMITED_RANGE
2319 } stdc_pragma_kind_t;
/** Argument given to a '#pragma STDC' switch (see parse_pragma_directive()). */
2321 typedef enum stdc_pragma_value_kind_t {
2326 } stdc_pragma_value_kind_t;
/**
 * Parse a '#pragma' directive.
 * Only the STDC pragmas (FP_CONTRACT, FENV_ACCESS, CX_LIMITED_RANGE with the
 * values ON, OFF, DEFAULT) are recognised, and only when c_mode has the _C99
 * bit set; everything else yields an 'unknown pragma' warning.
 */
2328 static void parse_pragma_directive(void)
2336 if (pp_token.kind != T_IDENTIFIER) {
2337 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
2338 "expected identifier after #pragma");
2343 stdc_pragma_kind_t kind = STDC_UNKNOWN;
2344 if (pp_token.base.symbol->pp_ID == TP_STDC && c_mode & _C99) {
/* which STDC switch? */
2348 switch (pp_token.base.symbol->pp_ID) {
2349 case TP_FP_CONTRACT: kind = STDC_FP_CONTRACT; break;
2350 case TP_FENV_ACCESS: kind = STDC_FENV_ACCESS; break;
2351 case TP_CX_LIMITED_RANGE: kind = STDC_CX_LIMITED_RANGE; break;
2354 if (kind != STDC_UNKNOWN) {
/* which value? */
2356 stdc_pragma_value_kind_t value;
2357 switch (pp_token.base.symbol->pp_ID) {
2358 case TP_ON: value = STDC_VALUE_ON; break;
2359 case TP_OFF: value = STDC_VALUE_OFF; break;
2360 case TP_DEFAULT: value = STDC_VALUE_DEFAULT; break;
2361 default: value = STDC_VALUE_UNKNOWN; break;
/* a known switch with a bad value is an error and is then treated as an
 * unknown pragma */
2363 if (value == STDC_VALUE_UNKNOWN) {
2364 kind = STDC_UNKNOWN;
2365 errorf(&pp_token.base.source_position, "bad STDC pragma argument");
2370 if (kind == STDC_UNKNOWN) {
2371 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
2372 "encountered unknown #pragma");
/**
 * Parse the operands of a '#line' directive or of a '# <number>' line marker:
 * a line number, an optional file name (ordinary string literal only) and
 * optional numeric flags as printed by the gcc preprocessor.
 *
 * Fix: the line number is now parsed as decimal (base 10). With base 0,
 * strtol() treated a leading '0' as octal and '0x' as hexadecimal, whereas the
 * C standard interprets the digit sequence of #line as a decimal integer
 * ('#line 010' means line 10).
 */
2376 static void parse_line_directive(void)
2378 if (pp_token.kind != T_NUMBER) {
2380 parse_error("expected integer");
2383 long const line = strtol(pp_token.literal.string.begin, &end, 10);
2385 /* use offset -1 as this is about the next line */
2386 input.position.lineno = line - 1;
2387 /* force output of line */
2388 input.output_line = input.position.lineno - 20;
2391 errorf(&input.position, "'%S' is not a valid line number",
2392 &pp_token.literal.string);
2396 if (info.at_line_begin)
2399 if (pp_token.kind == T_STRING_LITERAL
2400 && pp_token.literal.string.encoding == STRING_ENCODING_CHAR) {
2401 input.position.input_name = pp_token.literal.string.begin;
2402 input.position.is_system_header = false;
2405 /* attempt to parse numeric flags as outputted by gcc preprocessor */
2406 while (!info.at_line_begin && pp_token.kind == T_NUMBER) {
2408 * 1 - indicates start of a new file
2409 * 2 - indicates return from a file
2410 * 3 - indicates system header
2411 * 4 - indicates implicit extern "C" in C++ mode
2413 * currently we're only interested in "3"
2415 if (streq(pp_token.literal.string.begin, "3")) {
2416 input.position.is_system_header = true;
/**
 * Parse an '#error' directive: the rest of the line is re-assembled into one
 * string on pp_obstack and reported through errorf() at the position of the
 * first token.
 */
2425 static void parse_error_directive(void)
/* escape sequences are left unresolved while the message is collected; the
 * previous setting is restored afterwards */
2432 bool const old_resolve_escape_sequences = resolve_escape_sequences;
2433 resolve_escape_sequences = false;
2435 source_position_t const pos = pp_token.base.source_position;
/* separate tokens by one space where the source had whitespace, but never at
 * the very start of the message */
2437 if (info.had_whitespace && obstack_object_size(&pp_obstack) != 0)
2438 obstack_1grow(&pp_obstack, ' ');
2440 switch (pp_token.kind) {
2442 string_t const *const str = &pp_token.literal.string;
2443 obstack_grow(&pp_obstack, str->begin, str->size);
2449 case T_STRING_LITERAL: delim = '"'; goto string;
2450 case T_CHARACTER_CONSTANT: delim = '\''; goto string;
/* literals are printed with their encoding prefix and delimiters */
2452 string_t const *const str = &pp_token.literal.string;
2453 char const *const enc = get_string_encoding_prefix(str->encoding);
2454 obstack_printf(&pp_obstack, "%s%c%s%c", enc, delim, str->begin, delim);
2459 char const *const str = pp_token.base.symbol->string;
2460 obstack_grow(&pp_obstack, str, strlen(str));
2466 } while (!info.at_line_begin);
2468 resolve_escape_sequences = old_resolve_escape_sequences;
2470 obstack_1grow(&pp_obstack, '\0');
2471 char *const str = obstack_finish(&pp_obstack);
2472 errorf(&pos, "#%s", str);
2473 obstack_free(&pp_obstack, str);
/**
 * Dispatch a preprocessing directive to its handler.
 * The directive is selected by the pp_ID of the current token's symbol; a
 * plain number is handled as a line directive. On return the lexer is at the
 * beginning of a line (asserted at the end).
 */
2476 static void parse_preprocessing_directive(void)
2480 if (info.at_line_begin) {
2481 /* empty directive */
2485 if (pp_token.base.symbol) {
2486 switch (pp_token.base.symbol->pp_ID) {
2487 case TP_define: parse_define_directive(); break;
2488 case TP_else: parse_else_directive(); break;
2489 case TP_endif: parse_endif_directive(); break;
2490 case TP_error: parse_error_directive(); break;
2491 case TP_ifdef: parse_ifdef_ifndef_directive(true); break;
2492 case TP_ifndef: parse_ifdef_ifndef_directive(false); break;
2493 case TP_include: parse_include_directive(false); break;
2494 case TP_include_next: parse_include_directive(true); break;
/* '#line' shares its operand parsing with the '# <number>' form below */
2495 case TP_line: next_input_token(); goto line_directive;
2496 case TP_pragma: parse_pragma_directive(); break;
2497 case TP_undef: parse_undef_directive(); break;
2500 } else if (pp_token.kind == T_NUMBER) {
2502 parse_line_directive();
2506 errorf(&pp_token.base.source_position, "invalid preprocessing directive #%K", &pp_token);
2511 assert(info.at_line_begin);
/**
 * Close the macro argument currently being collected: the tokens grown on
 * pp_obstack become the token list of current_argument. Does nothing useful
 * when no argument is being collected (current_argument == NULL).
 */
2514 static void finish_current_argument(void)
2516 if (current_argument == NULL)
2518 size_t size = obstack_object_size(&pp_obstack);
2519 current_argument->list_len = size/sizeof(current_argument->token_list[0]);
2520 current_argument->token_list = obstack_finish(&pp_obstack);
/**
 * Advance pp_token to the next preprocessing token.
 *
 * Tokens come from a running macro expansion (expand_next()) or, failing
 * that, from the input, where directives at line begin are processed and
 * tokens are dropped while skip_mode is active. Identifiers that name a macro
 * start an expansion; for function-like macros the arguments are collected
 * token by token on pp_obstack until the closing ')' is seen.
 *
 * Fix: the address-of expressions '&current_call->parameters[...]' had been
 * mangled into a currency sign by an HTML entity decoder ('&curren' ->
 * U+00A4).
 */
2523 void next_preprocessing_token(void)
2526 if (!expand_next()) {
2529 while (pp_token.kind == '#' && info.at_line_begin) {
2530 parse_preprocessing_directive();
2532 } while (skip_mode && pp_token.kind != T_EOF);
2535 const token_kind_t kind = pp_token.kind;
/* macro names are only expanded outside of argument collection, or while an
 * argument itself is being expanded */
2536 if (current_call == NULL || argument_expanding != NULL) {
2537 symbol_t *const symbol = pp_token.base.symbol;
2539 if (kind == T_MACRO_PARAMETER) {
2540 assert(current_expansion != NULL);
2541 start_expanding(pp_token.macro_parameter.def);
2545 pp_definition_t *const pp_definition = symbol->pp_definition;
/* is_expanding blocks recursive expansion of a macro inside itself */
2546 if (pp_definition != NULL && !pp_definition->is_expanding) {
2547 if (pp_definition->has_parameters) {
2549 /* check if next token is a '(' */
2550 whitespace_info_t old_info = info;
2551 token_kind_t next_token = peek_expansion();
2552 if (next_token == T_EOF) {
2553 info.at_line_begin = false;
2554 info.had_whitespace = false;
2556 if (input.c == '(') {
2561 if (next_token == '(') {
2562 if (current_expansion == NULL)
2563 expansion_pos = pp_token.base.source_position;
2564 next_preprocessing_token();
2565 assert(pp_token.kind == '(');
2567 pp_definition->parent_expansion = current_expansion;
2568 current_call = pp_definition;
2569 current_call->expand_pos = 0;
2570 current_call->expand_info = old_info;
2571 if (current_call->n_parameters > 0) {
2572 current_argument = &current_call->parameters[0];
2573 assert(argument_brace_count == 0);
2577 /* skip_whitespaces() skipped newlines and whitespace,
2578 * remember results for next token */
2584 if (current_expansion == NULL)
2585 expansion_pos = pp_token.base.source_position;
2586 start_expanding(pp_definition);
2593 if (current_call != NULL) {
2594 /* current_call != NULL */
/* nested parentheses inside an argument are counted so that ',' and ')' only
 * act at nesting depth 0 */
2596 ++argument_brace_count;
2597 } else if (kind == ')') {
2598 if (argument_brace_count > 0) {
2599 --argument_brace_count;
/* closing ')' of the call: finish the last argument and start expanding the
 * macro body */
2601 finish_current_argument();
2602 assert(kind == ')');
2603 start_expanding(current_call);
2604 info = current_call->expand_info;
2605 current_call = NULL;
2606 current_argument = NULL;
2609 } else if (kind == ',' && argument_brace_count == 0) {
/* top-level ',': move on to the next parameter slot */
2610 finish_current_argument();
2611 current_call->expand_pos++;
2612 if (current_call->expand_pos >= current_call->n_parameters) {
2613 errorf(&pp_token.base.source_position,
2614 "too many arguments passed for macro '%Y'",
2615 current_call->symbol);
2616 current_argument = NULL;
2619 = &current_call->parameters[current_call->expand_pos];
2622 } else if (kind == T_MACRO_PARAMETER) {
2623 /* parameters have to be fully expanded before being used as
2624 * parameters for another macro-call */
2625 assert(current_expansion != NULL);
2626 pp_definition_t *argument = pp_token.macro_parameter.def;
2627 argument_expanding = argument;
2628 start_expanding(argument);
2630 } else if (kind == T_EOF) {
2631 errorf(&expansion_pos,
2632 "reached end of file while parsing arguments for '%Y'",
2633 current_call->symbol);
/* any other token becomes part of the argument being collected */
2636 if (current_argument != NULL) {
2637 saved_token_t saved;
2638 saved.token = pp_token;
2639 saved.had_whitespace = info.had_whitespace;
2640 obstack_grow(&pp_obstack, &saved, sizeof(saved));
/**
 * Append a new entry for @p path to the search path list @p paths.
 * The entry is allocated zeroed on config_obstack, inherits is_system_path
 * from the list, and is linked in through the list's tail anchor.
 */
2646 void append_include_path(searchpath_t *paths, const char *path)
2648 searchpath_entry_t *entry = OALLOCZ(&config_obstack, searchpath_entry_t);
2650 entry->is_system_path = paths->is_system_path;
/* anchor points at the 'next' slot of the last entry (or at the list head) */
2652 *paths->anchor = entry;
2653 paths->anchor = &entry->next;
/**
 * Append the directories listed in environment variable @p envvar to
 * @p paths. The value is a ':'-separated list; nothing happens when the
 * variable is unset or empty.
 */
2656 static void append_env_paths(searchpath_t *paths, const char *envvar)
2658 const char *val = getenv(envvar);
2659 if (val != NULL && *val != '\0') {
2660 const char *begin = val;
/* advance to the end of the current list element */
2664 while (*c != '\0' && *c != ':')
2667 size_t len = c-begin;
2669 /* use "." for gcc compatibility (Matze: I would expect that
2670 * nothing happens for an empty entry...) */
2671 append_include_path(paths, ".");
/* the element is copied (NUL-terminated) onto config_obstack */
2673 char *const string = obstack_copy0(&config_obstack, begin, len);
2674 append_include_path(paths, string);
2681 } while(*c != '\0');
/**
 * Chain the list @p append behind the list @p path by storing append's first
 * entry in path's tail anchor; the entries are shared, not copied.
 * NOTE(review): the visible statement does not advance path->anchor, so a
 * later append_include_path() on @p path would overwrite this link — confirm
 * against the elided remainder.
 */
2685 static void append_searchpath(searchpath_t *path, const searchpath_t *append)
2687 *path->anchor = append->first;
/**
 * Build the include search path lists: add the built-in system directory and
 * the directories from the environment, then chain the lists so that each
 * lookup falls through to the next one
 * (quote -> bracket -> system -> after).
 *
 * Fix: '&quote_searchpath' had been mangled into '"e_searchpath' by an HTML
 * entity decoder ('&quot' -> '"').
 */
2690 static void setup_include_path(void)
2692 /* built-in paths */
2693 append_include_path(&system_searchpath, "/usr/include");
2695 /* parse environment variable */
2696 append_env_paths(&bracket_searchpath, "CPATH");
2697 append_env_paths(&system_searchpath,
2698 c_mode & _CXX ? "CPLUS_INCLUDE_PATH" : "C_INCLUDE_PATH");
2700 /* append system search path to bracket searchpath */
2701 append_searchpath(&system_searchpath, &after_searchpath);
2702 append_searchpath(&bracket_searchpath, &system_searchpath);
2703 append_searchpath(&quote_searchpath, &bracket_searchpath);
/**
 * Error callback registered with set_input_error_callback(): reports
 * @p message at the position of the current token shifted by the given
 * number of lines and columns.
 */
2706 static void input_error(unsigned const delta_lines, unsigned const delta_cols, char const *const message)
2708 source_position_t pos = pp_token.base.source_position;
2709 pos.lineno += delta_lines;
2710 pos.colno += delta_cols;
/* "%s" keeps the message from being interpreted as a format string */
2711 errorf(&pos, "%s", message);
/**
 * Initialise config_obstack, the obstack that search path entries are
 * allocated on (see append_include_path()).
 */
2714 void init_include_paths(void)
2716 obstack_init(&config_obstack);
/**
 * Initialise the preprocessor state: the obstacks, the string set and the
 * include search paths, and register input_error() as input error callback.
 * setup_include_path() allocates on config_obstack, which is initialised in
 * init_include_paths().
 */
2719 void init_preprocessor(void)
2723 obstack_init(&pp_obstack);
2724 obstack_init(&input_obstack);
2725 strset_init(&stringset);
2727 setup_include_path();
2729 set_input_error_callback(input_error);
/**
 * Release everything allocated on the preprocessor's obstacks (input, pp and
 * config) and destroy the string set.
 */
2732 void exit_preprocessor(void)
2734 obstack_free(&input_obstack, NULL);
2735 obstack_free(&pp_obstack, NULL);
2736 obstack_free(&config_obstack, NULL);
2738 strset_destroy(&stringset);
/**
 * Stand-alone driver for the preprocessor: parses a minimal command line
 * (-I <dir>, -E, -o <file>, one input file), preprocesses the input and
 * writes the token stream to the selected output.
 */
2741 int pptest_main(int argc, char **argv);
2742 int pptest_main(int argc, char **argv)
2744 init_symbol_table();
2745 init_include_paths();
2746 init_preprocessor();
2749 error_on_unknown_chars = false;
2750 resolve_escape_sequences = false;
2752 /* simplistic commandline parser */
2753 const char *filename = NULL;
2754 const char *output = NULL;
2755 for (int i = 1; i < argc; ++i) {
2756 const char *opt = argv[i];
2757 if (streq(opt, "-I")) {
/* NOTE(review): argv[++i] is used unchecked; if "-I" is the last argument
 * this passes argv[argc] (NULL) on as a path */
2758 append_include_path(&bracket_searchpath, argv[++i]);
2760 } else if (streq(opt, "-E")) {
2762 } else if (streq(opt, "-o")) {
2765 } else if (opt[0] == '-') {
2766 fprintf(stderr, "Unknown option '%s'\n", opt);
2768 if (filename != NULL)
2769 fprintf(stderr, "Multiple inputs not supported\n");
2773 if (filename == NULL) {
2774 fprintf(stderr, "No input specified\n");
2778 if (output == NULL) {
2781 out = fopen(output, "w");
2783 fprintf(stderr, "Couldn't open output '%s'\n", output);
2788 /* just here for gcc compatibility */
2789 fprintf(out, "# 1 \"%s\"\n", filename);
2790 fprintf(out, "# 1 \"<built-in>\"\n");
2791 fprintf(out, "# 1 \"<command-line>\"\n");
2793 FILE *file = fopen(filename, "r");
2795 fprintf(stderr, "Couldn't open input '%s'\n", filename);
2798 switch_pp_input(file, filename, NULL, false);
/* main loop: fetch tokens until T_EOF */
2801 next_preprocessing_token();
2802 if (pp_token.kind == T_EOF)
2808 check_unclosed_conditionals();
2809 fclose(close_pp_input());
2814 exit_preprocessor();
2815 exit_symbol_table();