9 #include "preprocessor.h"
13 #include "adt/error.h"
14 #include "adt/strutil.h"
15 #include "adt/strset.h"
16 #include "lang_features.h"
17 #include "diagnostic.h"
18 #include "string_rep.h"
22 #define INCLUDE_LIMIT 199 /* 199 is for gcc "compatibility" */
24 typedef struct saved_token_t {
29 typedef struct whitespace_info_t {
30 /** current token had whitespace in front of it */
32 /** current token is at the beginning of a line.
33 * => a "#" at line begin starts a preprocessing directive. */
35 /** number of spaces before the first token in a line */
36 unsigned whitespace_at_line_begin;
39 struct pp_definition_t {
41 source_position_t source_position;
42 pp_definition_t *parent_expansion;
44 whitespace_info_t expand_info;
46 bool is_expanding : 1;
47 bool has_parameters : 1;
48 bool is_parameter : 1;
49 pp_definition_t *function_definition;
51 pp_definition_t *parameters;
55 saved_token_t *token_list;
58 typedef struct pp_conditional_t pp_conditional_t;
59 struct pp_conditional_t {
60 source_position_t source_position;
63 /** conditional in skip mode (then+else gets skipped) */
65 pp_conditional_t *parent;
68 typedef struct pp_input_t pp_input_t;
73 utf32 buf[1024+MAX_PUTBACK];
76 source_position_t position;
79 searchpath_entry_t *path;
82 struct searchpath_entry_t {
84 searchpath_entry_t *next;
88 static pp_input_t input;
90 static pp_input_t *input_stack;
91 static unsigned n_inputs;
92 static struct obstack input_obstack;
94 static pp_conditional_t *conditional_stack;
97 bool allow_dollar_in_symbol = true;
98 static bool resolve_escape_sequences = true;
99 static bool error_on_unknown_chars = true;
100 static bool skip_mode;
102 static struct obstack pp_obstack;
103 static struct obstack config_obstack;
104 static const char *printed_input_name = NULL;
105 static source_position_t expansion_pos;
106 static pp_definition_t *current_expansion = NULL;
107 static pp_definition_t *current_call = NULL;
108 static pp_definition_t *current_argument = NULL;
109 static pp_definition_t *argument_expanding = NULL;
110 static unsigned argument_brace_count;
111 static strset_t stringset;
112 static token_kind_t last_token;
114 struct searchpath_t {
115 searchpath_entry_t *first;
116 searchpath_entry_t **anchor;
120 searchpath_t bracket_searchpath = { NULL, &bracket_searchpath.first, false };
121 searchpath_t quote_searchpath = { NULL, "e_searchpath.first, false };
122 searchpath_t system_searchpath = { NULL, &system_searchpath.first, true };
123 searchpath_t after_searchpath = { NULL, &after_searchpath.first, true };
125 static whitespace_info_t next_info; /* valid if had_whitespace is true */
126 static whitespace_info_t info;
128 static inline void next_char(void);
129 static void next_input_token(void);
130 static void print_line_directive(const source_position_t *pos, const char *add);
132 static symbol_t *symbol_colongreater;
133 static symbol_t *symbol_lesscolon;
134 static symbol_t *symbol_lesspercent;
135 static symbol_t *symbol_percentcolon;
136 static symbol_t *symbol_percentcolonpercentcolon;
137 static symbol_t *symbol_percentgreater;
139 static void init_symbols(void)
141 symbol_colongreater = symbol_table_insert(":>");
142 symbol_lesscolon = symbol_table_insert("<:");
143 symbol_lesspercent = symbol_table_insert("<%");
144 symbol_percentcolon = symbol_table_insert("%:");
145 symbol_percentcolonpercentcolon = symbol_table_insert("%:%:");
146 symbol_percentgreater = symbol_table_insert("%>");
149 void switch_pp_input(FILE *const file, char const *const filename, searchpath_entry_t *const path, bool const is_system_header)
152 input.input = input_from_stream(file, NULL);
155 input.output_line = 0;
156 input.position.input_name = filename;
157 input.position.lineno = 1;
158 input.position.is_system_header = is_system_header;
161 /* indicate that we're at a new input */
162 print_line_directive(&input.position, input_stack != NULL ? "1" : NULL);
164 /* place a virtual '\n' so we realize we're at line begin */
165 input.position.lineno = 0;
169 FILE *close_pp_input(void)
171 input_free(input.input);
173 FILE* const file = input.file;
185 static void push_input(void)
187 pp_input_t *const saved_input = obstack_copy(&input_obstack, &input, sizeof(input));
189 /* adjust buffer positions */
190 if (input.bufpos != NULL)
191 saved_input->bufpos = saved_input->buf + (input.bufpos - input.buf);
192 if (input.bufend != NULL)
193 saved_input->bufend = saved_input->buf + (input.bufend - input.buf);
195 saved_input->parent = input_stack;
196 input_stack = saved_input;
200 static void pop_restore_input(void)
202 assert(n_inputs > 0);
203 assert(input_stack != NULL);
205 pp_input_t *saved_input = input_stack;
207 memcpy(&input, saved_input, sizeof(input));
210 /* adjust buffer positions */
211 if (saved_input->bufpos != NULL)
212 input.bufpos = input.buf + (saved_input->bufpos - saved_input->buf);
213 if (saved_input->bufend != NULL)
214 input.bufend = input.buf + (saved_input->bufend - saved_input->buf);
216 input_stack = saved_input->parent;
217 obstack_free(&input_obstack, saved_input);
222 * Prints a parse error message at the current token.
224 * @param msg the error message
226 static void parse_error(const char *msg)
228 errorf(&pp_token.base.source_position, "%s", msg);
231 static inline void next_real_char(void)
233 assert(input.bufpos <= input.bufend);
234 if (input.bufpos >= input.bufend) {
235 size_t const n = decode(input.input, input.buf + MAX_PUTBACK, lengthof(input.buf) - MAX_PUTBACK);
240 input.bufpos = input.buf + MAX_PUTBACK;
241 input.bufend = input.bufpos + n;
243 input.c = *input.bufpos++;
244 ++input.position.colno;
248 * Put a character back into the buffer.
250 * @param pc the character to put back
252 static inline void put_back(utf32 const pc)
254 assert(input.bufpos > input.buf);
255 *(--input.bufpos - input.buf + input.buf) = (char) pc;
256 --input.position.colno;
262 if (input.c == '\n') { \
266 ++input.position.lineno; \
267 input.position.colno = 1; \
269 newline // Let it look like an ordinary case label.
271 #define eat(c_type) (assert(input.c == c_type), next_char())
273 static void maybe_concat_lines(void)
279 info.whitespace_at_line_begin = 0;
291 * Set c to the next input character, i.e.
292 * after expanding trigraphs.
294 static inline void next_char(void)
298 /* filter trigraphs and concatenated lines */
299 if (UNLIKELY(input.c == '\\')) {
300 maybe_concat_lines();
301 goto end_of_next_char;
304 if (LIKELY(input.c != '?'))
305 goto end_of_next_char;
308 if (LIKELY(input.c != '?')) {
311 goto end_of_next_char;
316 case '=': input.c = '#'; break;
317 case '(': input.c = '['; break;
318 case '/': input.c = '\\'; maybe_concat_lines(); break;
319 case ')': input.c = ']'; break;
320 case '\'': input.c = '^'; break;
321 case '<': input.c = '{'; break;
322 case '!': input.c = '|'; break;
323 case '>': input.c = '}'; break;
324 case '-': input.c = '~'; break;
334 printf("nchar '%c'\n", input.c);
341 * Returns true if the given char is an octal digit.
343 * @param chr the character to check
345 static inline bool is_octal_digit(int chr)
363 * Returns the value of a digit.
364 * The only portable way to do it ...
366 static int digit_value(int digit)
392 panic("wrong character given");
397 * Parses an octal character sequence.
399 * @param first_digit the already read first digit
401 static utf32 parse_octal_sequence(const utf32 first_digit)
403 assert(is_octal_digit(first_digit));
404 utf32 value = digit_value(first_digit);
405 if (!is_octal_digit(input.c)) return value;
406 value = 8 * value + digit_value(input.c);
408 if (!is_octal_digit(input.c)) return value;
409 value = 8 * value + digit_value(input.c);
416 * Parses a hex character sequence.
418 static utf32 parse_hex_sequence(void)
421 while (isxdigit(input.c)) {
422 value = 16 * value + digit_value(input.c);
428 static bool is_universal_char_valid(utf32 const v)
431 if (v < 0xA0U && v != 0x24 && v != 0x40 && v != 0x60)
433 if (0xD800 <= v && v <= 0xDFFF)
438 static utf32 parse_universal_char(unsigned const n_digits)
441 for (unsigned k = n_digits; k != 0; --k) {
442 if (isxdigit(input.c)) {
443 v = 16 * v + digit_value(input.c);
444 if (!resolve_escape_sequences)
445 obstack_1grow(&symbol_obstack, input.c);
448 errorf(&input.position,
449 "short universal character name, expected %u more digits",
454 if (!is_universal_char_valid(v)) {
455 errorf(&input.position,
456 "\\%c%0*X is not a valid universal character name",
457 n_digits == 4 ? 'u' : 'U', (int)n_digits, v);
462 static bool is_universal_char_valid_identifier_c99(utf32 const v)
464 static const utf32 single_chars[] = {
465 0x00AA, 0x00BA, 0x0386, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0,
466 0x1F59, 0x1F5B, 0x1F5D, 0x05BF, 0x09B2, 0x0A02, 0x0A5E, 0x0A74,
467 0x0A8D, 0x0AD0, 0x0AE0, 0x0B9C, 0x0CDE, 0x0E84, 0x0E8A, 0x0E8D,
468 0x0EA5, 0x0EA7, 0x0EC6, 0x0F00, 0x0F35, 0x0F37, 0x0F39, 0x0F97,
469 0x0FB9, 0x00B5, 0x00B7, 0x02BB, 0x037A, 0x0559, 0x093D, 0x0B3D,
470 0x1FBE, 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128
473 static const utf32 ranges[][2] = {
474 {0x00C0, 0x00D6}, {0x00D8, 0x00F6}, {0x00F8, 0x01F5}, {0x01FA, 0x0217},
475 {0x0250, 0x02A8}, {0x1E00, 0x1E9B}, {0x1EA0, 0x1EF9}, {0x0388, 0x038A},
476 {0x038E, 0x03A1}, {0x03A3, 0x03CE}, {0x03D0, 0x03D6}, {0x03E2, 0x03F3},
477 {0x1F00, 0x1F15}, {0x1F18, 0x1F1D}, {0x1F20, 0x1F45}, {0x1F48, 0x1F4D},
478 {0x1F50, 0x1F57}, {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4}, {0x1FB6, 0x1FBC},
479 {0x1FC2, 0x1FC4}, {0x1FC6, 0x1FCC}, {0x1FD0, 0x1FD3}, {0x1FD6, 0x1FDB},
480 {0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4}, {0x1FF6, 0x1FFC}, {0x0401, 0x040C},
481 {0x040E, 0x044F}, {0x0451, 0x045C}, {0x045E, 0x0481}, {0x0490, 0x04C4},
482 {0x04C7, 0x04C8}, {0x04CB, 0x04CC}, {0x04D0, 0x04EB}, {0x04EE, 0x04F5},
483 {0x04F8, 0x04F9}, {0x0531, 0x0556}, {0x0561, 0x0587}, {0x05B0, 0x05B9},
484 {0x05BB, 0x05BD}, {0x05C1, 0x05C2}, {0x05D0, 0x05EA}, {0x05F0, 0x05F2},
485 {0x0621, 0x063A}, {0x0640, 0x0652}, {0x0670, 0x06B7}, {0x06BA, 0x06BE},
486 {0x06C0, 0x06CE}, {0x06D0, 0x06DC}, {0x06E5, 0x06E8}, {0x06EA, 0x06ED},
487 {0x0901, 0x0903}, {0x0905, 0x0939}, {0x093E, 0x094D}, {0x0950, 0x0952},
488 {0x0958, 0x0963}, {0x0981, 0x0983}, {0x0985, 0x098C}, {0x098F, 0x0990},
489 {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B6, 0x09B9}, {0x09BE, 0x09C4},
490 {0x09C7, 0x09C8}, {0x09CB, 0x09CD}, {0x09DC, 0x09DD}, {0x09DF, 0x09E3},
491 {0x09F0, 0x09F1}, {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28},
492 {0x0A2A, 0x0A30}, {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, {0x0A38, 0x0A39},
493 {0x0A3E, 0x0A42}, {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A59, 0x0A5C},
494 {0x0A81, 0x0A83}, {0x0A85, 0x0A8B}, {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8},
495 {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3}, {0x0AB5, 0x0AB9}, {0x0ABD, 0x0AC5},
496 {0x0AC7, 0x0AC9}, {0x0ACB, 0x0ACD}, {0x0B01, 0x0B03}, {0x0B05, 0x0B0C},
497 {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, {0x0B32, 0x0B33},
498 {0x0B36, 0x0B39}, {0x0B3E, 0x0B43}, {0x0B47, 0x0B48}, {0x0B4B, 0x0B4D},
499 {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B61}, {0x0B82, 0x0B83}, {0x0B85, 0x0B8A},
500 {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A}, {0x0B9E, 0x0B9F},
501 {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA}, {0x0BAE, 0x0BB5}, {0x0BB7, 0x0BB9},
502 {0x0BBE, 0x0BC2}, {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD}, {0x0C01, 0x0C03},
503 {0x0C05, 0x0C0C}, {0x0C0E, 0x0C10}, {0x0C12, 0x0C28}, {0x0C2A, 0x0C33},
504 {0x0C35, 0x0C39}, {0x0C3E, 0x0C44}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D},
505 {0x0C60, 0x0C61}, {0x0C82, 0x0C83}, {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90},
506 {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3}, {0x0CB5, 0x0CB9}, {0x0CBE, 0x0CC4},
507 {0x0CC6, 0x0CC8}, {0x0CCA, 0x0CCD}, {0x0CE0, 0x0CE1}, {0x0D02, 0x0D03},
508 {0x0D05, 0x0D0C}, {0x0D0E, 0x0D10}, {0x0D12, 0x0D28}, {0x0D2A, 0x0D39},
509 {0x0D3E, 0x0D43}, {0x0D46, 0x0D48}, {0x0D4A, 0x0D4D}, {0x0D60, 0x0D61},
510 {0x0E01, 0x0E3A}, {0x0E40, 0x0E5B}, {0x0E81, 0x0E82}, {0x0E87, 0x0E88},
511 {0x0E94, 0x0E97}, {0x0E99, 0x0E9F}, {0x0EA1, 0x0EA3}, {0x0EAA, 0x0EAB},
512 {0x0EAD, 0x0EAE}, {0x0EB0, 0x0EB9}, {0x0EBB, 0x0EBD}, {0x0EC0, 0x0EC4},
513 {0x0EC8, 0x0ECD}, {0x0EDC, 0x0EDD}, {0x0F18, 0x0F19}, {0x0F3E, 0x0F47},
514 {0x0F49, 0x0F69}, {0x0F71, 0x0F84}, {0x0F86, 0x0F8B}, {0x0F90, 0x0F95},
515 {0x0F99, 0x0FAD}, {0x0FB1, 0x0FB7}, {0x10A0, 0x10C5}, {0x10D0, 0x10F6},
516 {0x3041, 0x3093}, {0x309B, 0x309C}, {0x30A1, 0x30F6}, {0x30FB, 0x30FC},
517 {0x3105, 0x312C}, {0x4E00, 0x9FA5}, {0xAC00, 0xD7A3}, {0x0660, 0x0669},
518 {0x06F0, 0x06F9}, {0x0966, 0x096F}, {0x09E6, 0x09EF}, {0x0A66, 0x0A6F},
519 {0x0AE6, 0x0AEF}, {0x0B66, 0x0B6F}, {0x0BE7, 0x0BEF}, {0x0C66, 0x0C6F},
520 {0x0CE6, 0x0CEF}, {0x0D66, 0x0D6F}, {0x0E50, 0x0E59}, {0x0ED0, 0x0ED9},
521 {0x0F20, 0x0F33}, {0x02B0, 0x02B8}, {0x02BD, 0x02C1}, {0x02D0, 0x02D1},
522 {0x02E0, 0x02E4}, {0x203F, 0x2040}, {0x210A, 0x2113}, {0x2118, 0x211D},
523 {0x212A, 0x2131}, {0x2133, 0x2138}, {0x2160, 0x2182}, {0x3005, 0x3007},
526 for (size_t i = 0; i < sizeof(ranges)/sizeof(ranges[0]); ++i) {
527 if (ranges[i][0] <= v && v <= ranges[i][1])
530 for (size_t i = 0; i < sizeof(single_chars)/sizeof(single_chars[0]); ++i) {
531 if (v == single_chars[i])
537 static bool is_universal_char_valid_identifier_c11(utf32 const v)
540 if ( v == 0x000A8) return true;
541 if ( v == 0x000AA) return true;
542 if ( v == 0x000AD) return true;
543 if ( v == 0x000AF) return true;
544 if (0x000B2 <= v && v <= 0x000B5) return true;
545 if (0x000B7 <= v && v <= 0x000BA) return true;
546 if (0x000BC <= v && v <= 0x000BE) return true;
547 if (0x000C0 <= v && v <= 0x000D6) return true;
548 if (0x000D8 <= v && v <= 0x000F6) return true;
549 if (0x000F8 <= v && v <= 0x000FF) return true;
550 if (0x00100 <= v && v <= 0x0167F) return true;
551 if (0x01681 <= v && v <= 0x0180D) return true;
552 if (0x0180F <= v && v <= 0x01FFF) return true;
553 if (0x0200B <= v && v <= 0x0200D) return true;
554 if (0x0202A <= v && v <= 0x0202E) return true;
555 if (0x0203F <= v && v <= 0x02040) return true;
556 if ( v == 0x02054) return true;
557 if (0x02060 <= v && v <= 0x0206F) return true;
558 if (0x02070 <= v && v <= 0x0218F) return true;
559 if (0x02460 <= v && v <= 0x024FF) return true;
560 if (0x02776 <= v && v <= 0x02793) return true;
561 if (0x02C00 <= v && v <= 0x02DFF) return true;
562 if (0x02E80 <= v && v <= 0x02FFF) return true;
563 if (0x03004 <= v && v <= 0x03007) return true;
564 if (0x03021 <= v && v <= 0x0302F) return true;
565 if (0x03031 <= v && v <= 0x0303F) return true;
566 if (0x03040 <= v && v <= 0x0D7FF) return true;
567 if (0x0F900 <= v && v <= 0x0FD3D) return true;
568 if (0x0FD40 <= v && v <= 0x0FDCF) return true;
569 if (0x0FDF0 <= v && v <= 0x0FE44) return true;
570 if (0x0FE47 <= v && v <= 0x0FFFD) return true;
571 if (0x10000 <= v && v <= 0x1FFFD) return true;
572 if (0x20000 <= v && v <= 0x2FFFD) return true;
573 if (0x30000 <= v && v <= 0x3FFFD) return true;
574 if (0x40000 <= v && v <= 0x4FFFD) return true;
575 if (0x50000 <= v && v <= 0x5FFFD) return true;
576 if (0x60000 <= v && v <= 0x6FFFD) return true;
577 if (0x70000 <= v && v <= 0x7FFFD) return true;
578 if (0x80000 <= v && v <= 0x8FFFD) return true;
579 if (0x90000 <= v && v <= 0x9FFFD) return true;
580 if (0xA0000 <= v && v <= 0xAFFFD) return true;
581 if (0xB0000 <= v && v <= 0xBFFFD) return true;
582 if (0xC0000 <= v && v <= 0xCFFFD) return true;
583 if (0xD0000 <= v && v <= 0xDFFFD) return true;
584 if (0xE0000 <= v && v <= 0xEFFFD) return true;
588 static bool is_universal_char_valid_identifier(utf32 const v)
591 return is_universal_char_valid_identifier_c11(v);
592 return is_universal_char_valid_identifier_c99(v);
595 static bool is_universal_char_invalid_identifier_start(utf32 const v)
597 if (! (c_mode & _C11))
601 if (0x0300 <= v && v <= 0x036F) return true;
602 if (0x1DC0 <= v && v <= 0x1DFF) return true;
603 if (0x20D0 <= v && v <= 0x20FF) return true;
604 if (0xFE20 <= v && v <= 0xFE2F) return true;
609 * Parse an escape sequence.
611 static utf32 parse_escape_sequence(void)
615 utf32 const ec = input.c;
619 case '"': return '"';
620 case '\'': return '\'';
621 case '\\': return '\\';
622 case '?': return '\?';
623 case 'a': return '\a';
624 case 'b': return '\b';
625 case 'f': return '\f';
626 case 'n': return '\n';
627 case 'r': return '\r';
628 case 't': return '\t';
629 case 'v': return '\v';
631 return parse_hex_sequence();
640 return parse_octal_sequence(ec);
642 parse_error("reached end of file while parsing escape sequence");
644 /* \E is not documented, but handled, by GCC. It is acceptable according
645 * to §6.11.4, whereas \e is not. */
649 return 27; /* hopefully 27 is ALWAYS the code for ESCAPE */
652 case 'U': return parse_universal_char(8);
653 case 'u': return parse_universal_char(4);
658 /* §6.4.4.4:8 footnote 64 */
659 parse_error("unknown escape sequence");
663 static const char *identify_string(char *string)
665 const char *result = strset_insert(&stringset, string);
666 if (result != string) {
667 obstack_free(&symbol_obstack, string);
672 static string_t sym_make_string(string_encoding_t const enc)
674 obstack_1grow(&symbol_obstack, '\0');
675 size_t const len = obstack_object_size(&symbol_obstack) - 1;
676 char *const string = obstack_finish(&symbol_obstack);
677 char const *const result = identify_string(string);
678 return (string_t){ result, len, enc };
681 string_t make_string(char const *const string)
683 obstack_grow(&symbol_obstack, string, strlen(string));
684 return sym_make_string(STRING_ENCODING_CHAR);
687 static void parse_string(utf32 const delimiter, token_kind_t const kind,
688 string_encoding_t const enc,
689 char const *const context)
691 const unsigned start_linenr = input.position.lineno;
698 if (resolve_escape_sequences) {
699 utf32 const tc = parse_escape_sequence();
700 if (enc == STRING_ENCODING_CHAR) {
702 warningf(WARN_OTHER, &pp_token.base.source_position, "escape sequence out of range");
704 obstack_1grow(&symbol_obstack, tc);
706 obstack_grow_utf8(&symbol_obstack, tc);
709 obstack_1grow(&symbol_obstack, (char)input.c);
711 obstack_1grow(&symbol_obstack, (char)input.c);
718 errorf(&pp_token.base.source_position, "newline while parsing %s", context);
722 source_position_t source_position;
723 source_position.input_name = pp_token.base.source_position.input_name;
724 source_position.lineno = start_linenr;
725 errorf(&source_position, "EOF while parsing %s", context);
730 if (input.c == delimiter) {
734 obstack_grow_utf8(&symbol_obstack, input.c);
742 pp_token.kind = kind;
743 pp_token.literal.string = sym_make_string(enc);
746 static void parse_string_literal(string_encoding_t const enc)
748 parse_string('"', T_STRING_LITERAL, enc, "string literal");
751 static void parse_character_constant(string_encoding_t const enc)
753 parse_string('\'', T_CHARACTER_CONSTANT, enc, "character constant");
754 if (pp_token.literal.string.size == 0) {
755 parse_error("empty character constant");
759 #define SYMBOL_CASES_WITHOUT_E_P \
760 '$': if (!allow_dollar_in_symbol) goto dollar_sign; \
811 #define SYMBOL_CASES \
812 SYMBOL_CASES_WITHOUT_E_P: \
818 #define DIGIT_CASES \
830 static void start_expanding(pp_definition_t *definition)
832 definition->parent_expansion = current_expansion;
833 definition->expand_pos = 0;
834 definition->is_expanding = true;
835 if (definition->list_len > 0) {
836 definition->token_list[0].had_whitespace
837 = info.had_whitespace;
839 current_expansion = definition;
842 static void finished_expanding(pp_definition_t *definition)
844 assert(definition->is_expanding);
845 pp_definition_t *parent = definition->parent_expansion;
846 definition->parent_expansion = NULL;
847 definition->is_expanding = false;
849 /* stop further expanding once we expanded a parameter used in a
851 if (definition == argument_expanding)
852 argument_expanding = NULL;
854 assert(current_expansion == definition);
855 current_expansion = parent;
858 static void grow_string_escaped(struct obstack *obst, const string_t *string, char const *delimiter)
860 char const *prefix = get_string_encoding_prefix(string->encoding);
861 obstack_printf(obst, "%s%s", prefix, delimiter);
862 size_t size = string->size;
863 const char *str = string->begin;
864 if (resolve_escape_sequences) {
865 obstack_grow(obst, str, size);
867 for (size_t i = 0; i < size; ++i) {
868 const char c = str[i];
869 if (c == '\\' || c == '"')
870 obstack_1grow(obst, '\\');
871 obstack_1grow(obst, c);
874 obstack_printf(obst, "%s", delimiter);
877 static void grow_token(struct obstack *obst, const token_t *token)
879 switch (token->kind) {
881 obstack_grow(obst, token->literal.string.begin, token->literal.string.size);
884 case T_STRING_LITERAL: {
885 char const *const delimiter = resolve_escape_sequences ? "\"" : "\\\"";
886 grow_string_escaped(obst, &token->literal.string, delimiter);
890 case T_CHARACTER_CONSTANT:
891 grow_string_escaped(obst, &token->literal.string, "'");
896 const char *str = token->base.symbol->string;
897 size_t len = strlen(str);
898 obstack_grow(obst, str, len);
904 static void stringify(const pp_definition_t *definition)
906 assert(obstack_object_size(&symbol_obstack) == 0);
908 size_t list_len = definition->list_len;
909 for (size_t p = 0; p < list_len; ++p) {
910 const saved_token_t *saved = &definition->token_list[p];
911 if (p > 0 && saved->had_whitespace)
912 obstack_1grow(&symbol_obstack, ' ');
913 grow_token(&symbol_obstack, &saved->token);
915 pp_token.kind = T_STRING_LITERAL;
916 pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
919 static inline void set_punctuator(token_kind_t const kind)
921 pp_token.kind = kind;
922 pp_token.base.symbol = token_symbols[kind];
925 static inline void set_digraph(token_kind_t const kind, symbol_t *const symbol)
927 pp_token.kind = kind;
928 pp_token.base.symbol = symbol;
932 * returns next final token from a preprocessor macro expansion
934 static bool expand_next(void)
936 if (current_expansion == NULL)
940 size_t pos = current_expansion->expand_pos;
941 if (pos >= current_expansion->list_len) {
942 finished_expanding(current_expansion);
943 /* it was the outermost expansion, parse pptoken normally */
944 if (current_expansion == NULL) {
949 const saved_token_t *saved = ¤t_expansion->token_list[pos++];
950 pp_token = saved->token;
951 if (pp_token.kind == '#') {
952 if (pos < current_expansion->list_len) {
953 const saved_token_t *next = ¤t_expansion->token_list[pos];
954 if (next->token.kind == T_MACRO_PARAMETER) {
955 pp_definition_t *def = next->token.macro_parameter.def;
956 assert(def != NULL && def->is_parameter);
963 if (current_expansion->expand_pos > 0)
964 info.had_whitespace = saved->had_whitespace;
965 current_expansion->expand_pos = pos;
966 pp_token.base.source_position = expansion_pos;
972 * Returns the next token kind found when continuing the current expansions
973 * without starting new sub-expansions.
975 static token_kind_t peek_expansion(void)
977 for (pp_definition_t *e = current_expansion; e; e = e->parent_expansion) {
978 if (e->expand_pos < e->list_len)
979 return e->token_list[e->expand_pos].token.kind;
984 static void skip_line_comment(void)
986 info.had_whitespace = true;
1003 static void skip_multiline_comment(void)
1005 info.had_whitespace = true;
1007 unsigned start_linenr = input.position.lineno;
1012 if (input.c == '*') {
1013 /* TODO: nested comment, warn here */
1018 if (input.c == '/') {
1019 if (input.position.lineno != input.output_line)
1020 info.whitespace_at_line_begin = input.position.colno;
1030 source_position_t source_position;
1031 source_position.input_name = pp_token.base.source_position.input_name;
1032 source_position.lineno = start_linenr;
1033 errorf(&source_position, "at end of file while looking for comment end");
1044 static bool skip_till_newline(bool stop_at_non_whitespace)
1056 if (input.c == '/') {
1058 skip_line_comment();
1060 } else if (input.c == '*') {
1062 skip_multiline_comment();
1074 if (stop_at_non_whitespace)
1083 static void skip_whitespace(void)
1089 ++info.whitespace_at_line_begin;
1090 info.had_whitespace = true;
1095 info.at_line_begin = true;
1096 info.had_whitespace = true;
1097 info.whitespace_at_line_begin = 0;
1102 if (input.c == '/') {
1104 skip_line_comment();
1106 } else if (input.c == '*') {
1108 skip_multiline_comment();
1122 static inline void eat_pp(pp_token_kind_t const kind)
1124 assert(pp_token.base.symbol->pp_ID == kind);
1129 static inline void eat_token(token_kind_t const kind)
1131 assert(pp_token.kind == kind);
1136 static void parse_symbol(void)
1138 assert(obstack_object_size(&symbol_obstack) == 0);
1143 obstack_1grow(&symbol_obstack, (char) input.c);
1152 case 'U': n = 8; goto universal;
1153 case 'u': n = 4; goto universal;
1155 if (!resolve_escape_sequences) {
1156 obstack_1grow(&symbol_obstack, '\\');
1157 obstack_1grow(&symbol_obstack, input.c);
1160 utf32 const v = parse_universal_char(n);
1161 if (!is_universal_char_valid_identifier(v)) {
1162 if (is_universal_char_valid(v)) {
1163 errorf(&input.position,
1164 "universal character \\%c%0*X is not valid in an identifier",
1165 n == 4 ? 'u' : 'U', (int)n, v);
1167 } else if (obstack_object_size(&symbol_obstack) == 0 && is_universal_char_invalid_identifier_start(v)) {
1168 errorf(&input.position,
1169 "universal character \\%c%0*X is not valid as start of an identifier",
1170 n == 4 ? 'u' : 'U', (int)n, v);
1171 } else if (resolve_escape_sequences) {
1172 obstack_grow_utf8(&symbol_obstack, v);
1190 obstack_1grow(&symbol_obstack, '\0');
1191 char *string = obstack_finish(&symbol_obstack);
1193 /* might be a wide string or character constant ( L"string"/L'c' ) */
1194 if (input.c == '"' && string[0] == 'L' && string[1] == '\0') {
1195 obstack_free(&symbol_obstack, string);
1196 parse_string_literal(STRING_ENCODING_WIDE);
1198 } else if (input.c == '\'' && string[0] == 'L' && string[1] == '\0') {
1199 obstack_free(&symbol_obstack, string);
1200 parse_character_constant(STRING_ENCODING_WIDE);
1204 symbol_t *symbol = symbol_table_insert(string);
1206 pp_token.kind = symbol->ID;
1207 pp_token.base.symbol = symbol;
1209 /* we can free the memory from symbol obstack if we already had an entry in
1210 * the symbol table */
1211 if (symbol->string != string) {
1212 obstack_free(&symbol_obstack, string);
1216 static void parse_number(void)
1218 obstack_1grow(&symbol_obstack, (char) input.c);
1225 case SYMBOL_CASES_WITHOUT_E_P:
1226 obstack_1grow(&symbol_obstack, (char) input.c);
1234 obstack_1grow(&symbol_obstack, (char) input.c);
1236 if (input.c == '+' || input.c == '-') {
1237 obstack_1grow(&symbol_obstack, (char) input.c);
1249 pp_token.kind = T_NUMBER;
1250 pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
1253 #define MAYBE_PROLOG \
1257 #define MAYBE(ch, kind) \
1260 set_punctuator(kind); \
1263 #define MAYBE_DIGRAPH(ch, kind, symbol) \
1266 set_digraph(kind, symbol); \
1269 #define ELSE_CODE(code) \
1274 #define ELSE(kind) ELSE_CODE(set_punctuator(kind); return;)
1276 /** identifies and returns the next preprocessing token contained in the
1277 * input stream. No macro expansion is performed. */
1278 static void next_input_token(void)
1280 if (next_info.had_whitespace) {
1282 next_info.had_whitespace = false;
1284 info.at_line_begin = false;
1285 info.had_whitespace = false;
1288 pp_token.base.source_position = input.position;
1289 pp_token.base.symbol = NULL;
1294 info.whitespace_at_line_begin++;
1295 info.had_whitespace = true;
1300 info.at_line_begin = true;
1301 info.had_whitespace = true;
1302 info.whitespace_at_line_begin = 0;
1314 parse_string_literal(STRING_ENCODING_CHAR);
1318 parse_character_constant(STRING_ENCODING_CHAR);
1340 MAYBE('.', T_DOTDOTDOT)
1344 set_punctuator('.');
1350 MAYBE('&', T_ANDAND)
1351 MAYBE('=', T_ANDEQUAL)
1355 MAYBE('=', T_ASTERISKEQUAL)
1359 MAYBE('+', T_PLUSPLUS)
1360 MAYBE('=', T_PLUSEQUAL)
1364 MAYBE('>', T_MINUSGREATER)
1365 MAYBE('-', T_MINUSMINUS)
1366 MAYBE('=', T_MINUSEQUAL)
1370 MAYBE('=', T_EXCLAMATIONMARKEQUAL)
1374 MAYBE('=', T_SLASHEQUAL)
1377 skip_multiline_comment();
1381 skip_line_comment();
1386 MAYBE_DIGRAPH('>', '}', symbol_percentgreater)
1387 MAYBE('=', T_PERCENTEQUAL)
1392 MAYBE_DIGRAPH(':', T_HASHHASH, symbol_percentcolonpercentcolon)
1396 goto digraph_percentcolon;
1399 digraph_percentcolon:
1400 set_digraph('#', symbol_percentcolon);
1406 MAYBE_DIGRAPH(':', '[', symbol_lesscolon)
1407 MAYBE_DIGRAPH('%', '{', symbol_lesspercent)
1408 MAYBE('=', T_LESSEQUAL)
1411 MAYBE('=', T_LESSLESSEQUAL)
1416 MAYBE('=', T_GREATEREQUAL)
1419 MAYBE('=', T_GREATERGREATEREQUAL)
1420 ELSE(T_GREATERGREATER)
1424 MAYBE('=', T_CARETEQUAL)
1428 MAYBE('=', T_PIPEEQUAL)
1429 MAYBE('|', T_PIPEPIPE)
1433 MAYBE_DIGRAPH('>', ']', symbol_colongreater)
1435 if (c_mode & _CXX) {
1437 set_punctuator(T_COLONCOLON);
1444 MAYBE('=', T_EQUALEQUAL)
1448 MAYBE('#', T_HASHHASH)
1461 set_punctuator(input.c);
1466 if (input_stack != NULL) {
1467 fclose(close_pp_input());
1468 pop_restore_input();
1471 if (input.c == (utf32)EOF)
1472 --input.position.lineno;
1473 print_line_directive(&input.position, "2");
1476 info.at_line_begin = true;
1477 set_punctuator(T_EOF);
1483 int next_c = input.c;
1486 if (next_c == 'U' || next_c == 'u') {
1493 if (error_on_unknown_chars) {
1494 errorf(&pp_token.base.source_position,
1495 "unknown character '%lc' found\n", input.c);
1499 assert(obstack_object_size(&symbol_obstack) == 0);
1500 obstack_grow_utf8(&symbol_obstack, input.c);
1501 obstack_1grow(&symbol_obstack, '\0');
1502 char *const string = obstack_finish(&symbol_obstack);
1503 symbol_t *const symbol = symbol_table_insert(string);
1504 if (symbol->string != string)
1505 obstack_free(&symbol_obstack, string);
1507 pp_token.kind = T_UNKNOWN_CHAR;
1508 pp_token.base.symbol = symbol;
1515 static void print_quoted_string(const char *const string)
1518 for (const char *c = string; *c != 0; ++c) {
1520 case '"': fputs("\\\"", out); break;
1521 case '\\': fputs("\\\\", out); break;
1522 case '\a': fputs("\\a", out); break;
1523 case '\b': fputs("\\b", out); break;
1524 case '\f': fputs("\\f", out); break;
1525 case '\n': fputs("\\n", out); break;
1526 case '\r': fputs("\\r", out); break;
1527 case '\t': fputs("\\t", out); break;
1528 case '\v': fputs("\\v", out); break;
1529 case '\?': fputs("\\?", out); break;
1532 fprintf(out, "\\%03o", (unsigned)*c);
1542 static void print_line_directive(const source_position_t *pos, const char *add)
1547 fprintf(out, "# %u ", pos->lineno);
1548 print_quoted_string(pos->input_name);
1553 if (pos->is_system_header) {
1557 printed_input_name = pos->input_name;
1558 input.output_line = pos->lineno-1;
1561 static bool emit_newlines(void)
1566 unsigned delta = pp_token.base.source_position.lineno - input.output_line;
1572 print_line_directive(&pp_token.base.source_position, NULL);
1575 for (unsigned i = 0; i < delta; ++i) {
1579 input.output_line = pp_token.base.source_position.lineno;
1581 unsigned whitespace = info.whitespace_at_line_begin;
1582 /* make sure there is at least 1 whitespace before a (macro-expanded)
1583 * '#' at line begin. I'm not sure why this is good, but gcc does it. */
1584 if (pp_token.kind == '#' && whitespace == 0)
1586 for (unsigned i = 0; i < whitespace; ++i)
1592 void set_preprocessor_output(FILE *output)
1596 error_on_unknown_chars = false;
1597 resolve_escape_sequences = false;
1599 error_on_unknown_chars = true;
1600 resolve_escape_sequences = true;
1604 void emit_pp_token(void)
1606 if (!emit_newlines() &&
1607 (info.had_whitespace || tokens_would_paste(last_token, pp_token.kind)))
1610 switch (pp_token.kind) {
1612 fputs(pp_token.literal.string.begin, out);
1615 case T_STRING_LITERAL:
1616 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1618 fputs(pp_token.literal.string.begin, out);
1622 case T_CHARACTER_CONSTANT:
1623 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1625 fputs(pp_token.literal.string.begin, out);
1629 case T_MACRO_PARAMETER:
1630 panic("macro parameter not expanded");
1633 fputs(pp_token.base.symbol->string, out);
1636 last_token = pp_token.kind;
/**
 * Loops until info.at_line_begin becomes true, i.e. until the remainder of
 * the current directive line is behind us.
 * NOTE(review): the loop body is not visible in this excerpt.
 */
1639 static void eat_pp_directive(void)
1641 while (!info.at_line_begin) {
/**
 * Compares two string_t values.
 * The sizes are compared first; afterwards both buffers are walked in
 * parallel over `size` bytes.
 */
1646 static bool strings_equal(const string_t *string1, const string_t *string2)
1648 size_t size = string1->size;
/* the length check comes before any byte is looked at */
1649 if (size != string2->size)
1652 const char *c1 = string1->begin;
1653 const char *c2 = string2->begin;
/* c1/c2 advance together with the index */
1654 for (size_t i = 0; i < size; ++i, ++c1, ++c2) {
/**
 * Compares two preprocessing tokens.
 * The kinds are compared first. Character constants and string literals are
 * then compared through strings_equal(), macro parameters through the symbol
 * of their definition, and the remaining visible branch through the token's
 * base symbol (pointer comparison).
 */
1661 static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
1663 if (token1->kind != token2->kind)
1666 switch (token1->kind) {
1668 case T_CHARACTER_CONSTANT:
1669 case T_STRING_LITERAL:
1670 return strings_equal(&token1->literal.string, &token2->literal.string);
1672 case T_MACRO_PARAMETER:
/* parameters of different definitions match when they name the same symbol */
1673 return token1->macro_parameter.def->symbol
1674 == token2->macro_parameter.def->symbol;
1677 return token1->base.symbol == token2->base.symbol;
/**
 * Compares the replacement lists of two macro definitions: same length, and
 * pairwise equal tokens with equal had_whitespace flags.
 * NOTE(review): the visible lines compare neither has_parameters,
 * n_parameters nor is_variadic, so two macros with the same body but
 * different parameter lists may compare equal -- confirm against the full
 * function.
 */
1681 static bool pp_definitions_equal(const pp_definition_t *definition1,
1682 const pp_definition_t *definition2)
1684 if (definition1->list_len != definition2->list_len)
1687 size_t len = definition1->list_len;
1688 const saved_token_t *t1 = definition1->token_list;
1689 const saved_token_t *t2 = definition2->token_list;
1690 for (size_t i = 0; i < len; ++i, ++t1, ++t2) {
1691 if (!pp_tokens_equal(&t1->token, &t2->token))
/* whitespace in front of a token is part of the comparison */
1693 if (t1->had_whitespace != t2->had_whitespace)
/**
 * Reports, at the position of the current token, that a '#' in a macro body
 * is not followed by a macro parameter.
 */
1699 static void missing_macro_param_error(void)
1701 errorf(&pp_token.base.source_position,
1702 "'#' is not followed by a macro parameter");
/**
 * Checks whether the current token may serve as a macro name.
 * `context` is the directive name used in the diagnostics ("#define" and
 * "#undef" at the call sites in this file).
 * Errors are reported for: end of line, a token that is not an identifier,
 * and identifiers rejected by the pp_ID switch.
 * Returns a bool that the callers test with `!`.
 */
1705 static bool is_defineable_token(char const *const context)
/* the directive line ended before a name was seen */
1707 if (info.at_line_begin) {
1708 errorf(&pp_token.base.source_position, "unexpected end of line after %s", context);
1711 symbol_t *const symbol = pp_token.base.symbol;
1715 if (pp_token.kind != T_IDENTIFIER) {
/* NOTE(review): the cases of this switch on the first character of the
 * spelling are not visible in this excerpt */
1716 switch (symbol->string[0]) {
1723 errorf(&pp_token.base.source_position, "expected identifier after %s, got %K", context, &pp_token);
1728 /* TODO turn this into a flag in pp_def. */
1729 switch (symbol->pp_ID) {
1732 errorf(&pp_token.base.source_position, "%K cannot be used as macro name in %s", &pp_token, context);
/**
 * Parses a #define directive and installs the resulting definition on the
 * macro's symbol.
 *
 * Steps visible here:
 *  - validate the macro name (is_defineable_token),
 *  - allocate a zeroed pp_definition_t on pp_obstack,
 *  - for function-like macros (a '(' directly after the name) collect the
 *    parameter list, including '...',
 *  - collect the replacement list, turning identifiers that name a parameter
 *    of this macro into T_MACRO_PARAMETER tokens,
 *  - compare with an existing definition and warn if they differ.
 *
 * Fix applied in this block: three address-of expressions had been garbled
 * into HTML-entity characters and are restored to "&parameter" (twice) and
 * "&param->source_position".
 */
1740 static void parse_define_directive(void)
/* pp_obstack must not hold a half-built object when we start */
1748 assert(obstack_object_size(&pp_obstack) == 0);
/* diagnostics for a bad name are issued by is_defineable_token() */
1750 if (!is_defineable_token("#define"))
1752 symbol_t *const symbol = pp_token.base.symbol;
/* the definition record lives on pp_obstack and starts out zeroed */
1754 pp_definition_t *new_definition
1755 = obstack_alloc(&pp_obstack, sizeof(new_definition[0]));
1756 memset(new_definition, 0, sizeof(new_definition[0]));
1757 new_definition->symbol = symbol;
1758 new_definition->source_position = input.position;
1760 /* this is probably the only place where spaces are significant in the
1761 * lexer (except for the fact that they separate tokens). #define b(x)
1762 * is something else than #define b (x) */
1763 if (input.c == '(') {
1768 switch (pp_token.kind) {
/* '...' makes the macro variadic and has to be the last list entry */
1770 new_definition->is_variadic = true;
1771 eat_token(T_DOTDOTDOT);
1772 if (pp_token.kind != ')') {
1773 errorf(&input.position,
1774 "'...' not at end of macro argument list");
1779 case T_IDENTIFIER: {
/* each parameter is built in a local record and appended to the object
 * that is growing on pp_obstack */
1780 pp_definition_t parameter;
1781 memset(&parameter, 0, sizeof(parameter));
1782 parameter.source_position = pp_token.base.source_position;
1783 parameter.symbol = pp_token.base.symbol;
1784 parameter.is_parameter = true;
1785 obstack_grow(&pp_obstack, &parameter, sizeof(parameter));
1786 eat_token(T_IDENTIFIER);
1788 if (pp_token.kind == ',') {
1793 if (pp_token.kind != ')') {
1794 errorf(&pp_token.base.source_position,
1795 "expected ',' or ')' after identifier, got %K",
1804 goto finish_argument_list;
1807 errorf(&pp_token.base.source_position,
1808 "expected identifier, '...' or ')' in #define argument list, got %K",
1814 finish_argument_list:
1815 new_definition->has_parameters = true;
/* the parameter count follows from the size of the grown object */
1816 size_t size = obstack_object_size(&pp_obstack);
1817 new_definition->n_parameters
1818 = size / sizeof(new_definition->parameters[0]);
1819 new_definition->parameters = obstack_finish(&pp_obstack);
/* Temporarily install every parameter as the pp_definition of its symbol
 * (the previous definition is kept in parent_expansion) so that the scan
 * of the replacement list below can recognise parameter names. */
1820 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1821 pp_definition_t *param = &new_definition->parameters[i];
1822 symbol_t *symbol = param->symbol;
1823 pp_definition_t *previous = symbol->pp_definition;
/* the symbol is already bound to a parameter of this very macro */
1824 if (previous != NULL
1825 && previous->function_definition == new_definition) {
1826 errorf(&param->source_position,
1827 "duplicate macro parameter '%Y'", symbol);
/* duplicates are renamed so they are skipped when bindings are undone */
1828 param->symbol = sym_anonymous;
1831 param->parent_expansion = previous;
1832 param->function_definition = new_definition;
1833 symbol->pp_definition = param;
1839 /* construct token list */
1840 assert(obstack_object_size(&pp_obstack) == 0);
/* set after a '#' in a function-like macro: the next token has to be a
 * macro parameter */
1841 bool next_must_be_param = false;
1842 while (!info.at_line_begin) {
1843 if (pp_token.kind == T_IDENTIFIER) {
1844 const symbol_t *symbol = pp_token.base.symbol;
1845 pp_definition_t *definition = symbol->pp_definition;
/* identifiers bound to a parameter of this macro become parameter tokens */
1846 if (definition != NULL
1847 && definition->function_definition == new_definition) {
1848 pp_token.kind = T_MACRO_PARAMETER;
1849 pp_token.macro_parameter.def = definition;
1852 if (next_must_be_param && pp_token.kind != T_MACRO_PARAMETER) {
1853 missing_macro_param_error();
/* store the token together with its leading-whitespace flag */
1855 saved_token_t saved_token;
1856 saved_token.token = pp_token;
1857 saved_token.had_whitespace = info.had_whitespace;
1858 obstack_grow(&pp_obstack, &saved_token, sizeof(saved_token));
1860 = new_definition->has_parameters && pp_token.kind == '#';
/* a '#' at the very end of the replacement list */
1863 if (next_must_be_param)
1864 missing_macro_param_error();
1866 new_definition->list_len = obstack_object_size(&pp_obstack)
1867 / sizeof(new_definition->token_list[0]);
1868 new_definition->token_list = obstack_finish(&pp_obstack);
/* undo the temporary parameter bindings made above */
1870 if (new_definition->has_parameters) {
1871 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1872 pp_definition_t *param = &new_definition->parameters[i];
1873 symbol_t *symbol = param->symbol;
1874 if (symbol == sym_anonymous)
1876 assert(symbol->pp_definition == param);
1877 assert(param->function_definition == new_definition);
1878 symbol->pp_definition = param->parent_expansion;
1879 param->parent_expansion = NULL;
/* redefinition handling */
1883 pp_definition_t *old_definition = symbol->pp_definition;
1884 if (old_definition != NULL) {
1885 if (!pp_definitions_equal(old_definition, new_definition)) {
1886 warningf(WARN_OTHER, &input.position, "multiple definition of macro '%Y' (first defined %P)", symbol, &old_definition->source_position);
1888 /* reuse the old definition */
1889 obstack_free(&pp_obstack, new_definition);
1890 new_definition = old_definition;
1894 symbol->pp_definition = new_definition;
/* drop a partially grown object that is still pending on pp_obstack
 * (NOTE(review): presumably the error exit path -- its label is not shown) */
1898 if (obstack_object_size(&pp_obstack) > 0) {
1899 char *ptr = obstack_finish(&pp_obstack);
1900 obstack_free(&pp_obstack, ptr);
/**
 * Parses an #undef directive: validates the macro name, clears the
 * pp_definition of its symbol and warns about trailing tokens.
 */
1905 static void parse_undef_directive(void)
1913 if (!is_defineable_token("#undef")) {
/* removing the definition only resets the pointer on the symbol */
1918 pp_token.base.symbol->pp_definition = NULL;
1921 if (!info.at_line_begin) {
1922 warningf(WARN_OTHER, &input.position, "extra tokens at end of #undef directive");
/*
 * parse_headername(system_include): reads the header name that follows
 * #include. *system_include is set to true for the <...> form and to false
 * for the "..." form or a string literal. The name is either read character
 * by character up to the closing delimiter, or -- in the fallback path -- is
 * assembled from preprocessing tokens. The caller in this file treats a
 * NULL result as failure.
 */
1927 /** behind an #include we can have the special headername lexemes.
1928 * They're only allowed behind an #include so they're not recognized
1929 * by the normal next_preprocessing_token. We handle them as a special
1931 static const char *parse_headername(bool *system_include)
1933 if (info.at_line_begin) {
1934 parse_error("expected headername after #include");
1938 /* check whether we have a "... or <... headername */
/* remembered so the token position can be reset to the start of the name */
1939 source_position_t position = input.position;
1943 case '<': delimiter = '>'; *system_include = true; goto parse_name;
1944 case '"': delimiter = '"'; *system_include = false; goto parse_name;
/* the raw name is collected on symbol_obstack, which must be idle */
1946 assert(obstack_object_size(&symbol_obstack) == 0);
/* error path: throw away the partial name before reporting */
1953 char *dummy = obstack_finish(&symbol_obstack);
1954 obstack_free(&symbol_obstack, dummy);
1956 errorf(&pp_token.base.source_position,
1957 "header name without closing '%c'", (char)delimiter);
1961 if (input.c == delimiter) {
1963 goto finish_headername;
/* any other character belongs to the name */
1965 obstack_1grow(&symbol_obstack, (char)input.c);
1971 /* we should never be here */
/* fallback: take the header name from regular preprocessing tokens */
1975 next_preprocessing_token();
1976 if (info.at_line_begin) {
1977 /* TODO: if we are already in the new line then we parsed more than
1978 * wanted. We reuse the token, but could produce following errors
1979 * misbehaviours... */
1980 goto error_invalid_input;
1982 if (pp_token.kind == T_STRING_LITERAL) {
1983 *system_include = false;
1984 return pp_token.literal.string.begin;
1985 } else if (pp_token.kind == '<') {
1986 *system_include = true;
/* tokens up to the closing '>' are buffered on pp_obstack */
1987 assert(obstack_object_size(&pp_obstack) == 0);
1989 next_preprocessing_token();
1990 if (info.at_line_begin) {
1991 /* TODO: we shouldn't have parsed/expanded something on the
1992 * next line yet... */
1993 char *dummy = obstack_finish(&pp_obstack);
1994 obstack_free(&pp_obstack, dummy);
1995 goto error_invalid_input;
1997 if (pp_token.kind == '>')
2000 saved_token_t saved;
2001 saved.token = pp_token;
2002 saved.had_whitespace = info.had_whitespace;
2003 obstack_grow(&pp_obstack, &saved, sizeof(saved));
/* render the buffered tokens into symbol_obstack; a single space is
 * inserted before every token (except the first) that had whitespace */
2005 size_t size = obstack_object_size(&pp_obstack);
2006 assert(size % sizeof(saved_token_t) == 0);
2007 size_t n_tokens = size / sizeof(saved_token_t);
2008 saved_token_t *tokens = obstack_finish(&pp_obstack);
2009 assert(obstack_object_size(&symbol_obstack) == 0);
2010 for (size_t i = 0; i < n_tokens; ++i) {
2011 const saved_token_t *saved = &tokens[i];
2012 if (i > 0 && saved->had_whitespace)
2013 obstack_1grow(&symbol_obstack, ' ');
2014 grow_token(&symbol_obstack, &saved->token);
2016 obstack_free(&pp_obstack, tokens);
2017 goto finish_headername;
2019 error_invalid_input:
2021 char *dummy = obstack_finish(&symbol_obstack);
2022 obstack_free(&symbol_obstack, dummy);
2025 errorf(&pp_token.base.source_position,
2026 "expected \"FILENAME\" or <FILENAME> after #include");
/* NUL-terminate the collected name and pass it through identify_string() */
2032 obstack_1grow(&symbol_obstack, '\0');
2033 char *const headername = obstack_finish(&symbol_obstack);
2034 const char *identified = identify_string(headername);
/* diagnostics for this token should point at the start of the header name */
2035 pp_token.base.source_position = position;
/**
 * Tries to open `headername` and to make it the current input.
 *
 * bracket_include: passed from parse_headername()'s system_include flag by
 *                  the caller; when false the directory of the current input
 *                  is tried first.
 * include_next:    true for #include_next.
 * headername:      NUL-terminated header name.
 *
 * Returns a bool; the caller reports "failed including" together with
 * strerror(errno) (NOTE(review): the test of the result is not visible in
 * this excerpt).
 */
2039 static bool do_include(bool const bracket_include, bool const include_next, char const *const headername)
2041 size_t const headername_len = strlen(headername);
2042 searchpath_entry_t *entry;
/* Continue behind the search path entry the current input came from, if it
 * has one; otherwise start at the head of the bracket or quote list.
 * NOTE(review): presumably guarded by include_next -- the guard is not
 * visible here. */
2044 entry = input.path ? input.path->next
2045 : bracket_include ? bracket_searchpath.first
2046 : quote_searchpath.first;
2048 if (!bracket_include) {
2049 /* put dirname of current input on obstack */
2050 const char *filename = input.position.input_name;
2051 const char *last_slash = strrchr(filename, '/');
2052 const char *full_name;
2053 if (last_slash != NULL) {
/* len + 1 copies the directory part including its trailing '/' */
2054 size_t len = last_slash - filename;
2055 obstack_grow(&symbol_obstack, filename, len + 1);
2056 obstack_grow0(&symbol_obstack, headername, headername_len);
2057 char *complete_path = obstack_finish(&symbol_obstack);
2058 full_name = identify_string(complete_path);
/* no directory component: the header name is used as is */
2060 full_name = headername;
2063 FILE *file = fopen(full_name, "r");
/* this input is not associated with a search path entry (NULL) */
2065 switch_pp_input(file, full_name, NULL, false);
2068 entry = quote_searchpath.first;
2070 entry = bracket_searchpath.first;
2074 assert(obstack_object_size(&symbol_obstack) == 0);
2075 /* check searchpath */
2076 for (; entry; entry = entry->next) {
2077 const char *path = entry->path;
2078 size_t len = strlen(path);
2079 obstack_grow(&symbol_obstack, path, len);
/* NOTE(review): for an empty path, len-1 wraps around (size_t) and this
 * reads outside the string -- confirm that empty entries cannot occur */
2080 if (path[len-1] != '/')
2081 obstack_1grow(&symbol_obstack, '/');
/* headername_len+1 copies the terminating NUL as well */
2082 obstack_grow(&symbol_obstack, headername, headername_len+1);
2084 char *complete_path = obstack_finish(&symbol_obstack);
2085 FILE *file = fopen(complete_path, "r");
2087 const char *filename = identify_string(complete_path);
2088 switch_pp_input(file, filename, entry, entry->is_system_path);
/* candidate path is released again */
2091 obstack_free(&symbol_obstack, complete_path);
/**
 * Handles #include (include_next == false) and #include_next (true): reads
 * the header name, warns about trailing tokens, enforces INCLUDE_LIMIT and
 * switches to the included file through do_include().
 */
2098 static void parse_include_directive(bool const include_next)
2105 /* do not eat the TP_include, since it would already parse the next token
2106 * which needs special handling here. */
2107 skip_till_newline(true);
2108 bool system_include;
2109 const char *headername = parse_headername(&system_include);
/* parse_headername() signals failure with NULL */
2110 if (headername == NULL) {
2115 bool had_nonwhitespace = skip_till_newline(false);
2116 if (had_nonwhitespace) {
2117 warningf(WARN_OTHER, &input.position,
2118 "extra tokens at end of #include directive");
/* nesting depth limit, see INCLUDE_LIMIT */
2121 if (n_inputs > INCLUDE_LIMIT) {
2122 errorf(&pp_token.base.source_position, "#include nested too deeply");
/* the next token is treated as the first token of a fresh line */
2129 info.whitespace_at_line_begin = 0;
2130 info.had_whitespace = false;
2131 info.at_line_begin = true;
2134 bool res = do_include(system_include, include_next, headername);
/* failure: report with the errno text and restore the previous input */
2138 errorf(&pp_token.base.source_position, "failed including '%s': %s", headername, strerror(errno));
2139 pop_restore_input();
/**
 * Allocates a zeroed pp_conditional_t on pp_obstack and makes it the new top
 * of conditional_stack (its parent is the previous top). The callers use the
 * returned pointer to fill in the remaining fields.
 */
2143 static pp_conditional_t *push_conditional(void)
2145 pp_conditional_t *conditional
2146 = obstack_alloc(&pp_obstack, sizeof(*conditional));
2147 memset(conditional, 0, sizeof(*conditional));
2149 conditional->parent = conditional_stack;
2150 conditional_stack = conditional;
/**
 * Removes the innermost conditional from conditional_stack. The stack must
 * not be empty (asserted). The popped record itself is not released here.
 */
2155 static void pop_conditional(void)
2157 assert(conditional_stack != NULL);
2158 conditional_stack = conditional_stack->parent;
/**
 * Reports every conditional that is still open on conditional_stack, using
 * the source position stored in it: "unterminated #else" when its #else was
 * already seen, "unterminated condition" otherwise.
 * NOTE(review): the statement that advances the stack inside the loop is not
 * visible in this excerpt.
 */
2161 void check_unclosed_conditionals(void)
2163 while (conditional_stack != NULL) {
2164 pp_conditional_t *conditional = conditional_stack;
2166 if (conditional->in_else) {
2167 errorf(&conditional->source_position, "unterminated #else");
2169 errorf(&conditional->source_position, "unterminated condition");
/**
 * Handles #ifdef (is_ifdef == true) and #ifndef (is_ifdef == false).
 * A conditional is pushed in both visible paths: one that is only marked
 * `skip`, and one that carries the evaluated condition.
 */
2175 static void parse_ifdef_ifndef_directive(bool const is_ifdef)
2178 eat_pp(is_ifdef ? TP_ifdef : TP_ifndef);
/* NOTE(review): presumably the path taken while already in skip mode -- the
 * guarding condition is not visible; the conditional is only marked as
 * skipped and nothing is evaluated */
2182 pp_conditional_t *conditional = push_conditional();
2183 conditional->source_position = pp_token.base.source_position;
2184 conditional->skip = true;
/* an identifier on the same line is required */
2188 if (pp_token.kind != T_IDENTIFIER || info.at_line_begin) {
2189 errorf(&pp_token.base.source_position,
2190 "expected identifier after #%s, got %K",
2191 is_ifdef ? "ifdef" : "ifndef", &pp_token);
2194 /* just take the true case in the hope to avoid further errors */
2197 /* evaluate whether we are in true or false case */
/* "is defined" == is_ifdef: true for #ifdef on a defined name and for
 * #ifndef on an undefined one */
2198 condition = (bool)pp_token.base.symbol->pp_definition == is_ifdef;
2199 eat_token(T_IDENTIFIER);
2201 if (!info.at_line_begin) {
2202 errorf(&pp_token.base.source_position,
2203 "extra tokens at end of #%s",
2204 is_ifdef ? "ifdef" : "ifndef");
2209 pp_conditional_t *conditional = push_conditional();
2210 conditional->source_position = pp_token.base.source_position;
2211 conditional->condition = condition;
/**
 * Handles #else: warns about trailing tokens, rejects an #else without an
 * open conditional or a second #else, and otherwise flips the skip state of
 * the innermost conditional.
 */
2218 static void parse_else_directive(void)
2222 if (!info.at_line_begin) {
2224 warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #else");
2229 pp_conditional_t *conditional = conditional_stack;
2230 if (conditional == NULL) {
2231 errorf(&pp_token.base.source_position, "#else without prior #if");
2235 if (conditional->in_else) {
2236 errorf(&pp_token.base.source_position,
2237 "#else after #else (condition started %P)",
2238 &conditional->source_position);
2243 conditional->in_else = true;
/* conditionals marked `skip` leave skip_mode untouched; otherwise the else
 * part is skipped exactly when the condition was true */
2244 if (!conditional->skip) {
2245 skip_mode = conditional->condition;
/* from now on diagnostics for this conditional point at the #else */
2247 conditional->source_position = pp_token.base.source_position;
/**
 * Handles #endif: warns about trailing tokens and rejects an #endif without
 * an open conditional.
 * NOTE(review): what happens for a conditional that is not marked `skip`,
 * and the removal of the conditional, are not visible in this excerpt.
 */
2250 static void parse_endif_directive(void)
2254 if (!info.at_line_begin) {
2256 warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #endif");
2261 pp_conditional_t *conditional = conditional_stack;
2262 if (conditional == NULL) {
2263 errorf(&pp_token.base.source_position, "#endif without prior #if");
2267 if (!conditional->skip) {
/**
 * Kind of a `#pragma STDC` directive as classified by
 * parse_pragma_directive(), which also uses STDC_UNKNOWN, STDC_FP_CONTRACT
 * and STDC_FENV_ACCESS.
 */
2273 typedef enum stdc_pragma_kind_t {
2277 STDC_CX_LIMITED_RANGE
2278 } stdc_pragma_kind_t;
/**
 * Argument of a `#pragma STDC` directive; parse_pragma_directive() uses the
 * values STDC_VALUE_ON, STDC_VALUE_OFF, STDC_VALUE_DEFAULT and
 * STDC_VALUE_UNKNOWN.
 */
2280 typedef enum stdc_pragma_value_kind_t {
2285 } stdc_pragma_value_kind_t;
/**
 * Handles #pragma. Only `STDC` pragmas are classified here (when c_mode has
 * _C99 set); anything else ends in the "unknown #pragma" warning.
 * NOTE(review): no use of the recognised kind/value is visible in this
 * excerpt.
 */
2287 static void parse_pragma_directive(void)
2295 if (pp_token.kind != T_IDENTIFIER) {
2296 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
2297 "expected identifier after #pragma");
2302 stdc_pragma_kind_t kind = STDC_UNKNOWN;
2303 if (pp_token.base.symbol->pp_ID == TP_STDC && c_mode & _C99) {
/* which STDC pragma is it? */
2307 switch (pp_token.base.symbol->pp_ID) {
2308 case TP_FP_CONTRACT: kind = STDC_FP_CONTRACT; break;
2309 case TP_FENV_ACCESS: kind = STDC_FENV_ACCESS; break;
2310 case TP_CX_LIMITED_RANGE: kind = STDC_CX_LIMITED_RANGE; break;
2313 if (kind != STDC_UNKNOWN) {
/* the argument has to be one of ON, OFF or DEFAULT */
2315 stdc_pragma_value_kind_t value;
2316 switch (pp_token.base.symbol->pp_ID) {
2317 case TP_ON: value = STDC_VALUE_ON; break;
2318 case TP_OFF: value = STDC_VALUE_OFF; break;
2319 case TP_DEFAULT: value = STDC_VALUE_DEFAULT; break;
2320 default: value = STDC_VALUE_UNKNOWN; break;
/* a bad argument downgrades the pragma to "unknown", so the warning below
 * is issued in addition to this error */
2322 if (value == STDC_VALUE_UNKNOWN) {
2323 kind = STDC_UNKNOWN;
2324 errorf(&pp_token.base.source_position, "bad STDC pragma argument");
2329 if (kind == STDC_UNKNOWN) {
2330 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
2331 "encountered unknown #pragma");
/**
 * Handles `#line <number> ["file"] [flags]` and the equivalent
 * `# <number> ...` markers: updates input.position (line number, optional
 * input name, system header flag) and forces the next output to start with
 * a fresh line marker.
 */
2335 static void parse_line_directive(void)
2337 if (pp_token.kind != T_NUMBER) {
2339 parse_error("expected integer");
/* NOTE(review): base 0 lets strtol accept octal/hex prefixes, so "010" is
 * read as 8; a line directive takes a decimal digit sequence -- confirm
 * whether base 10 was intended. The result is also stored into lineno
 * without a range check in the visible lines. */
2342 long const line = strtol(pp_token.literal.string.begin, &end, 0);
2344 /* use offset -1 as this is about the next line */
2345 input.position.lineno = line - 1;
2346 /* force output of line */
2347 input.output_line = input.position.lineno - 20;
2350 errorf(&input.position, "'%S' is not a valid line number",
2351 &pp_token.literal.string);
2355 if (info.at_line_begin)
/* optional file name: only plain (char-encoded) string literals count */
2358 if (pp_token.kind == T_STRING_LITERAL
2359 && pp_token.literal.string.encoding == STRING_ENCODING_CHAR) {
2360 input.position.input_name = pp_token.literal.string.begin;
2361 input.position.is_system_header = false;
2364 /* attempt to parse numeric flags as outputted by gcc preprocessor */
2365 while (!info.at_line_begin && pp_token.kind == T_NUMBER) {
2367 * 1 - indicates start of a new file
2368 * 2 - indicates return from a file
2369 * 3 - indicates system header
2370 * 4 - indicates implicit extern "C" in C++ mode
2372 * currently we're only interested in "3"
2374 if (streq(pp_token.literal.string.begin, "3")) {
2375 input.position.is_system_header = true;
/**
 * Handles #error: concatenates the spelling of the remaining tokens of the
 * line on pp_obstack and reports the text as an error at the position of
 * the first token.
 */
2384 static void parse_error_directive(void)
/* escape sequences are left unresolved while the message is collected; the
 * previous setting is restored after the loop */
2391 bool const old_resolve_escape_sequences = resolve_escape_sequences;
2392 resolve_escape_sequences = false;
2394 source_position_t const pos = pp_token.base.source_position;
/* a single space separates tokens that had whitespace between them (never
 * in front of the first one) */
2396 if (info.had_whitespace && obstack_object_size(&pp_obstack) != 0)
2397 obstack_1grow(&pp_obstack, ' ');
2399 switch (pp_token.kind) {
/* NOTE(review): case label not shown -- literal text copied verbatim */
2401 string_t const *const str = &pp_token.literal.string;
2402 obstack_grow(&pp_obstack, str->begin, str->size);
/* string and character literals get their prefix and delimiters back */
2408 case T_STRING_LITERAL: delim = '"'; goto string;
2409 case T_CHARACTER_CONSTANT: delim = '\''; goto string;
2411 string_t const *const str = &pp_token.literal.string;
2412 char const *const enc = get_string_encoding_prefix(str->encoding);
2413 obstack_printf(&pp_obstack, "%s%c%s%c", enc, delim, str->begin, delim);
/* tokens appended through the spelling of their symbol */
2418 char const *const str = pp_token.base.symbol->string;
2419 obstack_grow(&pp_obstack, str, strlen(str));
2425 } while (!info.at_line_begin);
2427 resolve_escape_sequences = old_resolve_escape_sequences;
/* terminate, report and release the collected message */
2429 obstack_1grow(&pp_obstack, '\0');
2430 char *const str = obstack_finish(&pp_obstack);
2431 errorf(&pos, "#%s", str);
2432 obstack_free(&pp_obstack, str);
/**
 * Dispatches a preprocessing directive to its handler, based on the pp_ID of
 * the directive name. A number in place of a name is treated as a line
 * directive; anything else is reported as invalid. On exit the input is
 * positioned at the beginning of a line (asserted).
 */
2435 static void parse_preprocessing_directive(void)
2439 if (info.at_line_begin) {
2440 /* empty directive */
2444 if (pp_token.base.symbol) {
2445 switch (pp_token.base.symbol->pp_ID) {
2446 case TP_define: parse_define_directive(); break;
2447 case TP_else: parse_else_directive(); break;
2448 case TP_endif: parse_endif_directive(); break;
2449 case TP_error: parse_error_directive(); break;
2450 case TP_ifdef: parse_ifdef_ifndef_directive(true); break;
2451 case TP_ifndef: parse_ifdef_ifndef_directive(false); break;
2452 case TP_include: parse_include_directive(false); break;
2453 case TP_include_next: parse_include_directive(true); break;
/* #line shares the handler of the "# <number>" form below */
2454 case TP_line: next_input_token(); goto line_directive;
2455 case TP_pragma: parse_pragma_directive(); break;
2456 case TP_undef: parse_undef_directive(); break;
2459 } else if (pp_token.kind == T_NUMBER) {
2461 parse_line_directive();
2465 errorf(&pp_token.base.source_position, "invalid preprocessing directive #%K", &pp_token);
2470 assert(info.at_line_begin);
/**
 * Closes the token list of the macro argument that is currently collected:
 * the object grown on pp_obstack becomes current_argument's token_list and
 * its length is derived from the object size. The NULL check at the top
 * covers the case that no argument is being collected.
 */
2473 static void finish_current_argument(void)
2475 if (current_argument == NULL)
2477 size_t size = obstack_object_size(&pp_obstack);
2478 current_argument->list_len = size/sizeof(current_argument->token_list[0]);
2479 current_argument->token_list = obstack_finish(&pp_obstack);
/**
 * Produces the next preprocessing token in pp_token.
 *
 * Tokens come from a running macro expansion if expand_next() delivers one;
 * otherwise directives at line begin are executed and, in skip mode, tokens
 * are discarded until the end of input. Afterwards macro handling happens:
 * macro parameters and defined symbols start an expansion, and while the
 * arguments of a function-like macro call are collected (current_call !=
 * NULL) the tokens are routed into the current argument.
 *
 * Fix applied in this block: two address-of expressions had been garbled
 * into an HTML-entity character and are restored to "&current_call->...".
 */
2482 void next_preprocessing_token(void)
2485 if (!expand_next()) {
/* a '#' at the beginning of a line starts a directive */
2488 while (pp_token.kind == '#' && info.at_line_begin) {
2489 parse_preprocessing_directive();
/* in skip mode everything up to the end of input is dropped */
2491 } while (skip_mode && pp_token.kind != T_EOF);
2494 const token_kind_t kind = pp_token.kind;
/* macros are looked for when no call is being collected, or while an
 * argument is being expanded */
2495 if (current_call == NULL || argument_expanding != NULL) {
2496 symbol_t *const symbol = pp_token.base.symbol;
2498 if (kind == T_MACRO_PARAMETER) {
2499 assert(current_expansion != NULL);
2500 start_expanding(pp_token.macro_parameter.def);
2504 pp_definition_t *const pp_definition = symbol->pp_definition;
/* a macro that is currently being expanded is not expanded again */
2505 if (pp_definition != NULL && !pp_definition->is_expanding) {
2506 if (pp_definition->has_parameters) {
2508 /* check if next token is a '(' */
2509 whitespace_info_t old_info = info;
2510 token_kind_t next_token = peek_expansion();
/* nothing left in the expansion: look at the input instead */
2511 if (next_token == T_EOF) {
2512 info.at_line_begin = false;
2513 info.had_whitespace = false;
2515 if (input.c == '(') {
2520 if (next_token == '(') {
/* remember where the outermost expansion started (for diagnostics) */
2521 if (current_expansion == NULL)
2522 expansion_pos = pp_token.base.source_position;
2523 next_preprocessing_token();
2524 assert(pp_token.kind == '(');
/* set up argument collection for this call */
2526 pp_definition->parent_expansion = current_expansion;
2527 current_call = pp_definition;
2528 current_call->expand_pos = 0;
2529 current_call->expand_info = old_info;
2530 if (current_call->n_parameters > 0) {
2531 current_argument = &current_call->parameters[0];
2532 assert(argument_brace_count == 0);
2536 /* skip_whitespaces() skipped newlines and whitespace,
2537 * remember results for next token */
/* macro without parameter list: expand directly */
2543 if (current_expansion == NULL)
2544 expansion_pos = pp_token.base.source_position;
2545 start_expanding(pp_definition);
2552 if (current_call != NULL) {
2553 /* current_call != NULL */
/* argument_brace_count tracks parentheses nested inside an argument */
2555 ++argument_brace_count;
2556 } else if (kind == ')') {
2557 if (argument_brace_count > 0) {
2558 --argument_brace_count;
/* the closing ')' of the call: finish the last argument and expand */
2560 finish_current_argument();
2561 assert(kind == ')');
2562 start_expanding(current_call);
2563 info = current_call->expand_info;
2564 current_call = NULL;
2565 current_argument = NULL;
/* a ',' outside nested parentheses separates arguments */
2568 } else if (kind == ',' && argument_brace_count == 0) {
2569 finish_current_argument();
2570 current_call->expand_pos++;
2571 if (current_call->expand_pos >= current_call->n_parameters) {
2572 errorf(&pp_token.base.source_position,
2573 "too many arguments passed for macro '%Y'",
2574 current_call->symbol);
/* surplus arguments are not stored */
2575 current_argument = NULL;
2578 = &current_call->parameters[current_call->expand_pos];
2581 } else if (kind == T_MACRO_PARAMETER) {
2582 /* parameters have to be fully expanded before being used as
2583 * parameters for another macro-call */
2584 assert(current_expansion != NULL);
2585 pp_definition_t *argument = pp_token.macro_parameter.def;
2586 argument_expanding = argument;
2587 start_expanding(argument);
2589 } else if (kind == T_EOF) {
2590 errorf(&expansion_pos,
2591 "reached end of file while parsing arguments for '%Y'",
2592 current_call->symbol);
/* every other token becomes part of the current argument */
2595 if (current_argument != NULL) {
2596 saved_token_t saved;
2597 saved.token = pp_token;
2598 saved.had_whitespace = info.had_whitespace;
2599 obstack_grow(&pp_obstack, &saved, sizeof(saved));
/**
 * Appends a new entry for `path` to the end of the search path list `paths`.
 * The entry is allocated (zeroed, per OALLOCZ) on config_obstack and inherits
 * is_system_path from the list.
 * NOTE(review): the assignment of `path` to the entry is not visible in this
 * excerpt.
 */
2605 void append_include_path(searchpath_t *paths, const char *path)
2607 searchpath_entry_t *entry = OALLOCZ(&config_obstack, searchpath_entry_t);
2609 entry->is_system_path = paths->is_system_path;
/* `anchor` points at the link that terminates the list: hook the entry in
 * and move the anchor to the entry's own next pointer */
2611 *paths->anchor = entry;
2612 paths->anchor = &entry->next;
/**
 * Appends the ':'-separated entries of the environment variable `envvar` to
 * the search path list `paths`. An unset or empty variable adds nothing.
 */
2615 static void append_env_paths(searchpath_t *paths, const char *envvar)
2617 const char *val = getenv(envvar);
2618 if (val != NULL && *val != '\0') {
2619 const char *begin = val;
/* scan to the end of the current entry */
2623 while (*c != '\0' && *c != ':')
2626 size_t len = c-begin;
2628 /* use "." for gcc compatibility (Matze: I would expect that
2629 * nothing happens for an empty entry...) */
2630 append_include_path(paths, ".");
/* the entry is copied (NUL-terminated) onto config_obstack */
2632 char *const string = obstack_copy0(&config_obstack, begin, len);
2633 append_include_path(paths, string);
2640 } while(*c != '\0');
/**
 * Chains the list `append` behind `path` by storing append->first in the
 * link that path->anchor points to. The entries are shared, not copied, and
 * path->anchor itself is not moved in the visible line.
 */
2644 static void append_searchpath(searchpath_t *path, const searchpath_t *append)
2646 *path->anchor = append->first;
/**
 * Builds the include search paths: adds the built-in system directory, the
 * directories from CPATH (bracket path) and from C_INCLUDE_PATH or
 * CPLUS_INCLUDE_PATH (system path, chosen by c_mode & _CXX), and finally
 * chains the lists so that the lookup order is
 * quote -> bracket -> system -> after.
 *
 * Fix applied in this block: the first argument of the last
 * append_searchpath() call had been garbled into a stray double quote and is
 * restored to "&quote_searchpath".
 */
2649 static void setup_include_path(void)
2651 /* built-in paths */
2652 append_include_path(&system_searchpath, "/usr/include");
2654 /* parse environment variable */
2655 append_env_paths(&bracket_searchpath, "CPATH");
2656 append_env_paths(&system_searchpath,
2657 c_mode & _CXX ? "CPLUS_INCLUDE_PATH" : "C_INCLUDE_PATH");
2659 /* append system search path to bracket searchpath */
2660 append_searchpath(&system_searchpath, &after_searchpath);
2661 append_searchpath(&bracket_searchpath, &system_searchpath);
2662 append_searchpath(&quote_searchpath, &bracket_searchpath);
/**
 * Error callback registered with set_input_error_callback() in
 * init_preprocessor(). Reports `message` at the position of the current
 * token, shifted by `delta_lines` lines and `delta_cols` columns.
 */
2665 static void input_error(unsigned const delta_lines, unsigned const delta_cols, char const *const message)
/* work on a copy so the token's own position stays untouched */
2667 source_position_t pos = pp_token.base.source_position;
2668 pos.lineno += delta_lines;
2669 pos.colno += delta_cols;
2670 errorf(&pos, "%s", message);
/**
 * Initialises config_obstack, the obstack on which search path entries are
 * allocated (see append_include_path()).
 */
2673 void init_include_paths(void)
2675 obstack_init(&config_obstack);
/**
 * Sets up the preprocessor state: the obstacks for preprocessor data and
 * inputs, the string set, the include search paths and the input error
 * callback. config_obstack is not initialised here but in
 * init_include_paths().
 */
2678 void init_preprocessor(void)
2682 obstack_init(&pp_obstack);
2683 obstack_init(&input_obstack);
2684 strset_init(&stringset);
2686 setup_include_path();
2688 set_input_error_callback(input_error);
/**
 * Releases everything allocated on the three preprocessor obstacks
 * (obstack_free with NULL frees the whole obstack) and destroys the string
 * set.
 */
2691 void exit_preprocessor(void)
2693 obstack_free(&input_obstack, NULL);
2694 obstack_free(&pp_obstack, NULL);
2695 obstack_free(&config_obstack, NULL);
2697 strset_destroy(&stringset);
/**
 * Stand-alone driver for the preprocessor: parses a minimal command line
 * (-I, -E, -o and one input file), preprocesses the input token by token and
 * shuts the preprocessor down again.
 */
2700 int pptest_main(int argc, char **argv);
2701 int pptest_main(int argc, char **argv)
2703 init_symbol_table();
2704 init_include_paths();
2705 init_preprocessor();
/* same lexer flag settings as the branch of set_preprocessor_output() that
 * clears both flags */
2708 error_on_unknown_chars = false;
2709 resolve_escape_sequences = false;
2711 /* simplistic commandline parser */
2712 const char *filename = NULL;
2713 const char *output = NULL;
2714 for (int i = 1; i < argc; ++i) {
2715 const char *opt = argv[i];
2716 if (streq(opt, "-I")) {
/* NOTE(review): argv[++i] is read without checking that another argument
 * exists; a trailing "-I" passes argv[argc], i.e. NULL, as path */
2717 append_include_path(&bracket_searchpath, argv[++i]);
2719 } else if (streq(opt, "-E")) {
2721 } else if (streq(opt, "-o")) {
2724 } else if (opt[0] == '-') {
2725 fprintf(stderr, "Unknown option '%s'\n", opt);
/* only a single input file is accepted */
2727 if (filename != NULL)
2728 fprintf(stderr, "Multiple inputs not supported\n");
2732 if (filename == NULL) {
2733 fprintf(stderr, "No input specified\n");
2737 if (output == NULL) {
2740 out = fopen(output, "w");
2742 fprintf(stderr, "Couldn't open output '%s'\n", output);
2747 /* just here for gcc compatibility */
2748 fprintf(out, "# 1 \"%s\"\n", filename);
2749 fprintf(out, "# 1 \"<built-in>\"\n");
2750 fprintf(out, "# 1 \"<command-line>\"\n");
2752 FILE *file = fopen(filename, "r");
2754 fprintf(stderr, "Couldn't open input '%s'\n", filename);
2757 switch_pp_input(file, filename, NULL, false);
/* main loop: fetch tokens until the end of the input */
2760 next_preprocessing_token();
2761 if (pp_token.kind == T_EOF)
/* conditionals still open at the end of the input are reported */
2767 check_unclosed_conditionals();
2768 fclose(close_pp_input());
2773 exit_preprocessor();
2774 exit_symbol_table();