9 #include "preprocessor.h"
13 #include "adt/error.h"
14 #include "adt/strutil.h"
15 #include "adt/strset.h"
16 #include "lang_features.h"
17 #include "diagnostic.h"
18 #include "string_rep.h"
22 #define INCLUDE_LIMIT 199 /* 199 is for gcc "compatibility" */
24 typedef struct saved_token_t {
29 typedef struct whitespace_info_t {
30 /** current token had whitespace in front of it */
32 /** current token is at the beginning of a line.
33 * => a "#" at line begin starts a preprocessing directive. */
35 /** number of spaces before the first token in a line */
36 unsigned whitespace_at_line_begin;
39 struct pp_definition_t {
42 pp_definition_t *parent_expansion;
44 whitespace_info_t expand_info;
46 bool is_expanding : 1;
47 bool has_parameters : 1;
48 bool is_parameter : 1;
49 pp_definition_t *function_definition;
51 pp_definition_t *parameters;
55 saved_token_t *token_list;
58 typedef struct pp_conditional_t pp_conditional_t;
59 struct pp_conditional_t {
63 /** conditional in skip mode (then+else gets skipped) */
65 pp_conditional_t *parent;
68 typedef struct pp_input_t pp_input_t;
73 utf32 buf[1024+MAX_PUTBACK];
79 searchpath_entry_t *path;
82 struct searchpath_entry_t {
84 searchpath_entry_t *next;
88 static pp_input_t input;
90 static pp_input_t *input_stack;
91 static unsigned n_inputs;
92 static struct obstack input_obstack;
94 static pp_conditional_t *conditional_stack;
97 bool allow_dollar_in_symbol = true;
98 static bool resolve_escape_sequences = true;
99 static bool error_on_unknown_chars = true;
100 static bool skip_mode;
102 static struct obstack pp_obstack;
103 static struct obstack config_obstack;
104 static const char *printed_input_name = NULL;
105 static position_t expansion_pos;
106 static pp_definition_t *current_expansion = NULL;
107 static pp_definition_t *current_call = NULL;
108 static pp_definition_t *current_argument = NULL;
109 static pp_definition_t *argument_expanding = NULL;
110 static unsigned argument_brace_count;
111 static strset_t stringset;
112 static token_kind_t last_token;
114 struct searchpath_t {
115 searchpath_entry_t *first;
116 searchpath_entry_t **anchor;
/* Every search path starts out empty: `first` is NULL and `anchor` points at
 * the path's own `first` field. The third initializer is true only for the
 * system and after paths (presumably a system-header flag; the field's
 * declaration is not visible here). */
120 searchpath_t bracket_searchpath = { NULL, &bracket_searchpath.first, false };
121 searchpath_t quote_searchpath = { NULL, &quote_searchpath.first, false };
122 searchpath_t system_searchpath = { NULL, &system_searchpath.first, true };
123 searchpath_t after_searchpath = { NULL, &after_searchpath.first, true };
125 static whitespace_info_t next_info; /* valid if had_whitespace is true */
126 static whitespace_info_t info;
128 static inline void next_char(void);
129 static void next_input_token(void);
130 static void print_line_directive(const position_t *pos, const char *add);
132 static symbol_t *symbol_colongreater;
133 static symbol_t *symbol_lesscolon;
134 static symbol_t *symbol_lesspercent;
135 static symbol_t *symbol_percentcolon;
136 static symbol_t *symbol_percentcolonpercentcolon;
137 static symbol_t *symbol_percentgreater;
139 static symbol_t *symbol_L;
140 static symbol_t *symbol_U;
141 static symbol_t *symbol_u;
142 static symbol_t *symbol_u8;
/**
 * Interns the spellings of the digraph punctuators and of the literal
 * encoding prefixes in the symbol table and caches the returned symbols in
 * the file-level symbol_* variables.
 */
144 static void init_symbols(void)
/* digraph spellings */
146 symbol_colongreater = symbol_table_insert(":>");
147 symbol_lesscolon = symbol_table_insert("<:");
148 symbol_lesspercent = symbol_table_insert("<%");
149 symbol_percentcolon = symbol_table_insert("%:");
150 symbol_percentcolonpercentcolon = symbol_table_insert("%:%:");
151 symbol_percentgreater = symbol_table_insert("%>");
/* string/character literal encoding prefixes */
153 symbol_L = symbol_table_insert("L");
154 symbol_U = symbol_table_insert("U");
155 symbol_u = symbol_table_insert("u");
156 symbol_u8 = symbol_table_insert("u8");
/**
 * Makes the given stream the current preprocessor input.
 *
 * @param file              stream the new input is decoded from
 * @param filename          stored as the input name of the input position
 * @param path              search path entry belonging to the input
 *                          (its use is not visible in this excerpt)
 * @param is_system_header  stored in the input position
 */
159 void switch_pp_input(FILE *const file, char const *const filename, searchpath_entry_t *const path, bool const is_system_header)
162 input.input = input_from_stream(file, NULL);
165 input.output_line = 0;
166 input.pos.input_name = filename;
167 input.pos.lineno = 1;
168 input.pos.is_system_header = is_system_header;
171 /* indicate that we're at a new input */
/* the "1" flag is only added when another input is already on the stack */
172 print_line_directive(&input.pos, input_stack != NULL ? "1" : NULL);
174 /* place a virtual '\n' so we realize we're at line begin */
175 input.pos.lineno = 0;
/**
 * Frees the decoder of the current input with input_free() and picks up the
 * underlying stream from input.file.
 * NOTE(review): the rest of the body is not visible here. next_input_token()
 * passes the result to fclose(), so the stream is presumably returned still
 * open - confirm.
 */
179 FILE *close_pp_input(void)
181 input_free(input.input);
183 FILE* const file = input.file;
/**
 * Saves the current input state on the input stack.
 *
 * The global `input` is copied onto input_obstack. bufpos/bufend point into
 * the buffer embedded in the structure, so in the copy they are rebased onto
 * the copy's own buffer. The copy then becomes the new top of input_stack.
 */
195 static void push_input(void)
197 pp_input_t *const saved_input = obstack_copy(&input_obstack, &input, sizeof(input));
199 /* adjust buffer positions */
200 if (input.bufpos != NULL)
201 saved_input->bufpos = saved_input->buf + (input.bufpos - input.buf);
202 if (input.bufend != NULL)
203 saved_input->bufend = saved_input->buf + (input.bufend - input.buf);
205 saved_input->parent = input_stack;
206 input_stack = saved_input;
/**
 * Restores the most recently pushed input state into the global `input`.
 *
 * Mirror image of push_input(): the saved structure is copied back, the
 * buffer pointers are rebased onto input.buf, the entry is unlinked from
 * input_stack and its obstack memory is released.
 * Requires at least one saved input (asserted).
 */
210 static void pop_restore_input(void)
212 assert(n_inputs > 0);
213 assert(input_stack != NULL);
215 pp_input_t *saved_input = input_stack;
217 memcpy(&input, saved_input, sizeof(input));
220 /* adjust buffer positions */
221 if (saved_input->bufpos != NULL)
222 input.bufpos = input.buf + (saved_input->bufpos - saved_input->buf);
223 if (saved_input->bufend != NULL)
224 input.bufend = input.buf + (saved_input->bufend - saved_input->buf);
226 input_stack = saved_input->parent;
227 obstack_free(&input_obstack, saved_input);
232 * Prints a parse error message at the current token.
234 * @param msg the error message
236 static void parse_error(const char *msg)
238 errorf(&pp_token.base.pos, "%s", msg);
/**
 * Reads the next raw character of the current input into input.c.
 *
 * When the buffer is exhausted it is refilled through decode(). The decoded
 * data is placed behind the first MAX_PUTBACK slots of input.buf, which are
 * presumably kept free for put_back().
 * NOTE(review): the handling of an empty decode() result is not visible here.
 */
241 static inline void next_real_char(void)
243 assert(input.bufpos <= input.bufend);
244 if (input.bufpos >= input.bufend) {
245 size_t const n = decode(input.input, input.buf + MAX_PUTBACK, lengthof(input.buf) - MAX_PUTBACK);
250 input.bufpos = input.buf + MAX_PUTBACK;
251 input.bufend = input.bufpos + n;
253 input.c = *input.bufpos++;
258 * Put a character back into the buffer.
260 * @param pc the character to put back
/* input.buf holds utf32 code points, so the value is stored unchanged.
 * Narrowing it to char first would corrupt every non-ASCII character that is
 * put back (and sign-extend it where char is signed).
 * There must be room in front of bufpos (asserted). */
262 static inline void put_back(utf32 const pc)
264 assert(input.bufpos > input.buf);
265 *(--input.bufpos - input.buf + input.buf) = pc;
272 if (input.c == '\n') { \
276 ++input.pos.lineno; \
277 input.pos.colno = 1; \
279 newline // Let it look like an ordinary case label.
281 #define eat(c_type) (assert(input.c == c_type), next_char())
283 static void maybe_concat_lines(void)
289 info.whitespace_at_line_begin = 0;
301 * Set c to the next input character, i.e.
302 * after expanding trigraphs.
304 static inline void next_char(void)
308 /* filter trigraphs and concatenated lines */
309 if (UNLIKELY(input.c == '\\')) {
310 maybe_concat_lines();
311 goto end_of_next_char;
314 if (LIKELY(input.c != '?'))
315 goto end_of_next_char;
318 if (LIKELY(input.c != '?')) {
321 goto end_of_next_char;
326 case '=': input.c = '#'; break;
327 case '(': input.c = '['; break;
328 case '/': input.c = '\\'; maybe_concat_lines(); break;
329 case ')': input.c = ']'; break;
330 case '\'': input.c = '^'; break;
331 case '<': input.c = '{'; break;
332 case '!': input.c = '|'; break;
333 case '>': input.c = '}'; break;
334 case '-': input.c = '~'; break;
344 printf("nchar '%c'\n", input.c);
351 * Returns true if the given char is an octal digit.
353 * @param chr the character to check
355 static inline bool is_octal_digit(int chr)
373 * Returns the value of a digit.
374 * The only portable way to do it ...
376 static int digit_value(int digit)
402 panic("wrong character given");
407 * Parses an octal character sequence.
409 * @param first_digit the already read first digit
/* Accumulates at most three octal digits into one value: the digit passed in
 * plus up to two further digits taken from input.c. Parsing stops at the
 * first character that is not an octal digit.
 * NOTE(review): the calls advancing the input between the digits are not
 * visible in this excerpt. */
411 static utf32 parse_octal_sequence(const utf32 first_digit)
413 assert(is_octal_digit(first_digit));
414 utf32 value = digit_value(first_digit);
415 if (!is_octal_digit(input.c)) return value;
416 value = 8 * value + digit_value(input.c);
418 if (!is_octal_digit(input.c)) return value;
419 value = 8 * value + digit_value(input.c);
426 * Parses a hex character sequence.
/* Accumulates hexadecimal digits from the input for as long as they last.
 * input.c is a utf32 code point, but the <ctype.h> classifiers are only
 * defined for values representable as unsigned char (or EOF), so the
 * character is range-checked before isxdigit() is consulted. */
428 static utf32 parse_hex_sequence(void)
431 while (input.c < 0x80 && isxdigit((int)input.c)) {
432 value = 16 * value + digit_value(input.c);
/* Checks a universal character name value against two classes of code
 * points: values below 0xA0 other than 0x24 ('$'), 0x40 ('@') and 0x60 ('`'),
 * and the surrogate range 0xD800-0xDFFF.
 * NOTE(review): the statements guarded by the two tests are not visible here;
 * presumably both classes are rejected, matching C11 6.4.3 - confirm. */
438 static bool is_universal_char_valid(utf32 const v)
441 if (v < 0xA0U && v != 0x24 && v != 0x40 && v != 0x60)
443 if (0xD800 <= v && v <= 0xDFFF)
/* Parses the hexadecimal digits of a universal character name.
 *
 * @param n_digits  number of digits expected (the callers pass 4 for \u and
 *                  8 for \U)
 *
 * When escape sequences are not resolved, the digits are additionally copied
 * verbatim to symbol_obstack. The accumulated value is checked with
 * is_universal_char_valid().
 * input.c is a utf32 code point; it is range-checked before isxdigit()
 * because the <ctype.h> classifiers are only defined for unsigned char
 * values (or EOF). */
448 static utf32 parse_universal_char(unsigned const n_digits)
451 for (unsigned k = n_digits; k != 0; --k) {
452 if (input.c < 0x80 && isxdigit((int)input.c)) {
453 v = 16 * v + digit_value(input.c);
454 if (!resolve_escape_sequences)
455 obstack_1grow(&symbol_obstack, input.c);
459 "short universal character name, expected %u more digits",
464 if (!is_universal_char_valid(v)) {
466 "\\%c%0*X is not a valid universal character name",
467 n_digits == 4 ? 'u' : 'U', (int)n_digits, v);
472 static bool is_universal_char_valid_identifier_c99(utf32 const v)
474 static const utf32 single_chars[] = {
475 0x00AA, 0x00BA, 0x0386, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0,
476 0x1F59, 0x1F5B, 0x1F5D, 0x05BF, 0x09B2, 0x0A02, 0x0A5E, 0x0A74,
477 0x0A8D, 0x0AD0, 0x0AE0, 0x0B9C, 0x0CDE, 0x0E84, 0x0E8A, 0x0E8D,
478 0x0EA5, 0x0EA7, 0x0EC6, 0x0F00, 0x0F35, 0x0F37, 0x0F39, 0x0F97,
479 0x0FB9, 0x00B5, 0x00B7, 0x02BB, 0x037A, 0x0559, 0x093D, 0x0B3D,
480 0x1FBE, 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128
483 static const utf32 ranges[][2] = {
484 {0x00C0, 0x00D6}, {0x00D8, 0x00F6}, {0x00F8, 0x01F5}, {0x01FA, 0x0217},
485 {0x0250, 0x02A8}, {0x1E00, 0x1E9B}, {0x1EA0, 0x1EF9}, {0x0388, 0x038A},
486 {0x038E, 0x03A1}, {0x03A3, 0x03CE}, {0x03D0, 0x03D6}, {0x03E2, 0x03F3},
487 {0x1F00, 0x1F15}, {0x1F18, 0x1F1D}, {0x1F20, 0x1F45}, {0x1F48, 0x1F4D},
488 {0x1F50, 0x1F57}, {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4}, {0x1FB6, 0x1FBC},
489 {0x1FC2, 0x1FC4}, {0x1FC6, 0x1FCC}, {0x1FD0, 0x1FD3}, {0x1FD6, 0x1FDB},
490 {0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4}, {0x1FF6, 0x1FFC}, {0x0401, 0x040C},
491 {0x040E, 0x044F}, {0x0451, 0x045C}, {0x045E, 0x0481}, {0x0490, 0x04C4},
492 {0x04C7, 0x04C8}, {0x04CB, 0x04CC}, {0x04D0, 0x04EB}, {0x04EE, 0x04F5},
493 {0x04F8, 0x04F9}, {0x0531, 0x0556}, {0x0561, 0x0587}, {0x05B0, 0x05B9},
494 {0x05BB, 0x05BD}, {0x05C1, 0x05C2}, {0x05D0, 0x05EA}, {0x05F0, 0x05F2},
495 {0x0621, 0x063A}, {0x0640, 0x0652}, {0x0670, 0x06B7}, {0x06BA, 0x06BE},
496 {0x06C0, 0x06CE}, {0x06D0, 0x06DC}, {0x06E5, 0x06E8}, {0x06EA, 0x06ED},
497 {0x0901, 0x0903}, {0x0905, 0x0939}, {0x093E, 0x094D}, {0x0950, 0x0952},
498 {0x0958, 0x0963}, {0x0981, 0x0983}, {0x0985, 0x098C}, {0x098F, 0x0990},
499 {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B6, 0x09B9}, {0x09BE, 0x09C4},
500 {0x09C7, 0x09C8}, {0x09CB, 0x09CD}, {0x09DC, 0x09DD}, {0x09DF, 0x09E3},
501 {0x09F0, 0x09F1}, {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28},
502 {0x0A2A, 0x0A30}, {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, {0x0A38, 0x0A39},
503 {0x0A3E, 0x0A42}, {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A59, 0x0A5C},
504 {0x0A81, 0x0A83}, {0x0A85, 0x0A8B}, {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8},
505 {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3}, {0x0AB5, 0x0AB9}, {0x0ABD, 0x0AC5},
506 {0x0AC7, 0x0AC9}, {0x0ACB, 0x0ACD}, {0x0B01, 0x0B03}, {0x0B05, 0x0B0C},
507 {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30}, {0x0B32, 0x0B33},
508 {0x0B36, 0x0B39}, {0x0B3E, 0x0B43}, {0x0B47, 0x0B48}, {0x0B4B, 0x0B4D},
509 {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B61}, {0x0B82, 0x0B83}, {0x0B85, 0x0B8A},
510 {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A}, {0x0B9E, 0x0B9F},
511 {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA}, {0x0BAE, 0x0BB5}, {0x0BB7, 0x0BB9},
512 {0x0BBE, 0x0BC2}, {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD}, {0x0C01, 0x0C03},
513 {0x0C05, 0x0C0C}, {0x0C0E, 0x0C10}, {0x0C12, 0x0C28}, {0x0C2A, 0x0C33},
514 {0x0C35, 0x0C39}, {0x0C3E, 0x0C44}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D},
515 {0x0C60, 0x0C61}, {0x0C82, 0x0C83}, {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90},
516 {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3}, {0x0CB5, 0x0CB9}, {0x0CBE, 0x0CC4},
517 {0x0CC6, 0x0CC8}, {0x0CCA, 0x0CCD}, {0x0CE0, 0x0CE1}, {0x0D02, 0x0D03},
518 {0x0D05, 0x0D0C}, {0x0D0E, 0x0D10}, {0x0D12, 0x0D28}, {0x0D2A, 0x0D39},
519 {0x0D3E, 0x0D43}, {0x0D46, 0x0D48}, {0x0D4A, 0x0D4D}, {0x0D60, 0x0D61},
520 {0x0E01, 0x0E3A}, {0x0E40, 0x0E5B}, {0x0E81, 0x0E82}, {0x0E87, 0x0E88},
521 {0x0E94, 0x0E97}, {0x0E99, 0x0E9F}, {0x0EA1, 0x0EA3}, {0x0EAA, 0x0EAB},
522 {0x0EAD, 0x0EAE}, {0x0EB0, 0x0EB9}, {0x0EBB, 0x0EBD}, {0x0EC0, 0x0EC4},
523 {0x0EC8, 0x0ECD}, {0x0EDC, 0x0EDD}, {0x0F18, 0x0F19}, {0x0F3E, 0x0F47},
524 {0x0F49, 0x0F69}, {0x0F71, 0x0F84}, {0x0F86, 0x0F8B}, {0x0F90, 0x0F95},
525 {0x0F99, 0x0FAD}, {0x0FB1, 0x0FB7}, {0x10A0, 0x10C5}, {0x10D0, 0x10F6},
526 {0x3041, 0x3093}, {0x309B, 0x309C}, {0x30A1, 0x30F6}, {0x30FB, 0x30FC},
527 {0x3105, 0x312C}, {0x4E00, 0x9FA5}, {0xAC00, 0xD7A3}, {0x0660, 0x0669},
528 {0x06F0, 0x06F9}, {0x0966, 0x096F}, {0x09E6, 0x09EF}, {0x0A66, 0x0A6F},
529 {0x0AE6, 0x0AEF}, {0x0B66, 0x0B6F}, {0x0BE7, 0x0BEF}, {0x0C66, 0x0C6F},
530 {0x0CE6, 0x0CEF}, {0x0D66, 0x0D6F}, {0x0E50, 0x0E59}, {0x0ED0, 0x0ED9},
531 {0x0F20, 0x0F33}, {0x02B0, 0x02B8}, {0x02BD, 0x02C1}, {0x02D0, 0x02D1},
532 {0x02E0, 0x02E4}, {0x203F, 0x2040}, {0x210A, 0x2113}, {0x2118, 0x211D},
533 {0x212A, 0x2131}, {0x2133, 0x2138}, {0x2160, 0x2182}, {0x3005, 0x3007},
536 for (size_t i = 0; i < sizeof(ranges)/sizeof(ranges[0]); ++i) {
537 if (ranges[i][0] <= v && v <= ranges[i][1])
540 for (size_t i = 0; i < sizeof(single_chars)/sizeof(single_chars[0]); ++i) {
541 if (v == single_chars[i])
/* Returns true for code points that lie in one of the listed ranges.
 * The ranges appear to be those of C11 Annex D.1 ("Ranges of characters
 * allowed" in identifiers).
 * NOTE(review): the fall-through result for all other values is not visible
 * in this excerpt. */
547 static bool is_universal_char_valid_identifier_c11(utf32 const v)
550 if ( v == 0x000A8) return true;
551 if ( v == 0x000AA) return true;
552 if ( v == 0x000AD) return true;
553 if ( v == 0x000AF) return true;
554 if (0x000B2 <= v && v <= 0x000B5) return true;
555 if (0x000B7 <= v && v <= 0x000BA) return true;
556 if (0x000BC <= v && v <= 0x000BE) return true;
557 if (0x000C0 <= v && v <= 0x000D6) return true;
558 if (0x000D8 <= v && v <= 0x000F6) return true;
559 if (0x000F8 <= v && v <= 0x000FF) return true;
560 if (0x00100 <= v && v <= 0x0167F) return true;
561 if (0x01681 <= v && v <= 0x0180D) return true;
562 if (0x0180F <= v && v <= 0x01FFF) return true;
563 if (0x0200B <= v && v <= 0x0200D) return true;
564 if (0x0202A <= v && v <= 0x0202E) return true;
565 if (0x0203F <= v && v <= 0x02040) return true;
566 if ( v == 0x02054) return true;
567 if (0x02060 <= v && v <= 0x0206F) return true;
568 if (0x02070 <= v && v <= 0x0218F) return true;
569 if (0x02460 <= v && v <= 0x024FF) return true;
570 if (0x02776 <= v && v <= 0x02793) return true;
571 if (0x02C00 <= v && v <= 0x02DFF) return true;
572 if (0x02E80 <= v && v <= 0x02FFF) return true;
573 if (0x03004 <= v && v <= 0x03007) return true;
574 if (0x03021 <= v && v <= 0x0302F) return true;
575 if (0x03031 <= v && v <= 0x0303F) return true;
576 if (0x03040 <= v && v <= 0x0D7FF) return true;
577 if (0x0F900 <= v && v <= 0x0FD3D) return true;
578 if (0x0FD40 <= v && v <= 0x0FDCF) return true;
579 if (0x0FDF0 <= v && v <= 0x0FE44) return true;
580 if (0x0FE47 <= v && v <= 0x0FFFD) return true;
/* supplementary planes 1-14, each without its last two code points */
581 if (0x10000 <= v && v <= 0x1FFFD) return true;
582 if (0x20000 <= v && v <= 0x2FFFD) return true;
583 if (0x30000 <= v && v <= 0x3FFFD) return true;
584 if (0x40000 <= v && v <= 0x4FFFD) return true;
585 if (0x50000 <= v && v <= 0x5FFFD) return true;
586 if (0x60000 <= v && v <= 0x6FFFD) return true;
587 if (0x70000 <= v && v <= 0x7FFFD) return true;
588 if (0x80000 <= v && v <= 0x8FFFD) return true;
589 if (0x90000 <= v && v <= 0x9FFFD) return true;
590 if (0xA0000 <= v && v <= 0xAFFFD) return true;
591 if (0xB0000 <= v && v <= 0xBFFFD) return true;
592 if (0xC0000 <= v && v <= 0xCFFFD) return true;
593 if (0xD0000 <= v && v <= 0xDFFFD) return true;
594 if (0xE0000 <= v && v <= 0xEFFFD) return true;
598 static bool is_universal_char_valid_identifier(utf32 const v)
601 return is_universal_char_valid_identifier_c11(v);
602 return is_universal_char_valid_identifier_c99(v);
/* Returns true for code points in one of four ranges, which appear to be the
 * combining-mark ranges of C11 Annex D.2 ("Ranges of characters disallowed
 * initially"). The ranges are only consulted after the C11 mode test.
 * NOTE(review): the result chosen when C11 mode is off is not visible here. */
605 static bool is_universal_char_invalid_identifier_start(utf32 const v)
607 if (! (c_mode & _C11))
611 if (0x0300 <= v && v <= 0x036F) return true;
612 if (0x1DC0 <= v && v <= 0x1DFF) return true;
613 if (0x20D0 <= v && v <= 0x20FF) return true;
614 if (0xFE20 <= v && v <= 0xFE2F) return true;
619 * Parse an escape sequence.
621 static utf32 parse_escape_sequence(void)
625 utf32 const ec = input.c;
629 case '"': return '"';
630 case '\'': return '\'';
631 case '\\': return '\\';
632 case '?': return '\?';
633 case 'a': return '\a';
634 case 'b': return '\b';
635 case 'f': return '\f';
636 case 'n': return '\n';
637 case 'r': return '\r';
638 case 't': return '\t';
639 case 'v': return '\v';
641 return parse_hex_sequence();
650 return parse_octal_sequence(ec);
652 parse_error("reached end of file while parsing escape sequence");
654 /* \E is not documented, but handled, by GCC. It is acceptable according
655 * to §6.11.4, whereas \e is not. */
659 return 27; /* hopefully 27 is ALWAYS the code for ESCAPE */
662 case 'U': return parse_universal_char(8);
663 case 'u': return parse_universal_char(4);
668 /* §6.4.4.4:8 footnote 64 */
669 parse_error("unknown escape sequence");
/* Interns a string that was built on symbol_obstack in the global string set.
 * When strset_insert() hands back a different pointer than the one passed in,
 * the freshly built copy is released from symbol_obstack again.
 * NOTE(review): the return statement is not visible; sym_make_string() uses
 * the result as the string's begin pointer. */
673 static const char *identify_string(char *string)
675 const char *result = strset_insert(&stringset, string);
676 if (result != string) {
677 obstack_free(&symbol_obstack, string);
/* Turns the object currently being grown on symbol_obstack into a string_t.
 *
 * A terminating NUL is appended, the object is finished and then interned via
 * identify_string(). The reported length excludes the terminating NUL.
 *
 * @param enc  encoding stored in the resulting string_t */
682 static string_t sym_make_string(string_encoding_t const enc)
684 obstack_1grow(&symbol_obstack, '\0');
685 size_t const len = obstack_object_size(&symbol_obstack) - 1;
686 char *const string = obstack_finish(&symbol_obstack);
687 char const *const result = identify_string(string);
688 return (string_t){ result, len, enc };
/* Builds an interned string_t with STRING_ENCODING_CHAR from a
 * NUL-terminated C string by copying it onto symbol_obstack and handing it
 * to sym_make_string(). */
691 string_t make_string(char const *const string)
693 obstack_grow(&symbol_obstack, string, strlen(string));
694 return sym_make_string(STRING_ENCODING_CHAR);
/* Returns a limit value for the given string encoding; parse_string() fetches
 * it before processing escape sequences (see its "escape sequence out of
 * range" warning). Panics on an encoding that is not listed. */
697 static utf32 get_string_encoding_limit(string_encoding_t const enc)
700 case STRING_ENCODING_CHAR: return 0xFF;
701 case STRING_ENCODING_CHAR16: return 0xFFFF;
702 case STRING_ENCODING_CHAR32: return 0xFFFFFFFF;
703 case STRING_ENCODING_UTF8: return 0xFFFFFFFF;
704 case STRING_ENCODING_WIDE: return 0xFFFFFFFF; // FIXME depends on settings
706 panic("invalid string encoding");
709 static void parse_string(utf32 const delimiter, token_kind_t const kind,
710 string_encoding_t const enc,
711 char const *const context)
715 utf32 const limit = get_string_encoding_limit(enc);
719 if (resolve_escape_sequences) {
720 utf32 const tc = parse_escape_sequence();
722 warningf(WARN_OTHER, &pp_token.base.pos,
723 "escape sequence out of range");
725 if (enc == STRING_ENCODING_CHAR) {
726 obstack_1grow(&symbol_obstack, tc);
728 obstack_grow_utf8(&symbol_obstack, tc);
731 obstack_1grow(&symbol_obstack, (char)input.c);
733 obstack_1grow(&symbol_obstack, (char)input.c);
740 errorf(&pp_token.base.pos, "newline while parsing %s", context);
744 errorf(&pp_token.base.pos, "EOF while parsing %s", context);
748 if (input.c == delimiter) {
752 obstack_grow_utf8(&symbol_obstack, input.c);
760 pp_token.kind = kind;
761 pp_token.literal.string = sym_make_string(enc);
/* Parses a double-quoted string literal with the given encoding; thin wrapper
 * around parse_string() producing a T_STRING_LITERAL token. */
764 static void parse_string_literal(string_encoding_t const enc)
766 parse_string('"', T_STRING_LITERAL, enc, "string literal");
/* Parses a single-quoted character constant with the given encoding via
 * parse_string() and reports an error when the constant turns out empty. */
769 static void parse_character_constant(string_encoding_t const enc)
771 parse_string('\'', T_CHARACTER_CONSTANT, enc, "character constant");
772 if (pp_token.literal.string.size == 0) {
773 parse_error("empty character constant");
777 #define SYMBOL_CASES_WITHOUT_E_P \
778 '$': if (!allow_dollar_in_symbol) goto dollar_sign; \
829 #define SYMBOL_CASES \
830 SYMBOL_CASES_WITHOUT_E_P: \
836 #define DIGIT_CASES \
/* Makes the given definition the innermost active expansion.
 *
 * The previous current_expansion is remembered as parent, the read position
 * is reset and the definition is flagged as expanding. The first token of a
 * non-empty replacement list takes over the whitespace flag of the token
 * currently being processed. */
848 static void start_expanding(pp_definition_t *definition)
850 definition->parent_expansion = current_expansion;
851 definition->expand_pos = 0;
852 definition->is_expanding = true;
853 if (definition->list_len > 0) {
854 definition->token_list[0].had_whitespace
855 = info.had_whitespace;
857 current_expansion = definition;
/* Ends the innermost expansion, which must be the given definition
 * (asserted): the definition is unlinked from its parent, its expanding flag
 * is cleared, argument_expanding is reset if it referred to this definition,
 * and current_expansion falls back to the parent. */
860 static void finished_expanding(pp_definition_t *definition)
862 assert(definition->is_expanding);
863 pp_definition_t *parent = definition->parent_expansion;
864 definition->parent_expansion = NULL;
865 definition->is_expanding = false;
867 /* stop further expanding once we expanded a parameter used in a
869 if (definition == argument_expanding)
870 argument_expanding = NULL;
872 assert(current_expansion == definition);
873 current_expansion = parent;
/* Appends the source spelling of a string or character literal to an obstack:
 * encoding prefix, opening delimiter, contents, closing delimiter.
 *
 * @param obst       obstack to append to
 * @param string     literal contents and encoding
 * @param delimiter  text written before and after the contents
 *
 * With resolve_escape_sequences set the contents are copied verbatim. The
 * loop (presumably the else branch; the `else` line is not visible here)
 * puts a backslash in front of every backslash and double quote. */
876 static void grow_string_escaped(struct obstack *obst, const string_t *string, char const *delimiter)
878 char const *prefix = get_string_encoding_prefix(string->encoding);
879 obstack_printf(obst, "%s%s", prefix, delimiter);
880 size_t size = string->size;
881 const char *str = string->begin;
882 if (resolve_escape_sequences) {
883 obstack_grow(obst, str, size);
885 for (size_t i = 0; i < size; ++i) {
886 const char c = str[i];
887 if (c == '\\' || c == '"')
888 obstack_1grow(obst, '\\');
889 obstack_1grow(obst, c);
892 obstack_printf(obst, "%s", delimiter);
/* Appends the spelling of a token to an obstack, dispatching on token kind.
 * String literals and character constants go through grow_string_escaped();
 * the string-literal delimiter is itself backslash-escaped when escape
 * sequences are not resolved. One group of kinds copies literal.string
 * unchanged, and the symbol-based path copies the symbol's string.
 * NOTE(review): the case labels of the first and the last group are not
 * visible in this excerpt. */
895 static void grow_token(struct obstack *obst, const token_t *token)
897 switch (token->kind) {
899 obstack_grow(obst, token->literal.string.begin, token->literal.string.size);
902 case T_STRING_LITERAL: {
903 char const *const delimiter = resolve_escape_sequences ? "\"" : "\\\"";
904 grow_string_escaped(obst, &token->literal.string, delimiter);
908 case T_CHARACTER_CONSTANT:
909 grow_string_escaped(obst, &token->literal.string, "'");
914 const char *str = token->base.symbol->string;
915 size_t len = strlen(str);
916 obstack_grow(obst, str, len);
/* Implements the '#' operator: turns the token list of a definition into a
 * single T_STRING_LITERAL stored in pp_token.
 *
 * Tokens are spelled with grow_token(). A single space is inserted before
 * every token except the first if that token had leading whitespace.
 * symbol_obstack must hold no unfinished object on entry (asserted). */
922 static void stringify(const pp_definition_t *definition)
924 assert(obstack_object_size(&symbol_obstack) == 0);
926 size_t list_len = definition->list_len;
927 for (size_t p = 0; p < list_len; ++p) {
928 const saved_token_t *saved = &definition->token_list[p];
929 if (p > 0 && saved->had_whitespace)
930 obstack_1grow(&symbol_obstack, ' ');
931 grow_token(&symbol_obstack, &saved->token);
933 pp_token.kind = T_STRING_LITERAL;
934 pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
/* Makes pp_token a punctuator of the given kind, using the symbol registered
 * for that kind in token_symbols. */
937 static inline void set_punctuator(token_kind_t const kind)
939 pp_token.kind = kind;
940 pp_token.base.symbol = token_symbols[kind];
/* Makes pp_token a punctuator of the given kind but with an explicitly given
 * symbol, which lets a digraph keep its own spelling instead of the symbol
 * from token_symbols. */
943 static inline void set_digraph(token_kind_t const kind, symbol_t *const symbol)
945 pp_token.kind = kind;
946 pp_token.base.symbol = symbol;
950 * returns next final token from a preprocessor macro expansion
/* Fetches the next token of the innermost active macro expansion into
 * pp_token. A finished expansion is closed via finished_expanding() first.
 * A '#' token directly followed by a macro parameter is recognised here.
 * The token takes the position of the expansion (expansion_pos) and, except
 * for the first token of an expansion, the saved whitespace flag.
 * NOTE(review): the return statements are not visible in this excerpt. */
952 static bool expand_next(void)
954 if (current_expansion == NULL)
958 size_t pos = current_expansion->expand_pos;
959 if (pos >= current_expansion->list_len) {
960 finished_expanding(current_expansion);
961 /* it was the outermost expansion, parse pptoken normally */
962 if (current_expansion == NULL) {
967 const saved_token_t *saved = &current_expansion->token_list[pos++];
968 pp_token = saved->token;
969 if (pp_token.kind == '#') {
970 if (pos < current_expansion->list_len) {
971 const saved_token_t *next = &current_expansion->token_list[pos];
972 if (next->token.kind == T_MACRO_PARAMETER) {
973 pp_definition_t *def = next->token.macro_parameter.def;
974 assert(def != NULL && def->is_parameter);
981 if (current_expansion->expand_pos > 0)
982 info.had_whitespace = saved->had_whitespace;
983 current_expansion->expand_pos = pos;
984 pp_token.base.pos = expansion_pos;
990 * Returns the next token kind found when continuing the current expansions
991 * without starting new sub-expansions.
993 static token_kind_t peek_expansion(void)
995 for (pp_definition_t *e = current_expansion; e; e = e->parent_expansion) {
996 if (e->expand_pos < e->list_len)
997 return e->token_list[e->expand_pos].token.kind;
1002 static void skip_line_comment(void)
1004 info.had_whitespace = true;
1021 static void skip_multiline_comment(void)
1023 info.had_whitespace = true;
1025 position_t const start_pos = input.pos;
1030 if (input.c == '*') {
1031 /* TODO: nested comment, warn here */
1036 if (input.c == '/') {
1037 if (input.pos.lineno != input.output_line)
1038 info.whitespace_at_line_begin = input.pos.colno;
1048 errorf(&start_pos, "at end of file while looking for comment end");
1058 static bool skip_till_newline(bool stop_at_non_whitespace)
1070 if (input.c == '/') {
1072 skip_line_comment();
1074 } else if (input.c == '*') {
1076 skip_multiline_comment();
1088 if (stop_at_non_whitespace)
1097 static void skip_whitespace(void)
1103 ++info.whitespace_at_line_begin;
1104 info.had_whitespace = true;
1109 info.at_line_begin = true;
1110 info.had_whitespace = true;
1111 info.whitespace_at_line_begin = 0;
1116 if (input.c == '/') {
1118 skip_line_comment();
1120 } else if (input.c == '*') {
1122 skip_multiline_comment();
1136 static inline void eat_pp(pp_token_kind_t const kind)
1138 assert(pp_token.base.symbol->pp_ID == kind);
1143 static inline void eat_token(token_kind_t const kind)
1145 assert(pp_token.kind == kind);
/* Maps an identifier symbol to the literal encoding it selects when used as a
 * prefix: "L" always means wide; "U", "u" and "u8" are only recognised in C11
 * mode. Every other symbol yields STRING_ENCODING_CHAR, which callers treat
 * as "not a prefix". */
1150 static string_encoding_t identify_encoding_prefix(symbol_t *const sym)
1152 if (sym == symbol_L) return STRING_ENCODING_WIDE;
1153 if (c_mode & _C11) {
1154 if (sym == symbol_U) return STRING_ENCODING_CHAR32;
1155 if (sym == symbol_u) return STRING_ENCODING_CHAR16;
1156 if (sym == symbol_u8) return STRING_ENCODING_UTF8;
1158 return STRING_ENCODING_CHAR;
/* Parses an identifier, including universal character names, into pp_token.
 *
 * The spelling is collected on symbol_obstack and interned in the symbol
 * table. If the identifier is directly followed by a quote and is an encoding
 * prefix (L, U, u, u8), a prefixed string literal or character constant is
 * parsed instead. symbol_obstack must hold no unfinished object on entry
 * (asserted). */
1161 static void parse_symbol(void)
1163 assert(obstack_object_size(&symbol_obstack) == 0);
1168 obstack_1grow(&symbol_obstack, (char) input.c);
1177 case 'U': n = 8; goto universal;
1178 case 'u': n = 4; goto universal;
1180 if (!resolve_escape_sequences) {
1181 obstack_1grow(&symbol_obstack, '\\');
1182 obstack_1grow(&symbol_obstack, input.c);
1185 utf32 const v = parse_universal_char(n);
1186 if (!is_universal_char_valid_identifier(v)) {
1187 if (is_universal_char_valid(v)) {
1189 "universal character \\%c%0*X is not valid in an identifier",
1190 n == 4 ? 'u' : 'U', (int)n, v);
1192 } else if (obstack_object_size(&symbol_obstack) == 0 && is_universal_char_invalid_identifier_start(v)) {
1194 "universal character \\%c%0*X is not valid as start of an identifier",
1195 n == 4 ? 'u' : 'U', (int)n, v);
1196 } else if (resolve_escape_sequences) {
1197 obstack_grow_utf8(&symbol_obstack, v);
1215 obstack_1grow(&symbol_obstack, '\0');
1216 char *string = obstack_finish(&symbol_obstack);
1218 symbol_t *symbol = symbol_table_insert(string);
1220 /* Might be a prefixed string or character constant: L/U/u/u8"string". */
1221 if (input.c == '"') {
1222 string_encoding_t const enc = identify_encoding_prefix(symbol);
1223 if (enc != STRING_ENCODING_CHAR) {
1224 parse_string_literal(enc);
1227 } else if (input.c == '\'') {
1228 string_encoding_t const enc = identify_encoding_prefix(symbol);
1229 if (enc != STRING_ENCODING_CHAR) {
1230 if (enc == STRING_ENCODING_UTF8) {
1231 errorf(&pp_token.base.pos,
1232 "'u8' is not a valid encoding for a character constant");
1234 parse_character_constant(enc);
1239 pp_token.kind = symbol->ID;
1240 pp_token.base.symbol = symbol;
1242 /* we can free the memory from symbol obstack if we already had an entry in
1243 * the symbol table */
1244 if (symbol->string != string) {
1245 obstack_free(&symbol_obstack, string);
1249 static void parse_number(void)
1251 obstack_1grow(&symbol_obstack, (char) input.c);
1258 case SYMBOL_CASES_WITHOUT_E_P:
1259 obstack_1grow(&symbol_obstack, (char) input.c);
1267 obstack_1grow(&symbol_obstack, (char) input.c);
1269 if (input.c == '+' || input.c == '-') {
1270 obstack_1grow(&symbol_obstack, (char) input.c);
1282 pp_token.kind = T_NUMBER;
1283 pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
1286 #define MAYBE_PROLOG \
1290 #define MAYBE(ch, kind) \
1293 set_punctuator(kind); \
1296 #define MAYBE_DIGRAPH(ch, kind, symbol) \
1299 set_digraph(kind, symbol); \
1302 #define ELSE_CODE(code) \
1307 #define ELSE(kind) ELSE_CODE(set_punctuator(kind); return;)
1309 /** identifies and returns the next preprocessing token contained in the
1310 * input stream. No macro expansion is performed. */
1311 static void next_input_token(void)
1313 if (next_info.had_whitespace) {
1315 next_info.had_whitespace = false;
1317 info.at_line_begin = false;
1318 info.had_whitespace = false;
1321 pp_token.base.pos = input.pos;
1322 pp_token.base.symbol = NULL;
1327 info.whitespace_at_line_begin++;
1328 info.had_whitespace = true;
1333 info.at_line_begin = true;
1334 info.had_whitespace = true;
1335 info.whitespace_at_line_begin = 0;
1347 parse_string_literal(STRING_ENCODING_CHAR);
1351 parse_character_constant(STRING_ENCODING_CHAR);
1373 MAYBE('.', T_DOTDOTDOT)
1377 set_punctuator('.');
1383 MAYBE('&', T_ANDAND)
1384 MAYBE('=', T_ANDEQUAL)
1388 MAYBE('=', T_ASTERISKEQUAL)
1392 MAYBE('+', T_PLUSPLUS)
1393 MAYBE('=', T_PLUSEQUAL)
1397 MAYBE('>', T_MINUSGREATER)
1398 MAYBE('-', T_MINUSMINUS)
1399 MAYBE('=', T_MINUSEQUAL)
1403 MAYBE('=', T_EXCLAMATIONMARKEQUAL)
1407 MAYBE('=', T_SLASHEQUAL)
1410 skip_multiline_comment();
1414 skip_line_comment();
1419 MAYBE_DIGRAPH('>', '}', symbol_percentgreater)
1420 MAYBE('=', T_PERCENTEQUAL)
1425 MAYBE_DIGRAPH(':', T_HASHHASH, symbol_percentcolonpercentcolon)
1429 goto digraph_percentcolon;
1432 digraph_percentcolon:
1433 set_digraph('#', symbol_percentcolon);
1439 MAYBE_DIGRAPH(':', '[', symbol_lesscolon)
1440 MAYBE_DIGRAPH('%', '{', symbol_lesspercent)
1441 MAYBE('=', T_LESSEQUAL)
1444 MAYBE('=', T_LESSLESSEQUAL)
1449 MAYBE('=', T_GREATEREQUAL)
1452 MAYBE('=', T_GREATERGREATEREQUAL)
1453 ELSE(T_GREATERGREATER)
1457 MAYBE('=', T_CARETEQUAL)
1461 MAYBE('=', T_PIPEEQUAL)
1462 MAYBE('|', T_PIPEPIPE)
1466 MAYBE_DIGRAPH('>', ']', symbol_colongreater)
1468 if (c_mode & _CXX) {
1470 set_punctuator(T_COLONCOLON);
1477 MAYBE('=', T_EQUALEQUAL)
1481 MAYBE('#', T_HASHHASH)
1494 set_punctuator(input.c);
1499 if (input_stack != NULL) {
1500 fclose(close_pp_input());
1501 pop_restore_input();
1504 if (input.c == (utf32)EOF)
1506 print_line_directive(&input.pos, "2");
1509 info.at_line_begin = true;
1510 set_punctuator(T_EOF);
1516 int next_c = input.c;
1519 if (next_c == 'U' || next_c == 'u') {
1526 if (error_on_unknown_chars) {
1527 errorf(&pp_token.base.pos, "unknown character '%lc' found", input.c);
1531 assert(obstack_object_size(&symbol_obstack) == 0);
1532 obstack_grow_utf8(&symbol_obstack, input.c);
1533 obstack_1grow(&symbol_obstack, '\0');
1534 char *const string = obstack_finish(&symbol_obstack);
1535 symbol_t *const symbol = symbol_table_insert(string);
1536 if (symbol->string != string)
1537 obstack_free(&symbol_obstack, string);
1539 pp_token.kind = T_UNKNOWN_CHAR;
1540 pp_token.base.symbol = symbol;
/* Writes a string to the preprocessor output with the listed special
 * characters replaced by their C escape sequences; the fallback path prints a
 * three-digit octal escape.
 * The byte is converted through unsigned char first: where plain char is
 * signed, a byte >= 0x80 would otherwise be sign-extended and printed as an
 * eleven-digit octal number instead of \ooo. */
1547 static void print_quoted_string(const char *const string)
1550 for (const char *c = string; *c != 0; ++c) {
1552 case '"': fputs("\\\"", out); break;
1553 case '\\': fputs("\\\\", out); break;
1554 case '\a': fputs("\\a", out); break;
1555 case '\b': fputs("\\b", out); break;
1556 case '\f': fputs("\\f", out); break;
1557 case '\n': fputs("\\n", out); break;
1558 case '\r': fputs("\\r", out); break;
1559 case '\t': fputs("\\t", out); break;
1560 case '\v': fputs("\\v", out); break;
1561 case '\?': fputs("\\?", out); break;
1564 fprintf(out, "\\%03o", (unsigned)(unsigned char)*c);
/* Writes a line marker of the form `# <line> "<file>"` for the given position
 * to the output and records the printed input name and output line.
 *
 * @param pos  position whose line number and input name are printed
 * @param add  optional extra text for the marker (callers pass "1", "2" or
 *             NULL); how it is emitted is not visible in this excerpt
 *
 * output_line is set to one less than the printed line number. */
1574 static void print_line_directive(const position_t *pos, const char *add)
1579 fprintf(out, "# %u ", pos->lineno);
1580 print_quoted_string(pos->input_name);
1585 if (pos->is_system_header) {
1589 printed_input_name = pos->input_name;
1590 input.output_line = pos->lineno-1;
/* Brings the output line in sync with the line of the current token.
 *
 * The distance between the token's line and the last output line is
 * computed. Depending on conditions not visible here, either a line directive
 * is printed or a loop runs once per missing line. Afterwards the recorded
 * leading whitespace of the line is replayed.
 * NOTE(review): the return statements are not visible; emit_pp_token() only
 * considers a separating space when this returns false. */
1593 static bool emit_newlines(void)
1598 unsigned delta = pp_token.base.pos.lineno - input.output_line;
1604 print_line_directive(&pp_token.base.pos, NULL);
1607 for (unsigned i = 0; i < delta; ++i) {
1611 input.output_line = pp_token.base.pos.lineno;
1613 unsigned whitespace = info.whitespace_at_line_begin;
1614 /* make sure there is at least 1 whitespace before a (macro-expanded)
1615 * '#' at line begin. I'm not sure why this is good, but gcc does it. */
1616 if (pp_token.kind == '#' && whitespace == 0)
1618 for (unsigned i = 0; i < whitespace; ++i)
/* Selects the preprocessor output stream and switches the lexer between two
 * configurations: one where unknown characters are tolerated and escape
 * sequences are kept unresolved, and one where both are handled strictly.
 * NOTE(review): the condition choosing between the two is not visible here;
 * presumably the lenient configuration applies when an output stream is
 * given - confirm. */
1624 void set_preprocessor_output(FILE *output)
1628 error_on_unknown_chars = false;
1629 resolve_escape_sequences = false;
1631 error_on_unknown_chars = true;
1632 resolve_escape_sequences = true;
/* Writes the current token (pp_token) to the preprocessor output.
 *
 * emit_newlines() runs first. If it returns false, a further step (not
 * visible here, presumably a separating space) happens when the token had
 * leading whitespace or would otherwise paste with the previous token.
 * Literals are written with their encoding prefix, other tokens by their
 * symbol spelling. An unexpanded macro parameter is an internal error.
 * The token kind is remembered in last_token for the next call. */
1636 void emit_pp_token(void)
1638 if (!emit_newlines() &&
1639 (info.had_whitespace || tokens_would_paste(last_token, pp_token.kind)))
1642 switch (pp_token.kind) {
1644 fputs(pp_token.literal.string.begin, out);
1647 case T_STRING_LITERAL:
1648 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1650 fputs(pp_token.literal.string.begin, out);
1654 case T_CHARACTER_CONSTANT:
1655 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1657 fputs(pp_token.literal.string.begin, out);
1661 case T_MACRO_PARAMETER:
1662 panic("macro parameter not expanded");
1665 fputs(pp_token.base.symbol->string, out);
1668 last_token = pp_token.kind;
/**
 * Loops until info.at_line_begin is set, i.e. until the rest of the current
 * directive line has been consumed.
 * NOTE(review): the loop body is elided in this listing; presumably it reads
 * the next token - confirm.
 */
1671 static void eat_pp_directive(void)
1673 while (!info.at_line_begin) {
/**
 * Compares two string_t values: first their sizes, then their contents
 * position by position over size bytes.
 * NOTE(review): the per-byte comparison and the return statements are elided
 * in this listing.
 *
 * @param string1  first string
 * @param string2  second string
 * @return presumably true when size and all bytes match - confirm
 */
1678 static bool strings_equal(const string_t *string1, const string_t *string2)
1680 size_t size = string1->size;
/* different lengths can never be equal */
1681 if (size != string2->size)
1684 const char *c1 = string1->begin;
1685 const char *c2 = string2->begin;
/* walk both buffers in lock step */
1686 for (size_t i = 0; i < size; ++i, ++c1, ++c2) {
/**
 * Compares two preprocessing tokens. The kinds are compared first; then,
 * depending on the kind, the comparison uses
 *  - the literal string for character constants and string literals,
 *  - the symbol of the referenced parameter definition for macro parameters,
 *  - the token symbol in the last visible branch.
 * NOTE(review): some case labels and the result for differing kinds are
 * elided in this listing.
 *
 * @param token1  first token
 * @param token2  second token
 */
1693 static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
1695 if (token1->kind != token2->kind)
1698 switch (token1->kind) {
1700 case T_CHARACTER_CONSTANT:
1701 case T_STRING_LITERAL:
1702 return strings_equal(&token1->literal.string, &token2->literal.string);
1704 case T_MACRO_PARAMETER:
/* parameters are matched by the symbol of their definition */
1705 return token1->macro_parameter.def->symbol
1706 == token2->macro_parameter.def->symbol;
1709 return token1->base.symbol == token2->base.symbol;
/**
 * Compares the replacement lists of two macro definitions: the list lengths
 * first, then token by token using pp_tokens_equal together with the
 * had_whitespace flag of each saved token.
 * NOTE(review): the return statements are elided in this listing. The
 * visible lines do not compare the parameter lists themselves.
 *
 * @param definition1  first definition
 * @param definition2  second definition
 */
1713 static bool pp_definitions_equal(const pp_definition_t *definition1,
1714 const pp_definition_t *definition2)
1716 if (definition1->list_len != definition2->list_len)
1719 size_t len = definition1->list_len;
1720 const saved_token_t *t1 = definition1->token_list;
1721 const saved_token_t *t2 = definition2->token_list;
1722 for (size_t i = 0; i < len; ++i, ++t1, ++t2) {
1723 if (!pp_tokens_equal(&t1->token, &t2->token))
/* whitespace in front of a token is part of the comparison */
1725 if (t1->had_whitespace != t2->had_whitespace)
/**
 * Reports, at the position of the current token, that a hash operator in a
 * macro body is not followed by a macro parameter.
 */
1731 static void missing_macro_param_error(void)
1733 errorf(&pp_token.base.pos, "'#' is not followed by a macro parameter");
/**
 * Checks whether the current token may be used as a macro name and reports
 * an error otherwise. Three problems are diagnosed in the visible lines:
 * the line ended before a name was seen, the token is not an identifier,
 * and the symbol has a pp_ID that may not be used as a macro name.
 * NOTE(review): the return statements and the case labels of both switches
 * are elided in this listing.
 *
 * @param context  directive name inserted into the error messages
 * @return presumably true when the token is an acceptable macro name; the
 *         callers abort their directive when the result is false
 */
1736 static bool is_defineable_token(char const *const context)
1738 if (info.at_line_begin) {
1739 errorf(&pp_token.base.pos, "unexpected end of line after %s", context);
1742 symbol_t *const symbol = pp_token.base.symbol;
/* non-identifier tokens are inspected by the first character of their
 * spelling (the accepted cases are not visible) */
1746 if (pp_token.kind != T_IDENTIFIER) {
1747 switch (symbol->string[0]) {
1754 errorf(&pp_token.base.pos, "expected identifier after %s, got %K",
1755 context, &pp_token);
1760 /* TODO turn this into a flag in pp_def. */
1761 switch (symbol->pp_ID) {
1764 errorf(&pp_token.base.pos, "%K cannot be used as macro name in %s",
1765 &pp_token, context);
/**
 * Parses a #define directive and installs the resulting macro definition in
 * the pp_definition slot of the macro name symbol.
 *
 * Visible steps:
 *  - the macro name is validated with is_defineable_token,
 *  - the definition record is allocated on pp_obstack,
 *  - if an opening parenthesis follows the name directly, a parameter list
 *    of identifiers and an optional trailing ellipsis is parsed,
 *  - every parameter symbol is temporarily bound to its parameter
 *    definition, so that identifiers in the body can be recognised as
 *    parameters and stored as T_MACRO_PARAMETER tokens,
 *  - the body tokens up to the end of the line are collected in token_list;
 *    in a macro with parameters a hash token must be followed by a parameter,
 *  - the temporary parameter bindings are undone,
 *  - an existing definition of the same name is compared with the new one;
 *    a differing redefinition produces a warning, otherwise the old
 *    definition is reused.
 * NOTE(review): this listing elides lines of the body (braces, else
 * branches, labels and some statements).
 */
1773 static void parse_define_directive(void)
/* pp_obstack must not hold a half-built object when we start */
1781 assert(obstack_object_size(&pp_obstack) == 0);
1783 if (!is_defineable_token("#define"))
1785 symbol_t *const symbol = pp_token.base.symbol;
/* the definition record itself lives on pp_obstack */
1787 pp_definition_t *new_definition
1788 = obstack_alloc(&pp_obstack, sizeof(new_definition[0]));
1789 memset(new_definition, 0, sizeof(new_definition[0]));
1790 new_definition->symbol = symbol;
1791 new_definition->pos = input.pos;
1793 /* this is probably the only place where spaces are significant in the
1794 * lexer (except for the fact that they separate tokens). #define b(x)
1795 * is something else than #define b (x) */
1796 if (input.c == '(') {
1801 switch (pp_token.kind) {
1803 new_definition->is_variadic = true;
1804 eat_token(T_DOTDOTDOT);
1805 if (pp_token.kind != ')') {
1807 "'...' not at end of macro argument list");
1812 case T_IDENTIFIER: {
/* parameters are grown on pp_obstack as an array of definitions */
1813 pp_definition_t parameter;
1814 memset(&parameter, 0, sizeof(parameter));
1815 parameter.pos = pp_token.base.pos;
1816 parameter.symbol = pp_token.base.symbol;
1817 parameter.is_parameter = true;
1818 obstack_grow(&pp_obstack, &parameter, sizeof(parameter));
1819 eat_token(T_IDENTIFIER);
1821 if (pp_token.kind == ',') {
1826 if (pp_token.kind != ')') {
1827 errorf(&pp_token.base.pos,
1828 "expected ',' or ')' after identifier, got %K",
1837 goto finish_argument_list;
1840 errorf(&pp_token.base.pos,
1841 "expected identifier, '...' or ')' in #define argument list, got %K",
1847 finish_argument_list:
1848 new_definition->has_parameters = true;
1849 size_t size = obstack_object_size(&pp_obstack);
1850 new_definition->n_parameters
1851 = size / sizeof(new_definition->parameters[0]);
1852 new_definition->parameters = obstack_finish(&pp_obstack);
/* bind every parameter symbol to its parameter definition while the body
 * is parsed; the previous binding is kept in parent_expansion */
1853 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1854 pp_definition_t *param = &new_definition->parameters[i];
1855 symbol_t *symbol = param->symbol;
1856 pp_definition_t *previous = symbol->pp_definition;
/* a symbol already bound to this macro was listed twice */
1857 if (previous != NULL
1858 && previous->function_definition == new_definition) {
1859 errorf(&param->pos, "duplicate macro parameter '%Y'", symbol);
1860 param->symbol = sym_anonymous;
1863 param->parent_expansion = previous;
1864 param->function_definition = new_definition;
1865 symbol->pp_definition = param;
1871 /* construct token list */
1872 assert(obstack_object_size(&pp_obstack) == 0);
1873 bool next_must_be_param = false;
1874 while (!info.at_line_begin) {
1875 if (pp_token.kind == T_IDENTIFIER) {
1876 const symbol_t *symbol = pp_token.base.symbol;
1877 pp_definition_t *definition = symbol->pp_definition;
/* identifier names a parameter of this macro: store a parameter token */
1878 if (definition != NULL
1879 && definition->function_definition == new_definition) {
1880 pp_token.kind = T_MACRO_PARAMETER;
1881 pp_token.macro_parameter.def = definition;
1884 if (next_must_be_param && pp_token.kind != T_MACRO_PARAMETER) {
1885 missing_macro_param_error();
/* append the token together with its whitespace flag */
1887 saved_token_t saved_token;
1888 saved_token.token = pp_token;
1889 saved_token.had_whitespace = info.had_whitespace;
1890 obstack_grow(&pp_obstack, &saved_token, sizeof(saved_token));
1892 = new_definition->has_parameters && pp_token.kind == '#';
/* the body ended directly after a hash operator */
1895 if (next_must_be_param)
1896 missing_macro_param_error();
1898 new_definition->list_len = obstack_object_size(&pp_obstack)
1899 / sizeof(new_definition->token_list[0]);
1900 new_definition->token_list = obstack_finish(&pp_obstack);
/* undo the temporary parameter bindings */
1902 if (new_definition->has_parameters) {
1903 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1904 pp_definition_t *param = &new_definition->parameters[i];
1905 symbol_t *symbol = param->symbol;
1906 if (symbol == sym_anonymous)
1908 assert(symbol->pp_definition == param);
1909 assert(param->function_definition == new_definition);
1910 symbol->pp_definition = param->parent_expansion;
1911 param->parent_expansion = NULL;
/* compare with a previous definition of the same name */
1915 pp_definition_t *old_definition = symbol->pp_definition;
1916 if (old_definition != NULL) {
1917 if (!pp_definitions_equal(old_definition, new_definition)) {
1918 warningf(WARN_OTHER, &input.pos,
1919 "multiple definition of macro '%Y' (first defined %P)",
1920 symbol, &old_definition->pos);
1922 /* reuse the old definition */
1923 obstack_free(&pp_obstack, new_definition);
1924 new_definition = old_definition;
1928 symbol->pp_definition = new_definition;
/* throw away a partially grown object left on pp_obstack */
1932 if (obstack_object_size(&pp_obstack) > 0) {
1933 char *ptr = obstack_finish(&pp_obstack);
1934 obstack_free(&pp_obstack, ptr);
/**
 * Parses a #undef directive: validates the macro name with
 * is_defineable_token, clears the pp_definition of the named symbol and
 * warns when further tokens follow on the same line.
 * NOTE(review): the handling of an invalid name and the token advance
 * between the visible statements are elided in this listing.
 */
1939 static void parse_undef_directive(void)
1947 if (!is_defineable_token("#undef")) {
/* removing the definition makes the name an ordinary identifier again */
1952 pp_token.base.symbol->pp_definition = NULL;
1955 if (!info.at_line_begin) {
1956 warningf(WARN_OTHER, &input.pos, "extra tokens at end of #undef directive");
/**
 * Parses the header name that follows an include directive.
 *
 * Two ways of reading the name are visible:
 *  - if the input continues with an opening angle bracket or a double
 *    quote, the characters up to the matching delimiter are copied verbatim
 *    onto symbol_obstack;
 *  - otherwise the next preprocessing token is read. A string literal is
 *    returned directly. After an opening angle bracket the following
 *    tokens are collected on pp_obstack until a closing angle bracket is
 *    seen and their text is then joined on symbol_obstack, with a single
 *    space wherever a token had whitespace in front of it.
 * The joined name is NUL-terminated, passed through identify_string and the
 * position of pp_token is reset to the position saved at the start.
 * NOTE(review): this listing elides lines of the body, including the final
 * return statement and the loop heads. The existing comment below is
 * truncated in the listing and left untouched.
 *
 * @param system_include  set to true for the angle bracket form and to
 *                        false for the quoted form
 * @return the header name; the caller checks for NULL, which presumably
 *         signals one of the error paths - confirm
 */
1961 /** behind an #include we can have the special headername lexems.
1962 * They're only allowed behind an #include so they're not recognized
1963 * by the normal next_preprocessing_token. We handle them as a special
1965 static const char *parse_headername(bool *system_include)
1967 if (info.at_line_begin) {
1968 parse_error("expected headername after #include");
1972 /* check wether we have a "... or <... headername */
1973 position_t pos = input.pos;
1977 case '<': delimiter = '>'; *system_include = true; goto parse_name;
1978 case '"': delimiter = '"'; *system_include = false; goto parse_name;
/* verbatim form: the name is built on symbol_obstack */
1980 assert(obstack_object_size(&symbol_obstack) == 0);
/* error path: drop the partial name before reporting */
1987 char *dummy = obstack_finish(&symbol_obstack);
1988 obstack_free(&symbol_obstack, dummy);
1990 errorf(&pp_token.base.pos,
1991 "header name without closing '%c'", (char)delimiter);
1995 if (input.c == delimiter) {
1997 goto finish_headername;
1999 obstack_1grow(&symbol_obstack, (char)input.c);
2005 /* we should never be here */
/* token form: read a regular preprocessing token */
2009 next_preprocessing_token();
2010 if (info.at_line_begin) {
2011 /* TODO: if we are already in the new line then we parsed more than
2012 * wanted. We reuse the token, but could produce following errors
2013 * misbehaviours... */
2014 goto error_invalid_input;
2016 if (pp_token.kind == T_STRING_LITERAL) {
2017 *system_include = false;
2018 return pp_token.literal.string.begin;
2019 } else if (pp_token.kind == '<') {
2020 *system_include = true;
2021 assert(obstack_object_size(&pp_obstack) == 0);
2023 next_preprocessing_token();
2024 if (info.at_line_begin) {
2025 /* TODO: we shouldn't have parsed/expanded something on the
2026 * next line yet... */
2027 char *dummy = obstack_finish(&pp_obstack);
2028 obstack_free(&pp_obstack, dummy);
2029 goto error_invalid_input;
2031 if (pp_token.kind == '>')
/* keep the token and its whitespace flag for joining later */
2034 saved_token_t saved;
2035 saved.token = pp_token;
2036 saved.had_whitespace = info.had_whitespace;
2037 obstack_grow(&pp_obstack, &saved, sizeof(saved));
2039 size_t size = obstack_object_size(&pp_obstack);
2040 assert(size % sizeof(saved_token_t) == 0);
2041 size_t n_tokens = size / sizeof(saved_token_t);
2042 saved_token_t *tokens = obstack_finish(&pp_obstack);
2043 assert(obstack_object_size(&symbol_obstack) == 0);
/* join the token texts; whitespace before the first token is dropped */
2044 for (size_t i = 0; i < n_tokens; ++i) {
2045 const saved_token_t *saved = &tokens[i];
2046 if (i > 0 && saved->had_whitespace)
2047 obstack_1grow(&symbol_obstack, ' ');
2048 grow_token(&symbol_obstack, &saved->token);
2050 obstack_free(&pp_obstack, tokens);
2051 goto finish_headername;
2053 error_invalid_input:
2055 char *dummy = obstack_finish(&symbol_obstack);
2056 obstack_free(&symbol_obstack, dummy);
2059 errorf(&pp_token.base.pos,
2060 "expected \"FILENAME\" or <FILENAME> after #include");
/* terminate the collected name and hand it to identify_string */
2066 obstack_1grow(&symbol_obstack, '\0');
2067 char *const headername = obstack_finish(&symbol_obstack);
2068 const char *identified = identify_string(headername);
2069 pp_token.base.pos = pos;
/**
 * Locates the file for an include directive and makes it the current input.
 *
 * Visible behaviour:
 *  - one branch starts the search at the entry after input.path, or at the
 *    head of the bracket or quote search path when input.path is not set,
 *  - for the quoted form the directory of the current input file is tried
 *    first: the header name is appended to the directory part of
 *    input.pos.input_name, or used as is when that name has no slash,
 *  - afterwards every remaining search path entry is tried by joining the
 *    entry path and the header name with exactly one slash.
 * A file that can be opened is passed to switch_pp_input.
 * NOTE(review): this listing elides lines of the body (branch conditions
 * and return statements).
 *
 * @param bracket_include  true for the angle bracket form
 * @param include_next     true for the include_next variant; its use is not
 *                         visible in the listed lines
 * @param headername       name to look up
 * @return the caller reports a failed include based on the result;
 *         presumably true means a file was opened - confirm
 */
2073 static bool do_include(bool const bracket_include, bool const include_next, char const *const headername)
2075 size_t const headername_len = strlen(headername);
2076 searchpath_entry_t *entry;
2078 entry = input.path ? input.path->next
2079 : bracket_include ? bracket_searchpath.first
2080 : quote_searchpath.first;
2082 if (!bracket_include) {
2083 /* put dirname of current input on obstack */
2084 const char *filename = input.pos.input_name;
2085 const char *last_slash = strrchr(filename, '/');
2086 const char *full_name;
2087 if (last_slash != NULL) {
2088 size_t len = last_slash - filename;
/* len + 1 copies the directory part including its trailing slash */
2089 obstack_grow(&symbol_obstack, filename, len + 1);
2090 obstack_grow0(&symbol_obstack, headername, headername_len);
2091 char *complete_path = obstack_finish(&symbol_obstack);
2092 full_name = identify_string(complete_path);
2094 full_name = headername;
2097 FILE *file = fopen(full_name, "r");
2099 switch_pp_input(file, full_name, NULL, false);
2102 entry = quote_searchpath.first;
2104 entry = bracket_searchpath.first;
2108 assert(obstack_object_size(&symbol_obstack) == 0);
2109 /* check searchpath */
2110 for (; entry; entry = entry->next) {
2111 const char *path = entry->path;
2112 size_t len = strlen(path);
2113 obstack_grow(&symbol_obstack, path, len);
/* add a separator unless the entry already ends in one; an empty entry is
 * left without separator so that path[len-1] is never read out of bounds */
2114 if (len > 0 && path[len-1] != '/')
2115 obstack_1grow(&symbol_obstack, '/');
/* headername_len+1 also copies the terminating NUL */
2116 obstack_grow(&symbol_obstack, headername, headername_len+1);
2118 char *complete_path = obstack_finish(&symbol_obstack);
2119 FILE *file = fopen(complete_path, "r");
2121 const char *filename = identify_string(complete_path);
2122 switch_pp_input(file, filename, entry, entry->is_system_path);
/* not found in this entry: release the candidate path again */
2125 obstack_free(&symbol_obstack, complete_path);
/**
 * Handles an include directive: reads the header name with
 * parse_headername, warns about extra tokens on the line, rejects nesting
 * deeper than INCLUDE_LIMIT, resets the whitespace information to the state
 * at the beginning of a line and calls do_include. A failed include is
 * reported together with strerror(errno) and followed by
 * pop_restore_input().
 * NOTE(review): this listing elides lines of the body, among them the
 * handling of a NULL header name and the condition guarding the failure
 * report.
 *
 * @param include_next  forwarded unchanged to do_include
 */
2132 static void parse_include_directive(bool const include_next)
2139 /* do not eat the TP_include, since it would already parse the next token
2140 * which needs special handling here. */
2141 skip_till_newline(true);
2142 bool system_include;
2143 const char *headername = parse_headername(&system_include);
2144 if (headername == NULL) {
2149 bool had_nonwhitespace = skip_till_newline(false);
2150 if (had_nonwhitespace) {
2151 warningf(WARN_OTHER, &input.pos,
2152 "extra tokens at end of #include directive");
2155 if (n_inputs > INCLUDE_LIMIT) {
2156 errorf(&pp_token.base.pos, "#include nested too deeply");
/* the included file starts at the beginning of a line */
2163 info.whitespace_at_line_begin = 0;
2164 info.had_whitespace = false;
2165 info.at_line_begin = true;
2168 bool res = do_include(system_include, include_next, headername);
2172 errorf(&pp_token.base.pos, "failed including '%s': %s", headername, strerror(errno));
2173 pop_restore_input();
/**
 * Allocates a zero-initialised pp_conditional_t on pp_obstack and pushes it
 * onto conditional_stack, linking the previous top as its parent.
 *
 * @return NOTE(review): the return statement is elided in this listing; the
 *         callers use the result as the newly pushed conditional
 */
2177 static pp_conditional_t *push_conditional(void)
2179 pp_conditional_t *conditional
2180 = obstack_alloc(&pp_obstack, sizeof(*conditional));
2181 memset(conditional, 0, sizeof(*conditional));
/* the new entry becomes the top of the stack */
2183 conditional->parent = conditional_stack;
2184 conditional_stack = conditional;
/**
 * Removes the top entry from conditional_stack by making its parent the new
 * top. The stack must not be empty.
 */
2189 static void pop_conditional(void)
2191 assert(conditional_stack != NULL);
2192 conditional_stack = conditional_stack->parent;
/**
 * Reports every conditional that is still on conditional_stack. An entry
 * that is in its else part is reported as unterminated else, any other
 * entry as unterminated condition, each at the position stored in the
 * entry.
 * NOTE(review): the statement that advances the loop is elided in this
 * listing; presumably each reported entry is popped - confirm.
 */
2195 void check_unclosed_conditionals(void)
2197 while (conditional_stack != NULL) {
2198 pp_conditional_t *conditional = conditional_stack;
2200 if (conditional->in_else) {
2201 errorf(&conditional->pos, "unterminated #else");
2203 errorf(&conditional->pos, "unterminated condition");
/**
 * Handles the ifdef and ifndef directives.
 * One visible path pushes a conditional that is marked as skip. The regular
 * path expects an identifier, evaluates the condition as: the symbol has a
 * macro definition, compared with is_ifdef, reports extra tokens on the
 * line and pushes a conditional carrying the result.
 * NOTE(review): this listing elides lines of the body, among them the
 * condition that selects the skip path and the update of skip_mode.
 *
 * @param is_ifdef  true for ifdef, false for ifndef
 */
2209 static void parse_ifdef_ifndef_directive(bool const is_ifdef)
2212 eat_pp(is_ifdef ? TP_ifdef : TP_ifndef);
/* conditional whose then and else parts are both skipped */
2216 pp_conditional_t *conditional = push_conditional();
2217 conditional->pos = pp_token.base.pos;
2218 conditional->skip = true;
2222 if (pp_token.kind != T_IDENTIFIER || info.at_line_begin) {
2223 errorf(&pp_token.base.pos, "expected identifier after #%s, got %K",
2224 is_ifdef ? "ifdef" : "ifndef", &pp_token);
2227 /* just take the true case in the hope to avoid further errors */
2230 /* evaluate wether we are in true or false case */
2231 condition = (bool)pp_token.base.symbol->pp_definition == is_ifdef;
2232 eat_token(T_IDENTIFIER);
2234 if (!info.at_line_begin) {
2235 errorf(&pp_token.base.pos, "extra tokens at end of #%s",
2236 is_ifdef ? "ifdef" : "ifndef");
2241 pp_conditional_t *conditional = push_conditional();
2242 conditional->pos = pp_token.base.pos;
2243 conditional->condition = condition;
/**
 * Handles the else directive: warns about extra tokens, reports an else
 * without an open conditional and a second else for the same conditional,
 * then marks the top conditional as being in its else part. Unless the
 * whole conditional is skipped, skip_mode is set to the stored condition,
 * so the else part is skipped exactly when the condition was true.
 * NOTE(review): this listing elides lines of the body (token advance and
 * early returns after the error reports).
 */
2250 static void parse_else_directive(void)
2254 if (!info.at_line_begin) {
2256 warningf(WARN_OTHER, &pp_token.base.pos, "extra tokens at end of #else");
2261 pp_conditional_t *conditional = conditional_stack;
2262 if (conditional == NULL) {
2263 errorf(&pp_token.base.pos, "#else without prior #if");
2267 if (conditional->in_else) {
2268 errorf(&pp_token.base.pos,
2269 "#else after #else (condition started %P)",
2275 conditional->in_else = true;
2276 if (!conditional->skip) {
2277 skip_mode = conditional->condition;
/* from now on errors about this conditional point at the else */
2279 conditional->pos = pp_token.base.pos;
/**
 * Handles the endif directive: warns about extra tokens on the line and
 * reports an endif without an open conditional.
 * NOTE(review): the remainder of the body is elided in this listing,
 * including what happens when the conditional is not marked as skip and the
 * removal of the conditional from the stack.
 */
2282 static void parse_endif_directive(void)
2286 if (!info.at_line_begin) {
2288 warningf(WARN_OTHER, &pp_token.base.pos, "extra tokens at end of #endif");
2293 pp_conditional_t *conditional = conditional_stack;
2294 if (conditional == NULL) {
2295 errorf(&pp_token.base.pos, "#endif without prior #if");
2299 if (!conditional->skip) {
/**
 * Kind of a STDC pragma as recognised by parse_pragma_directive.
 * NOTE(review): only the last enumerator is visible in this listing; the
 * pragma parser also uses STDC_UNKNOWN, STDC_FP_CONTRACT and
 * STDC_FENV_ACCESS.
 */
2305 typedef enum stdc_pragma_kind_t {
2309 STDC_CX_LIMITED_RANGE
2310 } stdc_pragma_kind_t;
/**
 * Argument of a STDC pragma.
 * NOTE(review): the enumerators are elided in this listing; the pragma
 * parser uses STDC_VALUE_ON, STDC_VALUE_OFF, STDC_VALUE_DEFAULT and
 * STDC_VALUE_UNKNOWN.
 */
2312 typedef enum stdc_pragma_value_kind_t {
2317 } stdc_pragma_value_kind_t;
/**
 * Handles the pragma directive. A pragma that does not start with an
 * identifier produces an unknown-pragma warning. When the identifier is
 * STDC and the language mode includes C99, the following token selects one
 * of FP_CONTRACT, FENV_ACCESS or CX_LIMITED_RANGE and the token after that
 * is mapped to ON, OFF or DEFAULT; any other argument is an error. A pragma
 * whose kind is still unknown at the end produces an unknown-pragma warning.
 * NOTE(review): this listing elides lines of the body (token advances and
 * the end of the directive). No use of the parsed kind and value beyond the
 * diagnostics is visible.
 */
2319 static void parse_pragma_directive(void)
2327 if (pp_token.kind != T_IDENTIFIER) {
2328 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.pos,
2329 "expected identifier after #pragma");
2334 stdc_pragma_kind_t kind = STDC_UNKNOWN;
2335 if (pp_token.base.symbol->pp_ID == TP_STDC && c_mode & _C99) {
2339 switch (pp_token.base.symbol->pp_ID) {
2340 case TP_FP_CONTRACT: kind = STDC_FP_CONTRACT; break;
2341 case TP_FENV_ACCESS: kind = STDC_FENV_ACCESS; break;
2342 case TP_CX_LIMITED_RANGE: kind = STDC_CX_LIMITED_RANGE; break;
2345 if (kind != STDC_UNKNOWN) {
2347 stdc_pragma_value_kind_t value;
2348 switch (pp_token.base.symbol->pp_ID) {
2349 case TP_ON: value = STDC_VALUE_ON; break;
2350 case TP_OFF: value = STDC_VALUE_OFF; break;
2351 case TP_DEFAULT: value = STDC_VALUE_DEFAULT; break;
2352 default: value = STDC_VALUE_UNKNOWN; break;
/* a known STDC pragma with a bad argument is demoted to unknown */
2354 if (value == STDC_VALUE_UNKNOWN) {
2355 kind = STDC_UNKNOWN;
2356 errorf(&pp_token.base.pos, "bad STDC pragma argument");
2361 if (kind == STDC_UNKNOWN) {
2362 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.pos,
2363 "encountered unknown #pragma");
/**
 * Handles a line directive, which is also used for the line markers found
 * in already preprocessed input. Expects a number token, converts it with
 * strtol and stores it, minus one, as the current line number of the input.
 * input.output_line is moved 20 lines behind that value to force a fresh
 * line marker in the output. An optional plain string literal replaces the
 * input name and clears the system header flag; in the numeric flags that
 * may follow, the flag 3 sets the system header flag again.
 * NOTE(review): this listing elides lines of the body, among them the
 * validation of the strtol result and the token advances. The flag
 * description in the body has lost its comment delimiters in the listing
 * and is left untouched.
 */
2367 static void parse_line_directive(void)
2369 if (pp_token.kind != T_NUMBER) {
2371 parse_error("expected integer");
2374 long const line = strtol(pp_token.literal.string.begin, &end, 0);
2376 /* use offset -1 as this is about the next line */
2377 input.pos.lineno = line - 1;
2378 /* force output of line */
2379 input.output_line = input.pos.lineno - 20;
2382 errorf(&input.pos, "'%S' is not a valid line number",
2383 &pp_token.literal.string);
2387 if (info.at_line_begin)
2390 if (pp_token.kind == T_STRING_LITERAL
2391 && pp_token.literal.string.encoding == STRING_ENCODING_CHAR) {
2392 input.pos.input_name = pp_token.literal.string.begin;
2393 input.pos.is_system_header = false;
2396 /* attempt to parse numeric flags as outputted by gcc preprocessor */
2397 while (!info.at_line_begin && pp_token.kind == T_NUMBER) {
2399 * 1 - indicates start of a new file
2400 * 2 - indicates return from a file
2401 * 3 - indicates system header
2402 * 4 - indicates implicit extern "C" in C++ mode
2404 * currently we're only interested in "3"
2406 if (streq(pp_token.literal.string.begin, "3")) {
2407 input.pos.is_system_header = true;
/**
 * Handles the error directive. The tokens up to the end of the line are
 * re-assembled as text on pp_obstack, with a single space wherever a token
 * had whitespace in front of it (except at the very start), and the result
 * is reported through errorf at the position recorded before the loop.
 * Escape sequence resolution is switched off while the tokens are read and
 * restored afterwards. String literals and character constants are written
 * with their encoding prefix and delimiters; the last visible branch writes
 * the symbol spelling.
 * NOTE(review): this listing elides lines of the body (loop head, case
 * labels and token advances).
 */
2416 static void parse_error_directive(void)
2423 bool const old_resolve_escape_sequences = resolve_escape_sequences;
2424 resolve_escape_sequences = false;
2426 position_t const pos = pp_token.base.pos;
2428 if (info.had_whitespace && obstack_object_size(&pp_obstack) != 0)
2429 obstack_1grow(&pp_obstack, ' ');
2431 switch (pp_token.kind) {
2433 string_t const *const str = &pp_token.literal.string;
2434 obstack_grow(&pp_obstack, str->begin, str->size);
2440 case T_STRING_LITERAL: delim = '"'; goto string;
2441 case T_CHARACTER_CONSTANT: delim = '\''; goto string;
2443 string_t const *const str = &pp_token.literal.string;
2444 char const *const enc = get_string_encoding_prefix(str->encoding);
2445 obstack_printf(&pp_obstack, "%s%c%s%c", enc, delim, str->begin, delim);
2450 char const *const str = pp_token.base.symbol->string;
2451 obstack_grow(&pp_obstack, str, strlen(str));
2457 } while (!info.at_line_begin);
2459 resolve_escape_sequences = old_resolve_escape_sequences;
/* terminate the message, report it and release the buffer */
2461 obstack_1grow(&pp_obstack, '\0');
2462 char *const str = obstack_finish(&pp_obstack);
2463 errorf(&pos, "#%s", str);
2464 obstack_free(&pp_obstack, str);
/**
 * Dispatches a preprocessing directive. A directive without any token on
 * its line is the empty directive. Otherwise the pp_ID of the token symbol
 * selects the handler. A directive that starts with a number is treated as
 * a line directive, the same code the line keyword jumps to. Anything else
 * is reported as an invalid preprocessing directive. On exit the input is
 * at the beginning of a line again.
 * NOTE(review): this listing elides lines of the body, among them the
 * default case, the line_directive label and the skip_mode handling.
 */
2467 static void parse_preprocessing_directive(void)
2471 if (info.at_line_begin) {
2472 /* empty directive */
2476 if (pp_token.base.symbol) {
2477 switch (pp_token.base.symbol->pp_ID) {
2478 case TP_define: parse_define_directive(); break;
2479 case TP_else: parse_else_directive(); break;
2480 case TP_endif: parse_endif_directive(); break;
2481 case TP_error: parse_error_directive(); break;
2482 case TP_ifdef: parse_ifdef_ifndef_directive(true); break;
2483 case TP_ifndef: parse_ifdef_ifndef_directive(false); break;
2484 case TP_include: parse_include_directive(false); break;
2485 case TP_include_next: parse_include_directive(true); break;
2486 case TP_line: next_input_token(); goto line_directive;
2487 case TP_pragma: parse_pragma_directive(); break;
2488 case TP_undef: parse_undef_directive(); break;
2491 } else if (pp_token.kind == T_NUMBER) {
2493 parse_line_directive();
2497 errorf(&pp_token.base.pos, "invalid preprocessing directive #%K", &pp_token);
/* every handler has to consume its whole line */
2502 assert(info.at_line_begin);
/**
 * Closes the macro argument that is currently being collected: the tokens
 * grown on pp_obstack become the token_list of current_argument and their
 * number becomes its list_len.
 * When current_argument is NULL the check at the top applies; its action is
 * elided in this listing, presumably an early return.
 */
2505 static void finish_current_argument(void)
2507 if (current_argument == NULL)
2509 size_t size = obstack_object_size(&pp_obstack);
2510 current_argument->list_len = size/sizeof(current_argument->token_list[0]);
2511 current_argument->token_list = obstack_finish(&pp_obstack);
/**
 * Delivers the next preprocessing token in pp_token, handling directives,
 * conditional skipping and macro expansion.
 *
 * Visible behaviour:
 *  - when expand_next() yields nothing, input is read; directives at the
 *    beginning of a line are processed, and reading continues while
 *    skip_mode is set and the end of file has not been reached,
 *  - outside of argument collection (or while an argument is being
 *    expanded) a T_MACRO_PARAMETER token starts the expansion of the
 *    parameter, and a symbol with a definition that is not already
 *    expanding starts a macro expansion; a macro with parameters is only
 *    expanded when an opening parenthesis follows, which starts the
 *    collection of arguments for current_call,
 *  - while arguments are collected, parentheses are counted, a comma at
 *    nesting depth zero starts the next argument, the closing parenthesis
 *    at depth zero finishes the call and starts the expansion, and every
 *    other token is appended to the current argument on pp_obstack.
 * NOTE(review): this listing elides lines of the body (loop heads, labels,
 * else branches and jumps), so the exact control flow between the visible
 * statements has to be taken from the full source.
 */
2514 void next_preprocessing_token(void)
2517 if (!expand_next()) {
2520 while (pp_token.kind == '#' && info.at_line_begin) {
2521 parse_preprocessing_directive();
2523 } while (skip_mode && pp_token.kind != T_EOF);
2526 const token_kind_t kind = pp_token.kind;
2527 if (current_call == NULL || argument_expanding != NULL) {
2528 symbol_t *const symbol = pp_token.base.symbol;
2530 if (kind == T_MACRO_PARAMETER) {
2531 assert(current_expansion != NULL);
2532 start_expanding(pp_token.macro_parameter.def);
2536 pp_definition_t *const pp_definition = symbol->pp_definition;
/* a macro is not expanded again inside its own expansion */
2537 if (pp_definition != NULL && !pp_definition->is_expanding) {
2538 if (pp_definition->has_parameters) {
2540 /* check if next token is a '(' */
2541 whitespace_info_t old_info = info;
2542 token_kind_t next_token = peek_expansion();
2543 if (next_token == T_EOF) {
2544 info.at_line_begin = false;
2545 info.had_whitespace = false;
2547 if (input.c == '(') {
2552 if (next_token == '(') {
/* the position of the outermost macro name is kept for diagnostics */
2553 if (current_expansion == NULL)
2554 expansion_pos = pp_token.base.pos;
2555 next_preprocessing_token();
2556 assert(pp_token.kind == '(');
2558 pp_definition->parent_expansion = current_expansion;
2559 current_call = pp_definition;
2560 current_call->expand_pos = 0;
2561 current_call->expand_info = old_info;
2562 if (current_call->n_parameters > 0) {
2563 current_argument = &current_call->parameters[0];
2564 assert(argument_brace_count == 0);
2568 /* skip_whitespaces() skipped newlines and whitespace,
2569 * remember results for next token */
2575 if (current_expansion == NULL)
2576 expansion_pos = pp_token.base.pos;
2577 start_expanding(pp_definition);
2584 if (current_call != NULL) {
2585 /* current_call != NULL */
2587 ++argument_brace_count;
2588 } else if (kind == ')') {
2589 if (argument_brace_count > 0) {
2590 --argument_brace_count;
/* closing parenthesis of the call itself: expand the macro */
2592 finish_current_argument();
2593 assert(kind == ')');
2594 start_expanding(current_call);
2595 info = current_call->expand_info;
2596 current_call = NULL;
2597 current_argument = NULL;
2600 } else if (kind == ',' && argument_brace_count == 0) {
/* expand_pos doubles as index of the argument being collected */
2601 finish_current_argument();
2602 current_call->expand_pos++;
2603 if (current_call->expand_pos >= current_call->n_parameters) {
2604 errorf(&pp_token.base.pos,
2605 "too many arguments passed for macro '%Y'",
2606 current_call->symbol);
2607 current_argument = NULL;
2610 = &current_call->parameters[current_call->expand_pos];
2613 } else if (kind == T_MACRO_PARAMETER) {
2614 /* parameters have to be fully expanded before being used as
2615 * parameters for another macro-call */
2616 assert(current_expansion != NULL);
2617 pp_definition_t *argument = pp_token.macro_parameter.def;
2618 argument_expanding = argument;
2619 start_expanding(argument);
2621 } else if (kind == T_EOF) {
2622 errorf(&expansion_pos,
2623 "reached end of file while parsing arguments for '%Y'",
2624 current_call->symbol);
/* any other token belongs to the argument that is being collected */
2627 if (current_argument != NULL) {
2628 saved_token_t saved;
2629 saved.token = pp_token;
2630 saved.had_whitespace = info.had_whitespace;
2631 obstack_grow(&pp_obstack, &saved, sizeof(saved));
/**
 * Appends a new entry to the end of a search path list. The entry is
 * allocated on config_obstack, inherits is_system_path from the list and is
 * linked in through the anchor pointer of the list, which afterwards points
 * at the next field of the new entry.
 * NOTE(review): the line storing path in the entry is elided in this
 * listing. Presumably the string is not copied, so it has to outlive the
 * search path - confirm.
 *
 * @param paths  list to extend
 * @param path   directory name of the new entry
 */
2637 void append_include_path(searchpath_t *paths, const char *path)
2639 searchpath_entry_t *entry = OALLOCZ(&config_obstack, searchpath_entry_t);
2641 entry->is_system_path = paths->is_system_path;
2643 *paths->anchor = entry;
2644 paths->anchor = &entry->next;
/**
 * Appends the directories listed in an environment variable to a search
 * path. The value is split at colons. A non-empty element is copied to
 * config_obstack and appended; the other visible branch appends the current
 * directory, which the existing comment describes as the handling of an
 * empty entry. Nothing happens when the variable is unset or empty.
 * NOTE(review): this listing elides lines of the body (loop head, the
 * length test and the advance past the separator).
 *
 * @param paths   list to extend
 * @param envvar  name of the environment variable
 */
2647 static void append_env_paths(searchpath_t *paths, const char *envvar)
2649 const char *val = getenv(envvar);
2650 if (val != NULL && *val != '\0') {
2651 const char *begin = val;
/* find the end of the current element */
2655 while (*c != '\0' && *c != ':')
2658 size_t len = c-begin;
2660 /* use "." for gcc compatibility (Matze: I would expect that
2661 * nothing happens for an empty entry...) */
2662 append_include_path(paths, ".");
2664 char *const string = obstack_copy0(&config_obstack, begin, len);
2665 append_include_path(paths, string);
2672 } while(*c != '\0');
/**
 * Chains one search path behind another: the location designated by the
 * anchor of path is set to the first entry of append. The entries are
 * shared, not copied, and the anchor of path is left unchanged.
 *
 * @param path    list that gets extended
 * @param append  list whose entries follow those of path afterwards
 */
2676 static void append_searchpath(searchpath_t *path, const searchpath_t *append)
2678 *path->anchor = append->first;
/**
 * Builds the include search order. The built-in directory /usr/include is
 * added to the system search path, the directories from CPATH are added to
 * the bracket search path and those from CPLUS_INCLUDE_PATH (in C++ mode)
 * or C_INCLUDE_PATH (otherwise) to the system search path. The lists are
 * then chained so that a lookup runs through
 * quote -> bracket -> system -> after.
 */
2681 static void setup_include_path(void)
2683 /* built-in paths */
2684 append_include_path(&system_searchpath, "/usr/include");
2686 /* parse environment variable */
2687 append_env_paths(&bracket_searchpath, "CPATH");
2688 append_env_paths(&system_searchpath,
2689 c_mode & _CXX ? "CPLUS_INCLUDE_PATH" : "C_INCLUDE_PATH");
2691 /* append system search path to bracket searchpath */
2692 append_searchpath(&system_searchpath, &after_searchpath);
2693 append_searchpath(&bracket_searchpath, &system_searchpath);
2694 append_searchpath(&quote_searchpath, &bracket_searchpath);
/**
 * Error callback for the input layer; it is registered through
 * set_input_error_callback in init_preprocessor. Reports message at the
 * position of the current token shifted by the given line and column
 * offsets.
 *
 * @param delta_lines  lines to add to the token position
 * @param delta_cols   columns to add to the token position
 * @param message      text of the error
 */
2697 static void input_error(unsigned const delta_lines, unsigned const delta_cols, char const *const message)
/* work on a copy so that the token position itself stays untouched */
2699 position_t pos = pp_token.base.pos;
2700 pos.lineno += delta_lines;
2701 pos.colno += delta_cols;
2702 errorf(&pos, "%s", message);
/**
 * Initialises config_obstack, the obstack that append_include_path and
 * append_env_paths allocate from. It therefore has to run before any
 * include path is added.
 */
2705 void init_include_paths(void)
2707 obstack_init(&config_obstack);
/**
 * Initialises the preprocessor state: the obstacks pp_obstack and
 * input_obstack, the string set, the include search order and the error
 * callback of the input layer.
 * NOTE(review): lines at the start of the body are elided in this listing.
 */
2710 void init_preprocessor(void)
2714 obstack_init(&pp_obstack);
2715 obstack_init(&input_obstack);
2716 strset_init(&stringset);
2718 setup_include_path();
2720 set_input_error_callback(input_error);
/**
 * Releases the preprocessor state: frees everything on input_obstack,
 * pp_obstack and config_obstack and destroys the string set.
 */
2723 void exit_preprocessor(void)
2725 obstack_free(&input_obstack, NULL);
2726 obstack_free(&pp_obstack, NULL);
2727 obstack_free(&config_obstack, NULL);
2729 strset_destroy(&stringset);
2732 int pptest_main(int argc, char **argv);
2733 int pptest_main(int argc, char **argv)
2735 init_symbol_table();
2736 init_include_paths();
2737 init_preprocessor();
2740 error_on_unknown_chars = false;
2741 resolve_escape_sequences = false;
2743 /* simplistic commandline parser */
2744 const char *filename = NULL;
2745 const char *output = NULL;
2746 for (int i = 1; i < argc; ++i) {
2747 const char *opt = argv[i];
2748 if (streq(opt, "-I")) {
2749 append_include_path(&bracket_searchpath, argv[++i]);
2751 } else if (streq(opt, "-E")) {
2753 } else if (streq(opt, "-o")) {
2756 } else if (opt[0] == '-') {
2757 fprintf(stderr, "Unknown option '%s'\n", opt);
2759 if (filename != NULL)
2760 fprintf(stderr, "Multiple inputs not supported\n");
2764 if (filename == NULL) {
2765 fprintf(stderr, "No input specified\n");
2769 if (output == NULL) {
2772 out = fopen(output, "w");
2774 fprintf(stderr, "Couldn't open output '%s'\n", output);
2779 /* just here for gcc compatibility */
2780 fprintf(out, "# 1 \"%s\"\n", filename);
2781 fprintf(out, "# 1 \"<built-in>\"\n");
2782 fprintf(out, "# 1 \"<command-line>\"\n");
2784 FILE *file = fopen(filename, "r");
2786 fprintf(stderr, "Couldn't open input '%s'\n", filename);
2789 switch_pp_input(file, filename, NULL, false);
2792 next_preprocessing_token();
2793 if (pp_token.kind == T_EOF)
2799 check_unclosed_conditionals();
2800 fclose(close_pp_input());
2805 exit_preprocessor();
2806 exit_symbol_table();