9 #include "preprocessor.h"
13 #include "adt/error.h"
14 #include "adt/strutil.h"
15 #include "adt/strset.h"
16 #include "lang_features.h"
17 #include "diagnostic.h"
18 #include "string_rep.h"
22 #define INCLUDE_LIMIT 199 /* 199 is for gcc "compatibility" */
24 typedef struct saved_token_t {
29 typedef struct whitespace_info_t {
30 /** current token had whitespace in front of it */
32 /** current token is at the beginning of a line.
33 * => a "#" at line begin starts a preprocessing directive. */
35 /** number of spaces before the first token in a line */
36 unsigned whitespace_at_line_begin;
39 struct pp_definition_t {
41 source_position_t source_position;
42 pp_definition_t *parent_expansion;
44 whitespace_info_t expand_info;
46 bool is_expanding : 1;
47 bool has_parameters : 1;
48 bool is_parameter : 1;
49 pp_definition_t *function_definition;
51 pp_definition_t *parameters;
55 saved_token_t *token_list;
58 typedef struct pp_conditional_t pp_conditional_t;
59 struct pp_conditional_t {
60 source_position_t source_position;
63 /** conditional in skip mode (then+else gets skipped) */
65 pp_conditional_t *parent;
68 typedef struct pp_input_t pp_input_t;
73 utf32 buf[1024+MAX_PUTBACK];
76 source_position_t position;
79 searchpath_entry_t *path;
82 struct searchpath_entry_t {
84 searchpath_entry_t *next;
88 static pp_input_t input;
90 static pp_input_t *input_stack;
91 static unsigned n_inputs;
92 static struct obstack input_obstack;
94 static pp_conditional_t *conditional_stack;
97 bool allow_dollar_in_symbol = true;
98 static bool resolve_escape_sequences = true;
99 static bool error_on_unknown_chars = true;
100 static bool skip_mode;
102 static struct obstack pp_obstack;
103 static struct obstack config_obstack;
104 static const char *printed_input_name = NULL;
105 static source_position_t expansion_pos;
106 static pp_definition_t *current_expansion = NULL;
107 static pp_definition_t *current_call = NULL;
108 static pp_definition_t *current_argument = NULL;
109 static pp_definition_t *argument_expanding = NULL;
110 static unsigned argument_brace_count;
111 static strset_t stringset;
112 static token_kind_t last_token;
114 struct searchpath_t {
115 searchpath_entry_t *first;
116 searchpath_entry_t **anchor;
120 searchpath_t bracket_searchpath = { NULL, &bracket_searchpath.first, false };
121 searchpath_t quote_searchpath = { NULL, &quote_searchpath.first, false };
122 searchpath_t system_searchpath = { NULL, &system_searchpath.first, true };
124 static whitespace_info_t next_info; /* valid if had_whitespace is true */
125 static whitespace_info_t info;
127 static inline void next_char(void);
128 static void next_input_token(void);
129 static void print_line_directive(const source_position_t *pos, const char *add);
131 static symbol_t *symbol_colongreater;
132 static symbol_t *symbol_lesscolon;
133 static symbol_t *symbol_lesspercent;
134 static symbol_t *symbol_percentcolon;
135 static symbol_t *symbol_percentcolonpercentcolon;
136 static symbol_t *symbol_percentgreater;
138 static void init_symbols(void)
140 symbol_colongreater = symbol_table_insert(":>");
141 symbol_lesscolon = symbol_table_insert("<:");
142 symbol_lesspercent = symbol_table_insert("<%");
143 symbol_percentcolon = symbol_table_insert("%:");
144 symbol_percentcolonpercentcolon = symbol_table_insert("%:%:");
145 symbol_percentgreater = symbol_table_insert("%>");
148 void switch_pp_input(FILE *const file, char const *const filename, searchpath_entry_t *const path, bool const is_system_header)
151 input.input = input_from_stream(file, NULL);
154 input.output_line = 0;
155 input.position.input_name = filename;
156 input.position.lineno = 1;
157 input.position.is_system_header = is_system_header;
160 /* indicate that we're at a new input */
161 print_line_directive(&input.position, input_stack != NULL ? "1" : NULL);
163 /* place a virtual '\n' so we realize we're at line begin */
164 input.position.lineno = 0;
168 FILE *close_pp_input(void)
170 input_free(input.input);
172 FILE* const file = input.file;
184 static void push_input(void)
186 pp_input_t *const saved_input = obstack_copy(&input_obstack, &input, sizeof(input));
188 /* adjust buffer positions */
189 if (input.bufpos != NULL)
190 saved_input->bufpos = saved_input->buf + (input.bufpos - input.buf);
191 if (input.bufend != NULL)
192 saved_input->bufend = saved_input->buf + (input.bufend - input.buf);
194 saved_input->parent = input_stack;
195 input_stack = saved_input;
199 static void pop_restore_input(void)
201 assert(n_inputs > 0);
202 assert(input_stack != NULL);
204 pp_input_t *saved_input = input_stack;
206 memcpy(&input, saved_input, sizeof(input));
209 /* adjust buffer positions */
210 if (saved_input->bufpos != NULL)
211 input.bufpos = input.buf + (saved_input->bufpos - saved_input->buf);
212 if (saved_input->bufend != NULL)
213 input.bufend = input.buf + (saved_input->bufend - saved_input->buf);
215 input_stack = saved_input->parent;
216 obstack_free(&input_obstack, saved_input);
221 * Prints a parse error message at the current token.
223 * @param msg the error message
225 static void parse_error(const char *msg)
227 errorf(&pp_token.base.source_position, "%s", msg);
230 static inline void next_real_char(void)
232 assert(input.bufpos <= input.bufend);
233 if (input.bufpos >= input.bufend) {
234 size_t const n = decode(input.input, input.buf + MAX_PUTBACK, lengthof(input.buf) - MAX_PUTBACK);
239 input.bufpos = input.buf + MAX_PUTBACK;
240 input.bufend = input.bufpos + n;
242 input.c = *input.bufpos++;
243 ++input.position.colno;
247 * Put a character back into the buffer.
249 * @param pc the character to put back
251 static inline void put_back(utf32 const pc)
253 assert(input.bufpos > input.buf);
254 *(--input.bufpos - input.buf + input.buf) = (char) pc;
255 --input.position.colno;
261 if (input.c == '\n') { \
265 ++input.position.lineno; \
266 input.position.colno = 1; \
268 newline // Let it look like an ordinary case label.
/* Asserts that the current input character is c_type, then advances to the
 * next character. The argument is parenthesized so that an expression
 * argument cannot regroup with the comparison. */
#define eat(c_type) (assert(input.c == (c_type)), next_char())
272 static void maybe_concat_lines(void)
278 info.whitespace_at_line_begin = 0;
290 * Set c to the next input character, i.e.
291 * after expanding trigraphs.
293 static inline void next_char(void)
297 /* filter trigraphs and concatenated lines */
298 if (UNLIKELY(input.c == '\\')) {
299 maybe_concat_lines();
300 goto end_of_next_char;
303 if (LIKELY(input.c != '?'))
304 goto end_of_next_char;
307 if (LIKELY(input.c != '?')) {
310 goto end_of_next_char;
315 case '=': input.c = '#'; break;
316 case '(': input.c = '['; break;
317 case '/': input.c = '\\'; maybe_concat_lines(); break;
318 case ')': input.c = ']'; break;
319 case '\'': input.c = '^'; break;
320 case '<': input.c = '{'; break;
321 case '!': input.c = '|'; break;
322 case '>': input.c = '}'; break;
323 case '-': input.c = '~'; break;
333 printf("nchar '%c'\n", input.c);
340 * Returns true if the given char is an octal digit.
342 * @param chr the character to check
344 static inline bool is_octal_digit(int chr)
362 * Returns the value of a digit.
363 * The only portable way to do it ...
365 static int digit_value(int digit)
391 panic("wrong character given");
396 * Parses an octal character sequence.
398 * @param first_digit the already read first digit
400 static utf32 parse_octal_sequence(const utf32 first_digit)
402 assert(is_octal_digit(first_digit));
403 utf32 value = digit_value(first_digit);
404 if (!is_octal_digit(input.c)) return value;
405 value = 8 * value + digit_value(input.c);
407 if (!is_octal_digit(input.c)) return value;
408 value = 8 * value + digit_value(input.c);
415 * Parses a hex character sequence.
417 static utf32 parse_hex_sequence(void)
420 while (isxdigit(input.c)) {
421 value = 16 * value + digit_value(input.c);
427 static bool is_universal_char_valid(utf32 const v)
430 if (v < 0xA0U && v != 0x24 && v != 0x40 && v != 0x60)
432 if (0xD800 <= v && v <= 0xDFFF)
437 static utf32 parse_universal_char(unsigned const n_digits)
440 for (unsigned k = n_digits; k != 0; --k) {
441 if (isxdigit(input.c)) {
442 v = 16 * v + digit_value(input.c);
443 if (!resolve_escape_sequences)
444 obstack_1grow(&symbol_obstack, input.c);
447 errorf(&input.position,
448 "short universal character name, expected %u more digits",
453 if (!is_universal_char_valid(v)) {
454 errorf(&input.position,
455 "\\%c%0*X is not a valid universal character name",
456 n_digits == 4 ? 'u' : 'U', (int)n_digits, v);
461 static bool is_universal_char_valid_identifier(utf32 const v)
464 if ( v == 0x000A8) return true;
465 if ( v == 0x000AA) return true;
466 if ( v == 0x000AD) return true;
467 if ( v == 0x000AF) return true;
468 if (0x000B2 <= v && v <= 0x000B5) return true;
469 if (0x000B7 <= v && v <= 0x000BA) return true;
470 if (0x000BC <= v && v <= 0x000BE) return true;
471 if (0x000C0 <= v && v <= 0x000D6) return true;
472 if (0x000D8 <= v && v <= 0x000F6) return true;
473 if (0x000F8 <= v && v <= 0x000FF) return true;
474 if (0x00100 <= v && v <= 0x0167F) return true;
475 if (0x01681 <= v && v <= 0x0180D) return true;
476 if (0x0180F <= v && v <= 0x01FFF) return true;
477 if (0x0200B <= v && v <= 0x0200D) return true;
478 if (0x0202A <= v && v <= 0x0202E) return true;
479 if (0x0203F <= v && v <= 0x02040) return true;
480 if ( v == 0x02054) return true;
481 if (0x02060 <= v && v <= 0x0206F) return true;
482 if (0x02070 <= v && v <= 0x0218F) return true;
483 if (0x02460 <= v && v <= 0x024FF) return true;
484 if (0x02776 <= v && v <= 0x02793) return true;
485 if (0x02C00 <= v && v <= 0x02DFF) return true;
486 if (0x02E80 <= v && v <= 0x02FFF) return true;
487 if (0x03004 <= v && v <= 0x03007) return true;
488 if (0x03021 <= v && v <= 0x0302F) return true;
489 if (0x03031 <= v && v <= 0x0303F) return true;
490 if (0x03040 <= v && v <= 0x0D7FF) return true;
491 if (0x0F900 <= v && v <= 0x0FD3D) return true;
492 if (0x0FD40 <= v && v <= 0x0FDCF) return true;
493 if (0x0FDF0 <= v && v <= 0x0FE44) return true;
494 if (0x0FE47 <= v && v <= 0x0FFFD) return true;
495 if (0x10000 <= v && v <= 0x1FFFD) return true;
496 if (0x20000 <= v && v <= 0x2FFFD) return true;
497 if (0x30000 <= v && v <= 0x3FFFD) return true;
498 if (0x40000 <= v && v <= 0x4FFFD) return true;
499 if (0x50000 <= v && v <= 0x5FFFD) return true;
500 if (0x60000 <= v && v <= 0x6FFFD) return true;
501 if (0x70000 <= v && v <= 0x7FFFD) return true;
502 if (0x80000 <= v && v <= 0x8FFFD) return true;
503 if (0x90000 <= v && v <= 0x9FFFD) return true;
504 if (0xA0000 <= v && v <= 0xAFFFD) return true;
505 if (0xB0000 <= v && v <= 0xBFFFD) return true;
506 if (0xC0000 <= v && v <= 0xCFFFD) return true;
507 if (0xD0000 <= v && v <= 0xDFFFD) return true;
508 if (0xE0000 <= v && v <= 0xEFFFD) return true;
512 static bool is_universal_char_valid_identifier_start(utf32 const v)
515 if (0x0300 <= v && v <= 0x036F) return false;
516 if (0x1DC0 <= v && v <= 0x1DFF) return false;
517 if (0x20D0 <= v && v <= 0x20FF) return false;
518 if (0xFE20 <= v && v <= 0xFE2F) return false;
523 * Parse an escape sequence.
525 static utf32 parse_escape_sequence(void)
529 utf32 const ec = input.c;
533 case '"': return '"';
534 case '\'': return '\'';
535 case '\\': return '\\';
536 case '?': return '\?';
537 case 'a': return '\a';
538 case 'b': return '\b';
539 case 'f': return '\f';
540 case 'n': return '\n';
541 case 'r': return '\r';
542 case 't': return '\t';
543 case 'v': return '\v';
545 return parse_hex_sequence();
554 return parse_octal_sequence(ec);
556 parse_error("reached end of file while parsing escape sequence");
558 /* \E is not documented, but handled, by GCC. It is acceptable according
559 * to §6.11.4, whereas \e is not. */
563 return 27; /* hopefully 27 is ALWAYS the code for ESCAPE */
566 case 'U': return parse_universal_char(8);
567 case 'u': return parse_universal_char(4);
572 /* §6.4.4.4:8 footnote 64 */
573 parse_error("unknown escape sequence");
577 static const char *identify_string(char *string)
579 const char *result = strset_insert(&stringset, string);
580 if (result != string) {
581 obstack_free(&symbol_obstack, string);
586 static string_t sym_make_string(string_encoding_t const enc)
588 obstack_1grow(&symbol_obstack, '\0');
589 size_t const len = obstack_object_size(&symbol_obstack) - 1;
590 char *const string = obstack_finish(&symbol_obstack);
591 char const *const result = identify_string(string);
592 return (string_t){ result, len, enc };
595 string_t make_string(char const *const string)
597 obstack_grow(&symbol_obstack, string, strlen(string));
598 return sym_make_string(STRING_ENCODING_CHAR);
601 static void parse_string(utf32 const delimiter, token_kind_t const kind,
602 string_encoding_t const enc,
603 char const *const context)
605 const unsigned start_linenr = input.position.lineno;
612 if (resolve_escape_sequences) {
613 utf32 const tc = parse_escape_sequence();
614 if (enc == STRING_ENCODING_CHAR) {
616 warningf(WARN_OTHER, &pp_token.base.source_position, "escape sequence out of range");
618 obstack_1grow(&symbol_obstack, tc);
620 obstack_grow_utf8(&symbol_obstack, tc);
623 obstack_1grow(&symbol_obstack, (char)input.c);
625 obstack_1grow(&symbol_obstack, (char)input.c);
632 errorf(&pp_token.base.source_position, "newline while parsing %s", context);
636 source_position_t source_position;
637 source_position.input_name = pp_token.base.source_position.input_name;
638 source_position.lineno = start_linenr;
639 errorf(&source_position, "EOF while parsing %s", context);
644 if (input.c == delimiter) {
648 obstack_grow_utf8(&symbol_obstack, input.c);
656 pp_token.kind = kind;
657 pp_token.literal.string = sym_make_string(enc);
660 static void parse_string_literal(string_encoding_t const enc)
662 parse_string('"', T_STRING_LITERAL, enc, "string literal");
665 static void parse_character_constant(string_encoding_t const enc)
667 parse_string('\'', T_CHARACTER_CONSTANT, enc, "character constant");
668 if (pp_token.literal.string.size == 0) {
669 parse_error("empty character constant");
673 #define SYMBOL_CASES_WITHOUT_E_P \
674 '$': if (!allow_dollar_in_symbol) goto dollar_sign; \
725 #define SYMBOL_CASES \
726 SYMBOL_CASES_WITHOUT_E_P: \
732 #define DIGIT_CASES \
744 static void start_expanding(pp_definition_t *definition)
746 definition->parent_expansion = current_expansion;
747 definition->expand_pos = 0;
748 definition->is_expanding = true;
749 if (definition->list_len > 0) {
750 definition->token_list[0].had_whitespace
751 = info.had_whitespace;
753 current_expansion = definition;
756 static void finished_expanding(pp_definition_t *definition)
758 assert(definition->is_expanding);
759 pp_definition_t *parent = definition->parent_expansion;
760 definition->parent_expansion = NULL;
761 definition->is_expanding = false;
763 /* stop further expanding once we expanded a parameter used in a
765 if (definition == argument_expanding)
766 argument_expanding = NULL;
768 assert(current_expansion == definition);
769 current_expansion = parent;
772 static void grow_string_escaped(struct obstack *obst, const string_t *string, char const *delimiter)
774 char const *prefix = get_string_encoding_prefix(string->encoding);
775 obstack_printf(obst, "%s%s", prefix, delimiter);
776 size_t size = string->size;
777 const char *str = string->begin;
778 if (resolve_escape_sequences) {
779 obstack_grow(obst, str, size);
781 for (size_t i = 0; i < size; ++i) {
782 const char c = str[i];
783 if (c == '\\' || c == '"')
784 obstack_1grow(obst, '\\');
785 obstack_1grow(obst, c);
788 obstack_printf(obst, "%s", delimiter);
791 static void grow_token(struct obstack *obst, const token_t *token)
793 switch (token->kind) {
795 obstack_grow(obst, token->literal.string.begin, token->literal.string.size);
798 case T_STRING_LITERAL: {
799 char const *const delimiter = resolve_escape_sequences ? "\"" : "\\\"";
800 grow_string_escaped(obst, &token->literal.string, delimiter);
804 case T_CHARACTER_CONSTANT:
805 grow_string_escaped(obst, &token->literal.string, "'");
810 const char *str = token->base.symbol->string;
811 size_t len = strlen(str);
812 obstack_grow(obst, str, len);
818 static void stringify(const pp_definition_t *definition)
820 assert(obstack_object_size(&symbol_obstack) == 0);
822 size_t list_len = definition->list_len;
823 for (size_t p = 0; p < list_len; ++p) {
824 const saved_token_t *saved = &definition->token_list[p];
825 if (p > 0 && saved->had_whitespace)
826 obstack_1grow(&symbol_obstack, ' ');
827 grow_token(&symbol_obstack, &saved->token);
829 pp_token.kind = T_STRING_LITERAL;
830 pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
833 static inline void set_punctuator(token_kind_t const kind)
835 pp_token.kind = kind;
836 pp_token.base.symbol = token_symbols[kind];
839 static inline void set_digraph(token_kind_t const kind, symbol_t *const symbol)
841 pp_token.kind = kind;
842 pp_token.base.symbol = symbol;
846 * returns next final token from a preprocessor macro expansion
848 static bool expand_next(void)
850 if (current_expansion == NULL)
854 size_t pos = current_expansion->expand_pos;
855 if (pos >= current_expansion->list_len) {
856 finished_expanding(current_expansion);
857 /* it was the outermost expansion, parse pptoken normally */
858 if (current_expansion == NULL) {
863 const saved_token_t *saved = &current_expansion->token_list[pos++];
864 pp_token = saved->token;
865 if (pp_token.kind == '#') {
866 if (pos < current_expansion->list_len) {
867 const saved_token_t *next = &current_expansion->token_list[pos];
868 if (next->token.kind == T_MACRO_PARAMETER) {
869 pp_definition_t *def = next->token.macro_parameter.def;
870 assert(def != NULL && def->is_parameter);
877 if (current_expansion->expand_pos > 0)
878 info.had_whitespace = saved->had_whitespace;
879 current_expansion->expand_pos = pos;
880 pp_token.base.source_position = expansion_pos;
886 * Returns the next token kind found when continuing the current expansions
887 * without starting new sub-expansions.
889 static token_kind_t peek_expansion(void)
891 for (pp_definition_t *e = current_expansion; e; e = e->parent_expansion) {
892 if (e->expand_pos < e->list_len)
893 return e->token_list[e->expand_pos].token.kind;
898 static void skip_line_comment(void)
900 info.had_whitespace = true;
917 static void skip_multiline_comment(void)
919 info.had_whitespace = true;
921 unsigned start_linenr = input.position.lineno;
926 if (input.c == '*') {
927 /* TODO: nested comment, warn here */
932 if (input.c == '/') {
933 if (input.position.lineno != input.output_line)
934 info.whitespace_at_line_begin = input.position.colno;
944 source_position_t source_position;
945 source_position.input_name = pp_token.base.source_position.input_name;
946 source_position.lineno = start_linenr;
947 errorf(&source_position, "at end of file while looking for comment end");
958 static bool skip_till_newline(bool stop_at_non_whitespace)
970 if (input.c == '/') {
974 } else if (input.c == '*') {
976 skip_multiline_comment();
988 if (stop_at_non_whitespace)
997 static void skip_whitespace(void)
1003 ++info.whitespace_at_line_begin;
1004 info.had_whitespace = true;
1009 info.at_line_begin = true;
1010 info.had_whitespace = true;
1011 info.whitespace_at_line_begin = 0;
1016 if (input.c == '/') {
1018 skip_line_comment();
1020 } else if (input.c == '*') {
1022 skip_multiline_comment();
1036 static inline void eat_pp(pp_token_kind_t const kind)
1038 assert(pp_token.base.symbol->pp_ID == kind);
1043 static inline void eat_token(token_kind_t const kind)
1045 assert(pp_token.kind == kind);
1050 static void parse_symbol(void)
1052 assert(obstack_object_size(&symbol_obstack) == 0);
1057 obstack_1grow(&symbol_obstack, (char) input.c);
1066 case 'U': n = 8; goto universal;
1067 case 'u': n = 4; goto universal;
1069 if (!resolve_escape_sequences) {
1070 obstack_1grow(&symbol_obstack, '\\');
1071 obstack_1grow(&symbol_obstack, input.c);
1074 utf32 const v = parse_universal_char(n);
1075 if (!is_universal_char_valid_identifier(v)) {
1076 if (is_universal_char_valid(v)) {
1077 errorf(&input.position,
1078 "universal character \\%c%0*X is not valid in an identifier",
1079 n == 4 ? 'u' : 'U', (int)n, v);
1081 } else if (obstack_object_size(&symbol_obstack) == 0 && !is_universal_char_valid_identifier_start(v)) {
1082 errorf(&input.position,
1083 "universal character \\%c%0*X is not valid as start of an identifier",
1084 n == 4 ? 'u' : 'U', (int)n, v);
1085 } else if (resolve_escape_sequences) {
1086 obstack_grow_utf8(&symbol_obstack, v);
1104 obstack_1grow(&symbol_obstack, '\0');
1105 char *string = obstack_finish(&symbol_obstack);
1107 /* might be a wide string or character constant ( L"string"/L'c' ) */
1108 if (input.c == '"' && string[0] == 'L' && string[1] == '\0') {
1109 obstack_free(&symbol_obstack, string);
1110 parse_string_literal(STRING_ENCODING_WIDE);
1112 } else if (input.c == '\'' && string[0] == 'L' && string[1] == '\0') {
1113 obstack_free(&symbol_obstack, string);
1114 parse_character_constant(STRING_ENCODING_WIDE);
1118 symbol_t *symbol = symbol_table_insert(string);
1120 pp_token.kind = symbol->ID;
1121 pp_token.base.symbol = symbol;
1123 /* we can free the memory from symbol obstack if we already had an entry in
1124 * the symbol table */
1125 if (symbol->string != string) {
1126 obstack_free(&symbol_obstack, string);
1130 static void parse_number(void)
1132 obstack_1grow(&symbol_obstack, (char) input.c);
1139 case SYMBOL_CASES_WITHOUT_E_P:
1140 obstack_1grow(&symbol_obstack, (char) input.c);
1148 obstack_1grow(&symbol_obstack, (char) input.c);
1150 if (input.c == '+' || input.c == '-') {
1151 obstack_1grow(&symbol_obstack, (char) input.c);
1163 pp_token.kind = T_NUMBER;
1164 pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
1167 #define MAYBE_PROLOG \
1171 #define MAYBE(ch, kind) \
1174 set_punctuator(kind); \
1177 #define MAYBE_DIGRAPH(ch, kind, symbol) \
1180 set_digraph(kind, symbol); \
1183 #define ELSE_CODE(code) \
1188 #define ELSE(kind) ELSE_CODE(set_punctuator(kind); return;)
1190 /** identifies and returns the next preprocessing token contained in the
1191 * input stream. No macro expansion is performed. */
1192 static void next_input_token(void)
1194 if (next_info.had_whitespace) {
1196 next_info.had_whitespace = false;
1198 info.at_line_begin = false;
1199 info.had_whitespace = false;
1202 pp_token.base.source_position = input.position;
1203 pp_token.base.symbol = NULL;
1208 info.whitespace_at_line_begin++;
1209 info.had_whitespace = true;
1214 info.at_line_begin = true;
1215 info.had_whitespace = true;
1216 info.whitespace_at_line_begin = 0;
1228 parse_string_literal(STRING_ENCODING_CHAR);
1232 parse_character_constant(STRING_ENCODING_CHAR);
1254 MAYBE('.', T_DOTDOTDOT)
1258 set_punctuator('.');
1264 MAYBE('&', T_ANDAND)
1265 MAYBE('=', T_ANDEQUAL)
1269 MAYBE('=', T_ASTERISKEQUAL)
1273 MAYBE('+', T_PLUSPLUS)
1274 MAYBE('=', T_PLUSEQUAL)
1278 MAYBE('>', T_MINUSGREATER)
1279 MAYBE('-', T_MINUSMINUS)
1280 MAYBE('=', T_MINUSEQUAL)
1284 MAYBE('=', T_EXCLAMATIONMARKEQUAL)
1288 MAYBE('=', T_SLASHEQUAL)
1291 skip_multiline_comment();
1295 skip_line_comment();
1300 MAYBE_DIGRAPH('>', '}', symbol_percentgreater)
1301 MAYBE('=', T_PERCENTEQUAL)
1306 MAYBE_DIGRAPH(':', T_HASHHASH, symbol_percentcolonpercentcolon)
1310 goto digraph_percentcolon;
1313 digraph_percentcolon:
1314 set_digraph('#', symbol_percentcolon);
1320 MAYBE_DIGRAPH(':', '[', symbol_lesscolon)
1321 MAYBE_DIGRAPH('%', '{', symbol_lesspercent)
1322 MAYBE('=', T_LESSEQUAL)
1325 MAYBE('=', T_LESSLESSEQUAL)
1330 MAYBE('=', T_GREATEREQUAL)
1333 MAYBE('=', T_GREATERGREATEREQUAL)
1334 ELSE(T_GREATERGREATER)
1338 MAYBE('=', T_CARETEQUAL)
1342 MAYBE('=', T_PIPEEQUAL)
1343 MAYBE('|', T_PIPEPIPE)
1347 MAYBE_DIGRAPH('>', ']', symbol_colongreater)
1349 if (c_mode & _CXX) {
1351 set_punctuator(T_COLONCOLON);
1358 MAYBE('=', T_EQUALEQUAL)
1362 MAYBE('#', T_HASHHASH)
1375 set_punctuator(input.c);
1380 if (input_stack != NULL) {
1381 fclose(close_pp_input());
1382 pop_restore_input();
1385 if (input.c == (utf32)EOF)
1386 --input.position.lineno;
1387 print_line_directive(&input.position, "2");
1390 info.at_line_begin = true;
1391 set_punctuator(T_EOF);
1397 int next_c = input.c;
1400 if (next_c == 'U' || next_c == 'u') {
1407 if (error_on_unknown_chars) {
1408 errorf(&pp_token.base.source_position,
1409 "unknown character '%lc' found\n", input.c);
1413 assert(obstack_object_size(&symbol_obstack) == 0);
1414 obstack_grow_utf8(&symbol_obstack, input.c);
1415 obstack_1grow(&symbol_obstack, '\0');
1416 char *const string = obstack_finish(&symbol_obstack);
1417 symbol_t *const symbol = symbol_table_insert(string);
1418 if (symbol->string != string)
1419 obstack_free(&symbol_obstack, string);
1421 pp_token.kind = T_UNKNOWN_CHAR;
1422 pp_token.base.symbol = symbol;
1429 static void print_quoted_string(const char *const string)
1432 for (const char *c = string; *c != 0; ++c) {
1434 case '"': fputs("\\\"", out); break;
1435 case '\\': fputs("\\\\", out); break;
1436 case '\a': fputs("\\a", out); break;
1437 case '\b': fputs("\\b", out); break;
1438 case '\f': fputs("\\f", out); break;
1439 case '\n': fputs("\\n", out); break;
1440 case '\r': fputs("\\r", out); break;
1441 case '\t': fputs("\\t", out); break;
1442 case '\v': fputs("\\v", out); break;
1443 case '\?': fputs("\\?", out); break;
1446 fprintf(out, "\\%03o", (unsigned)*c);
1456 static void print_line_directive(const source_position_t *pos, const char *add)
1461 fprintf(out, "# %u ", pos->lineno);
1462 print_quoted_string(pos->input_name);
1467 if (pos->is_system_header) {
1471 printed_input_name = pos->input_name;
1472 input.output_line = pos->lineno-1;
1475 static bool emit_newlines(void)
1480 unsigned delta = pp_token.base.source_position.lineno - input.output_line;
1486 print_line_directive(&pp_token.base.source_position, NULL);
1489 for (unsigned i = 0; i < delta; ++i) {
1493 input.output_line = pp_token.base.source_position.lineno;
1495 unsigned whitespace = info.whitespace_at_line_begin;
1496 /* make sure there is at least 1 whitespace before a (macro-expanded)
1497 * '#' at line begin. I'm not sure why this is good, but gcc does it. */
1498 if (pp_token.kind == '#' && whitespace == 0)
1500 for (unsigned i = 0; i < whitespace; ++i)
1506 void set_preprocessor_output(FILE *output)
1510 error_on_unknown_chars = false;
1511 resolve_escape_sequences = false;
1513 error_on_unknown_chars = true;
1514 resolve_escape_sequences = true;
1518 void emit_pp_token(void)
1520 if (!emit_newlines() &&
1521 (info.had_whitespace || tokens_would_paste(last_token, pp_token.kind)))
1524 switch (pp_token.kind) {
1526 fputs(pp_token.literal.string.begin, out);
1529 case T_STRING_LITERAL:
1530 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1532 fputs(pp_token.literal.string.begin, out);
1536 case T_CHARACTER_CONSTANT:
1537 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1539 fputs(pp_token.literal.string.begin, out);
1543 case T_MACRO_PARAMETER:
1544 panic("macro parameter not expanded");
1547 fputs(pp_token.base.symbol->string, out);
1550 last_token = pp_token.kind;
1553 static void eat_pp_directive(void)
1555 while (!info.at_line_begin) {
1560 static bool strings_equal(const string_t *string1, const string_t *string2)
1562 size_t size = string1->size;
1563 if (size != string2->size)
1566 const char *c1 = string1->begin;
1567 const char *c2 = string2->begin;
1568 for (size_t i = 0; i < size; ++i, ++c1, ++c2) {
1575 static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
1577 if (token1->kind != token2->kind)
1580 switch (token1->kind) {
1582 case T_CHARACTER_CONSTANT:
1583 case T_STRING_LITERAL:
1584 return strings_equal(&token1->literal.string, &token2->literal.string);
1586 case T_MACRO_PARAMETER:
1587 return token1->macro_parameter.def->symbol
1588 == token2->macro_parameter.def->symbol;
1591 return token1->base.symbol == token2->base.symbol;
1595 static bool pp_definitions_equal(const pp_definition_t *definition1,
1596 const pp_definition_t *definition2)
1598 if (definition1->list_len != definition2->list_len)
1601 size_t len = definition1->list_len;
1602 const saved_token_t *t1 = definition1->token_list;
1603 const saved_token_t *t2 = definition2->token_list;
1604 for (size_t i = 0; i < len; ++i, ++t1, ++t2) {
1605 if (!pp_tokens_equal(&t1->token, &t2->token))
1607 if (t1->had_whitespace != t2->had_whitespace)
1613 static void missing_macro_param_error(void)
1615 errorf(&pp_token.base.source_position,
1616 "'#' is not followed by a macro parameter");
1619 static bool is_defineable_token(char const *const context)
1621 if (info.at_line_begin) {
1622 errorf(&pp_token.base.source_position, "unexpected end of line after %s", context);
1625 symbol_t *const symbol = pp_token.base.symbol;
1629 if (pp_token.kind != T_IDENTIFIER) {
1630 switch (symbol->string[0]) {
1637 errorf(&pp_token.base.source_position, "expected identifier after %s, got %K", context, &pp_token);
1642 /* TODO turn this into a flag in pp_def. */
1643 switch (symbol->pp_ID) {
1646 errorf(&pp_token.base.source_position, "%K cannot be used as macro name in %s", &pp_token, context);
1654 static void parse_define_directive(void)
1662 assert(obstack_object_size(&pp_obstack) == 0);
1664 if (!is_defineable_token("#define"))
1666 symbol_t *const symbol = pp_token.base.symbol;
1668 pp_definition_t *new_definition
1669 = obstack_alloc(&pp_obstack, sizeof(new_definition[0]));
1670 memset(new_definition, 0, sizeof(new_definition[0]));
1671 new_definition->symbol = symbol;
1672 new_definition->source_position = input.position;
1674 /* this is probably the only place where spaces are significant in the
1675 * lexer (except for the fact that they separate tokens). #define b(x)
1676 * is something else than #define b (x) */
1677 if (input.c == '(') {
1682 switch (pp_token.kind) {
1684 new_definition->is_variadic = true;
1685 eat_token(T_DOTDOTDOT);
1686 if (pp_token.kind != ')') {
1687 errorf(&input.position,
1688 "'...' not at end of macro argument list");
1693 case T_IDENTIFIER: {
1694 pp_definition_t parameter;
1695 memset(&parameter, 0, sizeof(parameter));
1696 parameter.source_position = pp_token.base.source_position;
1697 parameter.symbol = pp_token.base.symbol;
1698 parameter.is_parameter = true;
1699 obstack_grow(&pp_obstack, &parameter, sizeof(parameter));
1700 eat_token(T_IDENTIFIER);
1702 if (pp_token.kind == ',') {
1707 if (pp_token.kind != ')') {
1708 errorf(&pp_token.base.source_position,
1709 "expected ',' or ')' after identifier, got %K",
1718 goto finish_argument_list;
1721 errorf(&pp_token.base.source_position,
1722 "expected identifier, '...' or ')' in #define argument list, got %K",
1728 finish_argument_list:
1729 new_definition->has_parameters = true;
1730 size_t size = obstack_object_size(&pp_obstack);
1731 new_definition->n_parameters
1732 = size / sizeof(new_definition->parameters[0]);
1733 new_definition->parameters = obstack_finish(&pp_obstack);
1734 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1735 pp_definition_t *param = &new_definition->parameters[i];
1736 symbol_t *symbol = param->symbol;
1737 pp_definition_t *previous = symbol->pp_definition;
1738 if (previous != NULL
1739 && previous->function_definition == new_definition) {
1740 errorf(&param->source_position,
1741 "duplicate macro parameter '%Y'", symbol);
1742 param->symbol = sym_anonymous;
1745 param->parent_expansion = previous;
1746 param->function_definition = new_definition;
1747 symbol->pp_definition = param;
1753 /* construct token list */
1754 assert(obstack_object_size(&pp_obstack) == 0);
1755 bool next_must_be_param = false;
1756 while (!info.at_line_begin) {
1757 if (pp_token.kind == T_IDENTIFIER) {
1758 const symbol_t *symbol = pp_token.base.symbol;
1759 pp_definition_t *definition = symbol->pp_definition;
1760 if (definition != NULL
1761 && definition->function_definition == new_definition) {
1762 pp_token.kind = T_MACRO_PARAMETER;
1763 pp_token.macro_parameter.def = definition;
1766 if (next_must_be_param && pp_token.kind != T_MACRO_PARAMETER) {
1767 missing_macro_param_error();
1769 saved_token_t saved_token;
1770 saved_token.token = pp_token;
1771 saved_token.had_whitespace = info.had_whitespace;
1772 obstack_grow(&pp_obstack, &saved_token, sizeof(saved_token));
1774 = new_definition->has_parameters && pp_token.kind == '#';
1777 if (next_must_be_param)
1778 missing_macro_param_error();
1780 new_definition->list_len = obstack_object_size(&pp_obstack)
1781 / sizeof(new_definition->token_list[0]);
1782 new_definition->token_list = obstack_finish(&pp_obstack);
1784 if (new_definition->has_parameters) {
1785 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1786 pp_definition_t *param = &new_definition->parameters[i];
1787 symbol_t *symbol = param->symbol;
1788 if (symbol == sym_anonymous)
1790 assert(symbol->pp_definition == param);
1791 assert(param->function_definition == new_definition);
1792 symbol->pp_definition = param->parent_expansion;
1793 param->parent_expansion = NULL;
1797 pp_definition_t *old_definition = symbol->pp_definition;
1798 if (old_definition != NULL) {
1799 if (!pp_definitions_equal(old_definition, new_definition)) {
1800 warningf(WARN_OTHER, &input.position, "multiple definition of macro '%Y' (first defined %P)", symbol, &old_definition->source_position);
1802 /* reuse the old definition */
1803 obstack_free(&pp_obstack, new_definition);
1804 new_definition = old_definition;
1808 symbol->pp_definition = new_definition;
1812 if (obstack_object_size(&pp_obstack) > 0) {
1813 char *ptr = obstack_finish(&pp_obstack);
1814 obstack_free(&pp_obstack, ptr);
/**
 * Handles an #undef directive: clears the macro definition attached to the
 * symbol of the current token and warns if more tokens follow on the line.
 */
1819 static void parse_undef_directive(void)
1827 if (!is_defineable_token("#undef")) {
/* forget the definition; storing NULL is harmless if none was set */
1832 pp_token.base.symbol->pp_definition = NULL;
/* anything left before the end of the line is only warned about */
1835 if (!info.at_line_begin) {
1836 warningf(WARN_OTHER, &input.position, "extra tokens at end of #undef directive");
1841 /** behind an #include we can have the special headername lexemes.
1842 * They're only allowed behind an #include so they're not recognized
1843 * by the normal next_preprocessing_token. We handle them as a special
1845 static const char *parse_headername(bool *system_include)
1847 if (info.at_line_begin) {
1848 parse_error("expected headername after #include");
1852 /* check whether we have a "... or <... headername */
1853 source_position_t position = input.position;
/* the opening character fixes the closing delimiter and decides whether
 * *system_include is set (angle brackets) or cleared (quotes) */
1857 case '<': delimiter = '>'; *system_include = true; goto parse_name;
1858 case '"': delimiter = '"'; *system_include = false; goto parse_name;
1860 assert(obstack_object_size(&symbol_obstack) == 0);
/* throw away the partial name collected on symbol_obstack before the
 * missing delimiter is reported */
1867 char *dummy = obstack_finish(&symbol_obstack);
1868 obstack_free(&symbol_obstack, dummy);
1870 errorf(&pp_token.base.source_position,
1871 "header name without closing '%c'", (char)delimiter);
/* the closing delimiter completes the name; every other character is
 * appended to it */
1875 if (input.c == delimiter) {
1877 goto finish_headername;
1879 obstack_1grow(&symbol_obstack, (char)input.c);
1885 /* we should never be here */
/* token based path: take the header name from regular preprocessing
 * tokens (NOTE(review): presumably reached when the line does not start
 * with a raw header name -- confirm against the dispatch code) */
1889 next_preprocessing_token();
1890 if (info.at_line_begin) {
1891 /* TODO: if we are already in the new line then we parsed more than
1892 * wanted. We reuse the token, but this could produce follow-up errors or
1893 * misbehaviour... */
1894 goto error_invalid_input;
1896 if (pp_token.kind == T_STRING_LITERAL) {
1897 *system_include = false;
1898 return pp_token.literal.string.begin;
1899 } else if (pp_token.kind == '<') {
1900 *system_include = true;
1901 assert(obstack_object_size(&pp_obstack) == 0);
1903 next_preprocessing_token();
1904 if (info.at_line_begin) {
1905 /* TODO: we shouldn't have parsed/expanded something on the
1906 * next line yet... */
1907 char *dummy = obstack_finish(&pp_obstack);
1908 obstack_free(&pp_obstack, dummy);
1909 goto error_invalid_input;
1911 if (pp_token.kind == '>')
/* remember the token together with its leading-whitespace flag */
1914 saved_token_t saved;
1915 saved.token = pp_token;
1916 saved.had_whitespace = info.had_whitespace;
1917 obstack_grow(&pp_obstack, &saved, sizeof(saved));
/* rebuild the text between the brackets from the saved tokens; a single
 * blank is written wherever a token other than the first had whitespace
 * in front of it */
1919 size_t size = obstack_object_size(&pp_obstack);
1920 assert(size % sizeof(saved_token_t) == 0);
1921 size_t n_tokens = size / sizeof(saved_token_t);
1922 saved_token_t *tokens = obstack_finish(&pp_obstack);
1923 assert(obstack_object_size(&symbol_obstack) == 0);
1924 for (size_t i = 0; i < n_tokens; ++i) {
1925 const saved_token_t *saved = &tokens[i];
1926 if (i > 0 && saved->had_whitespace)
1927 obstack_1grow(&symbol_obstack, ' ');
1928 grow_token(&symbol_obstack, &saved->token);
1930 obstack_free(&pp_obstack, tokens);
1931 goto finish_headername;
1933 error_invalid_input:
1935 char *dummy = obstack_finish(&symbol_obstack);
1936 obstack_free(&symbol_obstack, dummy);
1939 errorf(&pp_token.base.source_position,
1940 "expected \"FILENAME\" or <FILENAME> after #include");
/* NUL-terminate the collected name, finish the obstack object and pass it
 * through identify_string */
1946 obstack_1grow(&symbol_obstack, '\0');
1947 char *const headername = obstack_finish(&symbol_obstack);
1948 const char *identified = identify_string(headername);
/* the token position becomes the input position recorded at entry */
1949 pp_token.base.source_position = position;
/**
 * Tries to open the header @p headername and, when a file could be opened,
 * makes it the current input through switch_pp_input().
 *
 * For quoted includes (@p bracket_include false) the directory of the
 * current input file is tried first; afterwards the search path entries are
 * tried one after another.
 *
 * NOTE(review): the return statements are not part of this excerpt; the
 * caller treats the bool result as success/failure -- confirm.
 */
1953 static bool do_include(bool const bracket_include, bool const include_next, char const *const headername)
1955 size_t const headername_len = strlen(headername);
1956 searchpath_entry_t *entry;
/* NOTE(review): presumably the include_next case -- continue behind the
 * search path entry the current input came from, if it has one */
1958 entry = input.path ? input.path->next
1959 : bracket_include ? bracket_searchpath.first
1960 : quote_searchpath.first;
1962 if (!bracket_include) {
1963 /* put dirname of current input on obstack */
1964 const char *filename = input.position.input_name;
1965 const char *last_slash = strrchr(filename, '/');
1966 const char *full_name;
1967 if (last_slash != NULL) {
1968 size_t len = last_slash - filename;
/* len + 1 keeps the slash that ends the directory part */
1969 obstack_grow(&symbol_obstack, filename, len + 1);
1970 obstack_grow0(&symbol_obstack, headername, headername_len);
1971 char *complete_path = obstack_finish(&symbol_obstack);
1972 full_name = identify_string(complete_path);
/* the current input name has no directory part: use the header name as is */
1974 full_name = headername;
1977 FILE *file = fopen(full_name, "r");
1979 switch_pp_input(file, full_name, NULL, false);
1982 entry = quote_searchpath.first;
1984 entry = bracket_searchpath.first;
1988 assert(obstack_object_size(&symbol_obstack) == 0);
1989 /* check searchpath */
1990 for (; entry; entry = entry->next) {
1991 const char *path = entry->path;
1992 size_t len = strlen(path);
1993 obstack_grow(&symbol_obstack, path, len);
/* NOTE(review): path[len-1] reads before the string if an entry path is
 * empty (len == 0) -- confirm that empty paths are never registered */
1994 if (path[len-1] != '/')
1995 obstack_1grow(&symbol_obstack, '/');
/* headername_len+1 copies the terminating NUL as well */
1996 obstack_grow(&symbol_obstack, headername, headername_len+1);
1998 char *complete_path = obstack_finish(&symbol_obstack);
1999 FILE *file = fopen(complete_path, "r");
2001 const char *filename = identify_string(complete_path);
2002 switch_pp_input(file, filename, entry, entry->is_system_path);
/* release the candidate path again */
2005 obstack_free(&symbol_obstack, complete_path);
/**
 * Handles #include and #include_next (@p include_next is forwarded to
 * do_include()): parses the header name, warns about trailing tokens,
 * enforces the nesting limit INCLUDE_LIMIT and then tries to switch to the
 * included file. A failed include is reported together with strerror(errno).
 */
2012 static void parse_include_directive(bool const include_next)
2019 /* do not eat the TP_include, since it would already parse the next token
2020 * which needs special handling here. */
2021 skip_till_newline(true);
2022 bool system_include;
2023 const char *headername = parse_headername(&system_include);
2024 if (headername == NULL) {
2029 bool had_nonwhitespace = skip_till_newline(false);
2030 if (had_nonwhitespace) {
2031 warningf(WARN_OTHER, &input.position,
2032 "extra tokens at end of #include directive");
2035 if (n_inputs > INCLUDE_LIMIT) {
2036 errorf(&pp_token.base.source_position, "#include nested too deeply");
/* reset the whitespace info so that the next token counts as being at the
 * beginning of a line without leading whitespace */
2043 info.whitespace_at_line_begin = 0;
2044 info.had_whitespace = false;
2045 info.at_line_begin = true;
2048 bool res = do_include(system_include, include_next, headername);
2052 errorf(&pp_token.base.source_position, "failed including '%s': %s", headername, strerror(errno));
2053 pop_restore_input();
/**
 * Allocates a zero-initialised conditional on pp_obstack and makes it the
 * new top of conditional_stack; the previous top becomes its parent.
 */
2057 static pp_conditional_t *push_conditional(void)
2059 pp_conditional_t *conditional
2060 = obstack_alloc(&pp_obstack, sizeof(*conditional));
2061 memset(conditional, 0, sizeof(*conditional));
/* link in front of the current stack top */
2063 conditional->parent = conditional_stack;
2064 conditional_stack = conditional;
/**
 * Removes the innermost conditional from conditional_stack. The stack must
 * not be empty (asserted).
 */
2069 static void pop_conditional(void)
2071 assert(conditional_stack != NULL);
2072 conditional_stack = conditional_stack->parent;
/**
 * Reports an error for the conditionals that are still on
 * conditional_stack. The message depends on whether the #else of the
 * conditional has already been seen.
 */
2075 void check_unclosed_conditionals(void)
2077 while (conditional_stack != NULL) {
2078 pp_conditional_t *conditional = conditional_stack;
/* the error is reported at the position stored in the conditional */
2080 if (conditional->in_else) {
2081 errorf(&conditional->source_position, "unterminated #else");
2083 errorf(&conditional->source_position, "unterminated condition");
/**
 * Handles #ifdef (@p is_ifdef true) and #ifndef (@p is_ifdef false): pushes
 * a new conditional whose condition tells whether the named macro is
 * defined (respectively not defined).
 */
2089 static void parse_ifdef_ifndef_directive(bool const is_ifdef)
2092 eat_pp(is_ifdef ? TP_ifdef : TP_ifndef);
/* NOTE(review): presumably the branch taken while input is already being
 * skipped -- the conditional is only recorded and marked as skip, which
 * per its declaration means that both then and else part are skipped */
2096 pp_conditional_t *conditional = push_conditional();
2097 conditional->source_position = pp_token.base.source_position;
2098 conditional->skip = true;
2102 if (pp_token.kind != T_IDENTIFIER || info.at_line_begin) {
2103 errorf(&pp_token.base.source_position,
2104 "expected identifier after #%s, got %K",
2105 is_ifdef ? "ifdef" : "ifndef", &pp_token);
2108 /* just take the true case in the hope to avoid further errors */
2111 /* evaluate whether we are in true or false case */
2112 condition = (bool)pp_token.base.symbol->pp_definition == is_ifdef;
2113 eat_token(T_IDENTIFIER);
2115 if (!info.at_line_begin) {
2116 errorf(&pp_token.base.source_position,
2117 "extra tokens at end of #%s",
2118 is_ifdef ? "ifdef" : "ifndef");
/* record the evaluated conditional together with its position */
2123 pp_conditional_t *conditional = push_conditional();
2124 conditional->source_position = pp_token.base.source_position;
2125 conditional->condition = condition;
/**
 * Handles #else: warns about trailing tokens, reports an #else without an
 * open conditional or a second #else for the same conditional, and
 * otherwise flips the innermost conditional into its else part.
 */
2132 static void parse_else_directive(void)
2136 if (!info.at_line_begin) {
2138 warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #else");
2143 pp_conditional_t *conditional = conditional_stack;
2144 if (conditional == NULL) {
2145 errorf(&pp_token.base.source_position, "#else without prior #if");
2149 if (conditional->in_else) {
2150 errorf(&pp_token.base.source_position,
2151 "#else after #else (condition started %P)",
2152 &conditional->source_position);
2157 conditional->in_else = true;
/* unless the whole conditional is skipped, the else part is skipped
 * exactly when the condition was true */
2158 if (!conditional->skip) {
2159 skip_mode = conditional->condition;
/* from now on the conditional carries the position of its #else */
2161 conditional->source_position = pp_token.base.source_position;
/**
 * Handles #endif: warns about trailing tokens and reports an #endif that
 * has no open conditional to close.
 */
2164 static void parse_endif_directive(void)
2168 if (!info.at_line_begin) {
2170 warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #endif");
/* the innermost open conditional is the one being closed */
2175 pp_conditional_t *conditional = conditional_stack;
2176 if (conditional == NULL) {
2177 errorf(&pp_token.base.source_position, "#endif without prior #if");
2181 if (!conditional->skip) {
/** Identifies the setting named by a #pragma STDC directive. */
2187 typedef enum stdc_pragma_kind_t {
2191 STDC_CX_LIMITED_RANGE
2192 } stdc_pragma_kind_t;
/** Identifies the value given to a #pragma STDC setting. */
2194 typedef enum stdc_pragma_value_kind_t {
2199 } stdc_pragma_value_kind_t;
/**
 * Handles #pragma. Only STDC pragmas are inspected, and only in C99 mode;
 * a bad STDC value is reported as an error and every pragma that ends up
 * unrecognised produces a WARN_UNKNOWN_PRAGMAS warning.
 */
2201 static void parse_pragma_directive(void)
2209 if (pp_token.kind != T_IDENTIFIER) {
2210 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
2211 "expected identifier after #pragma");
2216 stdc_pragma_kind_t kind = STDC_UNKNOWN;
/* STDC pragmas are only looked at when the C99 bit is set in c_mode */
2217 if (pp_token.base.symbol->pp_ID == TP_STDC && c_mode & _C99) {
2221 switch (pp_token.base.symbol->pp_ID) {
2222 case TP_FP_CONTRACT: kind = STDC_FP_CONTRACT; break;
2223 case TP_FENV_ACCESS: kind = STDC_FENV_ACCESS; break;
2224 case TP_CX_LIMITED_RANGE: kind = STDC_CX_LIMITED_RANGE; break;
2227 if (kind != STDC_UNKNOWN) {
/* a known setting has to be followed by TP_ON, TP_OFF or TP_DEFAULT */
2229 stdc_pragma_value_kind_t value;
2230 switch (pp_token.base.symbol->pp_ID) {
2231 case TP_ON: value = STDC_VALUE_ON; break;
2232 case TP_OFF: value = STDC_VALUE_OFF; break;
2233 case TP_DEFAULT: value = STDC_VALUE_DEFAULT; break;
2234 default: value = STDC_VALUE_UNKNOWN; break;
2236 if (value == STDC_VALUE_UNKNOWN) {
/* treat the pragma as unknown from here on */
2237 kind = STDC_UNKNOWN;
2238 errorf(&pp_token.base.source_position, "bad STDC pragma argument");
2243 if (kind == STDC_UNKNOWN) {
2244 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
2245 "encountered unknown #pragma");
/**
 * Handles the operands of a line directive: a line number, optionally
 * followed by a file name string and by numeric flags in the style of gcc
 * preprocessor output. Updates input.position accordingly.
 */
2249 static void parse_line_directive(void)
2251 if (pp_token.kind != T_NUMBER) {
2253 parse_error("expected integer");
/* base 0: strtol accepts decimal, octal (leading 0) and hexadecimal
 * (leading 0x) notation */
2256 long const line = strtol(pp_token.literal.string.begin, &end, 0);
2258 /* use offset -1 as this is about the next line */
2259 input.position.lineno = line - 1;
2260 /* force output of line */
2261 input.output_line = input.position.lineno - 20;
2264 errorf(&input.position, "'%S' is not a valid line number",
2265 &pp_token.literal.string);
2269 if (info.at_line_begin)
2272 if (pp_token.kind == T_STRING_LITERAL
2273 && pp_token.literal.string.encoding == STRING_ENCODING_CHAR) {
/* adopt the given file name; the system header flag is cleared here and
 * only set again by a 3 flag below */
2274 input.position.input_name = pp_token.literal.string.begin;
2275 input.position.is_system_header = false;
2278 /* attempt to parse numeric flags as outputted by gcc preprocessor */
2279 while (!info.at_line_begin && pp_token.kind == T_NUMBER) {
2281 * 1 - indicates start of a new file
2282 * 2 - indicates return from a file
2283 * 3 - indicates system header
2284 * 4 - indicates implicit extern "C" in C++ mode
2286 * currently we're only interested in "3"
2288 if (streq(pp_token.literal.string.begin, "3")) {
2289 input.position.is_system_header = true;
/**
 * Handles #error: collects the tokens up to the end of the line as text on
 * pp_obstack and reports that text as an error at the position of the
 * first token. The temporary text is released again afterwards.
 */
2298 static void parse_error_directive(void)
/* escape sequences stay unresolved while the message is collected; the
 * previous setting is restored after the loop */
2305 bool const old_resolve_escape_sequences = resolve_escape_sequences;
2306 resolve_escape_sequences = false;
2308 source_position_t const pos = pp_token.base.source_position;
/* separate tokens by one blank, but never start the message with one */
2310 if (info.had_whitespace && obstack_object_size(&pp_obstack) != 0)
2311 obstack_1grow(&pp_obstack, ' ');
2313 switch (pp_token.kind) {
2315 string_t const *const str = &pp_token.literal.string;
2316 obstack_grow(&pp_obstack, str->begin, str->size);
2322 case T_STRING_LITERAL: delim = '"'; goto string;
2323 case T_CHARACTER_CONSTANT: delim = '\''; goto string;
/* string and character literals are written with their encoding prefix
 * and enclosed in their delimiter */
2325 string_t const *const str = &pp_token.literal.string;
2326 char const *const enc = get_string_encoding_prefix(str->encoding);
2327 obstack_printf(&pp_obstack, "%s%c%s%c", enc, delim, str->begin, delim);
2332 char const *const str = pp_token.base.symbol->string;
2333 obstack_grow(&pp_obstack, str, strlen(str));
2339 } while (!info.at_line_begin);
2341 resolve_escape_sequences = old_resolve_escape_sequences;
/* terminate the text so that it can be printed with %s */
2343 obstack_1grow(&pp_obstack, '\0');
2344 char *const str = obstack_finish(&pp_obstack);
2345 errorf(&pos, "#%s", str);
2346 obstack_free(&pp_obstack, str);
/**
 * Dispatches a preprocessing directive to its handler, based on the
 * directive name found in pp_token. A directive that starts with a number
 * is handled by parse_line_directive(); anything else that is not
 * recognised is reported as an invalid directive. When the function is
 * done the input is at the beginning of a line (asserted).
 */
2349 static void parse_preprocessing_directive(void)
2353 if (info.at_line_begin) {
2354 /* empty directive */
2358 if (pp_token.base.symbol) {
2359 switch (pp_token.base.symbol->pp_ID) {
2360 case TP_define: parse_define_directive(); break;
2361 case TP_else: parse_else_directive(); break;
2362 case TP_endif: parse_endif_directive(); break;
2363 case TP_error: parse_error_directive(); break;
2364 case TP_ifdef: parse_ifdef_ifndef_directive(true); break;
2365 case TP_ifndef: parse_ifdef_ifndef_directive(false); break;
2366 case TP_include: parse_include_directive(false); break;
2367 case TP_include_next: parse_include_directive(true); break;
/* the line keyword is skipped and control jumps to the line_directive
 * label (NOTE(review): presumably placed in front of the
 * parse_line_directive call below -- confirm) */
2368 case TP_line: next_input_token(); goto line_directive;
2369 case TP_pragma: parse_pragma_directive(); break;
2370 case TP_undef: parse_undef_directive(); break;
2373 } else if (pp_token.kind == T_NUMBER) {
2375 parse_line_directive();
2379 errorf(&pp_token.base.source_position, "invalid preprocessing directive #%K", &pp_token);
2384 assert(info.at_line_begin);
/**
 * Finishes the token list collected on pp_obstack and stores it, together
 * with its length, in current_argument.
 */
2387 static void finish_current_argument(void)
2389 if (current_argument == NULL)
2391 size_t size = obstack_object_size(&pp_obstack);
/* the byte size of the growing object divided by the element size gives
 * the number of collected tokens */
2392 current_argument->list_len = size/sizeof(current_argument->token_list[0]);
2393 current_argument->token_list = obstack_finish(&pp_obstack);
/**
 * Advances pp_token to the next preprocessing token. This runs
 * preprocessing directives, starts macro expansions and, while a
 * function-like macro call is open, collects the tokens of its arguments.
 */
2396 void next_preprocessing_token(void)
2399 if (!expand_next()) {
/* a # at the beginning of a line introduces a directive */
2402 while (pp_token.kind == '#' && info.at_line_begin) {
2403 parse_preprocessing_directive();
2405 } while (skip_mode && pp_token.kind != T_EOF);
2408 const token_kind_t kind = pp_token.kind;
/* expansion is only considered when no call is collecting arguments, or
 * while a collected argument is itself being expanded */
2409 if (current_call == NULL || argument_expanding != NULL) {
2410 symbol_t *const symbol = pp_token.base.symbol;
2412 if (kind == T_MACRO_PARAMETER) {
2413 assert(current_expansion != NULL);
2414 start_expanding(pp_token.macro_parameter.def);
2418 pp_definition_t *const pp_definition = symbol->pp_definition;
/* definitions that are currently being expanded are not expanded again */
2419 if (pp_definition != NULL && !pp_definition->is_expanding) {
2420 if (pp_definition->has_parameters) {
2422 /* check if next token is a '(' */
2423 whitespace_info_t old_info = info;
2424 token_kind_t next_token = peek_expansion();
2425 if (next_token == T_EOF) {
2426 info.at_line_begin = false;
2427 info.had_whitespace = false;
2429 if (input.c == '(') {
2434 if (next_token == '(') {
/* expansion_pos remembers where the outermost expansion started; it is
 * used as the position of the end-of-file error further down */
2435 if (current_expansion == NULL)
2436 expansion_pos = pp_token.base.source_position;
2437 next_preprocessing_token();
2438 assert(pp_token.kind == '(');
/* open the call: the whitespace info seen in front of the macro name is
 * kept so that it can be restored when the call is complete */
2440 pp_definition->parent_expansion = current_expansion;
2441 current_call = pp_definition;
2442 current_call->expand_pos = 0;
2443 current_call->expand_info = old_info;
/* NOTE(review): the right-hand side two lines below looks
 * character-mangled in this copy; presumably it is the address of
 * current_call->parameters[0] -- confirm against the upstream file */
2444 if (current_call->n_parameters > 0) {
2445 current_argument = ¤t_call->parameters[0];
2446 assert(argument_brace_count == 0);
2450 /* skip_whitespaces() skipped newlines and whitespace,
2451 * remember results for next token */
/* NOTE(review): presumably the branch for macros without parameters, which
 * are expanded right away -- confirm */
2457 if (current_expansion == NULL)
2458 expansion_pos = pp_token.base.source_position;
2459 start_expanding(pp_definition);
2466 if (current_call != NULL) {
2467 /* current_call != NULL */
/* nested parentheses inside an argument are counted, so that commas and
 * closing parentheses within them do not end the argument */
2469 ++argument_brace_count;
2470 } else if (kind == ')') {
2471 if (argument_brace_count > 0) {
2472 --argument_brace_count;
/* this closes the call itself: finish the last argument, start expanding
 * the macro and restore the whitespace info saved when the call began */
2474 finish_current_argument();
2475 assert(kind == ')');
2476 start_expanding(current_call);
2477 info = current_call->expand_info;
2478 current_call = NULL;
2479 current_argument = NULL;
2482 } else if (kind == ',' && argument_brace_count == 0) {
/* a comma outside of nested parentheses moves on to the next parameter */
2483 finish_current_argument();
2484 current_call->expand_pos++;
2485 if (current_call->expand_pos >= current_call->n_parameters) {
2486 errorf(&pp_token.base.source_position,
2487 "too many arguments passed for macro '%Y'",
2488 current_call->symbol);
2489 current_argument = NULL;
2492 = ¤t_call->parameters[current_call->expand_pos];
2495 } else if (kind == T_MACRO_PARAMETER) {
2496 /* parameters have to be fully expanded before being used as
2497 * parameters for another macro-call */
2498 assert(current_expansion != NULL);
2499 pp_definition_t *argument = pp_token.macro_parameter.def;
2500 argument_expanding = argument;
2501 start_expanding(argument);
2503 } else if (kind == T_EOF) {
2504 errorf(&expansion_pos,
2505 "reached end of file while parsing arguments for '%Y'",
2506 current_call->symbol);
/* append the token, with its leading-whitespace flag, to the argument that
 * is currently being collected */
2509 if (current_argument != NULL) {
2510 saved_token_t saved;
2511 saved.token = pp_token;
2512 saved.had_whitespace = info.had_whitespace;
2513 obstack_grow(&pp_obstack, &saved, sizeof(saved));
/**
 * Appends a new entry to the end of the search path list @p paths. The
 * entry is allocated zeroed from config_obstack and inherits the
 * system-path flag of the list.
 */
2519 void append_include_path(searchpath_t *paths, const char *path)
2521 searchpath_entry_t *entry = OALLOCZ(&config_obstack, searchpath_entry_t);
2523 entry->is_system_path = paths->is_system_path;
/* anchor points at the slot the next entry has to be stored in; after
 * linking, that slot is the next pointer of the new entry */
2525 *paths->anchor = entry;
2526 paths->anchor = &entry->next;
/**
 * Appends the colon separated entries of the environment variable
 * @p envvar to the search path list @p paths. Nothing happens if the
 * variable is unset or empty.
 */
2529 static void append_env_paths(searchpath_t *paths, const char *envvar)
2531 const char *val = getenv(envvar);
2532 if (val != NULL && *val != '\0') {
2533 const char *begin = val;
/* scan up to the next colon or the end of the value */
2537 while (*c != '\0' && *c != ':')
2540 size_t len = c-begin;
2542 /* use "." for gcc compatibility (Matze: I would expect that
2543 * nothing happens for an empty entry...) */
2544 append_include_path(paths, ".");
/* the entry is copied to config_obstack with a terminating NUL */
2546 char *const string = obstack_copy0(&config_obstack, begin, len);
2547 append_include_path(paths, string);
2554 } while(*c != '\0');
/**
 * Chains the list @p append behind @p path by storing the first entry of
 * @p append in the slot that the anchor of @p path points to.
 */
2558 static void append_searchpath(searchpath_t *path, const searchpath_t *append)
2560 *path->anchor = append->first;
/**
 * Fills the search path lists with the built-in path and the paths from
 * the environment (CPATH, and CPLUS_INCLUDE_PATH or C_INCLUDE_PATH
 * depending on whether the _CXX bit is set in c_mode), then chains the
 * lists together.
 */
2563 static void setup_include_path(void)
2565 /* built-in paths */
2566 append_include_path(&system_searchpath, "/usr/include");
2568 /* parse environment variable */
2569 append_env_paths(&bracket_searchpath, "CPATH");
2570 append_env_paths(&system_searchpath,
2571 c_mode & _CXX ? "CPLUS_INCLUDE_PATH" : "C_INCLUDE_PATH");
2573 /* append system search path to bracket searchpath */
2574 append_searchpath(&bracket_searchpath, &system_searchpath);
/* NOTE(review): the first argument below looks character-mangled in this
 * copy; presumably it is the address of quote_searchpath, so that the
 * bracket search path gets chained behind the quote search path --
 * confirm against the upstream file */
2575 append_searchpath("e_searchpath, &bracket_searchpath);
/**
 * Reports @p message as an error. The position used is the position of the
 * current token, moved by @p delta_lines lines and @p delta_cols columns.
 */
2578 static void input_error(unsigned const delta_lines, unsigned const delta_cols, char const *const message)
/* work on a copy so that the token position itself stays untouched */
2580 source_position_t pos = pp_token.base.source_position;
2581 pos.lineno += delta_lines;
2582 pos.colno += delta_cols;
2583 errorf(&pos, "%s", message);
/**
 * Initialises config_obstack, the obstack that search path entries are
 * allocated from.
 */
2586 void init_include_paths(void)
2588 obstack_init(&config_obstack);
/**
 * Initialises the obstacks and the string set used by the preprocessor,
 * sets up the include search paths and registers input_error() as the
 * input error callback.
 */
2591 void init_preprocessor(void)
2595 obstack_init(&pp_obstack);
2596 obstack_init(&input_obstack);
2597 strset_init(&stringset);
2599 setup_include_path();
2601 set_input_error_callback(input_error);
/**
 * Releases the three obstacks (input, preprocessor and configuration) and
 * destroys the string set.
 */
2604 void exit_preprocessor(void)
/* passing NULL to obstack_free frees everything held by the obstack */
2606 obstack_free(&input_obstack, NULL);
2607 obstack_free(&pp_obstack, NULL);
2608 obstack_free(&config_obstack, NULL);
2610 strset_destroy(&stringset);
/**
 * Entry point of the preprocessor test driver: initialises the symbol
 * table, include paths and preprocessor, parses a small set of command
 * line options (-I, -E, -o and one input file), runs the preprocessor over
 * the input and tears everything down again.
 */
2613 int pptest_main(int argc, char **argv);
2614 int pptest_main(int argc, char **argv)
2616 init_symbol_table();
2617 init_include_paths();
2618 init_preprocessor();
/* both flags are switched off for this driver (judging by their names:
 * unknown characters are not an error and escape sequences are kept
 * unresolved) */
2621 error_on_unknown_chars = false;
2622 resolve_escape_sequences = false;
2624 /* simplistic commandline parser */
2625 const char *filename = NULL;
2626 const char *output = NULL;
2627 for (int i = 1; i < argc; ++i) {
2628 const char *opt = argv[i];
2629 if (streq(opt, "-I")) {
/* NOTE(review): no check is visible that an argument follows -I; if it is
 * the last option, argv[++i] is the terminating NULL of argv -- confirm */
2630 append_include_path(&bracket_searchpath, argv[++i]);
2632 } else if (streq(opt, "-E")) {
2634 } else if (streq(opt, "-o")) {
2637 } else if (opt[0] == '-') {
2638 fprintf(stderr, "Unknown option '%s'\n", opt);
2640 if (filename != NULL)
2641 fprintf(stderr, "Multiple inputs not supported\n");
2645 if (filename == NULL) {
2646 fprintf(stderr, "No input specified\n");
2650 if (output == NULL) {
2653 out = fopen(output, "w");
2655 fprintf(stderr, "Couldn't open output '%s'\n", output);
2660 /* just here for gcc compatibility */
2661 fprintf(out, "# 1 \"%s\"\n", filename);
2662 fprintf(out, "# 1 \"<built-in>\"\n");
2663 fprintf(out, "# 1 \"<command-line>\"\n");
2665 FILE *file = fopen(filename, "r");
2667 fprintf(stderr, "Couldn't open input '%s'\n", filename);
2670 switch_pp_input(file, filename, NULL, false);
/* fetch tokens; T_EOF marks the end of the input */
2673 next_preprocessing_token();
2674 if (pp_token.kind == T_EOF)
/* report conditionals that were never closed, then close the input file */
2680 check_unclosed_conditionals();
2681 fclose(close_pp_input());
2686 exit_preprocessor();
2687 exit_symbol_table();