9 #include "preprocessor.h"
13 #include "adt/error.h"
14 #include "adt/strutil.h"
15 #include "adt/strset.h"
16 #include "lang_features.h"
17 #include "diagnostic.h"
18 #include "string_rep.h"
22 #define INCLUDE_LIMIT 199 /* 199 is for gcc "compatibility" */
/* A token as stored in a macro replacement list; parse_define_directive()
 * fills in its `token` and `had_whitespace` members. */
24 typedef struct saved_token_t {
/* Whitespace and line-start facts recorded for a token while lexing. */
29 typedef struct whitespace_info_t {
30 /** current token had whitespace in front of it */
32 /** current token is at the beginning of a line.
33 * => a "#" at line begin starts a preprocessing directive. */
35 /** number of spaces before the first token in a line */
36 unsigned whitespace_at_line_begin;
/* Describes a macro.  Macro parameters use the same record type and are
 * marked with is_parameter. */
39 struct pp_definition_t {
41 source_position_t source_position;
/* start_expanding() stores the enclosing expansion here.  While a #define is
 * being parsed, parse_define_directive() also uses this field on parameter
 * records to remember the definition the parameter's symbol had before. */
42 pp_definition_t *parent_expansion;
44 whitespace_info_t expand_info;
/* set by start_expanding(), cleared by finished_expanding() */
46 bool is_expanding : 1;
/* set when the macro name is directly followed by '(' in its #define */
47 bool has_parameters : 1;
48 bool is_parameter : 1;
/* for a parameter record: the macro the parameter belongs to */
49 pp_definition_t *function_definition;
/* array of parameter records, built by parse_define_directive() */
51 pp_definition_t *parameters;
/* replacement list of saved tokens, built by parse_define_directive() */
55 saved_token_t *token_list;
58 typedef struct pp_conditional_t pp_conditional_t;
/* Node of the list whose head is conditional_stack; `parent` links to the
 * next node.  NOTE(review): presumably one node per open #if-style
 * directive -- confirm against the code that pushes them. */
59 struct pp_conditional_t {
60 source_position_t source_position;
63 /** conditional in skip mode (then+else gets skipped) */
65 pp_conditional_t *parent;
68 typedef struct pp_input_t pp_input_t;
/* Decoded characters.  next_real_char() refills from buf + MAX_PUTBACK on, so
 * the first MAX_PUTBACK slots stay free for put_back(). */
73 utf32 buf[1024+MAX_PUTBACK];
/* position of the current character; colno and lineno are adjusted by the
 * character-reading helpers */
76 source_position_t position;
79 searchpath_entry_t *path;
/* One entry of a search path; entries form a singly linked list via next. */
82 struct searchpath_entry_t {
84 searchpath_entry_t *next;
/* the input currently being read */
88 static pp_input_t input;
/* saved states of suspended inputs, most recently pushed first
 * (see push_input() / pop_restore_input()) */
90 static pp_input_t *input_stack;
91 static unsigned n_inputs;
/* backing storage for the entries of input_stack */
92 static struct obstack input_obstack;
94 static pp_conditional_t *conditional_stack;
97 bool allow_dollar_in_symbol = true;
/* the next two flags are switched together by set_preprocessor_output() */
98 static bool resolve_escape_sequences = true;
99 static bool error_on_unknown_chars = true;
100 static bool skip_mode;
/* holds macro definitions, their parameter arrays and token lists */
102 static struct obstack pp_obstack;
103 static struct obstack config_obstack;
/* input name of the last line directive written by print_line_directive() */
104 static const char *printed_input_name = NULL;
/* source position stamped on every token produced by expand_next() */
105 static source_position_t expansion_pos;
/* innermost macro expansion in progress, NULL when none is active */
106 static pp_definition_t *current_expansion = NULL;
107 static pp_definition_t *current_call = NULL;
108 static pp_definition_t *current_argument = NULL;
/* reset to NULL by finished_expanding() once that definition is done */
109 static pp_definition_t *argument_expanding = NULL;
110 static unsigned argument_brace_count;
/* interning set used by identify_string() */
111 static strset_t stringset;
/* kind of the token most recently written by emit_pp_token() */
112 static token_kind_t last_token;
/* A list of search path entries.  Every predefined search path in this file
 * initialises `anchor` to the address of its own `first` member. */
114 struct searchpath_t {
115 searchpath_entry_t *first;
116 searchpath_entry_t **anchor;
/* The predefined search paths.  Each starts out empty (first == NULL) with
 * anchor pointing at its own head pointer.  The third initialiser is false
 * for the bracket and quote paths and true for the system and after paths. */
120 searchpath_t bracket_searchpath = { NULL, &bracket_searchpath.first, false };
121 searchpath_t quote_searchpath = { NULL, &quote_searchpath.first, false };
122 searchpath_t system_searchpath = { NULL, &system_searchpath.first, true };
123 searchpath_t after_searchpath = { NULL, &after_searchpath.first, true };
125 static whitespace_info_t next_info; /* valid if had_whitespace is true */
/* whitespace state belonging to the token currently held in pp_token */
126 static whitespace_info_t info;
/* forward declarations: these functions are used before their definitions */
128 static inline void next_char(void);
129 static void next_input_token(void);
130 static void print_line_directive(const source_position_t *pos, const char *add);
/* Symbols for the digraph spellings; filled in by init_symbols() and attached
 * to tokens through set_digraph(). */
132 static symbol_t *symbol_colongreater;
133 static symbol_t *symbol_lesscolon;
134 static symbol_t *symbol_lesspercent;
135 static symbol_t *symbol_percentcolon;
136 static symbol_t *symbol_percentcolonpercentcolon;
137 static symbol_t *symbol_percentgreater;
/* Enters the six digraph spellings into the symbol table and caches the
 * resulting symbols in the file-scope symbol_* variables. */
139 static void init_symbols(void)
141 symbol_colongreater = symbol_table_insert(":>");
142 symbol_lesscolon = symbol_table_insert("<:");
143 symbol_lesspercent = symbol_table_insert("<%");
144 symbol_percentcolon = symbol_table_insert("%:");
145 symbol_percentcolonpercentcolon = symbol_table_insert("%:%:");
146 symbol_percentgreater = symbol_table_insert("%>");
/* Makes `file` the current input.
 *
 * @param file             stream to read from
 * @param filename         name stored as the input name of the position
 * @param path             search path entry associated with the input
 * @param is_system_header stored in the position of the new input */
149 void switch_pp_input(FILE *const file, char const *const filename, searchpath_entry_t *const path, bool const is_system_header)
152 input.input = input_from_stream(file, NULL);
155 input.output_line = 0;
156 input.position.input_name = filename;
157 input.position.lineno = 1;
158 input.position.is_system_header = is_system_header;
161 /* indicate that we're at a new input */
/* the "1" flag is only added when another input is already suspended on
 * input_stack */
162 print_line_directive(&input.position, input_stack != NULL ? "1" : NULL);
164 /* place a virtual '\n' so we realize we're at line begin */
165 input.position.lineno = 0;
/* Releases the decoder of the current input with input_free().  The function
 * returns a FILE *; the end-of-input handling in next_input_token() passes
 * that result to fclose(). */
169 FILE *close_pp_input(void)
171 input_free(input.input);
173 FILE* const file = input.file;
/* Saves the current input state: copies `input` onto input_obstack and links
 * the copy in as the new head of input_stack. */
185 static void push_input(void)
187 pp_input_t *const saved_input = obstack_copy(&input_obstack, &input, sizeof(input));
189 /* adjust buffer positions */
/* bufpos/bufend point into the buf array embedded in the struct, so the
 * copied pointers must be rebased onto the copy's own buf */
190 if (input.bufpos != NULL)
191 saved_input->bufpos = saved_input->buf + (input.bufpos - input.buf);
192 if (input.bufend != NULL)
193 saved_input->bufend = saved_input->buf + (input.bufend - input.buf);
195 saved_input->parent = input_stack;
196 input_stack = saved_input;
/* Restores the most recently saved input state from input_stack into `input`
 * and releases the saved copy.  Requires a non-empty stack. */
200 static void pop_restore_input(void)
202 assert(n_inputs > 0);
203 assert(input_stack != NULL);
205 pp_input_t *saved_input = input_stack;
207 memcpy(&input, saved_input, sizeof(input));
210 /* adjust buffer positions */
/* the memcpy copied pointers into saved_input->buf; rebase them onto
 * input.buf */
211 if (saved_input->bufpos != NULL)
212 input.bufpos = input.buf + (saved_input->bufpos - saved_input->buf);
213 if (saved_input->bufend != NULL)
214 input.bufend = input.buf + (saved_input->bufend - saved_input->buf);
/* unlink before freeing: saved_input->parent is read here for the last time */
216 input_stack = saved_input->parent;
217 obstack_free(&input_obstack, saved_input);
222 * Prints a parse error message at the current token.
224 * @param msg the error message
226 static void parse_error(const char *msg)
/* msg is passed as a "%s" argument, so '%' characters inside it are printed
 * literally instead of being interpreted as format directives */
228 errorf(&pp_token.base.source_position, "%s", msg);
/* Reads the next decoded character into input.c and advances the column.
 * Trigraphs and line continuations are handled one level up, in next_char(). */
231 static inline void next_real_char(void)
233 assert(input.bufpos <= input.bufend);
/* buffer exhausted: decode the next chunk behind the MAX_PUTBACK reserve */
234 if (input.bufpos >= input.bufend) {
235 size_t const n = decode(input.input, input.buf + MAX_PUTBACK, lengthof(input.buf) - MAX_PUTBACK);
240 input.bufpos = input.buf + MAX_PUTBACK;
241 input.bufend = input.bufpos + n;
243 input.c = *input.bufpos++;
244 ++input.position.colno;
248 * Put a character back into the buffer.
250 * @param pc the character to put back
252 static inline void put_back(utf32 const pc)
/* there must be at least one free slot in front of the read position */
254 assert(input.bufpos > input.buf);
/* input.buf holds utf32 elements, so pc is stored without narrowing: a cast
 * to char would mangle every code point that does not fit into a char.
 * NOTE(review): the "- input.buf + input.buf" round trip is kept as it was;
 * presumably it yields a writable pointer when bufpos is declared
 * pointer-to-const -- confirm against the pp_input_t declaration. */
255 *(--input.bufpos - input.buf + input.buf) = pc;
256 --input.position.colno;
262 if (input.c == '\n') { \
266 ++input.position.lineno; \
267 input.position.colno = 1; \
269 newline // Let it look like an ordinary case label.
/* Consumes the current character via next_char() after asserting that it is
 * c_type. */
271 #define eat(c_type) (assert(input.c == c_type), next_char())
/* Called by next_char() when the current character is a backslash, including
 * one produced by the ??/ trigraph. */
273 static void maybe_concat_lines(void)
279 info.whitespace_at_line_begin = 0;
291 * Set c to the next input character, ie.
292 * after expanding trigraphs.
294 static inline void next_char(void)
298 /* filter trigraphs and concatenated lines */
299 if (UNLIKELY(input.c == '\\')) {
300 maybe_concat_lines();
301 goto end_of_next_char;
/* fast path: anything but '?' cannot start a trigraph */
304 if (LIKELY(input.c != '?'))
305 goto end_of_next_char;
/* a trigraph needs a second '?' */
308 if (LIKELY(input.c != '?')) {
311 goto end_of_next_char;
/* third character of the trigraph selects the replacement character */
316 case '=': input.c = '#'; break;
317 case '(': input.c = '['; break;
/* ??/ yields a backslash, which may in turn start a line continuation */
318 case '/': input.c = '\\'; maybe_concat_lines(); break;
319 case ')': input.c = ']'; break;
320 case '\'': input.c = '^'; break;
321 case '<': input.c = '{'; break;
322 case '!': input.c = '|'; break;
323 case '>': input.c = '}'; break;
324 case '-': input.c = '~'; break;
/* trace output showing the character that was delivered */
334 printf("nchar '%c'\n", input.c);
341 * Returns true if the given char is a octal digit.
343 * @param char the character to check
/* note: the parameter documented above is named chr in the signature */
345 static inline bool is_octal_digit(int chr)
363 * Returns the value of a digit.
364 * The only portable way to do it ...
366 static int digit_value(int digit)
/* reached for a character that is not handled as a digit */
392 panic("wrong character given");
397 * Parses an octal character sequence.
399 * @param first_digit the already read first digit
401 static utf32 parse_octal_sequence(const utf32 first_digit)
403 assert(is_octal_digit(first_digit));
404 utf32 value = digit_value(first_digit);
/* each further octal digit shifts the accumulated value by one octal place;
 * the value collected so far is returned at the first non-octal character */
405 if (!is_octal_digit(input.c)) return value;
406 value = 8 * value + digit_value(input.c);
408 if (!is_octal_digit(input.c)) return value;
409 value = 8 * value + digit_value(input.c);
416 * Parses a hex character sequence.
418 static utf32 parse_hex_sequence(void)
/* Accumulates hex digits for as long as the current character is one.
 * NOTE(review): input.c holds decoded characters; isxdigit() is only defined
 * for arguments representable as unsigned char or EOF -- confirm that larger
 * values cannot reach this call. */
421 while (isxdigit(input.c)) {
422 value = 16 * value + digit_value(input.c);
/* Validity check for the value of a universal character name.  Two ranges are
 * tested: code points below 0xA0 other than 0x24 ('$'), 0x40 ('@') and 0x60
 * ('`'), and the surrogate range 0xD800-0xDFFF.
 * NOTE(review): presumably both ranges are rejected, matching C11 6.4.3 --
 * confirm against the branch bodies. */
428 static bool is_universal_char_valid(utf32 const v)
431 if (v < 0xA0U && v != 0x24 && v != 0x40 && v != 0x60)
433 if (0xD800 <= v && v <= 0xDFFF)
/* Reads the hex digits of a universal character name.
 *
 * @param n_digits number of hex digits expected (callers pass 4 for \u and
 *                 8 for \U) */
438 static utf32 parse_universal_char(unsigned const n_digits)
441 for (unsigned k = n_digits; k != 0; --k) {
442 if (isxdigit(input.c)) {
443 v = 16 * v + digit_value(input.c);
/* when escapes are not resolved the digits are copied verbatim */
444 if (!resolve_escape_sequences)
445 obstack_1grow(&symbol_obstack, input.c);
448 errorf(&input.position,
449 "short universal character name, expected %u more digits",
454 if (!is_universal_char_valid(v)) {
455 errorf(&input.position,
456 "\\%c%0*X is not a valid universal character name",
/* the message reproduces the spelling: 'u' with 4 digits, 'U' otherwise */
457 n_digits == 4 ? 'u' : 'U', (int)n_digits, v);
/* Returns true for code points that may appear in an identifier when written
 * as a universal character name.
 * NOTE(review): the ranges appear to follow C11 Annex D.1 -- confirm. */
462 static bool is_universal_char_valid_identifier(utf32 const v)
465 if ( v == 0x000A8) return true;
466 if ( v == 0x000AA) return true;
467 if ( v == 0x000AD) return true;
468 if ( v == 0x000AF) return true;
469 if (0x000B2 <= v && v <= 0x000B5) return true;
470 if (0x000B7 <= v && v <= 0x000BA) return true;
471 if (0x000BC <= v && v <= 0x000BE) return true;
472 if (0x000C0 <= v && v <= 0x000D6) return true;
473 if (0x000D8 <= v && v <= 0x000F6) return true;
474 if (0x000F8 <= v && v <= 0x000FF) return true;
475 if (0x00100 <= v && v <= 0x0167F) return true;
476 if (0x01681 <= v && v <= 0x0180D) return true;
477 if (0x0180F <= v && v <= 0x01FFF) return true;
478 if (0x0200B <= v && v <= 0x0200D) return true;
479 if (0x0202A <= v && v <= 0x0202E) return true;
480 if (0x0203F <= v && v <= 0x02040) return true;
481 if ( v == 0x02054) return true;
482 if (0x02060 <= v && v <= 0x0206F) return true;
483 if (0x02070 <= v && v <= 0x0218F) return true;
484 if (0x02460 <= v && v <= 0x024FF) return true;
485 if (0x02776 <= v && v <= 0x02793) return true;
486 if (0x02C00 <= v && v <= 0x02DFF) return true;
487 if (0x02E80 <= v && v <= 0x02FFF) return true;
488 if (0x03004 <= v && v <= 0x03007) return true;
489 if (0x03021 <= v && v <= 0x0302F) return true;
490 if (0x03031 <= v && v <= 0x0303F) return true;
491 if (0x03040 <= v && v <= 0x0D7FF) return true;
492 if (0x0F900 <= v && v <= 0x0FD3D) return true;
493 if (0x0FD40 <= v && v <= 0x0FDCF) return true;
494 if (0x0FDF0 <= v && v <= 0x0FE44) return true;
495 if (0x0FE47 <= v && v <= 0x0FFFD) return true;
/* planes 1 to 14, each without its last two code points */
496 if (0x10000 <= v && v <= 0x1FFFD) return true;
497 if (0x20000 <= v && v <= 0x2FFFD) return true;
498 if (0x30000 <= v && v <= 0x3FFFD) return true;
499 if (0x40000 <= v && v <= 0x4FFFD) return true;
500 if (0x50000 <= v && v <= 0x5FFFD) return true;
501 if (0x60000 <= v && v <= 0x6FFFD) return true;
502 if (0x70000 <= v && v <= 0x7FFFD) return true;
503 if (0x80000 <= v && v <= 0x8FFFD) return true;
504 if (0x90000 <= v && v <= 0x9FFFD) return true;
505 if (0xA0000 <= v && v <= 0xAFFFD) return true;
506 if (0xB0000 <= v && v <= 0xBFFFD) return true;
507 if (0xC0000 <= v && v <= 0xCFFFD) return true;
508 if (0xD0000 <= v && v <= 0xDFFFD) return true;
509 if (0xE0000 <= v && v <= 0xEFFFD) return true;
/* Returns false for the four ranges listed below; parse_symbol() uses this to
 * reject such code points as the first character of an identifier.
 * NOTE(review): the ranges look like the combining-mark blocks of C11 Annex
 * D.2 -- confirm. */
513 static bool is_universal_char_valid_identifier_start(utf32 const v)
516 if (0x0300 <= v && v <= 0x036F) return false;
517 if (0x1DC0 <= v && v <= 0x1DFF) return false;
518 if (0x20D0 <= v && v <= 0x20FF) return false;
519 if (0xFE20 <= v && v <= 0xFE2F) return false;
524 * Parse an escape sequence.
526 static utf32 parse_escape_sequence(void)
/* ec is the character that selects the kind of escape */
530 utf32 const ec = input.c;
/* simple escapes map directly onto the character they denote */
534 case '"': return '"';
535 case '\'': return '\'';
536 case '\\': return '\\';
537 case '?': return '\?';
538 case 'a': return '\a';
539 case 'b': return '\b';
540 case 'f': return '\f';
541 case 'n': return '\n';
542 case 'r': return '\r';
543 case 't': return '\t';
544 case 'v': return '\v';
546 return parse_hex_sequence();
/* ec itself is the first octal digit */
555 return parse_octal_sequence(ec);
557 parse_error("reached end of file while parsing escape sequence");
559 /* \E is not documented, but handled, by GCC. It is acceptable according
560 * to §6.11.4, whereas \e is not. */
564 return 27; /* hopefully 27 is ALWAYS the code for ESCAPE */
/* universal character names: 8 hex digits after \U, 4 after \u */
567 case 'U': return parse_universal_char(8);
568 case 'u': return parse_universal_char(4);
573 /* §6.4.4.4:8 footnote 64 */
574 parse_error("unknown escape sequence");
/* Interns `string` in stringset.
 *
 * @param string a string that was just finished on symbol_obstack */
578 static const char *identify_string(char *string)
580 const char *result = strset_insert(&stringset, string);
/* the set handed back a different pointer, so the fresh copy on
 * symbol_obstack is not needed and is released again */
581 if (result != string) {
582 obstack_free(&symbol_obstack, string);
/* Finishes the object being grown on symbol_obstack and turns it into an
 * interned string_t with the given encoding.
 *
 * @param enc encoding stored in the result */
587 static string_t sym_make_string(string_encoding_t const enc)
589 obstack_1grow(&symbol_obstack, '\0');
/* the length does not count the terminating NUL appended above */
590 size_t const len = obstack_object_size(&symbol_obstack) - 1;
591 char *const string = obstack_finish(&symbol_obstack);
592 char const *const result = identify_string(string);
593 return (string_t){ result, len, enc };
/* Builds an interned string_t with encoding STRING_ENCODING_CHAR from a
 * NUL-terminated C string.
 *
 * @param string the characters to copy (up to, not including, the NUL) */
596 string_t make_string(char const *const string)
598 obstack_grow(&symbol_obstack, string, strlen(string));
599 return sym_make_string(STRING_ENCODING_CHAR);
/* Lexes a quoted literal into pp_token.
 *
 * @param delimiter the quote character that ends the literal
 * @param kind      token kind stored in pp_token
 * @param enc       encoding of the literal
 * @param context   description used in diagnostics ("%s" argument) */
602 static void parse_string(utf32 const delimiter, token_kind_t const kind,
603 string_encoding_t const enc,
604 char const *const context)
/* remembered so that an EOF error can point at the line the literal began on */
606 const unsigned start_linenr = input.position.lineno;
613 if (resolve_escape_sequences) {
614 utf32 const tc = parse_escape_sequence();
/* plain char literals store the escape as a single byte, other encodings
 * store it UTF-8 encoded */
615 if (enc == STRING_ENCODING_CHAR) {
617 warningf(WARN_OTHER, &pp_token.base.source_position, "escape sequence out of range");
619 obstack_1grow(&symbol_obstack, tc);
621 obstack_grow_utf8(&symbol_obstack, tc);
624 obstack_1grow(&symbol_obstack, (char)input.c);
626 obstack_1grow(&symbol_obstack, (char)input.c);
633 errorf(&pp_token.base.source_position, "newline while parsing %s", context);
/* only input_name and lineno of this temporary position are filled in */
637 source_position_t source_position;
638 source_position.input_name = pp_token.base.source_position.input_name;
639 source_position.lineno = start_linenr;
640 errorf(&source_position, "EOF while parsing %s", context);
645 if (input.c == delimiter) {
649 obstack_grow_utf8(&symbol_obstack, input.c);
657 pp_token.kind = kind;
658 pp_token.literal.string = sym_make_string(enc);
/* Lexes a double-quoted string literal of the given encoding into pp_token. */
661 static void parse_string_literal(string_encoding_t const enc)
663 parse_string('"', T_STRING_LITERAL, enc, "string literal");
/* Lexes a single-quoted character constant of the given encoding into
 * pp_token and reports an error if nothing stood between the quotes. */
666 static void parse_character_constant(string_encoding_t const enc)
668 parse_string('\'', T_CHARACTER_CONSTANT, enc, "character constant");
669 if (pp_token.literal.string.size == 0) {
670 parse_error("empty character constant");
/* Case-label helper macros for the lexer switches.  In
 * SYMBOL_CASES_WITHOUT_E_P a '$' only counts as a symbol character while
 * allow_dollar_in_symbol is set; otherwise control jumps to dollar_sign. */
674 #define SYMBOL_CASES_WITHOUT_E_P \
675 '$': if (!allow_dollar_in_symbol) goto dollar_sign; \
726 #define SYMBOL_CASES \
727 SYMBOL_CASES_WITHOUT_E_P: \
733 #define DIGIT_CASES \
745 static void start_expanding(pp_definition_t *definition)
/* Pushes `definition` onto the chain of active expansions: the previous
 * current_expansion becomes its parent and reading starts at position 0. */
747 definition->parent_expansion = current_expansion;
748 definition->expand_pos = 0;
749 definition->is_expanding = true;
/* the first replacement token inherits the whitespace state of the token
 * being replaced */
750 if (definition->list_len > 0) {
751 definition->token_list[0].had_whitespace
752 = info.had_whitespace;
754 current_expansion = definition;
/* Pops `definition` from the chain of active expansions; it must be the
 * innermost one (current_expansion). */
757 static void finished_expanding(pp_definition_t *definition)
759 assert(definition->is_expanding);
/* read the parent before the link is cleared */
760 pp_definition_t *parent = definition->parent_expansion;
761 definition->parent_expansion = NULL;
762 definition->is_expanding = false;
764 /* stop further expanding once we expanded a parameter used in a
766 if (definition == argument_expanding)
767 argument_expanding = NULL;
769 assert(current_expansion == definition);
770 current_expansion = parent;
/* Appends a string or character literal in source form to an obstack:
 * encoding prefix, opening delimiter, contents, closing delimiter.
 *
 * @param obst      obstack to append to
 * @param string    the literal's contents and encoding
 * @param delimiter text written before and after the contents */
773 static void grow_string_escaped(struct obstack *obst, const string_t *string, char const *delimiter)
775 char const *prefix = get_string_encoding_prefix(string->encoding);
776 obstack_printf(obst, "%s%s", prefix, delimiter);
777 size_t size = string->size;
778 const char *str = string->begin;
/* with resolved escapes the bytes are copied as they are; otherwise each
 * backslash and double quote gets a backslash in front of it */
779 if (resolve_escape_sequences) {
780 obstack_grow(obst, str, size);
782 for (size_t i = 0; i < size; ++i) {
783 const char c = str[i];
784 if (c == '\\' || c == '"')
785 obstack_1grow(obst, '\\');
786 obstack_1grow(obst, c);
789 obstack_printf(obst, "%s", delimiter);
/* Appends the spelling of `token` to `obst`. */
792 static void grow_token(struct obstack *obst, const token_t *token)
794 switch (token->kind) {
796 obstack_grow(obst, token->literal.string.begin, token->literal.string.size);
799 case T_STRING_LITERAL: {
/* when escapes are left unresolved the quote itself is written escaped */
800 char const *const delimiter = resolve_escape_sequences ? "\"" : "\\\"";
801 grow_string_escaped(obst, &token->literal.string, delimiter);
805 case T_CHARACTER_CONSTANT:
806 grow_string_escaped(obst, &token->literal.string, "'");
/* remaining tokens are spelled by the string of their symbol */
811 const char *str = token->base.symbol->string;
812 size_t len = strlen(str);
813 obstack_grow(obst, str, len);
/* Turns the token list of `definition` into one string literal token in
 * pp_token.  Requires that no object is being grown on symbol_obstack. */
819 static void stringify(const pp_definition_t *definition)
821 assert(obstack_object_size(&symbol_obstack) == 0);
823 size_t list_len = definition->list_len;
824 for (size_t p = 0; p < list_len; ++p) {
825 const saved_token_t *saved = &definition->token_list[p];
/* whitespace between tokens becomes one space; whitespace in front of the
 * first token is dropped */
826 if (p > 0 && saved->had_whitespace)
827 obstack_1grow(&symbol_obstack, ' ');
828 grow_token(&symbol_obstack, &saved->token);
830 pp_token.kind = T_STRING_LITERAL;
831 pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
/* Makes pp_token a punctuator of the given kind, using the symbol registered
 * for that kind in token_symbols. */
834 static inline void set_punctuator(token_kind_t const kind)
836 pp_token.kind = kind;
837 pp_token.base.symbol = token_symbols[kind];
/* Like set_punctuator(), but with an explicitly supplied symbol so that the
 * token keeps its digraph spelling. */
840 static inline void set_digraph(token_kind_t const kind, symbol_t *const symbol)
842 pp_token.kind = kind;
843 pp_token.base.symbol = symbol;
847 * returns next final token from a preprocessor macro expansion
849 static bool expand_next(void)
/* nothing to do when no macro expansion is active */
851 if (current_expansion == NULL)
855 size_t pos = current_expansion->expand_pos;
/* replacement list exhausted: leave this expansion */
856 if (pos >= current_expansion->list_len) {
857 finished_expanding(current_expansion);
858 /* it was the outermost expansion, parse pptoken normally */
859 if (current_expansion == NULL) {
/* fetch the next saved token; pos now indexes the token after it */
864 const saved_token_t *saved = &current_expansion->token_list[pos++];
865 pp_token = saved->token;
/* a '#' directly followed by a macro parameter */
866 if (pp_token.kind == '#') {
867 if (pos < current_expansion->list_len) {
868 const saved_token_t *next = &current_expansion->token_list[pos];
869 if (next->token.kind == T_MACRO_PARAMETER) {
870 pp_definition_t *def = next->token.macro_parameter.def;
871 assert(def != NULL && def->is_parameter);
/* for the first token of an expansion (expand_pos still 0) the current
 * whitespace state is left untouched */
878 if (current_expansion->expand_pos > 0)
879 info.had_whitespace = saved->had_whitespace;
880 current_expansion->expand_pos = pos;
/* every expanded token reports the position stored in expansion_pos */
881 pp_token.base.source_position = expansion_pos;
887 * Returns the next token kind found when continuing the current expansions
888 * without starting new sub-expansions.
890 static token_kind_t peek_expansion(void)
/* walk outwards from the innermost expansion; the first one that still has
 * tokens left provides the answer */
892 for (pp_definition_t *e = current_expansion; e; e = e->parent_expansion) {
893 if (e->expand_pos < e->list_len)
894 return e->token_list[e->expand_pos].token.kind;
/* Skips a line comment; for the following token a comment counts as
 * whitespace. */
899 static void skip_line_comment(void)
901 info.had_whitespace = true;
/* Skips a block comment; for the following token a comment counts as
 * whitespace. */
918 static void skip_multiline_comment(void)
920 info.had_whitespace = true;
/* remembered for the error message about an unterminated comment */
922 unsigned start_linenr = input.position.lineno;
927 if (input.c == '*') {
928 /* TODO: nested comment, warn here */
933 if (input.c == '/') {
/* the comment ended on a line other than input.output_line: record the
 * current column as whitespace at line begin */
934 if (input.position.lineno != input.output_line)
935 info.whitespace_at_line_begin = input.position.colno;
/* only input_name and lineno of this temporary position are filled in */
945 source_position_t source_position;
946 source_position.input_name = pp_token.base.source_position.input_name;
947 source_position.lineno = start_linenr;
948 errorf(&source_position, "at end of file while looking for comment end");
/* Skips the input up to the end of the line, treating comments as skippable.
 *
 * @param stop_at_non_whitespace alters the handling of characters that are
 *                               neither whitespace nor part of a comment */
959 static bool skip_till_newline(bool stop_at_non_whitespace)
971 if (input.c == '/') {
975 } else if (input.c == '*') {
977 skip_multiline_comment();
989 if (stop_at_non_whitespace)
/* Skips whitespace and comments while keeping the fields of `info` up to
 * date. */
998 static void skip_whitespace(void)
1004 ++info.whitespace_at_line_begin;
1005 info.had_whitespace = true;
/* a new line starts: reset the leading-whitespace counter */
1010 info.at_line_begin = true;
1011 info.had_whitespace = true;
1012 info.whitespace_at_line_begin = 0;
1017 if (input.c == '/') {
1019 skip_line_comment();
1021 } else if (input.c == '*') {
1023 skip_multiline_comment();
/* Consumes the current token, which must be the preprocessor keyword `kind`
 * (checked through the pp_ID of its symbol). */
1037 static inline void eat_pp(pp_token_kind_t const kind)
1039 assert(pp_token.base.symbol->pp_ID == kind);
/* Consumes the current token, which must be of kind `kind`. */
1044 static inline void eat_token(token_kind_t const kind)
1046 assert(pp_token.kind == kind);
/* Lexes an identifier, keyword, or an L-prefixed string/character literal
 * into pp_token.  Requires that no object is being grown on symbol_obstack. */
1051 static void parse_symbol(void)
1053 assert(obstack_object_size(&symbol_obstack) == 0);
1058 obstack_1grow(&symbol_obstack, (char) input.c);
/* universal character names inside identifiers: \U takes 8 digits, \u 4 */
1067 case 'U': n = 8; goto universal;
1068 case 'u': n = 4; goto universal;
/* with unresolved escapes the spelling is copied verbatim */
1070 if (!resolve_escape_sequences) {
1071 obstack_1grow(&symbol_obstack, '\\');
1072 obstack_1grow(&symbol_obstack, input.c);
1075 utf32 const v = parse_universal_char(n);
1076 if (!is_universal_char_valid_identifier(v)) {
/* parse_universal_char() already reported values that are invalid
 * altogether, so only valid-but-not-identifier values are reported here */
1077 if (is_universal_char_valid(v)) {
1078 errorf(&input.position,
1079 "universal character \\%c%0*X is not valid in an identifier",
1080 n == 4 ? 'u' : 'U', (int)n, v);
/* an empty obstack object means this would be the first character */
1082 } else if (obstack_object_size(&symbol_obstack) == 0 && !is_universal_char_valid_identifier_start(v)) {
1083 errorf(&input.position,
1084 "universal character \\%c%0*X is not valid as start of an identifier",
1085 n == 4 ? 'u' : 'U', (int)n, v);
1086 } else if (resolve_escape_sequences) {
1087 obstack_grow_utf8(&symbol_obstack, v);
1105 obstack_1grow(&symbol_obstack, '\0');
1106 char *string = obstack_finish(&symbol_obstack);
1108 /* might be a wide string or character constant ( L"string"/L'c' ) */
1109 if (input.c == '"' && string[0] == 'L' && string[1] == '\0') {
1110 obstack_free(&symbol_obstack, string);
1111 parse_string_literal(STRING_ENCODING_WIDE);
1113 } else if (input.c == '\'' && string[0] == 'L' && string[1] == '\0') {
1114 obstack_free(&symbol_obstack, string);
1115 parse_character_constant(STRING_ENCODING_WIDE);
1119 symbol_t *symbol = symbol_table_insert(string);
/* the token kind comes from the symbol's ID */
1121 pp_token.kind = symbol->ID;
1122 pp_token.base.symbol = symbol;
1124 /* we can free the memory from symbol obstack if we already had an entry in
1125 * the symbol table */
1126 if (symbol->string != string) {
1127 obstack_free(&symbol_obstack, string);
/* Lexes a preprocessing number into pp_token (kind T_NUMBER); the spelling is
 * collected on symbol_obstack and stored as a string. */
1131 static void parse_number(void)
1133 obstack_1grow(&symbol_obstack, (char) input.c);
1140 case SYMBOL_CASES_WITHOUT_E_P:
1141 obstack_1grow(&symbol_obstack, (char) input.c);
1149 obstack_1grow(&symbol_obstack, (char) input.c);
/* a sign is accepted at this point and becomes part of the number */
1151 if (input.c == '+' || input.c == '-') {
1152 obstack_1grow(&symbol_obstack, (char) input.c);
1164 pp_token.kind = T_NUMBER;
1165 pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
/* Helper macros used by next_input_token() to recognise punctuators that
 * consist of more than one character.  MAYBE sets a punctuator kind,
 * MAYBE_DIGRAPH additionally supplies the digraph symbol, and ELSE provides
 * the punctuator used when none of the preceding alternatives applied. */
1168 #define MAYBE_PROLOG \
1172 #define MAYBE(ch, kind) \
1175 set_punctuator(kind); \
1178 #define MAYBE_DIGRAPH(ch, kind, symbol) \
1181 set_digraph(kind, symbol); \
1184 #define ELSE_CODE(code) \
1189 #define ELSE(kind) ELSE_CODE(set_punctuator(kind); return;)
1191 /** identifies and returns the next preprocessing token contained in the
1192 * input stream. No macro expansion is performed. */
1193 static void next_input_token(void)
/* pending whitespace information in next_info is consumed (its flag is
 * cleared); otherwise the per-token flags start out false */
1195 if (next_info.had_whitespace) {
1197 next_info.had_whitespace = false;
1199 info.at_line_begin = false;
1200 info.had_whitespace = false;
/* the token starts at the current input position and has no symbol yet */
1203 pp_token.base.source_position = input.position;
1204 pp_token.base.symbol = NULL;
1209 info.whitespace_at_line_begin++;
1210 info.had_whitespace = true;
/* a new line starts: reset the leading-whitespace counter */
1215 info.at_line_begin = true;
1216 info.had_whitespace = true;
1217 info.whitespace_at_line_begin = 0;
1229 parse_string_literal(STRING_ENCODING_CHAR);
1233 parse_character_constant(STRING_ENCODING_CHAR);
1255 MAYBE('.', T_DOTDOTDOT)
1259 set_punctuator('.');
1265 MAYBE('&', T_ANDAND)
1266 MAYBE('=', T_ANDEQUAL)
1270 MAYBE('=', T_ASTERISKEQUAL)
1274 MAYBE('+', T_PLUSPLUS)
1275 MAYBE('=', T_PLUSEQUAL)
1279 MAYBE('>', T_MINUSGREATER)
1280 MAYBE('-', T_MINUSMINUS)
1281 MAYBE('=', T_MINUSEQUAL)
1285 MAYBE('=', T_EXCLAMATIONMARKEQUAL)
1289 MAYBE('=', T_SLASHEQUAL)
1292 skip_multiline_comment();
1296 skip_line_comment();
/* digraphs produce the kind of the punctuator they stand for, but keep their
 * own spelling through the digraph symbol */
1301 MAYBE_DIGRAPH('>', '}', symbol_percentgreater)
1302 MAYBE('=', T_PERCENTEQUAL)
1307 MAYBE_DIGRAPH(':', T_HASHHASH, symbol_percentcolonpercentcolon)
1311 goto digraph_percentcolon;
1314 digraph_percentcolon:
1315 set_digraph('#', symbol_percentcolon);
1321 MAYBE_DIGRAPH(':', '[', symbol_lesscolon)
1322 MAYBE_DIGRAPH('%', '{', symbol_lesspercent)
1323 MAYBE('=', T_LESSEQUAL)
1326 MAYBE('=', T_LESSLESSEQUAL)
1331 MAYBE('=', T_GREATEREQUAL)
1334 MAYBE('=', T_GREATERGREATEREQUAL)
1335 ELSE(T_GREATERGREATER)
1339 MAYBE('=', T_CARETEQUAL)
1343 MAYBE('=', T_PIPEEQUAL)
1344 MAYBE('|', T_PIPEPIPE)
1348 MAYBE_DIGRAPH('>', ']', symbol_colongreater)
/* T_COLONCOLON is only produced when the C++ bit is set in c_mode */
1350 if (c_mode & _CXX) {
1352 set_punctuator(T_COLONCOLON);
1359 MAYBE('=', T_EQUALEQUAL)
1363 MAYBE('#', T_HASHHASH)
1376 set_punctuator(input.c);
/* a suspended input exists: close the finished one and resume its parent */
1381 if (input_stack != NULL) {
1382 fclose(close_pp_input());
1383 pop_restore_input();
1386 if (input.c == (utf32)EOF)
1387 --input.position.lineno;
/* line directive with flag "2" for the input that is being resumed */
1388 print_line_directive(&input.position, "2");
1391 info.at_line_begin = true;
1392 set_punctuator(T_EOF);
1398 int next_c = input.c;
1401 if (next_c == 'U' || next_c == 'u') {
1408 if (error_on_unknown_chars) {
1409 errorf(&pp_token.base.source_position,
1410 "unknown character '%lc' found\n", input.c);
/* build a T_UNKNOWN_CHAR token whose symbol spells the character in UTF-8 */
1414 assert(obstack_object_size(&symbol_obstack) == 0);
1415 obstack_grow_utf8(&symbol_obstack, input.c);
1416 obstack_1grow(&symbol_obstack, '\0');
1417 char *const string = obstack_finish(&symbol_obstack);
1418 symbol_t *const symbol = symbol_table_insert(string);
/* the symbol table already had this spelling: drop the fresh copy */
1419 if (symbol->string != string)
1420 obstack_free(&symbol_obstack, string);
1422 pp_token.kind = T_UNKNOWN_CHAR;
1423 pp_token.base.symbol = symbol;
/* Writes `string` to `out` in escaped form.
 *
 * @param string NUL-terminated text to print */
1430 static void print_quoted_string(const char *const string)
1433 for (const char *c = string; *c != 0; ++c) {
/* characters with a dedicated escape sequence */
1435 case '"': fputs("\\\"", out); break;
1436 case '\\': fputs("\\\\", out); break;
1437 case '\a': fputs("\\a", out); break;
1438 case '\b': fputs("\\b", out); break;
1439 case '\f': fputs("\\f", out); break;
1440 case '\n': fputs("\\n", out); break;
1441 case '\r': fputs("\\r", out); break;
1442 case '\t': fputs("\\t", out); break;
1443 case '\v': fputs("\\v", out); break;
1444 case '\?': fputs("\\?", out); break;
/* Octal escape of the byte value.  Going through unsigned char first keeps
 * bytes >= 0x80 in the range 0..255 even where plain char is signed; a direct
 * conversion to unsigned would sign-extend them and print eleven octal
 * digits instead of three. */
1447 fprintf(out, "\\%03o", (unsigned)(unsigned char)*c);
/* Writes a line directive ("# <line> <name> ...") for `pos` to `out`.
 *
 * @param pos position to announce
 * @param add optional flag text; callers in this file pass "1", "2" or NULL */
1457 static void print_line_directive(const source_position_t *pos, const char *add)
1462 fprintf(out, "# %u ", pos->lineno);
1463 print_quoted_string(pos->input_name);
1468 if (pos->is_system_header) {
/* remember what was announced */
1472 printed_input_name = pos->input_name;
1473 input.output_line = pos->lineno-1;
/* Brings the output up to the line of pp_token and reproduces the leading
 * whitespace of that line.  emit_pp_token() consults the boolean result
 * before deciding about a separator between tokens. */
1476 static bool emit_newlines(void)
/* number of lines the output is behind the token's line */
1481 unsigned delta = pp_token.base.source_position.lineno - input.output_line;
1487 print_line_directive(&pp_token.base.source_position, NULL);
1490 for (unsigned i = 0; i < delta; ++i) {
1494 input.output_line = pp_token.base.source_position.lineno;
1496 unsigned whitespace = info.whitespace_at_line_begin;
1497 /* make sure there is at least 1 whitespace before a (macro-expanded)
1498 * '#' at line begin. I'm not sure why this is good, but gcc does it. */
1499 if (pp_token.kind == '#' && whitespace == 0)
1501 for (unsigned i = 0; i < whitespace; ++i)
/* Selects the preprocessor output stream and switches the lexer mode with it:
 * error_on_unknown_chars and resolve_escape_sequences are either both cleared
 * or both set.
 * NOTE(review): presumably they are cleared when `output` is non-NULL --
 * confirm against the condition. */
1507 void set_preprocessor_output(FILE *output)
1511 error_on_unknown_chars = false;
1512 resolve_escape_sequences = false;
1514 error_on_unknown_chars = true;
1515 resolve_escape_sequences = true;
/* Writes the spelling of pp_token to `out` and records its kind in
 * last_token. */
1519 void emit_pp_token(void)
/* tested only when emit_newlines() returned false: the token had whitespace
 * in front of it, or it would paste with the previously emitted token */
1521 if (!emit_newlines() &&
1522 (info.had_whitespace || tokens_would_paste(last_token, pp_token.kind)))
1525 switch (pp_token.kind) {
1527 fputs(pp_token.literal.string.begin, out);
1530 case T_STRING_LITERAL:
1531 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1533 fputs(pp_token.literal.string.begin, out);
1537 case T_CHARACTER_CONSTANT:
1538 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1540 fputs(pp_token.literal.string.begin, out);
/* parameters must have been substituted before a token gets here */
1544 case T_MACRO_PARAMETER:
1545 panic("macro parameter not expanded");
1548 fputs(pp_token.base.symbol->string, out);
1551 last_token = pp_token.kind;
/* Consumes the rest of the current directive: loops until a token at the
 * beginning of a line is reached. */
1554 static void eat_pp_directive(void)
1556 while (!info.at_line_begin) {
/* Compares two string_t values by size and then element by element over
 * `size` bytes, so embedded NUL bytes take part in the comparison. */
1561 static bool strings_equal(const string_t *string1, const string_t *string2)
1563 size_t size = string1->size;
1564 if (size != string2->size)
1567 const char *c1 = string1->begin;
1568 const char *c2 = string2->begin;
1569 for (size_t i = 0; i < size; ++i, ++c1, ++c2) {
/* Compares two preprocessing tokens: the kinds must match, then the payload
 * that belongs to that kind. */
1576 static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
1578 if (token1->kind != token2->kind)
1581 switch (token1->kind) {
1583 case T_CHARACTER_CONSTANT:
1584 case T_STRING_LITERAL:
1585 return strings_equal(&token1->literal.string, &token2->literal.string);
/* parameters of different definitions match when they have the same symbol */
1587 case T_MACRO_PARAMETER:
1588 return token1->macro_parameter.def->symbol
1589 == token2->macro_parameter.def->symbol;
1592 return token1->base.symbol == token2->base.symbol;
/* Compares the replacement lists of two macro definitions: same length, equal
 * tokens at every position, and the same had_whitespace flag for each. */
1596 static bool pp_definitions_equal(const pp_definition_t *definition1,
1597 const pp_definition_t *definition2)
1599 if (definition1->list_len != definition2->list_len)
1602 size_t len = definition1->list_len;
1603 const saved_token_t *t1 = definition1->token_list;
1604 const saved_token_t *t2 = definition2->token_list;
1605 for (size_t i = 0; i < len; ++i, ++t1, ++t2) {
1606 if (!pp_tokens_equal(&t1->token, &t2->token))
1608 if (t1->had_whitespace != t2->had_whitespace)
/* Reports, at the current token, a '#' in a macro body that is not followed
 * by a macro parameter. */
1614 static void missing_macro_param_error(void)
1616 errorf(&pp_token.base.source_position,
1617 "'#' is not followed by a macro parameter");
/* Checks whether the current token can serve as a macro name and reports an
 * error otherwise.
 *
 * @param context name of the directive, used in the diagnostics */
1620 static bool is_defineable_token(char const *const context)
/* a token at line begin means the directive line ended before a name */
1622 if (info.at_line_begin) {
1623 errorf(&pp_token.base.source_position, "unexpected end of line after %s", context);
1626 symbol_t *const symbol = pp_token.base.symbol;
/* non-identifier tokens are examined by the first character of their
 * spelling */
1630 if (pp_token.kind != T_IDENTIFIER) {
1631 switch (symbol->string[0]) {
1638 errorf(&pp_token.base.source_position, "expected identifier after %s, got %K", context, &pp_token);
1643 /* TODO turn this into a flag in pp_def. */
1644 switch (symbol->pp_ID) {
1647 errorf(&pp_token.base.source_position, "%K cannot be used as macro name in %s", &pp_token, context);
/* Parses a #define directive: the macro name, an optional parameter list and
 * the replacement list, then installs the definition on the macro's symbol.
 * Requires that no object is being grown on pp_obstack. */
1655 static void parse_define_directive(void)
1663 assert(obstack_object_size(&pp_obstack) == 0);
1665 if (!is_defineable_token("#define"))
1667 symbol_t *const symbol = pp_token.base.symbol;
1669 pp_definition_t *new_definition
1670 = obstack_alloc(&pp_obstack, sizeof(new_definition[0]));
1671 memset(new_definition, 0, sizeof(new_definition[0]));
1672 new_definition->symbol = symbol;
1673 new_definition->source_position = input.position;
1675 /* this is probably the only place where spaces are significant in the
1676 * lexer (except for the fact that they separate tokens). #define b(x)
1677 * is something else than #define b (x) */
1678 if (input.c == '(') {
1683 switch (pp_token.kind) {
1685 new_definition->is_variadic = true;
1686 eat_token(T_DOTDOTDOT);
1687 if (pp_token.kind != ')') {
1688 errorf(&input.position,
1689 "'...' not at end of macro argument list");
1694 case T_IDENTIFIER: {
/* parameter records are appended one after another to the object growing on
 * pp_obstack; together they become the parameters array */
1695 pp_definition_t parameter;
1696 memset(&parameter, 0, sizeof(parameter));
1697 parameter.source_position = pp_token.base.source_position;
1698 parameter.symbol = pp_token.base.symbol;
1699 parameter.is_parameter = true;
1700 obstack_grow(&pp_obstack, &parameter, sizeof(parameter));
1701 eat_token(T_IDENTIFIER);
1703 if (pp_token.kind == ',') {
1708 if (pp_token.kind != ')') {
1709 errorf(&pp_token.base.source_position,
1710 "expected ',' or ')' after identifier, got %K",
1719 goto finish_argument_list;
1722 errorf(&pp_token.base.source_position,
1723 "expected identifier, '...' or ')' in #define argument list, got %K",
1729 finish_argument_list:
1730 new_definition->has_parameters = true;
/* the parameter count follows from the size of the grown object */
1731 size_t size = obstack_object_size(&pp_obstack);
1732 new_definition->n_parameters
1733 = size / sizeof(new_definition->parameters[0]);
1734 new_definition->parameters = obstack_finish(&pp_obstack);
/* While the body is parsed each parameter temporarily takes over the
 * pp_definition of its symbol; the previous definition is parked in
 * parent_expansion and put back after the token list is complete. */
1735 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1736 pp_definition_t *param = &new_definition->parameters[i];
1737 symbol_t *symbol = param->symbol;
1738 pp_definition_t *previous = symbol->pp_definition;
/* the symbol is already bound to a parameter of this macro: duplicate name */
1739 if (previous != NULL
1740 && previous->function_definition == new_definition) {
1741 errorf(&param->source_position,
1742 "duplicate macro parameter '%Y'", symbol);
1743 param->symbol = sym_anonymous;
1746 param->parent_expansion = previous;
1747 param->function_definition = new_definition;
1748 symbol->pp_definition = param;
1754 /* construct token list */
1755 assert(obstack_object_size(&pp_obstack) == 0);
1756 bool next_must_be_param = false;
1757 while (!info.at_line_begin) {
/* identifiers bound to a parameter of this macro become T_MACRO_PARAMETER */
1758 if (pp_token.kind == T_IDENTIFIER) {
1759 const symbol_t *symbol = pp_token.base.symbol;
1760 pp_definition_t *definition = symbol->pp_definition;
1761 if (definition != NULL
1762 && definition->function_definition == new_definition) {
1763 pp_token.kind = T_MACRO_PARAMETER;
1764 pp_token.macro_parameter.def = definition;
1767 if (next_must_be_param && pp_token.kind != T_MACRO_PARAMETER) {
1768 missing_macro_param_error();
1770 saved_token_t saved_token;
1771 saved_token.token = pp_token;
1772 saved_token.had_whitespace = info.had_whitespace;
1773 obstack_grow(&pp_obstack, &saved_token, sizeof(saved_token));
/* in a macro with parameters, a '#' must be followed by a parameter */
1775 = new_definition->has_parameters && pp_token.kind == '#';
/* a '#' as the very last token has nothing following it */
1778 if (next_must_be_param)
1779 missing_macro_param_error();
1781 new_definition->list_len = obstack_object_size(&pp_obstack)
1782 / sizeof(new_definition->token_list[0]);
1783 new_definition->token_list = obstack_finish(&pp_obstack);
/* undo the temporary parameter bindings made above */
1785 if (new_definition->has_parameters) {
1786 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1787 pp_definition_t *param = &new_definition->parameters[i];
1788 symbol_t *symbol = param->symbol;
1789 if (symbol == sym_anonymous)
1791 assert(symbol->pp_definition == param);
1792 assert(param->function_definition == new_definition);
1793 symbol->pp_definition = param->parent_expansion;
1794 param->parent_expansion = NULL;
1798 pp_definition_t *old_definition = symbol->pp_definition;
1799 if (old_definition != NULL) {
1800 if (!pp_definitions_equal(old_definition, new_definition)) {
1801 warningf(WARN_OTHER, &input.position, "multiple definition of macro '%Y' (first defined %P)", symbol, &old_definition->source_position);
1803 /* reuse the old definition */
1804 obstack_free(&pp_obstack, new_definition);
1805 new_definition = old_definition;
1809 symbol->pp_definition = new_definition;
/* discard a partially grown object that was left on pp_obstack */
1813 if (obstack_object_size(&pp_obstack) > 0) {
1814 char *ptr = obstack_finish(&pp_obstack);
1815 obstack_free(&pp_obstack, ptr);
/**
 * Handle a #undef directive: clears the preprocessor definition attached to
 * the symbol of the current token and warns when further tokens follow on
 * the same line.
 */
1820 static void parse_undef_directive(void)
/* is_defineable_token() receives the directive name "#undef"; the body of
 * the rejecting branch is not visible in this excerpt. */
1828 if (!is_defineable_token("#undef")) {
/* forget whatever macro was bound to this symbol */
1833 pp_token.base.symbol->pp_definition = NULL;
/* anything left on the directive line only earns a warning */
1836 if (!info.at_line_begin) {
1837 warningf(WARN_OTHER, &input.position, "extra tokens at end of #undef directive");
1842 /** behind an #include we can have the special headername lexems.
1843 * They're only allowed behind an #include so they're not recognized
1844 * by the normal next_preprocessing_token. We handle them as a special
/**
 * Read the header name that follows #include.
 *
 * Two strategies are visible: the raw characters between '<'...'>' or
 * '"'...'"' are collected on symbol_obstack, or, as a fallback, tokens are
 * obtained through next_preprocessing_token() and glued back together.
 *
 * @param system_include  set to true for the <...> form and to false for
 *                        the "..." form / a string literal token
 * @return for a string literal token the literal's character data; the
 *         return statements of the other paths are not visible in this
 *         excerpt (presumably the identified header name, or NULL on
 *         error -- the caller checks for NULL).
 */
1846 static const char *parse_headername(bool *system_include)
1848 if (info.at_line_begin) {
1849 parse_error("expected headername after #include");
1853 /* check whether we have a "... or <... headername */
/* remember where the header name starts; pp_token's position is set to
 * this at the end */
1854 source_position_t position = input.position;
1858 case '<': delimiter = '>'; *system_include = true; goto parse_name;
1859 case '"': delimiter = '"'; *system_include = false; goto parse_name;
1861 assert(obstack_object_size(&symbol_obstack) == 0);
/* error path: throw away the partially collected name before reporting */
1868 char *dummy = obstack_finish(&symbol_obstack);
1869 obstack_free(&symbol_obstack, dummy);
1871 errorf(&pp_token.base.source_position,
1872 "header name without closing '%c'", (char)delimiter);
/* closing delimiter reached: the name is complete */
1876 if (input.c == delimiter) {
1878 goto finish_headername;
/* NOTE(review): input.c is narrowed to char here; buf in the input struct is
 * declared as utf32, so non-ASCII characters look like they get truncated --
 * confirm. */
1880 obstack_1grow(&symbol_obstack, (char)input.c);
1886 /* we should never be here */
/* fallback: let the regular tokenizer produce the next token */
1890 next_preprocessing_token();
1891 if (info.at_line_begin) {
1892 /* TODO: if we are already in the new line then we parsed more than
1893 * wanted. We reuse the token, but could produce following errors
1894 * misbehaviours... */
1895 goto error_invalid_input;
1897 if (pp_token.kind == T_STRING_LITERAL) {
1898 *system_include = false;
1899 return pp_token.literal.string.begin;
1900 } else if (pp_token.kind == '<') {
1901 *system_include = true;
/* tokens up to '>' are buffered on pp_obstack as saved_token_t entries */
1902 assert(obstack_object_size(&pp_obstack) == 0);
1904 next_preprocessing_token();
1905 if (info.at_line_begin) {
1906 /* TODO: we shouldn't have parsed/expanded something on the
1907 * next line yet... */
1908 char *dummy = obstack_finish(&pp_obstack);
1909 obstack_free(&pp_obstack, dummy);
1910 goto error_invalid_input;
1912 if (pp_token.kind == '>')
1915 saved_token_t saved;
1916 saved.token = pp_token;
1917 saved.had_whitespace = info.had_whitespace;
1918 obstack_grow(&pp_obstack, &saved, sizeof(saved));
/* turn the buffered tokens back into text on symbol_obstack */
1920 size_t size = obstack_object_size(&pp_obstack);
1921 assert(size % sizeof(saved_token_t) == 0);
1922 size_t n_tokens = size / sizeof(saved_token_t);
1923 saved_token_t *tokens = obstack_finish(&pp_obstack);
1924 assert(obstack_object_size(&symbol_obstack) == 0);
1925 for (size_t i = 0; i < n_tokens; ++i) {
1926 const saved_token_t *saved = &tokens[i];
/* whitespace between tokens is reproduced as a single space; none is
 * emitted before the first token */
1927 if (i > 0 && saved->had_whitespace)
1928 obstack_1grow(&symbol_obstack, ' ');
1929 grow_token(&symbol_obstack, &saved->token);
1931 obstack_free(&pp_obstack, tokens);
1932 goto finish_headername;
1934 error_invalid_input:
1936 char *dummy = obstack_finish(&symbol_obstack);
1937 obstack_free(&symbol_obstack, dummy);
1940 errorf(&pp_token.base.source_position,
1941 "expected \"FILENAME\" or <FILENAME> after #include");
/* finish: 0-terminate the collected name and pass it to identify_string() */
1947 obstack_1grow(&symbol_obstack, '\0');
1948 char *const headername = obstack_finish(&symbol_obstack);
1949 const char *identified = identify_string(headername);
1950 pp_token.base.source_position = position;
/**
 * Locate and open the file named by an #include and make it the current
 * input through switch_pp_input().
 *
 * For non-bracket includes the directory of the current input file is tried
 * first; afterwards the search path entries are tried one after another.
 *
 * @param bracket_include  true for the <...> form
 * @param include_next     how this flag influences the starting entry is
 *                         decided on lines not visible in this excerpt
 * @param headername       0-terminated header name
 * @return the return statements are not visible here; the caller treats the
 *         result as a success flag -- confirm.
 */
1954 static bool do_include(bool const bracket_include, bool const include_next, char const *const headername)
1956 size_t const headername_len = strlen(headername);
1957 searchpath_entry_t *entry;
/* continue behind the entry the current input was found in, if it has one;
 * otherwise start at the head of the bracket or quote list */
1959 entry = input.path ? input.path->next
1960 : bracket_include ? bracket_searchpath.first
1961 : quote_searchpath.first;
1963 if (!bracket_include) {
1964 /* put dirname of current input on obstack */
1965 const char *filename = input.position.input_name;
1966 const char *last_slash = strrchr(filename, '/');
1967 const char *full_name;
1968 if (last_slash != NULL) {
1969 size_t len = last_slash - filename;
/* len + 1 keeps the trailing '/' of the directory part */
1970 obstack_grow(&symbol_obstack, filename, len + 1);
1971 obstack_grow0(&symbol_obstack, headername, headername_len);
1972 char *complete_path = obstack_finish(&symbol_obstack);
1973 full_name = identify_string(complete_path);
/* current input has no directory component: use the name as given */
1975 full_name = headername;
1978 FILE *file = fopen(full_name, "r");
1980 switch_pp_input(file, full_name, NULL, false);
1983 entry = quote_searchpath.first;
1985 entry = bracket_searchpath.first;
1989 assert(obstack_object_size(&symbol_obstack) == 0);
1990 /* check searchpath */
1991 for (; entry; entry = entry->next) {
1992 const char *path = entry->path;
1993 size_t len = strlen(path);
1994 obstack_grow(&symbol_obstack, path, len);
/* NOTE(review): for an empty path (len == 0) this reads path[-1]; a guard
 * such as "len > 0 &&" looks necessary -- confirm that empty entries cannot
 * reach this point. */
1995 if (path[len-1] != '/')
1996 obstack_1grow(&symbol_obstack, '/');
/* headername_len+1 copies the terminating '\0' as well */
1997 obstack_grow(&symbol_obstack, headername, headername_len+1);
1999 char *complete_path = obstack_finish(&symbol_obstack);
2000 FILE *file = fopen(complete_path, "r");
2002 const char *filename = identify_string(complete_path);
2003 switch_pp_input(file, filename, entry, entry->is_system_path);
/* release the candidate path again */
2006 obstack_free(&symbol_obstack, complete_path);
/**
 * Handle #include / #include_next: reads the header name, warns about
 * trailing tokens, enforces INCLUDE_LIMIT and hands the name to
 * do_include().
 *
 * @param include_next  forwarded unchanged to do_include()
 */
2013 static void parse_include_directive(bool const include_next)
2020 /* do not eat the TP_include, since it would already parse the next token
2021 * which needs special handling here. */
2022 skip_till_newline(true);
2023 bool system_include;
2024 const char *headername = parse_headername(&system_include);
2025 if (headername == NULL) {
2030 bool had_nonwhitespace = skip_till_newline(false);
2031 if (had_nonwhitespace) {
2032 warningf(WARN_OTHER, &input.position,
2033 "extra tokens at end of #include directive");
/* refuse to nest deeper than INCLUDE_LIMIT inputs */
2036 if (n_inputs > INCLUDE_LIMIT) {
2037 errorf(&pp_token.base.source_position, "#include nested too deeply");
/* reset the whitespace bookkeeping to "start of a fresh line" */
2044 info.whitespace_at_line_begin = 0;
2045 info.had_whitespace = false;
2046 info.at_line_begin = true;
2049 bool res = do_include(system_include, include_next, headername);
/* NOTE(review): errno is read after do_include() has returned; calls made
 * there after the failing fopen() might have overwritten it -- confirm. */
2053 errorf(&pp_token.base.source_position, "failed including '%s': %s", headername, strerror(errno));
2054 pop_restore_input();
/**
 * Allocate a zero-initialized conditional record on pp_obstack and make it
 * the new top of conditional_stack.
 *
 * @return the return statement is not visible in this excerpt; callers use
 *         the result as the freshly pushed record.
 */
2058 static pp_conditional_t *push_conditional(void)
2060 pp_conditional_t *conditional
2061 = obstack_alloc(&pp_obstack, sizeof(*conditional));
2062 memset(conditional, 0, sizeof(*conditional));
/* link in front of the previous top */
2064 conditional->parent = conditional_stack;
2065 conditional_stack = conditional;
/**
 * Remove the top entry of conditional_stack. The stack must not be empty
 * (checked by assert only).
 */
2070 static void pop_conditional(void)
2072 assert(conditional_stack != NULL);
/* only the link is dropped; the record itself is not released here */
2073 conditional_stack = conditional_stack->parent;
/**
 * Report an error for every conditional that is still on conditional_stack.
 * The message depends on whether the conditional was already inside its
 * #else branch. The statement that advances the loop is not visible in this
 * excerpt.
 */
2076 void check_unclosed_conditionals(void)
2078 while (conditional_stack != NULL) {
2079 pp_conditional_t *conditional = conditional_stack;
2081 if (conditional->in_else) {
2082 errorf(&conditional->source_position, "unterminated #else");
2084 errorf(&conditional->source_position, "unterminated condition");
/**
 * Handle #ifdef (is_ifdef == true) and #ifndef (is_ifdef == false): pushes a
 * conditional record whose condition tells whether the named symbol has a
 * preprocessor definition (or, for #ifndef, has none).
 */
2090 static void parse_ifdef_ifndef_directive(bool const is_ifdef)
2093 eat_pp(is_ifdef ? TP_ifdef : TP_ifndef);
/* a conditional marked "skip" is pushed here; the guarding condition is not
 * visible in this excerpt (presumably: already in skip mode -- confirm) */
2097 pp_conditional_t *conditional = push_conditional();
2098 conditional->source_position = pp_token.base.source_position;
2099 conditional->skip = true;
2103 if (pp_token.kind != T_IDENTIFIER || info.at_line_begin) {
2104 errorf(&pp_token.base.source_position,
2105 "expected identifier after #%s, got %K",
2106 is_ifdef ? "ifdef" : "ifndef", &pp_token);
2109 /* just take the true case in the hope to avoid further errors */
2112 /* evaluate whether we are in true or false case */
/* (bool) collapses the pointer to defined/undefined before comparing it
 * with is_ifdef */
2113 condition = (bool)pp_token.base.symbol->pp_definition == is_ifdef;
2114 eat_token(T_IDENTIFIER);
2116 if (!info.at_line_begin) {
2117 errorf(&pp_token.base.source_position,
2118 "extra tokens at end of #%s",
2119 is_ifdef ? "ifdef" : "ifndef");
2124 pp_conditional_t *conditional = push_conditional();
2125 conditional->source_position = pp_token.base.source_position;
2126 conditional->condition = condition;
/**
 * Handle #else: validates that an open conditional exists and that it has
 * not seen an #else yet, then switches it into its else branch.
 */
2133 static void parse_else_directive(void)
2137 if (!info.at_line_begin) {
2139 warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #else");
2144 pp_conditional_t *conditional = conditional_stack;
2145 if (conditional == NULL) {
2146 errorf(&pp_token.base.source_position, "#else without prior #if");
2150 if (conditional->in_else) {
2151 errorf(&pp_token.base.source_position,
2152 "#else after #else (condition started %P)",
2153 &conditional->source_position);
2158 conditional->in_else = true;
/* unless the whole conditional is skipped, the else branch is skipped
 * exactly when the condition was true */
2159 if (!conditional->skip) {
2160 skip_mode = conditional->condition;
/* from now on diagnostics for this conditional point at the #else */
2162 conditional->source_position = pp_token.base.source_position;
/**
 * Handle #endif: warns about trailing tokens and reports an #endif that has
 * no open conditional. What happens for a conditional that is not marked
 * "skip" is on lines not visible in this excerpt.
 */
2165 static void parse_endif_directive(void)
2169 if (!info.at_line_begin) {
2171 warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #endif");
2176 pp_conditional_t *conditional = conditional_stack;
2177 if (conditional == NULL) {
2178 errorf(&pp_token.base.source_position, "#endif without prior #if");
2182 if (!conditional->skip) {
/** Kind of a "#pragma STDC ..." directive; parse_pragma_directive() also
 * uses STDC_UNKNOWN, STDC_FP_CONTRACT and STDC_FENV_ACCESS, whose
 * declarations are not visible in this excerpt. */
2188 typedef enum stdc_pragma_kind_t {
2192 STDC_CX_LIMITED_RANGE
2193 } stdc_pragma_kind_t;
/** Argument of a "#pragma STDC ..." directive; parse_pragma_directive() uses
 * STDC_VALUE_ON, STDC_VALUE_OFF, STDC_VALUE_DEFAULT and STDC_VALUE_UNKNOWN,
 * whose declarations are not visible in this excerpt. */
2195 typedef enum stdc_pragma_value_kind_t {
2200 } stdc_pragma_value_kind_t;
/**
 * Handle #pragma. Only the "STDC" pragmas FP_CONTRACT, FENV_ACCESS and
 * CX_LIMITED_RANGE with an ON/OFF/DEFAULT argument are classified, and only
 * when c_mode has _C99 set; everything else ends in an "unknown #pragma"
 * warning.
 */
2202 static void parse_pragma_directive(void)
2210 if (pp_token.kind != T_IDENTIFIER) {
2211 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
2212 "expected identifier after #pragma");
2217 stdc_pragma_kind_t kind = STDC_UNKNOWN;
/* '&' binds tighter than '&&': this tests (c_mode & _C99) */
2218 if (pp_token.base.symbol->pp_ID == TP_STDC && c_mode & _C99) {
2222 switch (pp_token.base.symbol->pp_ID) {
2223 case TP_FP_CONTRACT: kind = STDC_FP_CONTRACT; break;
2224 case TP_FENV_ACCESS: kind = STDC_FENV_ACCESS; break;
2225 case TP_CX_LIMITED_RANGE: kind = STDC_CX_LIMITED_RANGE; break;
2228 if (kind != STDC_UNKNOWN) {
2230 stdc_pragma_value_kind_t value;
2231 switch (pp_token.base.symbol->pp_ID) {
2232 case TP_ON: value = STDC_VALUE_ON; break;
2233 case TP_OFF: value = STDC_VALUE_OFF; break;
2234 case TP_DEFAULT: value = STDC_VALUE_DEFAULT; break;
2235 default: value = STDC_VALUE_UNKNOWN; break;
/* a known pragma with a bad argument is an error and is then also treated
 * as unknown */
2237 if (value == STDC_VALUE_UNKNOWN) {
2238 kind = STDC_UNKNOWN;
2239 errorf(&pp_token.base.source_position, "bad STDC pragma argument");
2244 if (kind == STDC_UNKNOWN) {
2245 warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.base.source_position,
2246 "encountered unknown #pragma");
/**
 * Handle "#line N ["file"] [flags]" and the "# N ..." form: updates the
 * line number of the current input, optionally its name, and the
 * system-header flag.
 */
2250 static void parse_line_directive(void)
2252 if (pp_token.kind != T_NUMBER) {
2254 parse_error("expected integer");
/* base 0 makes strtol() honour "0x" and leading-"0" (octal) prefixes.
 * NOTE(review): the C standard reads the #line digit sequence as decimal, so
 * "#line 010" would yield 8 here -- confirm whether base 10 is intended. */
2257 long const line = strtol(pp_token.literal.string.begin, &end, 0);
2259 /* use offset -1 as this is about the next line */
2260 input.position.lineno = line - 1;
2261 /* force output of line */
2262 input.output_line = input.position.lineno - 20;
2265 errorf(&input.position, "'%S' is not a valid line number",
2266 &pp_token.literal.string);
2270 if (info.at_line_begin)
/* an optional plain (non-prefixed) string literal renames the input and
 * clears the system-header flag */
2273 if (pp_token.kind == T_STRING_LITERAL
2274 && pp_token.literal.string.encoding == STRING_ENCODING_CHAR) {
2275 input.position.input_name = pp_token.literal.string.begin;
2276 input.position.is_system_header = false;
2279 /* attempt to parse numeric flags as outputted by gcc preprocessor */
2280 while (!info.at_line_begin && pp_token.kind == T_NUMBER) {
2282 * 1 - indicates start of a new file
2283 * 2 - indicates return from a file
2284 * 3 - indicates system header
2285 * 4 - indicates implicit extern "C" in C++ mode
2287 * currently we're only interested in "3"
2289 if (streq(pp_token.literal.string.begin, "3")) {
2290 input.position.is_system_header = true;
/**
 * Handle #error: re-assembles the tokens of the directive line into one
 * string on pp_obstack and reports it through errorf() with a leading '#'.
 * Escape sequence resolution is switched off while the line is read and
 * restored afterwards.
 */
2299 static void parse_error_directive(void)
2306 bool const old_resolve_escape_sequences = resolve_escape_sequences;
2307 resolve_escape_sequences = false;
/* the diagnostic is attributed to the position of the first token */
2309 source_position_t const pos = pp_token.base.source_position;
/* separate tokens by one space, but never start the message with one */
2311 if (info.had_whitespace && obstack_object_size(&pp_obstack) != 0)
2312 obstack_1grow(&pp_obstack, ' ');
2314 switch (pp_token.kind) {
2316 string_t const *const str = &pp_token.literal.string;
2317 obstack_grow(&pp_obstack, str->begin, str->size);
/* string and character literals get their delimiters and encoding prefix
 * back */
2323 case T_STRING_LITERAL: delim = '"'; goto string;
2324 case T_CHARACTER_CONSTANT: delim = '\''; goto string;
2326 string_t const *const str = &pp_token.literal.string;
2327 char const *const enc = get_string_encoding_prefix(str->encoding);
2328 obstack_printf(&pp_obstack, "%s%c%s%c", enc, delim, str->begin, delim);
2333 char const *const str = pp_token.base.symbol->string;
2334 obstack_grow(&pp_obstack, str, strlen(str));
2340 } while (!info.at_line_begin);
2342 resolve_escape_sequences = old_resolve_escape_sequences;
/* terminate, report, and release the temporary message */
2344 obstack_1grow(&pp_obstack, '\0');
2345 char *const str = obstack_finish(&pp_obstack);
2346 errorf(&pos, "#%s", str);
2347 obstack_free(&pp_obstack, str);
/**
 * Dispatch one preprocessing directive to its handler, selected by the
 * pp_ID of the directive name's symbol. A bare number is handled as a line
 * directive; anything else is reported as an invalid directive. On exit the
 * tokenizer is expected to be at the beginning of a line (asserted).
 */
2350 static void parse_preprocessing_directive(void)
2354 if (info.at_line_begin) {
2355 /* empty directive */
2359 if (pp_token.base.symbol) {
2360 switch (pp_token.base.symbol->pp_ID) {
2361 case TP_define: parse_define_directive(); break;
2362 case TP_else: parse_else_directive(); break;
2363 case TP_endif: parse_endif_directive(); break;
2364 case TP_error: parse_error_directive(); break;
2365 case TP_ifdef: parse_ifdef_ifndef_directive(true); break;
2366 case TP_ifndef: parse_ifdef_ifndef_directive(false); break;
2367 case TP_include: parse_include_directive(false); break;
2368 case TP_include_next: parse_include_directive(true); break;
/* "#line" consumes the keyword and then shares the code for "# <number>" */
2369 case TP_line: next_input_token(); goto line_directive;
2370 case TP_pragma: parse_pragma_directive(); break;
2371 case TP_undef: parse_undef_directive(); break;
2374 } else if (pp_token.kind == T_NUMBER) {
2376 parse_line_directive();
2380 errorf(&pp_token.base.source_position, "invalid preprocessing directive #%K", &pp_token);
2385 assert(info.at_line_begin);
/**
 * Close the macro argument that is currently being collected: the tokens
 * accumulated on pp_obstack become current_argument's token list.
 * The statement guarded by the NULL check is not visible in this excerpt.
 */
2388 static void finish_current_argument(void)
2390 if (current_argument == NULL)
/* element count = bytes on the obstack / size of one saved token */
2392 size_t size = obstack_object_size(&pp_obstack);
2393 current_argument->list_len = size/sizeof(current_argument->token_list[0]);
2394 current_argument->token_list = obstack_finish(&pp_obstack);
/**
 * Produce the next preprocessing token in pp_token.
 *
 * Visible responsibilities: run directives that start a line, honour
 * skip_mode, start macro expansions, and collect the arguments of a
 * function-like macro call (current_call / current_argument) before its
 * expansion starts.
 */
2397 void next_preprocessing_token(void)
/* expand_next() is tried first; the branch below runs when it yields
 * nothing */
2400 if (!expand_next()) {
/* a '#' at the beginning of a line starts a directive */
2403 while (pp_token.kind == '#' && info.at_line_begin) {
2404 parse_preprocessing_directive();
/* while skipping, keep reading until skip_mode ends or EOF is reached */
2406 } while (skip_mode && pp_token.kind != T_EOF);
2409 const token_kind_t kind = pp_token.kind;
/* macros are expanded outside of argument collection, or while an argument
 * itself is being expanded */
2410 if (current_call == NULL || argument_expanding != NULL) {
2411 symbol_t *const symbol = pp_token.base.symbol;
2413 if (kind == T_MACRO_PARAMETER) {
2414 assert(current_expansion != NULL);
2415 start_expanding(pp_token.macro_parameter.def);
2419 pp_definition_t *const pp_definition = symbol->pp_definition;
/* is_expanding guards against re-expanding a macro inside itself */
2420 if (pp_definition != NULL && !pp_definition->is_expanding) {
2421 if (pp_definition->has_parameters) {
2423 /* check if next token is a '(' */
2424 whitespace_info_t old_info = info;
2425 token_kind_t next_token = peek_expansion();
2426 if (next_token == T_EOF) {
2427 info.at_line_begin = false;
2428 info.had_whitespace = false;
2430 if (input.c == '(') {
2435 if (next_token == '(') {
/* remember where the outermost expansion started, for diagnostics */
2436 if (current_expansion == NULL)
2437 expansion_pos = pp_token.base.source_position;
2438 next_preprocessing_token();
2439 assert(pp_token.kind == '(');
/* begin collecting arguments for this call */
2441 pp_definition->parent_expansion = current_expansion;
2442 current_call = pp_definition;
2443 current_call->expand_pos = 0;
2444 current_call->expand_info = old_info;
2445 if (current_call->n_parameters > 0) {
2446 current_argument = ¤t_call->parameters[0];
2447 assert(argument_brace_count == 0);
2451 /* skip_whitespaces() skipped newlines and whitespace,
2452 * remember results for next token */
2458 if (current_expansion == NULL)
2459 expansion_pos = pp_token.base.source_position;
2460 start_expanding(pp_definition);
2467 if (current_call != NULL) {
2468 /* current_call != NULL */
/* argument_brace_count tracks nesting so that only a top-level ')' or ','
 * ends an argument */
2470 ++argument_brace_count;
2471 } else if (kind == ')') {
2472 if (argument_brace_count > 0) {
2473 --argument_brace_count;
/* closing parenthesis of the call: finish the last argument and start
 * expanding the macro body */
2475 finish_current_argument();
2476 assert(kind == ')');
2477 start_expanding(current_call);
2478 info = current_call->expand_info;
2479 current_call = NULL;
2480 current_argument = NULL;
2483 } else if (kind == ',' && argument_brace_count == 0) {
/* top-level comma: advance to the next parameter slot */
2484 finish_current_argument();
2485 current_call->expand_pos++;
2486 if (current_call->expand_pos >= current_call->n_parameters) {
2487 errorf(&pp_token.base.source_position,
2488 "too many arguments passed for macro '%Y'",
2489 current_call->symbol);
2490 current_argument = NULL;
2493 = ¤t_call->parameters[current_call->expand_pos];
2496 } else if (kind == T_MACRO_PARAMETER) {
2497 /* parameters have to be fully expanded before being used as
2498 * parameters for another macro-call */
2499 assert(current_expansion != NULL);
2500 pp_definition_t *argument = pp_token.macro_parameter.def;
2501 argument_expanding = argument;
2502 start_expanding(argument);
2504 } else if (kind == T_EOF) {
2505 errorf(&expansion_pos,
2506 "reached end of file while parsing arguments for '%Y'",
2507 current_call->symbol);
/* every other token becomes part of the argument being collected */
2510 if (current_argument != NULL) {
2511 saved_token_t saved;
2512 saved.token = pp_token;
2513 saved.had_whitespace = info.had_whitespace;
2514 obstack_grow(&pp_obstack, &saved, sizeof(saved));
/**
 * Append a new entry to the end of a search path list.
 *
 * @param paths  list to extend; its is_system_path flag is copied into the
 *               new entry
 * @param path   directory name; the statement that stores it in the entry is
 *               not visible in this excerpt
 */
2520 void append_include_path(searchpath_t *paths, const char *path)
/* entries live on config_obstack and start out zeroed */
2522 searchpath_entry_t *entry = OALLOCZ(&config_obstack, searchpath_entry_t);
2524 entry->is_system_path = paths->is_system_path;
/* anchor points at the slot that receives the next entry: fill it, then
 * move it to the new entry's next pointer */
2526 *paths->anchor = entry;
2527 paths->anchor = &entry->next;
/**
 * Append the ':'-separated directories found in an environment variable to
 * a search path list. An unset or empty variable adds nothing.
 *
 * @param paths   list to extend
 * @param envvar  name of the environment variable to read
 */
2530 static void append_env_paths(searchpath_t *paths, const char *envvar)
2532 const char *val = getenv(envvar);
2533 if (val != NULL && *val != '\0') {
2534 const char *begin = val;
/* advance c to the end of the current entry */
2538 while (*c != '\0' && *c != ':')
2541 size_t len = c-begin;
2543 /* use "." for gcc compatibility (Matze: I would expect that
2544 * nothing happens for an empty entry...) */
2545 append_include_path(paths, ".");
/* the entry is copied (0-terminated) because it is only a slice of the
 * environment string */
2547 char *const string = obstack_copy0(&config_obstack, begin, len);
2548 append_include_path(paths, string);
2555 } while(*c != '\0');
/**
 * Chain the entries of @p append behind @p path by storing append's first
 * entry in path's anchor slot. Entries are shared, not copied, and path's
 * anchor itself is left unchanged.
 */
2559 static void append_searchpath(searchpath_t *path, const searchpath_t *append)
2561 *path->anchor = append->first;
2564 static void setup_include_path(void)
2566 /* built-in paths */
2567 append_include_path(&system_searchpath, "/usr/include");
2569 /* parse environment variable */
2570 append_env_paths(&bracket_searchpath, "CPATH");
2571 append_env_paths(&system_searchpath,
2572 c_mode & _CXX ? "CPLUS_INCLUDE_PATH" : "C_INCLUDE_PATH");
2574 /* append system search path to bracket searchpath */
2575 append_searchpath(&system_searchpath, &after_searchpath);
2576 append_searchpath(&bracket_searchpath, &system_searchpath);
2577 append_searchpath("e_searchpath, &bracket_searchpath);
2580 static void input_error(unsigned const delta_lines, unsigned const delta_cols, char const *const message)
2582 source_position_t pos = pp_token.base.source_position;
2583 pos.lineno += delta_lines;
2584 pos.colno += delta_cols;
2585 errorf(&pos, "%s", message);
/**
 * Initialize config_obstack, the obstack that search path entries are
 * allocated from (see append_include_path()). It has to run before any
 * include path is appended.
 */
2588 void init_include_paths(void)
2590 obstack_init(&config_obstack);
/**
 * Initialize the preprocessor state: the token and input obstacks and the
 * string set, then the include search paths, and finally register
 * input_error() as the input error callback. Further initialization steps
 * may exist on lines not visible in this excerpt.
 */
2593 void init_preprocessor(void)
2597 obstack_init(&pp_obstack);
2598 obstack_init(&input_obstack);
2599 strset_init(&stringset);
/* allocates from config_obstack, which init_include_paths() sets up */
2601 setup_include_path();
2603 set_input_error_callback(input_error);
/**
 * Release everything the preprocessor allocated: the input, token and
 * configuration obstacks and the string set.
 */
2606 void exit_preprocessor(void)
/* a NULL object pointer frees the whole obstack */
2608 obstack_free(&input_obstack, NULL);
2609 obstack_free(&pp_obstack, NULL);
2610 obstack_free(&config_obstack, NULL);
2612 strset_destroy(&stringset);
/**
 * Entry point of the stand-alone preprocessor test driver: parses a minimal
 * command line (-I, -E, -o, one input file), preprocesses the input and
 * writes the result to the selected output.
 *
 * @param argc  number of entries in argv
 * @param argv  command line arguments; argv[0] is skipped
 * @return the return statements are not visible in this excerpt.
 */
2615 int pptest_main(int argc, char **argv);
2616 int pptest_main(int argc, char **argv)
2618 init_symbol_table();
2619 init_include_paths();
2620 init_preprocessor();
/* the driver keeps unknown characters and escape sequences as they are */
2623 error_on_unknown_chars = false;
2624 resolve_escape_sequences = false;
2626 /* simplistic commandline parser */
2627 const char *filename = NULL;
2628 const char *output = NULL;
2629 for (int i = 1; i < argc; ++i) {
2630 const char *opt = argv[i];
2631 if (streq(opt, "-I")) {
/* NOTE(review): no check that an argument follows "-I"; when it is the last
 * option, argv[++i] is the terminating NULL pointer -- confirm and guard. */
2632 append_include_path(&bracket_searchpath, argv[++i]);
2634 } else if (streq(opt, "-E")) {
2636 } else if (streq(opt, "-o")) {
2639 } else if (opt[0] == '-') {
2640 fprintf(stderr, "Unknown option '%s'\n", opt);
2642 if (filename != NULL)
2643 fprintf(stderr, "Multiple inputs not supported\n");
2647 if (filename == NULL) {
2648 fprintf(stderr, "No input specified\n");
2652 if (output == NULL) {
2655 out = fopen(output, "w");
2657 fprintf(stderr, "Couldn't open output '%s'\n", output);
2662 /* just here for gcc compatibility */
2663 fprintf(out, "# 1 \"%s\"\n", filename);
2664 fprintf(out, "# 1 \"<built-in>\"\n");
2665 fprintf(out, "# 1 \"<command-line>\"\n");
2667 FILE *file = fopen(filename, "r");
2669 fprintf(stderr, "Couldn't open input '%s'\n", filename);
2672 switch_pp_input(file, filename, NULL, false);
/* main loop: pull tokens until end of file */
2675 next_preprocessing_token();
2676 if (pp_token.kind == T_EOF)
/* report conditionals left open at end of input, then close the input */
2682 check_unclosed_conditionals();
2683 fclose(close_pp_input());
2688 exit_preprocessor();
2689 exit_symbol_table();