12 #include "adt/error.h"
13 #include "adt/strutil.h"
14 #include "adt/strset.h"
15 #include "lang_features.h"
16 #include "diagnostic.h"
17 #include "string_rep.h"
21 #define INCLUDE_LIMIT 199 /* 199 is for gcc "compatibility" */
/* A token stored in a macro replacement list, kept together with the
 * had_whitespace flag recorded for it (see parse_define_directive). */
23 typedef struct saved_token_t {
/* Whitespace and line-start state tracked for a token. */
28 typedef struct whitespace_info_t {
29 /** current token had whitespace in front of it */
31 /** current token is at the beginning of a line.
32 * => a "#" at line begin starts a preprocessing directive. */
34 /** number of spaces before the first token in a line */
35 unsigned whitespace_at_line_begin;
/* A macro definition. The same struct also describes each parameter of a
 * function-like macro (is_parameter set). */
38 struct pp_definition_t {
40 source_position_t source_position;
/* enclosing expansion while this definition is being expanded; for a
 * parameter it temporarily holds the symbol's previous definition while the
 * #define is being parsed */
41 pp_definition_t *parent_expansion;
43 whitespace_info_t expand_info;
/* set by start_expanding(), cleared by finished_expanding() */
45 bool is_expanding : 1;
46 bool has_parameters : 1;
47 bool is_parameter : 1;
/* for a parameter: the function-like macro it belongs to */
48 pp_definition_t *function_definition;
50 pp_definition_t *parameters;
/* replacement tokens of the definition */
54 saved_token_t *token_list;
/* One entry of the stack of open preprocessor conditionals. */
57 typedef struct pp_conditional_t pp_conditional_t;
58 struct pp_conditional_t {
59 source_position_t source_position;
62 /** conditional in skip mode (then+else gets skipped) */
/* enclosing conditional (the previous top of conditional_stack) */
64 pp_conditional_t *parent;
/* State of one input being read. */
67 typedef struct pp_input_t pp_input_t;
/* decoded characters; decode() fills the buffer starting at index
 * MAX_PUTBACK, which leaves room in front for put_back() */
72 utf32 buf[1024+MAX_PUTBACK];
75 source_position_t position;
/* Singly linked list of include search directories (walked by do_include). */
80 typedef struct searchpath_entry_t searchpath_entry_t;
81 struct searchpath_entry_t {
83 searchpath_entry_t *next;
/* the input currently being read */
86 static pp_input_t input;
/* saved copies of suspended inputs, most recently pushed first
 * (see push_input / pop_restore_input) */
88 static pp_input_t *input_stack;
/* compared against INCLUDE_LIMIT before an #include is followed */
89 static unsigned n_inputs;
90 static struct obstack input_obstack;
/* innermost open conditional, linked through pp_conditional_t.parent */
92 static pp_conditional_t *conditional_stack;
/* the token most recently produced by the lexer or by macro expansion */
94 static token_t pp_token;
95 static bool resolve_escape_sequences = false;
96 static bool error_on_unknown_chars = true;
/* while set, next_preprocessing_token keeps fetching tokens until T_EOF */
97 static bool skip_mode;
/* holds macro definitions, their token lists and conditionals */
99 static struct obstack pp_obstack;
100 static struct obstack config_obstack;
101 static const char *printed_input_name = NULL;
/* source position stamped onto every token produced by expand_next() */
102 static source_position_t expansion_pos;
/* innermost macro expansion in progress, NULL when none */
103 static pp_definition_t *current_expansion = NULL;
104 static pp_definition_t *current_call = NULL;
105 static pp_definition_t *current_argument = NULL;
106 static pp_definition_t *argument_expanding = NULL;
107 static unsigned argument_brace_count;
/* interned strings (see identify_string) */
108 static strset_t stringset;
/* kind of the last emitted token, used by emit_pp_token to decide whether a
 * separating space is needed */
109 static token_kind_t last_token;
111 static searchpath_entry_t *searchpath;
113 static whitespace_info_t next_info; /* valid if had_whitespace is true */
114 static whitespace_info_t info;
116 static inline void next_char(void);
117 static void next_input_token(void);
118 static void print_line_directive(const source_position_t *pos, const char *add);
/* symbols carrying the digraph spellings; filled in by init_symbols() */
120 static symbol_t *symbol_colongreater;
121 static symbol_t *symbol_lesscolon;
122 static symbol_t *symbol_lesspercent;
123 static symbol_t *symbol_percentcolon;
124 static symbol_t *symbol_percentcolonpercentcolon;
125 static symbol_t *symbol_percentgreater;
/**
 * Enters the six digraph spellings into the symbol table and remembers the
 * resulting symbols; next_input_token attaches them to digraph tokens through
 * set_digraph().
 */
127 static void init_symbols(void)
129 symbol_colongreater = symbol_table_insert(":>");
130 symbol_lesscolon = symbol_table_insert("<:");
131 symbol_lesspercent = symbol_table_insert("<%");
132 symbol_percentcolon = symbol_table_insert("%:");
133 symbol_percentcolonpercentcolon = symbol_table_insert("%:%:");
134 symbol_percentgreater = symbol_table_insert("%>");
/**
 * Makes the given stream the current input and announces it with a line
 * directive.
 *
 * @param file      the stream to read from
 * @param filename  name recorded as the input name of source positions
 */
137 static void switch_input(FILE *file, const char *filename)
140 input.input = input_from_stream(file, NULL);
143 input.output_line = 0;
144 input.position.input_name = filename;
145 input.position.lineno = 1;
147 /* indicate that we're at a new input */
/* the flag "1" is added when another input is already on the input stack */
148 print_line_directive(&input.position, input_stack != NULL ? "1" : NULL);
150 /* place a virtual '\n' so we realize we're at line begin */
151 input.position.lineno = 0;
/**
 * Releases the decoder of the current input. The input must have an
 * associated file.
 */
155 static void close_input(void)
157 input_free(input.input);
158 assert(input.file != NULL);
/**
 * Saves a copy of the current input state on input_obstack and pushes it on
 * input_stack.
 */
168 static void push_input(void)
170 pp_input_t *saved_input
171 = obstack_alloc(&input_obstack, sizeof(*saved_input));
173 memcpy(saved_input, &input, sizeof(*saved_input));
175 /* adjust buffer positions */
/* the copied pointers still refer to input.buf; rebase them onto the copy's
 * own buffer while keeping the same offsets */
176 if (input.bufpos != NULL)
177 saved_input->bufpos = saved_input->buf + (input.bufpos - input.buf);
178 if (input.bufend != NULL)
179 saved_input->bufend = saved_input->buf + (input.bufend - input.buf);
181 saved_input->parent = input_stack;
182 input_stack = saved_input;
/**
 * Restores the most recently pushed input state into the current input,
 * unlinks it from input_stack and frees its storage. Requires a non-empty
 * stack.
 */
186 static void pop_restore_input(void)
188 assert(n_inputs > 0);
189 assert(input_stack != NULL);
191 pp_input_t *saved_input = input_stack;
193 memcpy(&input, saved_input, sizeof(input));
196 /* adjust buffer positions */
/* rebase the copied pointers from the saved buffer onto input.buf */
197 if (saved_input->bufpos != NULL)
198 input.bufpos = input.buf + (saved_input->bufpos - saved_input->buf);
199 if (saved_input->bufend != NULL)
200 input.bufend = input.buf + (saved_input->bufend - saved_input->buf);
202 input_stack = saved_input->parent;
203 obstack_free(&input_obstack, saved_input);
208 * Prints a parse error message at the current token.
210 * @param msg the error message
/* msg is handed to errorf() through "%s", so it is printed verbatim and never
 * interpreted as a format string; the position is that of pp_token */
212 static void parse_error(const char *msg)
214 errorf(&pp_token.base.source_position, "%s", msg);
/**
 * Reads the next decoded character into input.c and advances the column
 * counter. No trigraph or line-concatenation handling happens here.
 */
217 static inline void next_real_char(void)
219 assert(input.bufpos <= input.bufend);
/* buffer exhausted: decode the next chunk behind the MAX_PUTBACK slots that
 * are kept free at the front of the buffer */
220 if (input.bufpos >= input.bufend) {
221 size_t const n = decode(input.input, input.buf + MAX_PUTBACK, lengthof(input.buf) - MAX_PUTBACK);
226 input.bufpos = input.buf + MAX_PUTBACK;
227 input.bufend = input.bufpos + n;
229 input.c = *input.bufpos++;
230 ++input.position.colno;
234 * Put a character back into the buffer.
236 * @param pc the character to put back
/* Steps bufpos back by one slot and stores pc there so that the next read
 * yields it again; the column counter is rewound accordingly. The buffer
 * holds utf32 values, so pc is stored unmodified: casting it to char would
 * truncate every code point outside the char range. */
238 static inline void put_back(utf32 const pc)
240 assert(input.bufpos > input.buf);
241 *--input.bufpos = pc;
242 --input.position.colno;
248 if (input.c == '\n') { \
252 ++input.position.lineno; \
253 input.position.colno = 1; \
255 newline // Let it look like an ordinary case label.
/* Consumes the current input character; asserts that it is c_type. */
257 #define eat(c_type) (assert(input.c == c_type), next_char())
/**
 * Called by next_char() after a backslash was read (directly or through the
 * "??/" trigraph).
 * NOTE(review): presumably joins the line with the following one when the
 * backslash ends the line — confirm against the full body.
 */
259 static void maybe_concat_lines(void)
265 info.whitespace_at_line_begin = 0;
277 * Set c to the next input character, ie.
278 * after expanding trigraphs.
280 static inline void next_char(void)
284 /* filter trigraphs and concatenated lines */
285 if (UNLIKELY(input.c == '\\')) {
286 maybe_concat_lines();
287 goto end_of_next_char;
/* anything that is not '?' cannot start a trigraph */
290 if (LIKELY(input.c != '?'))
291 goto end_of_next_char;
/* a single '?' that is not followed by a second one is no trigraph either */
294 if (LIKELY(input.c != '?')) {
297 goto end_of_next_char;
/* "??x": replace the sequence by the character it stands for */
302 case '=': input.c = '#'; break;
303 case '(': input.c = '['; break;
/* "??/" yields a backslash, which may in turn start a line concatenation */
304 case '/': input.c = '\\'; maybe_concat_lines(); break;
305 case ')': input.c = ']'; break;
306 case '\'': input.c = '^'; break;
307 case '<': input.c = '{'; break;
308 case '!': input.c = '|'; break;
309 case '>': input.c = '}'; break;
310 case '-': input.c = '~'; break;
/* NOTE(review): traces every character; presumably only compiled in for
 * debugging — confirm */
320 printf("nchar '%c'\n", input.c);
327 * Returns true if the given char is an octal digit.
329 * @param chr the character to check
331 static inline bool is_octal_digit(int chr)
349 * Returns the value of a digit.
350 * The only portable way to do it ...
352 static int digit_value(int digit)
/* reached for a character that is no digit: callers must check first */
378 panic("wrong character given");
383 * Parses an octal character sequence.
385 * @param first_digit the already read first digit
387 static utf32 parse_octal_sequence(const utf32 first_digit)
389 assert(is_octal_digit(first_digit));
390 utf32 value = digit_value(first_digit);
/* at most two further octal digits are folded into the value; the sequence
 * ends at the first character that is not an octal digit */
391 if (!is_octal_digit(input.c)) return value;
392 value = 8 * value + digit_value(input.c);
394 if (!is_octal_digit(input.c)) return value;
395 value = 8 * value + digit_value(input.c);
402 * Parses a hex character sequence.
/* Accumulates hexadecimal digits from the input, 4 bits per digit, for as
 * long as the current character is one. input.c is a full code point, while
 * isxdigit() is only defined for values representable as unsigned char (or
 * EOF), so non-ASCII characters are rejected before the call. */
404 static utf32 parse_hex_sequence(void)
407 while (input.c < 0x80 && isxdigit(input.c)) {
408 value = 16 * value + digit_value(input.c);
415 * Parse an escape sequence.
/* Dispatches on the character following the backslash and returns the
 * character value the escape stands for. */
417 static utf32 parse_escape_sequence(void)
421 utf32 const ec = input.c;
/* simple escapes map directly to their character value */
425 case '"': return '"';
426 case '\'': return '\'';
427 case '\\': return '\\';
428 case '?': return '\?';
429 case 'a': return '\a';
430 case 'b': return '\b';
431 case 'f': return '\f';
432 case 'n': return '\n';
433 case 'r': return '\r';
434 case 't': return '\t';
435 case 'v': return '\v';
/* numeric escapes are delegated to the hex and octal helpers */
437 return parse_hex_sequence();
446 return parse_octal_sequence(ec);
448 parse_error("reached end of file while parsing escape sequence");
450 /* \E is not documented, but handled, by GCC. It is acceptable according
451 * to §6.11.4, whereas \e is not. */
455 return 27; /* hopefully 27 is ALWAYS the code for ESCAPE */
459 parse_error("universal character parsing not implemented yet");
464 /* §6.4.4.4:8 footnote 64 */
465 parse_error("unknown escape sequence");
/**
 * Interns a string that was built on symbol_obstack.
 *
 * @param string  the finished obstack object holding the string
 */
469 static const char *identify_string(char *string)
471 const char *result = strset_insert(&stringset, string);
/* the set already held an equal string: release the new copy again */
472 if (result != string) {
473 obstack_free(&symbol_obstack, string);
/**
 * Finishes the string currently growing on symbol_obstack, interns it and
 * returns it as a string_t.
 *
 * @param enc  the encoding recorded in the result
 */
478 static string_t sym_make_string(string_encoding_t const enc)
480 obstack_1grow(&symbol_obstack, '\0');
/* the length does not count the terminating '\0' appended above */
481 size_t const len = obstack_object_size(&symbol_obstack) - 1;
482 char *const string = obstack_finish(&symbol_obstack);
483 char const *const result = identify_string(string);
484 return (string_t){ result, len, enc };
/**
 * Lexes a quoted literal up to its closing delimiter and stores the result in
 * pp_token.
 *
 * @param delimiter  the quote character that ends the literal
 * @param kind       token kind assigned to pp_token
 * @param enc        encoding of the literal
 * @param context    description of the literal used in error messages
 */
487 static void parse_string(utf32 const delimiter, token_kind_t const kind,
488 string_encoding_t const enc,
489 char const *const context)
/* remembered so that an EOF error can point at the line the literal began on */
491 const unsigned start_linenr = input.position.lineno;
/* escapes are either resolved to their value or copied through unchanged,
 * depending on resolve_escape_sequences */
498 if (resolve_escape_sequences) {
499 utf32 const tc = parse_escape_sequence();
500 if (enc == STRING_ENCODING_CHAR) {
502 warningf(WARN_OTHER, &pp_token.base.source_position, "escape sequence out of range");
504 obstack_1grow(&symbol_obstack, tc);
506 obstack_grow_utf8(&symbol_obstack, tc);
509 obstack_1grow(&symbol_obstack, (char)input.c);
511 obstack_1grow(&symbol_obstack, (char)input.c);
518 errorf(&pp_token.base.source_position, "newline while parsing %s", context);
522 source_position_t source_position;
523 source_position.input_name = pp_token.base.source_position.input_name;
524 source_position.lineno = start_linenr;
525 errorf(&source_position, "EOF while parsing %s", context);
530 if (input.c == delimiter) {
/* ordinary characters are stored UTF-8 encoded */
534 obstack_grow_utf8(&symbol_obstack, input.c);
542 pp_token.kind = kind;
543 pp_token.literal.string = sym_make_string(enc);
/** Lexes a double-quoted string literal of the given encoding into pp_token. */
546 static void parse_string_literal(string_encoding_t const enc)
548 parse_string('"', T_STRING_LITERAL, enc, "string literal");
/**
 * Lexes a single-quoted character constant of the given encoding into
 * pp_token and reports an error when it contains no character.
 */
551 static void parse_character_constant(string_encoding_t const enc)
553 parse_string('\'', T_CHARACTER_CONSTANT, enc, "character constant");
554 if (pp_token.literal.string.size == 0) {
555 parse_error("empty character constant");
/* Case-label lists used inside the lexer's switch statements. SYMBOL_CASES
 * builds on SYMBOL_CASES_WITHOUT_E_P; parse_number uses the latter directly.
 * NOTE(review): presumably E_P names the exponent letters e/E/p/P, which
 * parse_number must treat separately — confirm. */
559 #define SYMBOL_CASES_WITHOUT_E_P \
610 #define SYMBOL_CASES \
611 SYMBOL_CASES_WITHOUT_E_P: \
617 #define DIGIT_CASES \
/**
 * Begins expanding a definition: links it on top of the chain of running
 * expansions and resets its read position.
 *
 * @param definition  the definition to expand
 */
629 static void start_expanding(pp_definition_t *definition)
631 definition->parent_expansion = current_expansion;
632 definition->expand_pos = 0;
633 definition->is_expanding = true;
/* the first replacement token takes over the whitespace state of the token
 * that triggered the expansion */
634 if (definition->list_len > 0) {
635 definition->token_list[0].had_whitespace
636 = info.had_whitespace;
638 current_expansion = definition;
/**
 * Ends the expansion of a definition, which must be the innermost running
 * one, and makes its parent the current expansion again.
 *
 * @param definition  the definition whose expansion is complete
 */
641 static void finished_expanding(pp_definition_t *definition)
643 assert(definition->is_expanding);
644 pp_definition_t *parent = definition->parent_expansion;
645 definition->parent_expansion = NULL;
646 definition->is_expanding = false;
648 /* stop further expanding once we expanded a parameter used in a
650 if (definition == argument_expanding)
651 argument_expanding = NULL;
653 assert(current_expansion == definition);
654 current_expansion = parent;
// Turns pp_token into the punctuator of the given kind, using the symbol
// registered for that kind in token_symbols.
657 static inline void set_punctuator(token_kind_t const kind)
659 pp_token.kind = kind;
660 pp_token.base.symbol = token_symbols[kind];
// Turns pp_token into a punctuator of the given kind while recording the
// digraph symbol that was actually written in the source.
663 static inline void set_digraph(token_kind_t const kind, symbol_t *const symbol)
665 pp_token.kind = kind;
666 pp_token.base.symbol = symbol;
670 * returns next final token from a preprocessor macro expansion
// Loads the next saved token of the innermost running expansion into
// pp_token. Expansions whose token list is exhausted are finished first.
// The token is stamped with expansion_pos as its source position.
672 static bool expand_next(void)
674 if (current_expansion == NULL)
678 size_t pos = current_expansion->expand_pos;
679 if (pos >= current_expansion->list_len) {
680 finished_expanding(current_expansion);
681 /* it was the outermost expansion, parse pptoken normally */
682 if (current_expansion == NULL) {
687 const saved_token_t *saved = &current_expansion->token_list[pos++];
688 pp_token = saved->token;
// the first token keeps the whitespace state already held in info
690 if (current_expansion->expand_pos > 0)
691 info.had_whitespace = saved->had_whitespace;
692 pp_token.base.source_position = expansion_pos;
693 ++current_expansion->expand_pos;
699 * Returns the next token kind found when continuing the current expansions
700 * without starting new sub-expansions.
702 static token_kind_t peek_expansion(void)
704 pp_definition_t *expansion = current_expansion;
/* skip over expansions that have no tokens left, walking outwards */
705 while (expansion != NULL && expansion->expand_pos >= expansion->list_len) {
706 expansion = expansion->parent_expansion;
708 if (expansion == NULL)
710 return expansion->token_list[expansion->expand_pos].token.kind;
/** Skips a line comment; the comment counts as whitespace before the next
 * token. */
713 static void skip_line_comment(void)
715 info.had_whitespace = true;
/**
 * Skips a block comment; the comment counts as whitespace before the next
 * token. Reaching the end of the file inside the comment is reported at the
 * line the comment started on.
 */
732 static void skip_multiline_comment(void)
734 info.had_whitespace = true;
736 unsigned start_linenr = input.position.lineno;
741 if (input.c == '*') {
742 /* TODO: nested comment, warn here */
747 if (input.c == '/') {
/* the comment ended on a line other than the current output line: record the
 * column as the indentation of the line */
748 if (input.position.lineno != input.output_line)
749 info.whitespace_at_line_begin = input.position.colno;
759 source_position_t source_position;
760 source_position.input_name = pp_token.base.source_position.input_name;
761 source_position.lineno = start_linenr;
762 errorf(&source_position, "at end of file while looking for comment end");
/**
 * Skips input up to the end of the line, treating comments as whitespace.
 *
 * @param stop_at_non_whitespace  whether to stop early at the first character
 *                                that is not whitespace
 * NOTE(review): parse_include_directive uses the result as "the line held
 * something other than whitespace" — confirm against the full body.
 */
773 static bool skip_till_newline(bool stop_at_non_whitespace)
785 if (input.c == '/') {
789 } else if (input.c == '*') {
791 skip_multiline_comment();
803 if (stop_at_non_whitespace)
/**
 * Skips whitespace and comments while keeping the whitespace and line-begin
 * state in info up to date.
 */
812 static void skip_whitespace(void)
818 ++info.whitespace_at_line_begin;
819 info.had_whitespace = true;
/* a new line starts: reset the indentation count */
824 info.at_line_begin = true;
825 info.had_whitespace = true;
826 info.whitespace_at_line_begin = 0;
831 if (input.c == '/') {
835 } else if (input.c == '*') {
837 skip_multiline_comment();
/** Consumes the current token, which must be the preprocessor keyword kind. */
851 static inline void eat_pp(pp_token_kind_t const kind)
853 assert(pp_token.base.symbol->pp_ID == kind);
/** Consumes the current token, which must be of the given kind. */
858 static inline void eat_token(token_kind_t const kind)
860 assert(pp_token.kind == kind);
/**
 * Lexes an identifier into pp_token. A lone "L" directly followed by a quote
 * is instead treated as the prefix of a wide string literal or wide character
 * constant.
 */
865 static void parse_symbol(void)
867 obstack_1grow(&symbol_obstack, (char) input.c);
874 obstack_1grow(&symbol_obstack, (char) input.c);
884 obstack_1grow(&symbol_obstack, '\0');
885 char *string = obstack_finish(&symbol_obstack);
887 /* might be a wide string or character constant ( L"string"/L'c' ) */
888 if (input.c == '"' && string[0] == 'L' && string[1] == '\0') {
889 obstack_free(&symbol_obstack, string);
890 parse_string_literal(STRING_ENCODING_WIDE);
892 } else if (input.c == '\'' && string[0] == 'L' && string[1] == '\0') {
893 obstack_free(&symbol_obstack, string);
894 parse_character_constant(STRING_ENCODING_WIDE);
898 symbol_t *symbol = symbol_table_insert(string);
/* the token kind comes from the symbol's ID */
900 pp_token.kind = symbol->ID;
901 pp_token.base.symbol = symbol;
903 /* we can free the memory from symbol obstack if we already had an entry in
904 * the symbol table */
905 if (symbol->string != string) {
906 obstack_free(&symbol_obstack, string);
/**
 * Lexes a preprocessing number into pp_token as a T_NUMBER token; the
 * spelling is stored as a string and is not converted to a value here.
 */
910 static void parse_number(void)
912 obstack_1grow(&symbol_obstack, (char) input.c);
919 case SYMBOL_CASES_WITHOUT_E_P:
920 obstack_1grow(&symbol_obstack, (char) input.c);
928 obstack_1grow(&symbol_obstack, (char) input.c);
/* a sign at this point is kept as part of the number */
930 if (input.c == '+' || input.c == '-') {
931 obstack_1grow(&symbol_obstack, (char) input.c);
942 pp_token.kind = T_NUMBER;
943 pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
/* Helper macros for multi-character punctuators in next_input_token:
 * MAYBE(ch, kind) selects the punctuator kind, MAYBE_DIGRAPH additionally
 * records the digraph symbol, and ELSE(kind) supplies the fallback
 * punctuator. */
946 #define MAYBE_PROLOG \
950 #define MAYBE(ch, kind) \
953 set_punctuator(kind); \
956 #define MAYBE_DIGRAPH(ch, kind, symbol) \
959 set_digraph(kind, symbol); \
962 #define ELSE_CODE(code) \
968 #define ELSE(kind) ELSE_CODE(set_punctuator(kind);)
970 /** identifies and returns the next preprocessing token contained in the
971 * input stream. No macro expansion is performed. */
972 static void next_input_token(void)
/* pending whitespace information is consumed; otherwise the token starts
 * without whitespace and not at the beginning of a line */
974 if (next_info.had_whitespace) {
976 next_info.had_whitespace = false;
978 info.at_line_begin = false;
979 info.had_whitespace = false;
/* the token starts at the current input position */
982 pp_token.base.source_position = input.position;
983 pp_token.base.symbol = NULL;
988 info.whitespace_at_line_begin++;
989 info.had_whitespace = true;
/* a new line starts: reset the indentation count */
994 info.at_line_begin = true;
995 info.had_whitespace = true;
996 info.whitespace_at_line_begin = 0;
1008 parse_string_literal(STRING_ENCODING_CHAR);
1012 parse_character_constant(STRING_ENCODING_CHAR);
/* punctuators: the MAYBE macros pick the longer token when the following
 * character matches */
1034 MAYBE('.', T_DOTDOTDOT)
1038 set_punctuator('.');
1043 MAYBE('&', T_ANDAND)
1044 MAYBE('=', T_ANDEQUAL)
1048 MAYBE('=', T_ASTERISKEQUAL)
1052 MAYBE('+', T_PLUSPLUS)
1053 MAYBE('=', T_PLUSEQUAL)
1057 MAYBE('>', T_MINUSGREATER)
1058 MAYBE('-', T_MINUSMINUS)
1059 MAYBE('=', T_MINUSEQUAL)
1063 MAYBE('=', T_EXCLAMATIONMARKEQUAL)
1067 MAYBE('=', T_SLASHEQUAL)
1070 skip_multiline_comment();
1074 skip_line_comment();
/* digraphs yield the same token kind as the punctuator they stand for, but
 * keep their own spelling as symbol */
1079 MAYBE_DIGRAPH('>', '}', symbol_percentgreater)
1080 MAYBE('=', T_PERCENTEQUAL)
1085 MAYBE_DIGRAPH(':', T_HASHHASH, symbol_percentcolonpercentcolon)
1089 goto digraph_percentcolon;
1092 digraph_percentcolon:
1093 set_digraph('#', symbol_percentcolon);
1098 MAYBE_DIGRAPH(':', '[', symbol_lesscolon)
1099 MAYBE_DIGRAPH('%', '{', symbol_lesspercent)
1100 MAYBE('=', T_LESSEQUAL)
1103 MAYBE('=', T_LESSLESSEQUAL)
1108 MAYBE('=', T_GREATEREQUAL)
1111 MAYBE('=', T_GREATERGREATEREQUAL)
1112 ELSE(T_GREATERGREATER)
1116 MAYBE('=', T_CARETEQUAL)
1120 MAYBE('=', T_PIPEEQUAL)
1121 MAYBE('|', T_PIPEPIPE)
1125 MAYBE_DIGRAPH('>', ']', symbol_colongreater)
/* "::" is only a token when compiling C++ */
1127 if (c_mode & _CXX) {
1129 set_punctuator(T_COLONCOLON);
1136 MAYBE('=', T_EQUALEQUAL)
1140 MAYBE('#', T_HASHHASH)
/* single-character punctuators use the character itself as token kind */
1153 set_punctuator(input.c);
/* end of the current input: when a saved input exists it is restored and a
 * line directive with flag "2" is printed */
1158 if (input_stack != NULL) {
1160 pop_restore_input();
1162 if (input.c == (utf32)EOF)
1163 --input.position.lineno;
1164 print_line_directive(&input.position, "2");
1167 info.at_line_begin = true;
1168 set_punctuator(T_EOF);
/* a character that starts no known token: optionally report it, then wrap it
 * in a T_UNKNOWN_CHAR token whose symbol is its UTF-8 spelling */
1173 if (error_on_unknown_chars) {
1174 errorf(&pp_token.base.source_position,
1175 "unknown character '%lc' found\n", input.c);
1179 assert(obstack_object_size(&symbol_obstack) == 0);
1180 obstack_grow_utf8(&symbol_obstack, input.c);
1181 obstack_1grow(&symbol_obstack, '\0');
1182 char *const string = obstack_finish(&symbol_obstack);
1183 symbol_t *const symbol = symbol_table_insert(string);
/* the symbol existed already: the new copy of the spelling is not needed */
1184 if (symbol->string != string)
1185 obstack_free(&symbol_obstack, string);
1187 pp_token.kind = T_UNKNOWN_CHAR;
1188 pp_token.base.symbol = symbol;
/**
 * Writes a string to the output with the characters handled below replaced
 * by C escape sequences.
 *
 * @param string  the NUL-terminated string to print
 */
1195 static void print_quoted_string(const char *const string)
1198 for (const char *c = string; *c != 0; ++c) {
1200 case '"': fputs("\\\"", out); break;
1201 case '\\': fputs("\\\\", out); break;
1202 case '\a': fputs("\\a", out); break;
1203 case '\b': fputs("\\b", out); break;
1204 case '\f': fputs("\\f", out); break;
1205 case '\n': fputs("\\n", out); break;
1206 case '\r': fputs("\\r", out); break;
1207 case '\t': fputs("\\t", out); break;
1208 case '\v': fputs("\\v", out); break;
1209 case '\?': fputs("\\?", out); break;
/* go through unsigned char first: where char is signed, a byte >= 0x80 would
 * otherwise be sign-extended and printed as an over-long octal number instead
 * of a three-digit escape */
1212 fprintf(out, "\\%03o", (unsigned)(unsigned char)*c);
/**
 * Prints a "# <line> <file>" directive for the given position and records
 * what was printed.
 *
 * @param pos  the position to announce
 * @param add  optional flag text; callers pass "1", "2" or NULL
 */
1222 static void print_line_directive(const source_position_t *pos, const char *add)
1224 fprintf(out, "# %u ", pos->lineno);
1225 print_quoted_string(pos->input_name);
1231 printed_input_name = pos->input_name;
/* the output now stands just before line pos->lineno */
1232 input.output_line = pos->lineno-1;
/**
 * Brings the output to the line of pp_token, either with a fresh line
 * directive or with a run of newlines, and re-creates the indentation of the
 * line.
 * NOTE(review): presumably returns true when a line break was emitted;
 * emit_pp_token omits the separating space in that case — confirm.
 */
1235 static bool emit_newlines(void)
/* number of lines the output lags behind the token */
1237 unsigned delta = pp_token.base.source_position.lineno - input.output_line;
1243 print_line_directive(&pp_token.base.source_position, NULL);
1246 for (unsigned i = 0; i < delta; ++i) {
1250 input.output_line = pp_token.base.source_position.lineno;
1252 for (unsigned i = 0; i < info.whitespace_at_line_begin; ++i)
/**
 * Writes the spelling of pp_token to the output and remembers its kind in
 * last_token.
 */
1258 static void emit_pp_token(void)
/* without a line break, separation is needed when the source had whitespace
 * or when the token would otherwise paste with the previous one */
1260 if (!emit_newlines() &&
1261 (info.had_whitespace || tokens_would_paste(last_token, pp_token.kind)))
1264 switch (pp_token.kind) {
1266 fputs(pp_token.literal.string.begin, out);
/* string and character literals are printed with their encoding prefix */
1269 case T_STRING_LITERAL:
1270 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1272 fputs(pp_token.literal.string.begin, out);
1276 case T_CHARACTER_CONSTANT:
1277 fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
1279 fputs(pp_token.literal.string.begin, out);
/* parameters must have been replaced before a token reaches the output */
1283 case T_MACRO_PARAMETER:
1284 panic("macro parameter not expanded");
1287 fputs(pp_token.base.symbol->string, out);
1290 last_token = pp_token.kind;
/** Runs until the current token is at the beginning of a line, i.e. until
 * the rest of the directive has been passed. */
1293 static void eat_pp_directive(void)
1295 while (!info.at_line_begin) {
/**
 * Compares two string_t values: first their sizes, then their contents
 * position by position.
 */
1300 static bool strings_equal(const string_t *string1, const string_t *string2)
1302 size_t size = string1->size;
1303 if (size != string2->size)
1306 const char *c1 = string1->begin;
1307 const char *c2 = string2->begin;
1308 for (size_t i = 0; i < size; ++i, ++c1, ++c2) {
/**
 * Compares two preprocessing tokens. Tokens of different kinds never match.
 */
1315 static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
1317 if (token1->kind != token2->kind)
1320 switch (token1->kind) {
/* literals are compared by content */
1322 case T_CHARACTER_CONSTANT:
1323 case T_STRING_LITERAL:
1324 return strings_equal(&token1->literal.string, &token2->literal.string);
/* parameters are compared by the symbol of the parameter they refer to */
1326 case T_MACRO_PARAMETER:
1327 return token1->macro_parameter.def->symbol
1328 == token2->macro_parameter.def->symbol;
/* everything else is compared by symbol */
1331 return token1->base.symbol == token2->base.symbol;
/**
 * Compares the replacement lists of two definitions: the lengths must match
 * and the tokens are compared pairwise with pp_tokens_equal().
 */
1335 static bool pp_definitions_equal(const pp_definition_t *definition1,
1336 const pp_definition_t *definition2)
1338 if (definition1->list_len != definition2->list_len)
1341 size_t len = definition1->list_len;
1342 const saved_token_t *t1 = definition1->token_list;
1343 const saved_token_t *t2 = definition2->token_list;
1344 for (size_t i = 0; i < len; ++i, ++t1, ++t2) {
1345 if (!pp_tokens_equal(&t1->token, &t2->token))
/**
 * Parses a #define directive: the macro name, an optional parameter list and
 * the replacement token list. The finished definition is attached to the
 * macro's symbol. A redefinition with a different replacement list is warned
 * about.
 */
1351 static void parse_define_directive(void)
1359 assert(obstack_object_size(&pp_obstack) == 0);
1361 if (pp_token.kind != T_IDENTIFIER || info.at_line_begin) {
1362 errorf(&pp_token.base.source_position,
1363 "expected identifier after #define, got %K", &pp_token);
1366 symbol_t *const symbol = pp_token.base.symbol;
1368 pp_definition_t *new_definition
1369 = obstack_alloc(&pp_obstack, sizeof(new_definition[0]));
1370 memset(new_definition, 0, sizeof(new_definition[0]));
1371 new_definition->symbol = symbol;
1372 new_definition->source_position = input.position;
1374 /* this is probably the only place where spaces are significant in the
1375 * lexer (except for the fact that they separate tokens). #define b(x)
1376 * is something else than #define b (x) */
1377 if (input.c == '(') {
1378 eat_token(T_IDENTIFIER);
1382 switch (pp_token.kind) {
1384 new_definition->is_variadic = true;
1385 eat_token(T_DOTDOTDOT);
1386 if (pp_token.kind != ')') {
1387 errorf(&input.position,
1388 "'...' not at end of macro argument list");
1393 case T_IDENTIFIER: {
/* parameters are collected as an array of definitions on pp_obstack */
1394 pp_definition_t parameter;
1395 memset(&parameter, 0, sizeof(parameter));
1396 parameter.source_position = pp_token.base.source_position;
1397 parameter.symbol = pp_token.base.symbol;
1398 parameter.is_parameter = true;
1399 obstack_grow(&pp_obstack, &parameter, sizeof(parameter));
1400 eat_token(T_IDENTIFIER);
1402 if (pp_token.kind == ',') {
1407 if (pp_token.kind != ')') {
1408 errorf(&pp_token.base.source_position,
1409 "expected ',' or ')' after identifier, got %K",
1418 goto finish_argument_list;
1421 errorf(&pp_token.base.source_position,
1422 "expected identifier, '...' or ')' in #define argument list, got %K",
1428 finish_argument_list:
1429 new_definition->has_parameters = true;
1430 size_t size = obstack_object_size(&pp_obstack);
1431 new_definition->n_parameters
1432 = size / sizeof(new_definition->parameters[0]);
1433 new_definition->parameters = obstack_finish(&pp_obstack);
/* bind each parameter to its symbol for the duration of the directive; the
 * symbol's previous definition is parked in parent_expansion */
1434 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1435 pp_definition_t *param = &new_definition->parameters[i];
1436 symbol_t *symbol = param->symbol;
1437 pp_definition_t *previous = symbol->pp_definition;
/* the symbol is already bound to a parameter of this macro */
1438 if (previous != NULL
1439 && previous->function_definition == new_definition) {
1440 errorf(&param->source_position,
1441 "duplicate macro parameter '%Y'", symbol);
1442 param->symbol = sym_anonymous;
1445 param->parent_expansion = previous;
1446 param->function_definition = new_definition;
1447 symbol->pp_definition = param;
1450 eat_token(T_IDENTIFIER);
1453 /* construct token list */
1454 assert(obstack_object_size(&pp_obstack) == 0);
1455 while (!info.at_line_begin) {
/* identifiers naming a parameter of this macro become T_MACRO_PARAMETER */
1456 if (pp_token.kind == T_IDENTIFIER) {
1457 const symbol_t *symbol = pp_token.base.symbol;
1458 pp_definition_t *definition = symbol->pp_definition;
1459 if (definition != NULL
1460 && definition->function_definition == new_definition) {
1461 pp_token.kind = T_MACRO_PARAMETER;
1462 pp_token.macro_parameter.def = definition;
1465 saved_token_t saved_token;
1466 saved_token.token = pp_token;
1467 saved_token.had_whitespace = info.had_whitespace;
1468 obstack_grow(&pp_obstack, &saved_token, sizeof(saved_token));
1472 new_definition->list_len = obstack_object_size(&pp_obstack)
1473 / sizeof(new_definition->token_list[0]);
1474 new_definition->token_list = obstack_finish(&pp_obstack);
/* undo the temporary parameter bindings made above */
1476 if (new_definition->has_parameters) {
1477 for (size_t i = 0; i < new_definition->n_parameters; ++i) {
1478 pp_definition_t *param = &new_definition->parameters[i];
1479 symbol_t *symbol = param->symbol;
1480 if (symbol == sym_anonymous)
1482 assert(symbol->pp_definition == param);
1483 assert(param->function_definition == new_definition);
1484 symbol->pp_definition = param->parent_expansion;
1485 param->parent_expansion = NULL;
1489 pp_definition_t *old_definition = symbol->pp_definition;
1490 if (old_definition != NULL) {
1491 if (!pp_definitions_equal(old_definition, new_definition)) {
1492 warningf(WARN_OTHER, &input.position, "multiple definition of macro '%Y' (first defined %P)", symbol, &old_definition->source_position);
1494 /* reuse the old definition */
1495 obstack_free(&pp_obstack, new_definition);
1496 new_definition = old_definition;
1500 symbol->pp_definition = new_definition;
/* drop whatever is still growing on pp_obstack */
1504 if (obstack_object_size(&pp_obstack) > 0) {
1505 char *ptr = obstack_finish(&pp_obstack);
1506 obstack_free(&pp_obstack, ptr);
/**
 * Parses an #undef directive: removes the macro definition attached to the
 * named symbol and warns about anything following the name on the line.
 */
1511 static void parse_undef_directive(void)
1519 if (pp_token.kind != T_IDENTIFIER) {
1520 errorf(&input.position,
1521 "expected identifier after #undef, got %K", &pp_token);
1526 pp_token.base.symbol->pp_definition = NULL;
1527 eat_token(T_IDENTIFIER);
1529 if (!info.at_line_begin) {
1530 warningf(WARN_OTHER, &input.position, "extra tokens at end of #undef directive");
1535 /** behind an #include we can have the special headername lexemes.
1536 * They're only allowed behind an #include so they're not recognized
1537 * by the normal next_preprocessing_token. We handle them as a special
1539 static void parse_headername(void)
// the token is reported at the position where the header name started
1541 const source_position_t start_position = input.position;
1542 string_t string = { NULL, 0, STRING_ENCODING_CHAR };
1543 assert(obstack_object_size(&symbol_obstack) == 0);
1545 if (info.at_line_begin) {
1546 parse_error("expected headername after #include");
1550 /* check whether we have a "... or <... headername */
/* the opening character determines the closing delimiter */
1554 case '<': delimiter = '>'; goto parse_name;
1555 case '"': delimiter = '"'; goto parse_name;
1562 errorf(&pp_token.base.source_position, "header name without closing '%c'", (char)delimiter);
1566 if (input.c == delimiter) {
1568 goto finished_headername;
1570 obstack_1grow(&symbol_obstack, (char)input.c);
1576 /* we should never be here */
1580 /* TODO: do normal pp_token parsing and concatenate results */
1581 panic("pp_token concat include not implemented yet");
1584 finished_headername:
1585 string = sym_make_string(STRING_ENCODING_CHAR);
/* string.begin stays NULL when no header name could be read */
1588 pp_token.base.source_position = start_position;
1589 pp_token.kind = T_HEADERNAME;
1590 pp_token.literal.string = string;
/**
 * Locates and opens an included file and makes it the current input.
 *
 * @param system_include  true for the <...> form: the directory of the
 *                        including file is then not tried first
 * @param headername      the name given in the directive
 */
1593 static bool do_include(bool system_include, const char *headername)
1595 size_t headername_len = strlen(headername);
1596 if (!system_include) {
1597 /* put dirname of current input on obstack */
1598 const char *filename = input.position.input_name;
1599 const char *last_slash = strrchr(filename, '/');
1600 if (last_slash != NULL) {
/* len + 1 keeps the trailing '/' of the directory part */
1601 size_t len = last_slash - filename;
1602 obstack_grow(&symbol_obstack, filename, len + 1);
1603 obstack_grow0(&symbol_obstack, headername, headername_len);
1604 char *complete_path = obstack_finish(&symbol_obstack);
1605 headername = identify_string(complete_path);
1608 FILE *file = fopen(headername, "r");
1610 switch_input(file, headername);
1615 assert(obstack_object_size(&symbol_obstack) == 0);
1616 /* check searchpath */
1617 for (searchpath_entry_t *entry = searchpath; entry != NULL;
1618 entry = entry->next) {
1619 const char *path = entry->path;
1620 size_t len = strlen(path);
1621 obstack_grow(&symbol_obstack, path, len);
/* an empty entry has no last character to inspect; it is used as is, which
 * makes the header name relative to the working directory */
1622 if (len > 0 && path[len-1] != '/')
1623 obstack_1grow(&symbol_obstack, '/');
/* headername_len+1 copies the terminating '\0' as well */
1624 obstack_grow(&symbol_obstack, headername, headername_len+1);
1626 char *complete_path = obstack_finish(&symbol_obstack);
1627 FILE *file = fopen(complete_path, "r");
1629 const char *filename = identify_string(complete_path);
1630 switch_input(file, filename);
/* this candidate path is not needed any more */
1633 obstack_free(&symbol_obstack, complete_path);
/**
 * Parses an #include directive and switches to the included file. Extra
 * tokens after the header name, too deep nesting and files that cannot be
 * included are diagnosed.
 */
1640 static void parse_include_directive(void)
1647 /* don't eat the TP_include here!
1648 * we need an alternative parsing for the next token */
1649 skip_till_newline(true);
/* a '<' selects the system include form */
1650 bool system_include = input.c == '<';
1652 string_t headername = pp_token.literal.string;
1653 if (headername.begin == NULL) {
1658 bool had_nonwhitespace = skip_till_newline(false);
1659 if (had_nonwhitespace) {
1660 warningf(WARN_OTHER, &pp_token.base.source_position,
1661 "extra tokens at end of #include directive");
1664 if (n_inputs > INCLUDE_LIMIT) {
1665 errorf(&pp_token.base.source_position, "#include nested too deeply");
/* the new input starts at the beginning of a line */
1672 info.whitespace_at_line_begin = 0;
1673 info.had_whitespace = false;
1674 info.at_line_begin = true;
1677 bool res = do_include(system_include, pp_token.literal.string.begin);
/* on failure the suspended input is taken up again */
1681 errorf(&pp_token.base.source_position, "failed including '%S': %s", &pp_token.literal.string, strerror(errno));
1682 pop_restore_input();
/**
 * Allocates a zero-initialised conditional on pp_obstack and pushes it onto
 * conditional_stack.
 */
1686 static pp_conditional_t *push_conditional(void)
1688 pp_conditional_t *conditional
1689 = obstack_alloc(&pp_obstack, sizeof(*conditional));
1690 memset(conditional, 0, sizeof(*conditional));
1692 conditional->parent = conditional_stack;
1693 conditional_stack = conditional;
/** Removes the innermost conditional from conditional_stack, which must not
 * be empty. */
1698 static void pop_conditional(void)
1700 assert(conditional_stack != NULL);
1701 conditional_stack = conditional_stack->parent;
/**
 * Reports every conditional that is still open; the message depends on
 * whether its #else was already seen.
 */
1704 static void check_unclosed_conditionals(void)
1706 while (conditional_stack != NULL) {
1707 pp_conditional_t *conditional = conditional_stack;
1709 if (conditional->in_else) {
1710 errorf(&conditional->source_position, "unterminated #else");
1712 errorf(&conditional->source_position, "unterminated condition");
/**
 * Parses an #ifdef or #ifndef directive and pushes the corresponding
 * conditional.
 *
 * @param is_ifdef  true for #ifdef, false for #ifndef
 */
1718 static void parse_ifdef_ifndef_directive(bool const is_ifdef)
1721 eat_pp(is_ifdef ? TP_ifdef : TP_ifndef);
/* NOTE(review): presumably the path taken while already skipping — the
 * conditional is only recorded, marked as skipped — confirm */
1725 pp_conditional_t *conditional = push_conditional();
1726 conditional->source_position = pp_token.base.source_position;
1727 conditional->skip = true;
1731 if (pp_token.kind != T_IDENTIFIER || info.at_line_begin) {
1732 errorf(&pp_token.base.source_position,
1733 "expected identifier after #%s, got %K",
1734 is_ifdef ? "ifdef" : "ifndef", &pp_token);
1737 /* just take the true case in the hope to avoid further errors */
1740 /* evaluate whether we are in true or false case */
/* true when "the symbol has a definition" agrees with is_ifdef */
1741 condition = (bool)pp_token.base.symbol->pp_definition == is_ifdef;
1742 eat_token(T_IDENTIFIER);
1744 if (!info.at_line_begin) {
1745 errorf(&pp_token.base.source_position,
1746 "extra tokens at end of #%s",
1747 is_ifdef ? "ifdef" : "ifndef");
1752 pp_conditional_t *conditional = push_conditional();
1753 conditional->source_position = pp_token.base.source_position;
1754 conditional->condition = condition;
/**
 * Parses an #else directive. An #else without an open conditional and a
 * second #else for the same conditional are reported as errors.
 */
1761 static void parse_else_directive(void)
1765 if (!info.at_line_begin) {
1767 warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #else");
1772 pp_conditional_t *conditional = conditional_stack;
1773 if (conditional == NULL) {
1774 errorf(&pp_token.base.source_position, "#else without prior #if");
1778 if (conditional->in_else) {
1779 errorf(&pp_token.base.source_position,
1780 "#else after #else (condition started %P)",
1781 &conditional->source_position);
1786 conditional->in_else = true;
/* the else part is skipped exactly when the condition was true */
1787 if (!conditional->skip) {
1788 skip_mode = conditional->condition;
1790 conditional->source_position = pp_token.base.source_position;
/**
 * Parses an #endif directive. An #endif without an open conditional is
 * reported as an error.
 */
1793 static void parse_endif_directive(void)
1797 if (!info.at_line_begin) {
1799 warningf(WARN_OTHER, &pp_token.base.source_position, "extra tokens at end of #endif");
1804 pp_conditional_t *conditional = conditional_stack;
1805 if (conditional == NULL) {
1806 errorf(&pp_token.base.source_position, "#endif without prior #if");
1810 if (!conditional->skip) {
/**
 * Dispatch a preprocessing directive to its handler.
 *
 * The directive is selected by the pp_ID of the current token's symbol.
 * A token without a matching directive is reported as an invalid
 * preprocessing directive. On exit the current token is expected to be at
 * the beginning of a line (asserted).
 */
1816 static void parse_preprocessing_directive(void)
/* the current token already starts a new line: nothing followed the '#' */
1820 if (info.at_line_begin) {
1821 /* empty directive */
1825 if (pp_token.base.symbol) {
1826 switch (pp_token.base.symbol->pp_ID) {
1827 case TP_define: parse_define_directive(); break;
1828 case TP_else: parse_else_directive(); break;
1829 case TP_endif: parse_endif_directive(); break;
1830 case TP_ifdef: parse_ifdef_ifndef_directive(true); break;
1831 case TP_ifndef: parse_ifdef_ifndef_directive(false); break;
1832 case TP_include: parse_include_directive(); break;
1833 case TP_undef: parse_undef_directive(); break;
1839 errorf(&pp_token.base.source_position, "invalid preprocessing directive #%K", &pp_token);
/* every path has to leave the token stream at the start of the next line */
1844 assert(info.at_line_begin);
/**
 * Close the token list collected for the current macro argument.
 *
 * The object grown on pp_obstack becomes the argument's token_list and its
 * length in tokens is derived from the object's size in bytes. Nothing is
 * stored when no argument is being collected (current_argument == NULL).
 */
1847 static void finish_current_argument(void)
1849 if (current_argument == NULL)
/* byte size of the growing object, measured before it gets finished */
1851 size_t size = obstack_object_size(&pp_obstack);
1852 current_argument->list_len = size/sizeof(current_argument->token_list[0]);
1853 current_argument->token_list = obstack_finish(&pp_obstack);
/**
 * Advance pp_token to the next preprocessing token.
 *
 * When expand_next() reports false, directives found at the beginning of a
 * line are processed, and this repeats while skip_mode is set (until T_EOF).
 * An identifier naming a macro that is not already being expanded starts an
 * expansion; for a macro with parameters this happens only if a '(' follows.
 * While a macro call is open (current_call != NULL) the tokens are recorded
 * as the call's arguments, split at top-level commas, until the matching ')'
 * starts the expansion of the call.
 *
 * NOTE(review): several control-flow lines (re-entry points, returns, the
 * statement that reads from the input) are not visible in this excerpt.
 */
1856 static void next_preprocessing_token(void)
/* NOTE(review): a false result presumably means that no running macro
 * expansion supplied a token - confirm against expand_next() */
1859 if (!expand_next()) {
/* a '#' as the first token of a line starts a directive */
1862 while (pp_token.kind == '#' && info.at_line_begin) {
1863 parse_preprocessing_directive();
/* keep going while inside a skipped conditional part */
1865 } while (skip_mode && pp_token.kind != T_EOF);
1868 const token_kind_t kind = pp_token.kind;
/* expansion is attempted outside of argument collection, or while an
 * argument is itself being expanded */
1869 if (current_call == NULL || argument_expanding != NULL) {
1870 if (kind == T_IDENTIFIER) {
1871 symbol_t *const symbol = pp_token.base.symbol;
1872 pp_definition_t *const pp_definition = symbol->pp_definition;
/* a macro that is currently being expanded is not expanded again */
1873 if (pp_definition != NULL && !pp_definition->is_expanding) {
1874 if (pp_definition->has_parameters) {
1876 /* check if next token is a '(' */
/* whitespace state of the macro name, stored in the call further down */
1877 whitespace_info_t old_info = info;
1878 token_kind_t next_token = peek_expansion();
/* NOTE(review): T_EOF from peek_expansion() presumably means that no
 * expansion token is pending, so the raw input character is inspected */
1879 if (next_token == T_EOF) {
1880 info.at_line_begin = false;
1881 info.had_whitespace = false;
1883 if (input.c == '(') {
1888 if (next_token == '(') {
/* expansion_pos is set only for the outermost expansion; it is used for
 * the end-of-file diagnostic further down */
1889 if (current_expansion == NULL)
1890 expansion_pos = pp_token.base.source_position;
/* consume the '(' that opens the argument list */
1891 next_preprocessing_token();
1892 assert(pp_token.kind == '(');
1894 pp_definition->parent_expansion = current_expansion;
1895 current_call = pp_definition;
/* expand_pos is used below as the index of the argument being collected */
1896 current_call->expand_pos = 0;
1897 current_call->expand_info = old_info;
1898 if (current_call->n_parameters > 0) {
/* NOTE(review): "¤t_call" looks like a mis-encoded "&current_call"
 * (character-entity damage) - confirm against the original file */
1899 current_argument = ¤t_call->parameters[0];
1900 assert(argument_brace_count == 0);
1904 /* skip_whitespaces() skipped newlines and whitespace,
1905 * remember results for next token */
/* macro without parameters: start expanding it right away */
1911 if (current_expansion == NULL)
1912 expansion_pos = pp_token.base.source_position;
1913 start_expanding(pp_definition);
1917 } else if (kind == T_MACRO_PARAMETER) {
1918 assert(current_expansion != NULL);
1919 start_expanding(pp_token.macro_parameter.def);
/* argument collection for an open macro call */
1924 if (current_call != NULL) {
1925 /* current_call != NULL */
/* argument_brace_count tracks parentheses nested inside the arguments */
1927 ++argument_brace_count;
1928 } else if (kind == ')') {
1929 if (argument_brace_count > 0) {
1930 --argument_brace_count;
/* closing parenthesis of the call itself: finish the last argument and
 * start expanding the macro */
1932 finish_current_argument();
1933 assert(kind == ')');
1934 start_expanding(current_call);
/* restore the whitespace state saved at the macro name */
1935 info = current_call->expand_info;
1936 current_call = NULL;
1937 current_argument = NULL;
/* a comma outside of nested parentheses separates two arguments */
1940 } else if (kind == ',' && argument_brace_count == 0) {
1941 finish_current_argument();
1942 current_call->expand_pos++;
1943 if (current_call->expand_pos >= current_call->n_parameters) {
1944 errorf(&pp_token.base.source_position,
1945 "too many arguments passed for macro '%Y'",
1946 current_call->symbol);
/* with no current argument the surplus tokens are not recorded */
1947 current_argument = NULL;
/* NOTE(review): same mis-encoding as above, presumably "&current_call" */
1950 = ¤t_call->parameters[current_call->expand_pos];
1953 } else if (kind == T_MACRO_PARAMETER) {
1954 /* parameters have to be fully expanded before being used as
1955 * parameters for another macro-call */
1956 assert(current_expansion != NULL);
1957 pp_definition_t *argument = pp_token.macro_parameter.def;
1958 argument_expanding = argument;
1959 start_expanding(argument);
1961 } else if (kind == T_EOF) {
1962 errorf(&expansion_pos,
1963 "reached end of file while parsing arguments for '%Y'",
1964 current_call->symbol);
/* append the token together with its whitespace flag to the object that is
 * growing on pp_obstack (closed later by finish_current_argument()) */
1967 if (current_argument != NULL) {
1968 saved_token_t saved;
1969 saved.token = pp_token;
1970 saved.had_whitespace = info.had_whitespace;
1971 obstack_grow(&pp_obstack, &saved, sizeof(saved));
/**
 * Add a directory in front of the include search path.
 *
 * A new entry is allocated on config_obstack and linked so that the current
 * list head follows it.
 * NOTE(review): the lines storing the path in the entry and updating the
 * list head are not visible in this excerpt.
 *
 * @param path  directory to add
 */
1978 static void prepend_include_path(const char *path)
1980 searchpath_entry_t *entry = OALLOCZ(&config_obstack, searchpath_entry_t);
/* the previous head becomes the successor of the new entry */
1982 entry->next = searchpath;
/**
 * Set up the initial include search path.
 *
 * Adds the built-in directory "/usr/include" and then every entry of the
 * ':'-separated CPATH environment variable. An empty CPATH entry adds ".".
 */
1986 static void setup_include_path(void)
1988 /* built-in paths */
1989 prepend_include_path("/usr/include");
1991 /* parse environment variable */
1992 const char *cpath = getenv("CPATH");
/* an unset or empty CPATH contributes nothing */
1993 if (cpath != NULL && *cpath != '\0') {
1994 const char *begin = cpath;
/* advance to the end of the current entry (':' or end of string) */
1998 while (*c != '\0' && *c != ':')
2001 size_t len = c-begin;
2003 /* for gcc compatibility (Matze: I would expect that
2004 * nothing happens for an empty entry...) */
2005 prepend_include_path(".");
/* copy the entry into config_obstack storage; len+1 leaves room for a
 * terminating NUL (NOTE(review): the line writing it is not visible here) */
2007 char *string = obstack_alloc(&config_obstack, len+1);
2008 memcpy(string, begin, len);
2011 prepend_include_path(string);
2018 } while(*c != '\0');
/**
 * Entry point of the preprocessor test driver.
 *
 * Initializes the symbol table, obstacks and string set, sets up the include
 * path, parses the command line (-I, -E, -o and a single input file), writes
 * gcc-style line markers to the output, processes the input token by token
 * until T_EOF, reports unclosed conditionals and releases the resources
 * again.
 *
 * @param argc  number of command line arguments
 * @param argv  command line arguments
 */
2022 int pptest_main(int argc, char **argv);
2023 int pptest_main(int argc, char **argv)
2025 init_symbol_table();
2029 obstack_init(&config_obstack);
2030 obstack_init(&pp_obstack);
2031 obstack_init(&input_obstack);
2032 strset_init(&stringset);
/* the test driver turns error_on_unknown_chars off (it defaults to true) */
2034 error_on_unknown_chars = false;
2036 setup_include_path();
2038 /* simplistic commandline parser */
2039 const char *filename = NULL;
2040 const char *output = NULL;
2041 for (int i = 1; i < argc; ++i) {
2042 const char *opt = argv[i];
2043 if (streq(opt, "-I")) {
/* NOTE(review): there is no check that an argument follows; "-I" as the
 * last option passes argv[argc], which is NULL, on */
2044 prepend_include_path(argv[++i]);
2046 } else if (streq(opt, "-E")) {
2048 } else if (streq(opt, "-o")) {
2051 } else if (opt[0] == '-') {
2052 fprintf(stderr, "Unknown option '%s'\n", opt);
/* anything not starting with '-' is taken as the input file name */
2054 if (filename != NULL)
2055 fprintf(stderr, "Multiple inputs not supported\n");
2059 if (filename == NULL) {
2060 fprintf(stderr, "No input specified\n");
/* NOTE(review): the branch for output == NULL is not visible here -
 * presumably the output then goes to stdout; confirm */
2064 if (output == NULL) {
2067 out = fopen(output, "w");
2069 fprintf(stderr, "Couldn't open output '%s'\n", output);
2074 /* just here for gcc compatibility */
2075 fprintf(out, "# 1 \"%s\"\n", filename);
2076 fprintf(out, "# 1 \"<built-in>\"\n");
2077 fprintf(out, "# 1 \"<command-line>\"\n");
2079 FILE *file = fopen(filename, "r");
2081 fprintf(stderr, "Couldn't open input '%s'\n", filename);
2084 switch_input(file, filename);
/* main loop: fetch tokens until the end of the input is reached */
2087 next_preprocessing_token();
2088 if (pp_token.kind == T_EOF)
/* report conditionals that are still open at the end of the input */
2094 check_unclosed_conditionals();
/* passing NULL frees everything allocated in the respective obstack */
2099 obstack_free(&input_obstack, NULL);
2100 obstack_free(&pp_obstack, NULL);
2101 obstack_free(&config_obstack, NULL);
2103 strset_destroy(&stringset);
2106 exit_symbol_table();