//#define DEBUG_CHARS
#define MAX_PUTBACK 3
+#define INCLUDE_LIMIT 199 /* 199 is for gcc "compatibility" */
+
+struct pp_argument_t { /* a list of tokens; presumably one argument of a macro invocation (not used in the visible code - TODO confirm) */
+ size_t list_len; /* presumably the number of entries in token_list - TODO confirm */
+ token_t *token_list;
+};
 struct pp_definition_t {
 symbol_t *symbol;
 source_position_t source_position;
 pp_definition_t *parent_expansion;
 size_t expand_pos;
- bool is_variadic : 1;
- bool is_expanding : 1;
- size_t argument_count;
- token_t *arguments;
+ bool is_variadic : 1; /* set when the parameter list of the #define contains '...' */
+ bool is_expanding : 1; /* true while this macro is being expanded; such a macro is not expanded again */
+ bool has_parameters : 1; /* set when a '(' directly followed the macro name in the #define */
+ size_t n_parameters; /* number of entries in parameters */
+ symbol_t *parameters; /* NOTE(review): parse_define_directive fills this with obstack_ptr_grow(), i.e. with symbol_t pointers, so the element type looks like it should be symbol_t * - confirm */
+
+ /* replacement list: the tokens collected from the rest of the #define line */
 size_t list_len;
- token_t *replacement_list;
+ token_t *token_list; /* list_len tokens that replace the macro name */
+
+};
+
+typedef struct pp_conditional_t pp_conditional_t;
+struct pp_conditional_t { /* one open #ifdef/#ifndef; entries are linked into conditional_stack */
+ source_position_t source_position; /* position reported for this conditional, e.g. when it is left unterminated */
+ bool condition; /* outcome of the #ifdef/#ifndef test */
+ bool in_else; /* set once the #else of this conditional has been seen */
+ bool skip; /**< conditional in skip mode (then+else gets skipped) */
+ pp_conditional_t *parent; /* enclosing conditional, NULL for the outermost one */
 };
-static int c;
+typedef struct pp_input_t pp_input_t;
+struct pp_input_t { /* reading state of one input file; the active one is the global `input`, suspended ones are linked into input_stack */
+ FILE *file; /* stream the characters are read from */
+ int c; /* current character or EOF; accessed through the CC macro */
+ char buf[1024+MAX_PUTBACK]; /* read buffer; fread() fills it starting at offset MAX_PUTBACK so put_back() has room in front of the data */
+ const char *bufend; /* one past the last valid character in buf; NULL before the first read */
+ const char *bufpos; /* next character to hand out; NULL before the first read */
+ source_position_t position; /* file name and line number of the read position */
+ bool had_non_space; /* set once a non-newline token was emitted; close_input() then terminates the output with '\n' */
+ pp_input_t *parent; /* next older saved input on input_stack */
+};
+
+pp_input_t input;
+#define CC input.c
+
+static pp_input_t *input_stack;
+static unsigned n_inputs;
+static struct obstack input_obstack;
+
+static pp_conditional_t *conditional_stack;
+
token_t pp_token;
-static FILE *input;
-static char buf[1024 + MAX_PUTBACK];
-static const char *bufend;
-static const char *bufpos;
static bool resolve_escape_sequences = false;
-static bool print_spaces = true;
+static bool do_print_spaces = true;
+static bool do_expansions;
+static bool skip_mode;
static FILE *out;
static struct obstack pp_obstack;
static unsigned counted_newlines;
static unsigned counted_spaces;
-static source_position_t input_position;
static const char *printed_input_name = NULL;
static pp_definition_t *current_expansion = NULL;
-static bool do_expansions;
+static inline void next_char(void);
static void next_preprocessing_token(void);
+static void print_line_directive(const source_position_t *pos, const char *add);
+static void print_spaces(void);
+
+/**
+ * Makes the file @p filename the current input.
+ *
+ * On success the global input state is reset for the new file, a line
+ * directive announcing it is printed, the pending newline/space counters are
+ * cleared and the first character and first preprocessing token are read.
+ *
+ * @param filename  path handed to fopen(); the pointer is stored as the
+ *                  input name, so it has to stay valid while the file is read
+ * @return false if the file could not be opened (the input state is left
+ *         untouched in that case), true otherwise
+ */
+static bool open_input(const char *filename)
+{
+    FILE *const stream = fopen(filename, "r");
+    if (!stream)
+        return false;
+
+    /* start at line 1 of the new file with an empty read buffer */
+    input.position.linenr     = 1;
+    input.position.input_name = filename;
+    input.had_non_space       = false;
+    input.bufpos              = NULL;
+    input.bufend              = NULL;
+    input.file                = stream;
+
+    /* indicate that we're at a new input; the "1" flag is only added when
+     * another input is waiting on the stack */
+    const char *const flag = (input_stack == NULL) ? NULL : "1";
+    print_line_directive(&input.position, flag);
+
+    counted_spaces   = 0;
+    counted_newlines = 0;
+
+    /* prime the lexer: first char, then first token */
+    next_char();
+    next_preprocessing_token();
+    return true;
+}
+
+/**
+ * Closes the file of the current input and marks the input as exhausted
+ * (no buffer, current character EOF).
+ */
+static void close_input(void)
+{
+    /* output that contained a token must end with a newline */
+    if (input.had_non_space)
+        fputc('\n', out);
+
+    FILE *const stream = input.file;
+    assert(stream != NULL);
+    fclose(stream);
+
+    input.c      = EOF;
+    input.bufpos = NULL;
+    input.bufend = NULL;
+    input.file   = NULL;
+}
+
+/**
+ * Saves a copy of the current input state on top of input_stack so that
+ * another file can be read in between.
+ */
+static void push_input(void)
+{
+    pp_input_t *const snapshot
+        = obstack_alloc(&input_obstack, sizeof(*snapshot));
+
+    *snapshot = input;
+
+    /* bufpos/bufend point into input.buf; redirect them to the same offsets
+     * inside the copied buffer */
+    if (input.bufpos != NULL) {
+        const size_t pos_offset = (size_t) (input.bufpos - input.buf);
+        snapshot->bufpos = snapshot->buf + pos_offset;
+    }
+    if (input.bufend != NULL) {
+        const size_t end_offset = (size_t) (input.bufend - input.buf);
+        snapshot->bufend = snapshot->buf + end_offset;
+    }
+
+    /* link the copy in as the new top of the stack */
+    snapshot->parent = input_stack;
+    input_stack      = snapshot;
+    n_inputs        += 1;
+}
+
+/**
+ * Reverses push_input(): copies the topmost saved input back into the
+ * current input state, removes it from input_stack and releases its memory.
+ */
+static void pop_restore_input(void)
+{
+    assert(n_inputs > 0);
+    assert(input_stack != NULL);
+
+    pp_input_t *const top = input_stack;
+
+    input        = *top;
+    input.parent = NULL;
+
+    /* the copied pointers still refer to the saved buffer; move them to the
+     * same offsets inside input.buf */
+    if (top->bufpos != NULL)
+        input.bufpos = input.buf + (top->bufpos - top->buf);
+    if (top->bufend != NULL)
+        input.bufend = input.buf + (top->bufend - top->buf);
+
+    input_stack = top->parent;
+    --n_inputs;
+    obstack_free(&input_obstack, top);
+}
/**
* Prints a parse error message at the current token.
static inline void next_real_char(void)
{
- assert(bufpos <= bufend);
- if (bufpos >= bufend) {
- size_t s = fread(buf + MAX_PUTBACK, 1, sizeof(buf) - MAX_PUTBACK,
- input);
- if(s == 0) {
- c = EOF;
+ assert(input.bufpos <= input.bufend);
+ if (input.bufpos >= input.bufend) {
+ size_t s = fread(input.buf + MAX_PUTBACK, 1,
+ sizeof(input.buf) - MAX_PUTBACK, input.file);
+ if (s == 0) {
+ CC = EOF;
return;
}
- bufpos = buf + MAX_PUTBACK;
- bufend = buf + MAX_PUTBACK + s;
+ input.bufpos = input.buf + MAX_PUTBACK;
+ input.bufend = input.buf + MAX_PUTBACK + s;
}
- c = *bufpos++;
+ CC = *input.bufpos++;
}
/**
*/
static inline void put_back(int pc)
{
- assert(bufpos > buf);
- *(--bufpos - buf + buf) = (char) pc;
+ assert(input.bufpos > input.buf);
+ *(--input.bufpos - input.buf + input.buf) = (char) pc;
#ifdef DEBUG_CHARS
printf("putback '%c'\n", pc);
#endif
}
-static inline void next_char(void);
-
#define MATCH_NEWLINE(code) \
case '\r': \
next_char(); \
- if(c == '\n') { \
+ if(CC == '\n') { \
next_char(); \
} \
- ++input_position.linenr; \
+ ++input.position.linenr; \
code \
case '\n': \
next_char(); \
- ++input_position.linenr; \
+ ++input.position.linenr; \
code
-#define eat(c_type) do { assert(c == c_type); next_char(); } while(0)
+#define eat(c_type) do { assert(CC == c_type); next_char(); } while(0)
static void maybe_concat_lines(void)
{
eat('\\');
- switch(c) {
+ switch(CC) {
MATCH_NEWLINE(return;)
default:
break;
}
- put_back(c);
- c = '\\';
+ put_back(CC);
+ CC = '\\';
}
/**
next_real_char();
/* filter trigraphs and concatenated lines */
- if(UNLIKELY(c == '\\')) {
+ if(UNLIKELY(CC == '\\')) {
maybe_concat_lines();
goto end_of_next_char;
}
- if(LIKELY(c != '?'))
+ if(LIKELY(CC != '?'))
goto end_of_next_char;
next_real_char();
- if(LIKELY(c != '?')) {
- put_back(c);
- c = '?';
+ if(LIKELY(CC != '?')) {
+ put_back(CC);
+ CC = '?';
goto end_of_next_char;
}
next_real_char();
- switch(c) {
- case '=': c = '#'; break;
- case '(': c = '['; break;
- case '/': c = '\\'; maybe_concat_lines(); break;
- case ')': c = ']'; break;
- case '\'': c = '^'; break;
- case '<': c = '{'; break;
- case '!': c = '|'; break;
- case '>': c = '}'; break;
- case '-': c = '~'; break;
+ switch(CC) {
+ case '=': CC = '#'; break;
+ case '(': CC = '['; break;
+ case '/': CC = '\\'; maybe_concat_lines(); break;
+ case ')': CC = ']'; break;
+ case '\'': CC = '^'; break;
+ case '<': CC = '{'; break;
+ case '!': CC = '|'; break;
+ case '>': CC = '}'; break;
+ case '-': CC = '~'; break;
default:
- put_back(c);
+ put_back(CC);
put_back('?');
- c = '?';
+ CC = '?';
break;
}
end_of_next_char:;
#ifdef DEBUG_CHARS
- printf("nchar '%c'\n", c);
+ printf("nchar '%c'\n", CC);
#endif
}
{
assert(is_octal_digit(first_digit));
int value = digit_value(first_digit);
- if (!is_octal_digit(c)) return value;
- value = 8 * value + digit_value(c);
+ if (!is_octal_digit(CC)) return value;
+ value = 8 * value + digit_value(CC);
next_char();
- if (!is_octal_digit(c)) return value;
- value = 8 * value + digit_value(c);
+ if (!is_octal_digit(CC)) return value;
+ value = 8 * value + digit_value(CC);
next_char();
if(char_is_signed) {
static int parse_hex_sequence(void)
{
int value = 0;
- while(isxdigit(c)) {
- value = 16 * value + digit_value(c);
+ while(isxdigit(CC)) {
+ value = 16 * value + digit_value(CC);
next_char();
}
{
eat('\\');
- int ec = c;
+ int ec = CC;
next_char();
switch(ec) {
static void parse_string_literal(void)
{
- const unsigned start_linenr = input_position.linenr;
+ const unsigned start_linenr = input.position.linenr;
eat('"');
int tc;
while(1) {
- switch(c) {
+ switch(CC) {
case '\\':
if(resolve_escape_sequences) {
tc = parse_escape_sequence();
obstack_1grow(&symbol_obstack, (char) tc);
} else {
- obstack_1grow(&symbol_obstack, (char) c);
+ obstack_1grow(&symbol_obstack, (char) CC);
next_char();
- obstack_1grow(&symbol_obstack, (char) c);
+ obstack_1grow(&symbol_obstack, (char) CC);
next_char();
}
break;
goto end_of_string;
default:
- obstack_1grow(&symbol_obstack, (char) c);
+ obstack_1grow(&symbol_obstack, (char) CC);
next_char();
break;
}
int found_char = 0;
while(1) {
- switch(c) {
+ switch(CC) {
case '\\':
found_char = parse_escape_sequence();
break;
"constant");
goto end_of_wide_char_constant;
} else {
- found_char = c;
+ found_char = CC;
next_char();
}
break;
static void parse_wide_string_literal(void)
{
- const unsigned start_linenr = input_position.linenr;
+ const unsigned start_linenr = input.position.linenr;
- assert(c == '"');
+ assert(CC == '"');
next_char();
while(1) {
- switch(c) {
+ switch(CC) {
case '\\': {
wchar_rep_t tc = parse_escape_sequence();
obstack_grow(&symbol_obstack, &tc, sizeof(tc));
goto end_of_string;
default: {
- wchar_rep_t tc = c;
+ wchar_rep_t tc = CC;
obstack_grow(&symbol_obstack, &tc, sizeof(tc));
next_char();
break;
static void parse_character_constant(void)
{
- const unsigned start_linenr = input_position.linenr;
+ const unsigned start_linenr = input.position.linenr;
eat('\'');
int tc;
while(1) {
- switch(c) {
+ switch(CC) {
case '\\':
tc = parse_escape_sequence();
obstack_1grow(&symbol_obstack, (char) tc);
goto end_of_char_constant;
default:
- obstack_1grow(&symbol_obstack, (char) c);
+ obstack_1grow(&symbol_obstack, (char) CC);
next_char();
break;
pp_definition_t *parent = definition->parent_expansion;
definition->parent_expansion = NULL;
definition->is_expanding = false;
+
+ /* it was the outermost expansion, parse normal pptoken */
if(parent == NULL) {
current_expansion = NULL;
next_preprocessing_token();
current_expansion = definition;
goto restart;
}
- pp_token = definition->replacement_list[definition->expand_pos];
+ pp_token = definition->token_list[definition->expand_pos];
++definition->expand_pos;
if(pp_token.type != TP_IDENTIFIER)
return;
+ /* if it was an identifier then we might need to expand again */
pp_definition_t *symbol_definition = pp_token.v.symbol->pp_definition;
if(symbol_definition != NULL && !symbol_definition->is_expanding) {
symbol_definition->parent_expansion = definition;
}
}
+/**
+ * Skips the rest of a line comment. The end-of-line character (or EOF)
+ * that terminates the comment is left as the current character.
+ */
+static void skip_line_comment(void)
+{
+    /* the comment is accounted for as a single space */
+    if (do_print_spaces)
+        ++counted_spaces;
+
+    while (CC != EOF && CC != '\n' && CC != '\r')
+        next_char();
+}
+
+/**
+ * Skips a multiline comment up to and including its closing star-slash.
+ * Reaching EOF first is reported as an error located at the line on which
+ * this function was entered.
+ */
+static void skip_multiline_comment(void)
+{
+    /* the comment is accounted for as a single space */
+    if (do_print_spaces)
+        ++counted_spaces;
+
+    const unsigned comment_start_line = input.position.linenr;
+    for (;;) {
+        switch (CC) {
+        case EOF: {
+            source_position_t where;
+            where.input_name = pp_token.source_position.input_name;
+            where.linenr     = comment_start_line;
+            errorf(&where, "at end of file while looking for comment end");
+            return;
+        }
+
+        MATCH_NEWLINE(
+            if (do_print_spaces) {
+                ++counted_newlines;
+                counted_spaces = 0;
+            }
+            break;
+        )
+
+        case '*':
+            next_char();
+            if (CC != '/')
+                break;
+            next_char();
+            return;
+
+        case '/':
+            /* TODO: a '*' following here starts a nested comment, warn */
+            next_char();
+            break;
+
+        default:
+            next_char();
+            break;
+        }
+    }
+}
+
+/**
+ * Skips blanks, tabs and comments until the current character is the first
+ * character of the next preprocessing token.
+ *
+ * @param skip_newline  if true, line ends are skipped (and counted) as well;
+ *                      if false, the function stops in front of a line end
+ */
+static void skip_spaces(bool skip_newline)
+{
+    for (;;) {
+        if (CC == ' ' || CC == '\t') {
+            if (do_print_spaces)
+                ++counted_spaces;
+            next_char();
+            continue;
+        }
+
+        if (CC == '/') {
+            next_char();
+            if (CC == '/') {
+                next_char();
+                skip_line_comment();
+                continue;
+            }
+            if (CC == '*') {
+                next_char();
+                skip_multiline_comment();
+                continue;
+            }
+            /* a lone '/' starts a token: undo the lookahead */
+            put_back(CC);
+            CC = '/';
+            return;
+        }
+
+        if (CC != '\r' && CC != '\n')
+            return;
+        if (!skip_newline)
+            return;
+
+        /* consume one line end; "\r\n" counts as a single one */
+        const bool carriage_return = (CC == '\r');
+        next_char();
+        if (carriage_return && CC == '\n')
+            next_char();
+
+        ++input.position.linenr;
+        if (do_print_spaces)
+            ++counted_newlines;
+    }
+}
+
+/**
+ * Advances to the next preprocessing token; the current token is expected
+ * (checked by assert only) to be of type @p type.
+ */
+static void eat_pp(int type)
+{
+    assert(pp_token.type == type);
+    /* keeps the parameter "used" when assert() compiles to nothing */
+    (void) type;
+
+    next_preprocessing_token();
+}
+
static void parse_symbol(void)
{
- obstack_1grow(&symbol_obstack, (char) c);
+ obstack_1grow(&symbol_obstack, (char) CC);
next_char();
while(1) {
- switch(c) {
+ switch(CC) {
DIGITS
SYMBOL_CHARS
- obstack_1grow(&symbol_obstack, (char) c);
+ obstack_1grow(&symbol_obstack, (char) CC);
next_char();
break;
char *string = obstack_finish(&symbol_obstack);
/* might be a wide string or character constant ( L"string"/L'c' ) */
- if(c == '"' && string[0] == 'L' && string[1] == '\0') {
+ if (CC == '"' && string[0] == 'L' && string[1] == '\0') {
obstack_free(&symbol_obstack, string);
parse_wide_string_literal();
return;
- } else if(c == '\'' && string[0] == 'L' && string[1] == '\0') {
+ } else if (CC == '\'' && string[0] == 'L' && string[1] == '\0') {
obstack_free(&symbol_obstack, string);
parse_wide_character_constant();
return;
/* we can free the memory from symbol obstack if we already had an entry in
* the symbol table */
- if(symbol->string != string) {
+ if (symbol->string != string) {
obstack_free(&symbol_obstack, string);
}
+ if (!do_expansions)
+ return;
pp_definition_t *pp_definition = symbol->pp_definition;
- if(do_expansions && pp_definition != NULL) {
- pp_definition->expand_pos = 0;
- pp_definition->is_expanding = true,
- current_expansion = pp_definition;
- expand_next();
+ if (pp_definition == NULL)
+ return;
+
+ if (pp_definition->has_parameters) {
+ skip_spaces(true);
+ /* no opening brace -> no expansion */
+ if (CC != '(')
+ return;
+ next_preprocessing_token();
+ eat_pp('(');
+
+ /* parse arguments (TODO) */
+ while (pp_token.type != TP_EOF && pp_token.type != ')')
+ next_preprocessing_token();
+ next_preprocessing_token();
}
+
+ pp_definition->expand_pos = 0;
+ pp_definition->is_expanding = true,
+ current_expansion = pp_definition;
+ expand_next();
}
static void parse_number(void)
{
- obstack_1grow(&symbol_obstack, (char) c);
+ obstack_1grow(&symbol_obstack, (char) CC);
next_char();
while(1) {
- switch(c) {
+ switch(CC) {
case '.':
DIGITS
SYMBOL_CHARS_WITHOUT_E_P
- obstack_1grow(&symbol_obstack, (char) c);
+ obstack_1grow(&symbol_obstack, (char) CC);
next_char();
break;
case 'p':
case 'E':
case 'P':
- obstack_1grow(&symbol_obstack, (char) c);
+ obstack_1grow(&symbol_obstack, (char) CC);
next_char();
- if(c == '+' || c == '-') {
- obstack_1grow(&symbol_obstack, (char) c);
+ if(CC == '+' || CC == '-') {
+ obstack_1grow(&symbol_obstack, (char) CC);
next_char();
}
break;
pp_token.v.string.size = size;
}
-static void skip_multiline_comment(void)
-{
- unsigned start_linenr = input_position.linenr;
-
- while(1) {
- switch(c) {
- case '/':
- next_char();
- if (c == '*') {
- /* TODO: nested comment, warn here */
- }
- break;
- case '*':
- next_char();
- if(c == '/') {
- next_char();
- return;
- }
- break;
-
- MATCH_NEWLINE(
- if(print_spaces) {
- counted_newlines++;
- counted_spaces = 0;
- }
- break;
- )
-
- case EOF: {
- source_position_t source_position;
- source_position.input_name = pp_token.source_position.input_name;
- source_position.linenr = start_linenr;
- errorf(&source_position, "at end of file while looking for comment end");
- return;
- }
-
- default:
- next_char();
- break;
- }
- }
-}
-
-static void skip_line_comment(void)
-{
- while(1) {
- switch(c) {
- case EOF:
- return;
-
- case '\n':
- case '\r':
- return;
-
- default:
- next_char();
- break;
- }
- }
-}
-
#define MAYBE_PROLOG \
next_char(); \
while(1) { \
- switch(c) {
+ switch(CC) {
#define MAYBE(ch, set_type) \
case ch: \
return;
}
- pp_token.source_position = input_position;
+ pp_token.source_position = input.position;
restart:
- switch(c) {
+ switch(CC) {
case ' ':
case '\t':
- if(print_spaces)
+ if(do_print_spaces)
counted_spaces++;
next_char();
goto restart;
case '7':
case '8':
case '9':
- put_back(c);
- c = '.';
+ put_back(CC);
+ CC = '.';
parse_number();
return;
MAYBE_PROLOG
MAYBE('.', TP_DOTDOTDOT)
ELSE_CODE(
- put_back(c);
- c = '.';
+ put_back(CC);
+ CC = '.';
pp_token.type = '.';
return;
)
case '*':
next_char();
skip_multiline_comment();
- if(print_spaces)
- counted_spaces++;
goto restart;
case '/':
next_char();
skip_line_comment();
- if(print_spaces)
- counted_spaces++;
goto restart;
ELSE('/')
case '%':
MAYBE_PROLOG
MAYBE(':', TP_HASHHASH)
ELSE_CODE(
- put_back(c);
- c = '%';
+ put_back(CC);
+ CC = '%';
pp_token.type = '#';
return;
)
case ';':
case ',':
case '\\':
- pp_token.type = c;
+ pp_token.type = CC;
next_char();
return;
case EOF:
- pp_token.type = TP_EOF;
+ if (input_stack != NULL) {
+ close_input();
+ pop_restore_input();
+ counted_newlines = 0;
+ counted_spaces = 0;
+ /* hack to output correct line number */
+ print_line_directive(&input.position, "2");
+ next_preprocessing_token();
+ } else {
+ pp_token.type = TP_EOF;
+ }
return;
default:
next_char();
- errorf(&pp_token.source_position, "unknown character '%c' found\n", c);
+ errorf(&pp_token.source_position, "unknown character '%c' found\n", CC);
pp_token.type = TP_ERROR;
return;
}
fputc('"', out);
}
-static void print_line_directive(const source_position_t *pos)
+static void print_line_directive(const source_position_t *pos, const char *add) /* writes `# <line> "<file>"` to out and remembers the file name in printed_input_name; add is optional text (callers pass NULL, "1" or "2") appended after the file name */
 {
 fprintf(out, "# %d ", pos->linenr);
 print_quoted_string(pos->input_name);
+ if (add != NULL) { /* optional trailing text, separated from the file name by one space */
+ fputc(' ', out);
+ fputs(add, out);
+ }
 fputc('\n', out);
 printed_input_name = pos->input_name;
 }
-static bool had_non_space = false;
+/**
+ * Writes the whitespace counted since the last output: the pending
+ * newlines (replaced by a line directive when there are 9 or more of them)
+ * followed by the pending spaces. Both counters are zero afterwards.
+ */
+static void print_spaces(void)
+{
+    if (counted_newlines < 9) {
+        while (counted_newlines > 0) {
+            fputc('\n', out);
+            --counted_newlines;
+        }
+    } else {
+        /* a long run of empty lines: finish the current line if it has
+         * content and emit a line directive instead */
+        if (input.had_non_space)
+            fputc('\n', out);
+        print_line_directive(&pp_token.source_position, NULL);
+        counted_newlines = 0;
+    }
+
+    while (counted_spaces > 0) {
+        fputc(' ', out);
+        --counted_spaces;
+    }
+}
static void emit_pp_token(void)
{
- if (printed_input_name != pp_token.source_position.input_name) {
- print_line_directive(&pp_token.source_position);
- } else if (pp_token.type != '\n') {
- if (counted_newlines >= 9) {
- if (had_non_space) {
- fputc('\n', out);
- }
- print_line_directive(&pp_token.source_position);
- counted_newlines = 0;
- } else {
- for (unsigned i = 0; i < counted_newlines; ++i)
- fputc('\n', out);
- counted_newlines = 0;
- }
- for (unsigned i = 0; i < counted_spaces; ++i)
- fputc(' ', out);
- counted_spaces = 0;
- had_non_space = true;
+ if (skip_mode)
+ return;
+
+ if (pp_token.type != '\n') {
+ print_spaces();
+ input.had_non_space = true;
}
switch(pp_token.type) {
}
}
-static void eat_pp(preprocessor_token_type_t type)
-{
- (void) type;
- assert(pp_token.type == type);
- next_preprocessing_token();
-}
-
static void eat_pp_directive(void)
{
while(pp_token.type != '\n' && pp_token.type != TP_EOF) {
return false;
size_t len = definition1->list_len;
- const token_t *t1 = definition1->replacement_list;
- const token_t *t2 = definition2->replacement_list;
+ const token_t *t1 = definition1->token_list;
+ const token_t *t2 = definition2->token_list;
for(size_t i = 0; i < len; ++i, ++t1, ++t2) {
if(!pp_tokens_equal(t1, t2))
return false;
static void parse_define_directive(void)
{
eat_pp(TP_define);
+ assert(obstack_object_size(&pp_obstack) == 0);
- if(pp_token.type != TP_IDENTIFIER) {
+ if (pp_token.type != TP_IDENTIFIER) {
errorf(&pp_token.source_position,
- "expected identifier after #define, got '%T'", &pp_token);
- eat_pp_directive();
- return;
+ "expected identifier after #define, got '%t'", &pp_token);
+ goto error_out;
}
symbol_t *symbol = pp_token.v.symbol;
pp_definition_t *new_definition
= obstack_alloc(&pp_obstack, sizeof(new_definition[0]));
memset(new_definition, 0, sizeof(new_definition[0]));
- new_definition->source_position = input_position;
+ new_definition->source_position = input.position;
/* this is probably the only place where spaces are significant in the
* lexer (except for the fact that they separate tokens). #define b(x)
* is something else than #define b (x) */
- //token_t *arguments = NULL;
- if(c == '(') {
+ if (CC == '(') {
+ /* eat the '(' */
+ next_preprocessing_token();
+ /* get next token after '(' */
next_preprocessing_token();
- while(pp_token.type != ')') {
- if(pp_token.type == TP_DOTDOTDOT) {
+
+ while (true) {
+ switch (pp_token.type) {
+ case TP_DOTDOTDOT:
new_definition->is_variadic = true;
next_preprocessing_token();
- if(pp_token.type != ')') {
- errorf(&input_position,
+ if (pp_token.type != ')') {
+ errorf(&input.position,
"'...' not at end of macro argument list");
- continue;
+ goto error_out;
}
- } else if(pp_token.type != TP_IDENTIFIER) {
+ break;
+ case TP_IDENTIFIER:
+ obstack_ptr_grow(&pp_obstack, pp_token.v.symbol);
next_preprocessing_token();
+
+ if (pp_token.type == ',') {
+ next_preprocessing_token();
+ break;
+ }
+
+ if (pp_token.type != ')') {
+ errorf(&pp_token.source_position,
+ "expected ',' or ')' after identifier, got '%t'",
+ &pp_token);
+ goto error_out;
+ }
+ break;
+ case ')':
+ next_preprocessing_token();
+ goto finish_argument_list;
+ default:
+ errorf(&pp_token.source_position,
+ "expected identifier, '...' or ')' in #define argument list, got '%t'",
+ &pp_token);
+ goto error_out;
}
}
+
+ finish_argument_list:
+ new_definition->has_parameters = true;
+ new_definition->n_parameters
+ = obstack_object_size(&pp_obstack) / sizeof(new_definition->parameters[0]);
+ new_definition->parameters = obstack_finish(&pp_obstack);
} else {
next_preprocessing_token();
}
/* construct a new pp_definition on the obstack */
assert(obstack_object_size(&pp_obstack) == 0);
size_t list_len = 0;
- while(pp_token.type != '\n' && pp_token.type != TP_EOF) {
+ while (pp_token.type != '\n' && pp_token.type != TP_EOF) {
obstack_grow(&pp_obstack, &pp_token, sizeof(pp_token));
++list_len;
next_preprocessing_token();
}
- new_definition->list_len = list_len;
- new_definition->replacement_list = obstack_finish(&pp_obstack);
+ new_definition->list_len = list_len;
+ new_definition->token_list = obstack_finish(&pp_obstack);
pp_definition_t *old_definition = symbol->pp_definition;
- if(old_definition != NULL) {
- if(!pp_definitions_equal(old_definition, new_definition)) {
- warningf(&input_position, "multiple definition of macro '%Y' (first defined %P)",
+ if (old_definition != NULL) {
+ if (!pp_definitions_equal(old_definition, new_definition)) {
+ warningf(&input.position, "multiple definition of macro '%Y' (first defined %P)",
symbol, &old_definition->source_position);
} else {
/* reuse the old definition */
}
symbol->pp_definition = new_definition;
+ return;
+
+error_out:
+ if (obstack_object_size(&pp_obstack) > 0) {
+ char *ptr = obstack_finish(&pp_obstack);
+ obstack_free(&pp_obstack, ptr);
+ }
+ eat_pp_directive();
}
static void parse_undef_directive(void)
eat_pp(TP_undef);
if(pp_token.type != TP_IDENTIFIER) {
- errorf(&input_position,
- "expected identifier after #undef, got '%T'", &pp_token);
+ errorf(&input.position,
+ "expected identifier after #undef, got '%t'", &pp_token);
eat_pp_directive();
return;
}
next_preprocessing_token();
if(pp_token.type != '\n') {
- warningf(&input_position, "extra tokens at end of #undef directive");
+ warningf(&input.position, "extra tokens at end of #undef directive");
}
/* eat until '\n' */
eat_pp_directive();
}
-static void parse_preprocessing_directive(void)
+/**
+ * Reads the header name that follows #include, either <name> or "name".
+ *
+ * The name is collected on input_obstack and is currently always prefixed
+ * with "./" (no search path handling yet). Any other form of #include
+ * argument ends in panic().
+ *
+ * @return the NUL-terminated file name on success; pp_token then holds the
+ *         token following the header name. NULL if the line or the file
+ *         ended before the closing delimiter; an error has been reported
+ *         and the partially collected name has been discarded in that case.
+ */
+static const char *parse_headername(void)
 {
- print_spaces = false;
- do_expansions = false;
- eat_pp('#');
+    /* behind an #include we can have the special headername lexemes.
+     * They're only allowed behind an #include so they're not recognized
+     * by the normal next_preprocessing_token. We handle them as a special
+     * exception here */
+
+    /* skip spaces so we reach start of next preprocessing token */
+    skip_spaces(false);
+
+    assert(obstack_object_size(&input_obstack) == 0);
+
+    /* check whether we have a "... or <... headername */
+    switch (CC) {
+    case '<':
+        /* for now until we have proper searchpath handling */
+        obstack_1grow(&input_obstack, '.');
+        obstack_1grow(&input_obstack, '/');
+
+        next_char();
+        while (true) {
+            switch (CC) {
+            case EOF:
+                /* fallthrough */
+            MATCH_NEWLINE(
+                /* drop the half-built name, otherwise it stays behind as a
+                 * growing object and corrupts the next use of the obstack */
+                obstack_free(&input_obstack, obstack_finish(&input_obstack));
+                parse_error("header name without closing '>'");
+                return NULL;
+            )
+            case '>':
+                next_char();
+                goto finished_headername;
+            }
+            obstack_1grow(&input_obstack, (char) CC);
+            next_char();
+        }
+        /* we should never be here */
+
+    case '"':
+        /* for now until we have proper searchpath handling */
+        obstack_1grow(&input_obstack, '.');
+        obstack_1grow(&input_obstack, '/');
+
+        next_char();
+        while (true) {
+            switch (CC) {
+            case EOF:
+                /* fallthrough */
+            MATCH_NEWLINE(
+                /* drop the half-built name, see the '<' case */
+                obstack_free(&input_obstack, obstack_finish(&input_obstack));
+                parse_error("header name without closing '\"'");
+                return NULL;
+            )
+            case '"':
+                next_char();
+                goto finished_headername;
+            }
+            obstack_1grow(&input_obstack, (char) CC);
+            next_char();
+        }
+        /* we should never be here */
- switch(pp_token.type) {
- case TP_define:
- parse_define_directive();
- break;
- case TP_undef:
- parse_undef_directive();
- break;
 default:
+        /* TODO: do normal pp_token parsing and concatenate results */
+        panic("pp_token concat include not implemented yet");
+    }
+
+finished_headername:
+    obstack_1grow(&input_obstack, '\0');
+    char *headername = obstack_finish(&input_obstack);
+
+    /* TODO: iterate search-path to find the file */
+
+    next_preprocessing_token();
+
+    return headername;
+}
+
+/**
+ * Handles an #include directive; pp_token is still the `include` token on
+ * entry.
+ *
+ * @return true if the included file was opened and is now the current
+ *         input (its first token has already been read); false if nothing
+ *         was included. In the false case pp_token is left on the '\n' or
+ *         EOF token that ends the directive, so the caller can finish the
+ *         directive the usual way.
+ */
+static bool parse_include_directive(void)
+{
+    /* don't eat the TP_include here!
+     * we need an alternative parsing for the next token */
+
+    print_spaces();
+
+    const char *headername = parse_headername();
+    if (headername == NULL) {
+        eat_pp_directive();
+        return false;
+    }
+
+    if (pp_token.type != '\n' && pp_token.type != TP_EOF) {
+        warningf(&pp_token.source_position,
+                 "extra tokens at end of #include directive");
+        eat_pp_directive();
+    }
+
+    if (n_inputs > INCLUDE_LIMIT) {
+        errorf(&pp_token.source_position, "#include nested too deeply");
+        /* do NOT consume the '\n'/EOF token here: the caller asserts that
+         * it is still the current token and eats it itself */
+        return false;
+    }
+
+    /* we have to reenable space counting and macro expansion here,
+     * because it is still disabled in directive parsing,
+     * but we will trigger a preprocessing token reading of the new file
+     * now and need expansions/space counting */
+    do_print_spaces = true;
+    do_expansions   = true;
+
+    /* switch inputs */
+    push_input();
+    bool res = open_input(headername);
+    if (!res) {
 errorf(&pp_token.source_position,
- "invalid preprocessing directive #%T", &pp_token);
+               "failed including '%s': %s", headername, strerror(errno));
+        /* go back to the file that contains the #include */
+        pop_restore_input();
+        return false;
+    }
+
+    return true;
+}
+
+/**
+ * Allocates a zero-initialized conditional on pp_obstack and makes it the
+ * new top of conditional_stack.
+ *
+ * @return the new entry, for the caller to fill in
+ */
+static pp_conditional_t *push_conditional(void)
+{
+    pp_conditional_t *const entry = obstack_alloc(&pp_obstack, sizeof(*entry));
+
+    memset(entry, 0, sizeof(*entry));
+    entry->parent     = conditional_stack;
+    conditional_stack = entry;
+    return entry;
+}
+
+/**
+ * Removes the topmost entry from conditional_stack; the entry itself is
+ * not freed here.
+ */
+static void pop_conditional(void)
+{
+    pp_conditional_t *const top = conditional_stack;
+
+    assert(top != NULL);
+    conditional_stack = top->parent;
+}
+
+/**
+ * Reports an error for every conditional that is still open and empties
+ * conditional_stack.
+ */
+static void check_unclosed_conditionals(void)
+{
+    for (pp_conditional_t *open = conditional_stack; open != NULL;
+         open = conditional_stack) {
+        if (!open->in_else) {
+            errorf(&open->source_position, "unterminated condition");
+        } else {
+            errorf(&open->source_position, "unterminated #else");
+        }
+        pop_conditional();
+    }
+}
+
+static void parse_ifdef_ifndef_directive(void) /* handles #ifdef and #ifndef: pushes a new conditional and turns skip_mode on when the test fails */
+{
+ bool is_ifndef = (pp_token.type == TP_ifndef);
+ bool condition;
+ next_preprocessing_token();
+
+ if (skip_mode) { /* already skipping: the test is not evaluated, the conditional is only recorded with skip set */
 eat_pp_directive();
- break;
+ pp_conditional_t *conditional = push_conditional();
+ conditional->source_position = pp_token.source_position;
+ conditional->skip = true;
+ return;
+ }
+
+ if (pp_token.type != TP_IDENTIFIER) {
+ errorf(&pp_token.source_position,
+ "expected identifier after #%s, got '%t'",
+ is_ifndef ? "ifndef" : "ifdef", &pp_token);
+ eat_pp_directive();
+
+ /* just take the true case in the hope to avoid further errors */
+ condition = true;
+ } else {
+ symbol_t *symbol = pp_token.v.symbol;
+ pp_definition_t *pp_definition = symbol->pp_definition; /* read before moving on to the next token */
+ next_preprocessing_token();
+
+ if (pp_token.type != '\n') {
+ errorf(&pp_token.source_position,
+ "extra tokens at end of #%s",
+ is_ifndef ? "ifndef" : "ifdef");
+ eat_pp_directive();
+ }
+
+ /* evaluate whether we are in true or false case */
+ condition = is_ifndef ? pp_definition == NULL : pp_definition != NULL;
+ }
+
+ pp_conditional_t *conditional = push_conditional();
+ conditional->source_position = pp_token.source_position;
+ conditional->condition = condition;
+
+ if (!condition) { /* failed test: skip everything up to the matching #else/#endif */
+ skip_mode = true;
+ }
+}
+
+/**
+ * Handles #else: switches the innermost open conditional to its else
+ * branch and sets skip_mode accordingly.
+ *
+ * Reports an error if no conditional is open or if the innermost one
+ * already had an #else. Extra tokens on the line are eaten, with a warning
+ * unless the line is being skipped anyway.
+ */
+static void parse_else_directive(void)
+{
+    eat_pp(TP_else);
+
+    if (pp_token.type != '\n') {
+        if (!skip_mode) {
+            warningf(&pp_token.source_position, "extra tokens at end of #else");
+        }
+        eat_pp_directive();
+    }
+
+    pp_conditional_t *conditional = conditional_stack;
+    if (conditional == NULL) {
+        errorf(&pp_token.source_position, "#else without prior #if");
+        return;
+    }
+
+    if (conditional->in_else) {
+        /* %P takes a pointer to the position, as in the other %P call of
+         * this file; passing the struct by value was wrong */
+        errorf(&pp_token.source_position,
+               "#else after #else (condition started %P)",
+               &conditional->source_position);
+        skip_mode = true;
+        return;
+    }
+
+    conditional->in_else = true;
+    if (!conditional->skip) {
+        /* the else branch is skipped exactly when the then branch was taken */
+        skip_mode = conditional->condition;
+    }
+    conditional->source_position = pp_token.source_position;
+}
+
+/**
+ * Handles #endif: closes the innermost open conditional. Skip mode ends
+ * here unless that conditional was opened while skipping was already
+ * active. An #endif without an open conditional is reported as an error.
+ */
+static void parse_endif_directive(void)
+{
+    eat_pp(TP_endif);
+
+    const bool has_extra_tokens = (pp_token.type != '\n');
+    if (has_extra_tokens && !skip_mode) {
+        warningf(&pp_token.source_position,
+                 "extra tokens at end of #endif");
+    }
+    if (has_extra_tokens) {
+        eat_pp_directive();
+    }
+
+    pp_conditional_t *const innermost = conditional_stack;
+    if (innermost == NULL) {
+        errorf(&pp_token.source_position, "#endif without prior #if");
+        return;
+    }
+
+    /* keep skipping only if this conditional was nested in a skipped region */
+    skip_mode = innermost->skip && skip_mode;
+    pop_conditional();
+}
+
+static void parse_preprocessing_directive(void) /* parses one directive line starting at its '#' token and dispatches on the directive name */
+{
+ do_print_spaces = false; /* no space counting and no macro expansion while the directive is read */
+ do_expansions = false;
+ eat_pp('#');
+
+ if (skip_mode) { /* while skipping only the conditional directives are processed */
+ switch(pp_token.type) {
+ case TP_ifdef:
+ case TP_ifndef:
+ parse_ifdef_ifndef_directive();
+ break;
+ case TP_else:
+ parse_else_directive();
+ break;
+ case TP_endif:
+ parse_endif_directive();
+ break;
+ default:
+ eat_pp_directive();
+ break;
+ }
+ } else {
+ switch(pp_token.type) {
+ case TP_define:
+ parse_define_directive();
+ break;
+ case TP_undef:
+ parse_undef_directive();
+ break;
+ case TP_ifdef:
+ case TP_ifndef:
+ parse_ifdef_ifndef_directive();
+ break;
+ case TP_else:
+ parse_else_directive();
+ break;
+ case TP_endif:
+ parse_endif_directive();
+ break;
+ case TP_include: {
+ bool in_new_source = parse_include_directive();
+ /* no need to do anything if source file switched */
+ if (in_new_source)
+ return;
+ break;
+ }
+ case '\n':
+ /* the nop directive */
+ break;
+ default:
+ errorf(&pp_token.source_position,
+ "invalid preprocessing directive #%t", &pp_token);
+ eat_pp_directive();
+ break;
+ }
 }
- print_spaces = true;
- do_expansions = true;
+ do_print_spaces = true; /* back to normal token reading */
+ do_expansions = true;
 /* eat '\n' */
 assert(pp_token.type == '\n' || pp_token.type == TP_EOF);
 next_preprocessing_token();
 }
-int pptest_main(int argc, char **argv);
-
#define GCC_COMPAT_MODE
+int pptest_main(int argc, char **argv);
int pptest_main(int argc, char **argv)
{
init_symbol_table();
init_tokens();
obstack_init(&pp_obstack);
+ obstack_init(&input_obstack);
- const char *infname = "t.c";
+ const char *filename = "t.c";
if (argc > 1)
- infname = argv[1];
-
- input = fopen(infname, "r");
- assert(input != NULL);
- input_position.input_name = infname;
- input_position.linenr = 1;
-
- bufpos = NULL;
- bufend = NULL;
- counted_newlines = 0;
- counted_spaces = 0;
+ filename = argv[1];
out = stdout;
#ifdef GCC_COMPAT_MODE
/* this is here so we can directly compare "gcc -E" output and our output */
- fprintf(out, "# 1 \"%s\"\n", input_position.input_name);
+ fprintf(out, "# 1 \"%s\"\n", filename);
fputs("# 1 \"<built-in>\"\n", out);
fputs("# 1 \"<command-line>\"\n", out);
#endif
- next_char();
-
- next_preprocessing_token();
+ bool ok = open_input(filename);
+ assert(ok);
while(true) {
/* we're at a line begin */
}
end_of_main_loop:
- if (counted_newlines > 0) {
- fputc('\n', out);
- }
+ check_unclosed_conditionals();
+ close_input();
+ obstack_free(&input_obstack, NULL);
obstack_free(&pp_obstack, NULL);
exit_tokens();