#define MAX_PUTBACK 3
#define INCLUDE_LIMIT 199 /* 199 is for gcc "compatibility" */
+struct pp_argument_t { /* NOTE(review): presumably the token sequence of one
+                        * macro-call argument; nothing in the visible code
+                        * uses this type yet, so confirm against the
+                        * argument parser once it exists. */
+ size_t list_len;      /* presumably the number of entries in token_list */
+ token_t *token_list;  /* presumably an array of list_len tokens */
+};
+
struct pp_definition_t {
symbol_t *symbol;
source_position_t source_position;
pp_definition_t *parent_expansion;
size_t expand_pos;
- bool is_variadic : 1;
- bool is_expanding : 1;
- size_t argument_count;
- token_t *arguments;
+ bool is_variadic : 1;
+ bool is_expanding : 1;
+ bool has_parameters : 1;
+ size_t n_parameters;
+ symbol_t *parameters;
+
+ /* replacement */
size_t list_len;
- token_t *replacement_list;
+ token_t *token_list;
+
};
typedef struct pp_conditional_t pp_conditional_t;
pp_input_t *parent;
};
-pp_input_t input;
+static pp_input_t input;
#define CC input.c
static pp_input_t *input_stack;
static pp_conditional_t *conditional_stack;
-token_t pp_token;
+static token_t pp_token;
static bool resolve_escape_sequences = false;
static bool do_print_spaces = true;
static bool do_expansions;
static inline void next_char(void);
static void next_preprocessing_token(void);
static void print_line_directive(const source_position_t *pos, const char *add);
-static void print_spaces(void);
static bool open_input(const char *filename)
{
input.bufpos = NULL;
input.had_non_space = false;
input.position.input_name = filename;
- input.position.linenr = 1;
+ input.position.lineno = 1;
/* indicate that we're at a new input */
print_line_directive(&input.position, input_stack != NULL ? "1" : NULL);
if (input.bufpos >= input.bufend) {
size_t s = fread(input.buf + MAX_PUTBACK, 1,
sizeof(input.buf) - MAX_PUTBACK, input.file);
- if(s == 0) {
+ if (s == 0) {
CC = EOF;
return;
}
if(CC == '\n') { \
next_char(); \
} \
- ++input.position.linenr; \
+ ++input.position.lineno; \
code \
case '\n': \
next_char(); \
- ++input.position.linenr; \
+ ++input.position.lineno; \
code
-#define eat(c_type) do { assert(CC == c_type); next_char(); } while(0)
+#define eat(c_type) (assert(CC == c_type), next_char())
static void maybe_concat_lines(void)
{
* Returns the value of a digit.
* The only portable way to do it ...
*/
-static int digit_value(int digit) {
+static int digit_value(int digit)
+{
switch (digit) {
case '0': return 0;
case '1': return 1;
static void parse_string_literal(void)
{
- const unsigned start_linenr = input.position.linenr;
+ const unsigned start_linenr = input.position.lineno;
eat('"');
case EOF: {
source_position_t source_position;
source_position.input_name = pp_token.source_position.input_name;
- source_position.linenr = start_linenr;
+ source_position.lineno = start_linenr;
errorf(&source_position, "string has no end");
pp_token.type = TP_ERROR;
return;
const char *const result = string;
#endif
- pp_token.type = TP_STRING_LITERAL;
- pp_token.v.string.begin = result;
- pp_token.v.string.size = size;
+ pp_token.type = TP_STRING_LITERAL;
+ pp_token.literal.begin = result;
+ pp_token.literal.size = size;
}
static void parse_wide_character_constant(void)
/* TODO... */
}
-static void parse_wide_string_literal(void)
-{
- const unsigned start_linenr = input.position.linenr;
-
- assert(CC == '"');
- next_char();
-
- while(1) {
- switch(CC) {
- case '\\': {
- wchar_rep_t tc = parse_escape_sequence();
- obstack_grow(&symbol_obstack, &tc, sizeof(tc));
- break;
- }
-
- case EOF: {
- source_position_t source_position;
- source_position.input_name = pp_token.source_position.input_name;
- source_position.linenr = start_linenr;
- errorf(&source_position, "string has no end");
- pp_token.type = TP_ERROR;
- return;
- }
-
- case '"':
- next_char();
- goto end_of_string;
-
- default: {
- wchar_rep_t tc = CC;
- obstack_grow(&symbol_obstack, &tc, sizeof(tc));
- next_char();
- break;
- }
- }
- }
-
-end_of_string:;
- /* add finishing 0 to the string */
- static const wchar_rep_t nul = L'\0';
- obstack_grow(&symbol_obstack, &nul, sizeof(nul));
-
- const size_t size
- = (size_t)obstack_object_size(&symbol_obstack) / sizeof(wchar_rep_t);
- const wchar_rep_t *const string = obstack_finish(&symbol_obstack);
-
-#if 0 /* TODO hash */
- /* check if there is already a copy of the string */
- const wchar_rep_t *const result = strset_insert(&stringset, string);
- if(result != string) {
- obstack_free(&symbol_obstack, string);
- }
-#else
- const wchar_rep_t *const result = string;
-#endif
-
- pp_token.type = TP_WIDE_STRING_LITERAL;
- pp_token.v.wide_string.begin = result;
- pp_token.v.wide_string.size = size;
-}
-
static void parse_character_constant(void)
{
- const unsigned start_linenr = input.position.linenr;
+ const unsigned start_linenr = input.position.lineno;
eat('\'');
case EOF: {
source_position_t source_position;
source_position.input_name = pp_token.source_position.input_name;
- source_position.linenr = start_linenr;
+ source_position.lineno = start_linenr;
errorf(&source_position, "EOF while parsing character constant");
pp_token.type = TP_ERROR;
return;
const size_t size = (size_t)obstack_object_size(&symbol_obstack);
const char *const string = obstack_finish(&symbol_obstack);
- pp_token.type = TP_CHARACTER_CONSTANT;
- pp_token.v.string.begin = string;
- pp_token.v.string.size = size;
+ pp_token.type = TP_CHARACTER_CONSTANT;
+ pp_token.literal.begin = string;
+ pp_token.literal.size = size;
}
#define SYMBOL_CHARS_WITHOUT_E_P \
current_expansion = definition;
goto restart;
}
- pp_token = definition->replacement_list[definition->expand_pos];
+ pp_token = definition->token_list[definition->expand_pos];
++definition->expand_pos;
if(pp_token.type != TP_IDENTIFIER)
return;
/* if it was an identifier then we might need to expand again */
- pp_definition_t *symbol_definition = pp_token.v.symbol->pp_definition;
+ pp_definition_t *symbol_definition = pp_token.symbol->pp_definition;
if(symbol_definition != NULL && !symbol_definition->is_expanding) {
symbol_definition->parent_expansion = definition;
symbol_definition->expand_pos = 0;
}
}
+/**
+ * Skips the remainder of a line comment.
+ *
+ * Consumes characters until the current character is a line terminator
+ * ('\n' or '\r') or EOF; that terminating character is left unconsumed.
+ * When space printing is enabled the whole comment is accounted for as one
+ * additional space.
+ */
+static void skip_line_comment(void)
+{
+	if (do_print_spaces)
+		++counted_spaces;
+
+	while (CC != EOF && CC != '\n' && CC != '\r')
+		next_char();
+}
+
+/**
+ * Skips the remainder of a block comment, up to and including its closing
+ * star-slash sequence.
+ *
+ * When space printing is enabled the comment counts as one space; every
+ * newline inside it bumps counted_newlines and resets counted_spaces.
+ * Reaching EOF before the comment is closed reports an error located at
+ * the line on which skipping started.
+ */
+static void skip_multiline_comment(void)
+{
+	/* remembered so an unterminated comment is reported where it began */
+	const unsigned comment_start_line = input.position.lineno;
+
+	if (do_print_spaces)
+		++counted_spaces;
+
+	for (;;) {
+		switch (CC) {
+		case '*':
+			next_char();
+			if (CC != '/')
+				break;
+			next_char();
+			return;
+
+		case '/':
+			/* TODO: a '*' following here opens a nested comment, warn */
+			next_char();
+			break;
+
+		MATCH_NEWLINE(
+			if (do_print_spaces) {
+				++counted_newlines;
+				counted_spaces = 0;
+			}
+			break;
+		)
+
+		case EOF: {
+			source_position_t eof_position;
+			eof_position.input_name = pp_token.source_position.input_name;
+			eof_position.lineno     = comment_start_line;
+			errorf(&eof_position, "at end of file while looking for comment end");
+			return;
+		}
+
+		default:
+			next_char();
+			break;
+		}
+	}
+}
+
+/**
+ * Skips whitespace and comments so that the input is positioned at the
+ * first character of the next preprocessing token.
+ *
+ * Blanks and tabs are always skipped, as are line and block comments. A
+ * lone '/' that does not start a comment is restored as the current
+ * character before returning.
+ *
+ * @param skip_newline  if true, line breaks ("\n", "\r" and "\r\n") are
+ *                      skipped as well and the line number is advanced;
+ *                      if false, skipping stops in front of a line break.
+ */
+static void skip_spaces(bool skip_newline)
+{
+	for (;;) {
+		if (CC == ' ' || CC == '\t') {
+			if (do_print_spaces)
+				++counted_spaces;
+			next_char();
+		} else if (CC == '/') {
+			next_char();
+			if (CC == '/') {
+				next_char();
+				skip_line_comment();
+			} else if (CC == '*') {
+				next_char();
+				skip_multiline_comment();
+			} else {
+				/* plain '/': undo the lookahead and stop */
+				put_back(CC);
+				CC = '/';
+				return;
+			}
+		} else if (CC == '\r' || CC == '\n') {
+			if (!skip_newline)
+				return;
+
+			/* treat "\r\n" as a single line break */
+			const bool was_cr = CC == '\r';
+			next_char();
+			if (was_cr && CC == '\n')
+				next_char();
+
+			++input.position.lineno;
+			/* NOTE(review): unlike skip_multiline_comment, counted_spaces
+			 * is not reset here on a newline - confirm this is intended */
+			if (do_print_spaces)
+				++counted_newlines;
+		} else {
+			return;
+		}
+	}
+}
+
+/**
+ * Consumes the current preprocessing token, which the caller expects to be
+ * of the given type, and advances to the next one.
+ *
+ * @param type  expected type of the current token; only checked through
+ *              assert(), so it is unused when assertions are compiled out.
+ */
+static void eat_pp(int type)
+{
+	assert(pp_token.type == type);
+	(void) type; /* silences the unused-parameter warning without asserts */
+	next_preprocessing_token();
+}
+
static void parse_symbol(void)
{
obstack_1grow(&symbol_obstack, (char) CC);
char *string = obstack_finish(&symbol_obstack);
/* might be a wide string or character constant ( L"string"/L'c' ) */
- if(CC == '"' && string[0] == 'L' && string[1] == '\0') {
+ if (CC == '"' && string[0] == 'L' && string[1] == '\0') {
obstack_free(&symbol_obstack, string);
- parse_wide_string_literal();
+ /* TODO */
return;
- } else if(CC == '\'' && string[0] == 'L' && string[1] == '\0') {
+ } else if (CC == '\'' && string[0] == 'L' && string[1] == '\0') {
obstack_free(&symbol_obstack, string);
parse_wide_character_constant();
return;
symbol_t *symbol = symbol_table_insert(string);
- pp_token.type = symbol->pp_ID;
- pp_token.v.symbol = symbol;
+ pp_token.type = symbol->pp_ID;
+ pp_token.symbol = symbol;
/* we can free the memory from symbol obstack if we already had an entry in
* the symbol table */
- if(symbol->string != string) {
+ if (symbol->string != string) {
obstack_free(&symbol_obstack, string);
}
+ if (!do_expansions)
+ return;
pp_definition_t *pp_definition = symbol->pp_definition;
- if(do_expansions && pp_definition != NULL) {
- pp_definition->expand_pos = 0;
- pp_definition->is_expanding = true,
- current_expansion = pp_definition;
- expand_next();
+ if (pp_definition == NULL)
+ return;
+
+ if (pp_definition->has_parameters) {
+ skip_spaces(true);
+ /* no opening brace -> no expansion */
+ if (CC != '(')
+ return;
+ next_preprocessing_token();
+ eat_pp('(');
+
+ /* parse arguments (TODO) */
+ while (pp_token.type != TP_EOF && pp_token.type != ')')
+ next_preprocessing_token();
+ next_preprocessing_token();
}
+
+ pp_definition->expand_pos = 0;
+ pp_definition->is_expanding = true,
+ current_expansion = pp_definition;
+ expand_next();
}
static void parse_number(void)
size_t size = obstack_object_size(&symbol_obstack);
char *string = obstack_finish(&symbol_obstack);
- pp_token.type = TP_NUMBER;
- pp_token.v.string.begin = string;
- pp_token.v.string.size = size;
-}
-
-static void skip_multiline_comment(void)
-{
- unsigned start_linenr = input.position.linenr;
-
- while(1) {
- switch(CC) {
- case '/':
- next_char();
- if (CC == '*') {
- /* TODO: nested comment, warn here */
- }
- break;
- case '*':
- next_char();
- if(CC == '/') {
- next_char();
- return;
- }
- break;
-
- MATCH_NEWLINE(
- if(do_print_spaces) {
- counted_newlines++;
- counted_spaces = 0;
- }
- break;
- )
-
- case EOF: {
- source_position_t source_position;
- source_position.input_name = pp_token.source_position.input_name;
- source_position.linenr = start_linenr;
- errorf(&source_position, "at end of file while looking for comment end");
- return;
- }
-
- default:
- next_char();
- break;
- }
- }
-}
-
-static void skip_line_comment(void)
-{
- while(1) {
- switch(CC) {
- case EOF:
- return;
-
- case '\n':
- case '\r':
- return;
-
- default:
- next_char();
- break;
- }
- }
+ pp_token.type = TP_NUMBER;
+ pp_token.literal.begin = string;
+ pp_token.literal.size = size;
}
-
#define MAYBE_PROLOG \
next_char(); \
while(1) { \
#define ELSE_CODE(code) \
default: \
- code; \
+ code \
+ return; \
} \
} /* end of while(1) */ \
- break;
#define ELSE(set_type) \
ELSE_CODE( \
pp_token.type = set_type; \
- return; \
)
static void next_preprocessing_token(void)
switch(CC) {
case ' ':
case '\t':
- if(do_print_spaces)
+ if (do_print_spaces)
counted_spaces++;
- next_char();
+ next_char();
goto restart;
MATCH_NEWLINE(
put_back(CC);
CC = '.';
pp_token.type = '.';
- return;
)
ELSE('.')
case '&':
case '*':
next_char();
skip_multiline_comment();
- if(do_print_spaces)
- counted_spaces++;
goto restart;
case '/':
next_char();
skip_line_comment();
- if(do_print_spaces)
- counted_spaces++;
goto restart;
ELSE('/')
case '%':
put_back(CC);
CC = '%';
pp_token.type = '#';
- return;
)
ELSE('#')
ELSE('%')
case '\?': fputs("\\?", out); break;
default:
if(!isprint(*c)) {
- fprintf(out, "\\%03o", *c);
+ fprintf(out, "\\%03o", (unsigned)*c);
break;
}
fputc(*c, out);
static void print_line_directive(const source_position_t *pos, const char *add)
{
- fprintf(out, "# %d ", pos->linenr);
+ fprintf(out, "# %u ", pos->lineno);
print_quoted_string(pos->input_name);
if (add != NULL) {
fputc(' ', out);
switch(pp_token.type) {
case TP_IDENTIFIER:
- fputs(pp_token.v.symbol->string, out);
+ fputs(pp_token.symbol->string, out);
break;
case TP_NUMBER:
- fputs(pp_token.v.string.begin, out);
+ fputs(pp_token.literal.begin, out);
break;
case TP_STRING_LITERAL:
fputc('"', out);
- fputs(pp_token.v.string.begin, out);
+ fputs(pp_token.literal.begin, out);
fputc('"', out);
break;
case '\n':
}
}
-static void eat_pp(preprocessor_token_type_t type)
-{
- (void) type;
- assert(pp_token.type == type);
- next_preprocessing_token();
-}
-
static void eat_pp_directive(void)
{
while(pp_token.type != '\n' && pp_token.type != TP_EOF) {
return true;
}
-static bool wide_strings_equal(const wide_string_t *string1,
- const wide_string_t *string2)
-{
- size_t size = string1->size;
- if(size != string2->size)
- return false;
-
- const wchar_rep_t *c1 = string1->begin;
- const wchar_rep_t *c2 = string2->begin;
- for(size_t i = 0; i < size; ++i, ++c1, ++c2) {
- if(*c1 != *c2)
- return false;
- }
- return true;
-}
-
static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
{
if(token1->type != token2->type)
/* TODO */
return false;
case TP_IDENTIFIER:
- return token1->v.symbol == token2->v.symbol;
+ return token1->symbol == token2->symbol;
case TP_NUMBER:
case TP_CHARACTER_CONSTANT:
case TP_STRING_LITERAL:
- return strings_equal(&token1->v.string, &token2->v.string);
+ return strings_equal(&token1->literal, &token2->literal);
- case TP_WIDE_CHARACTER_CONSTANT:
- case TP_WIDE_STRING_LITERAL:
- return wide_strings_equal(&token1->v.wide_string,
- &token2->v.wide_string);
default:
return true;
}
return false;
size_t len = definition1->list_len;
- const token_t *t1 = definition1->replacement_list;
- const token_t *t2 = definition2->replacement_list;
+ const token_t *t1 = definition1->token_list;
+ const token_t *t2 = definition2->token_list;
for(size_t i = 0; i < len; ++i, ++t1, ++t2) {
if(!pp_tokens_equal(t1, t2))
return false;
static void parse_define_directive(void)
{
eat_pp(TP_define);
+ assert(obstack_object_size(&pp_obstack) == 0);
- if(pp_token.type != TP_IDENTIFIER) {
+ if (pp_token.type != TP_IDENTIFIER) {
errorf(&pp_token.source_position,
- "expected identifier after #define, got '%T'", &pp_token);
- eat_pp_directive();
- return;
+ "expected identifier after #define, got '%t'", &pp_token);
+ goto error_out;
}
- symbol_t *symbol = pp_token.v.symbol;
+ symbol_t *symbol = pp_token.symbol;
pp_definition_t *new_definition
= obstack_alloc(&pp_obstack, sizeof(new_definition[0]));
/* this is probably the only place where spaces are significant in the
* lexer (except for the fact that they separate tokens). #define b(x)
* is something else than #define b (x) */
- //token_t *arguments = NULL;
- if(CC == '(') {
+ if (CC == '(') {
+ /* eat the '(' */
next_preprocessing_token();
- while(pp_token.type != ')') {
- if(pp_token.type == TP_DOTDOTDOT) {
+ /* get next token after '(' */
+ next_preprocessing_token();
+
+ while (true) {
+ switch (pp_token.type) {
+ case TP_DOTDOTDOT:
new_definition->is_variadic = true;
next_preprocessing_token();
- if(pp_token.type != ')') {
+ if (pp_token.type != ')') {
errorf(&input.position,
"'...' not at end of macro argument list");
- continue;
+ goto error_out;
}
- } else if(pp_token.type != TP_IDENTIFIER) {
+ break;
+ case TP_IDENTIFIER:
+ obstack_ptr_grow(&pp_obstack, pp_token.symbol);
next_preprocessing_token();
+
+ if (pp_token.type == ',') {
+ next_preprocessing_token();
+ break;
+ }
+
+ if (pp_token.type != ')') {
+ errorf(&pp_token.source_position,
+ "expected ',' or ')' after identifier, got '%t'",
+ &pp_token);
+ goto error_out;
+ }
+ break;
+ case ')':
+ next_preprocessing_token();
+ goto finish_argument_list;
+ default:
+ errorf(&pp_token.source_position,
+ "expected identifier, '...' or ')' in #define argument list, got '%t'",
+ &pp_token);
+ goto error_out;
}
}
+
+ finish_argument_list:
+ new_definition->has_parameters = true;
+ new_definition->n_parameters
+ = obstack_object_size(&pp_obstack) / sizeof(new_definition->parameters[0]);
+ new_definition->parameters = obstack_finish(&pp_obstack);
} else {
next_preprocessing_token();
}
/* construct a new pp_definition on the obstack */
assert(obstack_object_size(&pp_obstack) == 0);
size_t list_len = 0;
- while(pp_token.type != '\n' && pp_token.type != TP_EOF) {
+ while (pp_token.type != '\n' && pp_token.type != TP_EOF) {
obstack_grow(&pp_obstack, &pp_token, sizeof(pp_token));
++list_len;
next_preprocessing_token();
}
- new_definition->list_len = list_len;
- new_definition->replacement_list = obstack_finish(&pp_obstack);
+ new_definition->list_len = list_len;
+ new_definition->token_list = obstack_finish(&pp_obstack);
pp_definition_t *old_definition = symbol->pp_definition;
- if(old_definition != NULL) {
- if(!pp_definitions_equal(old_definition, new_definition)) {
- warningf(&input.position, "multiple definition of macro '%Y' (first defined %P)",
- symbol, &old_definition->source_position);
+ if (old_definition != NULL) {
+ if (!pp_definitions_equal(old_definition, new_definition)) {
+ warningf(WARN_OTHER, &input.position, "multiple definition of macro '%Y' (first defined %P)", symbol, &old_definition->source_position);
} else {
/* reuse the old definition */
obstack_free(&pp_obstack, new_definition);
}
symbol->pp_definition = new_definition;
+ return;
+
+error_out:
+ if (obstack_object_size(&pp_obstack) > 0) {
+ char *ptr = obstack_finish(&pp_obstack);
+ obstack_free(&pp_obstack, ptr);
+ }
+ eat_pp_directive();
}
static void parse_undef_directive(void)
if(pp_token.type != TP_IDENTIFIER) {
errorf(&input.position,
- "expected identifier after #undef, got '%T'", &pp_token);
+ "expected identifier after #undef, got '%t'", &pp_token);
eat_pp_directive();
return;
}
- symbol_t *symbol = pp_token.v.symbol;
+ symbol_t *symbol = pp_token.symbol;
symbol->pp_definition = NULL;
next_preprocessing_token();
if(pp_token.type != '\n') {
- warningf(&input.position, "extra tokens at end of #undef directive");
+ warningf(WARN_OTHER, &input.position, "extra tokens at end of #undef directive");
}
/* eat until '\n' */
eat_pp_directive();
static const char *parse_headername(void)
{
- /* behind an #include we can have the special headername lexems, check
- * for them here */
+ /* behind an #include we can have the special headername lexemes.
+ * They're only allowed behind an #include, so they're not recognized
+ * by the normal next_preprocessing_token. We handle them as a special
+ * exception here. */
- /* skip spaces */
- while (CC == ' ' || CC == '\t') {
- next_char();
- }
+ /* skip spaces so we reach start of next preprocessing token */
+ skip_spaces(false);
assert(obstack_object_size(&input_obstack) == 0);
+ /* check whether we have a "... or <... headername */
switch (CC) {
case '<':
/* for now until we have proper searchpath handling */
return headername;
}
-static void parse_include_directive(void)
+static bool parse_include_directive(void)
{
/* don't eat the TP_include here!
* we need an alternative parsing for the next token */
const char *headername = parse_headername();
if (headername == NULL) {
eat_pp_directive();
- return;
+ return false;
}
if (pp_token.type != '\n' && pp_token.type != TP_EOF) {
- warningf(&pp_token.source_position,
- "extra tokens at end of #include directive");
+ warningf(WARN_OTHER, &pp_token.source_position, "extra tokens at end of #include directive");
eat_pp_directive();
}
errorf(&pp_token.source_position, "#include nested too deeply");
/* eat \n or EOF */
next_preprocessing_token();
- return;
+ return false;
}
+ /* we have to reenable space counting and macro expansion here,
+ * because it is still disabled in directive parsing,
+ * but we will trigger a preprocessing token reading of the new file
+ * now and need expansions/space counting */
+ do_print_spaces = true;
+ do_expansions = true;
+
/* switch inputs */
push_input();
bool res = open_input(headername);
errorf(&pp_token.source_position,
"failed including '%s': %s", headername, strerror(errno));
pop_restore_input();
- return;
+ return false;
}
+
+ return true;
}
static pp_conditional_t *push_conditional(void)
if (pp_token.type != TP_IDENTIFIER) {
errorf(&pp_token.source_position,
- "expected identifier after #%s, got '%T'",
+ "expected identifier after #%s, got '%t'",
is_ifndef ? "ifndef" : "ifdef", &pp_token);
eat_pp_directive();
/* just take the true case in the hope to avoid further errors */
condition = true;
} else {
- symbol_t *symbol = pp_token.v.symbol;
+ symbol_t *symbol = pp_token.symbol;
pp_definition_t *pp_definition = symbol->pp_definition;
next_preprocessing_token();
if (pp_token.type != '\n') {
if (!skip_mode) {
- warningf(&pp_token.source_position, "extra tokens at end of #else");
+ warningf(WARN_OTHER, &pp_token.source_position, "extra tokens at end of #else");
}
eat_pp_directive();
}
if (pp_token.type != '\n') {
if (!skip_mode) {
- warningf(&pp_token.source_position,
- "extra tokens at end of #endif");
+ warningf(WARN_OTHER, &pp_token.source_position, "extra tokens at end of #endif");
}
eat_pp_directive();
}
case TP_endif:
parse_endif_directive();
break;
- case TP_include:
- parse_include_directive();
- /* no need to parse ending '\n' */
- do_print_spaces = true;
- do_expansions = true;
- return;
+ case TP_include: {
+ bool in_new_source = parse_include_directive();
+ /* no need to do anything if source file switched */
+ if (in_new_source)
+ return;
+ break;
+ }
+ case '\n':
+ /* the nop directive */
+ break;
default:
errorf(&pp_token.source_position,
- "invalid preprocessing directive #%T", &pp_token);
+ "invalid preprocessing directive #%t", &pp_token);
eat_pp_directive();
break;
}