X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=preprocessor.c;h=5089a687b99040ffad00e2b2a0160769679d9c8e;hb=66285deadc46366723208fad0adcf28c5dc4f36d;hp=f1e23bb2d19df8ba4a60eae13c515fed4a902137;hpb=e7758b4be72d9453a4fd333034712295f7d1e16d;p=cparser diff --git a/preprocessor.c b/preprocessor.c index f1e23bb..5089a68 100644 --- a/preprocessor.c +++ b/preprocessor.c @@ -16,38 +16,156 @@ //#define DEBUG_CHARS #define MAX_PUTBACK 3 +#define INCLUDE_LIMIT 199 /* 199 is for gcc "compatibility" */ + +struct pp_argument_t { + size_t list_len; + token_t *token_list; +}; struct pp_definition_t { symbol_t *symbol; source_position_t source_position; pp_definition_t *parent_expansion; size_t expand_pos; - bool is_variadic : 1; - bool is_expanding : 1; - size_t argument_count; - token_t *arguments; + bool is_variadic : 1; + bool is_expanding : 1; + bool has_parameters : 1; + size_t n_parameters; + symbol_t *parameters; + + /* replacement */ size_t list_len; - token_t *replacement_list; + token_t *token_list; + }; -static int c; +typedef struct pp_conditional_t pp_conditional_t; +struct pp_conditional_t { + source_position_t source_position; + bool condition; + bool in_else; + bool skip; /**< conditional in skip mode (then+else gets skipped) */ + pp_conditional_t *parent; +}; + +typedef struct pp_input_t pp_input_t; +struct pp_input_t { + FILE *file; + int c; + char buf[1024+MAX_PUTBACK]; + const char *bufend; + const char *bufpos; + source_position_t position; + bool had_non_space; + pp_input_t *parent; +}; + +pp_input_t input; +#define CC input.c + +static pp_input_t *input_stack; +static unsigned n_inputs; +static struct obstack input_obstack; + +static pp_conditional_t *conditional_stack; + token_t pp_token; -static FILE *input; -static char buf[1024 + MAX_PUTBACK]; -static const char *bufend; -static const char *bufpos; static bool resolve_escape_sequences = false; -static bool print_spaces = true; +static bool do_print_spaces = true; +static bool do_expansions; +static 
bool skip_mode; static FILE *out; static struct obstack pp_obstack; static unsigned counted_newlines; static unsigned counted_spaces; -static source_position_t input_position; static const char *printed_input_name = NULL; static pp_definition_t *current_expansion = NULL; -static bool do_expansions; +static inline void next_char(void); static void next_preprocessing_token(void); +static void print_line_directive(const source_position_t *pos, const char *add); + +static bool open_input(const char *filename) +{ + FILE *file = fopen(filename, "r"); + if (file == NULL) + return false; + + input.file = file; + input.bufend = NULL; + input.bufpos = NULL; + input.had_non_space = false; + input.position.input_name = filename; + input.position.lineno = 1; + + /* indicate that we're at a new input */ + print_line_directive(&input.position, input_stack != NULL ? "1" : NULL); + + counted_newlines = 0; + counted_spaces = 0; + + /* read first char and first token */ + next_char(); + next_preprocessing_token(); + + return true; +} + +static void close_input(void) +{ + /* ensure we have a newline at EOF */ + if (input.had_non_space) { + fputc('\n', out); + } + + assert(input.file != NULL); + + fclose(input.file); + input.file = NULL; + input.bufend = NULL; + input.bufpos = NULL; + input.c = EOF; +} + +static void push_input(void) +{ + pp_input_t *saved_input + = obstack_alloc(&input_obstack, sizeof(*saved_input)); + + memcpy(saved_input, &input, sizeof(*saved_input)); + + /* adjust buffer positions */ + if (input.bufpos != NULL) + saved_input->bufpos = saved_input->buf + (input.bufpos - input.buf); + if (input.bufend != NULL) + saved_input->bufend = saved_input->buf + (input.bufend - input.buf); + + saved_input->parent = input_stack; + input_stack = saved_input; + ++n_inputs; +} + +static void pop_restore_input(void) +{ + assert(n_inputs > 0); + assert(input_stack != NULL); + + pp_input_t *saved_input = input_stack; + + memcpy(&input, saved_input, sizeof(input)); + input.parent = 
NULL; + + /* adjust buffer positions */ + if (saved_input->bufpos != NULL) + input.bufpos = input.buf + (saved_input->bufpos - saved_input->buf); + if (saved_input->bufend != NULL) + input.bufend = input.buf + (saved_input->bufend - saved_input->buf); + + input_stack = saved_input->parent; + obstack_free(&input_obstack, saved_input); + --n_inputs; +} /** * Prints a parse error message at the current token. @@ -61,18 +179,18 @@ static void parse_error(const char *msg) static inline void next_real_char(void) { - assert(bufpos <= bufend); - if (bufpos >= bufend) { - size_t s = fread(buf + MAX_PUTBACK, 1, sizeof(buf) - MAX_PUTBACK, - input); - if(s == 0) { - c = EOF; + assert(input.bufpos <= input.bufend); + if (input.bufpos >= input.bufend) { + size_t s = fread(input.buf + MAX_PUTBACK, 1, + sizeof(input.buf) - MAX_PUTBACK, input.file); + if (s == 0) { + CC = EOF; return; } - bufpos = buf + MAX_PUTBACK; - bufend = buf + MAX_PUTBACK + s; + input.bufpos = input.buf + MAX_PUTBACK; + input.bufend = input.buf + MAX_PUTBACK + s; } - c = *bufpos++; + CC = *input.bufpos++; } /** @@ -82,44 +200,42 @@ static inline void next_real_char(void) */ static inline void put_back(int pc) { - assert(bufpos > buf); - *(--bufpos - buf + buf) = (char) pc; + assert(input.bufpos > input.buf); + *(--input.bufpos - input.buf + input.buf) = (char) pc; #ifdef DEBUG_CHARS printf("putback '%c'\n", pc); #endif } -static inline void next_char(void); - #define MATCH_NEWLINE(code) \ case '\r': \ next_char(); \ - if(c == '\n') { \ + if(CC == '\n') { \ next_char(); \ } \ - ++input_position.linenr; \ + ++input.position.lineno; \ code \ case '\n': \ next_char(); \ - ++input_position.linenr; \ + ++input.position.lineno; \ code -#define eat(c_type) do { assert(c == c_type); next_char(); } while(0) +#define eat(c_type) do { assert(CC == c_type); next_char(); } while(0) static void maybe_concat_lines(void) { eat('\\'); - switch(c) { + switch(CC) { MATCH_NEWLINE(return;) default: break; } - put_back(c); - c = 
'\\'; + put_back(CC); + CC = '\\'; } /** @@ -131,42 +247,42 @@ static inline void next_char(void) next_real_char(); /* filter trigraphs and concatenated lines */ - if(UNLIKELY(c == '\\')) { + if(UNLIKELY(CC == '\\')) { maybe_concat_lines(); goto end_of_next_char; } - if(LIKELY(c != '?')) + if(LIKELY(CC != '?')) goto end_of_next_char; next_real_char(); - if(LIKELY(c != '?')) { - put_back(c); - c = '?'; + if(LIKELY(CC != '?')) { + put_back(CC); + CC = '?'; goto end_of_next_char; } next_real_char(); - switch(c) { - case '=': c = '#'; break; - case '(': c = '['; break; - case '/': c = '\\'; maybe_concat_lines(); break; - case ')': c = ']'; break; - case '\'': c = '^'; break; - case '<': c = '{'; break; - case '!': c = '|'; break; - case '>': c = '}'; break; - case '-': c = '~'; break; + switch(CC) { + case '=': CC = '#'; break; + case '(': CC = '['; break; + case '/': CC = '\\'; maybe_concat_lines(); break; + case ')': CC = ']'; break; + case '\'': CC = '^'; break; + case '<': CC = '{'; break; + case '!': CC = '|'; break; + case '>': CC = '}'; break; + case '-': CC = '~'; break; default: - put_back(c); + put_back(CC); put_back('?'); - c = '?'; + CC = '?'; break; } end_of_next_char:; #ifdef DEBUG_CHARS - printf("nchar '%c'\n", c); + printf("nchar '%c'\n", CC); #endif } @@ -198,7 +314,8 @@ static inline bool is_octal_digit(int chr) * Returns the value of a digit. * The only portable way to do it ... 
*/ -static int digit_value(int digit) { +static int digit_value(int digit) +{ switch (digit) { case '0': return 0; case '1': return 1; @@ -236,11 +353,11 @@ static int parse_octal_sequence(const int first_digit) { assert(is_octal_digit(first_digit)); int value = digit_value(first_digit); - if (!is_octal_digit(c)) return value; - value = 8 * value + digit_value(c); + if (!is_octal_digit(CC)) return value; + value = 8 * value + digit_value(CC); next_char(); - if (!is_octal_digit(c)) return value; - value = 8 * value + digit_value(c); + if (!is_octal_digit(CC)) return value; + value = 8 * value + digit_value(CC); next_char(); if(char_is_signed) { @@ -256,8 +373,8 @@ static int parse_octal_sequence(const int first_digit) static int parse_hex_sequence(void) { int value = 0; - while(isxdigit(c)) { - value = 16 * value + digit_value(c); + while(isxdigit(CC)) { + value = 16 * value + digit_value(CC); next_char(); } @@ -275,7 +392,7 @@ static int parse_escape_sequence(void) { eat('\\'); - int ec = c; + int ec = CC; next_char(); switch(ec) { @@ -312,21 +429,21 @@ static int parse_escape_sequence(void) static void parse_string_literal(void) { - const unsigned start_linenr = input_position.linenr; + const unsigned start_linenr = input.position.lineno; eat('"'); int tc; while(1) { - switch(c) { + switch(CC) { case '\\': if(resolve_escape_sequences) { tc = parse_escape_sequence(); obstack_1grow(&symbol_obstack, (char) tc); } else { - obstack_1grow(&symbol_obstack, (char) c); + obstack_1grow(&symbol_obstack, (char) CC); next_char(); - obstack_1grow(&symbol_obstack, (char) c); + obstack_1grow(&symbol_obstack, (char) CC); next_char(); } break; @@ -334,7 +451,7 @@ static void parse_string_literal(void) case EOF: { source_position_t source_position; source_position.input_name = pp_token.source_position.input_name; - source_position.linenr = start_linenr; + source_position.lineno = start_linenr; errorf(&source_position, "string has no end"); pp_token.type = TP_ERROR; return; @@ -345,7 
+462,7 @@ static void parse_string_literal(void) goto end_of_string; default: - obstack_1grow(&symbol_obstack, (char) c); + obstack_1grow(&symbol_obstack, (char) CC); next_char(); break; } @@ -367,9 +484,9 @@ end_of_string: const char *const result = string; #endif - pp_token.type = TP_STRING_LITERAL; - pp_token.v.string.begin = result; - pp_token.v.string.size = size; + pp_token.type = TP_STRING_LITERAL; + pp_token.literal.begin = result; + pp_token.literal.size = size; } static void parse_wide_character_constant(void) @@ -378,7 +495,7 @@ static void parse_wide_character_constant(void) int found_char = 0; while(1) { - switch(c) { + switch(CC) { case '\\': found_char = parse_escape_sequence(); break; @@ -403,7 +520,7 @@ static void parse_wide_character_constant(void) "constant"); goto end_of_wide_char_constant; } else { - found_char = c; + found_char = CC; next_char(); } break; @@ -415,76 +532,15 @@ end_of_wide_char_constant: /* TODO... */ } -static void parse_wide_string_literal(void) -{ - const unsigned start_linenr = input_position.linenr; - - assert(c == '"'); - next_char(); - - while(1) { - switch(c) { - case '\\': { - wchar_rep_t tc = parse_escape_sequence(); - obstack_grow(&symbol_obstack, &tc, sizeof(tc)); - break; - } - - case EOF: { - source_position_t source_position; - source_position.input_name = pp_token.source_position.input_name; - source_position.linenr = start_linenr; - errorf(&source_position, "string has no end"); - pp_token.type = TP_ERROR; - return; - } - - case '"': - next_char(); - goto end_of_string; - - default: { - wchar_rep_t tc = c; - obstack_grow(&symbol_obstack, &tc, sizeof(tc)); - next_char(); - break; - } - } - } - -end_of_string:; - /* add finishing 0 to the string */ - static const wchar_rep_t nul = L'\0'; - obstack_grow(&symbol_obstack, &nul, sizeof(nul)); - - const size_t size - = (size_t)obstack_object_size(&symbol_obstack) / sizeof(wchar_rep_t); - const wchar_rep_t *const string = obstack_finish(&symbol_obstack); - -#if 0 /* 
TODO hash */ - /* check if there is already a copy of the string */ - const wchar_rep_t *const result = strset_insert(&stringset, string); - if(result != string) { - obstack_free(&symbol_obstack, string); - } -#else - const wchar_rep_t *const result = string; -#endif - - pp_token.type = TP_WIDE_STRING_LITERAL; - pp_token.v.wide_string.begin = result; - pp_token.v.wide_string.size = size; -} - static void parse_character_constant(void) { - const unsigned start_linenr = input_position.linenr; + const unsigned start_linenr = input.position.lineno; eat('\''); int tc; while(1) { - switch(c) { + switch(CC) { case '\\': tc = parse_escape_sequence(); obstack_1grow(&symbol_obstack, (char) tc); @@ -498,7 +554,7 @@ static void parse_character_constant(void) case EOF: { source_position_t source_position; source_position.input_name = pp_token.source_position.input_name; - source_position.linenr = start_linenr; + source_position.lineno = start_linenr; errorf(&source_position, "EOF while parsing character constant"); pp_token.type = TP_ERROR; return; @@ -509,7 +565,7 @@ static void parse_character_constant(void) goto end_of_char_constant; default: - obstack_1grow(&symbol_obstack, (char) c); + obstack_1grow(&symbol_obstack, (char) CC); next_char(); break; @@ -520,9 +576,9 @@ end_of_char_constant:; const size_t size = (size_t)obstack_object_size(&symbol_obstack); const char *const string = obstack_finish(&symbol_obstack); - pp_token.type = TP_CHARACTER_CONSTANT; - pp_token.v.string.begin = string; - pp_token.v.string.size = size; + pp_token.type = TP_CHARACTER_CONSTANT; + pp_token.literal.begin = string; + pp_token.literal.size = size; } #define SYMBOL_CHARS_WITHOUT_E_P \ @@ -612,6 +668,8 @@ restart: pp_definition_t *parent = definition->parent_expansion; definition->parent_expansion = NULL; definition->is_expanding = false; + + /* it was the outermost expansion, parse normal pptoken */ if(parent == NULL) { current_expansion = NULL; next_preprocessing_token(); @@ -621,13 +679,14 @@ 
restart: current_expansion = definition; goto restart; } - pp_token = definition->replacement_list[definition->expand_pos]; + pp_token = definition->token_list[definition->expand_pos]; ++definition->expand_pos; if(pp_token.type != TP_IDENTIFIER) return; - pp_definition_t *symbol_definition = pp_token.v.symbol->pp_definition; + /* if it was an identifier then we might need to expand again */ + pp_definition_t *symbol_definition = pp_token.symbol->pp_definition; if(symbol_definition != NULL && !symbol_definition->is_expanding) { symbol_definition->parent_expansion = definition; symbol_definition->expand_pos = 0; @@ -638,16 +697,145 @@ restart: } } +static void skip_line_comment(void) +{ + if(do_print_spaces) + counted_spaces++; + + while(1) { + switch(CC) { + case EOF: + return; + + case '\n': + case '\r': + return; + + default: + next_char(); + break; + } + } +} + +static void skip_multiline_comment(void) +{ + if(do_print_spaces) + counted_spaces++; + + unsigned start_linenr = input.position.lineno; + while(1) { + switch(CC) { + case '/': + next_char(); + if (CC == '*') { + /* TODO: nested comment, warn here */ + } + break; + case '*': + next_char(); + if(CC == '/') { + next_char(); + return; + } + break; + + MATCH_NEWLINE( + if(do_print_spaces) { + counted_newlines++; + counted_spaces = 0; + } + break; + ) + + case EOF: { + source_position_t source_position; + source_position.input_name = pp_token.source_position.input_name; + source_position.lineno = start_linenr; + errorf(&source_position, "at end of file while looking for comment end"); + return; + } + + default: + next_char(); + break; + } + } +} + +/* skip spaces advancing at the start of the next preprocessing token */ +static void skip_spaces(bool skip_newline) +{ + while (true) { + switch (CC) { + case ' ': + case '\t': + if (do_print_spaces) + counted_spaces++; + next_char(); + continue; + case '/': + next_char(); + if (CC == '/') { + next_char(); + skip_line_comment(); + continue; + } else if (CC == '*') 
{ + next_char(); + skip_multiline_comment(); + continue; + } else { + put_back(CC); + CC = '/'; + } + return; + + case '\r': + if (!skip_newline) + return; + + next_char(); + if(CC == '\n') { + next_char(); + } + ++input.position.lineno; + if (do_print_spaces) + ++counted_newlines; + continue; + + case '\n': + if (!skip_newline) + return; + + next_char(); + ++input.position.lineno; + if (do_print_spaces) + ++counted_newlines; + continue; + + default: + return; + } + } +} + +static void eat_pp(int type) +{ + (void) type; + assert(pp_token.type == type); + next_preprocessing_token(); +} + static void parse_symbol(void) { - obstack_1grow(&symbol_obstack, (char) c); + obstack_1grow(&symbol_obstack, (char) CC); next_char(); while(1) { - switch(c) { + switch(CC) { DIGITS SYMBOL_CHARS - obstack_1grow(&symbol_obstack, (char) c); + obstack_1grow(&symbol_obstack, (char) CC); next_char(); break; @@ -661,11 +849,11 @@ end_symbol: char *string = obstack_finish(&symbol_obstack); /* might be a wide string or character constant ( L"string"/L'c' ) */ - if(c == '"' && string[0] == 'L' && string[1] == '\0') { + if (CC == '"' && string[0] == 'L' && string[1] == '\0') { obstack_free(&symbol_obstack, string); - parse_wide_string_literal(); + /* TODO */ return; - } else if(c == '\'' && string[0] == 'L' && string[1] == '\0') { + } else if (CC == '\'' && string[0] == 'L' && string[1] == '\0') { obstack_free(&symbol_obstack, string); parse_wide_character_constant(); return; @@ -673,35 +861,52 @@ end_symbol: symbol_t *symbol = symbol_table_insert(string); - pp_token.type = symbol->pp_ID; - pp_token.v.symbol = symbol; + pp_token.type = symbol->pp_ID; + pp_token.symbol = symbol; /* we can free the memory from symbol obstack if we already had an entry in * the symbol table */ - if(symbol->string != string) { + if (symbol->string != string) { obstack_free(&symbol_obstack, string); } + if (!do_expansions) + return; pp_definition_t *pp_definition = symbol->pp_definition; - if(do_expansions && 
pp_definition != NULL) { - pp_definition->expand_pos = 0; - pp_definition->is_expanding = true, - current_expansion = pp_definition; - expand_next(); + if (pp_definition == NULL) + return; + + if (pp_definition->has_parameters) { + skip_spaces(true); + /* no opening brace -> no expansion */ + if (CC != '(') + return; + next_preprocessing_token(); + eat_pp('('); + + /* parse arguments (TODO) */ + while (pp_token.type != TP_EOF && pp_token.type != ')') + next_preprocessing_token(); + next_preprocessing_token(); } + + pp_definition->expand_pos = 0; + pp_definition->is_expanding = true, + current_expansion = pp_definition; + expand_next(); } static void parse_number(void) { - obstack_1grow(&symbol_obstack, (char) c); + obstack_1grow(&symbol_obstack, (char) CC); next_char(); while(1) { - switch(c) { + switch(CC) { case '.': DIGITS SYMBOL_CHARS_WITHOUT_E_P - obstack_1grow(&symbol_obstack, (char) c); + obstack_1grow(&symbol_obstack, (char) CC); next_char(); break; @@ -709,10 +914,10 @@ static void parse_number(void) case 'p': case 'E': case 'P': - obstack_1grow(&symbol_obstack, (char) c); + obstack_1grow(&symbol_obstack, (char) CC); next_char(); - if(c == '+' || c == '-') { - obstack_1grow(&symbol_obstack, (char) c); + if(CC == '+' || CC == '-') { + obstack_1grow(&symbol_obstack, (char) CC); next_char(); } break; @@ -727,78 +932,16 @@ end_number: size_t size = obstack_object_size(&symbol_obstack); char *string = obstack_finish(&symbol_obstack); - pp_token.type = TP_NUMBER; - pp_token.v.string.begin = string; - pp_token.v.string.size = size; -} - -static void skip_multiline_comment(void) -{ - unsigned start_linenr = input_position.linenr; - - while(1) { - switch(c) { - case '/': - next_char(); - if (c == '*') { - /* TODO: nested comment, warn here */ - } - break; - case '*': - next_char(); - if(c == '/') { - next_char(); - return; - } - break; - - MATCH_NEWLINE( - if(print_spaces) { - counted_newlines++; - counted_spaces = 0; - } - break; - ) - - case EOF: { - 
source_position_t source_position; - source_position.input_name = pp_token.source_position.input_name; - source_position.linenr = start_linenr; - errorf(&source_position, "at end of file while looking for comment end"); - return; - } - - default: - next_char(); - break; - } - } -} - -static void skip_line_comment(void) -{ - while(1) { - switch(c) { - case EOF: - return; - - case '\n': - case '\r': - return; - - default: - next_char(); - break; - } - } + pp_token.type = TP_NUMBER; + pp_token.literal.begin = string; + pp_token.literal.size = size; } - #define MAYBE_PROLOG \ next_char(); \ while(1) { \ - switch(c) { + switch(CC) { #define MAYBE(ch, set_type) \ case ch: \ @@ -826,15 +969,15 @@ static void next_preprocessing_token(void) return; } - pp_token.source_position = input_position; + pp_token.source_position = input.position; restart: - switch(c) { + switch(CC) { case ' ': case '\t': - if(print_spaces) + if (do_print_spaces) counted_spaces++; - next_char(); + next_char(); goto restart; MATCH_NEWLINE( @@ -872,8 +1015,8 @@ restart: case '7': case '8': case '9': - put_back(c); - c = '.'; + put_back(CC); + CC = '.'; parse_number(); return; @@ -881,8 +1024,8 @@ restart: MAYBE_PROLOG MAYBE('.', TP_DOTDOTDOT) ELSE_CODE( - put_back(c); - c = '.'; + put_back(CC); + CC = '.'; pp_token.type = '.'; return; ) @@ -917,14 +1060,10 @@ restart: case '*': next_char(); skip_multiline_comment(); - if(print_spaces) - counted_spaces++; goto restart; case '/': next_char(); skip_line_comment(); - if(print_spaces) - counted_spaces++; goto restart; ELSE('/') case '%': @@ -937,8 +1076,8 @@ restart: MAYBE_PROLOG MAYBE(':', TP_HASHHASH) ELSE_CODE( - put_back(c); - c = '%'; + put_back(CC); + CC = '%'; pp_token.type = '#'; return; ) @@ -995,17 +1134,27 @@ restart: case ';': case ',': case '\\': - pp_token.type = c; + pp_token.type = CC; next_char(); return; case EOF: - pp_token.type = TP_EOF; + if (input_stack != NULL) { + close_input(); + pop_restore_input(); + counted_newlines = 0; + 
counted_spaces = 0; + /* hack to output correct line number */ + print_line_directive(&input.position, "2"); + next_preprocessing_token(); + } else { + pp_token.type = TP_EOF; + } return; default: next_char(); - errorf(&pp_token.source_position, "unknown character '%c' found\n", c); + errorf(&pp_token.source_position, "unknown character '%c' found\n", CC); pp_token.type = TP_ERROR; return; } @@ -1038,49 +1187,57 @@ static void print_quoted_string(const char *const string) fputc('"', out); } -static void print_line_directive(const source_position_t *pos) +static void print_line_directive(const source_position_t *pos, const char *add) { - fprintf(out, "# %d ", pos->linenr); + fprintf(out, "# %u ", pos->lineno); print_quoted_string(pos->input_name); + if (add != NULL) { + fputc(' ', out); + fputs(add, out); + } fputc('\n', out); printed_input_name = pos->input_name; } -static bool had_non_space = false; +static void print_spaces(void) +{ + if (counted_newlines >= 9) { + if (input.had_non_space) { + fputc('\n', out); + } + print_line_directive(&pp_token.source_position, NULL); + counted_newlines = 0; + } else { + for (unsigned i = 0; i < counted_newlines; ++i) + fputc('\n', out); + counted_newlines = 0; + } + for (unsigned i = 0; i < counted_spaces; ++i) + fputc(' ', out); + counted_spaces = 0; +} static void emit_pp_token(void) { - if (printed_input_name != pp_token.source_position.input_name) { - print_line_directive(&pp_token.source_position); - } else if (pp_token.type != '\n') { - if (counted_newlines >= 9) { - if (had_non_space) { - fputc('\n', out); - } - print_line_directive(&pp_token.source_position); - counted_newlines = 0; - } else { - for (unsigned i = 0; i < counted_newlines; ++i) - fputc('\n', out); - counted_newlines = 0; - } - for (unsigned i = 0; i < counted_spaces; ++i) - fputc(' ', out); - counted_spaces = 0; - had_non_space = true; + if (skip_mode) + return; + + if (pp_token.type != '\n') { + print_spaces(); + input.had_non_space = true; } 
switch(pp_token.type) { case TP_IDENTIFIER: - fputs(pp_token.v.symbol->string, out); + fputs(pp_token.symbol->string, out); break; case TP_NUMBER: - fputs(pp_token.v.string.begin, out); + fputs(pp_token.literal.begin, out); break; case TP_STRING_LITERAL: fputc('"', out); - fputs(pp_token.v.string.begin, out); + fputs(pp_token.literal.begin, out); fputc('"', out); break; case '\n': @@ -1091,13 +1248,6 @@ static void emit_pp_token(void) } } -static void eat_pp(preprocessor_token_type_t type) -{ - (void) type; - assert(pp_token.type == type); - next_preprocessing_token(); -} - static void eat_pp_directive(void) { while(pp_token.type != '\n' && pp_token.type != TP_EOF) { @@ -1120,22 +1270,6 @@ static bool strings_equal(const string_t *string1, const string_t *string2) return true; } -static bool wide_strings_equal(const wide_string_t *string1, - const wide_string_t *string2) -{ - size_t size = string1->size; - if(size != string2->size) - return false; - - const wchar_rep_t *c1 = string1->begin; - const wchar_rep_t *c2 = string2->begin; - for(size_t i = 0; i < size; ++i, ++c1, ++c2) { - if(*c1 != *c2) - return false; - } - return true; -} - static bool pp_tokens_equal(const token_t *token1, const token_t *token2) { if(token1->type != token2->type) @@ -1146,16 +1280,12 @@ static bool pp_tokens_equal(const token_t *token1, const token_t *token2) /* TODO */ return false; case TP_IDENTIFIER: - return token1->v.symbol == token2->v.symbol; + return token1->symbol == token2->symbol; case TP_NUMBER: case TP_CHARACTER_CONSTANT: case TP_STRING_LITERAL: - return strings_equal(&token1->v.string, &token2->v.string); + return strings_equal(&token1->literal, &token2->literal); - case TP_WIDE_CHARACTER_CONSTANT: - case TP_WIDE_STRING_LITERAL: - return wide_strings_equal(&token1->v.wide_string, - &token2->v.wide_string); default: return true; } @@ -1168,8 +1298,8 @@ static bool pp_definitions_equal(const pp_definition_t *definition1, return false; size_t len = definition1->list_len; - 
const token_t *t1 = definition1->replacement_list; - const token_t *t2 = definition2->replacement_list; + const token_t *t1 = definition1->token_list; + const token_t *t2 = definition2->token_list; for(size_t i = 0; i < len; ++i, ++t1, ++t2) { if(!pp_tokens_equal(t1, t2)) return false; @@ -1180,39 +1310,72 @@ static bool pp_definitions_equal(const pp_definition_t *definition1, static void parse_define_directive(void) { eat_pp(TP_define); + assert(obstack_object_size(&pp_obstack) == 0); - if(pp_token.type != TP_IDENTIFIER) { + if (pp_token.type != TP_IDENTIFIER) { errorf(&pp_token.source_position, - "expected identifier after #define, got '%T'", &pp_token); - eat_pp_directive(); - return; + "expected identifier after #define, got '%t'", &pp_token); + goto error_out; } - symbol_t *symbol = pp_token.v.symbol; + symbol_t *symbol = pp_token.symbol; pp_definition_t *new_definition = obstack_alloc(&pp_obstack, sizeof(new_definition[0])); memset(new_definition, 0, sizeof(new_definition[0])); - new_definition->source_position = input_position; + new_definition->source_position = input.position; /* this is probably the only place where spaces are significant in the * lexer (except for the fact that they separate tokens). #define b(x) * is something else than #define b (x) */ - //token_t *arguments = NULL; - if(c == '(') { + if (CC == '(') { + /* eat the '(' */ + next_preprocessing_token(); + /* get next token after '(' */ next_preprocessing_token(); - while(pp_token.type != ')') { - if(pp_token.type == TP_DOTDOTDOT) { + + while (true) { + switch (pp_token.type) { + case TP_DOTDOTDOT: new_definition->is_variadic = true; next_preprocessing_token(); - if(pp_token.type != ')') { - errorf(&input_position, + if (pp_token.type != ')') { + errorf(&input.position, "'...' 
not at end of macro argument list"); - continue; + goto error_out; } - } else if(pp_token.type != TP_IDENTIFIER) { + break; + case TP_IDENTIFIER: + obstack_ptr_grow(&pp_obstack, pp_token.symbol); next_preprocessing_token(); + + if (pp_token.type == ',') { + next_preprocessing_token(); + break; + } + + if (pp_token.type != ')') { + errorf(&pp_token.source_position, + "expected ',' or ')' after identifier, got '%t'", + &pp_token); + goto error_out; + } + break; + case ')': + next_preprocessing_token(); + goto finish_argument_list; + default: + errorf(&pp_token.source_position, + "expected identifier, '...' or ')' in #define argument list, got '%t'", + &pp_token); + goto error_out; } } + + finish_argument_list: + new_definition->has_parameters = true; + new_definition->n_parameters + = obstack_object_size(&pp_obstack) / sizeof(new_definition->parameters[0]); + new_definition->parameters = obstack_finish(&pp_obstack); } else { next_preprocessing_token(); } @@ -1220,20 +1383,20 @@ static void parse_define_directive(void) /* construct a new pp_definition on the obstack */ assert(obstack_object_size(&pp_obstack) == 0); size_t list_len = 0; - while(pp_token.type != '\n' && pp_token.type != TP_EOF) { + while (pp_token.type != '\n' && pp_token.type != TP_EOF) { obstack_grow(&pp_obstack, &pp_token, sizeof(pp_token)); ++list_len; next_preprocessing_token(); } - new_definition->list_len = list_len; - new_definition->replacement_list = obstack_finish(&pp_obstack); + new_definition->list_len = list_len; + new_definition->token_list = obstack_finish(&pp_obstack); pp_definition_t *old_definition = symbol->pp_definition; - if(old_definition != NULL) { - if(!pp_definitions_equal(old_definition, new_definition)) { - warningf(&input_position, "multiple definition of macro '%Y' (first defined %P)", - symbol, &old_definition->source_position); + if (old_definition != NULL) { + if (!pp_definitions_equal(old_definition, new_definition)) { + warningf(&input.position, "multiple definition of 
macro '%Y' (first defined %P)",
+			         symbol, &old_definition->source_position);
 		} else {
 			/* reuse the old definition */
 			obstack_free(&pp_obstack, new_definition);
@@ -1242,6 +1405,14 @@ static void parse_define_directive(void)
 	}
 
 	symbol->pp_definition = new_definition;
+	return;
+
+error_out:
+	if (obstack_object_size(&pp_obstack) > 0) {
+		char *ptr = obstack_finish(&pp_obstack);
+		obstack_free(&pp_obstack, ptr);
+	}
+	eat_pp_directive();
 }
 
 static void parse_undef_directive(void)
@@ -1249,88 +1420,394 @@ static void parse_undef_directive(void)
 	eat_pp(TP_undef);
 
 	if(pp_token.type != TP_IDENTIFIER) {
-		errorf(&input_position,
-		       "expected identifier after #undef, got '%T'", &pp_token);
+		errorf(&input.position,
+		       "expected identifier after #undef, got '%t'", &pp_token);
 		eat_pp_directive();
 		return;
 	}
 
-	symbol_t *symbol = pp_token.v.symbol;
+	symbol_t *symbol = pp_token.symbol;
 	symbol->pp_definition = NULL;
 	next_preprocessing_token();
 
 	if(pp_token.type != '\n') {
-		warningf(&input_position, "extra tokens at end of #undef directive");
+		warningf(&input.position, "extra tokens at end of #undef directive");
 	}
 	/* eat until '\n' */
 	eat_pp_directive();
 }
 
-static void parse_preprocessing_directive(void)
+/**
+ * Reads the "file" or <file> header name that follows #include.
+ * Returns the name (allocated on input_obstack) or NULL if it is unterminated.
+ */
+static const char *parse_headername(void)
 {
-	print_spaces = false;
-	do_expansions = false;
-	eat_pp('#');
+	/* behind an #include we can have the special headername lexemes.
+	 * They're only allowed behind an #include so they're not recognized
+	 * by the normal next_preprocessing_token. We handle them as a special
+	 * exception here */
+
+	/* skip spaces so we reach start of next preprocessing token */
+	skip_spaces(false);
+
+	assert(obstack_object_size(&input_obstack) == 0);
+
+	/* check whether we have a "... or <... headername */
+	switch (CC) {
+	case '<':
+		/* for now until we have proper searchpath handling */
+		obstack_1grow(&input_obstack, '.');
+		obstack_1grow(&input_obstack, '/');
+
+		next_char();
+		while (true) {
+			switch (CC) {
+			case EOF:
+				/* fallthrough */
+			MATCH_NEWLINE(
+				parse_error("header name without closing '>'");
+				goto unterminated_headername;
+			)
+			case '>':
+				next_char();
+				goto finished_headername;
+			}
+			obstack_1grow(&input_obstack, (char) CC);
+			next_char();
+		}
+		/* we should never be here */
+
+	case '"':
+		/* for now until we have proper searchpath handling */
+		obstack_1grow(&input_obstack, '.');
+		obstack_1grow(&input_obstack, '/');
+
+		next_char();
+		while (true) {
+			switch (CC) {
+			case EOF:
+				/* fallthrough */
+			MATCH_NEWLINE(
+				parse_error("header name without closing '\"'");
+				goto unterminated_headername;
+			)
+			case '"':
+				next_char();
+				goto finished_headername;
+			}
+			obstack_1grow(&input_obstack, (char) CC);
+			next_char();
+		}
+		/* we should never be here */
 
-	switch(pp_token.type) {
-	case TP_define:
-		parse_define_directive();
-		break;
-	case TP_undef:
-		parse_undef_directive();
-		break;
 	default:
+		/* TODO: do normal pp_token parsing and concatenate results */
+		panic("pp_token concat include not implemented yet");
+	}
+
+finished_headername:
+	obstack_1grow(&input_obstack, '\0');
+	char *headername = obstack_finish(&input_obstack);
+
+	/* TODO: iterate search-path to find the file */
+
+	next_preprocessing_token();
+
+	return headername;
+
+unterminated_headername:
+	/* drop the half-built name so the emptiness assert holds next time */
+	obstack_free(&input_obstack, obstack_finish(&input_obstack));
+	return NULL;
+}
+
+/**
+ * Handles #include: reads the header name and switches to the new input.
+ * Returns true if the input was switched to the included file, else false.
+ */
+static bool parse_include_directive(void)
+{
+	/* don't eat the TP_include here!
+	 * we need an alternative parsing for the next token */
+
+	print_spaces();
+
+	const char *headername = parse_headername();
+	if (headername == NULL) {
+		eat_pp_directive();
+		return false;
+	}
+
+	if (pp_token.type != '\n' && pp_token.type != TP_EOF) {
+		warningf(&pp_token.source_position,
+		         "extra tokens at end of #include directive");
+		eat_pp_directive();
+	}
+
+	if (n_inputs > INCLUDE_LIMIT) {
+		errorf(&pp_token.source_position, "#include nested too deeply");
+		/* leave the \n or EOF for the caller, like the other error paths */
+		return false;
+	}
+
+	/* we have to re-enable space counting and macro expansion here,
+	 * because it is still disabled in directive parsing,
+	 * but we will trigger a preprocessing token reading of the new file
+	 * now and need expansions/space counting */
+	do_print_spaces = true;
+	do_expansions = true;
+
+	/* switch inputs */
+	push_input();
+	bool res = open_input(headername);
+	if (!res) {
 		errorf(&pp_token.source_position,
-		       "invalid preprocessing directive #%T", &pp_token);
+		       "failed including '%s': %s", headername, strerror(errno));
+		pop_restore_input();
+		return false;
+	}
+
+	return true;
+}
+
+/** Pushes a new, zeroed entry onto the conditional stack and returns it. */
+static pp_conditional_t *push_conditional(void)
+{
+	pp_conditional_t *conditional
+		= obstack_alloc(&pp_obstack, sizeof(*conditional));
+	memset(conditional, 0, sizeof(*conditional));
+
+	conditional->parent = conditional_stack;
+	conditional_stack = conditional;
+
+	return conditional;
+}
+
+/** Removes the innermost entry from the conditional stack. */
+static void pop_conditional(void)
+{
+	assert(conditional_stack != NULL);
+	conditional_stack = conditional_stack->parent;
+}
+
+/** Reports an error for each conditional still open and pops them all. */
+static void check_unclosed_conditionals(void)
+{
+	while (conditional_stack != NULL) {
+		pp_conditional_t *conditional = conditional_stack;
+
+		if (conditional->in_else) {
+			errorf(&conditional->source_position, "unterminated #else");
+		} else {
+			errorf(&conditional->source_position, "unterminated condition");
+		}
+		pop_conditional();
+	}
+}
+
+/**
+ * Handles #ifdef/#ifndef: pushes a conditional and enters skip mode when
+ * its condition is false.
+ */
+static void parse_ifdef_ifndef_directive(void)
+{
+	bool is_ifndef = (pp_token.type == TP_ifndef);
+	bool condition;
+	next_preprocessing_token();
+
+	if (skip_mode) {
 		eat_pp_directive();
-		break;
+		pp_conditional_t *conditional = push_conditional();
+		conditional->source_position = pp_token.source_position;
+		conditional->skip = true;
+		return;
+	}
+
+	if (pp_token.type != TP_IDENTIFIER) {
+		errorf(&pp_token.source_position,
+		       "expected identifier after #%s, got '%t'",
+		       is_ifndef ? "ifndef" : "ifdef", &pp_token);
+		eat_pp_directive();
+
+		/* just take the true case in the hope to avoid further errors */
+		condition = true;
+	} else {
+		symbol_t *symbol = pp_token.symbol;
+		pp_definition_t *pp_definition = symbol->pp_definition;
+		next_preprocessing_token();
+
+		if (pp_token.type != '\n') {
+			errorf(&pp_token.source_position,
+			       "extra tokens at end of #%s",
+			       is_ifndef ? "ifndef" : "ifdef");
+			eat_pp_directive();
+		}
+
+		/* evaluate whether we are in true or false case */
+		condition = is_ifndef ? pp_definition == NULL : pp_definition != NULL;
+	}
+
+	pp_conditional_t *conditional = push_conditional();
+	conditional->source_position = pp_token.source_position;
+	conditional->condition = condition;
+
+	if (!condition) {
+		skip_mode = true;
+	}
+}
+
+/** Handles #else: switches the innermost conditional to its else branch. */
+static void parse_else_directive(void)
+{
+	eat_pp(TP_else);
+
+	if (pp_token.type != '\n') {
+		if (!skip_mode) {
+			warningf(&pp_token.source_position, "extra tokens at end of #else");
+		}
+		eat_pp_directive();
+	}
+
+	pp_conditional_t *conditional = conditional_stack;
+	if (conditional == NULL) {
+		errorf(&pp_token.source_position, "#else without prior #if");
+		return;
 	}
 
-	print_spaces = true;
-	do_expansions = true;
+	if (conditional->in_else) {
+		errorf(&pp_token.source_position,
+		       "#else after #else (condition started %P)",
+		       &conditional->source_position);
+		skip_mode = true;
+		return;
+	}
+
+	conditional->in_else = true;
+	if (!conditional->skip) {
+		skip_mode = conditional->condition;
+	}
+	conditional->source_position = pp_token.source_position;
+}
+
+/** Handles #endif: closes the innermost conditional. */
+static void parse_endif_directive(void)
+{
+	eat_pp(TP_endif);
+
+	if (pp_token.type != '\n') {
+		if (!skip_mode) {
+			warningf(&pp_token.source_position,
+			         "extra tokens at end of #endif");
+		}
+		eat_pp_directive();
+	}
+
+	pp_conditional_t *conditional = conditional_stack;
+	if (conditional == NULL) {
+		errorf(&pp_token.source_position, "#endif without prior #if");
+		return;
+	}
+
+	if (!conditional->skip) {
+		skip_mode = false;
+	}
+	pop_conditional();
+}
+
+/** Dispatches the directive after '#'; skip mode only tracks conditionals. */
+static void parse_preprocessing_directive(void)
+{
+	do_print_spaces = false;
+	do_expansions = false;
+	eat_pp('#');
+
+	if (skip_mode) {
+		switch(pp_token.type) {
+		case TP_ifdef:
+		case TP_ifndef:
+			parse_ifdef_ifndef_directive();
+			break;
+		case TP_else:
+			parse_else_directive();
+			break;
+		case TP_endif:
+			parse_endif_directive();
+			break;
+		default:
+			eat_pp_directive();
+			break;
+		}
+	} else {
+		switch(pp_token.type) {
+		case TP_define:
+			parse_define_directive();
+			break;
+		case TP_undef:
+			parse_undef_directive();
+			break;
+		case TP_ifdef:
+		case TP_ifndef:
+			parse_ifdef_ifndef_directive();
+			break;
+		case TP_else:
+			parse_else_directive();
+			break;
+		case TP_endif:
+			parse_endif_directive();
+			break;
+		case TP_include: {
+			bool in_new_source = parse_include_directive();
+			/* no need to do anything if source file switched */
+			if (in_new_source)
+				return;
+			break;
+		}
+		case '\n':
+			/* the nop directive */
+			break;
+		default:
+			errorf(&pp_token.source_position,
+			       "invalid preprocessing directive #%t", &pp_token);
+			eat_pp_directive();
+			break;
+		}
+	}
+
+	do_print_spaces = true;
+	do_expansions = true;
 
 	/* eat '\n' */
 	assert(pp_token.type == '\n' || pp_token.type == TP_EOF);
 	next_preprocessing_token();
 }
 
-int pptest_main(int argc, char **argv);
-
 #define GCC_COMPAT_MODE
+int pptest_main(int argc, char **argv);
 
 int pptest_main(int argc, char **argv)
 {
 	init_symbol_table();
 	init_tokens();
 
 	obstack_init(&pp_obstack);
+	obstack_init(&input_obstack);
 
-	const char *infname = "t.c";
+	const char *filename = "t.c";
 	if (argc > 1)
-		infname = argv[1];
-
-	input = fopen(infname, "r");
-	assert(input != NULL);
-	input_position.input_name = infname;
-	input_position.linenr = 1;
-
-	bufpos = NULL;
-	bufend = NULL;
-	counted_newlines = 0;
-	counted_spaces = 0;
+		filename = argv[1];
 
 	out = stdout;
 
 #ifdef GCC_COMPAT_MODE
 	/* this is here so we can directly compare "gcc -E" output and our output */
-	fprintf(out, "# 1 \"%s\"\n", input_position.input_name);
+	fprintf(out, "# 1 \"%s\"\n", filename);
 	fputs("# 1 \"\"\n", out);
 	fputs("# 1 \"\"\n", out);
 #endif
 
-	next_char();
-
-	next_preprocessing_token();
+	bool ok = open_input(filename);
+	assert(ok);
 
 	while(true) {
 		/* we're at a line begin */
@@ -1350,10 +1827,10 @@ int pptest_main(int argc, char **argv)
 	}
 
 end_of_main_loop:
-	if (counted_newlines > 0) {
-		fputc('\n', out);
-	}
+	check_unclosed_conditionals();
+	close_input();
 
+	obstack_free(&input_obstack, NULL);
 	obstack_free(&pp_obstack, NULL);
 
 	exit_tokens();