X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=preprocessor.c;h=5089a687b99040ffad00e2b2a0160769679d9c8e;hb=c4cbd607a4bc2655c08300e565250978713508ab;hp=ca342c0b33d5d0105d53a33a808b14e18dbae8cf;hpb=4b3a6b5235628a9c0c67831dbf3cfeeb7cc170b0;p=cparser diff --git a/preprocessor.c b/preprocessor.c index ca342c0..5089a68 100644 --- a/preprocessor.c +++ b/preprocessor.c @@ -18,18 +18,26 @@ #define MAX_PUTBACK 3 #define INCLUDE_LIMIT 199 /* 199 is for gcc "compatibility" */ +struct pp_argument_t { + size_t list_len; + token_t *token_list; +}; + struct pp_definition_t { symbol_t *symbol; source_position_t source_position; pp_definition_t *parent_expansion; size_t expand_pos; - bool is_variadic : 1; - bool is_expanding : 1; - bool has_arguments : 1; - size_t n_arguments; - symbol_t *arguments; + bool is_variadic : 1; + bool is_expanding : 1; + bool has_parameters : 1; + size_t n_parameters; + symbol_t *parameters; + + /* replacement */ size_t list_len; - token_t *replacement_list; + token_t *token_list; + }; typedef struct pp_conditional_t pp_conditional_t; @@ -77,7 +85,6 @@ static pp_definition_t *current_expansion = NULL; static inline void next_char(void); static void next_preprocessing_token(void); static void print_line_directive(const source_position_t *pos, const char *add); -static void print_spaces(void); static bool open_input(const char *filename) { @@ -90,7 +97,7 @@ static bool open_input(const char *filename) input.bufpos = NULL; input.had_non_space = false; input.position.input_name = filename; - input.position.linenr = 1; + input.position.lineno = 1; /* indicate that we're at a new input */ print_line_directive(&input.position, input_stack != NULL ? "1" : NULL); @@ -176,7 +183,7 @@ static inline void next_real_char(void) if (input.bufpos >= input.bufend) { size_t s = fread(input.buf + MAX_PUTBACK, 1, sizeof(input.buf) - MAX_PUTBACK, input.file); - if(s == 0) { + if (s == 0) { CC = EOF; return; } @@ -207,11 +214,11 @@ static inline void put_back(int pc) if(CC == '\n') { \ next_char(); \ } \ - ++input.position.linenr; \ + ++input.position.lineno; \ code \ case '\n': \ next_char(); \ - ++input.position.linenr; \ + ++input.position.lineno; \ code #define eat(c_type) do { assert(CC == c_type); next_char(); } while(0) @@ -307,7 +314,8 @@ static inline bool is_octal_digit(int chr) * Returns the value of a digit. * The only portable way to do it ... */ -static int digit_value(int digit) { +static int digit_value(int digit) +{ switch (digit) { case '0': return 0; case '1': return 1; @@ -421,7 +429,7 @@ static int parse_escape_sequence(void) static void parse_string_literal(void) { - const unsigned start_linenr = input.position.linenr; + const unsigned start_linenr = input.position.lineno; eat('"'); @@ -443,7 +451,7 @@ static void parse_string_literal(void) case EOF: { source_position_t source_position; source_position.input_name = pp_token.source_position.input_name; - source_position.linenr = start_linenr; + source_position.lineno = start_linenr; errorf(&source_position, "string has no end"); pp_token.type = TP_ERROR; return; @@ -476,9 +484,9 @@ end_of_string: const char *const result = string; #endif - pp_token.type = TP_STRING_LITERAL; - pp_token.v.string.begin = result; - pp_token.v.string.size = size; + pp_token.type = TP_STRING_LITERAL; + pp_token.literal.begin = result; + pp_token.literal.size = size; } static void parse_wide_character_constant(void) @@ -524,70 +532,9 @@ end_of_wide_char_constant: /* TODO... */ } -static void parse_wide_string_literal(void) -{ - const unsigned start_linenr = input.position.linenr; - - assert(CC == '"'); - next_char(); - - while(1) { - switch(CC) { - case '\\': { - wchar_rep_t tc = parse_escape_sequence(); - obstack_grow(&symbol_obstack, &tc, sizeof(tc)); - break; - } - - case EOF: { - source_position_t source_position; - source_position.input_name = pp_token.source_position.input_name; - source_position.linenr = start_linenr; - errorf(&source_position, "string has no end"); - pp_token.type = TP_ERROR; - return; - } - - case '"': - next_char(); - goto end_of_string; - - default: { - wchar_rep_t tc = CC; - obstack_grow(&symbol_obstack, &tc, sizeof(tc)); - next_char(); - break; - } - } - } - -end_of_string:; - /* add finishing 0 to the string */ - static const wchar_rep_t nul = L'\0'; - obstack_grow(&symbol_obstack, &nul, sizeof(nul)); - - const size_t size - = (size_t)obstack_object_size(&symbol_obstack) / sizeof(wchar_rep_t); - const wchar_rep_t *const string = obstack_finish(&symbol_obstack); - -#if 0 /* TODO hash */ - /* check if there is already a copy of the string */ - const wchar_rep_t *const result = strset_insert(&stringset, string); - if(result != string) { - obstack_free(&symbol_obstack, string); - } -#else - const wchar_rep_t *const result = string; -#endif - - pp_token.type = TP_WIDE_STRING_LITERAL; - pp_token.v.wide_string.begin = result; - pp_token.v.wide_string.size = size; -} - static void parse_character_constant(void) { - const unsigned start_linenr = input.position.linenr; + const unsigned start_linenr = input.position.lineno; eat('\''); @@ -607,7 +554,7 @@ static void parse_character_constant(void) case EOF: { source_position_t source_position; source_position.input_name = pp_token.source_position.input_name; - source_position.linenr = start_linenr; + source_position.lineno = start_linenr; errorf(&source_position, "EOF while parsing character constant"); pp_token.type = TP_ERROR; return; @@ -629,9 +576,9 @@ end_of_char_constant:; const size_t size = (size_t)obstack_object_size(&symbol_obstack); const char *const string = obstack_finish(&symbol_obstack); - pp_token.type = TP_CHARACTER_CONSTANT; - pp_token.v.string.begin = string; - pp_token.v.string.size = size; + pp_token.type = TP_CHARACTER_CONSTANT; + pp_token.literal.begin = string; + pp_token.literal.size = size; } #define SYMBOL_CHARS_WITHOUT_E_P \ @@ -732,14 +679,14 @@ restart: current_expansion = definition; goto restart; } - pp_token = definition->replacement_list[definition->expand_pos]; + pp_token = definition->token_list[definition->expand_pos]; ++definition->expand_pos; if(pp_token.type != TP_IDENTIFIER) return; /* if it was an identifier then we might need to expand again */ - pp_definition_t *symbol_definition = pp_token.v.symbol->pp_definition; + pp_definition_t *symbol_definition = pp_token.symbol->pp_definition; if(symbol_definition != NULL && !symbol_definition->is_expanding) { symbol_definition->parent_expansion = definition; symbol_definition->expand_pos = 0; @@ -750,6 +697,135 @@ restart: } } +static void skip_line_comment(void) +{ + if(do_print_spaces) + counted_spaces++; + + while(1) { + switch(CC) { + case EOF: + return; + + case '\n': + case '\r': + return; + + default: + next_char(); + break; + } + } +} + +static void skip_multiline_comment(void) +{ + if(do_print_spaces) + counted_spaces++; + + unsigned start_linenr = input.position.lineno; + while(1) { + switch(CC) { + case '/': + next_char(); + if (CC == '*') { + /* TODO: nested comment, warn here */ + } + break; + case '*': + next_char(); + if(CC == '/') { + next_char(); + return; + } + break; + + MATCH_NEWLINE( + if(do_print_spaces) { + counted_newlines++; + counted_spaces = 0; + } + break; + ) + + case EOF: { + source_position_t source_position; + source_position.input_name = pp_token.source_position.input_name; + source_position.lineno = start_linenr; + errorf(&source_position, "at end of file while looking for comment end"); + return; + } + + default: + next_char(); + break; + } + } +} + +/* skip spaces advancing at the start of the next preprocessing token */ +static void skip_spaces(bool skip_newline) +{ + while (true) { + switch (CC) { + case ' ': + case '\t': + if (do_print_spaces) + counted_spaces++; + next_char(); + continue; + case '/': + next_char(); + if (CC == '/') { + next_char(); + skip_line_comment(); + continue; + } else if (CC == '*') { + next_char(); + skip_multiline_comment(); + continue; + } else { + put_back(CC); + CC = '/'; + } + return; + + case '\r': + if (!skip_newline) + return; + + next_char(); + if(CC == '\n') { + next_char(); + } + ++input.position.lineno; + if (do_print_spaces) + ++counted_newlines; + continue; + + case '\n': + if (!skip_newline) + return; + + next_char(); + ++input.position.lineno; + if (do_print_spaces) + ++counted_newlines; + continue; + + default: + return; + } + } +} + +static void eat_pp(int type) +{ + (void) type; + assert(pp_token.type == type); + next_preprocessing_token(); +} + static void parse_symbol(void) { obstack_1grow(&symbol_obstack, (char) CC); @@ -773,11 +849,11 @@ end_symbol: char *string = obstack_finish(&symbol_obstack); /* might be a wide string or character constant ( L"string"/L'c' ) */ - if(CC == '"' && string[0] == 'L' && string[1] == '\0') { + if (CC == '"' && string[0] == 'L' && string[1] == '\0') { obstack_free(&symbol_obstack, string); - parse_wide_string_literal(); + /* TODO */ return; - } else if(CC == '\'' && string[0] == 'L' && string[1] == '\0') { + } else if (CC == '\'' && string[0] == 'L' && string[1] == '\0') { obstack_free(&symbol_obstack, string); parse_wide_character_constant(); return; @@ -785,22 +861,39 @@ end_symbol: symbol_t *symbol = symbol_table_insert(string); - pp_token.type = symbol->pp_ID; - pp_token.v.symbol = symbol; + pp_token.type = symbol->pp_ID; + pp_token.symbol = symbol; /* we can free the memory from symbol obstack if we already had an entry in * the symbol table */ - if(symbol->string != string) { + if (symbol->string != string) { obstack_free(&symbol_obstack, string); } + if (!do_expansions) + return; pp_definition_t *pp_definition = symbol->pp_definition; - if(do_expansions && pp_definition != NULL) { - pp_definition->expand_pos = 0; - pp_definition->is_expanding = true, - current_expansion = pp_definition; - expand_next(); + if (pp_definition == NULL) + return; + + if (pp_definition->has_parameters) { + skip_spaces(true); + /* no opening brace -> no expansion */ + if (CC != '(') + return; + next_preprocessing_token(); + eat_pp('('); + + /* parse arguments (TODO) */ + while (pp_token.type != TP_EOF && pp_token.type != ')') + next_preprocessing_token(); + next_preprocessing_token(); } + + pp_definition->expand_pos = 0; + pp_definition->is_expanding = true, + current_expansion = pp_definition; + expand_next(); } static void parse_number(void) @@ -839,79 +932,12 @@ end_number: size_t size = obstack_object_size(&symbol_obstack); char *string = obstack_finish(&symbol_obstack); - pp_token.type = TP_NUMBER; - pp_token.v.string.begin = string; - pp_token.v.string.size = size; -} - -static void skip_multiline_comment(void) -{ - if(do_print_spaces) - counted_spaces++; - - unsigned start_linenr = input.position.linenr; - while(1) { - switch(CC) { - case '/': - next_char(); - if (CC == '*') { - /* TODO: nested comment, warn here */ - } - break; - case '*': - next_char(); - if(CC == '/') { - next_char(); - return; - } - break; - - MATCH_NEWLINE( - if(do_print_spaces) { - counted_newlines++; - counted_spaces = 0; - } - break; - ) - - case EOF: { - source_position_t source_position; - source_position.input_name = pp_token.source_position.input_name; - source_position.linenr = start_linenr; - errorf(&source_position, "at end of file while looking for comment end"); - return; - } - - default: - next_char(); - break; - } - } -} - -static void skip_line_comment(void) -{ - if(do_print_spaces) - counted_spaces++; - - while(1) { - switch(CC) { - case EOF: - return; - - case '\n': - case '\r': - return; - - default: - next_char(); - break; - } - } + pp_token.type = TP_NUMBER; + pp_token.literal.begin = string; + pp_token.literal.size = size; } - #define MAYBE_PROLOG \ next_char(); \ while(1) { \ @@ -949,9 +975,9 @@ restart: switch(CC) { case ' ': case '\t': - if(do_print_spaces) + if (do_print_spaces) counted_spaces++; - next_char(); + next_char(); goto restart; MATCH_NEWLINE( @@ -1163,7 +1189,7 @@ static void print_quoted_string(const char *const string) static void print_line_directive(const source_position_t *pos, const char *add) { - fprintf(out, "# %d ", pos->linenr); + fprintf(out, "# %u ", pos->lineno); print_quoted_string(pos->input_name); if (add != NULL) { fputc(' ', out); @@ -1176,7 +1202,7 @@ static void print_line_directive(const source_position_t *pos, const char *add) static void print_spaces(void) { - if (counted_newlines >= 8) { + if (counted_newlines >= 9) { if (input.had_non_space) { fputc('\n', out); } @@ -1204,14 +1230,14 @@ static void emit_pp_token(void) switch(pp_token.type) { case TP_IDENTIFIER: - fputs(pp_token.v.symbol->string, out); + fputs(pp_token.symbol->string, out); break; case TP_NUMBER: - fputs(pp_token.v.string.begin, out); + fputs(pp_token.literal.begin, out); break; case TP_STRING_LITERAL: fputc('"', out); - fputs(pp_token.v.string.begin, out); + fputs(pp_token.literal.begin, out); fputc('"', out); break; case '\n': @@ -1222,13 +1248,6 @@ static void emit_pp_token(void) } } -static void eat_pp(preprocessor_token_type_t type) -{ - (void) type; - assert(pp_token.type == type); - next_preprocessing_token(); -} - static void eat_pp_directive(void) { while(pp_token.type != '\n' && pp_token.type != TP_EOF) { @@ -1251,22 +1270,6 @@ static bool strings_equal(const string_t *string1, const string_t *string2) return true; } -static bool wide_strings_equal(const wide_string_t *string1, - const wide_string_t *string2) -{ - size_t size = string1->size; - if(size != string2->size) - return false; - - const wchar_rep_t *c1 = string1->begin; - const wchar_rep_t *c2 = string2->begin; - for(size_t i = 0; i < size; ++i, ++c1, ++c2) { - if(*c1 != *c2) - return false; - } - return true; -} - static bool pp_tokens_equal(const token_t *token1, const token_t *token2) { if(token1->type != token2->type) @@ -1277,16 +1280,12 @@ static bool pp_tokens_equal(const token_t *token1, const token_t *token2) /* TODO */ return false; case TP_IDENTIFIER: - return token1->v.symbol == token2->v.symbol; + return token1->symbol == token2->symbol; case TP_NUMBER: case TP_CHARACTER_CONSTANT: case TP_STRING_LITERAL: - return strings_equal(&token1->v.string, &token2->v.string); + return strings_equal(&token1->literal, &token2->literal); - case TP_WIDE_CHARACTER_CONSTANT: - case TP_WIDE_STRING_LITERAL: - return wide_strings_equal(&token1->v.wide_string, - &token2->v.wide_string); default: return true; } @@ -1299,8 +1298,8 @@ static bool pp_definitions_equal(const pp_definition_t *definition1, return false; size_t len = definition1->list_len; - const token_t *t1 = definition1->replacement_list; - const token_t *t2 = definition2->replacement_list; + const token_t *t1 = definition1->token_list; + const token_t *t2 = definition2->token_list; for(size_t i = 0; i < len; ++i, ++t1, ++t2) { if(!pp_tokens_equal(t1, t2)) return false; @@ -1318,7 +1317,7 @@ static void parse_define_directive(void) "expected identifier after #define, got '%t'", &pp_token); goto error_out; } - symbol_t *symbol = pp_token.v.symbol; + symbol_t *symbol = pp_token.symbol; pp_definition_t *new_definition = obstack_alloc(&pp_obstack, sizeof(new_definition[0])); @@ -1346,7 +1345,7 @@ static void parse_define_directive(void) } break; case TP_IDENTIFIER: - obstack_ptr_grow(&pp_obstack, pp_token.v.symbol); + obstack_ptr_grow(&pp_obstack, pp_token.symbol); next_preprocessing_token(); if (pp_token.type == ',') { @@ -1362,6 +1361,7 @@ static void parse_define_directive(void) } break; case ')': + next_preprocessing_token(); goto finish_argument_list; default: errorf(&pp_token.source_position, @@ -1372,10 +1372,10 @@ static void parse_define_directive(void) } finish_argument_list: - new_definition->has_arguments = true; - new_definition->n_arguments - = obstack_object_size(&pp_obstack) / sizeof(new_definition->arguments[0]); - new_definition->arguments = obstack_finish(&pp_obstack); + new_definition->has_parameters = true; + new_definition->n_parameters + = obstack_object_size(&pp_obstack) / sizeof(new_definition->parameters[0]); + new_definition->parameters = obstack_finish(&pp_obstack); } else { next_preprocessing_token(); } @@ -1389,14 +1389,14 @@ static void parse_define_directive(void) next_preprocessing_token(); } - new_definition->list_len = list_len; - new_definition->replacement_list = obstack_finish(&pp_obstack); + new_definition->list_len = list_len; + new_definition->token_list = obstack_finish(&pp_obstack); pp_definition_t *old_definition = symbol->pp_definition; if (old_definition != NULL) { if (!pp_definitions_equal(old_definition, new_definition)) { warningf(&input.position, "multiple definition of macro '%Y' (first defined %P)", - symbol, &old_definition->source_position); + symbol, &old_definition->source_position); } else { /* reuse the old definition */ obstack_free(&pp_obstack, new_definition); @@ -1426,7 +1426,7 @@ static void parse_undef_directive(void) return; } - symbol_t *symbol = pp_token.v.symbol; + symbol_t *symbol = pp_token.symbol; symbol->pp_definition = NULL; next_preprocessing_token(); @@ -1437,38 +1437,6 @@ static void parse_undef_directive(void) eat_pp_directive(); } -/* skip spaces advancing at the start of the next preprocessing token */ -static void skip_spaces(void) -{ - while (true) { - switch (CC) { - case ' ': - case '\t': - if(do_print_spaces) - counted_spaces++; - next_char(); - continue; - case '/': - next_char(); - if (CC == '/') { - next_char(); - skip_line_comment(); - continue; - } else if (CC == '*') { - next_char(); - skip_multiline_comment(); - continue; - } else { - put_back(CC); - CC = '/'; - } - return; - default: - return; - } - } -} - static const char *parse_headername(void) { /* behind an #include we can have the special headername lexems. @@ -1477,7 +1445,7 @@ static const char *parse_headername(void) * exception here */ /* skip spaces so we reach start of next preprocessing token */ - skip_spaces(); + skip_spaces(false); assert(obstack_object_size(&input_obstack) == 0); @@ -1646,7 +1614,7 @@ static void parse_ifdef_ifndef_directive(void) /* just take the true case in the hope to avoid further errors */ condition = true; } else { - symbol_t *symbol = pp_token.v.symbol; + symbol_t *symbol = pp_token.symbol; pp_definition_t *pp_definition = symbol->pp_definition; next_preprocessing_token();