From 4eade33d45008f90c8952a65a92cdda17d9598f9 Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Fri, 28 Sep 2007 13:02:52 +0000 Subject: [PATCH] More work on cparser: - cleanup some lexer code - allow some usual non-c99 constructs (implicit function, implicit int type) - parse float constants [r18361] --- ast.c | 7 ++- ast_t.h | 7 ++- lexer.c | 87 ++++++++++++++++++---------------- main.c | 10 +++- parser.c | 124 ++++++++++++++++++++++++++++++++++--------------- type.c | 2 +- write_fluffy.c | 7 ++- 7 files changed, 158 insertions(+), 86 deletions(-) diff --git a/ast.c b/ast.c index 6590f57..f4ba2a4 100644 --- a/ast.c +++ b/ast.c @@ -26,7 +26,12 @@ static void print_indent(void) static void print_const(const const_t *cnst) { - fprintf(out, "%d", cnst->value); + if(cnst->expression.datatype == NULL) + return; + + if(is_type_integer(cnst->expression.datatype)) { + fprintf(out, "%d", cnst->v.int_value); + } } static void print_string_literal(const string_literal_t *string_literal) diff --git a/ast_t.h b/ast_t.h index fadd270..99340b3 100644 --- a/ast_t.h +++ b/ast_t.h @@ -44,7 +44,10 @@ struct expression_t { struct const_t { expression_t expression; - int value; + union { + int int_value; + long double float_value; + } v; }; struct string_literal_t { @@ -53,8 +56,8 @@ struct string_literal_t { }; struct builtin_symbol_expression_t { - symbol_t *symbol; expression_t expression; + symbol_t *symbol; }; struct reference_expression_t { diff --git a/lexer.c b/lexer.c index 2a6cc2c..2b4897a 100644 --- a/lexer.c +++ b/lexer.c @@ -12,7 +12,7 @@ #include #include -//#define DEBUG_CHARS +#define DEBUG_CHARS #define MAX_PUTBACK 3 static int c; @@ -85,9 +85,16 @@ static inline void next_char(void); lexer_token.source_position.linenr++; \ code; -static void maybe_concat_lines(void) +static inline void eat(char c_type) { + assert(c == c_type); next_char(); +} + +static void maybe_concat_lines(void) +{ + eat('\\'); + switch(c) { MATCH_NEWLINE(return;) @@ -481,40 +488,40 @@ static int parse_hex_sequence(void) static int parse_escape_sequence(void) { - while(1) { - int ec = c; - next_char(); + eat('\\'); - switch(ec) { - case '"': return '"'; - case '\'': return'\''; - case '\\': return '\\'; - case '?': return '\?'; - case 'a': return '\a'; - case 'b': return '\b'; - case 'f': return '\f'; - case 'n': return '\n'; - case 'r': return '\r'; - case 't': return '\t'; - case 'v': return '\v'; - case 'x': - return parse_hex_sequence(); - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - return parse_octal_sequence(); - case EOF: - parse_error("reached end of file while parsing escape sequence"); - return EOF; - default: - parse_error("unknown escape sequence"); - return EOF; - } + int ec = c; + next_char(); + + switch(ec) { + case '"': return '"'; + case '\'': return'\''; + case '\\': return '\\'; + case '?': return '\?'; + case 'a': return '\a'; + case 'b': return '\b'; + case 'f': return '\f'; + case 'n': return '\n'; + case 'r': return '\r'; + case 't': return '\t'; + case 'v': return '\v'; + case 'x': + return parse_hex_sequence(); + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + return parse_octal_sequence(); + case EOF: + parse_error("reached end of file while parsing escape sequence"); + return EOF; + default: + parse_error("unknown escape sequence"); + return EOF; } } @@ -544,12 +551,12 @@ static void parse_string_literal(void) assert(c == '"'); next_char(); + int tc; while(1) { switch(c) { case '\\': - next_char(); - int ec = parse_escape_sequence(); - obstack_1grow(&symbol_obstack, ec); + tc = parse_escape_sequence(); + obstack_1grow(&symbol_obstack, tc); break; case EOF: @@ -590,14 +597,12 @@ end_of_string: static void parse_character_constant(void) { - assert(c == '\''); - next_char(); + eat('\''); int found_char = 0; while(1) { switch(c) { case '\\': - next_char(); found_char = parse_escape_sequence(); break; diff --git a/main.c b/main.c index 58e0601..09dd39c 100644 --- a/main.c +++ b/main.c @@ -98,9 +98,15 @@ int main(int argc, char **argv) return 0; } - for(int i = 1; i < argc; ++i) { - translation_unit_t *unit = do_parsing(argv[i]); + if(argc > 2 && strcmp(argv[1], "--print-fluffy") == 0) { + translation_unit_t *unit = do_parsing(argv[2]); + ast_set_output(stdout); write_fluffy_decls(unit); + return 0; + } + + for(int i = 1; i < argc; ++i) { + do_parsing(argv[i]); } exit_parser(); diff --git a/parser.c b/parser.c index c1f162b..879b620 100644 --- a/parser.c +++ b/parser.c @@ -14,9 +14,10 @@ #include "adt/error.h" #include "adt/array.h" -//#define PRINT_TOKENS +#define PRINT_TOKENS //#define ABORT_ON_ERROR #define MAX_LOOKAHEAD 2 +//#define STRICT_C99 struct environment_entry_t { symbol_t *symbol; @@ -151,20 +152,32 @@ static inline void eat(token_type_t type) next_token(); } -void parser_print_error_prefix_pos(const source_position_t source_position) +void error(void) { - fputs(source_position.input_name, stderr); - fputc(':', stderr); - fprintf(stderr, "%d", source_position.linenr); - fputs(": error: ", stderr); #ifdef ABORT_ON_ERROR abort(); #endif } +void parser_print_prefix_pos(const source_position_t source_position) +{ + fputs(source_position.input_name, stderr); + fputc(':', stderr); + fprintf(stderr, "%d", source_position.linenr); + fputs(": ", stderr); +} + +void parser_print_error_prefix_pos(const source_position_t source_position) +{ + parser_print_prefix_pos(source_position); + fputs("error: ", stderr); + error(); +} + void parser_print_error_prefix(void) { - parser_print_error_prefix_pos(token.source_position); + parser_print_prefix_pos(token.source_position); + error(); } static void parse_error(const char *message) @@ -173,6 +186,13 @@ static void parse_error(const char *message) fprintf(stderr, "parse error: %s\n", message); } +__attribute__((unused)) +static void parse_warning(const char *message) +{ + parser_print_prefix_pos(token.source_position); + fprintf(stderr, "warning: %s\n", message); +} + static void parse_error_expected(const char *message, ...) { va_list args; @@ -243,6 +263,13 @@ static void eat_brace(void) while(token.type != ')') { if(token.type == T_EOF) return; + if(token.type == ')' || token.type == ';' || token.type == '}') { + return; + } + if(token.type == '(') { + eat_brace(); + continue; + } if(token.type == '{') { eat_block(); continue; @@ -313,6 +340,7 @@ static inline declaration_t *environment_push(declaration_t *declaration, parser_print_error_prefix_pos(declaration->source_position); fprintf(stderr, "definition of symbol '%s' with type ", declaration->symbol->string); + error(); print_type(declaration->type); fputc('\n', stderr); parser_print_error_prefix_pos( @@ -946,7 +974,13 @@ finish_specifiers: default: /* invalid specifier combination, give an error message */ if(type_specifiers == 0) { +#ifndef STRICT_C99 + parse_warning("no type specifiers in declaration (using int)"); + atomic_type = ATOMIC_TYPE_INT; + break; +#else parse_error("no type specifiers given in declaration"); +#endif } else if((type_specifiers & SPECIFIER_SIGNED) && (type_specifiers & SPECIFIER_UNSIGNED)) { parse_error("signed and unsigned specifiers gives"); @@ -1276,28 +1310,6 @@ declarator_finished: return result; } -#if 0 -static type_t *make_pointers(type_t *type, parsed_pointer_t *pointer) -{ - for( ; pointer != NULL; pointer = pointer->next) { - pointer_type_t *pointer_type - = allocate_type_zero(sizeof(pointer_type[0])); - pointer_type->type.type = TYPE_POINTER; - pointer_type->points_to = type; - pointer_type->type.qualifiers = pointer->type_qualifiers; - - type_t *result = typehash_insert((type_t*) pointer_type); - if(result != (type_t*) pointer_type) { - obstack_free(type_obst, pointer_type); - } - - type = result; - } - - return type; -} -#endif - static type_t *construct_declarator_type(construct_type_t *construct_list, type_t *type) { @@ -1412,6 +1424,7 @@ static void parser_error_multiple_definition(declaration_t *previous, parser_print_error_prefix_pos(previous->source_position); fprintf(stderr, "this is the location of the previous " "definition.\n"); + error(); } static void parse_init_declarators(const declaration_specifiers_t *specifiers) @@ -1608,7 +1621,20 @@ static expression_t *parse_int_const(void) cnst->expression.type = EXPR_CONST; cnst->expression.datatype = type_int; - cnst->value = token.v.intvalue; + cnst->v.int_value = token.v.intvalue; + + next_token(); + + return (expression_t*) cnst; +} + +static expression_t *parse_float_const(void) +{ + const_t *cnst = allocate_ast_zero(sizeof(cnst[0])); + + cnst->expression.type = EXPR_CONST; + cnst->expression.datatype = type_int; + cnst->v.float_value = token.v.floatvalue; next_token(); @@ -1622,14 +1648,28 @@ static expression_t *parse_reference(void) ref->expression.type = EXPR_REFERENCE; ref->symbol = token.v.symbol; - if(ref->symbol->declaration == NULL) { + declaration_t *declaration = ref->symbol->declaration; + next_token(); + + if(declaration == NULL) { +#ifndef STRICT_C99 + /* is it an implicitely defined function */ + if(token.type == '(') { + parser_print_prefix_pos(token.source_position); + fprintf(stderr, "warning: implicit declaration of function '%s'\n", + ref->symbol->string); + /* TODO: do this correctly */ + return (expression_t*) ref; + } +#endif + parser_print_error_prefix(); fprintf(stderr, "unknown symbol '%s' found.\n", ref->symbol->string); + } else { + ref->declaration = declaration; + ref->expression.datatype = declaration->type; } - ref->declaration = ref->symbol->declaration; - ref->expression.datatype = ref->declaration->type; - next_token(); return (expression_t*) ref; } @@ -1834,6 +1874,8 @@ static expression_t *parse_primary_expression(void) switch(token.type) { case T_INTEGER: return parse_int_const(); + case T_FLOATINGPOINT: + return parse_float_const(); case T_STRING_LITERAL: return parse_string_const(); case T_IDENTIFIER: @@ -1859,7 +1901,12 @@ static expression_t *parse_primary_expression(void) print_token(stderr, &token); fprintf(stderr, "\n"); eat_statement(); - return NULL; + + expression_t *expression = allocate_ast_zero(sizeof(expression[0])); + expression->type = EXPR_INVALID; + expression->datatype = type_void; + + return expression; } static expression_t *parse_array_expression(unsigned precedence, @@ -1891,7 +1938,7 @@ static expression_t *parse_array_expression(unsigned precedence, if(token.type != ']') { parse_error_expected("Problem while parsing array access", ']', 0); - return NULL; + return (expression_t*) array_access; } next_token(); @@ -1963,7 +2010,7 @@ static expression_t *parse_select_expression(unsigned precedence, if(token.type != T_IDENTIFIER) { parse_error_expected("Problem while parsing select", T_IDENTIFIER, 0); - return NULL; + return (expression_t*) select; } select->symbol = token.v.symbol; next_token(); @@ -2033,6 +2080,7 @@ static void type_error(const char *msg, const source_position_t source_position, fprintf(stderr, "%s, but found type ", msg); print_type(type); fputc('\n', stderr); + error(); } static void type_error_incompatible(const char *msg, @@ -2045,6 +2093,7 @@ static void type_error_incompatible(const char *msg, fprintf(stderr, " - "); print_type(type2); fprintf(stderr, ")\n"); + error(); } static type_t *get_type_after_conversion(const type_t *type1, @@ -2231,7 +2280,6 @@ static expression_t *parse_sub_expression(unsigned precedence) left = parse_primary_expression(); } assert(left != NULL); - assert(left->type != EXPR_INVALID); left->source_position = source_position; while(true) { diff --git a/type.c b/type.c index 1d5192c..5bb7a5f 100644 --- a/type.c +++ b/type.c @@ -258,7 +258,7 @@ bool type_valid(const type_t *type) return type->type != TYPE_INVALID; } -bool is_type_int(const type_t *type) +bool is_type_integer(const type_t *type) { if(type->type != TYPE_ATOMIC) return 0; diff --git a/write_fluffy.c b/write_fluffy.c index 651bcff..e9db530 100644 --- a/write_fluffy.c +++ b/write_fluffy.c @@ -5,6 +5,7 @@ #include "ast_t.h" #include "type_t.h" +#include "type.h" #include "adt/error.h" static const context_t *global_context; @@ -216,7 +217,11 @@ static void write_expression(const expression_t *expression) switch(expression->type) { case EXPR_CONST: constant = (const const_t*) expression; - fprintf(out, "%d", constant->value); + if(is_type_integer(expression->datatype)) { + fprintf(out, "%d", constant->v.int_value); + } else { + fprintf(out, "%Lf", constant->v.float_value); + } break; case EXPR_UNARY: write_unary_expression((const unary_expression_t*) expression); -- 2.20.1