More work on cparser:
author Matthias Braun <matze@braunis.de>
Fri, 28 Sep 2007 13:02:52 +0000 (13:02 +0000)
committer Matthias Braun <matze@braunis.de>
Fri, 28 Sep 2007 13:02:52 +0000 (13:02 +0000)
- cleanup some lexer code
- allow some usual non-c99 constructs (implicit function, implicit int type)
- parse float constants

[r18361]

ast.c
ast_t.h
lexer.c
main.c
parser.c
type.c
write_fluffy.c

diff --git a/ast.c b/ast.c
index 6590f57..f4ba2a4 100644 (file)
--- a/ast.c
+++ b/ast.c
@@ -26,7 +26,12 @@ static void print_indent(void)
 
 static void print_const(const const_t *cnst)
 {
-       fprintf(out, "%d", cnst->value);
+       if(cnst->expression.datatype == NULL)
+               return;
+
+       if(is_type_integer(cnst->expression.datatype)) {
+               fprintf(out, "%d", cnst->v.int_value);
+       }
 }
 
 static void print_string_literal(const string_literal_t *string_literal)
diff --git a/ast_t.h b/ast_t.h
index fadd270..99340b3 100644 (file)
--- a/ast_t.h
+++ b/ast_t.h
@@ -44,7 +44,10 @@ struct expression_t {
 
 struct const_t {
        expression_t  expression;
-       int           value;
+       union {
+               int         int_value;
+               long double float_value;
+       } v;
 };
 
 struct string_literal_t {
@@ -53,8 +56,8 @@ struct string_literal_t {
 };
 
 struct builtin_symbol_expression_t {
-       symbol_t     *symbol;
        expression_t  expression;
+       symbol_t     *symbol;
 };
 
 struct reference_expression_t {
diff --git a/lexer.c b/lexer.c
index 2a6cc2c..2b4897a 100644 (file)
--- a/lexer.c
+++ b/lexer.c
@@ -12,7 +12,7 @@
 #include <string.h>
 #include <ctype.h>
 
-//#define DEBUG_CHARS
+#define DEBUG_CHARS
 #define MAX_PUTBACK 3
 
 static int         c;
@@ -85,9 +85,16 @@ static inline void next_char(void);
                lexer_token.source_position.linenr++; \
                code;
 
-static void maybe_concat_lines(void)
+static inline void eat(char c_type)
 {
+       assert(c == c_type);
        next_char();
+}
+
+static void maybe_concat_lines(void)
+{
+       eat('\\');
+
        switch(c) {
        MATCH_NEWLINE(return;)
 
@@ -481,40 +488,40 @@ static int parse_hex_sequence(void)
 
 static int parse_escape_sequence(void)
 {
-       while(1) {
-               int ec = c;
-               next_char();
+       eat('\\');
 
-               switch(ec) {
-               case '"':  return '"';
-               case '\'': return'\'';
-               case '\\': return '\\';
-               case '?': return '\?';
-               case 'a': return '\a';
-               case 'b': return '\b';
-               case 'f': return '\f';
-               case 'n': return '\n';
-               case 'r': return '\r';
-               case 't': return '\t';
-               case 'v': return '\v';
-               case 'x':
-                       return parse_hex_sequence();
-               case '0':
-               case '1':
-               case '2':
-               case '3':
-               case '4':
-               case '5':
-               case '6':
-               case '7':
-                       return parse_octal_sequence();
-               case EOF:
-                       parse_error("reached end of file while parsing escape sequence");
-                       return EOF;
-               default:
-                       parse_error("unknown escape sequence");
-                       return EOF;
-               }
+       int ec = c;
+       next_char();
+
+       switch(ec) {
+       case '"':  return '"';
+       case '\'': return'\'';
+       case '\\': return '\\';
+       case '?': return '\?';
+       case 'a': return '\a';
+       case 'b': return '\b';
+       case 'f': return '\f';
+       case 'n': return '\n';
+       case 'r': return '\r';
+       case 't': return '\t';
+       case 'v': return '\v';
+       case 'x':
+               return parse_hex_sequence();
+       case '0':
+       case '1':
+       case '2':
+       case '3':
+       case '4':
+       case '5':
+       case '6':
+       case '7':
+               return parse_octal_sequence();
+       case EOF:
+               parse_error("reached end of file while parsing escape sequence");
+               return EOF;
+       default:
+               parse_error("unknown escape sequence");
+               return EOF;
        }
 }
 
@@ -544,12 +551,12 @@ static void parse_string_literal(void)
        assert(c == '"');
        next_char();
 
+       int tc;
        while(1) {
                switch(c) {
                case '\\':
-                       next_char();
-                       int ec = parse_escape_sequence();
-                       obstack_1grow(&symbol_obstack, ec);
+                       tc = parse_escape_sequence();
+                       obstack_1grow(&symbol_obstack, tc);
                        break;
 
                case EOF:
@@ -590,14 +597,12 @@ end_of_string:
 
 static void parse_character_constant(void)
 {
-       assert(c == '\'');
-       next_char();
+       eat('\'');
 
        int found_char = 0;
        while(1) {
                switch(c) {
                case '\\':
-                       next_char();
                        found_char = parse_escape_sequence();
                        break;
 
diff --git a/main.c b/main.c
index 58e0601..09dd39c 100644 (file)
--- a/main.c
+++ b/main.c
@@ -98,9 +98,15 @@ int main(int argc, char **argv)
                return 0;
        }
 
-       for(int i = 1; i < argc; ++i) {
-               translation_unit_t *unit = do_parsing(argv[i]);
+       if(argc > 2 && strcmp(argv[1], "--print-fluffy") == 0) {
+               translation_unit_t *unit = do_parsing(argv[2]);
+               ast_set_output(stdout);
                write_fluffy_decls(unit);
+               return 0;
+       }
+
+       for(int i = 1; i < argc; ++i) {
+               do_parsing(argv[i]);
        }
 
        exit_parser();
diff --git a/parser.c b/parser.c
index c1f162b..879b620 100644 (file)
--- a/parser.c
+++ b/parser.c
 #include "adt/error.h"
 #include "adt/array.h"
 
-//#define PRINT_TOKENS
+#define PRINT_TOKENS
 //#define ABORT_ON_ERROR
 #define MAX_LOOKAHEAD 2
+//#define STRICT_C99
 
 struct environment_entry_t {
        symbol_t      *symbol;
@@ -151,20 +152,32 @@ static inline void eat(token_type_t type)
        next_token();
 }
 
-void parser_print_error_prefix_pos(const source_position_t source_position)
+void error(void)
 {
-    fputs(source_position.input_name, stderr);
-    fputc(':', stderr);
-    fprintf(stderr, "%d", source_position.linenr);
-    fputs(": error: ", stderr);
 #ifdef ABORT_ON_ERROR
        abort();
 #endif
 }
 
+void parser_print_prefix_pos(const source_position_t source_position)
+{
+    fputs(source_position.input_name, stderr);
+    fputc(':', stderr);
+    fprintf(stderr, "%d", source_position.linenr);
+    fputs(": ", stderr);
+}
+
+void parser_print_error_prefix_pos(const source_position_t source_position)
+{
+       parser_print_prefix_pos(source_position);
+       fputs("error: ", stderr);
+       error();
+}
+
 void parser_print_error_prefix(void)
 {
-       parser_print_error_prefix_pos(token.source_position);
+       parser_print_prefix_pos(token.source_position);
+       error();
 }
 
 static void parse_error(const char *message)
@@ -173,6 +186,13 @@ static void parse_error(const char *message)
        fprintf(stderr, "parse error: %s\n", message);
 }
 
+__attribute__((unused))
+static void parse_warning(const char *message)
+{
+       parser_print_prefix_pos(token.source_position);
+       fprintf(stderr, "warning: %s\n", message);
+}
+
 static void parse_error_expected(const char *message, ...)
 {
        va_list args;
@@ -243,6 +263,13 @@ static void eat_brace(void)
        while(token.type != ')') {
                if(token.type == T_EOF)
                        return;
+               if(token.type == ')' || token.type == ';' || token.type == '}') {
+                       return;
+               }
+               if(token.type == '(') {
+                       eat_brace();
+                       continue;
+               }
                if(token.type == '{') {
                        eat_block();
                        continue;
@@ -313,6 +340,7 @@ static inline declaration_t *environment_push(declaration_t *declaration,
                                parser_print_error_prefix_pos(declaration->source_position);
                                fprintf(stderr, "definition of symbol '%s' with type ",
                                        declaration->symbol->string);
+                               error();
                                print_type(declaration->type);
                                fputc('\n', stderr);
                                parser_print_error_prefix_pos(
@@ -946,7 +974,13 @@ finish_specifiers:
                default:
                        /* invalid specifier combination, give an error message */
                        if(type_specifiers == 0) {
+#ifndef STRICT_C99
+                               parse_warning("no type specifiers in declaration (using int)");
+                               atomic_type = ATOMIC_TYPE_INT;
+                               break;
+#else
                                parse_error("no type specifiers given in declaration");
+#endif
                        } else if((type_specifiers & SPECIFIER_SIGNED) &&
                                  (type_specifiers & SPECIFIER_UNSIGNED)) {
                                parse_error("signed and unsigned specifiers gives");
@@ -1276,28 +1310,6 @@ declarator_finished:
        return result;
 }
 
-#if 0
-static type_t *make_pointers(type_t *type, parsed_pointer_t *pointer)
-{
-       for( ; pointer != NULL; pointer = pointer->next) {
-               pointer_type_t *pointer_type
-                       = allocate_type_zero(sizeof(pointer_type[0]));
-               pointer_type->type.type       = TYPE_POINTER;
-               pointer_type->points_to       = type;
-               pointer_type->type.qualifiers = pointer->type_qualifiers;
-
-               type_t *result = typehash_insert((type_t*) pointer_type);
-               if(result != (type_t*) pointer_type) {
-                       obstack_free(type_obst, pointer_type);
-               }
-
-               type = result;
-       }
-
-       return type;
-}
-#endif
-
 static type_t *construct_declarator_type(construct_type_t *construct_list,
                                          type_t *type)
 {
@@ -1412,6 +1424,7 @@ static void parser_error_multiple_definition(declaration_t *previous,
        parser_print_error_prefix_pos(previous->source_position);
        fprintf(stderr, "this is the location of the previous "
                "definition.\n");
+       error();
 }
 
 static void parse_init_declarators(const declaration_specifiers_t *specifiers)
@@ -1608,7 +1621,20 @@ static expression_t *parse_int_const(void)
 
        cnst->expression.type     = EXPR_CONST;
        cnst->expression.datatype = type_int;
-       cnst->value               = token.v.intvalue;
+       cnst->v.int_value         = token.v.intvalue;
+
+       next_token();
+
+       return (expression_t*) cnst;
+}
+
+static expression_t *parse_float_const(void)
+{
+       const_t *cnst = allocate_ast_zero(sizeof(cnst[0]));
+
+       cnst->expression.type     = EXPR_CONST;
+       cnst->expression.datatype = type_int;
+       cnst->v.float_value       = token.v.floatvalue;
 
        next_token();
 
@@ -1622,14 +1648,28 @@ static expression_t *parse_reference(void)
        ref->expression.type = EXPR_REFERENCE;
        ref->symbol          = token.v.symbol;
 
-       if(ref->symbol->declaration == NULL) {
+       declaration_t *declaration = ref->symbol->declaration;
+       next_token();
+
+       if(declaration == NULL) {
+#ifndef STRICT_C99
+               /* is it an implicitely defined function */
+               if(token.type == '(') {
+                       parser_print_prefix_pos(token.source_position);
+                       fprintf(stderr, "warning: implicit declaration of function '%s'\n",
+                               ref->symbol->string);
+                       /* TODO: do this correctly */
+                       return (expression_t*) ref;
+               }
+#endif
+
                parser_print_error_prefix();
                fprintf(stderr, "unknown symbol '%s' found.\n", ref->symbol->string);
+       } else {
+               ref->declaration         = declaration;
+               ref->expression.datatype = declaration->type;
        }
-       ref->declaration         = ref->symbol->declaration;
-       ref->expression.datatype = ref->declaration->type;
 
-       next_token();
 
        return (expression_t*) ref;
 }
@@ -1834,6 +1874,8 @@ static expression_t *parse_primary_expression(void)
        switch(token.type) {
        case T_INTEGER:
                return parse_int_const();
+       case T_FLOATINGPOINT:
+               return parse_float_const();
        case T_STRING_LITERAL:
                return parse_string_const();
        case T_IDENTIFIER:
@@ -1859,7 +1901,12 @@ static expression_t *parse_primary_expression(void)
        print_token(stderr, &token);
        fprintf(stderr, "\n");
        eat_statement();
-       return NULL;
+
+       expression_t *expression = allocate_ast_zero(sizeof(expression[0]));
+       expression->type     = EXPR_INVALID;
+       expression->datatype = type_void;
+
+       return expression;
 }
 
 static expression_t *parse_array_expression(unsigned precedence,
@@ -1891,7 +1938,7 @@ static expression_t *parse_array_expression(unsigned precedence,
 
        if(token.type != ']') {
                parse_error_expected("Problem while parsing array access", ']', 0);
-               return NULL;
+               return (expression_t*) array_access;
        }
        next_token();
 
@@ -1963,7 +2010,7 @@ static expression_t *parse_select_expression(unsigned precedence,
 
        if(token.type != T_IDENTIFIER) {
                parse_error_expected("Problem while parsing select", T_IDENTIFIER, 0);
-               return NULL;
+               return (expression_t*) select;
        }
        select->symbol = token.v.symbol;
        next_token();
@@ -2033,6 +2080,7 @@ static void type_error(const char *msg, const source_position_t source_position,
        fprintf(stderr, "%s, but found type ", msg);
        print_type(type);
        fputc('\n', stderr);
+       error();
 }
 
 static void type_error_incompatible(const char *msg,
@@ -2045,6 +2093,7 @@ static void type_error_incompatible(const char *msg,
        fprintf(stderr, " - ");
        print_type(type2);
        fprintf(stderr, ")\n");
+       error();
 }
 
 static type_t *get_type_after_conversion(const type_t *type1,
@@ -2231,7 +2280,6 @@ static expression_t *parse_sub_expression(unsigned precedence)
                left = parse_primary_expression();
        }
        assert(left != NULL);
-       assert(left->type != EXPR_INVALID);
        left->source_position = source_position;
 
        while(true) {
diff --git a/type.c b/type.c
index 1d5192c..5bb7a5f 100644 (file)
--- a/type.c
+++ b/type.c
@@ -258,7 +258,7 @@ bool type_valid(const type_t *type)
        return type->type != TYPE_INVALID;
 }
 
-bool is_type_int(const type_t *type)
+bool is_type_integer(const type_t *type)
 {
        if(type->type != TYPE_ATOMIC)
                return 0;
diff --git a/write_fluffy.c b/write_fluffy.c
index 651bcff..e9db530 100644 (file)
--- a/write_fluffy.c
+++ b/write_fluffy.c
@@ -5,6 +5,7 @@
 
 #include "ast_t.h"
 #include "type_t.h"
+#include "type.h"
 #include "adt/error.h"
 
 static const context_t *global_context;
@@ -216,7 +217,11 @@ static void write_expression(const expression_t *expression)
        switch(expression->type) {
        case EXPR_CONST:
                constant = (const const_t*) expression;
-               fprintf(out, "%d", constant->value);
+               if(is_type_integer(expression->datatype)) {
+                       fprintf(out, "%d", constant->v.int_value);
+               } else {
+                       fprintf(out, "%Lf", constant->v.float_value);
+               }
                break;
        case EXPR_UNARY:
                write_unary_expression((const unary_expression_t*) expression);