From 05dbb10d24382cce46842d47c7eab8471a8c8834 Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Sat, 14 Jul 2007 00:38:09 +0000 Subject: [PATCH] more work on parser, stdio.h is fully parsed now [r18333] --- ast_t.h | 5 +- lexer.c | 23 ++-- main.c | 9 -- parser.c | 365 +++++++++++++++++++++++++++++++++++++++++----------- tokens.inc | 10 +- type.c | 40 +++--- type.h | 1 + type_hash.c | 13 ++ type_t.h | 21 ++- 9 files changed, 360 insertions(+), 127 deletions(-) diff --git a/ast_t.h b/ast_t.h index fabd945..0b36f28 100644 --- a/ast_t.h +++ b/ast_t.h @@ -134,10 +134,7 @@ struct array_access_expression_t { struct sizeof_expression_t { expression_t expression; - union { - type_t *type; - expression_t *size_expression; - } v; + type_t *type; }; struct conditional_expression_t { diff --git a/lexer.c b/lexer.c index 476aba8..857ff85 100644 --- a/lexer.c +++ b/lexer.c @@ -973,19 +973,18 @@ void lexer_next_preprocessing_token(token_t *token) void lexer_next_token(token_t *token) { - while(1) { - lexer_next_preprocessing_token(token); - if(token->type == '\n') { - do { - lexer_next_preprocessing_token(token); - } while(token->type == '\n'); - - if(token->type == '#') { - parse_preprocessor_directive(); - continue; - } - } + lexer_next_preprocessing_token(token); + if(token->type != '\n') return; + +newline_found: + do { + lexer_next_preprocessing_token(token); + } while(token->type == '\n'); + + if(token->type == '#') { + parse_preprocessor_directive(); + goto newline_found; } } diff --git a/main.c b/main.c index 17f7cf1..63beaa0 100644 --- a/main.c +++ b/main.c @@ -47,16 +47,7 @@ void compile(const char *fname) lexer_open_stream(in, fname); -#if 1 - token_t token; - do { - lexer_next_token(&token); - print_token(stdout, &token); - puts(""); - } while(token.type != T_EOF); -#else parse(); -#endif fclose(in); } diff --git a/parser.c b/parser.c index 454f058..7719371 100644 --- a/parser.c +++ b/parser.c @@ -196,12 +196,12 @@ void parse_error_expected(const char *message, ...) } static -void eat_until_semi(void) +void eat_until(int token_type) { - while(token.type != ';') { - next_token(); + while(token.type != token_type) { if(token.type == T_EOF) return; + next_token(); } next_token(); } @@ -209,7 +209,7 @@ void eat_until_semi(void) #define expect(expected) \ if(UNLIKELY(token.type != (expected))) { \ parse_error_expected(NULL, (expected), 0); \ - eat_until_semi(); \ + eat_until(';'); \ return NULL; \ } \ next_token(); @@ -217,7 +217,7 @@ void eat_until_semi(void) #define expect_void(expected) \ if(UNLIKELY(token.type != (expected))) { \ parse_error_expected(NULL, (expected), 0); \ - eat_until_semi(); \ + eat_until(';'); \ return; \ } \ next_token(); @@ -228,7 +228,15 @@ static expression_t *parse_constant_expression(void) return parse_expression(); } +static expression_t *parse_assignment_expression(void) +{ + /* TODO: not correct yet */ + return parse_expression(); +} + static compound_entry_t *parse_compound_type_entries(void); +static void parse_declarator(declaration_t *declaration, + storage_class_t storage_class, type_t *type); typedef struct declaration_specifiers_t declaration_specifiers_t; struct declaration_specifiers_t { @@ -244,7 +252,7 @@ static type_t *parse_struct_specifier(void) struct_type->type.type = TYPE_COMPOUND_STRUCT; struct_type->source_position = source_position; - if(token.type == T_IDENTIFIER) { + if(token.type == T_IDENTIFIER || token.type == T_TYPENAME) { /* TODO */ next_token(); if(token.type == '{') { @@ -254,7 +262,7 @@ static type_t *parse_struct_specifier(void) parse_compound_type_entries(); } else { parse_error_expected("problem while parsing struct type specifiers", - T_IDENTIFIER, '{'); + T_IDENTIFIER, '{', 0); return NULL; } @@ -270,7 +278,7 @@ static type_t *parse_union_specifier(void) union_type->source_position = source_position; if(token.type == T_IDENTIFIER) { - /* TODO */ + union_type->symbol = token.v.symbol; next_token(); if(token.type == '{') { parse_compound_type_entries(); @@ -285,6 +293,61 @@ static type_t *parse_union_specifier(void) return (type_t*) union_type; } +static void parse_enum_type_entries() +{ + eat('{'); + + if(token.type == '}') { + next_token(); + parse_error("empty enum not allowed"); + return; + } + + do { + if(token.type != T_IDENTIFIER) { + parse_error_expected("problem while parsing enum entry", + T_IDENTIFIER, 0); + eat_until('}'); + return; + } + next_token(); + + if(token.type == '=') { + parse_constant_expression(); + } + + if(token.type != ',') + break; + next_token(); + } while(token.type != '}'); + + expect_void('}'); +} + +static type_t *parse_enum_specifier(void) +{ + eat(T_enum); + + enum_type_t *enum_type = allocate_type_zero(sizeof(enum_type[0])); + enum_type->type.type = TYPE_ENUM; + enum_type->source_position = source_position; + + if(token.type == T_IDENTIFIER) { + enum_type->symbol = token.v.symbol; + next_token(); + if(token.type == '{') { + parse_enum_type_entries(); + } + } else if(token.type == '{') { + parse_enum_type_entries(); + } else { + parse_error_expected("problem while parsing enum type specifiers", + T_IDENTIFIER, '{'); + } + + return (type_t*) enum_type; +} + typedef enum { SPECIFIER_SIGNED = 1 << 0, SPECIFIER_UNSIGNED = 1 << 1, @@ -317,8 +380,7 @@ typedef enum { case T_restrict: \ case T_volatile: \ case T_inline: \ - case T___extension__: \ - case T___attribute__: + case T___extension__: #ifdef PROVIDE_COMPLEX #define COMPLEX_SPECIFIERS \ @@ -349,11 +411,29 @@ typedef enum { case T_struct: \ case T_union: \ case T_enum: \ - case T___quad_t: \ - case T___u_quad_t: \ COMPLEX_SPECIFIERS \ IMAGINARY_SPECIFIERS +#define DECLARATION_START \ + STORAGE_CLASSES \ + TYPE_QUALIFIERS \ + TYPE_SPECIFIERS + +static +type_t *create_builtin_type(symbol_t *symbol) +{ + builtin_type_t *type = allocate_type_zero(sizeof(type[0])); + type->type.type = TYPE_BUILTIN; + type->symbol = symbol; + + type_t *result = typehash_insert((type_t*) type); + if(result != (type_t*) type) { + obstack_free(type_obst, type); + } + + return result; +} + static void parse_declaration_specifiers(declaration_specifiers_t *specifiers) { @@ -398,11 +478,6 @@ void parse_declaration_specifiers(declaration_specifiers_t *specifiers) next_token(); break; - case T___attribute__: - fprintf(stderr, "TODO: __attribute__ not handled yet\n"); - next_token(); - break; - /* type specifiers */ #define MATCH_SPECIFIER(token, specifier, name) \ case token: \ @@ -440,27 +515,6 @@ void parse_declaration_specifiers(declaration_specifiers_t *specifiers) } break; - case T___quad_t: - next_token(); - if(type_specifiers & SPECIFIER_LONG_LONG || - type_specifiers & SPECIFIER_LONG) { - parse_error("multiple type specifiers given"); - } else { - type_specifiers |= SPECIFIER_LONG_LONG; - } - break; - - case T___u_quad_t: - next_token(); - if(type_specifiers & SPECIFIER_LONG_LONG || - type_specifiers & SPECIFIER_LONG || - type_specifiers & SPECIFIER_UNSIGNED) { - parse_error("multiple type specifiers given"); - } else { - type_specifiers |= SPECIFIER_LONG_LONG | SPECIFIER_UNSIGNED; - } - break; - case T_struct: type = parse_struct_specifier(); break; @@ -468,8 +522,11 @@ void parse_declaration_specifiers(declaration_specifiers_t *specifiers) type = parse_union_specifier(); break; case T_enum: - /* TODO */ - assert(0); + type = parse_enum_specifier(); + break; + case T___builtin_va_list: + type = create_builtin_type(token.v.symbol); + next_token(); break; case T_TYPENAME: @@ -536,14 +593,16 @@ finish_specifiers: case SPECIFIER_UNSIGNED | SPECIFIER_LONG | SPECIFIER_INT: atomic_type = ATOMIC_TYPE_ULONG; break; - case SPECIFIER_LONG_LONG: - case SPECIFIER_SIGNED | SPECIFIER_LONG_LONG: - case SPECIFIER_LONG_LONG | SPECIFIER_INT: - case SPECIFIER_SIGNED | SPECIFIER_LONG_LONG | SPECIFIER_INT: + case SPECIFIER_LONG | SPECIFIER_LONG_LONG: + case SPECIFIER_SIGNED | SPECIFIER_LONG | SPECIFIER_LONG_LONG: + case SPECIFIER_LONG | SPECIFIER_LONG_LONG | SPECIFIER_INT: + case SPECIFIER_SIGNED | SPECIFIER_LONG | SPECIFIER_LONG_LONG + | SPECIFIER_INT: atomic_type = ATOMIC_TYPE_LONGLONG; break; - case SPECIFIER_UNSIGNED | SPECIFIER_LONG_LONG: - case SPECIFIER_UNSIGNED | SPECIFIER_LONG_LONG | SPECIFIER_INT: + case SPECIFIER_UNSIGNED | SPECIFIER_LONG | SPECIFIER_LONG_LONG: + case SPECIFIER_UNSIGNED | SPECIFIER_LONG | SPECIFIER_LONG_LONG + | SPECIFIER_INT: atomic_type = ATOMIC_TYPE_ULONGLONG; break; case SPECIFIER_FLOAT: @@ -640,8 +699,7 @@ unsigned parse_type_qualifiers() } static -void parse_declarator(declaration_t *declaration, storage_class_t storage_class, - type_t *type) +type_t *parse_pointer(type_t *type) { while(token.type == '*') { /* pointer */ @@ -661,6 +719,106 @@ void parse_declarator(declaration_t *declaration, storage_class_t storage_class, type = result; } + + return type; +} + +static +void parse_identifier_list() +{ + while(1) { + if(token.type != T_IDENTIFIER) { + parse_error_expected("problem while parsing parameter identifier " + "list", T_IDENTIFIER, 0); + return; + } + next_token(); + if(token.type != ',') + break; + next_token(); + } +} + +static +void parse_parameter() +{ + if(token.type == T_DOTDOTDOT) { + next_token(); + return; + } + + declaration_specifiers_t specifiers; + memset(&specifiers, 0, sizeof(specifiers)); + + parse_declaration_specifiers(&specifiers); + specifiers.type = parse_pointer(specifiers.type); + + if(token.type == '(' || token.type == T_IDENTIFIER + || token.type == T_TYPENAME) { + declaration_t declaration; + memset(&declaration, 0, sizeof(declaration)); + parse_declarator(&declaration, specifiers.storage_class, + specifiers.type); + } +} + +static +void parse_parameters() +{ + if(token.type == T_IDENTIFIER) { + parse_identifier_list(); + return; + } + + while(1) { + switch(token.type) { + case T_DOTDOTDOT: + DECLARATION_START + parse_parameter(); + break; + default: + return; + } + if(token.type != ',') + return; + next_token(); + } +} + +static +void parse_attributes(void) +{ + while(token.type == T___attribute__) { + next_token(); + fprintf(stderr, "TODO: __attribute__ not handled yet\n"); + + expect_void('('); + int depth = 1; + while(depth > 0) { + switch(token.type) { + case T_EOF: + parse_error("EOF while parsing attribute"); + break; + case '(': + next_token(); + depth++; + break; + case ')': + next_token(); + depth--; + break; + default: + next_token(); + } + } + } +} + +static +void parse_declarator(declaration_t *declaration, storage_class_t storage_class, + type_t *type) +{ + type = parse_pointer(type); declaration->storage_class = storage_class; declaration->type = type; @@ -676,40 +834,66 @@ void parse_declarator(declaration_t *declaration, storage_class_t storage_class, expect_void(')'); break; default: - parse_error("problem while parsing declarator"); + parse_error_expected("problem while parsing declarator", T_TYPENAME, + T_IDENTIFIER, '(', 0); } - if(token.type == '(') { - next_token(); + while(1) { + switch(token.type) { + case '(': + next_token(); - /* parse parameter-type-list or identifier-list */ + parse_parameters(); - expect_void(')'); - } else if(token.type == '[') { - next_token(); + expect_void(')'); + break; + case '[': + next_token(); + + if(token.type == T_static) { + next_token(); + } - /* multiple type qualifiers, and static */ + unsigned type_qualifiers = parse_type_qualifiers(); + if(type_qualifiers != 0) { + if(token.type == T_static) { + next_token(); + } + } - /* assignment_expression or '*' or nothing */ + if(token.type == '*' /* TODO: && lookahead == ']' */) { + next_token(); + } else if(token.type != ']') { + parse_assignment_expression(); + } - expect_void(']'); + expect_void(']'); + break; + default: + goto declarator_finished; + } } +declarator_finished: + parse_attributes(); + fprintf(stderr, "Declarator type: "); print_type(stderr, type); fprintf(stderr, "\n"); symbol_t *symbol = declaration->symbol; - environment_entry_t *entry = environment_push(symbol); - entry->declaration = declaration; - entry->old_symbol_ID = symbol->ID; + if(symbol != NULL) { + environment_entry_t *entry = environment_push(symbol); + entry->declaration = declaration; + entry->old_symbol_ID = symbol->ID; - if(storage_class == STORAGE_CLASS_TYPEDEF) { - symbol->ID = T_TYPENAME; - fprintf(stderr, "typedef '%s'\n", symbol->string); - } else { - symbol->ID = T_IDENTIFIER; + if(storage_class == STORAGE_CLASS_TYPEDEF) { + symbol->ID = T_TYPENAME; + fprintf(stderr, "typedef '%s'\n", symbol->string); + } else { + symbol->ID = T_IDENTIFIER; + } } } @@ -723,7 +907,12 @@ void parse_init_declarators(const declaration_specifiers_t *specifiers) specifiers->type); if(token.type == '=') { next_token(); - // parse_initializer TODO + if(token.type == '{') { + // TODO + expect_void('}'); + } else { + parse_assignment_expression(); + } } else if(token.type == '{') { parse_compound_statement(); return; @@ -798,6 +987,19 @@ void parse_declaration(void) parse_init_declarators(&specifiers); } +type_t *parse_typename(void) +{ + declaration_specifiers_t specifiers; + memset(&specifiers, 0, sizeof(specifiers)); + /* TODO not correct storage class elements are not allowed here */ + parse_declaration_specifiers(&specifiers); + + specifiers.type = parse_pointer(specifiers.type); + + return specifiers.type; +} + + typedef expression_t* (*parse_expression_function) (unsigned precedence); @@ -921,14 +1123,33 @@ expression_t *parse_array_expression(unsigned precedence, return (expression_t*) array_access; } +static +type_t *get_expression_type(const expression_t *expression) +{ + (void) expression; + /* TODO */ + return NULL; +} + static expression_t *parse_sizeof(unsigned precedence) { - (void) precedence; eat(T_sizeof); - /* TODO... */ - return NULL; + sizeof_expression_t *sizeof_expression + = allocate_ast_zero(sizeof(sizeof_expression[0])); + sizeof_expression->expression.type = EXPR_SIZEOF; + + if(token.type == '(' /* && LA1 is type_specifier */) { + next_token(); + sizeof_expression->type = parse_typename(); + expect(')'); + } else { + expression_t *expression = parse_sub_expression(precedence); + sizeof_expression->type = get_expression_type(expression); + } + + return (expression_t*) sizeof_expression; } static @@ -1414,9 +1635,7 @@ statement_t *parse_statement(void) statement = NULL; break; - STORAGE_CLASSES - TYPE_QUALIFIERS - TYPE_SPECIFIERS + DECLARATION_START statement = parse_declaration_statement(); break; } diff --git a/tokens.inc b/tokens.inc index 996666b..566caba 100644 --- a/tokens.inc +++ b/tokens.inc @@ -12,7 +12,6 @@ S(auto) S(break) S(case) S(char) -S(const) S(continue) S(default) S(do) @@ -28,7 +27,6 @@ S(inline) S(int) S(long) S(register) -S(restrict) S(return) S(short) S(signed) @@ -47,10 +45,14 @@ S(_Complex) S(_Imaginary) S(__extension__) S(__attribute__) -S(__u_quad_t) -S(__quad_t) +S(__builtin_va_list) #undef S +T(const, "__const",) +T(_const, "const", = T_const) +T(restrict, "__restrict",) +T(_restrict, "restrict", = T_restrict) + T(SELECT, "->",) T(PLUSPLUS, "++",) T(MINUSMINUS, "--",) diff --git a/type.c b/type.c index 4332aa0..9d9c36f 100644 --- a/type.c +++ b/type.c @@ -40,24 +40,25 @@ void print_atomic_type(FILE *out, const atomic_type_t *type) print_type_qualifiers(out, & type->type); switch(type->atype) { - case ATOMIC_TYPE_INVALID: fputs("INVALIDATOMIC", out); break; - case ATOMIC_TYPE_VOID: fputs("void", out); break; - case ATOMIC_TYPE_BOOL: fputs("bool", out); break; - case ATOMIC_TYPE_CHAR: fputs("char", out); break; - case ATOMIC_TYPE_SCHAR: fputs("signed char", out); break; - case ATOMIC_TYPE_UCHAR: fputs("unsigned char", out); break; - case ATOMIC_TYPE_INT: fputs("int", out); break; - case ATOMIC_TYPE_UINT: fputs("unsigned int", out); break; - case ATOMIC_TYPE_SHORT: fputs("short", out); break; - case ATOMIC_TYPE_USHORT: fputs("unsigned short", out); break; - case ATOMIC_TYPE_LONG: fputs("long", out); break; - case ATOMIC_TYPE_ULONG: fputs("unsigned long", out); break; - case ATOMIC_TYPE_LONGLONG: fputs("long long", out); break; - case ATOMIC_TYPE_ULONGLONG: fputs("unsigned long long", out); break; - case ATOMIC_TYPE_FLOAT: fputs("float", out); break; - case ATOMIC_TYPE_DOUBLE: fputs("double", out); break; - default: fputs("UNKNOWNATOMIC", out); break; + case ATOMIC_TYPE_INVALID: fputs("INVALIDATOMIC", out); return; + case ATOMIC_TYPE_VOID: fputs("void", out); return; + case ATOMIC_TYPE_BOOL: fputs("bool", out); return; + case ATOMIC_TYPE_CHAR: fputs("char", out); return; + case ATOMIC_TYPE_SCHAR: fputs("signed char", out); return; + case ATOMIC_TYPE_UCHAR: fputs("unsigned char", out); return; + case ATOMIC_TYPE_INT: fputs("int", out); return; + case ATOMIC_TYPE_UINT: fputs("unsigned int", out); return; + case ATOMIC_TYPE_SHORT: fputs("short", out); return; + case ATOMIC_TYPE_USHORT: fputs("unsigned short", out); return; + case ATOMIC_TYPE_LONG: fputs("long", out); return; + case ATOMIC_TYPE_ULONG: fputs("unsigned long", out); return; + case ATOMIC_TYPE_LONGLONG: fputs("long long", out); return; + case ATOMIC_TYPE_ULONGLONG: fputs("unsigned long long", out); return; + case ATOMIC_TYPE_LONG_DOUBLE: fputs("long double", out); return; + case ATOMIC_TYPE_FLOAT: fputs("float", out); return; + case ATOMIC_TYPE_DOUBLE: fputs("double", out); return; } + fputs("UNKNOWNATOMIC", out); } static @@ -108,7 +109,7 @@ void print_type(FILE *out, const type_t *type) return; case TYPE_ENUM: print_type_qualifiers(out, type); - fputs("TODO", out); + fputs("enum (TODO)", out); return; case TYPE_ATOMIC: print_atomic_type(out, (const atomic_type_t*) type); @@ -120,6 +121,9 @@ void print_type(FILE *out, const type_t *type) fprintf(out, "%s", ((const compound_type_t*) type)->symbol->string); } return; + case TYPE_BUILTIN: + fputs(((builtin_type_t*) type)->symbol->string, out); + return; case TYPE_METHOD: print_method_type(out, (const method_type_t*) type); return; diff --git a/type.h b/type.h index e1f5444..11a98ff 100644 --- a/type.h +++ b/type.h @@ -11,6 +11,7 @@ typedef struct method_type_t method_type_t; typedef struct compound_entry_t compound_entry_t; typedef struct compound_type_t compound_type_t; typedef struct enum_type_t enum_type_t; +typedef struct builtin_type_t builtin_type_t; void init_types(void); void exit_types(void); diff --git a/type_hash.c b/type_hash.c index 312b8cf..29535ff 100644 --- a/type_hash.c +++ b/type_hash.c @@ -97,6 +97,9 @@ unsigned hash_type(const type_t *type) case TYPE_POINTER: hash = hash_pointer_type((const pointer_type_t*) type); break; + case TYPE_BUILTIN: + hash = hash_ptr(((const builtin_type_t*) type)->symbol); + break; } unsigned some_prime = 99991; @@ -172,6 +175,13 @@ int enum_types_equal(const enum_type_t *type1, const enum_type_t *type2) return type1->symbol == type2->symbol; } +static +int builtin_types_equal(const builtin_type_t *type1, + const builtin_type_t *type2) +{ + return type1->symbol == type2->symbol; +} + static int types_equal(const type_t *type1, const type_t *type2) { @@ -201,6 +211,9 @@ int types_equal(const type_t *type1, const type_t *type2) case TYPE_POINTER: return pointer_types_equal((const pointer_type_t*) type1, (const pointer_type_t*) type2); + case TYPE_BUILTIN: + return builtin_types_equal((const builtin_type_t*) type1, + (const builtin_type_t*) type2); } abort(); diff --git a/type_t.h b/type_t.h index 9d2b61e..ef3f260 100644 --- a/type_t.h +++ b/type_t.h @@ -15,7 +15,8 @@ typedef enum { TYPE_COMPOUND_UNION, TYPE_ENUM, TYPE_METHOD, - TYPE_POINTER + TYPE_POINTER, + TYPE_BUILTIN } type_type_t; typedef enum { @@ -65,6 +66,18 @@ struct atomic_type_t { atomic_type_type_t atype; }; +struct builtin_type_t { + type_t type; + symbol_t *symbol; +}; + +struct enum_type_t { + type_t type; + symbol_t *symbol; + /* TODO: list of enum members */ + source_position_t source_position; +}; + struct pointer_type_t { type_t type; type_t *points_to; @@ -96,10 +109,4 @@ struct compound_type_t { source_position_t source_position; }; -struct enum_type_t { - /* todo */ - symbol_t *symbol; - source_position_t source_position; -}; - #endif -- 2.20.1