no environment passing around anymore in lexer, more work on lexer, import expression...
author Matthias Braun <matze@braunis.de>
Sat, 16 Jun 2007 15:36:01 +0000 (15:36 +0000)
committer Matthias Braun <matze@braunis.de>
Sat, 16 Jun 2007 15:36:01 +0000 (15:36 +0000)
[r18327]

17 files changed:
ast.h
ast_t.h
lexer.c
lexer.h
lexer_t.h
lextest/do_tests.sh
lextest/preprocessor/recursivedefine [new file with mode: 0644]
main.c
parser.c
parser.h [new file with mode: 0644]
symbol.h
symbol_table.c
symbol_table.h
token.c
token_t.h
tokens.inc
tokens_preprocessor.inc [new file with mode: 0644]

diff --git a/ast.h b/ast.h
index 30560cb..d662a59 100644 (file)
--- a/ast.h
+++ b/ast.h
@@ -6,6 +6,7 @@
 typedef struct expression_t               expression_t;
 typedef struct const_t                    const_t;
 typedef struct string_literal_t           string_literal_t;
+typedef struct reference_expression_t     reference_expression_t;
 typedef struct cast_expression_t          cast_expression_t;
 typedef struct call_argument_t            call_argument_t;
 typedef struct type_argument_t            type_argument_t;
diff --git a/ast_t.h b/ast_t.h
index 415bbc6..cbd689a 100644 (file)
--- a/ast_t.h
+++ b/ast_t.h
@@ -63,6 +63,7 @@ struct call_expression_t {
 typedef enum {
        UNEXPR_INVALID = 0,
        UNEXPR_NEGATE,
+       UNEXPR_PLUS,
        UNEXPR_BITWISE_NEGATE,
        UNEXPR_NOT,
        UNEXPR_DEREFERENCE,
@@ -95,7 +96,7 @@ typedef enum {
        BINEXPR_GREATEREQUAL,
        BINEXPR_BITWISE_AND,
        BINEXPR_BITWISE_OR,
-       BINEXPR_BITWSIE_XOR,
+       BINEXPR_BITWISE_XOR,
        BINEXPR_LOGICAL_AND,
        BINEXPR_LOGICAL_OR,
        BINEXPR_SHIFTLEFT,
diff --git a/lexer.c b/lexer.c
index 04060b9..be7360c 100644 (file)
--- a/lexer.c
+++ b/lexer.c
 //#define DEBUG_CHARS
 #define MAX_PUTBACK 3
 
+static int               c;
+source_position_t source_position;
+static FILE             *input;
+static char              buf[1027];
+static const char       *bufend;
+static const char       *bufpos;
+static strset_t          stringset;
+//static FILE            **input_stack;
+//static char            **buf_stack;
+
 static
-void error_prefix_at(lexer_t *this, const char *input_name, unsigned linenr)
+void error_prefix_at(const char *input_name, unsigned linenr)
 {
-       (void) this;
        fprintf(stderr, "%s:%d: Error: ", input_name, linenr);
 }
 
 static
-void error_prefix(lexer_t *this)
+void error_prefix()
 {
-       error_prefix_at(this, this->source_position.input_name,
-                       this->source_position.linenr);
+       error_prefix_at(source_position.input_name, source_position.linenr);
 }
 
 static
-void parse_error(lexer_t *this, const char *msg)
+void parse_error(const char *msg)
 {
-       error_prefix(this);
+       error_prefix();
        fprintf(stderr, "%s\n", msg);
 }
 
 static inline
-void next_char(lexer_t *this)
+void next_char()
 {
-       this->bufpos++;
-       if(this->bufpos >= this->bufend) {
-               size_t s = fread(this->buf + MAX_PUTBACK, 1,
-                                sizeof(this->buf) - MAX_PUTBACK, this->input);
+       bufpos++;
+       if(bufpos >= bufend) {
+               size_t s = fread(buf + MAX_PUTBACK, 1, sizeof(buf) - MAX_PUTBACK,
+                                input);
                if(s == 0) {
-                       this->c = EOF;
+                       c = EOF;
                        return;
                }
-               this->bufpos = this->buf + MAX_PUTBACK;
-               this->bufend = this->buf + MAX_PUTBACK + s;
+               bufpos = buf + MAX_PUTBACK;
+               bufend = buf + MAX_PUTBACK + s;
        }
-       this->c = *(this->bufpos);
+       c = *(bufpos);
 #ifdef DEBUG_CHARS
-       printf("nchar '%c'\n", this->c);
+       printf("nchar '%c'\n", c);
 #endif
 }
 
 static inline
-void put_back(lexer_t *this, int c)
+void put_back(int pc)
 {
-       char *p = (char*) this->bufpos - 1;
-       this->bufpos--;
-       assert(p >= this->buf);
-       *p = c;
+       char *p = (char*) bufpos - 1;
+       bufpos--;
+       assert(p >= buf);
+       *p = pc;
 
 #ifdef DEBUG_CHARS
-       printf("putback '%c'\n", c);
+       printf("putback '%c'\n", pc);
 #endif
 }
 
 
 static
-int replace_trigraph(lexer_t *this)
+int replace_trigraph(void)
 {
 #define MATCH_TRIGRAPH(ch,replacement)           \
        case ch:                                     \
-               this->c = replacement;                   \
+               c = replacement;                         \
                return 1;
 
-       switch(this->c) {
+       switch(c) {
        MATCH_TRIGRAPH('=', '#')
        MATCH_TRIGRAPH('(', '[')
        MATCH_TRIGRAPH('/', '\\')
@@ -95,72 +103,72 @@ int replace_trigraph(lexer_t *this)
 
 #define SKIP_TRIGRAPHS(custom_putback, no_trigraph_code) \
        case '?':                                  \
-               next_char(this);                       \
-               if(this->c != '?') {                   \
+               next_char();                           \
+               if(c != '?') {                         \
                        custom_putback;                    \
-                       put_back(this, this->c);           \
-                       this->c = '?';                     \
+                       put_back(c);                       \
+                       c = '?';                           \
                        no_trigraph_code;                  \
                }                                      \
-               next_char(this);                       \
-               if(replace_trigraph(this)) {           \
+               next_char();                           \
+               if(replace_trigraph()) {               \
                        break;                             \
                }                                      \
                custom_putback;                        \
-               put_back(this, '?');                   \
-               put_back(this, this->c);               \
-               this->c = '?';                         \
+               put_back('?');                         \
+               put_back(c);                           \
+               c = '?';                               \
                no_trigraph_code;
 
 #define EAT_NEWLINE(newline_code)              \
-       if(this->c == '\r') {                      \
-               next_char(this);                       \
-               if(this->c == '\n')                    \
-                       next_char(this);                   \
-               this->source_position.linenr++;        \
+       if(c == '\r') {                            \
+               next_char();                           \
+               if(c == '\n')                          \
+                       next_char();                       \
+               source_position.linenr++;              \
                newline_code;                          \
-       } else if(this->c == '\n') {               \
-               next_char(this);                       \
-               this->source_position.linenr++;        \
+       } else if(c == '\n') {                     \
+               next_char();                           \
+               source_position.linenr++;              \
                newline_code;                          \
        }
 
 static
-void parse_symbol(lexer_t *this, token_t *token)
+void parse_symbol(token_t *token)
 {
        symbol_t *symbol;
        char     *string;
 
-       obstack_1grow(&symbol_obstack, this->c);
-       next_char(this);
+       obstack_1grow(&symbol_obstack, c);
+       next_char();
 
        while(1) {
-               switch(this->c) {
+               switch(c) {
                case '\\':
-                       next_char(this);
+                       next_char();
                        EAT_NEWLINE(break;)
                        goto end_symbol;
 
                case 'A' ... 'Z':
                case 'a' ... 'z':
                case '_':
-                       obstack_1grow(&symbol_obstack, this->c);
-                       next_char(this);
+                       obstack_1grow(&symbol_obstack, c);
+                       next_char();
                        break;
 
                case '?':
-                       next_char(this);
-                       if(this->c != '?') {
-                               put_back(this, this->c);
-                               this->c = '?';
+                       next_char();
+                       if(c != '?') {
+                               put_back(c);
+                               c = '?';
                                goto end_symbol;
                        }
-                       next_char(this);
-                       if(replace_trigraph(this))
+                       next_char();
+                       if(replace_trigraph())
                                break;
-                       put_back(this, '?');
-                       put_back(this, this->c);
-                       this->c = '?';
+                       put_back('?');
+                       put_back(c);
+                       c = '?';
                        goto end_symbol;
 
                default:
@@ -173,11 +181,14 @@ end_symbol:
        string = obstack_finish(&symbol_obstack);
        symbol = symbol_table_insert(string);
 
+#if 0
        if(symbol->ID > 0) {
                token->type = symbol->ID;
        } else {
                token->type = T_IDENTIFIER;
        }
+#endif
+       token->type     = T_IDENTIFIER;
        token->v.symbol = symbol;
 
        if(symbol->string != string) {
@@ -185,79 +196,58 @@ end_symbol:
        }
 }
 
-#if 0
-static
-preprocessor_token_type_t parse_pp_symbol(lexer_t *this)
-{
-       do {
-               obstack_1grow(&symbol_obstack, this->c);
-               next_char(this);
-       } while(is_ident_char(this->c));
-       obstack_1grow(&symbol_obstack, '\0');
-
-       char     *string = obstack_finish(&symbol_obstack);
-       symbol_t *symbol = preprocessor_symbol_table_find(string);
-       obstack_free(&symbol_obstack, string);
-
-       if(symbol == 0)
-               return TP_ERROR;
-
-       return symbol->ID;
-}
-#endif
-
 static
-void parse_number_hex(lexer_t *this, token_t *token)
+void parse_number_hex(token_t *token)
 {
-       assert(this->c == 'x' || this->c == 'X');
-       next_char(this);
+       assert(c == 'x' || c == 'X');
+       next_char();
 
-       if (!isdigit(this->c) &&
-               !('A' <= this->c && this->c <= 'F') &&
-               !('a' <= this->c && this->c <= 'f')) {
-               parse_error(this, "premature end of hex number literal");
+       if (!isdigit(c) &&
+               !('A' <= c && c <= 'F') &&
+               !('a' <= c && c <= 'f')) {
+               parse_error("premature end of hex number literal");
                token->type = T_ERROR;
                return;
        }
 
        int value = 0;
        for(;;) {
-               if (isdigit(this->c)) {
-                       value = 16 * value + this->c - '0';
-               } else if ('A' <= this->c && this->c <= 'F') {
-                       value = 16 * value + this->c - 'A' + 10;
-               } else if ('a' <= this->c && this->c <= 'f') {
-                       value = 16 * value + this->c - 'a' + 10;
+               if (isdigit(c)) {
+                       value = 16 * value + c - '0';
+               } else if ('A' <= c && c <= 'F') {
+                       value = 16 * value + c - 'A' + 10;
+               } else if ('a' <= c && c <= 'f') {
+                       value = 16 * value + c - 'a' + 10;
                } else {
                        token->type     = T_INTEGER;
                        token->v.intvalue = value;
                        return;
                }
-               next_char(this);
+               next_char();
        }
 }
 
 static
-void parse_number_oct(lexer_t *this, token_t *token)
+void parse_number_oct(token_t *token)
 {
-       assert(this->c == 'o' || this->c == 'O');
-       next_char(this);
+       assert(c == 'o' || c == 'O');
+       next_char();
 
        int value = 0;
        for(;;) {
-               if ('0' <= this->c && this->c <= '7') {
-                       value = 8 * value + this->c - '0';
+               if ('0' <= c && c <= '7') {
+                       value = 8 * value + c - '0';
                } else {
                        token->type     = T_INTEGER;
                        token->v.intvalue = value;
                        return;
                }
-               next_char(this);
+               next_char();
        }
 }
 
 static
-void parse_number_dec(lexer_t *this, token_t *token, int first_char)
+void parse_number_dec(token_t *token, int first_char)
 {
        int value = 0;
        if(first_char > 0) {
@@ -266,45 +256,45 @@ void parse_number_dec(lexer_t *this, token_t *token, int first_char)
        }
 
        for(;;) {
-               if (isdigit(this->c)) {
-                       value = 10 * value + this->c - '0';
+               if (isdigit(c)) {
+                       value = 10 * value + c - '0';
                } else {
                        token->type     = T_INTEGER;
                        token->v.intvalue = value;
                        return;
                }
-               next_char(this);
+               next_char();
        }
 }
 
 static
-void parse_number(lexer_t *this, token_t *token)
+void parse_number(token_t *token)
 {
        // TODO check for overflow
        // TODO check for various invalid inputs sequences
 
-       if (this->c == '0') {
-               next_char(this);
-               switch (this->c) {
+       if (c == '0') {
+               next_char();
+               switch (c) {
                        case 'X':
-                       case 'x': parse_number_hex(this, token); break;
+                       case 'x': parse_number_hex(token); break;
                        case 'o':
-                       case 'O': parse_number_oct(this, token); break;
-                       default:  parse_number_dec(this, token, '0');
+                       case 'O': parse_number_oct(token); break;
+                       default:  parse_number_dec(token, '0');
                }
        } else {
-               parse_number_dec(this, token, 0);
+               parse_number_dec(token, 0);
        }
 }
 
 static
-int parse_escape_sequence(lexer_t *this)
+int parse_escape_sequence()
 {
        while(1) {
-               int c = this->c;
-               next_char(this);
+               int ec = c;
+               next_char();
 
-               switch(c) {
+               switch(ec) {
                case '"': return '"';
                case '\'': return'\'';
                case '\\':
@@ -318,73 +308,72 @@ int parse_escape_sequence(lexer_t *this)
                case 't': return '\t';
                case 'v': return '\v';
                case 'x': /* TODO parse hex number ... */
-                       parse_error(this, "hex escape sequences not implemented yet");
+                       parse_error("hex escape sequences not implemented yet");
                        return EOF;
                case 0 ... 8: /* TODO parse octal number ... */
-                       parse_error(this, "octal escape sequences not implemented yet");
+                       parse_error("octal escape sequences not implemented yet");
                        return EOF;
                case '?':
-                       if(this->c != '?') {
+                       if(c != '?') {
                                return '?';
                        }
                        /* might be a trigraph */
-                       next_char(this);
-                       if(replace_trigraph(this)) {
+                       next_char();
+                       if(replace_trigraph()) {
                                break;
                        }
-                       put_back(this, this->c);
-                       this->c = '?';
+                       put_back(c);
+                       c = '?';
                        return '?';
 
                case EOF:
-                       parse_error(this, "reached end of file while parsing escape sequence");
+                       parse_error("reached end of file while parsing escape sequence");
                        return EOF;
                default:
-                       parse_error(this, "unknown escape sequence");
+                       parse_error("unknown escape sequence");
                        return EOF;
                }
        }
 }
 
 static
-void parse_string_literal(lexer_t *this, token_t *token)
+void parse_string_literal(token_t *token)
 {
-       unsigned    start_linenr = this->source_position.linenr;
+       unsigned    start_linenr = source_position.linenr;
        char       *string;
        const char *result;
 
-       assert(this->c == '"');
-       next_char(this);
+       assert(c == '"');
+       next_char();
 
        while(1) {
-               switch(this->c) {
+               switch(c) {
                SKIP_TRIGRAPHS(,
                        obstack_1grow(&symbol_obstack, '?');
-                       next_char(this);
+                       next_char();
                        break;
                )
 
                case '\\':
-                       next_char(this);
+                       next_char();
                        EAT_NEWLINE(break;)
-                       int c = parse_escape_sequence(this);
-                       obstack_1grow(&symbol_obstack, c);
+                       int ec = parse_escape_sequence();
+                       obstack_1grow(&symbol_obstack, ec);
                        break;
 
                case EOF:
-                       error_prefix_at(this, this->source_position.input_name,
-                                       start_linenr);
+                       error_prefix_at(source_position.input_name, start_linenr);
                        fprintf(stderr, "string has no end\n");
                        token->type = T_ERROR;
                        return;
 
                case '"':
-                       next_char(this);
+                       next_char();
                        goto end_of_string;
 
                default:
-                       obstack_1grow(&symbol_obstack, this->c);
-                       next_char(this);
+                       obstack_1grow(&symbol_obstack, c);
+                       next_char();
                        break;
                }
        }
@@ -398,7 +387,7 @@ end_of_string:
        string = obstack_finish(&symbol_obstack);
 
        /* check if there is already a copy of the string */
-       result = strset_insert(&this->stringset, string);
+       result = strset_insert(&stringset, string);
        if(result != string) {
                obstack_free(&symbol_obstack, string);
        }
@@ -409,59 +398,59 @@ end_of_string:
 
 #define MATCH_NEWLINE(code)                 \
        case '\r':                              \
-               next_char(this);                    \
-               if(this->c == '\n') {               \
-                       next_char(this);                \
+               next_char();                        \
+               if(c == '\n') {                     \
+                       next_char();                    \
                }                                   \
-               this->source_position.linenr++;     \
+               source_position.linenr++;           \
                code;                               \
        case '\n':                              \
-               next_char(this);                    \
-               this->source_position.linenr++;     \
+               next_char();                        \
+               source_position.linenr++;           \
                code;
 
 static
-void parse_character_constant(lexer_t *this, token_t *token)
+void parse_character_constant(token_t *token)
 {
-       assert(this->c == '\'');
-       next_char(this);
+       assert(c == '\'');
+       next_char();
 
        int found_char = 0;
        while(1) {
-               switch(this->c) {
+               switch(c) {
                SKIP_TRIGRAPHS(,
                        found_char = '?';
                        break;
                )
 
                case '\\':
-                       next_char(this);
+                       next_char();
                        EAT_NEWLINE(break;)
                        found_char = '\\';
                        break;
 
                MATCH_NEWLINE(
-                       parse_error(this, "newline while parsing character constant");
+                       parse_error("newline while parsing character constant");
                        break;
                )
 
                case '\'':
-                       next_char(this);
+                       next_char();
                        goto end_of_char_constant;
 
                case EOF:
-                       parse_error(this, "EOF while parsing character constant");
+                       parse_error("EOF while parsing character constant");
                        token->type = T_ERROR;
                        return;
 
                default:
                        if(found_char != 0) {
-                               parse_error(this, "more than 1 characters in character "
+                               parse_error("more than 1 characters in character "
                                            "constant");
                                goto end_of_char_constant;
                        } else {
-                               found_char = this->c;
-                               next_char(this);
+                               found_char = c;
+                               next_char();
                        }
                        break;
                }
@@ -473,20 +462,20 @@ end_of_char_constant:
 }
 
 static
-void skip_multiline_comment(lexer_t *this)
+void skip_multiline_comment(void)
 {
-       unsigned start_linenr = this->source_position.linenr;
+       unsigned start_linenr = source_position.linenr;
        int had_star = 0;
 
        while(1) {
-               switch(this->c) {
+               switch(c) {
                case '*':
-                       next_char(this);
+                       next_char();
                        had_star = 1;
                        break;
 
                case '/':
-                       next_char(this);
+                       next_char();
                        if(had_star) {
                                return;
                        }
@@ -494,22 +483,22 @@ void skip_multiline_comment(lexer_t *this)
                        break;
 
                case '\\':
-                       next_char(this);
+                       next_char();
                        EAT_NEWLINE(break;)
                        had_star = 0;
                        break;
 
                case '?':
-                       next_char(this);
-                       if(this->c != '?') {
+                       next_char();
+                       if(c != '?') {
                                had_star = 0;
                                break;
                        }
-                       next_char(this);
-                       if(replace_trigraph(this))
+                       next_char();
+                       if(replace_trigraph())
                                break;
-                       put_back(this, this->c);
-                       this->c = '?';
+                       put_back(c);
+                       c = '?';
                        had_star = 0;
                        /* we don't put back the 2nd ? as the comment text is discarded
                         * anyway */
@@ -518,40 +507,39 @@ void skip_multiline_comment(lexer_t *this)
                MATCH_NEWLINE(had_star = 0; break;)
 
                case EOF:
-                       error_prefix_at(this, this->source_position.input_name,
-                                       start_linenr);
+                       error_prefix_at(source_position.input_name, start_linenr);
                        fprintf(stderr, "at end of file while looking for comment end\n");
                        return;
                default:
                        had_star = 0;
-                       next_char(this);
+                       next_char();
                        break;
                }
        }
 }
 
 static
-void skip_line_comment(lexer_t *this)
+void skip_line_comment(void)
 {
        while(1) {
-               switch(this->c) {
+               switch(c) {
                case '?':
-                       next_char(this);
-                       if(this->c != '?')
+                       next_char();
+                       if(c != '?')
                                break;
-                       next_char(this);
-                       if(replace_trigraph(this))
+                       next_char();
+                       if(replace_trigraph())
                                break;
-                       put_back(this, '?');
+                       put_back('?');
                        /* we don't put back the 2nd ? as the comment text is discarded
                         * anyway */
                        break;
 
                case '\\':
-                       next_char(this);
-                       if(this->c == '\n') {
-                               next_char(this);
-                               this->source_position.linenr++;
+                       next_char();
+                       if(c == '\n') {
+                               next_char();
+                               source_position.linenr++;
                        }
                        break;
 
@@ -561,48 +549,112 @@ void skip_line_comment(lexer_t *this)
                        return;
 
                default:
-                       next_char(this);
+                       next_char();
                        break;
                }
        }
 }
 
 static
-void parse_preprocessor_directive(lexer_t *this, token_t *result_token)
+void lexer_next_preprocessing_token(token_t *token);
+
+static
+void eat_until_newline(void)
+{
+       /* TODO */
+}
+
+static
+void error_directive(void)
 {
-       printf("PP: ");
-       while(this->c != '\n') {
-               printf("%c", this->c);
-               next_char(this);
+       error_prefix();
+       fprintf(stderr, "#error directive: \n");
+
+       /* parse pp-tokens until new-line */
+}
+
+static
+void define_directive(void)
+{
+       token_t temptoken;
+
+       lexer_next_preprocessing_token(&temptoken);
+       if(temptoken.type != T_IDENTIFIER) {
+               parse_error("expected identifier after #define\n");
+               eat_until_newline();
        }
-       printf("\n");
+}
 
-       lexer_next_token(this, result_token);
+static
+void ifdef_directive(int is_ifndef)
+{
+       (void) is_ifndef;
+       token_t temptoken;
+       lexer_next_preprocessing_token(&temptoken);
+       //expect_identifier();
+       //extect_newline();
 }
 
-void preprocessor_next_token(lexer_t *this, token_t *token)
+static
+void endif_directive(void)
 {
-       /* skip whitespaces */
-       while(this->c == ' ' || this->c == '\t' || this->c == '\r') {
-               next_char(this);
+       //expect_newline();
+}
+
+static
+void found_preprocessor_identifier(symbol_t *symbol)
+{
+       switch(symbol->pp_ID) {
+       case TP_include:
+               printf("include - enable header name parsing!\n");
+               break;
+       case TP_define:
+               define_directive();
+               break;
+       case TP_ifdef:
+               ifdef_directive(0);
+               break;
+       case TP_ifndef:
+               ifdef_directive(1);
+               break;
+       case TP_endif:
+               endif_directive();
+               break;
+       case TP_if:
+       case TP_else:
+       case TP_elif:
+       case TP_undef:
+       case TP_line:
+       case TP_error:
+               error_directive();
+               break;
+       case TP_pragma:
+               break;
        }
+}
+
+static
+void parse_preprocessor_directive(token_t *result_token)
+{
+       token_t temptoken;
 
-       switch(this->c) {
-       case 'A' ... 'Z':
-       case 'a' ... 'z':
-       case '_':
-               parse_symbol(this, token);
+       (void) result_token;
+       lexer_next_preprocessing_token(&temptoken);
+       switch(temptoken.type) {
+       case T_IDENTIFIER:
+               found_preprocessor_identifier(temptoken.v.symbol);
+               break;
        }
 }
 
 #define MAYBE_PROLOG                                       \
-                       next_char(this);                               \
+                       next_char();                                   \
                        while(1) {                                     \
-                               switch(this->c) {
+                               switch(c) {
 
 #define MAYBE(ch, set_type)                                \
                                case ch:                                   \
-                                       next_char(this);                       \
+                                       next_char();                           \
                                        token->type = set_type;                \
                                        return;
 
@@ -612,7 +664,7 @@ void preprocessor_next_token(lexer_t *this, token_t *token)
                                )                                          \
                                                                                                                   \
                                case '\\':                                 \
-                                       next_char(this);                       \
+                                       next_char();                           \
                                        EAT_NEWLINE(break;)                    \
                                        /* fallthrough */                      \
                                default:                                   \
@@ -628,29 +680,29 @@ void preprocessor_next_token(lexer_t *this, token_t *token)
                )
 
 static
-void eat_whitespace(lexer_t *this)
+void eat_whitespace()
 {
        while(1) {
-               switch(this->c) {
+               switch(c) {
                case ' ':
                case '\t':
-                       next_char(this);
+                       next_char();
                        break;
 
-               MATCH_NEWLINE(
-                       break;
-               )
+               case '\r':
+               case '\n':
+                       return;
 
                case '\\':
-                       next_char(this);
-                       if(this->c == '\n') {
-                               next_char(this);
-                               this->source_position.linenr++;
+                       next_char();
+                       if(c == '\n') {
+                               next_char();
+                               source_position.linenr++;
                                break;
                        }
 
-                       put_back(this, this->c);
-                       this->c = '\\';
+                       put_back(c);
+                       c = '\\';
                        return;
 
                SKIP_TRIGRAPHS(,
@@ -658,29 +710,29 @@ void eat_whitespace(lexer_t *this)
                )
 
                case '/':
-                       next_char(this);
+                       next_char();
                        while(1) {
-                               switch(this->c) {
+                               switch(c) {
                                case '*':
-                                       next_char(this);
-                                       skip_multiline_comment(this);
-                                       eat_whitespace(this);
+                                       next_char();
+                                       skip_multiline_comment();
+                                       eat_whitespace();
                                        return;
                                case '/':
-                                       next_char(this);
-                                       skip_line_comment(this);
-                                       eat_whitespace(this);
+                                       next_char();
+                                       skip_line_comment();
+                                       eat_whitespace();
                                        return;
 
                                SKIP_TRIGRAPHS(
-                                               put_back(this, '?');
+                                               put_back('?');
                                        ,
-                                               this->c = '/';
+                                               c = '/';
                                                return;
                                )
 
                                case '\\':
-                                       next_char(this);
+                                       next_char();
                                        EAT_NEWLINE(break;)
                                        /* fallthrough */
                                default:
@@ -695,51 +747,53 @@ void eat_whitespace(lexer_t *this)
        }
 }
 
-void lexer_next_token(lexer_t *this, token_t *token)
+static
+void lexer_next_preprocessing_token(token_t *token)
 {
        while(1) {
-               switch(this->c) {
+               switch(c) {
                case ' ':
                case '\t':
-                       next_char(this);
+                       next_char();
                        break;
 
                MATCH_NEWLINE(
-                       eat_whitespace(this);
-                       if(this->c == '#') {
-                               next_char(this);
-                               parse_preprocessor_directive(this, token);
+                       eat_whitespace();
+                       if(c == '#') {
+                               next_char();
+                               parse_preprocessor_directive(token);
                                return;
                        }
-                       break;
+                       token->type = '\n';
+                       return;
                )
 
                case 'A' ... 'Z':
                case 'a' ... 'z':
                case '_':
-                       parse_symbol(this, token);
+                       parse_symbol(token);
                        return;
 
                case '0' ... '9':
-                       parse_number(this, token);
+                       parse_number(token);
                        return;
 
                case '"':
-                       parse_string_literal(this, token);
+                       parse_string_literal(token);
                        return;
 
                case '\'':
-                       parse_character_constant(this, token);
+                       parse_character_constant(token);
                        return;
 
                case '\\':
-                       next_char(this);
-                       if(this->c == '\n') {
-                               next_char(this);
-                               this->source_position.linenr++;
+                       next_char();
+                       if(c == '\n') {
+                               next_char();
+                               source_position.linenr++;
                                break;
                        } else {
-                               parse_error(this, "unexpected '\\' found");
+                               parse_error("unexpected '\\' found");
                                token->type = T_ERROR;
                        }
                        return;
@@ -750,8 +804,8 @@ void lexer_next_token(lexer_t *this, token_t *token)
                                        MAYBE_PROLOG
                                        MAYBE('.', T_DOTDOTDOT)
                                        ELSE_CODE(
-                                               put_back(this, this->c);
-                                               this->c = '.';
+                                               put_back(c);
+                                               c = '.';
                                                token->type = '.';
                                                return;
                                        )
@@ -783,14 +837,14 @@ void lexer_next_token(lexer_t *this, token_t *token)
                        MAYBE_PROLOG
                        MAYBE('=', T_SLASHEQUAL)
                                case '*':
-                                       next_char(this);
-                                       skip_multiline_comment(this);
-                                       lexer_next_token(this, token);
+                                       next_char();
+                                       skip_multiline_comment();
+                                       lexer_next_preprocessing_token(token);
                                        return;
                                case '/':
-                                       next_char(this);
-                                       skip_line_comment(this);
-                                       lexer_next_token(this, token);
+                                       next_char();
+                                       skip_line_comment();
+                                       lexer_next_preprocessing_token(token);
                                        return;
                        ELSE('/')
                case '%':
@@ -803,8 +857,8 @@ void lexer_next_token(lexer_t *this, token_t *token)
                                                        MAYBE_PROLOG
                                                        MAYBE(':', T_PERCENTCOLONPERCENTCOLON)
                                                        ELSE_CODE(
-                                                               put_back(this, this->c);
-                                                               this->c = '%';
+                                                               put_back(c);
+                                                               c = '%';
                                                                token->type = T_PERCENTCOLON;
                                                                return;
                                                        )
@@ -849,19 +903,19 @@ void lexer_next_token(lexer_t *this, token_t *token)
                        ELSE('#')
 
                case '?':
-                       next_char(this);
+                       next_char();
                        /* just a simple ? */
-                       if(this->c != '?') {
+                       if(c != '?') {
                                token->type = '?';
                                return;
                        }
                        /* might be a trigraph */
-                       next_char(this);
-                       if(replace_trigraph(this)) {
+                       next_char();
+                       if(replace_trigraph()) {
                                break;
                        }
-                       put_back(this, this->c);
-                       this->c = '?';
+                       put_back(c);
+                       c = '?';
                        token->type = '?';
                        return;
 
@@ -874,8 +928,8 @@ void lexer_next_token(lexer_t *this, token_t *token)
                case '~':
                case ';':
                case ',':
-                       token->type = this->c;
-                       next_char(this);
+                       token->type = c;
+                       next_char();
                        return;
 
                case EOF:
@@ -883,33 +937,41 @@ void lexer_next_token(lexer_t *this, token_t *token)
                        return;
 
                default:
-                       next_char(this);
-                       error_prefix(this);
-                       fprintf(stderr, "unknown character '%c' found\n", this->c);
+                       next_char();
+                       error_prefix();
+                       fprintf(stderr, "unknown character '%c' found\n", c);
                        token->type = T_ERROR;
                        return;
                }
        }
 }
 
-void lexer_init(lexer_t *this, FILE *stream, const char *input_name)
+void lexer_next_token(token_t *token) /* next token; '\n' tokens are skipped */
 {
-       memset(this, 0, sizeof(this[0]));
+       do {
+               lexer_next_preprocessing_token(token);
+       } while(token->type == '\n');
+}
 
-       this->input = stream;
+void init_lexer(void) /* one-time setup: initializes the lexer's stringset */
+{
+       strset_init(&stringset);
+}
 
-       this->source_position.linenr     = 0;
-       this->source_position.input_name = input_name;
-       strset_init(&this->stringset);
+void lexer_open_stream(FILE *stream, const char *input_name) /* NOTE(review): bufpos/bufend are not reset here - confirm no stale input survives a stream switch */
+{
+       input                      = stream;
+       source_position.linenr     = 0;
+       source_position.input_name = input_name;
 
        /* we place a virtual '\n' at the beginning so the lexer knows we're at the
         * beginning of a line */
-       this->c = '\n';
+       c = '\n';
 }
 
-void lexer_destroy(lexer_t *this)
+void exit_lexer(void) /* counterpart of init_lexer(): destroys the stringset */
 {
-       (void) this;
+       strset_destroy(&stringset);
 }
 
 static __attribute__((unused))
diff --git a/lexer.h b/lexer.h
index f22f445..d3ffd39 100644 (file)
--- a/lexer.h
+++ b/lexer.h
@@ -4,8 +4,6 @@
 #include "symbol_table_t.h"
 #include "token_t.h"
 
-typedef struct lexer_t lexer_t;
-
-void lexer_next_token(lexer_t *lexer, token_t *token);
+void lexer_next_token(token_t *token);
 
 #endif
index f4da9b2..0925ba7 100644 (file)
--- a/lexer_t.h
+++ b/lexer_t.h
@@ -16,18 +16,11 @@ struct source_position_t {
        unsigned    linenr;
 };
 
-struct lexer_t {
-       int               c;
-       source_position_t source_position;
-       FILE             *input;
-       char              buf[1024];
-       const char       *bufend;
-       const char       *bufpos;
-       strset_t          stringset;
-};
+extern source_position_t source_position;
 
-void lexer_init(lexer_t *lexer, FILE *stream, const char *input_name);
+void init_lexer(void);
+void exit_lexer(void);
 
-void lexer_destroy(lexer_t *lexer);
+void lexer_open_stream(FILE *stream, const char *input_name);
 
 #endif
index 76d1b08..7e1f500 100755 (executable)
@@ -4,6 +4,6 @@ for i in tokenstreams/*; do
        if [ "$i" != "tokenstreams/refresults" ]; then
                echo "==> Checking $i"
                ../cparser $i > /tmp/tokenstream
-               diff -u /tmp/tokenstream tokenstreams/refresults/`basename $i` || exit 1
+               diff -u /tmp/tokenstream tokenstreams/refresults/`basename $i`
        fi
 done
diff --git a/lextest/preprocessor/recursivedefine b/lextest/preprocessor/recursivedefine
new file mode 100644 (file)
index 0000000..43cc55b
--- /dev/null
@@ -0,0 +1,15 @@
+#define concat(a,b,x,y)  a##b(x,y,a,b);
+#define bla(a,b,x,y)     a##b(x,y,a,b);
+
+int (bla)(int a, int b, int c, int d) {
+       return a + b + c + d;
+}
+
+int main()
+{
+       int con = 0;
+       int cat = 0;
+       int b = 0;
+       int la = 0;
+       return bla(con,cat,b,la);
+}
diff --git a/main.c b/main.c
index 81d4760..799f364 100644 (file)
--- a/main.c
+++ b/main.c
@@ -7,6 +7,7 @@
 
 #include "lexer_t.h"
 #include "token_t.h"
+#include "parser.h"
 
 #if 0
 static
@@ -37,7 +38,6 @@ void get_output_name(char *buf, size_t buflen, const char *inputname,
 static
 void compile(const char *fname)
 {
-       lexer_t         lexer;
        token_t         token;
 
        FILE *in = fopen(fname, "r");
@@ -46,15 +46,14 @@ void compile(const char *fname)
                exit(1);
        }
 
-       lexer_init(&lexer, in, fname);
+       lexer_open_stream(in, fname);
 
        do {
-               lexer_next_token(&lexer, &token);
+               lexer_next_token(&token);
                print_token(stdout, &token);
                puts("");
        } while(token.type != T_EOF);
 
-       lexer_destroy(&lexer);
        fclose(in);
 }
 
@@ -62,11 +61,15 @@ int main(int argc, char **argv)
 {
        init_symbol_table();
        init_tokens();
+       init_lexer();
+       init_parser();
 
        for(int i = 1; i < argc; ++i) {
                compile(argv[i]);
        }
 
+       exit_parser();
+       exit_lexer();
        exit_tokens();
        exit_symbol_table();
        return 0;
index a5628b4..fc385a1 100644 (file)
--- a/parser.c
+++ b/parser.c
@@ -1,22 +1,31 @@
 #include <config.h>
 
 #include <assert.h>
+#include <stdarg.h>
 
 #include "lexer_t.h"
 #include "token_t.h"
 #include "type_t.h"
 #include "ast_t.h"
 #include "adt/bitfiddle.h"
+#include "adt/error.h"
 
 #define PRINT_TOKENS
 
-static lexer_t lexer;
 static token_t token;
 
 static inline
-void next_token()
+void *allocate_ast_zero(size_t size) /* allocate_ast(size) with all bytes zeroed */
 {
-       lexer_next_token(&lexer, &token);
+       void *res = allocate_ast(size);
+       memset(res, 0, size); /* NOTE(review): res is not NULL-checked - confirm allocate_ast cannot fail */
+       return res;
+}
+
+static inline
+void next_token(void)
+{
+       lexer_next_token(&token);
 
 #ifdef PRINT_TOKENS
        print_token(stderr, &token);
@@ -31,11 +40,11 @@ void eat(token_type_t type)
        next_token();
 }
 
-void parser_print_error_prefix()
+void parser_print_error_prefix(void) /* writes "<input_name>:<linenr>: error: " to stderr */
 {
-    fputs(lexer.source_position.input_name, stderr);
+    fputs(source_position.input_name, stderr);
     fputc(':', stderr);
-    fprintf(stderr, "%d", lexer.source_position.linenr);
+    fprintf(stderr, "%u", source_position.linenr); /* linenr is unsigned */
     fputs(": error: ", stderr);
 }
 
@@ -46,12 +55,53 @@ void parse_error(const char *message)
        fprintf(stderr, "parse error: %s\n", message);
 }
 
-#define expect(expected) \
-    if(UNLIKELY(token.type != (expected))) { \
-        /*parse_error_expected(NULL, (expected), 0);*/ \
-        /*eat_until_semi();*/ \
-        return NULL; \
-    } \
+/* Report "got <token>, expected <t1>, <t2>, ..." on stderr. The variadic
+ * arguments are token types terminated by 0; `message` may be NULL, else it
+ * is printed first on a line of its own. */
+static
+void parse_error_expected(const char *message, ...)
+{
+       if(message != NULL) {
+               parser_print_error_prefix();
+               fprintf(stderr, "%s\n", message);
+       }
+       parser_print_error_prefix();
+       fputs("Parse error: got ", stderr);
+       print_token(stderr, &token);
+       fputs(", expected ", stderr);
+
+       va_list     args;
+       const char *separator = "";
+
+       va_start(args, message);
+       /* variadic arguments undergo default promotion, so fetch them as int;
+        * reading them back as the enum type is not portable */
+       for(int type = va_arg(args, int); type != 0; type = va_arg(args, int)) {
+               fputs(separator, stderr);
+               print_token_type(stderr, (token_type_t) type);
+               separator = ", ";
+       }
+       va_end(args);
+       fprintf(stderr, "\n");
+}
+
+/* error recovery: skip tokens up to and including the next ';' (or to EOF) */
+static void eat_until_semi(void)
+{
+       for(;;) {
+               if(token.type == ';') break;
+               next_token();
+               if(token.type == T_EOF) return;
+       }
+       next_token();
+}
+
+#define expect(expected) /* eat `expected`, or report + resync + return NULL */ \
+    if(UNLIKELY(token.type != (expected))) {       \
+        parse_error_expected(NULL, (expected), 0); \
+        eat_until_semi();                          \
+        return NULL;                               \
+    }                                              \
     next_token();
 
 typedef enum {
@@ -172,9 +222,12 @@ void parse_declaration_specifiers(declaration_specifiers_t *specifiers)
                        }
                        break;
 
-               /* struct or union specifier */
-               /* enum specifier */
-               /* typedef name */
+               case T_struct:
+               case T_enum:
+               case T_IDENTIFIER:
+                       /* TODO */
+                       assert(0);
+                       break;
 
                /* function specifier */
                default:
@@ -248,7 +301,7 @@ void parse_declaration_specifiers(declaration_specifiers_t *specifiers)
                case SPECIFIER_BOOL:
                        atomic_type = ATOMIC_TYPE_BOOL;
                        break;
-       #ifdef PROVIDE_COMPLEX
+#ifdef PROVIDE_COMPLEX
                case SPECIFIER_FLOAT | SPECIFIER_COMPLEX:
                        atomic_type = ATOMIC_TYPE_FLOAT_COMPLEX;
                        break;
@@ -258,8 +311,8 @@ void parse_declaration_specifiers(declaration_specifiers_t *specifiers)
                case SPECIFIER_LONG | SPECIFIER_DOUBLE | SPECIFIER_COMPLEX:
                        atomic_type = ATOMIC_TYPE_LONG_DOUBLE_COMPLEX;
                        break;
-       #endif
-       #ifdef PROVIDE_IMAGINARY
+#endif
+#ifdef PROVIDE_IMAGINARY
                case SPECIFIER_FLOAT | SPECIFIER_IMAGINARY:
                        atomic_type = ATOMIC_TYPE_FLOAT_IMAGINARY;
                        break;
@@ -269,7 +322,7 @@ void parse_declaration_specifiers(declaration_specifiers_t *specifiers)
                case SPECIFIER_LONG | SPECIFIER_DOUBLE | SPECIFIER_IMAGINARY:
                        atomic_type = ATOMIC_TYPE_LONG_DOUBLE_IMAGINARY;
                        break;
-       #endif
+#endif
                default:
                        /* invalid specifier combination, give an error message */
                        if(type_specifiers == 0) {
@@ -298,7 +351,7 @@ struct declarator_t {
        declarator_t *next;
 };
 
-declarator_t *parse_declarator()
+declarator_t *parse_declarator(void)
 {
        while(token.type == '*') {
                /* pointer */
@@ -310,8 +363,7 @@ declarator_t *parse_declarator()
 
        switch(token.type) {
        case T_IDENTIFIER:
-               declarator = allocate_ast(sizeof(declarator[0]));
-               memset(declarator, 0, sizeof(declarator[0]));
+               declarator = allocate_ast_zero(sizeof(declarator[0]));
                declarator->symbol = token.v.symbol;
                return declarator;
        case '(':
@@ -342,7 +394,7 @@ declarator_t *parse_declarator()
        return declarator;
 }
 
-declarator_t *parse_init_declarator()
+declarator_t *parse_init_declarator(void)
 {
        declarator_t *declarator = parse_declarator();
        if(token.type == '=') {
@@ -359,7 +411,7 @@ struct declaration_t {
        declaration_t            *declarators;
 };
 
-void parse_declaration()
+void parse_declaration(void)
 {
        declaration_specifiers_t specifiers;
        memset(&specifiers, 0, sizeof(specifiers));
@@ -374,3 +426,680 @@ namespace_t *parse(FILE *in, const char *input_name)
        return namespace;
 }
 #endif
+
+
+
+static
+expression_t *parse_sub_expression(unsigned precedence);
+static
+expression_t *parse_expression(void);
+
+typedef expression_t* (*parse_expression_function) (unsigned precedence);
+typedef expression_t* (*parse_expression_infix_function) (unsigned precedence,
+                                                          expression_t *left);
+
+typedef struct expression_parser_function_t expression_parser_function_t;
+struct expression_parser_function_t {
+       unsigned                         precedence;       /* handed to `parser` when it is called */
+       parse_expression_function        parser;           /* prefix parser, NULL if none registered */
+       unsigned                         infix_precedence; /* compared with the caller's minimum precedence */
+       parse_expression_infix_function  infix_parser;     /* NULL if the token is no infix operator */
+};
+
+expression_parser_function_t expression_parsers[T_LAST_TOKEN]; /* indexed by token.type */
+
+/* Report the unexpected current token on stderr, skip it and hand back a
+ * placeholder node of type EXPR_INVALID. */
+static expression_t *expected_expression_error(void)
+{
+       parser_print_error_prefix();
+       fputs("expected expression, got token ", stderr);
+       print_token(stderr, &token);
+       fputc('\n', stderr);
+
+       expression_t *invalid = allocate_ast_zero(sizeof(*invalid));
+       invalid->type = EXPR_INVALID;
+       next_token();
+       return invalid;
+}
+
+/* Build an EXPR_STRING_LITERAL node holding the current token's string, then
+ * advance to the next token. */
+static expression_t *parse_string_const(void)
+{
+       string_literal_t *literal = allocate_ast_zero(sizeof(*literal));
+       expression_t     *result  = (expression_t*) literal;
+
+       literal->value           = token.v.string;
+       literal->expression.type = EXPR_STRING_LITERAL;
+       next_token();
+       return result;
+}
+
+/* Build an EXPR_CONST node holding the current token's integer value, then
+ * advance to the next token. */
+static expression_t *parse_int_const(void)
+{
+       const_t      *constant = allocate_ast_zero(sizeof(*constant));
+       expression_t *result   = (expression_t*) constant;
+
+       constant->value           = token.v.intvalue;
+       constant->expression.type = EXPR_CONST;
+       next_token();
+       return result;
+}
+
+/* Build an EXPR_REFERENCE node naming the current token's symbol, then
+ * advance to the next token. */
+static expression_t *parse_reference(void)
+{
+       reference_expression_t *reference = allocate_ast_zero(sizeof(*reference));
+       expression_t           *result    = (expression_t*) reference;
+
+       reference->symbol          = token.v.symbol;
+       reference->expression.type = EXPR_REFERENCE;
+       next_token();
+       return result;
+}
+
+/* Parse "( expression )"; yields NULL (via expect) if the ')' is missing. */
+static expression_t *parse_brace_expression(void)
+{
+       eat('(');
+
+       expression_t *inner = parse_expression();
+
+       expect(')');
+       return inner;
+}
+
+/* Dispatch on the current token to the matching primary-expression parser.
+ * Any other token yields NULL: nothing is consumed, no error is printed. */
+static expression_t *parse_primary_expression(void)
+{
+       switch(token.type) {
+       case '(':
+               return parse_brace_expression();
+       case T_IDENTIFIER:
+               return parse_reference();
+       case T_STRING_LITERAL:
+               return parse_string_const();
+       case T_INTEGER:
+               return parse_int_const();
+       }
+
+       return NULL; /* TODO: error message */
+}
+
+/* Infix parser for "array[index]"; `array_ref` is the operand parsed so far.
+ * Returns the EXPR_ARRAY_ACCESS node, or NULL if the closing ']' is missing. */
+static expression_t *parse_array_expression(unsigned precedence,
+                                            expression_t *array_ref)
+{
+       (void) precedence;
+
+       eat('[');
+
+       array_access_expression_t *access = allocate_ast_zero(sizeof(*access));
+
+       access->expression.type = EXPR_ARRAY_ACCESS;
+       access->array_ref       = array_ref;
+       access->index           = parse_expression();
+
+       if(token.type == ']') {
+               next_token();
+               return (expression_t*) access;
+       }
+
+       parse_error_expected("Problem while parsing array access", ']', 0);
+       return NULL;
+}
+
+static
+expression_t *parse_sizeof(unsigned precedence)
+{
+       (void) precedence;
+       eat(T_sizeof);
+       /* TODO: stub - only the keyword is consumed, the operand is not parsed */
+
+       return NULL;
+}
+
+/* Infix parser for member access ('.' or T_SELECT) on `compound`. Returns the
+ * EXPR_SELECT node, or NULL if no identifier follows the operator. */
+static expression_t *parse_select_expression(unsigned precedence,
+                                             expression_t *compound)
+{
+       (void) precedence;
+
+       assert(token.type == '.' || token.type == T_SELECT);
+       next_token();
+
+       select_expression_t *member = allocate_ast_zero(sizeof(*member));
+       member->expression.type = EXPR_SELECT;
+       member->compound        = compound;
+
+       if(token.type == T_IDENTIFIER) {
+               member->symbol = token.v.symbol;
+               next_token();
+               return (expression_t*) member;
+       }
+
+       parse_error_expected("Problem while parsing compound select",
+                            T_IDENTIFIER, 0);
+       return NULL;
+}
+
+/* Infix parser for a call: `expression` is the callee; the comma separated
+ * arguments are appended to call->arguments in source order. Returns the
+ * EXPR_CALL node, or NULL (via expect) if the closing ')' is missing. */
+static expression_t *parse_call_expression(unsigned precedence,
+                                           expression_t *expression)
+{
+       (void) precedence;
+
+       call_expression_t *call = allocate_ast_zero(sizeof(*call));
+       call->method          = expression;
+       call->expression.type = EXPR_CALL;
+
+       /* parse arguments */
+       eat('(');
+
+       if(token.type != ')') {
+               /* `link` always points at the slot that receives the next argument */
+               call_argument_t **link = &call->arguments;
+
+               for(;;) {
+                       call_argument_t *argument = allocate_ast_zero(sizeof(*argument));
+                       argument->expression = parse_expression();
+
+                       *link = argument;
+                       link  = &argument->next;
+
+                       if(token.type != ',')
+                               break;
+                       next_token();
+               }
+       }
+
+       expect(')');
+
+       return (expression_t*) call;
+}
+
+#define CREATE_UNARY_EXPRESSION_PARSER(token_type, unexpression_type)     \
+static                                                                    \
+expression_t *parse_##unexpression_type(unsigned precedence)              \
+{                                                                         \
+       eat(token_type);                                                      \
+       /* the operand is parsed with the precedence this parser receives */  \
+       unary_expression_t *unary_expression                                  \
+               = allocate_ast_zero(sizeof(unary_expression[0]));                 \
+       unary_expression->expression.type = EXPR_UNARY;                       \
+       unary_expression->type            = unexpression_type;                \
+       unary_expression->value           = parse_sub_expression(precedence); \
+                                                                          \
+       return (expression_t*) unary_expression;                              \
+}
+/* prefix operators: each line defines one parser named parse_<UNEXPR_...> */
+CREATE_UNARY_EXPRESSION_PARSER('-', UNEXPR_NEGATE);
+CREATE_UNARY_EXPRESSION_PARSER('+', UNEXPR_PLUS);
+CREATE_UNARY_EXPRESSION_PARSER('!', UNEXPR_NOT);
+CREATE_UNARY_EXPRESSION_PARSER('*', UNEXPR_DEREFERENCE);
+CREATE_UNARY_EXPRESSION_PARSER('&', UNEXPR_TAKE_ADDRESS);
+CREATE_UNARY_EXPRESSION_PARSER('~', UNEXPR_BITWISE_NEGATE);
+CREATE_UNARY_EXPRESSION_PARSER(T_PLUSPLUS,   UNEXPR_PREFIX_INCREMENT);
+CREATE_UNARY_EXPRESSION_PARSER(T_MINUSMINUS, UNEXPR_PREFIX_DECREMENT);
+
+#define CREATE_UNARY_POSTFIX_EXPRESSION_PARSER(token_type, unexpression_type) \
+static                                                                        \
+expression_t *parse_##unexpression_type(unsigned precedence,                  \
+                                        expression_t *left)                   \
+{                                                                             \
+       (void) precedence;                                                        \
+       eat(token_type);                                                          \
+       /* the operand was parsed before the operator and arrives as `left` */    \
+       unary_expression_t *unary_expression                                      \
+               = allocate_ast_zero(sizeof(unary_expression[0]));                     \
+       unary_expression->expression.type = EXPR_UNARY;                           \
+       unary_expression->type            = unexpression_type;                    \
+       unary_expression->value           = left;                                 \
+                                                                              \
+       return (expression_t*) unary_expression;                                  \
+}
+/* postfix operators: each line defines one parser named parse_<UNEXPR_...> */
+CREATE_UNARY_POSTFIX_EXPRESSION_PARSER(T_PLUSPLUS,   UNEXPR_POSTFIX_INCREMENT);
+CREATE_UNARY_POSTFIX_EXPRESSION_PARSER(T_MINUSMINUS, UNEXPR_POSTFIX_DECREMENT);
+
+/* Defines parse_<binexpression_type>(), the infix parser for `token_type`.
+ * left_assoc: 1 = left-associative (right operand must bind tighter), 0 = right */
+#define CREATE_BINEXPR_PARSER_ASSOC(token_type, binexpression_type, left_assoc) \
+static                                                                       \
+expression_t *parse_##binexpression_type(unsigned precedence,                \
+                                         expression_t *left)                 \
+{                                                                            \
+       eat(token_type);                                                         \
+       expression_t *right = parse_sub_expression(precedence + (left_assoc));   \
+       binary_expression_t *binexpr = allocate_ast_zero(sizeof(binexpr[0]));    \
+       binexpr->expression.type            = EXPR_BINARY;                       \
+       binexpr->type                       = binexpression_type;                \
+       binexpr->left                       = left;                              \
+       binexpr->right                      = right;                             \
+       return (expression_t*) binexpr;                                          \
+}
+#define CREATE_BINEXPR_PARSER(token_type, binexpression_type) \
+       CREATE_BINEXPR_PARSER_ASSOC(token_type, binexpression_type, 1)
+
+CREATE_BINEXPR_PARSER('*', BINEXPR_MUL);
+CREATE_BINEXPR_PARSER('/', BINEXPR_DIV);
+CREATE_BINEXPR_PARSER('+', BINEXPR_ADD);
+CREATE_BINEXPR_PARSER('-', BINEXPR_SUB);
+CREATE_BINEXPR_PARSER('<', BINEXPR_LESS);
+CREATE_BINEXPR_PARSER('>', BINEXPR_GREATER);
+CREATE_BINEXPR_PARSER_ASSOC('=', BINEXPR_ASSIGN, 0); /* a = b = c is a = (b = c) */
+CREATE_BINEXPR_PARSER(T_EQUALEQUAL, BINEXPR_EQUAL);
+CREATE_BINEXPR_PARSER(T_EXCLAMATIONMARKEQUAL, BINEXPR_NOTEQUAL); /* "!=", the token it is registered for */
+CREATE_BINEXPR_PARSER(T_LESSEQUAL, BINEXPR_LESSEQUAL);
+CREATE_BINEXPR_PARSER(T_GREATEREQUAL, BINEXPR_GREATEREQUAL);
+CREATE_BINEXPR_PARSER('&', BINEXPR_BITWISE_AND);
+CREATE_BINEXPR_PARSER('|', BINEXPR_BITWISE_OR);
+CREATE_BINEXPR_PARSER('^', BINEXPR_BITWISE_XOR);
+CREATE_BINEXPR_PARSER(T_LESSLESS, BINEXPR_SHIFTLEFT);
+CREATE_BINEXPR_PARSER(T_GREATERGREATER, BINEXPR_SHIFTRIGHT);
+
+/* Precedence climbing: parse a prefix/primary expression, then fold in infix
+ * operators of at least `precedence`; results are stamped with the start position */
+static
+expression_t *parse_sub_expression(unsigned precedence)
+{
+       if(token.type < 0)
+               return expected_expression_error();
+
+       expression_parser_function_t *parser = &expression_parsers[token.type];
+       /* own name: "source_position = source_position" would read the local itself */
+       source_position_t             start  = source_position;
+       expression_t                 *left;
+
+       if(parser->parser != NULL) {
+               left = parser->parser(parser->precedence);
+       } else {
+               left = parse_primary_expression();
+       }
+       if(left != NULL)
+               left->source_position = start;
+
+       while(1) {
+               if(token.type < 0)
+                       return expected_expression_error();
+
+               parser = &expression_parsers[token.type];
+               if(parser->infix_parser == NULL)
+                       break;
+               if(parser->infix_precedence < precedence)
+                       break;
+
+               left = parser->infix_parser(parser->infix_precedence, left);
+               if(left != NULL)
+                       left->source_position = start;
+       }
+
+       return left;
+}
+
+/* Parse a full expression: infix operators of precedence >= 1 are accepted. */
+static expression_t *parse_expression(void)
+{
+       const unsigned lowest_precedence = 1;
+       return parse_sub_expression(lowest_precedence);
+}
+
+
+
+/* make `parser` the prefix parser for `token_type`; panics on duplicates */
+void register_expression_parser(parse_expression_function parser,
+                                int token_type, unsigned precedence)
+{
+       expression_parser_function_t *slot = expression_parsers + token_type;
+       if(slot->parser != NULL) {
+               fputs("for token ", stderr);
+               print_token_type(stderr, token_type);
+               fputc('\n', stderr);
+               panic("trying to register multiple expression parsers for a token");
+       }
+       slot->precedence = precedence;
+       slot->parser     = parser;
+}
+
+/* make `parser` the infix parser for `token_type`; panics on duplicates */
+void register_expression_infix_parser(parse_expression_infix_function parser,
+                                      int token_type, unsigned precedence)
+{
+       expression_parser_function_t *slot = expression_parsers + token_type;
+       if(slot->infix_parser != NULL) {
+               fputs("for token ", stderr);
+               print_token_type(stderr, token_type);
+               fputc('\n', stderr);
+               panic("trying to register multiple infix expression parsers for a "
+                     "token");
+       }
+       slot->infix_precedence = precedence;
+       slot->infix_parser     = parser;
+}
+
+/**
+ * Clears the expression parser table and registers the built-in prefix and
+ * infix expression parsers.
+ *
+ * An infix operator is only consumed while its precedence is not lower than
+ * the precedence currently being parsed, so larger numbers bind tighter.
+ * The binary levels follow the C operator hierarchy:
+ *   multiplicative (17) > additive (16) > shift (15) > relational (14)
+ *   > equality (13) > '&' (12) > '^' (11) > '|' (10) > assignment (2).
+ * Shift operators used to share the multiplicative level, which made
+ * "a + b << c" group as "a + (b << c)".
+ */
+static
+void init_expression_parsers(void)
+{
+       memset(&expression_parsers, 0, sizeof(expression_parsers));
+
+       /* binary operators */
+       register_expression_infix_parser(parse_BINEXPR_MUL,       '*', 17);
+       register_expression_infix_parser(parse_BINEXPR_DIV,       '/', 17);
+       register_expression_infix_parser(parse_BINEXPR_ADD,       '+', 16);
+       register_expression_infix_parser(parse_BINEXPR_SUB,       '-', 16);
+       register_expression_infix_parser(parse_BINEXPR_SHIFTLEFT,
+                                  T_LESSLESS, 15);
+       register_expression_infix_parser(parse_BINEXPR_SHIFTRIGHT,
+                                  T_GREATERGREATER, 15);
+       register_expression_infix_parser(parse_BINEXPR_LESS,      '<', 14);
+       register_expression_infix_parser(parse_BINEXPR_GREATER,   '>', 14);
+       register_expression_infix_parser(parse_BINEXPR_LESSEQUAL, T_LESSEQUAL, 14);
+       register_expression_infix_parser(parse_BINEXPR_GREATEREQUAL,
+                                  T_GREATEREQUAL, 14);
+       register_expression_infix_parser(parse_BINEXPR_EQUAL,     T_EQUALEQUAL, 13);
+       register_expression_infix_parser(parse_BINEXPR_NOTEQUAL,
+                                        T_EXCLAMATIONMARKEQUAL, 13);
+       register_expression_infix_parser(parse_BINEXPR_BITWISE_AND, '&',        12);
+       register_expression_infix_parser(parse_BINEXPR_BITWISE_XOR, '^',        11);
+       register_expression_infix_parser(parse_BINEXPR_BITWISE_OR,  '|',        10);
+       register_expression_infix_parser(parse_BINEXPR_ASSIGN,      T_EQUAL,     2);
+
+       /* postfix operators */
+       register_expression_infix_parser(parse_array_expression,        '[',    30);
+       register_expression_infix_parser(parse_call_expression,         '(',    30);
+       register_expression_infix_parser(parse_select_expression,       '.',    30);
+       register_expression_infix_parser(parse_select_expression,  T_SELECT,    30);
+       register_expression_infix_parser(parse_UNEXPR_POSTFIX_INCREMENT,
+                                        T_PLUSPLUS, 30);
+       register_expression_infix_parser(parse_UNEXPR_POSTFIX_DECREMENT,
+                                        T_MINUSMINUS, 30);
+
+       /* prefix (unary) operators */
+       register_expression_parser(parse_UNEXPR_NEGATE,           '-',          25);
+       register_expression_parser(parse_UNEXPR_PLUS,             '+',          25);
+       register_expression_parser(parse_UNEXPR_NOT,              '!',          25);
+       register_expression_parser(parse_UNEXPR_BITWISE_NEGATE,   '~',          25);
+       register_expression_parser(parse_UNEXPR_DEREFERENCE,      '*',          25);
+       register_expression_parser(parse_UNEXPR_TAKE_ADDRESS,     '&',          25);
+       register_expression_parser(parse_UNEXPR_PREFIX_INCREMENT, T_PLUSPLUS,   25);
+       register_expression_parser(parse_UNEXPR_PREFIX_DECREMENT, T_MINUSMINUS, 25);
+       register_expression_parser(parse_sizeof,                  T_sizeof,     25);
+}
+
+
+/* Forward declarations: parse_statement() and parse_compound_statement()
+ * call each other. */
+static
+statement_t *parse_compound_statement(void);
+
+static
+statement_t *parse_statement(void);
+
+/**
+ * Parses a case label: 'case' expression ':' statement.
+ *
+ * No statement node is built yet; the parsed pieces are dropped and NULL is
+ * returned.
+ */
+static statement_t *parse_case_statement(void)
+{
+       eat(T_case);
+       (void) parse_expression();
+       expect(':');
+       (void) parse_statement();
+
+       return NULL;
+}
+
+/**
+ * Parses a default label: 'default' ':' statement.
+ *
+ * No statement node is built yet; NULL is returned.
+ */
+static statement_t *parse_default_statement(void)
+{
+       eat(T_default);
+       expect(':');
+       (void) parse_statement();
+
+       return NULL;
+}
+
+/**
+ * Parses a labeled statement: identifier ':' statement.
+ *
+ * The label name must be followed by a colon (the code used to expect a
+ * semicolon here). No statement node is built yet; NULL is returned.
+ */
+static
+statement_t *parse_label_statement(void)
+{
+       eat(T_IDENTIFIER);
+       expect(':');
+       parse_statement();
+
+       return NULL;
+}
+
+/**
+ * Parses an if statement:
+ *   'if' '(' expression ')' statement [ 'else' statement ]
+ *
+ * No statement node is built yet; NULL is returned.
+ */
+static statement_t *parse_if(void)
+{
+       eat(T_if);
+       expect('(');
+       (void) parse_expression();
+       expect(')');
+
+       (void) parse_statement();
+       if(token.type != T_else)
+               return NULL;
+
+       /* optional else branch */
+       next_token();
+       (void) parse_statement();
+
+       return NULL;
+}
+
+/**
+ * Parses a switch statement: 'switch' '(' expression ')' statement.
+ *
+ * No statement node is built yet; NULL is returned.
+ */
+static statement_t *parse_switch(void)
+{
+       eat(T_switch);
+
+       expect('(');
+       (void) parse_expression();
+       expect(')');
+
+       (void) parse_statement();
+
+       return NULL;
+}
+
+/**
+ * Parses a while loop: 'while' '(' expression ')' statement.
+ *
+ * No statement node is built yet; NULL is returned.
+ */
+static statement_t *parse_while(void)
+{
+       eat(T_while);
+
+       expect('(');
+       (void) parse_expression();
+       expect(')');
+
+       (void) parse_statement();
+
+       return NULL;
+}
+
+/**
+ * Parses a do-while loop:
+ *   'do' statement 'while' '(' expression ')' ';'
+ *
+ * The terminating semicolon is part of the statement and is consumed here.
+ * No statement node is built yet; NULL is returned.
+ */
+static
+statement_t *parse_do(void)
+{
+       eat(T_do);
+       parse_statement();
+       expect(T_while);
+       expect('(');
+       parse_expression();
+       expect(')');
+       expect(';');
+
+       return NULL;
+}
+
+/**
+ * Parses a for loop:
+ *   'for' '(' [expression] ';' [expression] ';' [expression] ')' statement
+ *
+ * Each of the three clauses may be empty. No statement node is built yet;
+ * NULL is returned.
+ */
+static statement_t *parse_for(void)
+{
+       eat(T_for);
+       expect('(');
+
+       /* initialisation clause */
+       if(token.type != ';') {
+               /* TODO not correct... this could also be a declaration */
+               (void) parse_expression();
+       }
+       expect(';');
+
+       /* loop condition */
+       if(token.type != ';') {
+               (void) parse_expression();
+       }
+       expect(';');
+
+       /* step clause */
+       if(token.type != ')') {
+               (void) parse_expression();
+       }
+       expect(')');
+
+       (void) parse_statement();
+
+       return NULL;
+}
+
+/**
+ * Parses a goto statement: 'goto' identifier ';'.
+ *
+ * No statement node is built yet; NULL is returned.
+ */
+static statement_t *parse_goto(void)
+{
+       eat(T_goto);
+
+       expect(T_IDENTIFIER);
+       expect(';');
+
+       return NULL;
+}
+
+/**
+ * Parses a continue statement: 'continue' ';'.
+ *
+ * No statement node is built yet; NULL is returned.
+ */
+static statement_t *parse_continue(void)
+{
+       eat(T_continue);
+
+       expect(';');
+
+       return NULL;
+}
+
+/**
+ * Parses a break statement: 'break' ';'.
+ *
+ * No statement node is built yet; NULL is returned.
+ */
+static statement_t *parse_break(void)
+{
+       eat(T_break);
+
+       expect(';');
+
+       return NULL;
+}
+
+/**
+ * Parses a return statement: 'return' [expression] ';'.
+ *
+ * The return value is optional, so an expression is only parsed when the
+ * keyword is not directly followed by the terminating semicolon.
+ * No statement node is built yet; NULL is returned.
+ */
+static
+statement_t *parse_return(void)
+{
+       eat(T_return);
+       if(token.type != ';') {
+               parse_expression();
+       }
+       expect(';');
+
+       return NULL;
+}
+
+/**
+ * Parses a single statement by dispatching on the current token type.
+ *
+ * Returns whatever the selected statement parser returns (all of them
+ * currently return NULL). An empty statement (a lone ';') is consumed here so
+ * that callers looping over statements always make progress on it.
+ */
+static
+statement_t *parse_statement(void)
+{
+       statement_t *statement = NULL;
+
+       /* declaration or statement */
+       switch(token.type) {
+       case T_case:
+               statement = parse_case_statement();
+               break;
+
+       case T_default:
+               statement = parse_default_statement();
+               break;
+
+       case T_IDENTIFIER:
+               statement = parse_label_statement();
+               break;
+
+       case '{':
+               statement = parse_compound_statement();
+               break;
+
+       case T_if:
+               statement = parse_if();
+               break;
+
+       case T_switch:
+               statement = parse_switch();
+               break;
+
+       case T_while:
+               statement = parse_while();
+               break;
+
+       case T_do:
+               statement = parse_do();
+               break;
+
+       case T_for:
+               statement = parse_for();
+               break;
+
+       case T_goto:
+               statement = parse_goto();
+               break;
+
+       case T_continue:
+               statement = parse_continue();
+               break;
+
+       case T_break:
+               statement = parse_break();
+               break;
+
+       case T_return:
+               statement = parse_return();
+               break;
+
+       case ';':
+               /* empty statement: skip the ';' itself */
+               next_token();
+               statement = NULL;
+               break;
+
+       default:
+               /* NOTE(review): declarations and expression statements are not
+                * handled yet; no token is consumed for them here. */
+               break;
+       }
+
+       return statement;
+}
+
+/**
+ * Parses a compound statement: '{' statement* '}'.
+ *
+ * The statement loop also stops at the end of the input, so an unterminated
+ * block no longer spins forever; the missing '}' is then handled by expect()
+ * like any other missing token. No statement node is built yet; NULL is
+ * returned.
+ */
+static
+statement_t *parse_compound_statement(void)
+{
+       expect('{');
+
+       while(token.type != '}' && token.type != T_EOF) {
+               parse_statement();
+       }
+       expect('}');
+
+       return NULL;
+}
+
+/**
+ * Parses a translation unit: external declarations and function bodies up to
+ * the end of the input.
+ *
+ * Declaration specifiers are parsed afresh for every external declaration;
+ * previously they were parsed only once in front of the loop, so only the
+ * first declaration of a file had its specifiers handled. Nothing is recorded
+ * yet: the parsed declarators are dropped.
+ */
+static
+void parse_translation_unit(void)
+{
+       while(token.type != T_EOF) {
+               /* skip a '{' that does not follow a declarator */
+               if(token.type == '{') {
+                       next_token();
+                       continue;
+               }
+
+               declaration_specifiers_t specifiers;
+               memset(&specifiers, 0, sizeof(specifiers));
+               parse_declaration_specifiers(&specifiers);
+
+               declarator_t *declarators = parse_declarator();
+               (void) declarators;
+               /* multiple declarations? */
+
+               if(token.type == '{') {
+                       /* function body */
+                       parse_compound_statement();
+               } else if(token.type == ';') {
+                       next_token();
+               }
+       }
+}
+
+/**
+ * Entry point of the parser: reads the first token and then parses the
+ * whole translation unit.
+ */
+void parse(void)
+{
+       next_token();
+       parse_translation_unit();
+}
+
+/**
+ * Initializes the parser by registering the built-in expression parsers.
+ */
+void init_parser(void)
+{
+       init_expression_parsers();
+}
+
+/**
+ * Counterpart of init_parser(); currently there is nothing to release.
+ */
+void exit_parser(void)
+{
+}
diff --git a/parser.h b/parser.h
new file mode 100644 (file)
index 0000000..84c1d17
--- /dev/null
+++ b/parser.h
@@ -0,0 +1,8 @@
+#ifndef PARSER_H
+#define PARSER_H
+
+void init_parser(void);  /* registers the built-in expression parsers */
+void exit_parser(void);  /* currently does nothing */
+void parse(void);        /* reads the first token, then parses the input */
+
+#endif
index 413319b..895bfea 100644 (file)
--- a/symbol.h
+++ b/symbol.h
@@ -5,7 +5,8 @@ typedef struct symbol_t symbol_t;
 
 struct symbol_t {
        const char          *string;
-       unsigned             ID;
+       unsigned short       ID;
+       unsigned short       pp_ID;
 };
 
 #endif
index c422e0c..03cbdd5 100644 (file)
@@ -10,6 +10,7 @@ static inline
 void init_symbol_table_entry(symbol_t *entry, const char *string)
 {
        entry->ID     = 0;
+       entry->pp_ID  = 0;
        entry->string = string;
 }
 
@@ -41,28 +42,20 @@ void init_symbol_table_entry(symbol_t *entry, const char *string)
 #include "adt/hashset.c"
 
 static symbol_table_t  symbol_table;
-static symbol_table_t  preprocessor_symbol_table;
 
 symbol_t *symbol_table_insert(const char *symbol)
 {
        return _symbol_table_insert(&symbol_table, symbol);
 }
 
-symbol_t *preprocessor_symbol_table_insert(const char *symbol)
-{
-       return _symbol_table_insert(&preprocessor_symbol_table, symbol);
-}
-
 void init_symbol_table(void)
 {
        obstack_init(&symbol_obstack);
        _symbol_table_init(&symbol_table);
-       _symbol_table_init(&preprocessor_symbol_table);
 }
 
 void exit_symbol_table(void)
 {
        _symbol_table_destroy(&symbol_table);
-       _symbol_table_destroy(&preprocessor_symbol_table);
        obstack_free(&symbol_obstack, NULL);
 }
index b6ef219..21c9169 100644 (file)
@@ -5,8 +5,6 @@
 #include "adt/obst.h"
 
 symbol_t *symbol_table_insert(const char *symbol);
-symbol_t *preprocessor_symbol_table_insert(const char *symbol);
-symbol_t *preprocessor_symbol_table_find(const char *symbol);
 
 void init_symbol_table(void);
 void exit_symbol_table(void);
diff --git a/token.c b/token.c
index eaa1dec..9464783 100644 (file)
--- a/token.c
+++ b/token.c
@@ -30,6 +30,15 @@ void init_tokens(void)
 #include "tokens.inc"
 
 #undef TS
+#undef T
+
+#define T(x,str,val)                                               \
+       assert(TP_##x >= 0 && TP_##x < TP_LAST_TOKEN);                 \
+       symbol               = symbol_table_insert(str);               \
+       symbol->pp_ID        = TP_##x;
+
+#include "tokens_preprocessor.inc"
+
 #undef T
 }
 
@@ -39,10 +48,6 @@ void exit_tokens(void)
 
 void print_token_type(FILE *f, token_type_t token_type)
 {
-       if(token_type >= 0 && token_type < 256) {
-               fprintf(f, "'%c'", token_type);
-               return;
-       }
        if(token_type == T_EOF) {
                fputs("end of file", f);
                return;
@@ -62,6 +67,10 @@ void print_token_type(FILE *f, token_type_t token_type)
        if(symbol != NULL) {
                fputs(symbol->string, f);
        } else {
+               if(token_type >= 0 && token_type < 256) {
+                       fprintf(f, "'%c'", token_type);
+                       return;
+               }
                fputs("unknown token", f);
        }
 }
index be48841..c1975a0 100644 (file)
--- a/token_t.h
+++ b/token_t.h
@@ -16,6 +16,14 @@ typedef enum {
        T_ERROR = -2
 } token_type_t;
 
+typedef enum {
+#define T(x,str,val) TP_##x val,
+#define TS(x,str,val) TP_##x val,
+#include "tokens_preprocessor.inc"
+#undef TS
+#undef T
+} preprocessor_token_type_t;
+
 typedef struct {
        int type;
        union {
index 9c40b57..9915f18 100644 (file)
@@ -5,8 +5,6 @@
 TS(IDENTIFIER,     "identifier", = 256)
 TS(INTEGER,        "integer number",)
 TS(STRING_LITERAL, "string literal",)
-TS(PP_NEWLINE,     "praeprocessor newline",)
-TS(PP_HEADER_NAME, "praeprocessor header name",)
 
 #define S(x)   T(x,#x,)
 S(auto)
@@ -106,3 +104,5 @@ T(LESSPERCENT,              "<%",   = '{')
 T(PERCENTGREATER,           "%>",   = '}')
 T(PERCENTCOLON,             "%:",   = '#')
 T(PERCENTCOLONPERCENTCOLON, "%:%:", = T_HASHHASH)
+
+TS(NEWLINE,        "newline", = '\n')
diff --git a/tokens_preprocessor.inc b/tokens_preprocessor.inc
new file mode 100644 (file)
index 0000000..fc8def9
--- /dev/null
@@ -0,0 +1,18 @@
+#define S(x)   T(x,#x,)
+
+T(if, "if", = 1) /* start at 1: symbols are created with pp_ID 0 */
+S(else)
+S(elif)
+S(endif)
+S(ifdef)
+S(ifndef)
+S(include)
+S(define)
+S(undef)
+S(line)
+S(error)
+S(pragma)
+
+#undef S
+
+#define TP_LAST_TOKEN  (TP_pragma + 1)