changed the way preprocessing directives are parsed
author Matthias Braun <matze@braunis.de>
Tue, 19 Jun 2007 22:48:08 +0000 (22:48 +0000)
committer Matthias Braun <matze@braunis.de>
Tue, 19 Jun 2007 22:48:08 +0000 (22:48 +0000)
[r18332]

14 files changed:
lexer.c
lexer.h
lextest/do_tests.sh
lextest/tokenstreams/refresults/charconstants
lextest/tokenstreams/refresults/comments
lextest/tokenstreams/refresults/linefeeds
lextest/tokenstreams/refresults/operators
lextest/tokenstreams/refresults/strings
lextest/tokenstreams/refresults/symbols
lextest/tokenstreams/refresults/trigraphs
main.c
parser.c
tokens.inc
type.c

diff --git a/lexer.c b/lexer.c
index ec96a35..476aba8 100644 (file)
--- a/lexer.c
+++ b/lexer.c
@@ -188,6 +188,18 @@ int replace_trigraph(void)
        case 'Z':         \
        case '_':
 
+#define DIGITS        \
+       case '0':         \
+       case '1':         \
+       case '2':         \
+       case '3':         \
+       case '4':         \
+       case '5':         \
+       case '6':         \
+       case '7':         \
+       case '8':         \
+       case '9':
+
 static
 void parse_symbol(token_t *token)
 {
@@ -204,6 +216,7 @@ void parse_symbol(token_t *token)
                        EAT_NEWLINE(break;)
                        goto end_symbol;
 
+               DIGITS
                SYMBOL_CHARS
                        obstack_1grow(&symbol_obstack, c);
                        next_char();
@@ -356,14 +369,14 @@ int parse_escape_sequence()
                case 'x': /* TODO parse hex number ... */
                        parse_error("hex escape sequences not implemented yet");
                        return EOF;
-               case 0:
-               case 1:
-               case 2:
-               case 3:
-               case 4:
-               case 5:
-               case 6:
-               case 7:
+               case '0':
+               case '1':
+               case '2':
+               case '3':
+               case '4':
+               case '5':
+               case '6':
+               case '7':
                        /* TODO parse octal number ... */
                        parse_error("octal escape sequences not implemented yet");
                        return EOF;
@@ -609,9 +622,6 @@ void skip_line_comment(void)
        }
 }
 
-static
-void lexer_next_preprocessing_token(token_t *token);
-
 static token_t pp_token;
 
 static inline
@@ -623,7 +633,9 @@ void next_pp_token(void)
 static
 void eat_until_newline(void)
 {
-       /* TODO */
+       while(pp_token.type != '\n' && pp_token.type != T_EOF) {
+               next_pp_token();
+       }
 }
 
 static
@@ -677,9 +689,7 @@ void parse_line_directive(void)
                next_pp_token();
        }
 
-       while(pp_token.type != T_EOF && pp_token.type != '\n') {
-               next_pp_token();
-       }
+       eat_until_newline();
 }
 
 static
@@ -721,7 +731,7 @@ void parse_preprocessor_identifier(void)
 }
 
 static
-void parse_preprocessor_directive(token_t *result_token)
+void parse_preprocessor_directive()
 {
        next_pp_token();
 
@@ -732,9 +742,11 @@ void parse_preprocessor_directive(token_t *result_token)
        case T_INTEGER:
                parse_line_directive();
                break;
+       default:
+               parse_error("invalid preprocessor directive");
+               eat_until_newline();
+               break;
        }
-
-       lexer_next_token(result_token);
 }
 
 #define MAYBE_PROLOG                                       \
@@ -769,75 +781,6 @@ void parse_preprocessor_directive(token_t *result_token)
                        return;                                        \
                )
 
-static
-void eat_whitespace()
-{
-       while(1) {
-               switch(c) {
-               case ' ':
-               case '\t':
-                       next_char();
-                       break;
-
-               case '\r':
-               case '\n':
-                       return;
-
-               case '\\':
-                       next_char();
-                       if(c == '\n') {
-                               next_char();
-                               source_position.linenr++;
-                               break;
-                       }
-
-                       put_back(c);
-                       c = '\\';
-                       return;
-
-               SKIP_TRIGRAPHS(,
-                       return;
-               )
-
-               case '/':
-                       next_char();
-                       while(1) {
-                               switch(c) {
-                               case '*':
-                                       next_char();
-                                       skip_multiline_comment();
-                                       eat_whitespace();
-                                       return;
-                               case '/':
-                                       next_char();
-                                       skip_line_comment();
-                                       eat_whitespace();
-                                       return;
-
-                               SKIP_TRIGRAPHS(
-                                               put_back('?');
-                                       ,
-                                               c = '/';
-                                               return;
-                               )
-
-                               case '\\':
-                                       next_char();
-                                       EAT_NEWLINE(break;)
-                                       /* fallthrough */
-                               default:
-                                       return;
-                               }
-                       }
-                       break;
-
-               default:
-                       return;
-               }
-       }
-}
-
-static
 void lexer_next_preprocessing_token(token_t *token)
 {
        while(1) {
@@ -848,12 +791,6 @@ void lexer_next_preprocessing_token(token_t *token)
                        break;
 
                MATCH_NEWLINE(
-                       eat_whitespace();
-                       if(c == '#') {
-                               next_char();
-                               parse_preprocessor_directive(token);
-                               return;
-                       }
                        token->type = '\n';
                        return;
                )
@@ -862,16 +799,7 @@ void lexer_next_preprocessing_token(token_t *token)
                        parse_symbol(token);
                        return;
 
-               case '0':
-               case '1':
-               case '2':
-               case '3':
-               case '4':
-               case '5':
-               case '6':
-               case '7':
-               case '8':
-               case '9':
+               DIGITS
                        parse_number(token);
                        return;
 
@@ -1045,9 +973,20 @@ void lexer_next_preprocessing_token(token_t *token)
 
 void lexer_next_token(token_t *token)
 {
-       do {
+       while(1) {
                lexer_next_preprocessing_token(token);
-       } while(token->type == '\n');
+               if(token->type == '\n') {
+                       do {
+                               lexer_next_preprocessing_token(token);
+                       } while(token->type == '\n');
+
+                       if(token->type == '#') {
+                               parse_preprocessor_directive();
+                               continue;
+                       }
+               }
+               return;
+       }
 }
 
 void init_lexer(void)
diff --git a/lexer.h b/lexer.h
index d3ffd39..3d8cfde 100644 (file)
--- a/lexer.h
+++ b/lexer.h
@@ -6,4 +6,7 @@
 
 void lexer_next_token(token_t *token);
 
+/* for debugging */
+void lexer_next_preprocessing_token(token_t *token);
+
 #endif
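diff --git a/lextest/do_tests.sh b/lextest/do_tests.sh
index 7e1f500..d5b60ac 100755 (executable)
--- a/lextest/do_tests.sh
+++ b/lextest/do_tests.sh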
@@ -3,7 +3,7 @@ cd `dirname $0`
 for i in tokenstreams/*; do
        if [ "$i" != "tokenstreams/refresults" ]; then
                echo "==> Checking $i"
-               ../cparser $i > /tmp/tokenstream
-               diff -u /tmp/tokenstream tokenstreams/refresults/`basename $i`
+               ../cparser --lextest $i > /tmp/tokenstream
+               diff -u tokenstreams/refresults/`basename $i` /tmp/tokenstream
        fi
 done
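diff --git a/lextest/tokenstreams/refresults/charconstants b/lextest/tokenstreams/refresults/charconstants
index 6c0a47f..73ab4ed 100644 (file)
--- a/lextest/tokenstreams/refresults/charconstants
+++ b/lextest/tokenstreams/refresults/charconstants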
@@ -1,9 +1,19 @@
+'newline'
 integer number 97
+'newline'
 integer number 98
+'newline'
 integer number 99
+'newline'
 integer number 92
+'newline'
 integer number 92
+'newline'
 integer number 92
+'newline'
 integer number 92
+'newline'
 integer number 92
+'newline'
+'newline'
 end of file
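diff --git a/lextest/tokenstreams/refresults/comments b/lextest/tokenstreams/refresults/comments
index e6ef830..0f055e3 100644 (file)
--- a/lextest/tokenstreams/refresults/comments
+++ b/lextest/tokenstreams/refresults/comments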
@@ -1 +1,9 @@
+'newline'
+'newline'
+'newline'
+'newline'
+'newline'
+'newline'
+'newline'
+'newline'
 end of file
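diff --git a/lextest/tokenstreams/refresults/linefeeds b/lextest/tokenstreams/refresults/linefeeds
index be05286..9ea95b2 100644 (file)
--- a/lextest/tokenstreams/refresults/linefeeds
+++ b/lextest/tokenstreams/refresults/linefeeds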
@@ -1,5 +1,9 @@
+'newline'
 symbol 'hohohohohohoho'
+'newline'
 symbol 'ho'
+'newline'
 symbol 'ho'
+'newline'
 symbol 'ho'
 end of file
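diff --git a/lextest/tokenstreams/refresults/operators b/lextest/tokenstreams/refresults/operators
index b54a266..7594576 100644 (file)
--- a/lextest/tokenstreams/refresults/operators
+++ b/lextest/tokenstreams/refresults/operators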
@@ -1,43 +1,74 @@
-++
-++
+'newline'
+'++'
+'++'
 '+'
+'newline'
 '.'
+'newline'
 '.'
 '.'
+'newline'
 '.'
 '.'
-...
-...
+'newline'
+'...'
+'newline'
+'...'
+'newline'
 '.'
+'newline'
 '.'
-...
+'newline'
+'...'
 '.'
+'newline'
 '.'
 '.'
 '.'
-++
-<<=
->>=
+'newline'
+'++'
+'newline'
+'<<='
+'newline'
+'>>='
+'newline'
 '%'
+'newline'
 '}'
-%=
+'newline'
+'%='
+'newline'
 '#'
+'newline'
 '#'
 '%'
-%:%:
-%:%:
+'newline'
+'%:%:'
+'newline'
+'%:%:'
+'newline'
 '<'
-<<
-<<=
-<<
+'newline'
+'<<'
+'newline'
+'<<='
+'newline'
+'<<'
 '<'
 '='
-<<=
+'newline'
+'<<='
+'newline'
 '>'
->>
->>=
->>
+'newline'
+'>>'
+'newline'
+'>>='
+'newline'
+'>>'
 '>'
 '='
->>=
+'newline'
+'>>='
+'newline'
 end of file
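diff --git a/lextest/tokenstreams/refresults/strings b/lextest/tokenstreams/refresults/strings
index 7dec63d..486276b 100644 (file)
--- a/lextest/tokenstreams/refresults/strings
+++ b/lextest/tokenstreams/refresults/strings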
@@ -1,16 +1,28 @@
+'newline'
 string 'bla?'
+'newline'
 string 'bla??'
+'newline'
 string 'bla???'
+'newline'
 string 'bla
 '
+'newline'
 string 'bla?
 '
+'newline'
 string 'bla??
 '
+'newline'
 string 'bla'
+'newline'
 string 'bla?'
+'newline'
 string 'bla??'
+'newline'
 string 'bla'
+'newline'
 string 'bla
 '
+'newline'
 end of file
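diff --git a/lextest/tokenstreams/refresults/symbols b/lextest/tokenstreams/refresults/symbols
index 9d4ad33..b3eafcd 100644 (file)
--- a/lextest/tokenstreams/refresults/symbols
+++ b/lextest/tokenstreams/refresults/symbols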
@@ -1,16 +1,25 @@
+'newline'
 symbol 't'
+'newline'
 symbol 'tt'
+'newline'
 symbol 'one'
 symbol 'two'
+'newline'
 symbol 'symbo'
 '?'
 '?'
 '?'
+'newline'
 symbol 'symbo'
 '?'
 '?'
+'newline'
 symbol 'symbo'
 '?'
+'newline'
+'newline'
 symbol 'one'
 symbol 'two'
+'newline'
 end of file
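diff --git a/lextest/tokenstreams/refresults/trigraphs b/lextest/tokenstreams/refresults/trigraphs
index 261b5fd..4c07f9f 100644 (file)
--- a/lextest/tokenstreams/refresults/trigraphs
+++ b/lextest/tokenstreams/refresults/trigraphs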
@@ -1,18 +1,31 @@
+'newline'
 '?'
+'newline'
 '?'
 '?'
+'newline'
 '?'
 '?'
 '?'
+'newline'
 integer number 35
+'newline'
 '['
+'newline'
 ']'
+'newline'
 '^'
+'newline'
 '{'
+'newline'
 '|'
+'newline'
 '}'
+'newline'
 '~'
+'newline'
 '?'
 '?'
 '>'
+'newline'
 end of file
diff --git a/main.c b/main.c
index eb0c479..17f7cf1 100644 (file)
--- a/main.c
+++ b/main.c
@@ -47,7 +47,7 @@ void compile(const char *fname)
 
        lexer_open_stream(in, fname);
 
-#if 0
+#if 1
        token_t token;
        do {
                lexer_next_token(&token);
@@ -61,6 +61,27 @@ void compile(const char *fname)
        fclose(in);
 }
 
+static
+void lextest(const char *fname)
+{
+       FILE *in = fopen(fname, "r");
+       if(in == NULL) {
+               fprintf(stderr, "Couldn't open '%s': %s\n", fname, strerror(errno));
+               exit(1);
+       }
+
+       lexer_open_stream(in, fname);
+
+       token_t token;
+       do {
+               lexer_next_preprocessing_token(&token);
+               print_token(stdout, &token);
+               puts("");
+       } while(token.type != T_EOF);
+
+       fclose(in);
+}
+
 int main(int argc, char **argv)
 {
        init_symbol_table();
@@ -71,6 +92,11 @@ int main(int argc, char **argv)
        init_ast();
        init_parser();
 
+       if(argc > 2 && strcmp(argv[1], "--lextest") == 0) {
+               lextest(argv[2]);
+               return 0;
+       }
+
        for(int i = 1; i < argc; ++i) {
                compile(argv[i]);
        }
diff --git a/parser.c b/parser.c
index 9a094bd..454f058 100644 (file)
--- a/parser.c
+++ b/parser.c
@@ -707,6 +707,7 @@ void parse_declarator(declaration_t *declaration, storage_class_t storage_class,
 
        if(storage_class == STORAGE_CLASS_TYPEDEF) {
                symbol->ID       = T_TYPENAME;
+               fprintf(stderr, "typedef '%s'\n", symbol->string);
        } else {
                symbol->ID       = T_IDENTIFIER;
        }
diff --git a/tokens.inc b/tokens.inc
index fc22fad..996666b 100644 (file)
--- a/tokens.inc
+++ b/tokens.inc
@@ -77,6 +77,13 @@ T(HASHHASH,                 "##",)
 
 #define T_LAST_TOKEN  (T_HASHHASH+1)
 
+T(LESSCOLON,                "<:",   = '[')
+T(COLONGREATER,             ":>",   = ']')
+T(LESSPERCENT,              "<%",   = '{')
+T(PERCENTGREATER,           "%>",   = '}')
+T(PERCENTCOLON,             "%:",   = '#')
+T(PERCENTCOLONPERCENTCOLON, "%:%:", = T_HASHHASH)
+
 T(RBRACK,          "[", = '[')
 T(LBRACK,          "]", = ']')
 T(LBRACE,          "(", = '(')
@@ -103,11 +110,4 @@ T(EQUAL,           "=", = '=')
 T(COMMA,           ",", = ',')
 T(HASH,            "#", = '#')
 
-T(LESSCOLON,                "<:",   = '[')
-T(COLONGREATER,             ":>",   = ']')
-T(LESSPERCENT,              "<%",   = '{')
-T(PERCENTGREATER,           "%>",   = '}')
-T(PERCENTCOLON,             "%:",   = '#')
-T(PERCENTCOLONPERCENTCOLON, "%:%:", = T_HASHHASH)
-
 TS(NEWLINE,        "newline", = '\n')
diff --git a/type.c b/type.c
index fade11f..4332aa0 100644 (file)
--- a/type.c
+++ b/type.c
@@ -41,6 +41,7 @@ void print_atomic_type(FILE *out, const atomic_type_t *type)
 
        switch(type->atype) {
        case ATOMIC_TYPE_INVALID:   fputs("INVALIDATOMIC", out); break;
+       case ATOMIC_TYPE_VOID:      fputs("void", out); break;
        case ATOMIC_TYPE_BOOL:      fputs("bool", out); break;
        case ATOMIC_TYPE_CHAR:      fputs("char", out); break;
        case ATOMIC_TYPE_SCHAR:     fputs("signed char", out); break;