changed the way preprocessing directives are parsed

author Matthias Braun <matze@braunis.de>
Tue, 19 Jun 2007 22:48:08 +0000 (22:48 +0000)
committer Matthias Braun <matze@braunis.de>
Tue, 19 Jun 2007 22:48:08 +0000 (22:48 +0000)
diff --git a/lexer.c b/lexer.c

index ec96a35..476aba8 100644 (file)
--- a/lexer.c
+++ b/lexer.c
@@ -188,6 +188,18 @@ int replace_trigraph(void)
         case 'Z':         \
         case '_':
  
+#define DIGITS        \
+       case '0':         \
+       case '1':         \
+       case '2':         \
+       case '3':         \
+       case '4':         \
+       case '5':         \
+       case '6':         \
+       case '7':         \
+       case '8':         \
+       case '9':
+
  static
  void parse_symbol(token_t *token)
  {
@@ -204,6 +216,7 @@ void parse_symbol(token_t *token)
                         EAT_NEWLINE(break;)
                         goto end_symbol;
  
+               DIGITS
                 SYMBOL_CHARS
                         obstack_1grow(&symbol_obstack, c);
                         next_char();
@@ -356,14 +369,14 @@ int parse_escape_sequence()
                 case 'x': /* TODO parse hex number ... */
                         parse_error("hex escape sequences not implemented yet");
                         return EOF;
-               case 0:
-               case 1:
-               case 2:
-               case 3:
-               case 4:
-               case 5:
-               case 6:
-               case 7:
+               case '0':
+               case '1':
+               case '2':
+               case '3':
+               case '4':
+               case '5':
+               case '6':
+               case '7':
                         /* TODO parse octal number ... */
                         parse_error("octal escape sequences not implemented yet");
                         return EOF;
@@ -609,9 +622,6 @@ void skip_line_comment(void)
         }
  }
  
-static
-void lexer_next_preprocessing_token(token_t *token);
-
  static token_t pp_token;
  
  static inline
@@ -623,7 +633,9 @@ void next_pp_token(void)
  static
  void eat_until_newline(void)
  {
-       /* TODO */
+       while(pp_token.type != '\n' && pp_token.type != T_EOF) {
+               next_pp_token();
+       }
  }
  
  static
@@ -677,9 +689,7 @@ void parse_line_directive(void)
                 next_pp_token();
         }
  
-       while(pp_token.type != T_EOF && pp_token.type != '\n') {
-               next_pp_token();
-       }
+       eat_until_newline();
  }
  
  static
@@ -721,7 +731,7 @@ void parse_preprocessor_identifier(void)
  }
  
  static
-void parse_preprocessor_directive(token_t *result_token)
+void parse_preprocessor_directive()
  {
         next_pp_token();
  
@@ -732,9 +742,11 @@ void parse_preprocessor_directive(token_t *result_token)
         case T_INTEGER:
                 parse_line_directive();
                 break;
+       default:
+               parse_error("invalid preprocessor directive");
+               eat_until_newline();
+               break;
         }
-
-       lexer_next_token(result_token);
  }
  
  #define MAYBE_PROLOG                                       \
@@ -769,75 +781,6 @@ void parse_preprocessor_directive(token_t *result_token)
                         return;                                        \
                 )
  
-static
-void eat_whitespace()
-{
-       while(1) {
-               switch(c) {
-               case ' ':
-               case '\t':
-                       next_char();
-                       break;
-
-               case '\r':
-               case '\n':
-                       return;
-
-               case '\\':
-                       next_char();
-                       if(c == '\n') {
-                               next_char();
-                               source_position.linenr++;
-                               break;
-                       }
-
-                       put_back(c);
-                       c = '\\';
-                       return;
-
-               SKIP_TRIGRAPHS(,
-                       return;
-               )
-
-               case '/':
-                       next_char();
-                       while(1) {
-                               switch(c) {
-                               case '*':
-                                       next_char();
-                                       skip_multiline_comment();
-                                       eat_whitespace();
-                                       return;
-                               case '/':
-                                       next_char();
-                                       skip_line_comment();
-                                       eat_whitespace();
-                                       return;
-
-                               SKIP_TRIGRAPHS(
-                                               put_back('?');
-                                       ,
-                                               c = '/';
-                                               return;
-                               )
-
-                               case '\\':
-                                       next_char();
-                                       EAT_NEWLINE(break;)
-                                       /* fallthrough */
-                               default:
-                                       return;
-                               }
-                       }
-                       break;
-
-               default:
-                       return;
-               }
-       }
-}
-
-static
  void lexer_next_preprocessing_token(token_t *token)
  {
         while(1) {
@@ -848,12 +791,6 @@ void lexer_next_preprocessing_token(token_t *token)
                         break;
  
                 MATCH_NEWLINE(
-                       eat_whitespace();
-                       if(c == '#') {
-                               next_char();
-                               parse_preprocessor_directive(token);
-                               return;
-                       }
                         token->type = '\n';
                         return;
                 )
@@ -862,16 +799,7 @@ void lexer_next_preprocessing_token(token_t *token)
                         parse_symbol(token);
                         return;
  
-               case '0':
-               case '1':
-               case '2':
-               case '3':
-               case '4':
-               case '5':
-               case '6':
-               case '7':
-               case '8':
-               case '9':
+               DIGITS
                         parse_number(token);
                         return;
  
@@ -1045,9 +973,20 @@ void lexer_next_preprocessing_token(token_t *token)
  
  void lexer_next_token(token_t *token)
  {
-       do {
+       while(1) {
                 lexer_next_preprocessing_token(token);
-       } while(token->type == '\n');
+               if(token->type == '\n') {
+                       do {
+                               lexer_next_preprocessing_token(token);
+                       } while(token->type == '\n');
+
+                       if(token->type == '#') {
+                               parse_preprocessor_directive();
+                               continue;
+                       }
+               }
+               return;
+       }
  }
  
  void init_lexer(void)
diff --git a/lexer.h b/lexer.h

index d3ffd39..3d8cfde 100644 (file)
--- a/lexer.h
+++ b/lexer.h
@@ -6,4 +6,7 @@
  
  void lexer_next_token(token_t *token);
  
+/* for debugging */
+void lexer_next_preprocessing_token(token_t *token);
+
  #endif
diff --git a/lextest/do_tests.sh b/lextest/do_tests.sh

index 7e1f500..d5b60ac 100755 (executable)
--- a/lextest/do_tests.sh
+++ b/lextest/do_tests.sh
@@ -3,7 +3,7 @@ cd `dirname $0`
  for i in tokenstreams/*; do
         if [ "$i" != "tokenstreams/refresults" ]; then
                 echo "==> Checking $i"
-               ../cparser $i > /tmp/tokenstream
-               diff -u /tmp/tokenstream tokenstreams/refresults/`basename $i`
+               ../cparser --lextest $i > /tmp/tokenstream
+               diff -u tokenstreams/refresults/`basename $i` /tmp/tokenstream
         fi
  done
diff --git a/lextest/tokenstreams/refresults/charconstants b/lextest/tokenstreams/refresults/charconstants

index 6c0a47f..73ab4ed 100644 (file)
--- a/lextest/tokenstreams/refresults/charconstants
+++ b/lextest/tokenstreams/refresults/charconstants
@@ -1,9 +1,19 @@
+'newline'
  integer number 97
+'newline'
  integer number 98
+'newline'
  integer number 99
+'newline'
  integer number 92
+'newline'
  integer number 92
+'newline'
  integer number 92
+'newline'
  integer number 92
+'newline'
  integer number 92
+'newline'
+'newline'
  end of file
diff --git a/lextest/tokenstreams/refresults/comments b/lextest/tokenstreams/refresults/comments

index e6ef830..0f055e3 100644 (file)
--- a/lextest/tokenstreams/refresults/comments
+++ b/lextest/tokenstreams/refresults/comments
@@ -1 +1,9 @@
+'newline'
+'newline'
+'newline'
+'newline'
+'newline'
+'newline'
+'newline'
+'newline'
  end of file
diff --git a/lextest/tokenstreams/refresults/linefeeds b/lextest/tokenstreams/refresults/linefeeds

index be05286..9ea95b2 100644 (file)
--- a/lextest/tokenstreams/refresults/linefeeds
+++ b/lextest/tokenstreams/refresults/linefeeds
@@ -1,5 +1,9 @@
+'newline'
  symbol 'hohohohohohoho'
+'newline'
  symbol 'ho'
+'newline'
  symbol 'ho'
+'newline'
  symbol 'ho'
  end of file
diff --git a/lextest/tokenstreams/refresults/operators b/lextest/tokenstreams/refresults/operators

index b54a266..7594576 100644 (file)
--- a/lextest/tokenstreams/refresults/operators
+++ b/lextest/tokenstreams/refresults/operators
@@ -1,43 +1,74 @@
-++
-++
+'newline'
+'++'
+'++'
  '+'
+'newline'
  '.'
+'newline'
  '.'
  '.'
+'newline'
  '.'
  '.'
-...
-...
+'newline'
+'...'
+'newline'
+'...'
+'newline'
  '.'
+'newline'
  '.'
-...
+'newline'
+'...'
  '.'
+'newline'
  '.'
  '.'
  '.'
-++
-<<=
->>=
+'newline'
+'++'
+'newline'
+'<<='
+'newline'
+'>>='
+'newline'
  '%'
+'newline'
  '}'
-%=
+'newline'
+'%='
+'newline'
  '#'
+'newline'
  '#'
  '%'
-%:%:
-%:%:
+'newline'
+'%:%:'
+'newline'
+'%:%:'
+'newline'
  '<'
-<<
-<<=
-<<
+'newline'
+'<<'
+'newline'
+'<<='
+'newline'
+'<<'
  '<'
  '='
-<<=
+'newline'
+'<<='
+'newline'
  '>'
->>
->>=
->>
+'newline'
+'>>'
+'newline'
+'>>='
+'newline'
+'>>'
  '>'
  '='
->>=
+'newline'
+'>>='
+'newline'
  end of file
diff --git a/lextest/tokenstreams/refresults/strings b/lextest/tokenstreams/refresults/strings

index 7dec63d..486276b 100644 (file)
--- a/lextest/tokenstreams/refresults/strings
+++ b/lextest/tokenstreams/refresults/strings
@@ -1,16 +1,28 @@
+'newline'
  string 'bla?'
+'newline'
  string 'bla??'
+'newline'
  string 'bla???'
+'newline'
  string 'bla
  '
+'newline'
  string 'bla?
  '
+'newline'
  string 'bla??
  '
+'newline'
  string 'bla'
+'newline'
  string 'bla?'
+'newline'
  string 'bla??'
+'newline'
  string 'bla'
+'newline'
  string 'bla
  '
+'newline'
  end of file
diff --git a/lextest/tokenstreams/refresults/symbols b/lextest/tokenstreams/refresults/symbols

index 9d4ad33..b3eafcd 100644 (file)
--- a/lextest/tokenstreams/refresults/symbols
+++ b/lextest/tokenstreams/refresults/symbols
@@ -1,16 +1,25 @@
+'newline'
  symbol 't'
+'newline'
  symbol 'tt'
+'newline'
  symbol 'one'
  symbol 'two'
+'newline'
  symbol 'symbo'
  '?'
  '?'
  '?'
+'newline'
  symbol 'symbo'
  '?'
  '?'
+'newline'
  symbol 'symbo'
  '?'
+'newline'
+'newline'
  symbol 'one'
  symbol 'two'
+'newline'
  end of file
diff --git a/lextest/tokenstreams/refresults/trigraphs b/lextest/tokenstreams/refresults/trigraphs

index 261b5fd..4c07f9f 100644 (file)
--- a/lextest/tokenstreams/refresults/trigraphs
+++ b/lextest/tokenstreams/refresults/trigraphs
@@ -1,18 +1,31 @@
+'newline'
  '?'
+'newline'
  '?'
  '?'
+'newline'
  '?'
  '?'
  '?'
+'newline'
  integer number 35
+'newline'
  '['
+'newline'
  ']'
+'newline'
  '^'
+'newline'
  '{'
+'newline'
  '|'
+'newline'
  '}'
+'newline'
  '~'
+'newline'
  '?'
  '?'
  '>'
+'newline'
  end of file
diff --git a/main.c b/main.c

index eb0c479..17f7cf1 100644 (file)
--- a/main.c
+++ b/main.c
@@ -47,7 +47,7 @@ void compile(const char *fname)
  
         lexer_open_stream(in, fname);
  
-#if 0
+#if 1
         token_t token;
         do {
                 lexer_next_token(&token);
@@ -61,6 +61,27 @@ void compile(const char *fname)
         fclose(in);
  }
  
+static
+void lextest(const char *fname)
+{
+       FILE *in = fopen(fname, "r");
+       if(in == NULL) {
+               fprintf(stderr, "Couldn't open '%s': %s\n", fname, strerror(errno));
+               exit(1);
+       }
+
+       lexer_open_stream(in, fname);
+
+       token_t token;
+       do {
+               lexer_next_preprocessing_token(&token);
+               print_token(stdout, &token);
+               puts("");
+       } while(token.type != T_EOF);
+
+       fclose(in);
+}
+
  int main(int argc, char **argv)
  {
         init_symbol_table();
@@ -71,6 +92,11 @@ int main(int argc, char **argv)
         init_ast();
         init_parser();
  
+       if(argc > 2 && strcmp(argv[1], "--lextest") == 0) {
+               lextest(argv[2]);
+               return 0;
+       }
+
         for(int i = 1; i < argc; ++i) {
                 compile(argv[i]);
         }
diff --git a/parser.c b/parser.c

index 9a094bd..454f058 100644 (file)
--- a/parser.c
+++ b/parser.c
@@ -707,6 +707,7 @@ void parse_declarator(declaration_t *declaration, storage_class_t storage_class,
  
         if(storage_class == STORAGE_CLASS_TYPEDEF) {
                 symbol->ID       = T_TYPENAME;
+               fprintf(stderr, "typedef '%s'\n", symbol->string);
         } else {
                 symbol->ID       = T_IDENTIFIER;
         }
diff --git a/tokens.inc b/tokens.inc

index fc22fad..996666b 100644 (file)
--- a/tokens.inc
+++ b/tokens.inc
@@ -77,6 +77,13 @@ T(HASHHASH,                 "##",)
  
  #define T_LAST_TOKEN  (T_HASHHASH+1)
  
+T(LESSCOLON,                "<:",   = '[')
+T(COLONGREATER,             ":>",   = ']')
+T(LESSPERCENT,              "<%",   = '{')
+T(PERCENTGREATER,           "%>",   = '}')
+T(PERCENTCOLON,             "%:",   = '#')
+T(PERCENTCOLONPERCENTCOLON, "%:%:", = T_HASHHASH)
+
  T(RBRACK,          "[", = '[')
  T(LBRACK,          "]", = ']')
  T(LBRACE,          "(", = '(')
@@ -103,11 +110,4 @@ T(EQUAL,           "=", = '=')
  T(COMMA,           ",", = ',')
  T(HASH,            "#", = '#')
  
-T(LESSCOLON,                "<:",   = '[')
-T(COLONGREATER,             ":>",   = ']')
-T(LESSPERCENT,              "<%",   = '{')
-T(PERCENTGREATER,           "%>",   = '}')
-T(PERCENTCOLON,             "%:",   = '#')
-T(PERCENTCOLONPERCENTCOLON, "%:%:", = T_HASHHASH)
-
  TS(NEWLINE,        "newline", = '\n')
diff --git a/type.c b/type.c

index fade11f..4332aa0 100644 (file)
--- a/type.c
+++ b/type.c
@@ -41,6 +41,7 @@ void print_atomic_type(FILE *out, const atomic_type_t *type)
  
         switch(type->atype) {
         case ATOMIC_TYPE_INVALID:   fputs("INVALIDATOMIC", out); break;
+       case ATOMIC_TYPE_VOID:      fputs("void", out); break;
         case ATOMIC_TYPE_BOOL:      fputs("bool", out); break;
         case ATOMIC_TYPE_CHAR:      fputs("char", out); break;
         case ATOMIC_TYPE_SCHAR:     fputs("signed char", out); break;
author	Matthias Braun <matze@braunis.de>
	Tue, 19 Jun 2007 22:48:08 +0000 (22:48 +0000)
committer	Matthias Braun <matze@braunis.de>
	Tue, 19 Jun 2007 22:48:08 +0000 (22:48 +0000)
lexer.c		patch | blob | history
lexer.h		patch | blob | history
lextest/do_tests.sh		patch | blob | history
lextest/tokenstreams/refresults/charconstants		patch | blob | history
lextest/tokenstreams/refresults/comments		patch | blob | history
lextest/tokenstreams/refresults/linefeeds		patch | blob | history
lextest/tokenstreams/refresults/operators		patch | blob | history
lextest/tokenstreams/refresults/strings		patch | blob | history
lextest/tokenstreams/refresults/symbols		patch | blob | history
lextest/tokenstreams/refresults/trigraphs		patch | blob | history
main.c		patch | blob | history
parser.c		patch | blob | history
tokens.inc		patch | blob | history
type.c		patch | blob | history