recognize preprocessor hashes
author Matthias Braun <matze@braunis.de>
Sun, 10 Jun 2007 16:33:32 +0000 (16:33 +0000)
committer Matthias Braun <matze@braunis.de>
Sun, 10 Jun 2007 16:33:32 +0000 (16:33 +0000)
[r18325]

lexer.c
lextest/preprocessor/test [new file with mode: 0644]
preprocessor_tokens.inc [deleted file]
token.c
token_t.h
tokens.inc

diff --git a/lexer.c b/lexer.c
index 715c851..04060b9 100644 (file)
--- a/lexer.c
+++ b/lexer.c
@@ -11,7 +11,7 @@
 #include <ctype.h>
 
 //#define DEBUG_CHARS
-#define MAX_PUTBACK 2
+#define MAX_PUTBACK 3
 
 static
 void error_prefix_at(lexer_t *this, const char *input_name, unsigned linenr)
@@ -67,6 +67,7 @@ void put_back(lexer_t *this, int c)
 #endif
 }
 
+
 static
 int replace_trigraph(lexer_t *this)
 {
@@ -92,17 +93,20 @@ int replace_trigraph(lexer_t *this)
        return 0;
 }
 
-#define SKIP_TRIGRAPHS(no_trigraph_code)       \
+#define SKIP_TRIGRAPHS(custom_putback, no_trigraph_code) \
        case '?':                                  \
                next_char(this);                       \
                if(this->c != '?') {                   \
+                       custom_putback;                    \
                        put_back(this, this->c);           \
                        this->c = '?';                     \
                        no_trigraph_code;                  \
                }                                      \
                next_char(this);                       \
-               if(replace_trigraph(this))             \
+               if(replace_trigraph(this)) {           \
                        break;                             \
+               }                                      \
+               custom_putback;                        \
                put_back(this, '?');                   \
                put_back(this, this->c);               \
                this->c = '?';                         \
@@ -354,7 +358,7 @@ void parse_string_literal(lexer_t *this, token_t *token)
 
        while(1) {
                switch(this->c) {
-               SKIP_TRIGRAPHS(
+               SKIP_TRIGRAPHS(,
                        obstack_1grow(&symbol_obstack, '?');
                        next_char(this);
                        break;
@@ -425,7 +429,7 @@ void parse_character_constant(lexer_t *this, token_t *token)
        int found_char = 0;
        while(1) {
                switch(this->c) {
-               SKIP_TRIGRAPHS(
+               SKIP_TRIGRAPHS(,
                        found_char = '?';
                        break;
                )
@@ -566,11 +570,14 @@ void skip_line_comment(lexer_t *this)
 static
 void parse_preprocessor_directive(lexer_t *this, token_t *result_token)
 {
-       (void) result_token;
-       /* skip whitespaces */
-       while(this->c == ' ' || this->c == '\t' || this->c == '\r') {
+       printf("PP: ");
+       while(this->c != '\n') {
+               printf("%c", this->c);
                next_char(this);
        }
+       printf("\n");
+
+       lexer_next_token(this, result_token);
 }
 
 void preprocessor_next_token(lexer_t *this, token_t *token)
@@ -588,10 +595,108 @@ void preprocessor_next_token(lexer_t *this, token_t *token)
        }
 }
 
-void lexer_next_token(lexer_t *this, token_t *token)
+#define MAYBE_PROLOG                                       \
+                       next_char(this);                               \
+                       while(1) {                                     \
+                               switch(this->c) {
+
+#define MAYBE(ch, set_type)                                \
+                               case ch:                                   \
+                                       next_char(this);                       \
+                                       token->type = set_type;                \
+                                       return;
+
+#define ELSE_CODE(code)                                    \
+                               SKIP_TRIGRAPHS(,                           \
+                                       code;                                  \
+                               )                                          \
+                                                                                                                  \
+                               case '\\':                                 \
+                                       next_char(this);                       \
+                                       EAT_NEWLINE(break;)                    \
+                                       /* fallthrough */                      \
+                               default:                                   \
+                                       code;                                  \
+                               }                                          \
+                       } /* end of while(1) */                        \
+                       break;
+
+#define ELSE(set_type)                                     \
+               ELSE_CODE(                                         \
+                       token->type = set_type;                        \
+                       return;                                        \
+               )
+
+static
+void eat_whitespace(lexer_t *this)
 {
-       int line_begin = 0;
+       while(1) {
+               switch(this->c) {
+               case ' ':
+               case '\t':
+                       next_char(this);
+                       break;
+
+               MATCH_NEWLINE(
+                       break;
+               )
+
+               case '\\':
+                       next_char(this);
+                       if(this->c == '\n') {
+                               next_char(this);
+                               this->source_position.linenr++;
+                               break;
+                       }
+
+                       put_back(this, this->c);
+                       this->c = '\\';
+                       return;
+
+               SKIP_TRIGRAPHS(,
+                       return;
+               )
+
+               case '/':
+                       next_char(this);
+                       while(1) {
+                               switch(this->c) {
+                               case '*':
+                                       next_char(this);
+                                       skip_multiline_comment(this);
+                                       eat_whitespace(this);
+                                       return;
+                               case '/':
+                                       next_char(this);
+                                       skip_line_comment(this);
+                                       eat_whitespace(this);
+                                       return;
+
+                               SKIP_TRIGRAPHS(
+                                               put_back(this, '?');
+                                       ,
+                                               this->c = '/';
+                                               return;
+                               )
+
+                               case '\\':
+                                       next_char(this);
+                                       EAT_NEWLINE(break;)
+                                       /* fallthrough */
+                               default:
+                                       return;
+                               }
+                       }
+                       break;
+
+               default:
+                       return;
+               }
+       }
+}
 
+void lexer_next_token(lexer_t *this, token_t *token)
+{
        while(1) {
                switch(this->c) {
                case ' ':
@@ -599,7 +704,15 @@ void lexer_next_token(lexer_t *this, token_t *token)
                        next_char(this);
                        break;
 
-               MATCH_NEWLINE(break;)
+               MATCH_NEWLINE(
+                       eat_whitespace(this);
+                       if(this->c == '#') {
+                               next_char(this);
+                               parse_preprocessor_directive(this, token);
+                               return;
+                       }
+                       break;
+               )
 
                case 'A' ... 'Z':
                case 'a' ... 'z':
@@ -631,42 +744,6 @@ void lexer_next_token(lexer_t *this, token_t *token)
                        }
                        return;
 
-#define MAYBE_PROLOG                                       \
-                       next_char(this);                               \
-                       while(1) {                                     \
-                               switch(this->c) {
-
-#define MAYBE(ch, set_type)                                \
-                               case ch:                                   \
-                                       next_char(this);                       \
-                                       token->type = set_type;                \
-                                       return;
-
-#define ELSE_CODE(code)                                    \
-                               SKIP_TRIGRAPHS(                            \
-                                       code;                                  \
-                               )                                          \
-                                                                                                                  \
-                               case '\\':                                 \
-                                       next_char(this);                       \
-                                       if(this->c == '\n') {                  \
-                                               next_char(this);                   \
-                                               this->source_position.linenr++;    \
-                                               break;                             \
-                                       }                                      \
-                                       /* fallthrough */                      \
-                               default:                                   \
-                                       code;                                  \
-                               }                                          \
-                       } /* end of while(1) */                        \
-                       break;
-
-#define ELSE(set_type)                                     \
-               ELSE_CODE(                                         \
-                       token->type = set_type;                        \
-                       return;                                        \
-               )
-
                case '.':
                        MAYBE_PROLOG
                                case '.':
@@ -769,15 +846,7 @@ void lexer_next_token(lexer_t *this, token_t *token)
                case '#':
                        MAYBE_PROLOG
                        MAYBE('#', T_HASHHASH)
-                       ELSE_CODE(
-                               if(line_begin) {
-                                       parse_preprocessor_directive(this, token);
-                                       return;
-                               } else {
-                                       token->type = '#';
-                                       return;
-                               }
-                       )
+                       ELSE('#')
 
                case '?':
                        next_char(this);
diff --git a/lextest/preprocessor/test b/lextest/preprocessor/test
new file mode 100644 (file)
index 0000000..8de3a5d
--- /dev/null
@@ -0,0 +1,6 @@
+#define blo bla
+
+  #define b1 b2
+      /* /* */ /??/
+* jo */ #define b2 b3
+        /??a  #define b3 b4
diff --git a/preprocessor_tokens.inc b/preprocessor_tokens.inc
deleted file mode 100644 (file)
index f1760ca..0000000
+++ /dev/null
@@ -1,29 +0,0 @@
-#ifndef TS
-#define TS(x,str,val)
-#endif
-
-TS(IDENTIFIER,     "identifier", = 256)
-TS(INTEGER,        "integer number",)
-TS(STRING_LITERAL, "string literal",)
-
-#define S(x)   T(x,#x,)
-S(include)
-S(define)
-S(undef)
-S(line)
-S(error)
-S(pragma)
-S(if)
-S(else)
-S(elif)
-S(endif)
-S(ifdef)
-S(ifndef)
-#undef S
-
-T(DOTDOTDOT,      "...",)
-
-#define T_LAST_TOKEN  (T_DOTDOTDOT+1)
-
-T(LPAREN,          "(", = '(')
-T(RPAREN,          ")", = ')')
diff --git a/token.c b/token.c
index 50e166a..eaa1dec 100644 (file)
--- a/token.c
+++ b/token.c
@@ -47,6 +47,10 @@ void print_token_type(FILE *f, token_type_t token_type)
                fputs("end of file", f);
                return;
        }
+       if(token_type == T_ERROR) {
+               fputs("error", f);
+               return;
+       }
 
        int token_symbols_len = T_LAST_TOKEN;
        if(token_type < 0 || token_type >= token_symbols_len) {
diff --git a/token_t.h b/token_t.h
index 21669af..be48841 100644 (file)
--- a/token_t.h
+++ b/token_t.h
@@ -16,17 +16,6 @@ typedef enum {
        T_ERROR = -2
 } token_type_t;
 
-typedef enum {
-#define T(x,str,val) TP_##x val,
-#define TS(x,str,val) TP_##x val,
-#include "tokens.inc"
-#undef TS
-#undef T
-
-       TP_EOF   = T_EOF,
-       TP_ERROR = T_ERROR
-} preprocessor_token_type_t;
-
 typedef struct {
        int type;
        union {
diff --git a/tokens.inc b/tokens.inc
index f4f777a..9c40b57 100644 (file)
--- a/tokens.inc
+++ b/tokens.inc
@@ -5,6 +5,8 @@
 TS(IDENTIFIER,     "identifier", = 256)
 TS(INTEGER,        "integer number",)
 TS(STRING_LITERAL, "string literal",)
+TS(PP_NEWLINE,     "praeprocessor newline",)
+TS(PP_HEADER_NAME, "praeprocessor header name",)
 
 #define S(x)   T(x,#x,)
 S(auto)