more fixes, lexer should now be correct
author Matthias Braun <matze@braunis.de>
Sat, 9 Jun 2007 23:26:28 +0000 (23:26 +0000)
committer Matthias Braun <matze@braunis.de>
Sat, 9 Jun 2007 23:26:28 +0000 (23:26 +0000)
[r18319]

lexer.c
lextest/tokenstreams/charconstants
lextest/tokenstreams/charconstants.reference [new file with mode: 0644]
lextest/tokenstreams/strings [new file with mode: 0644]
lextest/tokenstreams/strings.reference [new file with mode: 0644]
lextest/tokenstreams/stringtrigraphs [deleted file]
lextest/tokenstreams/t2 [deleted file]
lextest/tokenstreams/t3 [deleted file]
lextest/tokenstreams/trigraphs [new file with mode: 0644]
lextest/tokenstreams/trigraphs.reference [new file with mode: 0644]

diff --git a/lexer.c b/lexer.c
index 7e3ee4b..8968131 100644 (file)
--- a/lexer.c
+++ b/lexer.c
@@ -92,6 +92,22 @@ int replace_trigraph(lexer_t *this)
        return 0;
 }
 
+#define SKIP_TRIGRAPHS(no_trigraph_code)       \
+       case '?':                                  \
+               next_char(this);                       \
+               if(this->c != '?') {                   \
+                       put_back(this, this->c);           \
+                       this->c = '?';                     \
+                       no_trigraph_code;                  \
+               }                                      \
+               next_char(this);                       \
+               if(replace_trigraph(this))             \
+                       break;                             \
+               put_back(this, '?');                   \
+               put_back(this, this->c);               \
+               this->c = '?';                         \
+               no_trigraph_code;                      \
+
 static
 void parse_symbol(lexer_t *this, token_t *token)
 {
@@ -318,22 +334,6 @@ int parse_escape_sequence(lexer_t *this)
        }
 }
 
-#define SKIP_TRIGRAPHS(no_trigraph_code)       \
-       case '?':                                  \
-               next_char(this);                       \
-               if(this->c != '?') {                   \
-                       put_back(this, this->c);           \
-                       this->c = '?';                     \
-                       no_trigraph_code;                  \
-               }                                      \
-               next_char(this);                       \
-               if(replace_trigraph(this))             \
-                       break;                             \
-               put_back(this, '?');                   \
-               put_back(this, this->c);               \
-               this->c = '?';                         \
-               no_trigraph_code;                      \
-
 static
 void parse_string_literal(lexer_t *this, token_t *token)
 {
@@ -348,6 +348,7 @@ void parse_string_literal(lexer_t *this, token_t *token)
                switch(this->c) {
                SKIP_TRIGRAPHS(
                        obstack_1grow(&symbol_obstack, '?');
+                       next_char(this);
                        break;
                )
 
@@ -404,28 +405,32 @@ void parse_character_constant(lexer_t *this, token_t *token)
        assert(this->c == '\'');
        next_char(this);
 
+       int found_char = 0;
        while(1) {
                switch(this->c) {
                SKIP_TRIGRAPHS(
-                       token->type       = T_INTEGER;
-                       token->v.intvalue = '?';
-                       goto end_of_char_constant;
+                       found_char = '?';
+                       break;
                )
 
                case '\\':
                        next_char(this);
                        if(this->c == '\n') {
+                               next_char(this);
                                this->source_position.linenr++;
                                break;
                        }
-                       token->type       = T_INTEGER;
-                       token->v.intvalue = parse_escape_sequence(this);
-                       goto end_of_char_constant;
+                       found_char = '\\';
+                       break;
 
                case '\n':
                        next_char(this);
                        parse_error(this, "newline while parsing character constant");
                        this->source_position.linenr++;
+                       break;
+
+               case '\'':
+                       next_char(this);
                        goto end_of_char_constant;
 
                case EOF:
@@ -434,19 +439,21 @@ void parse_character_constant(lexer_t *this, token_t *token)
                        return;
 
                default:
-                       token->type       = T_INTEGER;
-                       token->v.intvalue = this->c;
-                       next_char(this);
-                       goto end_of_char_constant;
+                       if(found_char != 0) {
+                               parse_error(this, "more than 1 characters in character "
+                                           "constant");
+                               goto end_of_char_constant;
+                       } else {
+                               found_char = this->c;
+                               next_char(this);
+                       }
+                       break;
                }
        }
 
 end_of_char_constant:
-       if(this->c != '\'') {
-               parse_error(this, "multibyte character constant");
-       } else {
-               next_char(this);
-       }
+       token->type       = T_INTEGER;
+       token->v.intvalue = found_char;
 }
 
 static
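diff --git a/lextest/tokenstreams/charconstants b/lextest/tokenstreams/charconstants
index b5ef615..b25762a 100644 (file)
--- a/lextest/tokenstreams/charconstants
+++ b/lextest/tokenstreams/charconstants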
@@ -1,5 +1,11 @@
 'a'
 'b'
 '??/
-z'
+c'
+'d\
+??/
+\'
 '??/\'
+'\\'
+'??/??/'
+'\??/'
diff --git a/lextest/tokenstreams/charconstants.reference b/lextest/tokenstreams/charconstants.reference
new file mode 100644 (file)
index 0000000..6c0a47f
--- /dev/null
@@ -0,0 +1,9 @@
+integer number 97
+integer number 98
+integer number 99
+integer number 92
+integer number 92
+integer number 92
+integer number 92
+integer number 92
+end of file
diff --git a/lextest/tokenstreams/strings b/lextest/tokenstreams/strings
new file mode 100644 (file)
index 0000000..22c8538
--- /dev/null
@@ -0,0 +1,15 @@
+"bla?"
+"bla??"
+"bla???"
+"bla??/n"
+"bla???/n"
+"bla????/n"
+"bla??/
+"
+"bla???/
+"
+"bla????/
+"
+"bla\
+"
+"bla\n"
diff --git a/lextest/tokenstreams/strings.reference b/lextest/tokenstreams/strings.reference
new file mode 100644 (file)
index 0000000..7dec63d
--- /dev/null
@@ -0,0 +1,16 @@
+string 'bla?'
+string 'bla??'
+string 'bla???'
+string 'bla
+'
+string 'bla?
+'
+string 'bla??
+'
+string 'bla'
+string 'bla?'
+string 'bla??'
+string 'bla'
+string 'bla
+'
+end of file
diff --git a/lextest/tokenstreams/stringtrigraphs b/lextest/tokenstreams/stringtrigraphs
deleted file mode 100644 (file)
index 726b66d..0000000
+++ /dev/null
@@ -1,20 +0,0 @@
-"bla?"
-"bla??"
-"bla???"
-"bla??/n"
-"bla???/n"
-"bla????/n"
-"bla??/
-"
-"bla???/
-"
-"bla????/
-"
-"bla\
-"
-"bla\n"
-'a'
-'\
-??/
-\
-z'
diff --git a/lextest/tokenstreams/t2 b/lextest/tokenstreams/t2
deleted file mode 100644 (file)
index a1d44d5..0000000
+++ /dev/null
@@ -1 +0,0 @@
-??
diff --git a/lextest/tokenstreams/t3 b/lextest/tokenstreams/t3
deleted file mode 100644 (file)
index 6272e43..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-"??? ?? ?"
-'?'
-'??='
diff --git a/lextest/tokenstreams/trigraphs b/lextest/tokenstreams/trigraphs
new file mode 100644 (file)
index 0000000..0ad07b8
--- /dev/null
@@ -0,0 +1,13 @@
+?
+??
+???
+??=
+??(
+??/
+??)
+??'
+??<
+??!
+??>
+??-
+?/**/?>
diff --git a/lextest/tokenstreams/trigraphs.reference b/lextest/tokenstreams/trigraphs.reference
new file mode 100644 (file)
index 0000000..8089fa9
--- /dev/null
@@ -0,0 +1,18 @@
+'?'
+'?'
+'?'
+'?'
+'?'
+'?'
+'#'
+'['
+']'
+'^'
+'{'
+'|'
+'}'
+'~'
+'?'
+'?'
+'>'
+end of file