more fixes, lexer should now be correct

author Matthias Braun <matze@braunis.de>

Sat, 9 Jun 2007 23:26:28 +0000 (23:26 +0000)

committer Matthias Braun <matze@braunis.de>

Sat, 9 Jun 2007 23:26:28 +0000 (23:26 +0000)
author Matthias Braun <matze@braunis.de>
Sat, 9 Jun 2007 23:26:28 +0000 (23:26 +0000)
committer Matthias Braun <matze@braunis.de>
Sat, 9 Jun 2007 23:26:28 +0000 (23:26 +0000)
diff --git a/lexer.c b/lexer.c

index 7e3ee4b..8968131 100644 (file)
--- a/lexer.c
+++ b/lexer.c
@@ -92,6 +92,22 @@ int replace_trigraph(lexer_t *this)
         return 0;
  }
  
+#define SKIP_TRIGRAPHS(no_trigraph_code)       \
+       case '?':                                  \
+               next_char(this);                       \
+               if(this->c != '?') {                   \
+                       put_back(this, this->c);           \
+                       this->c = '?';                     \
+                       no_trigraph_code;                  \
+               }                                      \
+               next_char(this);                       \
+               if(replace_trigraph(this))             \
+                       break;                             \
+               put_back(this, '?');                   \
+               put_back(this, this->c);               \
+               this->c = '?';                         \
+               no_trigraph_code;                      \
+
  static
  void parse_symbol(lexer_t *this, token_t *token)
  {
@@ -318,22 +334,6 @@ int parse_escape_sequence(lexer_t *this)
         }
  }
  
-#define SKIP_TRIGRAPHS(no_trigraph_code)       \
-       case '?':                                  \
-               next_char(this);                       \
-               if(this->c != '?') {                   \
-                       put_back(this, this->c);           \
-                       this->c = '?';                     \
-                       no_trigraph_code;                  \
-               }                                      \
-               next_char(this);                       \
-               if(replace_trigraph(this))             \
-                       break;                             \
-               put_back(this, '?');                   \
-               put_back(this, this->c);               \
-               this->c = '?';                         \
-               no_trigraph_code;                      \
-
  static
  void parse_string_literal(lexer_t *this, token_t *token)
  {
@@ -348,6 +348,7 @@ void parse_string_literal(lexer_t *this, token_t *token)
                 switch(this->c) {
                 SKIP_TRIGRAPHS(
                         obstack_1grow(&symbol_obstack, '?');
+                       next_char(this);
                         break;
                 )
  
@@ -404,28 +405,32 @@ void parse_character_constant(lexer_t *this, token_t *token)
         assert(this->c == '\'');
         next_char(this);
  
+       int found_char = 0;
         while(1) {
                 switch(this->c) {
                 SKIP_TRIGRAPHS(
-                       token->type       = T_INTEGER;
-                       token->v.intvalue = '?';
-                       goto end_of_char_constant;
+                       found_char = '?';
+                       break;
                 )
  
                 case '\\':
                         next_char(this);
                         if(this->c == '\n') {
+                               next_char(this);
                                 this->source_position.linenr++;
                                 break;
                         }
-                       token->type       = T_INTEGER;
-                       token->v.intvalue = parse_escape_sequence(this);
-                       goto end_of_char_constant;
+                       found_char = '\\';
+                       break;
  
                 case '\n':
                         next_char(this);
                         parse_error(this, "newline while parsing character constant");
                         this->source_position.linenr++;
+                       break;
+
+               case '\'':
+                       next_char(this);
                         goto end_of_char_constant;
  
                 case EOF:
@@ -434,19 +439,21 @@ void parse_character_constant(lexer_t *this, token_t *token)
                         return;
  
                 default:
-                       token->type       = T_INTEGER;
-                       token->v.intvalue = this->c;
-                       next_char(this);
-                       goto end_of_char_constant;
+                       if(found_char != 0) {
+                               parse_error(this, "more than 1 characters in character "
+                                           "constant");
+                               goto end_of_char_constant;
+                       } else {
+                               found_char = this->c;
+                               next_char(this);
+                       }
+                       break;
                 }
         }
  
  end_of_char_constant:
-       if(this->c != '\'') {
-               parse_error(this, "multibyte character constant");
-       } else {
-               next_char(this);
-       }
+       token->type       = T_INTEGER;
+       token->v.intvalue = found_char;
  }
  
  static
diff --git a/lextest/tokenstreams/charconstants b/lextest/tokenstreams/charconstants

index b5ef615..b25762a 100644 (file)
--- a/lextest/tokenstreams/charconstants
+++ b/lextest/tokenstreams/charconstants
@@ -1,5 +1,11 @@
  'a'
  'b'
  '??/
-z'
+c'
+'d\
+??/
+\'
  '??/\'
+'\\'
+'??/??/'
+'\??/'
diff --git a/lextest/tokenstreams/charconstants.reference b/lextest/tokenstreams/charconstants.reference

new file mode 100644 (file)

index 0000000..6c0a47f
--- /dev/null
+++ b/lextest/tokenstreams/charconstants.reference
@@ -0,0 +1,9 @@
+integer number 97
+integer number 98
+integer number 99
+integer number 92
+integer number 92
+integer number 92
+integer number 92
+integer number 92
+end of file
diff --git a/lextest/tokenstreams/strings b/lextest/tokenstreams/strings

new file mode 100644 (file)

index 0000000..22c8538
--- /dev/null
+++ b/lextest/tokenstreams/strings
@@ -0,0 +1,15 @@
+"bla?"
+"bla??"
+"bla???"
+"bla??/n"
+"bla???/n"
+"bla????/n"
+"bla??/
+"
+"bla???/
+"
+"bla????/
+"
+"bla\
+"
+"bla\n"
diff --git a/lextest/tokenstreams/strings.reference b/lextest/tokenstreams/strings.reference

new file mode 100644 (file)

index 0000000..7dec63d
--- /dev/null
+++ b/lextest/tokenstreams/strings.reference
@@ -0,0 +1,16 @@
+string 'bla?'
+string 'bla??'
+string 'bla???'
+string 'bla
+'
+string 'bla?
+'
+string 'bla??
+'
+string 'bla'
+string 'bla?'
+string 'bla??'
+string 'bla'
+string 'bla
+'
+end of file
diff --git a/lextest/tokenstreams/stringtrigraphs b/lextest/tokenstreams/stringtrigraphs

deleted file mode 100644 (file)

index 726b66d..0000000
--- a/lextest/tokenstreams/stringtrigraphs
+++ /dev/null
@@ -1,20 +0,0 @@
-"bla?"
-"bla??"
-"bla???"
-"bla??/n"
-"bla???/n"
-"bla????/n"
-"bla??/
-"
-"bla???/
-"
-"bla????/
-"
-"bla\
-"
-"bla\n"
-'a'
-'\
-??/
-\
-z'
diff --git a/lextest/tokenstreams/t2 b/lextest/tokenstreams/t2

deleted file mode 100644 (file)

index a1d44d5..0000000
--- a/lextest/tokenstreams/t2
+++ /dev/null
@@ -1 +0,0 @@
-??
diff --git a/lextest/tokenstreams/t3 b/lextest/tokenstreams/t3

deleted file mode 100644 (file)

index 6272e43..0000000
--- a/lextest/tokenstreams/t3
+++ /dev/null
@@ -1,3 +0,0 @@
-"??? ?? ?"
-'?'
-'??='
diff --git a/lextest/tokenstreams/trigraphs b/lextest/tokenstreams/trigraphs

new file mode 100644 (file)

index 0000000..0ad07b8
--- /dev/null
+++ b/lextest/tokenstreams/trigraphs
@@ -0,0 +1,13 @@
+?
+??
+???
+??=
+??(
+??/
+??)
+??'
+??<
+??!
+??>
+??-
+?/**/?>
diff --git a/lextest/tokenstreams/trigraphs.reference b/lextest/tokenstreams/trigraphs.reference

new file mode 100644 (file)

index 0000000..8089fa9
--- /dev/null
+++ b/lextest/tokenstreams/trigraphs.reference
@@ -0,0 +1,18 @@
+'?'
+'?'
+'?'
+'?'
+'?'
+'?'
+'#'
+'['
+']'
+'^'
+'{'
+'|'
+'}'
+'~'
+'?'
+'?'
+'>'
+end of file
author	Matthias Braun <matze@braunis.de>
	Sat, 9 Jun 2007 23:26:28 +0000 (23:26 +0000)
committer	Matthias Braun <matze@braunis.de>
	Sat, 9 Jun 2007 23:26:28 +0000 (23:26 +0000)
lexer.c		patch | blob | history
lextest/tokenstreams/charconstants		patch | blob | history
lextest/tokenstreams/charconstants.reference	[new file with mode: 0644]	patch | blob
lextest/tokenstreams/strings	[new file with mode: 0644]	patch | blob
lextest/tokenstreams/strings.reference	[new file with mode: 0644]	patch | blob
lextest/tokenstreams/stringtrigraphs	[deleted file]	patch | blob | history
lextest/tokenstreams/t2	[deleted file]	patch | blob | history
lextest/tokenstreams/t3	[deleted file]	patch | blob | history
lextest/tokenstreams/trigraphs	[new file with mode: 0644]	patch | blob
lextest/tokenstreams/trigraphs.reference	[new file with mode: 0644]	patch | blob