- add support for \e escape

[cparser] / lexer.c
diff --git a/lexer.c b/lexer.c

index 9643ec5..f1b00e3 100644 (file)
--- a/lexer.c
+++ b/lexer.c
@@ -56,6 +56,7 @@ static char        buf[1024 + MAX_PUTBACK];
  static const char *bufend;
  static const char *bufpos;
  static strset_t    stringset;
+bool               allow_dollar_in_symbol = true;
  
  /**
   * Prints a parse error message at the current token.
@@ -64,7 +65,7 @@ static strset_t    stringset;
   */
  static void parse_error(const char *msg)
  {
-       errorf(lexer_token.source_position,  "%s", msg);
+       errorf(&lexer_token.source_position,  "%s", msg);
  }
  
  /**
@@ -74,13 +75,18 @@ static void parse_error(const char *msg)
   */
  static NORETURN internal_error(const char *msg)
  {
-       internal_errorf(lexer_token.source_position,  "%s", msg);
+       internal_errorf(&lexer_token.source_position,  "%s", msg);
  }
  
  static inline void next_real_char(void)
  {
         assert(bufpos <= bufend);
         if (bufpos >= bufend) {
+               if (input == NULL) {
+                       c = EOF;
+                       return;
+               }
+
                 size_t s = fread(buf + MAX_PUTBACK, 1, sizeof(buf) - MAX_PUTBACK,
                                  input);
                 if(s == 0) {
@@ -189,6 +195,7 @@ end_of_next_char:;
  }
  
  #define SYMBOL_CHARS  \
+       case '$': if (!allow_dollar_in_symbol) goto dollar_sign; \
         case 'a':         \
         case 'b':         \
         case 'c':         \
@@ -276,6 +283,7 @@ static void parse_symbol(void)
                         break;
  
                 default:
+dollar_sign:
                         goto end_symbol;
                 }
         }
@@ -343,7 +351,7 @@ static void parse_integer_suffix(bool is_oct_hex)
                         if(v >= TARGET_LONG_MIN && v <= TARGET_LONG_MAX) {
                                 lexer_token.datatype = type_long;
                                 return;
-                       } else if(is_oct_hex && v >= 0 && v <= TARGET_ULONG_MAX) {
+                       } else if(is_oct_hex && v >= 0 && (unsigned long long)v <= (unsigned long long)TARGET_ULONG_MAX) {
                                 lexer_token.datatype = type_unsigned_long;
                                 return;
                         }
@@ -744,7 +752,7 @@ static int parse_escape_sequence(void)
         int ec = c;
         next_char();
  
-       switch(ec) {
+       switch (ec) {
         case '"':  return '"';
         case '\'': return '\'';
         case '\\': return '\\';
@@ -770,6 +778,10 @@ static int parse_escape_sequence(void)
         case EOF:
                 parse_error("reached end of file while parsing escape sequence");
                 return EOF;
+       case 'e':
+               if (c_mode & _GNUC)
+                       return 27;   /* hopefully 27 is ALWAYS the code for ESCAPE */
+               /*fallthrough*/
         default:
                 parse_error("unknown escape sequence");
                 return EOF;
@@ -872,7 +884,7 @@ static void parse_string_literal(void)
                         source_position_t source_position;
                         source_position.input_name = lexer_token.source_position.input_name;
                         source_position.linenr     = start_linenr;
-                       errorf(source_position, "string has no end");
+                       errorf(&source_position, "string has no end");
                         lexer_token.type = T_ERROR;
                         return;
                 }
@@ -941,7 +953,7 @@ static void parse_wide_character_constant(void)
                 case EOF: {
                         source_position_t source_position = lexer_token.source_position;
                         source_position.linenr = start_linenr;
-                       errorf(source_position, "EOF while parsing character constant");
+                       errorf(&source_position, "EOF while parsing character constant");
                         lexer_token.type = T_ERROR;
                         return;
                 }
@@ -957,6 +969,9 @@ static void parse_wide_character_constant(void)
  
  end_of_wide_char_constant:;
         size_t             size   = (size_t) obstack_object_size(&symbol_obstack);
+       assert(size % sizeof(wchar_rep_t) == 0);
+       size /= sizeof(wchar_rep_t);
+
         const wchar_rep_t *string = obstack_finish(&symbol_obstack);
  
         lexer_token.type                = T_WIDE_CHARACTER_CONSTANT;
@@ -987,7 +1002,7 @@ static void parse_wide_string_literal(void)
                         source_position_t source_position;
                         source_position.input_name = lexer_token.source_position.input_name;
                         source_position.linenr     = start_linenr;
-                       errorf(source_position, "string has no end");
+                       errorf(&source_position, "string has no end");
                         lexer_token.type = T_ERROR;
                         return;
                 }
@@ -1060,7 +1075,7 @@ static void parse_character_constant(void)
                         source_position_t source_position;
                         source_position.input_name = lexer_token.source_position.input_name;
                         source_position.linenr     = start_linenr;
-                       errorf(source_position, "EOF while parsing character constant");
+                       errorf(&source_position, "EOF while parsing character constant");
                         lexer_token.type = T_ERROR;
                         return;
                 }
@@ -1112,7 +1127,7 @@ static void skip_multiline_comment(void)
                         source_position_t source_position;
                         source_position.input_name = lexer_token.source_position.input_name;
                         source_position.linenr     = start_linenr;
-                       errorf(source_position, "at end of file while looking for comment end");
+                       errorf(&source_position, "at end of file while looking for comment end");
                         return;
                 }
  
@@ -1219,7 +1234,7 @@ static void parse_line_directive(void)
  /**
   * STDC pragmas.
   */
-typedef enum {
+typedef enum stdc_pragma_kind_t {
         STDC_UNKNOWN,
         STDC_FP_CONTRACT,
         STDC_FENV_ACCESS,
@@ -1229,7 +1244,7 @@ typedef enum {
  /**
   * STDC pragma values.
   */
-typedef enum {
+typedef enum stdc_pragma_value_kind_t {
         STDC_VALUE_UNKNOWN,
         STDC_VALUE_ON,
         STDC_VALUE_OFF,
@@ -1281,7 +1296,7 @@ static void parse_pragma(void) {
                                 if (value != STDC_VALUE_UNKNOWN) {
                                         unknown_pragma = false;
                                 } else {
-                                       errorf(pp_token.source_position, "bad STDC pragma argument");
+                                       errorf(&pp_token.source_position, "bad STDC pragma argument");
                                 }
                         }
                 }
@@ -1290,7 +1305,7 @@ static void parse_pragma(void) {
         }
         eat_until_newline();
         if (unknown_pragma && warning.unknown_pragmas) {
-               warningf(pp_token.source_position, "encountered unknown #pragma");
+               warningf(&pp_token.source_position, "encountered unknown #pragma");
         }
  }
  
@@ -1373,7 +1388,7 @@ static void parse_preprocessor_directive(void)
  
  #define ELSE_CODE(code)                                    \
                                 default:                                   \
-                                       code;                                  \
+                                       code                                   \
                                 }                                          \
                         } /* end of while(1) */                        \
                         break;
@@ -1425,16 +1440,7 @@ void lexer_next_preprocessing_token(void)
  
                 case '.':
                         MAYBE_PROLOG
-                               case '0':
-                               case '1':
-                               case '2':
-                               case '3':
-                               case '4':
-                               case '5':
-                               case '6':
-                               case '7':
-                               case '8':
-                               case '9':
+                               DIGITS
                                         put_back(c);
                                         c = '.';
                                         parse_number_dec();
@@ -1565,8 +1571,9 @@ void lexer_next_preprocessing_token(void)
                         return;
  
                 default:
+dollar_sign:
+                       errorf(&lexer_token.source_position, "unknown character '%c' found", c);
                         next_char();
-                       errorf(lexer_token.source_position, "unknown character '%c' found\n", c);
                         lexer_token.type = T_ERROR;
                         return;
                 }
@@ -1576,15 +1583,13 @@ void lexer_next_preprocessing_token(void)
  void lexer_next_token(void)
  {
         lexer_next_preprocessing_token();
-       if(lexer_token.type != '\n')
-               return;
  
+       while (lexer_token.type == '\n') {
  newline_found:
-       do {
                 lexer_next_preprocessing_token();
-       } while(lexer_token.type == '\n');
+       }
  
-       if(lexer_token.type == '#') {
+       if (lexer_token.type == '#') {
                 parse_preprocessor_directive();
                 goto newline_found;
         }
@@ -1593,6 +1598,7 @@ newline_found:
  void init_lexer(void)
  {
         strset_init(&stringset);
+       symbol_L = symbol_table_insert("L");
  }
  
  void lexer_open_stream(FILE *stream, const char *input_name)
@@ -1601,7 +1607,6 @@ void lexer_open_stream(FILE *stream, const char *input_name)
         lexer_token.source_position.linenr     = 0;
         lexer_token.source_position.input_name = input_name;
  
-       symbol_L = symbol_table_insert("L");
         bufpos = NULL;
         bufend = NULL;
  
@@ -1610,6 +1615,20 @@ void lexer_open_stream(FILE *stream, const char *input_name)
         c = '\n';
  }
  
+void lexer_open_buffer(const char *buffer, size_t len, const char *input_name)
+{
+       input                                  = NULL;
+       lexer_token.source_position.linenr     = 0;
+       lexer_token.source_position.input_name = input_name;
+
+       bufpos = buffer;
+       bufend = buffer + len;
+
+       /* place a virtual \n at the beginning so the lexer knows that we're
+        * at the beginning of a line */
+       c = '\n';
+}
+
  void exit_lexer(void)
  {
         strset_destroy(&stringset);