- add support for \e escape

[cparser] / lexer.c
diff --git a/lexer.c b/lexer.c

index faecfe3..f1b00e3 100644 (file)
--- a/lexer.c
+++ b/lexer.c
@@ -56,6 +56,7 @@ static char        buf[1024 + MAX_PUTBACK];
  static const char *bufend;
  static const char *bufpos;
  static strset_t    stringset;
+bool               allow_dollar_in_symbol = true;
  
  /**
   * Prints a parse error message at the current token.
@@ -81,6 +82,11 @@ static inline void next_real_char(void)
  {
         assert(bufpos <= bufend);
         if (bufpos >= bufend) {
+               if (input == NULL) {
+                       c = EOF;
+                       return;
+               }
+
                 size_t s = fread(buf + MAX_PUTBACK, 1, sizeof(buf) - MAX_PUTBACK,
                                  input);
                 if(s == 0) {
@@ -189,6 +195,7 @@ end_of_next_char:;
  }
  
  #define SYMBOL_CHARS  \
+       case '$': if (!allow_dollar_in_symbol) goto dollar_sign; \
         case 'a':         \
         case 'b':         \
         case 'c':         \
@@ -276,6 +283,7 @@ static void parse_symbol(void)
                         break;
  
                 default:
+dollar_sign:
                         goto end_symbol;
                 }
         }
@@ -744,7 +752,7 @@ static int parse_escape_sequence(void)
         int ec = c;
         next_char();
  
-       switch(ec) {
+       switch (ec) {
         case '"':  return '"';
         case '\'': return '\'';
         case '\\': return '\\';
@@ -770,6 +778,10 @@ static int parse_escape_sequence(void)
         case EOF:
                 parse_error("reached end of file while parsing escape sequence");
                 return EOF;
+       case 'e':
+               if (c_mode & _GNUC)
+                       return 27;   /* hopefully 27 is ALWAYS the code for ESCAPE */
+               /*fallthrough*/
         default:
                 parse_error("unknown escape sequence");
                 return EOF;
@@ -1222,7 +1234,7 @@ static void parse_line_directive(void)
  /**
   * STDC pragmas.
   */
-typedef enum {
+typedef enum stdc_pragma_kind_t {
         STDC_UNKNOWN,
         STDC_FP_CONTRACT,
         STDC_FENV_ACCESS,
@@ -1232,7 +1244,7 @@ typedef enum {
  /**
   * STDC pragma values.
   */
-typedef enum {
+typedef enum stdc_pragma_value_kind_t {
         STDC_VALUE_UNKNOWN,
         STDC_VALUE_ON,
         STDC_VALUE_OFF,
@@ -1428,16 +1440,7 @@ void lexer_next_preprocessing_token(void)
  
                 case '.':
                         MAYBE_PROLOG
-                               case '0':
-                               case '1':
-                               case '2':
-                               case '3':
-                               case '4':
-                               case '5':
-                               case '6':
-                               case '7':
-                               case '8':
-                               case '9':
+                               DIGITS
                                         put_back(c);
                                         c = '.';
                                         parse_number_dec();
@@ -1568,8 +1571,9 @@ void lexer_next_preprocessing_token(void)
                         return;
  
                 default:
+dollar_sign:
+                       errorf(&lexer_token.source_position, "unknown character '%c' found", c);
                         next_char();
-                       errorf(&lexer_token.source_position, "unknown character '%c' found\n", c);
                         lexer_token.type = T_ERROR;
                         return;
                 }
@@ -1579,15 +1583,13 @@ void lexer_next_preprocessing_token(void)
  void lexer_next_token(void)
  {
         lexer_next_preprocessing_token();
-       if(lexer_token.type != '\n')
-               return;
  
+       while (lexer_token.type == '\n') {
  newline_found:
-       do {
                 lexer_next_preprocessing_token();
-       } while(lexer_token.type == '\n');
+       }
  
-       if(lexer_token.type == '#') {
+       if (lexer_token.type == '#') {
                 parse_preprocessor_directive();
                 goto newline_found;
         }
@@ -1596,6 +1598,7 @@ newline_found:
  void init_lexer(void)
  {
         strset_init(&stringset);
+       symbol_L = symbol_table_insert("L");
  }
  
  void lexer_open_stream(FILE *stream, const char *input_name)
@@ -1604,7 +1607,6 @@ void lexer_open_stream(FILE *stream, const char *input_name)
         lexer_token.source_position.linenr     = 0;
         lexer_token.source_position.input_name = input_name;
  
-       symbol_L = symbol_table_insert("L");
         bufpos = NULL;
         bufend = NULL;
  
@@ -1613,6 +1615,20 @@ void lexer_open_stream(FILE *stream, const char *input_name)
         c = '\n';
  }
  
+void lexer_open_buffer(const char *buffer, size_t len, const char *input_name)
+{
+       input                                  = NULL;
+       lexer_token.source_position.linenr     = 0;
+       lexer_token.source_position.input_name = input_name;
+
+       bufpos = buffer;
+       bufend = buffer + len;
+
+       /* place a virtual \n at the beginning so the lexer knows that we're
+        * at the beginning of a line */
+       c = '\n';
+}
+
  void exit_lexer(void)
  {
         strset_destroy(&stringset);