- add support for \e escape
[cparser] / lexer.c
diff --git a/lexer.c b/lexer.c
index faecfe3..f1b00e3 100644 (file)
--- a/lexer.c
+++ b/lexer.c
@@ -56,6 +56,7 @@ static char        buf[1024 + MAX_PUTBACK];
 static const char *bufend;
 static const char *bufpos;
 static strset_t    stringset;
+bool               allow_dollar_in_symbol = true;
 
 /**
  * Prints a parse error message at the current token.
@@ -81,6 +82,11 @@ static inline void next_real_char(void)
 {
        assert(bufpos <= bufend);
        if (bufpos >= bufend) {
+               if (input == NULL) {
+                       c = EOF;
+                       return;
+               }
+
                size_t s = fread(buf + MAX_PUTBACK, 1, sizeof(buf) - MAX_PUTBACK,
                                 input);
                if(s == 0) {
@@ -189,6 +195,7 @@ end_of_next_char:;
 }
 
 #define SYMBOL_CHARS  \
+       case '$': if (!allow_dollar_in_symbol) goto dollar_sign; \
        case 'a':         \
        case 'b':         \
        case 'c':         \
@@ -276,6 +283,7 @@ static void parse_symbol(void)
                        break;
 
                default:
+dollar_sign:
                        goto end_symbol;
                }
        }
@@ -744,7 +752,7 @@ static int parse_escape_sequence(void)
        int ec = c;
        next_char();
 
-       switch(ec) {
+       switch (ec) {
        case '"':  return '"';
        case '\'': return '\'';
        case '\\': return '\\';
@@ -770,6 +778,10 @@ static int parse_escape_sequence(void)
        case EOF:
                parse_error("reached end of file while parsing escape sequence");
                return EOF;
+       case 'e':
+               if (c_mode & _GNUC)
+                       return 27;   /* hopefully 27 is ALWAYS the code for ESCAPE */
+               /*fallthrough*/
        default:
                parse_error("unknown escape sequence");
                return EOF;
@@ -1222,7 +1234,7 @@ static void parse_line_directive(void)
 /**
  * STDC pragmas.
  */
-typedef enum {
+typedef enum stdc_pragma_kind_t {
        STDC_UNKNOWN,
        STDC_FP_CONTRACT,
        STDC_FENV_ACCESS,
@@ -1232,7 +1244,7 @@ typedef enum {
 /**
  * STDC pragma values.
  */
-typedef enum {
+typedef enum stdc_pragma_value_kind_t {
        STDC_VALUE_UNKNOWN,
        STDC_VALUE_ON,
        STDC_VALUE_OFF,
@@ -1428,16 +1440,7 @@ void lexer_next_preprocessing_token(void)
 
                case '.':
                        MAYBE_PROLOG
-                               case '0':
-                               case '1':
-                               case '2':
-                               case '3':
-                               case '4':
-                               case '5':
-                               case '6':
-                               case '7':
-                               case '8':
-                               case '9':
+                               DIGITS
                                        put_back(c);
                                        c = '.';
                                        parse_number_dec();
@@ -1568,8 +1571,9 @@ void lexer_next_preprocessing_token(void)
                        return;
 
                default:
+dollar_sign:
+                       errorf(&lexer_token.source_position, "unknown character '%c' found", c);
                        next_char();
-                       errorf(&lexer_token.source_position, "unknown character '%c' found\n", c);
                        lexer_token.type = T_ERROR;
                        return;
                }
@@ -1579,15 +1583,13 @@ void lexer_next_preprocessing_token(void)
 void lexer_next_token(void)
 {
        lexer_next_preprocessing_token();
-       if(lexer_token.type != '\n')
-               return;
 
+       while (lexer_token.type == '\n') {
 newline_found:
-       do {
                lexer_next_preprocessing_token();
-       } while(lexer_token.type == '\n');
+       }
 
-       if(lexer_token.type == '#') {
+       if (lexer_token.type == '#') {
                parse_preprocessor_directive();
                goto newline_found;
        }
@@ -1596,6 +1598,7 @@ newline_found:
 void init_lexer(void)
 {
        strset_init(&stringset);
+       symbol_L = symbol_table_insert("L");
 }
 
 void lexer_open_stream(FILE *stream, const char *input_name)
@@ -1604,7 +1607,6 @@ void lexer_open_stream(FILE *stream, const char *input_name)
        lexer_token.source_position.linenr     = 0;
        lexer_token.source_position.input_name = input_name;
 
-       symbol_L = symbol_table_insert("L");
        bufpos = NULL;
        bufend = NULL;
 
@@ -1613,6 +1615,20 @@ void lexer_open_stream(FILE *stream, const char *input_name)
        c = '\n';
 }
 
+void lexer_open_buffer(const char *buffer, size_t len, const char *input_name)
+{
+       input                                  = NULL;
+       lexer_token.source_position.linenr     = 0;
+       lexer_token.source_position.input_name = input_name;
+
+       bufpos = buffer;
+       bufend = buffer + len;
+
+       /* place a virtual \n at the beginning so the lexer knows that we're
+        * at the beginning of a line */
+       c = '\n';
+}
+
 void exit_lexer(void)
 {
        strset_destroy(&stringset);