rework input logic to allow parsing from strings
author Matthias Braun <matze@braunis.de>
Thu, 11 Aug 2011 23:28:40 +0000 (01:28 +0200)
committer Matthias Braun <matze@braunis.de>
Thu, 11 Aug 2011 23:32:47 +0000 (01:32 +0200)
input.c
input.h
lexer.c
lexer.h
main.c

diff --git a/input.c b/input.c
index 748a0ca..f65cddf 100644 (file)
--- a/input.c
+++ b/input.c
@@ -8,8 +8,17 @@
 
 typedef size_t (*decode_func)(input_t *input, utf32 *buffer, size_t buffer_size);
 
+typedef enum {
+       INPUT_FILE,
+       INPUT_STRING
+} input_kind_t;
+
 struct input_t {
-       FILE       *file;
+       input_kind_t kind;
+       union {
+               FILE *file;
+               const char *string;
+       } in;
        decode_func decode;
 
        /* state for utf-8 decoder */
@@ -25,25 +34,36 @@ void set_input_error_callback(input_error_callback_func new_func)
        input_error = new_func;
 }
 
-static size_t read_block(FILE *file, unsigned char *const read_buf,
+static size_t read_block(input_t *input, unsigned char *const read_buf,
                          size_t const n)
 {
-       size_t const s = fread(read_buf, 1, n, file);
-       if (s == 0) {
-               /* on OS/X ferror appears to return true on eof as well when running
-                * the application in gdb... */
-               if (!feof(file) && ferror(file))
-                       input_error(0, 0, "read from input failed");
-               return 0;
+       if (input->kind == INPUT_FILE) {
+               FILE *file = input->in.file;
+               size_t const s = fread(read_buf, 1, n, file);
+               if (s == 0) {
+                       /* on OS/X ferror appears to return true on eof as well when running
+                        * the application in gdb... */
+                       if (!feof(file) && ferror(file))
+                               input_error(0, 0, "read from input failed");
+                       return 0;
+               }
+               return s;
+       } else {
+               assert(input->kind == INPUT_STRING);
+               size_t len = strlen(input->in.string);
+               if (len > n)
+                       len = n;
+               memcpy(read_buf, input->in.string, len);
+               input->in.string += len;
+               return len;
        }
-       return s;
 }
 
 static size_t decode_iso_8859_1(input_t *input, utf32 *buffer,
                                 size_t buffer_size)
 {
        unsigned char read_buf[buffer_size];
-       size_t const s = read_block(input->file, read_buf, sizeof(read_buf));
+       size_t const s = read_block(input, read_buf, sizeof(read_buf));
 
        unsigned char const *src = read_buf;
        unsigned char const *end = read_buf + s;
@@ -58,7 +78,7 @@ static size_t decode_iso_8859_15(input_t *input, utf32 *buffer,
                                  size_t buffer_size)
 {
        unsigned char read_buf[buffer_size];
-       size_t const s = read_block(input->file, read_buf, sizeof(read_buf));
+       size_t const s = read_block(input, read_buf, sizeof(read_buf));
 
        unsigned char const *src = read_buf;
        unsigned char const *end = read_buf + s;
@@ -86,7 +106,7 @@ static size_t decode_utf8(input_t *input, utf32 *buffer, size_t buffer_size)
        unsigned char read_buf[buffer_size];
 
        while (true) {
-               size_t const s = read_block(input->file, read_buf, sizeof(read_buf));
+               size_t const s = read_block(input, read_buf, sizeof(read_buf));
                if (s == 0) {
                        if (input->utf8_part_decoded_rest_len > 0)
                                input_error(0, 0, "incomplete input char at end of input");
@@ -195,7 +215,7 @@ static size_t decode_windows_1252(input_t *input, utf32 *buffer,
                                   size_t buffer_size)
 {
        unsigned char read_buf[buffer_size];
-       size_t const s = read_block(input->file, read_buf, sizeof(read_buf));
+       size_t const s = read_block(input, read_buf, sizeof(read_buf));
 
        unsigned char const *src = read_buf;
        unsigned char const *end = read_buf + s;
@@ -274,11 +294,8 @@ static int my_strcasecmp(const char *s1, const char *s2)
        return (unsigned char)*s1 - (unsigned char)*s2;
 }
 
-input_t *input_from_stream(FILE *file, const char *encoding)
+static void choose_decoder(input_t *result, const char *encoding)
 {
-       input_t *result = XMALLOCZ(input_t);
-       result->file = file;
-
        if (encoding == NULL) {
                result->decode = decode_utf8;
        } else {
@@ -294,6 +311,26 @@ input_t *input_from_stream(FILE *file, const char *encoding)
                        result->decode = decode_utf8;
                }
        }
+}
+
+input_t *input_from_stream(FILE *file, const char *encoding)
+{
+       input_t *result = XMALLOCZ(input_t);
+       result->kind    = INPUT_FILE;
+       result->in.file = file;
+
+       choose_decoder(result, encoding);
+
+       return result;
+}
+
+input_t *input_from_string(const char *string, const char *encoding)
+{
+       input_t *result   = XMALLOCZ(input_t);
+       result->kind      = INPUT_STRING;
+       result->in.string = string;
+
+       choose_decoder(result, encoding);
 
        return result;
 }
diff --git a/input.h b/input.h
index 137d42e..053a5b9 100644 (file)
--- a/input.h
+++ b/input.h
@@ -7,6 +7,7 @@
 typedef struct input_t input_t;
 
 input_t *input_from_stream(FILE *stream, const char *encoding);
+input_t *input_from_string(const char *string, const char *encoding);
 
 /** Type for a function being called on an input (or encoding) errors. */
 typedef void (*input_error_callback_func)(unsigned delta_lines,
diff --git a/lexer.c b/lexer.c
index e85895e..3e9494c 100644 (file)
--- a/lexer.c
+++ b/lexer.c
@@ -57,7 +57,6 @@ static source_position_t  lexer_pos;
 token_t                   lexer_token;
 static symbol_t          *symbol_L;
 static strset_t           stringset;
-static char              *encoding;
 bool                      allow_dollar_in_symbol = true;
 
 /**
@@ -1332,26 +1331,14 @@ static void input_error(unsigned delta_lines, unsigned delta_cols,
        errorf(&lexer_pos, "%s", message);
 }
 
-void select_input_encoding(char const* new_encoding)
+void lexer_switch_input(input_t *new_input, const char *input_name)
 {
-       if (encoding != NULL)
-               xfree(encoding);
-       encoding = xstrdup(new_encoding);
-}
-
-void lexer_open_stream(FILE *stream, const char *input_name)
-{
-       if (input != NULL) {
-               input_free(input);
-               input = NULL;
-       }
-
        lexer_pos.lineno     = 0;
        lexer_pos.colno      = 0;
        lexer_pos.input_name = input_name;
 
        set_input_error_callback(input_error);
-       input  = input_from_stream(stream, encoding);
+       input  = new_input;
        bufpos = NULL;
        bufend = NULL;
 
@@ -1362,10 +1349,6 @@ void lexer_open_stream(FILE *stream, const char *input_name)
 
 void exit_lexer(void)
 {
-       if (input != NULL) {
-               input_free(input);
-               input = NULL;
-       }
        strset_destroy(&stringset);
 }
 
diff --git a/lexer.h b/lexer.h
index df7bebb..bc9d297 100644 (file)
--- a/lexer.h
+++ b/lexer.h
@@ -22,6 +22,7 @@
 
 #include "symbol_table_t.h"
 #include "token_t.h"
+#include "input.h"
 
 extern token_t lexer_token;
 extern bool    allow_dollar_in_symbol;
@@ -34,9 +35,7 @@ void lexer_next_preprocessing_token(void);
 void init_lexer(void);
 void exit_lexer(void);
 
-void select_input_encoding(char const* encoding);
-
-void lexer_open_stream(FILE *stream, const char *input_name);
+void lexer_switch_input(input_t *input, const char *input_name);
 
 string_t concat_strings(const string_t *s1, const string_t *s2);
 string_t make_string(const char *str);
diff --git a/main.c b/main.c
index a4ff403..b52afd8 100644 (file)
--- a/main.c
+++ b/main.c
@@ -117,6 +117,7 @@ static struct obstack    asflags_obst;
 static char              dep_target[1024];
 static const char       *outname;
 static bool              define_intmax_types;
+static const char       *input_encoding;
 
 typedef enum lang_standard_t {
        STANDARD_DEFAULT, /* gnu99 (for C, GCC does gnu89) or gnu++98 (for C++) */
@@ -182,22 +183,26 @@ static translation_unit_t *do_parsing(FILE *const in, const char *const input_na
 {
        start_parsing();
 
-       lexer_open_stream(in, input_name);
+       input_t *input = input_from_stream(in, input_encoding);
+       lexer_switch_input(input, input_name);
        parse();
-
        translation_unit_t *unit = finish_parsing();
+       input_free(input);
+
        return unit;
 }
 
 static void lextest(FILE *in, const char *fname)
 {
-       lexer_open_stream(in, fname);
+       input_t *input = input_from_stream(in, input_encoding);
+       lexer_switch_input(input, fname);
 
        do {
                lexer_next_preprocessing_token();
                print_token(stdout, &lexer_token);
                putchar('\n');
        } while (lexer_token.kind != T_EOF);
+       input_free(input);
 }
 
 static void add_flag(struct obstack *obst, const char *format, ...)
@@ -1171,7 +1176,7 @@ int main(int argc, char **argv)
 
                                if (strstart(orig_opt, "input-charset=")) {
                                        char const* const encoding = strchr(orig_opt, '=') + 1;
-                                       select_input_encoding(encoding);
+                                       input_encoding = encoding;
                                } else if (strstart(orig_opt, "align-loops=") ||
                                           strstart(orig_opt, "align-jumps=") ||
                                           strstart(orig_opt, "align-functions=")) {