rework input logic to allow parsing from strings

author Matthias Braun <matze@braunis.de>

Thu, 11 Aug 2011 23:28:40 +0000 (01:28 +0200)

committer Matthias Braun <matze@braunis.de>

Thu, 11 Aug 2011 23:32:47 +0000 (01:32 +0200)
author Matthias Braun <matze@braunis.de>
Thu, 11 Aug 2011 23:28:40 +0000 (01:28 +0200)
committer Matthias Braun <matze@braunis.de>
Thu, 11 Aug 2011 23:32:47 +0000 (01:32 +0200)
diff --git a/input.c b/input.c

index 748a0ca..f65cddf 100644 (file)
--- a/input.c
+++ b/input.c
@@ -8,8 +8,17 @@
  
  typedef size_t (*decode_func)(input_t *input, utf32 *buffer, size_t buffer_size);
  
+typedef enum {
+       INPUT_FILE,   /* bytes are fetched from a stdio stream (in.file) via fread() */
+       INPUT_STRING  /* bytes are copied out of a NUL-terminated string (in.string) */
+} input_kind_t; /* tells read_block() which member of input_t.in to use */
+
  struct input_t {
-       FILE       *file;
+       input_kind_t kind;
+       union {
+               FILE *file;
+               const char *string;
+       } in;
         decode_func decode;
  
         /* state for utf-8 decoder */
@@ -25,25 +34,36 @@ void set_input_error_callback(input_error_callback_func new_func)
         input_error = new_func;
  }
  
-static size_t read_block(FILE *file, unsigned char *const read_buf,
+static size_t read_block(input_t *input, unsigned char *const read_buf,
                           size_t const n)
  {
-       size_t const s = fread(read_buf, 1, n, file);
-       if (s == 0) {
-               /* on OS/X ferror appears to return true on eof as well when running
-                * the application in gdb... */
-               if (!feof(file) && ferror(file))
-                       input_error(0, 0, "read from input failed");
-               return 0;
+       if (input->kind == INPUT_FILE) { /* fetch up to n bytes from the stream */
+               FILE *file = input->in.file;
+               size_t const s = fread(read_buf, 1, n, file);
+               if (s == 0) {
+                       /* on OS/X ferror appears to return true on eof as well when running
+                        * the application in gdb... */
+                       if (!feof(file) && ferror(file))
+                               input_error(0, 0, "read from input failed");
+                       return 0; /* end of file or read error: nothing delivered */
+               }
+               return s;
+       } else {
+               assert(input->kind == INPUT_STRING);
+               /* search only the next n bytes for the NUL; strlen() rescanned the whole rest on every call */
+               const char *const nul = memchr(input->in.string, '\0', n);
+               size_t const len = nul != NULL ? (size_t)(nul - input->in.string) : n;
+               memcpy(read_buf, input->in.string, len);
+               input->in.string += len; /* consume what was handed out */
+               return len; /* 0 once the terminator has been reached */
         }
-       return s;
  }
  
  static size_t decode_iso_8859_1(input_t *input, utf32 *buffer,
                                  size_t buffer_size)
  {
         unsigned char read_buf[buffer_size];
-       size_t const s = read_block(input->file, read_buf, sizeof(read_buf));
+       size_t const s = read_block(input, read_buf, sizeof(read_buf));
  
         unsigned char const *src = read_buf;
         unsigned char const *end = read_buf + s;
@@ -58,7 +78,7 @@ static size_t decode_iso_8859_15(input_t *input, utf32 *buffer,
                                   size_t buffer_size)
  {
         unsigned char read_buf[buffer_size];
-       size_t const s = read_block(input->file, read_buf, sizeof(read_buf));
+       size_t const s = read_block(input, read_buf, sizeof(read_buf));
  
         unsigned char const *src = read_buf;
         unsigned char const *end = read_buf + s;
@@ -86,7 +106,7 @@ static size_t decode_utf8(input_t *input, utf32 *buffer, size_t buffer_size)
         unsigned char read_buf[buffer_size];
  
         while (true) {
-               size_t const s = read_block(input->file, read_buf, sizeof(read_buf));
+               size_t const s = read_block(input, read_buf, sizeof(read_buf));
                 if (s == 0) {
                         if (input->utf8_part_decoded_rest_len > 0)
                                 input_error(0, 0, "incomplete input char at end of input");
@@ -195,7 +215,7 @@ static size_t decode_windows_1252(input_t *input, utf32 *buffer,
                                    size_t buffer_size)
  {
         unsigned char read_buf[buffer_size];
-       size_t const s = read_block(input->file, read_buf, sizeof(read_buf));
+       size_t const s = read_block(input, read_buf, sizeof(read_buf));
  
         unsigned char const *src = read_buf;
         unsigned char const *end = read_buf + s;
@@ -274,11 +294,8 @@ static int my_strcasecmp(const char *s1, const char *s2)
         return (unsigned char)*s1 - (unsigned char)*s2;
  }
  
-input_t *input_from_stream(FILE *file, const char *encoding)
+static void choose_decoder(input_t *result, const char *encoding)
  {
-       input_t *result = XMALLOCZ(input_t);
-       result->file = file;
-
         if (encoding == NULL) {
                 result->decode = decode_utf8;
         } else {
@@ -294,6 +311,26 @@ input_t *input_from_stream(FILE *file, const char *encoding)
                         result->decode = decode_utf8;
                 }
         }
+}
+
+input_t *input_from_stream(FILE *file, const char *encoding)
+{ /* Creates an input whose bytes come from the stdio stream `file`. */
+       input_t *result = XMALLOCZ(input_t);
+       result->kind    = INPUT_FILE;
+       result->in.file = file; /* pointer is stored as-is; read_block() fread()s from it later */
+       /* encoding == NULL selects the UTF-8 decoder */
+       choose_decoder(result, encoding);
+       /* the callers in main.c release the result with input_free() */
+       return result;
+}
+
+input_t *input_from_string(const char *string, const char *encoding)
+{ /* Creates an input whose bytes come from the NUL-terminated `string`. */
+       input_t *result   = XMALLOCZ(input_t);
+       result->kind      = INPUT_STRING;
+       result->in.string = string; /* not copied: read_block() reads through this pointer, so the text must stay valid while the input is used */
+       /* encoding == NULL selects the UTF-8 decoder */
+       choose_decoder(result, encoding);
  
         return result;
  }
diff --git a/input.h b/input.h

index 137d42e..053a5b9 100644 (file)
--- a/input.h
+++ b/input.h
@@ -7,6 +7,7 @@
  typedef struct input_t input_t;
  
  input_t *input_from_stream(FILE *stream, const char *encoding);
+input_t *input_from_string(const char *string, const char *encoding);
  
  /** Type for a function being called on an input (or encoding) errors. */
  typedef void (*input_error_callback_func)(unsigned delta_lines,
diff --git a/lexer.c b/lexer.c

index e85895e..3e9494c 100644 (file)
--- a/lexer.c
+++ b/lexer.c
@@ -57,7 +57,6 @@ static source_position_t  lexer_pos;
  token_t                   lexer_token;
  static symbol_t          *symbol_L;
  static strset_t           stringset;
-static char              *encoding;
  bool                      allow_dollar_in_symbol = true;
  
  /**
@@ -1332,26 +1331,14 @@ static void input_error(unsigned delta_lines, unsigned delta_cols,
         errorf(&lexer_pos, "%s", message);
  }
  
-void select_input_encoding(char const* new_encoding)
+void lexer_switch_input(input_t *new_input, const char *input_name)
  {
-       if (encoding != NULL)
-               xfree(encoding);
-       encoding = xstrdup(new_encoding);
-}
-
-void lexer_open_stream(FILE *stream, const char *input_name)
-{
-       if (input != NULL) {
-               input_free(input);
-               input = NULL;
-       }
-
         lexer_pos.lineno     = 0;
         lexer_pos.colno      = 0;
         lexer_pos.input_name = input_name;
  
         set_input_error_callback(input_error);
-       input  = input_from_stream(stream, encoding);
+       input  = new_input;
         bufpos = NULL;
         bufend = NULL;
  
@@ -1362,10 +1349,6 @@ void lexer_open_stream(FILE *stream, const char *input_name)
  
  void exit_lexer(void)
  {
-       if (input != NULL) {
-               input_free(input);
-               input = NULL;
-       }
         strset_destroy(&stringset);
  }
  
diff --git a/lexer.h b/lexer.h

index df7bebb..bc9d297 100644 (file)
--- a/lexer.h
+++ b/lexer.h
@@ -22,6 +22,7 @@
  
  #include "symbol_table_t.h"
  #include "token_t.h"
+#include "input.h"
  
  extern token_t lexer_token;
  extern bool    allow_dollar_in_symbol;
@@ -34,9 +35,7 @@ void lexer_next_preprocessing_token(void);
  void init_lexer(void);
  void exit_lexer(void);
  
-void select_input_encoding(char const* encoding);
-
-void lexer_open_stream(FILE *stream, const char *input_name);
+void lexer_switch_input(input_t *input, const char *input_name);
  
  string_t concat_strings(const string_t *s1, const string_t *s2);
  string_t make_string(const char *str);
diff --git a/main.c b/main.c

index a4ff403..b52afd8 100644 (file)
--- a/main.c
+++ b/main.c
@@ -117,6 +117,7 @@ static struct obstack    asflags_obst;
  static char              dep_target[1024];
  static const char       *outname;
  static bool              define_intmax_types;
+static const char       *input_encoding;
  
  typedef enum lang_standard_t {
         STANDARD_DEFAULT, /* gnu99 (for C, GCC does gnu89) or gnu++98 (for C++) */
@@ -182,22 +183,26 @@ static translation_unit_t *do_parsing(FILE *const in, const char *const input_na
  {
         start_parsing();
  
-       lexer_open_stream(in, input_name);
+       input_t *input = input_from_stream(in, input_encoding);
+       lexer_switch_input(input, input_name);
         parse();
-
         translation_unit_t *unit = finish_parsing();
+       input_free(input);
+
         return unit;
  }
  
  static void lextest(FILE *in, const char *fname)
  {
-       lexer_open_stream(in, fname);
+       input_t *input = input_from_stream(in, input_encoding); /* input_encoding is assigned by the input-charset= option in main(), NULL otherwise */
+       lexer_switch_input(input, fname);
  
         do {
                 lexer_next_preprocessing_token();
                 print_token(stdout, &lexer_token);
                 putchar('\n');
         } while (lexer_token.kind != T_EOF);
+       input_free(input); /* the lexer no longer frees its input, so release it here */
  }
  
  static void add_flag(struct obstack *obst, const char *format, ...)
@@ -1171,7 +1176,7 @@ int main(int argc, char **argv)
  
                                 if (strstart(orig_opt, "input-charset=")) {
                                         char const* const encoding = strchr(orig_opt, '=') + 1;
-                                       select_input_encoding(encoding);
+                                       input_encoding = encoding;
                                 } else if (strstart(orig_opt, "align-loops=") ||
                                            strstart(orig_opt, "align-jumps=") ||
                                            strstart(orig_opt, "align-functions=")) {
author	Matthias Braun <matze@braunis.de>
	Thu, 11 Aug 2011 23:28:40 +0000 (01:28 +0200)
committer	Matthias Braun <matze@braunis.de>
	Thu, 11 Aug 2011 23:32:47 +0000 (01:32 +0200)
input.c		patch \| blob \| history
input.h		patch \| blob \| history
lexer.c		patch \| blob \| history
lexer.h		patch \| blob \| history
main.c		patch \| blob \| history