Consistently use get_string_len() to correct the size calculation of wide string literals.

author Christoph Mallon <christoph.mallon@gmx.de>

Wed, 13 Jun 2012 10:37:01 +0000 (12:37 +0200)

committer Christoph Mallon <christoph.mallon@gmx.de>

Thu, 14 Jun 2012 08:38:07 +0000 (10:38 +0200)
author Christoph Mallon <christoph.mallon@gmx.de>
Wed, 13 Jun 2012 10:37:01 +0000 (12:37 +0200)
committer Christoph Mallon <christoph.mallon@gmx.de>
Thu, 14 Jun 2012 08:38:07 +0000 (10:38 +0200)
diff --git a/Makefile b/Makefile

index 2299308..88333e1 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -52,6 +52,7 @@ SOURCES := \
         mangle.c \
         preprocessor.c \
         printer.c \
+       string_rep.c \
         symbol_table.c \
         token.c \
         type.c \
diff --git a/ast2firm.c b/ast2firm.c

index 9c150b9..c85a8a1 100644 (file)
--- a/ast2firm.c
+++ b/ast2firm.c
@@ -1103,14 +1103,12 @@ static ir_node *create_conv(dbg_info *dbgi, ir_node *value, ir_mode *dest_mode)
   */
  static ir_node *string_to_firm(source_position_t const *const src_pos, char const *const id_prefix, string_encoding_t const enc, string_t const *const value)
  {
-       size_t            slen;
-       ir_type          *elem_type;
-       ir_initializer_t *initializer;
+       size_t            const slen        = get_string_len(enc, value) + 1;
+       ir_initializer_t *const initializer = create_initializer_compound(slen);
+       ir_type          *      elem_type;
         switch (enc) {
         case STRING_ENCODING_CHAR: {
-               slen        = value->size + 1;
-               elem_type   = ir_type_char;
-               initializer = create_initializer_compound(slen);
+               elem_type = ir_type_char;
  
                 ir_mode *const mode = get_type_mode(elem_type);
                 char const    *p    = value->begin;
@@ -1123,9 +1121,7 @@ static ir_node *string_to_firm(source_position_t const *const src_pos, char cons
         }
  
         case STRING_ENCODING_WIDE: {
-               slen        = wstrlen(value) + 1;
-               elem_type   = ir_type_wchar_t;
-               initializer = create_initializer_compound(slen);
+               elem_type = ir_type_wchar_t;
  
                 ir_mode *const mode = get_type_mode(elem_type);
                 char const    *p    = value->begin;
diff --git a/parser.c b/parser.c

index 1dc9abb..4f3cddc 100644 (file)
--- a/parser.c
+++ b/parser.c
@@ -2222,7 +2222,7 @@ static initializer_t *parse_initializer(parse_initializer_env_t *env)
                         break;
  
                 case INITIALIZER_STRING:
-                       size = result->string.string.size + 1;
+                       size = get_string_len(result->string.encoding, &result->string.string) + 1;
                         break;
  
                 case INITIALIZER_DESIGNATOR:
@@ -5839,10 +5839,11 @@ static expression_t *parse_character_constant(void)
         literal->string_literal.encoding = token.string.encoding;
         literal->string_literal.value    = token.string.string;
  
+       size_t const size = get_string_len(token.string.encoding, &token.string.string);
         switch (token.string.encoding) {
         case STRING_ENCODING_CHAR:
                 literal->base.type = c_mode & _CXX ? type_char : type_int;
-               if (literal->string_literal.value.size > 1) {
+               if (size > 1) {
                         if (!GNU_MODE && !(c_mode & _C99)) {
                                 errorf(HERE, "more than 1 character in character constant");
                         } else {
@@ -5854,7 +5855,7 @@ static expression_t *parse_character_constant(void)
  
         case STRING_ENCODING_WIDE:
                 literal->base.type = type_int;
-               if (wstrlen(&literal->string_literal.value) > 1) {
+               if (size > 1) {
                         warningf(WARN_MULTICHAR, HERE, "multi-character character constant");
                 }
                 break;
@@ -5949,7 +5950,7 @@ type_t *revert_automatic_type_conversion(const expression_t *expression)
         }
  
         case EXPR_STRING_LITERAL: {
-               size_t  const size = expression->string_literal.value.size + 1;
+               size_t  const size = get_string_len(expression->string_literal.encoding, &expression->string_literal.value) + 1;
                 type_t *const elem = get_unqualified_type(expression->base.type->pointer.points_to);
                 return make_array_type(elem, size, TYPE_QUALIFIER_NONE);
         }
diff --git a/string_rep.c b/string_rep.c

new file mode 100644 (file)

index 0000000..19c59da
--- /dev/null
+++ b/string_rep.c
@@ -0,0 +1,23 @@
+#include "adt/error.h"
+#include "string_rep.h"
+
+static inline size_t wstrlen(const string_t *string)
+{
+       size_t      result = 0;
+       const char *p      = string->begin;
+       const char *end    = p + string->size;
+       while (p < end) {
+               read_utf8_char(&p);
+               ++result;
+       }
+       return result;
+}
+
+size_t get_string_len(string_encoding_t const enc, string_t const *const str)
+{
+       switch (enc) {
+       case STRING_ENCODING_CHAR: return str->size;
+       case STRING_ENCODING_WIDE: return wstrlen(str);
+       }
+       panic("invalid string encoding");
+}
diff --git a/string_rep.h b/string_rep.h

index f3a1e6b..fd6c00b 100644 (file)
--- a/string_rep.h
+++ b/string_rep.h
@@ -23,22 +23,18 @@
  #include <stdlib.h>
  #include "unicode.h"
  
+enum string_encoding_t {
+       STRING_ENCODING_CHAR,
+       STRING_ENCODING_WIDE
+};
+typedef enum string_encoding_t string_encoding_t;
+
  typedef struct string_t {
         const char *begin; /**< UTF-8 encoded string, the last character is
                                                 * guaranteed to be 0 */
         size_t      size;  /**< size of string in bytes (not characters) */
  } string_t;
  
-static inline size_t wstrlen(const string_t *string)
-{
-       size_t      result = 0;
-       const char *p      = string->begin;
-       const char *end    = p + string->size;
-       while (p < end) {
-               read_utf8_char(&p);
-               ++result;
-       }
-       return result;
-}
+size_t get_string_len(string_encoding_t enc, string_t const *str);
  
  #endif
diff --git a/token_t.h b/token_t.h

index 9a68f81..5d66af4 100644 (file)
--- a/token_t.h
+++ b/token_t.h
@@ -70,12 +70,6 @@ struct token_base_t {
         symbol_t         *symbol;
  };
  
-enum string_encoding_t {
-       STRING_ENCODING_CHAR,
-       STRING_ENCODING_WIDE
-};
-typedef enum string_encoding_t string_encoding_t;
-
  struct string_literal_t {
         token_base_t      base;
         string_encoding_t encoding;
author	Christoph Mallon <christoph.mallon@gmx.de>
	Wed, 13 Jun 2012 10:37:01 +0000 (12:37 +0200)
committer	Christoph Mallon <christoph.mallon@gmx.de>
	Thu, 14 Jun 2012 08:38:07 +0000 (10:38 +0200)
Makefile		patch | blob | history
ast2firm.c		patch | blob | history
parser.c		patch | blob | history
string_rep.c	[new file with mode: 0644]	patch | blob
string_rep.h		patch | blob | history
token_t.h		patch | blob | history