Consistently use get_string_len() to correct the size calculation of wide string...
author: Christoph Mallon <christoph.mallon@gmx.de>
Wed, 13 Jun 2012 10:37:01 +0000 (12:37 +0200)
committer: Christoph Mallon <christoph.mallon@gmx.de>
Thu, 14 Jun 2012 08:38:07 +0000 (10:38 +0200)
Makefile
ast2firm.c
parser.c
string_rep.c [new file with mode: 0644]
string_rep.h
token_t.h

index 2299308..88333e1 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -52,6 +52,7 @@ SOURCES := \
        mangle.c \
        preprocessor.c \
        printer.c \
+       string_rep.c \
        symbol_table.c \
        token.c \
        type.c \
index 9c150b9..c85a8a1 100644 (file)
--- a/ast2firm.c
+++ b/ast2firm.c
@@ -1103,14 +1103,12 @@ static ir_node *create_conv(dbg_info *dbgi, ir_node *value, ir_mode *dest_mode)
  */
 static ir_node *string_to_firm(source_position_t const *const src_pos, char const *const id_prefix, string_encoding_t const enc, string_t const *const value)
 {
-       size_t            slen;
-       ir_type          *elem_type;
-       ir_initializer_t *initializer;
+       size_t            const slen        = get_string_len(enc, value) + 1;
+       ir_initializer_t *const initializer = create_initializer_compound(slen);
+       ir_type          *      elem_type;
        switch (enc) {
        case STRING_ENCODING_CHAR: {
-               slen        = value->size + 1;
-               elem_type   = ir_type_char;
-               initializer = create_initializer_compound(slen);
+               elem_type = ir_type_char;
 
                ir_mode *const mode = get_type_mode(elem_type);
                char const    *p    = value->begin;
@@ -1123,9 +1121,7 @@ static ir_node *string_to_firm(source_position_t const *const src_pos, char cons
        }
 
        case STRING_ENCODING_WIDE: {
-               slen        = wstrlen(value) + 1;
-               elem_type   = ir_type_wchar_t;
-               initializer = create_initializer_compound(slen);
+               elem_type = ir_type_wchar_t;
 
                ir_mode *const mode = get_type_mode(elem_type);
                char const    *p    = value->begin;
index 1dc9abb..4f3cddc 100644 (file)
--- a/parser.c
+++ b/parser.c
@@ -2222,7 +2222,7 @@ static initializer_t *parse_initializer(parse_initializer_env_t *env)
                        break;
 
                case INITIALIZER_STRING:
-                       size = result->string.string.size + 1;
+                       size = get_string_len(result->string.encoding, &result->string.string) + 1;
                        break;
 
                case INITIALIZER_DESIGNATOR:
@@ -5839,10 +5839,11 @@ static expression_t *parse_character_constant(void)
        literal->string_literal.encoding = token.string.encoding;
        literal->string_literal.value    = token.string.string;
 
+       size_t const size = get_string_len(token.string.encoding, &token.string.string);
        switch (token.string.encoding) {
        case STRING_ENCODING_CHAR:
                literal->base.type = c_mode & _CXX ? type_char : type_int;
-               if (literal->string_literal.value.size > 1) {
+               if (size > 1) {
                        if (!GNU_MODE && !(c_mode & _C99)) {
                                errorf(HERE, "more than 1 character in character constant");
                        } else {
@@ -5854,7 +5855,7 @@ static expression_t *parse_character_constant(void)
 
        case STRING_ENCODING_WIDE:
                literal->base.type = type_int;
-               if (wstrlen(&literal->string_literal.value) > 1) {
+               if (size > 1) {
                        warningf(WARN_MULTICHAR, HERE, "multi-character character constant");
                }
                break;
@@ -5949,7 +5950,7 @@ type_t *revert_automatic_type_conversion(const expression_t *expression)
        }
 
        case EXPR_STRING_LITERAL: {
-               size_t  const size = expression->string_literal.value.size + 1;
+               size_t  const size = get_string_len(expression->string_literal.encoding, &expression->string_literal.value) + 1;
                type_t *const elem = get_unqualified_type(expression->base.type->pointer.points_to);
                return make_array_type(elem, size, TYPE_QUALIFIER_NONE);
        }
diff --git a/string_rep.c b/string_rep.c
new file mode 100644 (file)
index 0000000..19c59da
--- /dev/null
@@ -0,0 +1,23 @@
+#include "adt/error.h"
+#include "string_rep.h"
+
+static inline size_t wstrlen(const string_t *string) /* count the characters (not bytes) stored in the UTF-8 buffer of `string` */
+{
+       size_t      result = 0; /* characters decoded so far */
+       const char *p      = string->begin;
+       const char *end    = p + string->size; /* one past the last byte; size is a byte count */
+       while (p < end) {
+               read_utf8_char(&p); /* decoded value is discarded; presumably advances p past one encoded character (see unicode.h) */
+               ++result;
+       }
+       return result;
+}
+
+size_t get_string_len(string_encoding_t const enc, string_t const *const str) /* length of `str` in elements of encoding `enc` */
+{
+       switch (enc) {
+       case STRING_ENCODING_CHAR: return str->size; /* one element per byte */
+       case STRING_ENCODING_WIDE: return wstrlen(str); /* one element per decoded UTF-8 character */
+       }
+       panic("invalid string encoding"); /* reached only when enc matches neither enumerator */
+}
index f3a1e6b..fd6c00b 100644 (file)
--- a/string_rep.h
+++ b/string_rep.h
 #include <stdlib.h>
 #include "unicode.h"
 
+enum string_encoding_t { /* selects how get_string_len() measures a string_t */
+       STRING_ENCODING_CHAR, /* length is the byte size of the string */
+       STRING_ENCODING_WIDE /* length is the number of decoded UTF-8 characters */
+};
+typedef enum string_encoding_t string_encoding_t;
+
 typedef struct string_t {
        const char *begin; /**< UTF-8 encoded string, the last character is
                                                * guaranteed to be 0 */
        size_t      size;  /**< size of string in bytes (not characters) */
 } string_t;
 
-static inline size_t wstrlen(const string_t *string)
-{
-       size_t      result = 0;
-       const char *p      = string->begin;
-       const char *end    = p + string->size;
-       while (p < end) {
-               read_utf8_char(&p);
-               ++result;
-       }
-       return result;
-}
+size_t get_string_len(string_encoding_t enc, string_t const *str); /* length of str: bytes for STRING_ENCODING_CHAR, decoded characters for STRING_ENCODING_WIDE */
 
 #endif
index 9a68f81..5d66af4 100644 (file)
--- a/token_t.h
+++ b/token_t.h
@@ -70,12 +70,6 @@ struct token_base_t {
        symbol_t         *symbol;
 };
 
-enum string_encoding_t {
-       STRING_ENCODING_CHAR,
-       STRING_ENCODING_WIDE
-};
-typedef enum string_encoding_t string_encoding_t;
-
 struct string_literal_t {
        token_base_t      base;
        string_encoding_t encoding;