From 6d30fa5c7d03437d992a80234c88d8eb6431b35e Mon Sep 17 00:00:00 2001 From: Christoph Mallon Date: Wed, 13 Jun 2012 12:37:01 +0200 Subject: [PATCH] Consistently use get_string_len() to correct the size calculation of wide string literals. --- Makefile | 1 + ast2firm.c | 14 +++++--------- parser.c | 9 +++++---- string_rep.c | 23 +++++++++++++++++++++++ string_rep.h | 18 +++++++----------- token_t.h | 6 ------ 6 files changed, 41 insertions(+), 30 deletions(-) create mode 100644 string_rep.c diff --git a/Makefile b/Makefile index 2299308..88333e1 100644 --- a/Makefile +++ b/Makefile @@ -52,6 +52,7 @@ SOURCES := \ mangle.c \ preprocessor.c \ printer.c \ + string_rep.c \ symbol_table.c \ token.c \ type.c \ diff --git a/ast2firm.c b/ast2firm.c index 9c150b9..c85a8a1 100644 --- a/ast2firm.c +++ b/ast2firm.c @@ -1103,14 +1103,12 @@ static ir_node *create_conv(dbg_info *dbgi, ir_node *value, ir_mode *dest_mode) */ static ir_node *string_to_firm(source_position_t const *const src_pos, char const *const id_prefix, string_encoding_t const enc, string_t const *const value) { - size_t slen; - ir_type *elem_type; - ir_initializer_t *initializer; + size_t const slen = get_string_len(enc, value) + 1; + ir_initializer_t *const initializer = create_initializer_compound(slen); + ir_type * elem_type; switch (enc) { case STRING_ENCODING_CHAR: { - slen = value->size + 1; - elem_type = ir_type_char; - initializer = create_initializer_compound(slen); + elem_type = ir_type_char; ir_mode *const mode = get_type_mode(elem_type); char const *p = value->begin; @@ -1123,9 +1121,7 @@ static ir_node *string_to_firm(source_position_t const *const src_pos, char cons } case STRING_ENCODING_WIDE: { - slen = wstrlen(value) + 1; - elem_type = ir_type_wchar_t; - initializer = create_initializer_compound(slen); + elem_type = ir_type_wchar_t; ir_mode *const mode = get_type_mode(elem_type); char const *p = value->begin; diff --git a/parser.c b/parser.c index 1dc9abb..4f3cddc 100644 --- a/parser.c +++ 
b/parser.c @@ -2222,7 +2222,7 @@ static initializer_t *parse_initializer(parse_initializer_env_t *env) break; case INITIALIZER_STRING: - size = result->string.string.size + 1; + size = get_string_len(result->string.encoding, &result->string.string) + 1; break; case INITIALIZER_DESIGNATOR: @@ -5839,10 +5839,11 @@ static expression_t *parse_character_constant(void) literal->string_literal.encoding = token.string.encoding; literal->string_literal.value = token.string.string; + size_t const size = get_string_len(token.string.encoding, &token.string.string); switch (token.string.encoding) { case STRING_ENCODING_CHAR: literal->base.type = c_mode & _CXX ? type_char : type_int; - if (literal->string_literal.value.size > 1) { + if (size > 1) { if (!GNU_MODE && !(c_mode & _C99)) { errorf(HERE, "more than 1 character in character constant"); } else { @@ -5854,7 +5855,7 @@ static expression_t *parse_character_constant(void) case STRING_ENCODING_WIDE: literal->base.type = type_int; - if (wstrlen(&literal->string_literal.value) > 1) { + if (size > 1) { warningf(WARN_MULTICHAR, HERE, "multi-character character constant"); } break; @@ -5949,7 +5950,7 @@ type_t *revert_automatic_type_conversion(const expression_t *expression) } case EXPR_STRING_LITERAL: { - size_t const size = expression->string_literal.value.size + 1; + size_t const size = get_string_len(expression->string_literal.encoding, &expression->string_literal.value) + 1; type_t *const elem = get_unqualified_type(expression->base.type->pointer.points_to); return make_array_type(elem, size, TYPE_QUALIFIER_NONE); } diff --git a/string_rep.c b/string_rep.c new file mode 100644 index 0000000..19c59da --- /dev/null +++ b/string_rep.c @@ -0,0 +1,23 @@ +#include "adt/error.h" +#include "string_rep.h" + +static inline size_t wstrlen(const string_t *string) +{ + size_t result = 0; + const char *p = string->begin; + const char *end = p + string->size; + while (p < end) { + read_utf8_char(&p); + ++result; + } + return result; +} + 
+size_t get_string_len(string_encoding_t const enc, string_t const *const str) +{ + switch (enc) { + case STRING_ENCODING_CHAR: return str->size; + case STRING_ENCODING_WIDE: return wstrlen(str); + } + panic("invalid string encoding"); +} diff --git a/string_rep.h b/string_rep.h index f3a1e6b..fd6c00b 100644 --- a/string_rep.h +++ b/string_rep.h @@ -23,22 +23,18 @@ #include <stdlib.h> #include "unicode.h" +enum string_encoding_t { + STRING_ENCODING_CHAR, + STRING_ENCODING_WIDE +}; +typedef enum string_encoding_t string_encoding_t; + typedef struct string_t { const char *begin; /**< UTF-8 encoded string, the last character is * guaranteed to be 0 */ size_t size; /**< size of string in bytes (not characters) */ } string_t; -static inline size_t wstrlen(const string_t *string) -{ - size_t result = 0; - const char *p = string->begin; - const char *end = p + string->size; - while (p < end) { - read_utf8_char(&p); - ++result; - } - return result; -} +size_t get_string_len(string_encoding_t enc, string_t const *str); #endif diff --git a/token_t.h b/token_t.h index 9a68f81..5d66af4 100644 --- a/token_t.h +++ b/token_t.h @@ -70,12 +70,6 @@ struct token_base_t { symbol_t *symbol; }; -enum string_encoding_t { - STRING_ENCODING_CHAR, - STRING_ENCODING_WIDE -}; -typedef enum string_encoding_t string_encoding_t; - struct string_literal_t { token_base_t base; string_encoding_t encoding; -- 2.20.1