Include string encoding in string_t.
author Christoph Mallon <christoph.mallon@gmx.de>
Thu, 14 Jun 2012 17:12:01 +0000 (19:12 +0200)
committer Christoph Mallon <christoph.mallon@gmx.de>
Thu, 14 Jun 2012 17:12:01 +0000 (19:12 +0200)
ast.c
ast2firm.c
ast_t.h
lexer.c
parser.c
preprocessor.c
string_rep.c
string_rep.h
token.c
token_t.h

diff --git a/ast.c b/ast.c
index e3e4bea..005d0fc 100644 (file)
--- a/ast.c
+++ b/ast.c
@@ -197,6 +197,8 @@ static unsigned get_expression_precedence(expression_kind_t kind)
  */
 static void print_quoted_string(const string_t *const string, char border)
 {
+       print_string(get_string_encoding_prefix(string->encoding));
+
        print_char(border);
        const char *end = string->begin + string->size;
        for (const char *c = string->begin; c != end; ++c) {
@@ -233,7 +235,6 @@ static void print_quoted_string(const string_t *const string, char border)
 
 static void print_string_literal(string_literal_expression_t const *const literal, char const delimiter)
 {
-       print_string(get_string_encoding_prefix(literal->encoding));
        print_quoted_string(&literal->value, delimiter);
 }
 
diff --git a/ast2firm.c b/ast2firm.c
index 05ed110..c3a1e69 100644 (file)
--- a/ast2firm.c
+++ b/ast2firm.c
@@ -1101,12 +1101,12 @@ static ir_node *create_conv(dbg_info *dbgi, ir_node *value, ir_mode *dest_mode)
  * @param id_prefix  a prefix for the name of the generated string constant
  * @param value      the value of the string constant
  */
-static ir_node *string_to_firm(source_position_t const *const src_pos, char const *const id_prefix, string_encoding_t const enc, string_t const *const value)
+static ir_node *string_to_firm(source_position_t const *const src_pos, char const *const id_prefix, string_t const *const value)
 {
-       size_t            const slen        = get_string_len(enc, value) + 1;
+       size_t            const slen        = get_string_len(value) + 1;
        ir_initializer_t *const initializer = create_initializer_compound(slen);
        ir_type          *      elem_type;
-       switch (enc) {
+       switch (value->encoding) {
        case STRING_ENCODING_CHAR: {
                elem_type = ir_type_char;
 
@@ -1283,7 +1283,7 @@ static ir_node *char_literal_to_firm(string_literal_expression_t const *literal)
        size_t      size   = literal->value.size;
        ir_tarval  *tv;
 
-       switch (literal->encoding) {
+       switch (literal->value.encoding) {
        case STRING_ENCODING_WIDE: {
                utf32  v = read_utf8_char(&string);
                char   buf[128];
@@ -3140,19 +3140,19 @@ static ir_node *function_name_to_firm(
        case FUNCNAME_PRETTY_FUNCTION:
        case FUNCNAME_FUNCDNAME:
                if (current_function_name == NULL) {
-                       const source_position_t *const src_pos = &expr->base.source_position;
-                       const char    *name  = current_function_entity->base.symbol->string;
-                       const string_t string = { name, strlen(name) };
-                       current_function_name = string_to_firm(src_pos, "__func__.%u", STRING_ENCODING_CHAR, &string);
+                       source_position_t const *const src_pos = &expr->base.source_position;
+                       char              const *const name    = current_function_entity->base.symbol->string;
+                       string_t                 const string  = { name, strlen(name), STRING_ENCODING_CHAR };
+                       current_function_name = string_to_firm(src_pos, "__func__.%u", &string);
                }
                return current_function_name;
        case FUNCNAME_FUNCSIG:
                if (current_funcsig == NULL) {
-                       const source_position_t *const src_pos = &expr->base.source_position;
-                       ir_entity *ent = get_irg_entity(current_ir_graph);
-                       const char *const name = get_entity_ld_name(ent);
-                       const string_t string = { name, strlen(name) };
-                       current_funcsig = string_to_firm(src_pos, "__FUNCSIG__.%u", STRING_ENCODING_CHAR, &string);
+                       source_position_t const *const src_pos = &expr->base.source_position;
+                       ir_entity               *const ent     = get_irg_entity(current_ir_graph);
+                       char              const *const name    = get_entity_ld_name(ent);
+                       string_t                 const string  = { name, strlen(name), STRING_ENCODING_CHAR };
+                       current_funcsig = string_to_firm(src_pos, "__FUNCSIG__.%u", &string);
                }
                return current_funcsig;
        }
@@ -3343,7 +3343,7 @@ static ir_node *_expression_to_firm(expression_t const *const expr)
        case EXPR_VA_COPY:                    return va_copy_expression_to_firm(      &expr->va_copye);
        case EXPR_VA_START:                   return va_start_expression_to_firm(     &expr->va_starte);
 
-       case EXPR_STRING_LITERAL: return string_to_firm(&expr->base.source_position, "str.%u", expr->string_literal.encoding, &expr->string_literal.value);
+       case EXPR_STRING_LITERAL: return string_to_firm(&expr->base.source_position, "str.%u", &expr->string_literal.value);
 
        case EXPR_ERROR: break;
        }
@@ -3863,7 +3863,7 @@ static ir_initializer_t *create_ir_initializer_string(initializer_t const *const
        ir_initializer_t *const irinit  = create_initializer_compound(arr_len);
        ir_mode          *const mode    = get_ir_mode_storage(type->array.element_type);
        char const       *      p       = str->value.begin;
-       switch (str->encoding) {
+       switch (str->value.encoding) {
        case STRING_ENCODING_CHAR:
                for (size_t i = 0; i != arr_len; ++i) {
                        char              const c      = i < str_len ? *p++ : 0;
diff --git a/ast_t.h b/ast_t.h
index 6f68d8f..74ccf40 100644 (file)
--- a/ast_t.h
+++ b/ast_t.h
@@ -264,9 +264,8 @@ struct literal_expression_t {
  * string and character literals
  */
 struct string_literal_expression_t {
-       expression_base_t  base;
-       string_encoding_t  encoding;
-       string_t           value;
+       expression_base_t base;
+       string_t          value;
 };
 
 struct funcname_expression_t {
diff --git a/lexer.c b/lexer.c
index cca5b77..d368aed 100644 (file)
--- a/lexer.c
+++ b/lexer.c
@@ -402,7 +402,7 @@ end_symbol:
        }
 }
 
-static string_t sym_make_string(void)
+static string_t sym_make_string(string_encoding_t const enc)
 {
        obstack_1grow(&symbol_obstack, '\0');
        size_t const len    = obstack_object_size(&symbol_obstack) - 1;
@@ -417,7 +417,7 @@ static string_t sym_make_string(void)
 #else
        const char *result = string;
 #endif
-       return (string_t) {result, len};
+       return (string_t){ result, len, enc };
 }
 
 /**
@@ -444,7 +444,7 @@ finish_suffix:
                return;
        }
 
-       lexer_token.number.suffix = sym_make_string();
+       lexer_token.number.suffix = sym_make_string(STRING_ENCODING_CHAR);
 }
 
 static void parse_exponent(void)
@@ -500,7 +500,7 @@ static void parse_number_hex(void)
                       "hexadecimal floatingpoint constant requires an exponent");
        }
 
-       lexer_token.number.number = sym_make_string();
+       lexer_token.number.number = sym_make_string(STRING_ENCODING_CHAR);
 
        lexer_token.kind = is_float ? T_FLOATINGPOINT : T_INTEGER;
 
@@ -523,7 +523,7 @@ static void parse_number_bin(void)
                next_char();
        }
 
-       lexer_token.number.number = sym_make_string();
+       lexer_token.number.number = sym_make_string(STRING_ENCODING_CHAR);
        lexer_token.kind          = T_INTEGER;
 
        if (!has_digits) {
@@ -596,7 +596,7 @@ static void parse_number(void)
                parse_exponent();
        }
 
-       lexer_token.number.number = sym_make_string();
+       lexer_token.number.number = sym_make_string(STRING_ENCODING_CHAR);
 
        if (is_float) {
                lexer_token.kind = T_FLOATINGPOINT;
@@ -744,7 +744,7 @@ static utf32 parse_escape_sequence(void)
 string_t make_string(const char *string)
 {
        obstack_grow(&symbol_obstack, string, strlen(string));
-       return sym_make_string();
+       return sym_make_string(STRING_ENCODING_CHAR);
 }
 
 static void parse_string(utf32 const delim, token_kind_t const kind, string_encoding_t const enc, char const *const context)
@@ -787,9 +787,8 @@ static void parse_string(utf32 const delim, token_kind_t const kind, string_enco
        }
 
 end_of_string:
-       lexer_token.kind            = kind;
-       lexer_token.string.encoding = enc;
-       lexer_token.string.string   = sym_make_string();
+       lexer_token.kind          = kind;
+       lexer_token.string.string = sym_make_string(enc);
 }
 
 /**
@@ -912,8 +911,8 @@ static void parse_line_directive(void)
                lexer_pos.lineno = atoi(pp_token.number.number.begin) - 1;
                next_pp_token();
        }
-       if (pp_token.kind == T_STRING_LITERAL && pp_token.string.encoding == STRING_ENCODING_CHAR) {
-               lexer_pos.input_name = pp_token.string.string.begin;
+       if (pp_token.kind == T_STRING_LITERAL && pp_token.string.string.encoding == STRING_ENCODING_CHAR) {
+               lexer_pos.input_name       = pp_token.string.string.begin;
                lexer_pos.is_system_header = false;
                next_pp_token();
 
diff --git a/parser.c b/parser.c
index 3fb68d8..e2e560a 100644 (file)
--- a/parser.c
+++ b/parser.c
@@ -1038,51 +1038,49 @@ static void append_string(string_t const *const s)
        obstack_grow(&ast_obstack, s->begin, s->size);
 }
 
-static string_t finish_string(void)
+static string_t finish_string(string_encoding_t const enc)
 {
        obstack_1grow(&ast_obstack, '\0');
        size_t      const size   = obstack_object_size(&ast_obstack) - 1;
        char const *const string = obstack_finish(&ast_obstack);
-       return (string_t){ string, size };
+       return (string_t){ string, size, enc };
 }
 
-static string_t concat_string_literals(string_encoding_t *const out_enc)
+static string_t concat_string_literals(void)
 {
        assert(token.kind == T_STRING_LITERAL);
 
-       string_t          result;
-       string_encoding_t enc = token.string.encoding;
+       string_t result;
        if (look_ahead(1)->kind == T_STRING_LITERAL) {
                append_string(&token.string.string);
                eat(T_STRING_LITERAL);
                warningf(WARN_TRADITIONAL, HERE, "traditional C rejects string constant concatenation");
+               string_encoding_t enc = token.string.string.encoding;
                do {
-                       if (token.string.encoding != STRING_ENCODING_CHAR) {
-                               enc = token.string.encoding;
+                       if (token.string.string.encoding != STRING_ENCODING_CHAR) {
+                               enc = token.string.string.encoding;
                        }
                        append_string(&token.string.string);
                        eat(T_STRING_LITERAL);
                } while (token.kind == T_STRING_LITERAL);
-               result = finish_string();
+               result = finish_string(enc);
        } else {
                result = token.string.string;
                eat(T_STRING_LITERAL);
        }
 
-       *out_enc = enc;
        return result;
 }
 
 static string_t parse_string_literals(char const *const context)
 {
        if (!skip_till(T_STRING_LITERAL, context))
-               return (string_t){ "", 0 };
+               return (string_t){ "", 0, STRING_ENCODING_CHAR };
 
-       string_encoding_t       enc;
        source_position_t const pos = *HERE;
-       string_t          const res = concat_string_literals(&enc);
+       string_t          const res = concat_string_literals();
 
-       if (enc != STRING_ENCODING_CHAR) {
+       if (res.encoding != STRING_ENCODING_CHAR) {
                errorf(&pos, "expected plain string literal, got wide string literal");
        }
 
@@ -1557,7 +1555,7 @@ static initializer_t *initializer_from_expression(type_t *orig_type,
        if (expression->kind == EXPR_STRING_LITERAL && is_type_array(type)) {
                array_type_t *const array_type   = &type->array;
                type_t       *const element_type = skip_typeref(array_type->element_type);
-               switch (expression->string_literal.encoding) {
+               switch (expression->string_literal.value.encoding) {
                case STRING_ENCODING_CHAR: {
                        if (is_type_atomic(element_type, ATOMIC_TYPE_CHAR)  ||
                            is_type_atomic(element_type, ATOMIC_TYPE_SCHAR) ||
@@ -2206,8 +2204,7 @@ static initializer_t *parse_initializer(parse_initializer_env_t *env)
                        break;
 
                case INITIALIZER_STRING: {
-                       string_literal_expression_t const *const str = get_init_string(result);
-                       size = get_string_len(str->encoding, &str->value) + 1;
+                       size = get_string_len(&get_init_string(result)->value) + 1;
                        break;
                }
 
@@ -5687,8 +5684,8 @@ static type_t *get_string_type(string_encoding_t const enc)
 static expression_t *parse_string_literal(void)
 {
        expression_t *const expr = allocate_expression_zero(EXPR_STRING_LITERAL);
-       expr->string_literal.value = concat_string_literals(&expr->string_literal.encoding);
-       expr->base.type            = get_string_type(expr->string_literal.encoding);
+       expr->string_literal.value = concat_string_literals();
+       expr->base.type            = get_string_type(expr->string_literal.value.encoding);
        return expr;
 }
 
@@ -5818,11 +5815,10 @@ static expression_t *parse_number_literal(void)
 static expression_t *parse_character_constant(void)
 {
        expression_t *const literal = allocate_expression_zero(EXPR_LITERAL_CHARACTER);
-       literal->string_literal.encoding = token.string.encoding;
-       literal->string_literal.value    = token.string.string;
+       literal->string_literal.value = token.string.string;
 
-       size_t const size = get_string_len(token.string.encoding, &token.string.string);
-       switch (token.string.encoding) {
+       size_t const size = get_string_len(&token.string.string);
+       switch (token.string.string.encoding) {
        case STRING_ENCODING_CHAR:
                literal->base.type = c_mode & _CXX ? type_char : type_int;
                if (size > 1) {
@@ -5932,7 +5928,7 @@ type_t *revert_automatic_type_conversion(const expression_t *expression)
        }
 
        case EXPR_STRING_LITERAL: {
-               size_t  const size = get_string_len(expression->string_literal.encoding, &expression->string_literal.value) + 1;
+               size_t  const size = get_string_len(&expression->string_literal.value) + 1;
                type_t *const elem = get_unqualified_type(expression->base.type->pointer.points_to);
                return make_array_type(elem, size, TYPE_QUALIFIER_NONE);
        }
diff --git a/preprocessor.c b/preprocessor.c
index a9b246d..3829329 100644 (file)
--- a/preprocessor.c
+++ b/preprocessor.c
@@ -451,13 +451,13 @@ static const char *identify_string(char *string)
        return result;
 }
 
-static string_t sym_make_string(void)
+static string_t sym_make_string(string_encoding_t const enc)
 {
        obstack_1grow(&symbol_obstack, '\0');
        size_t      const len    = obstack_object_size(&symbol_obstack) - 1;
        char       *const string = obstack_finish(&symbol_obstack);
        char const *const result = identify_string(string);
-       return (string_t) {result, len};
+       return (string_t){ result, len, enc };
 }
 
 static void parse_string(utf32 const delimiter, preprocessor_token_kind_t const kind, string_encoding_t const enc, char const *const context)
@@ -513,9 +513,8 @@ static void parse_string(utf32 const delimiter, preprocessor_token_kind_t const
        }
 
 end_of_string:
-       pp_token.kind            = kind;
-       pp_token.string.encoding = enc;
-       pp_token.string.string   = sym_make_string();
+       pp_token.kind          = kind;
+       pp_token.string.string = sym_make_string(enc);
 }
 
 static void parse_string_literal(string_encoding_t const enc)
@@ -824,7 +823,7 @@ static void parse_number(void)
 
 end_number:
        pp_token.kind          = TP_NUMBER;
-       pp_token.number.number = sym_make_string();
+       pp_token.number.number = sym_make_string(STRING_ENCODING_CHAR);
 }
 
 
@@ -1138,14 +1137,14 @@ static void emit_pp_token(void)
                break;
 
        case TP_STRING_LITERAL:
-               fputs(get_string_encoding_prefix(pp_token.string.encoding), out);
+               fputs(get_string_encoding_prefix(pp_token.string.string.encoding), out);
                fputc('"', out);
                fputs(pp_token.string.string.begin, out);
                fputc('"', out);
                break;
 
        case TP_CHARACTER_CONSTANT:
-               fputs(get_string_encoding_prefix(pp_token.string.encoding), out);
+               fputs(get_string_encoding_prefix(pp_token.string.string.encoding), out);
                fputc('\'', out);
                fputs(pp_token.string.string.begin, out);
                fputc('\'', out);
@@ -1344,7 +1343,7 @@ static void parse_undef_directive(void)
 static void parse_headername(void)
 {
        const source_position_t start_position = input.position;
-       string_t                string         = {NULL, 0};
+       string_t                string         = { NULL, 0, STRING_ENCODING_CHAR };
        assert(obstack_object_size(&symbol_obstack) == 0);
 
        /* behind an #include we can have the special headername lexems.
@@ -1391,7 +1390,7 @@ parse_name:
        }
 
 finished_headername:
-       string = sym_make_string();
+       string = sym_make_string(STRING_ENCODING_CHAR);
 
 finish_error:
        pp_token.base.source_position = start_position;
diff --git a/string_rep.c b/string_rep.c
index 19c59da..ff58aad 100644 (file)
--- a/string_rep.c
+++ b/string_rep.c
@@ -13,9 +13,9 @@ static inline size_t wstrlen(const string_t *string)
        return result;
 }
 
-size_t get_string_len(string_encoding_t const enc, string_t const *const str)
+size_t get_string_len(string_t const *const str)
 {
-       switch (enc) {
+       switch (str->encoding) {
        case STRING_ENCODING_CHAR: return str->size;
        case STRING_ENCODING_WIDE: return wstrlen(str);
        }
diff --git a/string_rep.h b/string_rep.h
index fd6c00b..c0868eb 100644 (file)
--- a/string_rep.h
+++ b/string_rep.h
@@ -30,11 +30,11 @@ enum string_encoding_t {
 typedef enum string_encoding_t string_encoding_t;
 
 typedef struct string_t {
-       const char *begin; /**< UTF-8 encoded string, the last character is
-                                               * guaranteed to be 0 */
-       size_t      size;  /**< size of string in bytes (not characters) */
+       char const       *begin; /**< UTF-8 encoded string, the last character is guaranteed to be \0. */
+       size_t            size;  /**< size of string in bytes (not characters), without terminating \0. */
+       string_encoding_t encoding;
 } string_t;
 
-size_t get_string_len(string_encoding_t enc, string_t const *str);
+size_t get_string_len(string_t const *str);
 
 #endif
diff --git a/token.c b/token.c
index 5d97f92..8184554 100644 (file)
--- a/token.c
+++ b/token.c
@@ -161,7 +161,7 @@ void print_token(FILE *f, const token_t *token)
        case T_CHARACTER_CONSTANT: delim = '\''; goto print_string;
 print_string:
                print_token_kind(f, (token_kind_t)token->kind);
-               fprintf(f, " %s%c", get_string_encoding_prefix(token->string.encoding), delim);
+               fprintf(f, " %s%c", get_string_encoding_prefix(token->string.string.encoding), delim);
                print_stringrep(&token->string.string, f);
                fputc(delim, f);
                break;
diff --git a/token_t.h b/token_t.h
index 5d66af4..7de0e6a 100644 (file)
--- a/token_t.h
+++ b/token_t.h
@@ -71,9 +71,8 @@ struct token_base_t {
 };
 
 struct string_literal_t {
-       token_base_t      base;
-       string_encoding_t encoding;
-       string_t          string;
+       token_base_t base;
+       string_t     string;
 };
 
 struct number_literal_t {