Record the encoding in string_literal_expression_t and merge EXPR_WIDE_STRING_LITERAL into EXPR_STRING_LITERAL.
author Christoph Mallon <christoph.mallon@gmx.de>
Tue, 22 May 2012 08:47:16 +0000 (10:47 +0200)
committer Christoph Mallon <christoph.mallon@gmx.de>
Wed, 23 May 2012 05:01:01 +0000 (07:01 +0200)
ast.c
ast2firm.c
ast_t.h
format_check.c
parser.c
walk.c

diff --git a/ast.c b/ast.c
index c0e6b1d..80efca1 100644 (file)
--- a/ast.c
+++ b/ast.c
@@ -110,7 +110,6 @@ static unsigned get_expression_precedence(expression_kind_t kind)
                [EXPR_LITERAL_WIDE_CHARACTER]     = PREC_PRIMARY,
                [EXPR_LITERAL_MS_NOOP]            = PREC_PRIMARY,
                [EXPR_STRING_LITERAL]             = PREC_PRIMARY,
-               [EXPR_WIDE_STRING_LITERAL]        = PREC_PRIMARY,
                [EXPR_COMPOUND_LITERAL]           = PREC_UNARY,
                [EXPR_CALL]                       = PREC_POSTFIX,
                [EXPR_CONDITIONAL]                = PREC_CONDITIONAL,
@@ -237,9 +236,7 @@ static void print_quoted_string(const string_t *const string, char border,
 
 static void print_string_literal(const string_literal_expression_t *literal)
 {
-       if (literal->base.kind == EXPR_WIDE_STRING_LITERAL) {
-               print_char('L');
-       }
+       print_string(get_string_encoding_prefix(literal->encoding));
        print_quoted_string(&literal->value, '"', 1);
 }
 
@@ -665,7 +662,6 @@ static bool needs_parentheses(expression_t const *const expr, unsigned const top
                case EXPR_LITERAL_CASES:
                case EXPR_REFERENCE:
                case EXPR_STRING_LITERAL:
-               case EXPR_WIDE_STRING_LITERAL:
                        /* Do not print () around subexpressions consisting of a single token. */
                        return false;
 
@@ -713,8 +709,7 @@ static void print_expression_prec(expression_t const *expr, unsigned const top_p
        case EXPR_ENUM_CONSTANT:              print_reference_expression(    &expr->reference);                break;
        case EXPR_SELECT:                     print_select(                  &expr->select);                   break;
        case EXPR_STATEMENT:                  print_statement_expression(    &expr->statement);                break;
-       case EXPR_STRING_LITERAL:
-       case EXPR_WIDE_STRING_LITERAL:        print_string_literal(          &expr->string_literal);           break;
+       case EXPR_STRING_LITERAL:             print_string_literal(          &expr->string_literal);           break;
        case EXPR_UNARY_CASES:                print_unary_expression(        &expr->unary);                    break;
        case EXPR_VA_ARG:                     print_va_arg(                  &expr->va_arge);                  break;
        case EXPR_VA_COPY:                    print_va_copy(                 &expr->va_copye);                 break;
@@ -1595,7 +1590,6 @@ expression_classification_t is_linker_constant(const expression_t *expression)
 {
        switch (expression->kind) {
        case EXPR_STRING_LITERAL:
-       case EXPR_WIDE_STRING_LITERAL:
        case EXPR_FUNCNAME:
        case EXPR_LABEL_ADDRESS:
                return EXPR_CLASS_CONSTANT;
@@ -1806,7 +1800,6 @@ expression_classification_t is_constant_expression(const expression_t *expressio
        }
 
        case EXPR_STRING_LITERAL:
-       case EXPR_WIDE_STRING_LITERAL:
        case EXPR_FUNCNAME:
        case EXPR_LABEL_ADDRESS:
        case EXPR_SELECT:
diff --git a/ast2firm.c b/ast2firm.c
index 39aaa2e..066434a 100644 (file)
--- a/ast2firm.c
+++ b/ast2firm.c
@@ -1101,47 +1101,6 @@ static ir_node *create_conv(dbg_info *dbgi, ir_node *value, ir_mode *dest_mode)
        return new_d_Conv(dbgi, value, dest_mode);
 }
 
-/**
- * Creates a SymConst node representing a wide string literal.
- *
- * @param literal   the wide string literal
- */
-static ir_node *wide_string_literal_to_firm(
-               const string_literal_expression_t *literal)
-{
-       ir_type  *const global_type = get_glob_type();
-       ir_type  *const elem_type   = ir_type_wchar_t;
-       dbg_info *const dbgi        = get_dbg_info(&literal->base.source_position);
-       ir_type  *const type        = new_type_array(1, elem_type);
-
-       ident     *const id     = id_unique("str.%u");
-       ir_entity *const entity = new_d_entity(global_type, id, type, dbgi);
-       set_entity_ld_ident(entity, id);
-       set_entity_visibility(entity, ir_visibility_private);
-       add_entity_linkage(entity, IR_LINKAGE_CONSTANT);
-
-       ir_mode      *const mode = get_type_mode(elem_type);
-       const size_t        slen = wstrlen(&literal->value);
-
-       set_array_lower_bound_int(type, 0, 0);
-       set_array_upper_bound_int(type, 0, slen);
-       set_type_size_bytes(type, slen * get_mode_size_bytes(mode));
-       set_type_state(type, layout_fixed);
-
-       ir_initializer_t *initializer = create_initializer_compound(slen);
-       const char              *p    = literal->value.begin;
-       for (size_t i = 0; i < slen; ++i) {
-               assert(p < literal->value.begin + literal->value.size);
-               utf32             v   = read_utf8_char(&p);
-               ir_tarval        *tv  = new_tarval_from_long(v, mode);
-               ir_initializer_t *val = create_initializer_tarval(tv);
-               set_initializer_compound_value(initializer, i, val);
-       }
-       set_entity_initializer(entity, initializer);
-
-       return create_symconst(dbgi, entity);
-}
-
 /**
  * Creates a SymConst node representing a string constant.
  *
@@ -1149,36 +1108,59 @@ static ir_node *wide_string_literal_to_firm(
  * @param id_prefix  a prefix for the name of the generated string constant
  * @param value      the value of the string constant
  */
-static ir_node *string_to_firm(const source_position_t *const src_pos,
-                               const char *const id_prefix,
-                               const string_t *const value)
+static ir_node *string_to_firm(source_position_t const *const src_pos, char const *const id_prefix, string_encoding_t const enc, string_t const *const value)
 {
-       ir_type  *const global_type = get_glob_type();
-       dbg_info *const dbgi        = get_dbg_info(src_pos);
-       ir_type  *const elem_type   = ir_type_char;
-       ir_type  *const type        = new_type_array(1, elem_type);
-
-       ident     *const id     = id_unique(id_prefix);
-       ir_entity *const entity = new_d_entity(global_type, id, type, dbgi);
-       set_entity_ld_ident(entity, id);
-       set_entity_visibility(entity, ir_visibility_private);
-       add_entity_linkage(entity, IR_LINKAGE_CONSTANT);
-
-       ir_mode    *const mode   = get_type_mode(elem_type);
-       const char* const string = value->begin;
-       const size_t      slen   = value->size;
+       size_t            slen;
+       ir_type          *elem_type;
+       ir_initializer_t *initializer;
+       switch (enc) {
+       case STRING_ENCODING_CHAR: {
+               slen        = value->size;
+               elem_type   = ir_type_char;
+               initializer = create_initializer_compound(slen);
+
+               ir_mode  *const mode = get_type_mode(elem_type);
+               char const     *p    = value->begin;
+               for (size_t i = 0; i < slen; ++i) {
+                       ir_tarval        *tv  = new_tarval_from_long(*p++, mode);
+                       ir_initializer_t *val = create_initializer_tarval(tv);
+                       set_initializer_compound_value(initializer, i, val);
+               }
+               goto finish;
+       }
+
+       case STRING_ENCODING_WIDE: {
+               slen        = wstrlen(value);
+               elem_type   = ir_type_wchar_t;
+               initializer = create_initializer_compound(slen);
+
+               ir_mode *const mode = get_type_mode(elem_type);
+               char const    *p    = value->begin;
+               for (size_t i = 0; i < slen; ++i) {
+                       assert(p < value->begin + value->size);
+                       utf32             v   = read_utf8_char(&p);
+                       ir_tarval        *tv  = new_tarval_from_long(v, mode);
+                       ir_initializer_t *val = create_initializer_tarval(tv);
+                       set_initializer_compound_value(initializer, i, val);
+               }
+               goto finish;
+       }
+       }
+       panic("invalid string encoding");
 
-       set_array_lower_bound_int(type, 0, 0);
-       set_array_upper_bound_int(type, 0, slen);
-       set_type_size_bytes(type, slen);
-       set_type_state(type, layout_fixed);
+finish:;
+       ir_type *const type = new_type_array(1, elem_type);
+       set_array_bounds_int(type, 0, 0, slen);
+       set_type_size_bytes( type, slen * get_type_size_bytes(elem_type));
+       set_type_state(      type, layout_fixed);
 
-       ir_initializer_t *initializer = create_initializer_compound(slen);
-       for (size_t i = 0; i < slen; ++i) {
-               ir_tarval        *tv  = new_tarval_from_long(string[i], mode);
-               ir_initializer_t *val = create_initializer_tarval(tv);
-               set_initializer_compound_value(initializer, i, val);
-       }
+       ir_type   *const global_type = get_glob_type();
+       ident     *const id          = id_unique(id_prefix);
+       dbg_info  *const dbgi        = get_dbg_info(src_pos);
+       ir_entity *const entity      = new_d_entity(global_type, id, type, dbgi);
+       set_entity_ld_ident(   entity, id);
+       set_entity_visibility( entity, ir_visibility_private);
+       add_entity_linkage(    entity, IR_LINKAGE_CONSTANT);
        set_entity_initializer(entity, initializer);
 
        return create_symconst(dbgi, entity);
@@ -3156,7 +3138,7 @@ static ir_node *function_name_to_firm(
                        const source_position_t *const src_pos = &expr->base.source_position;
                        const char    *name  = current_function_entity->base.symbol->string;
                        const string_t string = { name, strlen(name) + 1 };
-                       current_function_name = string_to_firm(src_pos, "__func__.%u", &string);
+                       current_function_name = string_to_firm(src_pos, "__func__.%u", STRING_ENCODING_CHAR, &string);
                }
                return current_function_name;
        case FUNCNAME_FUNCSIG:
@@ -3165,7 +3147,7 @@ static ir_node *function_name_to_firm(
                        ir_entity *ent = get_irg_entity(current_ir_graph);
                        const char *const name = get_entity_ld_name(ent);
                        const string_t string = { name, strlen(name) + 1 };
-                       current_funcsig = string_to_firm(src_pos, "__FUNCSIG__.%u", &string);
+                       current_funcsig = string_to_firm(src_pos, "__FUNCSIG__.%u", STRING_ENCODING_CHAR, &string);
                }
                return current_funcsig;
        }
@@ -3354,9 +3336,8 @@ static ir_node *_expression_to_firm(expression_t const *const expr)
        case EXPR_VA_ARG:                     return va_arg_expression_to_firm(       &expr->va_arge);
        case EXPR_VA_COPY:                    return va_copy_expression_to_firm(      &expr->va_copye);
        case EXPR_VA_START:                   return va_start_expression_to_firm(     &expr->va_starte);
-       case EXPR_WIDE_STRING_LITERAL:        return wide_string_literal_to_firm(     &expr->string_literal);
 
-       case EXPR_STRING_LITERAL: return string_to_firm(&expr->base.source_position, "str.%u", &expr->string_literal.value);
+       case EXPR_STRING_LITERAL: return string_to_firm(&expr->base.source_position, "str.%u", expr->string_literal.encoding, &expr->string_literal.value);
 
        case EXPR_ERROR: break;
        }
diff --git a/ast_t.h b/ast_t.h
index 6013b12..371295e 100644 (file)
--- a/ast_t.h
+++ b/ast_t.h
@@ -72,7 +72,6 @@ typedef enum expression_kind_t {
        EXPR_LITERAL_WIDE_CHARACTER,
        EXPR_LITERAL_MS_NOOP, /**< MS __noop extension */
        EXPR_STRING_LITERAL,
-       EXPR_WIDE_STRING_LITERAL,
        EXPR_COMPOUND_LITERAL,
        EXPR_CALL,
        EXPR_CONDITIONAL,
@@ -266,6 +265,7 @@ struct literal_expression_t {
 
 struct string_literal_expression_t {
        expression_base_t  base;
+       string_encoding_t  encoding;
        string_t           value;
 };
 
diff --git a/format_check.c b/format_check.c
index fa6ffd1..4c1d670 100644 (file)
--- a/format_check.c
+++ b/format_check.c
@@ -127,8 +127,7 @@ static int internal_check_printf_format(const expression_t *fmt_expr,
                return nt > nf ? nt : nf;
        }
 
-       if (fmt_expr->kind != EXPR_STRING_LITERAL
-                       && fmt_expr->kind != EXPR_WIDE_STRING_LITERAL)
+       if (fmt_expr->kind != EXPR_STRING_LITERAL)
                return -1;
 
        const char *string = fmt_expr->string_literal.value.begin;
@@ -600,8 +599,7 @@ static void check_scanf_format(const call_argument_t *arg,
                fmt_expr = fmt_expr->unary.value;
        }
 
-       if (fmt_expr->kind != EXPR_STRING_LITERAL
-                       && fmt_expr->kind != EXPR_WIDE_STRING_LITERAL)
+       if (fmt_expr->kind != EXPR_STRING_LITERAL)
                return;
 
        const char *string = fmt_expr->string_literal.value.begin;
diff --git a/parser.c b/parser.c
index b2702de..a14086f 100644 (file)
--- a/parser.c
+++ b/parser.c
@@ -329,7 +329,6 @@ static size_t get_expression_struct_size(expression_kind_t kind)
                [EXPR_LITERAL_CHARACTER]          = sizeof(literal_expression_t),
                [EXPR_LITERAL_WIDE_CHARACTER]     = sizeof(literal_expression_t),
                [EXPR_STRING_LITERAL]             = sizeof(string_literal_expression_t),
-               [EXPR_WIDE_STRING_LITERAL]        = sizeof(string_literal_expression_t),
                [EXPR_COMPOUND_LITERAL]           = sizeof(compound_literal_expression_t),
                [EXPR_CALL]                       = sizeof(call_expression_t),
                [EXPR_UNARY_FIRST]                = sizeof(unary_expression_t),
@@ -1490,7 +1489,6 @@ unary:
                case EXPR_LITERAL_CASES:
                case EXPR_ERROR:
                case EXPR_STRING_LITERAL:
-               case EXPR_WIDE_STRING_LITERAL:
                case EXPR_COMPOUND_LITERAL: // TODO init?
                case EXPR_SIZEOF:
                case EXPR_CLASSIFY_TYPE:
@@ -1586,10 +1584,10 @@ static initializer_t *initializer_from_expression(type_t *orig_type,
                array_type_t *const array_type   = &type->array;
                type_t       *const element_type = skip_typeref(array_type->element_type);
 
-               if (element_type->kind == TYPE_ATOMIC) {
-                       atomic_type_kind_t akind = element_type->atomic.akind;
-                       switch (expression->kind) {
-                       case EXPR_STRING_LITERAL:
+               if (element_type->kind == TYPE_ATOMIC && expression->kind == EXPR_STRING_LITERAL) {
+                       switch (expression->string_literal.encoding) {
+                       case STRING_ENCODING_CHAR: {
+                               atomic_type_kind_t const akind = element_type->atomic.akind;
                                if (akind == ATOMIC_TYPE_CHAR
                                                || akind == ATOMIC_TYPE_SCHAR
                                                || akind == ATOMIC_TYPE_UCHAR) {
@@ -1597,8 +1595,9 @@ static initializer_t *initializer_from_expression(type_t *orig_type,
                                                        &expression->string_literal.value);
                                }
                                break;
+                       }
 
-                       case EXPR_WIDE_STRING_LITERAL: {
+                       case STRING_ENCODING_WIDE: {
                                type_t *bare_wchar_type = skip_typeref(type_wchar_t);
                                if (get_unqualified_type(element_type) == bare_wchar_type) {
                                        return initializer_from_wide_string(array_type,
@@ -1606,9 +1605,6 @@ static initializer_t *initializer_from_expression(type_t *orig_type,
                                }
                                break;
                        }
-
-                       default:
-                               break;
                        }
                }
        }
@@ -2089,9 +2085,7 @@ finish_designator:
                        }
 
                        /* handle { "string" } special case */
-                       if ((expression->kind == EXPR_STRING_LITERAL
-                                       || expression->kind == EXPR_WIDE_STRING_LITERAL)
-                                       && outer_type != NULL) {
+                       if (expression->kind == EXPR_STRING_LITERAL && outer_type != NULL) {
                                sub = initializer_from_expression(outer_type, expression);
                                if (sub != NULL) {
                                        next_if(',');
@@ -4655,7 +4649,6 @@ static bool expression_returns(expression_t const *const expr)
                case EXPR_ENUM_CONSTANT:
                case EXPR_LITERAL_CASES:
                case EXPR_STRING_LITERAL:
-               case EXPR_WIDE_STRING_LITERAL:
                case EXPR_COMPOUND_LITERAL: // TODO descend into initialisers
                case EXPR_LABEL_ADDRESS:
                case EXPR_CLASSIFY_TYPE:
@@ -5713,14 +5706,14 @@ struct expression_parser_function_t {
 
 static expression_parser_function_t expression_parsers[T_LAST_TOKEN];
 
-static type_t *get_string_type(void)
+static type_t *get_string_type(string_encoding_t const enc)
 {
-       return is_warn_on(WARN_WRITE_STRINGS) ? type_const_char_ptr : type_char_ptr;
-}
-
-static type_t *get_wide_string_type(void)
-{
-       return is_warn_on(WARN_WRITE_STRINGS) ? type_const_wchar_t_ptr : type_wchar_t_ptr;
+       bool const warn = is_warn_on(WARN_WRITE_STRINGS);
+       switch (enc) {
+       case STRING_ENCODING_CHAR: return warn ? type_const_char_ptr    : type_char_ptr;
+       case STRING_ENCODING_WIDE: return warn ? type_const_wchar_t_ptr : type_wchar_t_ptr;
+       }
+       panic("invalid string encoding");
 }
 
 /**
@@ -5728,22 +5721,10 @@ static type_t *get_wide_string_type(void)
  */
 static expression_t *parse_string_literal(void)
 {
-       string_encoding_t       enc;
-       source_position_t const pos = *HERE;
-       string_t          const res = concat_string_literals(&enc);
-
-       expression_t *literal;
-       if (enc != STRING_ENCODING_CHAR) {
-               literal = allocate_expression_zero(EXPR_WIDE_STRING_LITERAL);
-               literal->base.type = get_wide_string_type();
-       } else {
-               literal = allocate_expression_zero(EXPR_STRING_LITERAL);
-               literal->base.type = get_string_type();
-       }
-       literal->base.source_position = pos;
-       literal->string_literal.value = res;
-
-       return literal;
+       expression_t *const expr = allocate_expression_zero(EXPR_STRING_LITERAL);
+       expr->string_literal.value = concat_string_literals(&expr->string_literal.encoding);
+       expr->base.type            = get_string_type(expr->string_literal.encoding);
+       return expr;
 }
 
 /**
@@ -5992,13 +5973,9 @@ type_t *revert_automatic_type_conversion(const expression_t *expression)
        }
 
        case EXPR_STRING_LITERAL: {
-               size_t size = expression->string_literal.value.size;
-               return make_array_type(type_char, size, TYPE_QUALIFIER_NONE);
-       }
-
-       case EXPR_WIDE_STRING_LITERAL: {
-               size_t size = wstrlen(&expression->string_literal.value);
-               return make_array_type(type_wchar_t, size, TYPE_QUALIFIER_NONE);
+               /* A wide literal is stored UTF-8 encoded: its element count is wstrlen(), not the byte size. */
+               size_t const size = expression->string_literal.encoding == STRING_ENCODING_WIDE ? wstrlen(&expression->string_literal.value) : expression->string_literal.value.size;
+               return make_array_type(get_unqualified_type(expression->base.type->pointer.points_to), size, TYPE_QUALIFIER_NONE);
        }
 
        case EXPR_COMPOUND_LITERAL:
@@ -7991,8 +7968,7 @@ static void warn_string_literal_address(expression_t const* expr)
                expr = expr->unary.value;
        }
 
-       if (expr->kind == EXPR_STRING_LITERAL
-                       || expr->kind == EXPR_WIDE_STRING_LITERAL) {
+       if (expr->kind == EXPR_STRING_LITERAL) {
                source_position_t const *const pos = &expr->base.source_position;
                warningf(WARN_ADDRESS, pos, "comparison with string literal results in unspecified behaviour");
        }
@@ -8334,7 +8310,6 @@ static bool expression_has_effect(const expression_t *const expr)
                case EXPR_LITERAL_INTEGER:
                case EXPR_LITERAL_FLOATINGPOINT:
                case EXPR_STRING_LITERAL:             return false;
-               case EXPR_WIDE_STRING_LITERAL:        return false;
 
                case EXPR_CALL: {
                        const call_expression_t *const call = &expr->call;
diff --git a/walk.c b/walk.c
index 1b88790..5c65515 100644 (file)
--- a/walk.c
+++ b/walk.c
@@ -180,7 +180,6 @@ static void walk_expression(expression_t *const expr,
        case EXPR_REFERENCE:
        case EXPR_ENUM_CONSTANT:
        case EXPR_STRING_LITERAL:
-       case EXPR_WIDE_STRING_LITERAL:
        case EXPR_FUNCNAME:
        case EXPR_LABEL_ADDRESS:
        case EXPR_ERROR: