From 2fb66fd8bd2a5956ab2cad26978ccfb7e105d45f Mon Sep 17 00:00:00 2001 From: Christoph Mallon Date: Thu, 14 Jun 2012 19:12:01 +0200 Subject: [PATCH] Include string encoding in string_t. --- ast.c | 3 ++- ast2firm.c | 30 +++++++++++++++--------------- ast_t.h | 5 ++--- lexer.c | 23 +++++++++++------------ parser.c | 42 +++++++++++++++++++----------------------- preprocessor.c | 19 +++++++++---------- string_rep.c | 4 ++-- string_rep.h | 8 ++++---- token.c | 2 +- token_t.h | 5 ++--- 10 files changed, 67 insertions(+), 74 deletions(-) diff --git a/ast.c b/ast.c index e3e4bea..005d0fc 100644 --- a/ast.c +++ b/ast.c @@ -197,6 +197,8 @@ static unsigned get_expression_precedence(expression_kind_t kind) */ static void print_quoted_string(const string_t *const string, char border) { + print_string(get_string_encoding_prefix(string->encoding)); + print_char(border); const char *end = string->begin + string->size; for (const char *c = string->begin; c != end; ++c) { @@ -233,7 +235,6 @@ static void print_quoted_string(const string_t *const string, char border) static void print_string_literal(string_literal_expression_t const *const literal, char const delimiter) { - print_string(get_string_encoding_prefix(literal->encoding)); print_quoted_string(&literal->value, delimiter); } diff --git a/ast2firm.c b/ast2firm.c index 05ed110..c3a1e69 100644 --- a/ast2firm.c +++ b/ast2firm.c @@ -1101,12 +1101,12 @@ static ir_node *create_conv(dbg_info *dbgi, ir_node *value, ir_mode *dest_mode) * @param id_prefix a prefix for the name of the generated string constant * @param value the value of the string constant */ -static ir_node *string_to_firm(source_position_t const *const src_pos, char const *const id_prefix, string_encoding_t const enc, string_t const *const value) +static ir_node *string_to_firm(source_position_t const *const src_pos, char const *const id_prefix, string_t const *const value) { - size_t const slen = get_string_len(enc, value) + 1; + size_t const slen = 
get_string_len(value) + 1; ir_initializer_t *const initializer = create_initializer_compound(slen); ir_type * elem_type; - switch (enc) { + switch (value->encoding) { case STRING_ENCODING_CHAR: { elem_type = ir_type_char; @@ -1283,7 +1283,7 @@ static ir_node *char_literal_to_firm(string_literal_expression_t const *literal) size_t size = literal->value.size; ir_tarval *tv; - switch (literal->encoding) { + switch (literal->value.encoding) { case STRING_ENCODING_WIDE: { utf32 v = read_utf8_char(&string); char buf[128]; @@ -3140,19 +3140,19 @@ static ir_node *function_name_to_firm( case FUNCNAME_PRETTY_FUNCTION: case FUNCNAME_FUNCDNAME: if (current_function_name == NULL) { - const source_position_t *const src_pos = &expr->base.source_position; - const char *name = current_function_entity->base.symbol->string; - const string_t string = { name, strlen(name) }; - current_function_name = string_to_firm(src_pos, "__func__.%u", STRING_ENCODING_CHAR, &string); + source_position_t const *const src_pos = &expr->base.source_position; + char const *const name = current_function_entity->base.symbol->string; + string_t const string = { name, strlen(name), STRING_ENCODING_CHAR }; + current_function_name = string_to_firm(src_pos, "__func__.%u", &string); } return current_function_name; case FUNCNAME_FUNCSIG: if (current_funcsig == NULL) { - const source_position_t *const src_pos = &expr->base.source_position; - ir_entity *ent = get_irg_entity(current_ir_graph); - const char *const name = get_entity_ld_name(ent); - const string_t string = { name, strlen(name) }; - current_funcsig = string_to_firm(src_pos, "__FUNCSIG__.%u", STRING_ENCODING_CHAR, &string); + source_position_t const *const src_pos = &expr->base.source_position; + ir_entity *const ent = get_irg_entity(current_ir_graph); + char const *const name = get_entity_ld_name(ent); + string_t const string = { name, strlen(name), STRING_ENCODING_CHAR }; + current_funcsig = string_to_firm(src_pos, "__FUNCSIG__.%u", &string); } return 
current_funcsig; } @@ -3343,7 +3343,7 @@ static ir_node *_expression_to_firm(expression_t const *const expr) case EXPR_VA_COPY: return va_copy_expression_to_firm( &expr->va_copye); case EXPR_VA_START: return va_start_expression_to_firm( &expr->va_starte); - case EXPR_STRING_LITERAL: return string_to_firm(&expr->base.source_position, "str.%u", expr->string_literal.encoding, &expr->string_literal.value); + case EXPR_STRING_LITERAL: return string_to_firm(&expr->base.source_position, "str.%u", &expr->string_literal.value); case EXPR_ERROR: break; } @@ -3863,7 +3863,7 @@ static ir_initializer_t *create_ir_initializer_string(initializer_t const *const ir_initializer_t *const irinit = create_initializer_compound(arr_len); ir_mode *const mode = get_ir_mode_storage(type->array.element_type); char const * p = str->value.begin; - switch (str->encoding) { + switch (str->value.encoding) { case STRING_ENCODING_CHAR: for (size_t i = 0; i != arr_len; ++i) { char const c = i < str_len ? *p++ : 0; diff --git a/ast_t.h b/ast_t.h index 6f68d8f..74ccf40 100644 --- a/ast_t.h +++ b/ast_t.h @@ -264,9 +264,8 @@ struct literal_expression_t { * string and character literals */ struct string_literal_expression_t { - expression_base_t base; - string_encoding_t encoding; - string_t value; + expression_base_t base; + string_t value; }; struct funcname_expression_t { diff --git a/lexer.c b/lexer.c index cca5b77..d368aed 100644 --- a/lexer.c +++ b/lexer.c @@ -402,7 +402,7 @@ end_symbol: } } -static string_t sym_make_string(void) +static string_t sym_make_string(string_encoding_t const enc) { obstack_1grow(&symbol_obstack, '\0'); size_t const len = obstack_object_size(&symbol_obstack) - 1; @@ -417,7 +417,7 @@ static string_t sym_make_string(void) #else const char *result = string; #endif - return (string_t) {result, len}; + return (string_t){ result, len, enc }; } /** @@ -444,7 +444,7 @@ finish_suffix: return; } - lexer_token.number.suffix = sym_make_string(); + lexer_token.number.suffix = 
sym_make_string(STRING_ENCODING_CHAR); } static void parse_exponent(void) @@ -500,7 +500,7 @@ static void parse_number_hex(void) "hexadecimal floatingpoint constant requires an exponent"); } - lexer_token.number.number = sym_make_string(); + lexer_token.number.number = sym_make_string(STRING_ENCODING_CHAR); lexer_token.kind = is_float ? T_FLOATINGPOINT : T_INTEGER; @@ -523,7 +523,7 @@ static void parse_number_bin(void) next_char(); } - lexer_token.number.number = sym_make_string(); + lexer_token.number.number = sym_make_string(STRING_ENCODING_CHAR); lexer_token.kind = T_INTEGER; if (!has_digits) { @@ -596,7 +596,7 @@ static void parse_number(void) parse_exponent(); } - lexer_token.number.number = sym_make_string(); + lexer_token.number.number = sym_make_string(STRING_ENCODING_CHAR); if (is_float) { lexer_token.kind = T_FLOATINGPOINT; @@ -744,7 +744,7 @@ static utf32 parse_escape_sequence(void) string_t make_string(const char *string) { obstack_grow(&symbol_obstack, string, strlen(string)); - return sym_make_string(); + return sym_make_string(STRING_ENCODING_CHAR); } static void parse_string(utf32 const delim, token_kind_t const kind, string_encoding_t const enc, char const *const context) @@ -787,9 +787,8 @@ static void parse_string(utf32 const delim, token_kind_t const kind, string_enco } end_of_string: - lexer_token.kind = kind; - lexer_token.string.encoding = enc; - lexer_token.string.string = sym_make_string(); + lexer_token.kind = kind; + lexer_token.string.string = sym_make_string(enc); } /** @@ -912,8 +911,8 @@ static void parse_line_directive(void) lexer_pos.lineno = atoi(pp_token.number.number.begin) - 1; next_pp_token(); } - if (pp_token.kind == T_STRING_LITERAL && pp_token.string.encoding == STRING_ENCODING_CHAR) { - lexer_pos.input_name = pp_token.string.string.begin; + if (pp_token.kind == T_STRING_LITERAL && pp_token.string.string.encoding == STRING_ENCODING_CHAR) { + lexer_pos.input_name = pp_token.string.string.begin; lexer_pos.is_system_header = 
false; next_pp_token(); diff --git a/parser.c b/parser.c index 3fb68d8..e2e560a 100644 --- a/parser.c +++ b/parser.c @@ -1038,51 +1038,49 @@ static void append_string(string_t const *const s) obstack_grow(&ast_obstack, s->begin, s->size); } -static string_t finish_string(void) +static string_t finish_string(string_encoding_t const enc) { obstack_1grow(&ast_obstack, '\0'); size_t const size = obstack_object_size(&ast_obstack) - 1; char const *const string = obstack_finish(&ast_obstack); - return (string_t){ string, size }; + return (string_t){ string, size, enc }; } -static string_t concat_string_literals(string_encoding_t *const out_enc) +static string_t concat_string_literals(void) { assert(token.kind == T_STRING_LITERAL); - string_t result; - string_encoding_t enc = token.string.encoding; + string_t result; if (look_ahead(1)->kind == T_STRING_LITERAL) { + string_encoding_t enc = token.string.string.encoding; /* seed from the FIRST literal, before eat() advances the token; otherwise L"a" "b" is tagged as a plain char string */ append_string(&token.string.string); eat(T_STRING_LITERAL); warningf(WARN_TRADITIONAL, HERE, "traditional C rejects string constant concatenation"); do { - if (token.string.encoding != STRING_ENCODING_CHAR) { - enc = token.string.encoding; + if (token.string.string.encoding != STRING_ENCODING_CHAR) { + enc = token.string.string.encoding; } append_string(&token.string.string); eat(T_STRING_LITERAL); } while (token.kind == T_STRING_LITERAL); - result = finish_string(); + result = finish_string(enc); } else { result = token.string.string; eat(T_STRING_LITERAL); } - *out_enc = enc; return result; } static string_t parse_string_literals(char const *const context) { if (!skip_till(T_STRING_LITERAL, context)) - return (string_t){ "", 0 }; + return (string_t){ "", 0, STRING_ENCODING_CHAR }; - string_encoding_t enc; source_position_t const pos = *HERE; - string_t const res = concat_string_literals(&enc); + string_t const res = concat_string_literals(); - if (enc != STRING_ENCODING_CHAR) { + if (res.encoding != STRING_ENCODING_CHAR) { errorf(&pos, "expected plain
string literal, got wide string literal"); } @@ -1557,7 +1555,7 @@ static initializer_t *initializer_from_expression(type_t *orig_type, if (expression->kind == EXPR_STRING_LITERAL && is_type_array(type)) { array_type_t *const array_type = &type->array; type_t *const element_type = skip_typeref(array_type->element_type); - switch (expression->string_literal.encoding) { + switch (expression->string_literal.value.encoding) { case STRING_ENCODING_CHAR: { if (is_type_atomic(element_type, ATOMIC_TYPE_CHAR) || is_type_atomic(element_type, ATOMIC_TYPE_SCHAR) || @@ -2206,8 +2204,7 @@ static initializer_t *parse_initializer(parse_initializer_env_t *env) break; case INITIALIZER_STRING: { - string_literal_expression_t const *const str = get_init_string(result); - size = get_string_len(str->encoding, &str->value) + 1; + size = get_string_len(&get_init_string(result)->value) + 1; break; } @@ -5687,8 +5684,8 @@ static type_t *get_string_type(string_encoding_t const enc) static expression_t *parse_string_literal(void) { expression_t *const expr = allocate_expression_zero(EXPR_STRING_LITERAL); - expr->string_literal.value = concat_string_literals(&expr->string_literal.encoding); - expr->base.type = get_string_type(expr->string_literal.encoding); + expr->string_literal.value = concat_string_literals(); + expr->base.type = get_string_type(expr->string_literal.value.encoding); return expr; } @@ -5818,11 +5815,10 @@ static expression_t *parse_number_literal(void) static expression_t *parse_character_constant(void) { expression_t *const literal = allocate_expression_zero(EXPR_LITERAL_CHARACTER); - literal->string_literal.encoding = token.string.encoding; - literal->string_literal.value = token.string.string; + literal->string_literal.value = token.string.string; - size_t const size = get_string_len(token.string.encoding, &token.string.string); - switch (token.string.encoding) { + size_t const size = get_string_len(&token.string.string); + switch (token.string.string.encoding) { case 
STRING_ENCODING_CHAR: literal->base.type = c_mode & _CXX ? type_char : type_int; if (size > 1) { @@ -5932,7 +5928,7 @@ type_t *revert_automatic_type_conversion(const expression_t *expression) } case EXPR_STRING_LITERAL: { - size_t const size = get_string_len(expression->string_literal.encoding, &expression->string_literal.value) + 1; + size_t const size = get_string_len(&expression->string_literal.value) + 1; type_t *const elem = get_unqualified_type(expression->base.type->pointer.points_to); return make_array_type(elem, size, TYPE_QUALIFIER_NONE); } diff --git a/preprocessor.c b/preprocessor.c index a9b246d..3829329 100644 --- a/preprocessor.c +++ b/preprocessor.c @@ -451,13 +451,13 @@ static const char *identify_string(char *string) return result; } -static string_t sym_make_string(void) +static string_t sym_make_string(string_encoding_t const enc) { obstack_1grow(&symbol_obstack, '\0'); size_t const len = obstack_object_size(&symbol_obstack) - 1; char *const string = obstack_finish(&symbol_obstack); char const *const result = identify_string(string); - return (string_t) {result, len}; + return (string_t){ result, len, enc }; } static void parse_string(utf32 const delimiter, preprocessor_token_kind_t const kind, string_encoding_t const enc, char const *const context) @@ -513,9 +513,8 @@ static void parse_string(utf32 const delimiter, preprocessor_token_kind_t const } end_of_string: - pp_token.kind = kind; - pp_token.string.encoding = enc; - pp_token.string.string = sym_make_string(); + pp_token.kind = kind; + pp_token.string.string = sym_make_string(enc); } static void parse_string_literal(string_encoding_t const enc) @@ -824,7 +823,7 @@ static void parse_number(void) end_number: pp_token.kind = TP_NUMBER; - pp_token.number.number = sym_make_string(); + pp_token.number.number = sym_make_string(STRING_ENCODING_CHAR); } @@ -1138,14 +1137,14 @@ static void emit_pp_token(void) break; case TP_STRING_LITERAL: - 
fputs(get_string_encoding_prefix(pp_token.string.encoding), out); + fputs(get_string_encoding_prefix(pp_token.string.string.encoding), out); fputc('"', out); fputs(pp_token.string.string.begin, out); fputc('"', out); break; case TP_CHARACTER_CONSTANT: - fputs(get_string_encoding_prefix(pp_token.string.encoding), out); + fputs(get_string_encoding_prefix(pp_token.string.string.encoding), out); fputc('\'', out); fputs(pp_token.string.string.begin, out); fputc('\'', out); @@ -1344,7 +1343,7 @@ static void parse_undef_directive(void) static void parse_headername(void) { const source_position_t start_position = input.position; - string_t string = {NULL, 0}; + string_t string = { NULL, 0, STRING_ENCODING_CHAR }; assert(obstack_object_size(&symbol_obstack) == 0); /* behind an #include we can have the special headername lexems. @@ -1391,7 +1390,7 @@ parse_name: } finished_headername: - string = sym_make_string(); + string = sym_make_string(STRING_ENCODING_CHAR); finish_error: pp_token.base.source_position = start_position; diff --git a/string_rep.c b/string_rep.c index 19c59da..ff58aad 100644 --- a/string_rep.c +++ b/string_rep.c @@ -13,9 +13,9 @@ static inline size_t wstrlen(const string_t *string) return result; } -size_t get_string_len(string_encoding_t const enc, string_t const *const str) +size_t get_string_len(string_t const *const str) { - switch (enc) { + switch (str->encoding) { case STRING_ENCODING_CHAR: return str->size; case STRING_ENCODING_WIDE: return wstrlen(str); } diff --git a/string_rep.h b/string_rep.h index fd6c00b..c0868eb 100644 --- a/string_rep.h +++ b/string_rep.h @@ -30,11 +30,11 @@ enum string_encoding_t { typedef enum string_encoding_t string_encoding_t; typedef struct string_t { - const char *begin; /**< UTF-8 encoded string, the last character is - * guaranteed to be 0 */ - size_t size; /**< size of string in bytes (not characters) */ + char const *begin; /**< UTF-8 encoded string, the last character is guaranteed to be \0. 
*/ + size_t size; /**< size of string in bytes (not characters), without terminating \0. */ + string_encoding_t encoding; } string_t; -size_t get_string_len(string_encoding_t enc, string_t const *str); +size_t get_string_len(string_t const *str); #endif diff --git a/token.c b/token.c index 5d97f92..8184554 100644 --- a/token.c +++ b/token.c @@ -161,7 +161,7 @@ void print_token(FILE *f, const token_t *token) case T_CHARACTER_CONSTANT: delim = '\''; goto print_string; print_string: print_token_kind(f, (token_kind_t)token->kind); - fprintf(f, " %s%c", get_string_encoding_prefix(token->string.encoding), delim); + fprintf(f, " %s%c", get_string_encoding_prefix(token->string.string.encoding), delim); print_stringrep(&token->string.string, f); fputc(delim, f); break; diff --git a/token_t.h b/token_t.h index 5d66af4..7de0e6a 100644 --- a/token_t.h +++ b/token_t.h @@ -71,9 +71,8 @@ struct token_base_t { }; struct string_literal_t { - token_base_t base; - string_encoding_t encoding; - string_t string; + token_base_t base; + string_t string; }; struct number_literal_t { -- 2.20.1