static const backend_params *be_params; /* assigned from be_get_backend_param() */
static ir_type *ir_type_char; /* assigned from get_ir_type(type_char) */
-static ir_type *ir_type_wchar_t;
/* architecture specific floating point arithmetic mode (if any) */
static ir_mode *mode_float_arithmetic;
ir_initializer_t *const initializer = create_initializer_compound(slen);
ir_type * elem_type;
switch (value->encoding) {
- case STRING_ENCODING_CHAR: {
+ case STRING_ENCODING_CHAR:
+ case STRING_ENCODING_UTF8: {
elem_type = ir_type_char;
ir_mode *const mode = get_type_mode(elem_type);
goto finish;
}
- case STRING_ENCODING_WIDE: {
- elem_type = ir_type_wchar_t;
+ {
+ type_t *type;
+ case STRING_ENCODING_CHAR16: type = type_char16_t; goto init_wide;
+ case STRING_ENCODING_CHAR32: type = type_char32_t; goto init_wide;
+ case STRING_ENCODING_WIDE: type = type_wchar_t; goto init_wide;
+init_wide:;
+ elem_type = get_ir_type(type);
ir_mode *const mode = get_type_mode(elem_type);
char const *p = value->begin;
char const * p = str->value.begin;
switch (str->value.encoding) {
case STRING_ENCODING_CHAR:
+ case STRING_ENCODING_UTF8:
for (size_t i = 0; i != arr_len; ++i) {
char const c = i < str_len ? *p++ : 0;
ir_tarval *const tv = new_tarval_from_long(c, mode);
}
break;
+ case STRING_ENCODING_CHAR16:
+ case STRING_ENCODING_CHAR32:
case STRING_ENCODING_WIDE:
for (size_t i = 0; i != arr_len; ++i) {
utf32 const c = i < str_len ? read_utf8_char(&p) : 0;
return;
ir_types_initialized = 1;
- ir_type_char = get_ir_type(type_char);
- ir_type_wchar_t = get_ir_type(type_wchar_t);
+ ir_type_char = get_ir_type(type_char);
be_params = be_get_backend_param();
mode_float_arithmetic = be_params->mode_float_arithmetic;
warningf(WARN_TRADITIONAL, HERE, "traditional C rejects string constant concatenation");
string_encoding_t enc = token.literal.string.encoding;
do {
- if (token.literal.string.encoding != STRING_ENCODING_CHAR) {
- enc = token.literal.string.encoding;
+ string_encoding_t const new_enc = token.literal.string.encoding;
+ if (new_enc != enc && new_enc != STRING_ENCODING_CHAR) {
+ if (enc == STRING_ENCODING_CHAR) {
+ enc = new_enc;
+ } else {
+ errorf(HERE, "concatenating string literals with encodings %s and %s", get_string_encoding_prefix(enc), get_string_encoding_prefix(new_enc));
+ }
}
append_string(&token.literal.string);
eat(T_STRING_LITERAL);
string_t const res = concat_string_literals();
if (res.encoding != STRING_ENCODING_CHAR) {
- errorf(&pos, "expected plain string literal, got wide string literal");
+ errorf(&pos, "expected plain string literal, got %s string literal", get_string_encoding_prefix(res.encoding));
}
return res;
array_type_t *const array_type = &type->array;
type_t *const element_type = skip_typeref(array_type->element_type);
switch (expression->string_literal.value.encoding) {
- case STRING_ENCODING_CHAR: {
+ case STRING_ENCODING_CHAR:
+ case STRING_ENCODING_UTF8: {
if (is_type_atomic(element_type, ATOMIC_TYPE_CHAR) ||
is_type_atomic(element_type, ATOMIC_TYPE_SCHAR) ||
is_type_atomic(element_type, ATOMIC_TYPE_UCHAR)) {
break;
}
+ case STRING_ENCODING_CHAR16:
+ case STRING_ENCODING_CHAR32:
case STRING_ENCODING_WIDE: {
- type_t *bare_wchar_type = skip_typeref(type_wchar_t);
- if (get_unqualified_type(element_type) == bare_wchar_type) {
+ assert(is_type_pointer(expression->base.type));
+ type_t *const init_type = get_unqualified_type(expression->base.type->pointer.points_to);
+ if (types_compatible(get_unqualified_type(element_type), init_type)) {
make_string_init:;
initializer_t *const init = allocate_initializer_zero(INITIALIZER_STRING);
init->value.value = expression;
{
bool const warn = is_warn_on(WARN_WRITE_STRINGS);
switch (enc) {
- case STRING_ENCODING_CHAR: return warn ? type_const_char_ptr : type_char_ptr;
- case STRING_ENCODING_WIDE: return warn ? type_const_wchar_t_ptr : type_wchar_t_ptr;
+ case STRING_ENCODING_CHAR:
+ case STRING_ENCODING_UTF8: return warn ? type_const_char_ptr : type_char_ptr;
+ case STRING_ENCODING_CHAR16: return warn ? type_char16_t_const_ptr : type_char16_t_ptr;
+ case STRING_ENCODING_CHAR32: return warn ? type_char32_t_const_ptr : type_char32_t_ptr;
+ case STRING_ENCODING_WIDE: return warn ? type_const_wchar_t_ptr : type_wchar_t_ptr;
}
panic("invalid string encoding");
}
size_t const size = get_string_len(&token.literal.string);
switch (token.literal.string.encoding) {
case STRING_ENCODING_CHAR:
+ case STRING_ENCODING_UTF8:
literal->base.type = c_mode & _CXX ? type_char : type_int;
if (size > 1) {
if (!GNU_MODE && !(c_mode & _C99)) {
}
break;
- case STRING_ENCODING_WIDE:
- literal->base.type = type_int;
+ case STRING_ENCODING_CHAR16: literal->base.type = type_char16_t; goto warn_multi;
+ case STRING_ENCODING_CHAR32: literal->base.type = type_char32_t; goto warn_multi;
+ case STRING_ENCODING_WIDE: literal->base.type = type_wchar_t; goto warn_multi;
+warn_multi:
if (size > 1) {
warningf(WARN_MULTICHAR, HERE, "multi-character character constant");
}
static symbol_t *symbol_percentcolonpercentcolon;
static symbol_t *symbol_percentgreater;
+static symbol_t *symbol_L; /* symbol for the spelling "L", set in init_symbols() */
+static symbol_t *symbol_U; /* symbol for the spelling "U", set in init_symbols() */
+static symbol_t *symbol_u; /* symbol for the spelling "u", set in init_symbols() */
+static symbol_t *symbol_u8; /* symbol for the spelling "u8", set in init_symbols() */
+
/* Passes each fixed spelling below to symbol_table_insert() and keeps the
 * returned symbol in the matching symbol_* variable. */
static void init_symbols(void)
{
symbol_colongreater = symbol_table_insert(":>");
symbol_percentcolon = symbol_table_insert("%:");
symbol_percentcolonpercentcolon = symbol_table_insert("%:%:");
symbol_percentgreater = symbol_table_insert("%>");
+
+ symbol_L = symbol_table_insert("L"); /* literal prefixes; compared by pointer in identify_encoding_prefix() */
+ symbol_U = symbol_table_insert("U");
+ symbol_u = symbol_table_insert("u");
+ symbol_u8 = symbol_table_insert("u8");
}
void switch_pp_input(FILE *const file, char const *const filename, searchpath_entry_t *const path, bool const is_system_header)
return sym_make_string(STRING_ENCODING_CHAR);
}
+/**
+ * Returns the largest value an escape sequence may have in a literal of
+ * encoding @p enc; parse_string() warns about escape values above it.
+ * Panics if @p enc is none of the enumerators handled below.
+ */
+static utf32 get_string_encoding_limit(string_encoding_t const enc)
+{
+	switch (enc) {
+	case STRING_ENCODING_CHAR:
+		return 0xFF;
+
+	case STRING_ENCODING_CHAR16:
+		return 0xFFFF;
+
+	/* These three encodings share the full 32 bit range. */
+	case STRING_ENCODING_CHAR32:
+	case STRING_ENCODING_UTF8:
+	case STRING_ENCODING_WIDE: // FIXME depends on settings
+		return 0xFFFFFFFF;
+	}
+	panic("invalid string encoding");
+}
+
static void parse_string(utf32 const delimiter, token_kind_t const kind,
string_encoding_t const enc,
char const *const context)
eat(delimiter);
+ utf32 const limit = get_string_encoding_limit(enc);
while (true) {
switch (input.c) {
case '\\': {
if (resolve_escape_sequences) {
utf32 const tc = parse_escape_sequence();
+ if (tc > limit) {
+ warningf(WARN_OTHER, &pp_token.base.source_position, "escape sequence out of range");
+ }
if (enc == STRING_ENCODING_CHAR) {
- if (tc >= 0x100) {
- warningf(WARN_OTHER, &pp_token.base.source_position, "escape sequence out of range");
- }
obstack_1grow(&symbol_obstack, tc);
} else {
obstack_grow_utf8(&symbol_obstack, tc);
next_input_token();
}
+/**
+ * Maps the symbol @p sym to the literal encoding it selects when used as a
+ * string or character literal prefix.
+ *
+ * "L" always selects STRING_ENCODING_WIDE.  "U", "u" and "u8" are only
+ * recognised when c_mode has the _C11 bit set.  Any other symbol yields
+ * STRING_ENCODING_CHAR, which the callers treat as "not a prefix".
+ */
+static string_encoding_t identify_encoding_prefix(symbol_t *const sym)
+{
+	if (sym == symbol_L) {
+		return STRING_ENCODING_WIDE;
+	}
+
+	/* Without C11 no further prefix exists. */
+	if (!(c_mode & _C11)) {
+		return STRING_ENCODING_CHAR;
+	}
+
+	if (sym == symbol_U) {
+		return STRING_ENCODING_CHAR32;
+	}
+	if (sym == symbol_u) {
+		return STRING_ENCODING_CHAR16;
+	}
+	if (sym == symbol_u8) {
+		return STRING_ENCODING_UTF8;
+	}
+	return STRING_ENCODING_CHAR;
+}
+
static void parse_symbol(void)
{
assert(obstack_object_size(&symbol_obstack) == 0);
obstack_1grow(&symbol_obstack, '\0');
char *string = obstack_finish(&symbol_obstack);
- /* might be a wide string or character constant ( L"string"/L'c' ) */
- if (input.c == '"' && string[0] == 'L' && string[1] == '\0') {
- obstack_free(&symbol_obstack, string);
- parse_string_literal(STRING_ENCODING_WIDE);
- return;
- } else if (input.c == '\'' && string[0] == 'L' && string[1] == '\0') {
- obstack_free(&symbol_obstack, string);
- parse_character_constant(STRING_ENCODING_WIDE);
- return;
- }
-
symbol_t *symbol = symbol_table_insert(string);
+	/* Might be a prefixed string or character constant: L/U/u/u8"string".
+	 * identify_encoding_prefix() returns STRING_ENCODING_CHAR when the symbol
+	 * is not a prefix; in that case fall through and treat it as an ordinary
+	 * symbol. */
+	if (input.c == '"') {
+		string_encoding_t const enc = identify_encoding_prefix(symbol);
+		if (enc != STRING_ENCODING_CHAR) {
+			parse_string_literal(enc);
+			return;
+		}
+	} else if (input.c == '\'') {
+		string_encoding_t const enc = identify_encoding_prefix(symbol);
+		if (enc != STRING_ENCODING_CHAR) {
+			/* u8 is diagnosed here, but the constant is still parsed below. */
+			if (enc == STRING_ENCODING_UTF8) {
+				errorf(&pp_token.base.source_position, "'u8' is not a valid encoding for a character constant");
+			}
+			parse_character_constant(enc);
+			return;
+		}
+	}
+
pp_token.kind = symbol->ID;
pp_token.base.symbol = symbol;
/* Returns the length of str, computed according to str->encoding.
 * Panics if the encoding is none of the values listed in the switch. */
size_t get_string_len(string_t const *const str)
{
switch (str->encoding) {
- case STRING_ENCODING_CHAR: return str->size;
- case STRING_ENCODING_WIDE: return wstrlen(str);
+ case STRING_ENCODING_CHAR:
+ case STRING_ENCODING_UTF8: return str->size; /* length is the stored size field */
+ case STRING_ENCODING_CHAR16:
+ case STRING_ENCODING_CHAR32:
+ case STRING_ENCODING_WIDE: return wstrlen(str); /* NOTE(review): presumably counts decoded characters rather than stored bytes - confirm against wstrlen() */
}
panic("invalid string encoding");
}
/* Encoding of a string or character literal.  The comment on each enumerator
 * gives the prefix that get_string_encoding_prefix() returns for it. */
enum string_encoding_t {
STRING_ENCODING_CHAR, /* "" (no prefix) */
+ STRING_ENCODING_CHAR16, /* "u" */
+ STRING_ENCODING_CHAR32, /* "U" */
+ STRING_ENCODING_UTF8, /* "u8" */
STRING_ENCODING_WIDE /* "L" */
};
typedef enum string_encoding_t string_encoding_t;
/* Returns the literal prefix spelling for enc as a constant string; the
 * plain char encoding yields the empty string.
 * Panics if enc is none of the values listed in the switch. */
char const *get_string_encoding_prefix(string_encoding_t const enc)
{
switch (enc) {
- case STRING_ENCODING_CHAR: return "";
- case STRING_ENCODING_WIDE: return "L";
+ case STRING_ENCODING_CHAR: return "";
+ case STRING_ENCODING_CHAR16: return "u";
+ case STRING_ENCODING_CHAR32: return "U";
+ case STRING_ENCODING_UTF8: return "u8";
+ case STRING_ENCODING_WIDE: return "L";
}
panic("invalid string encoding");
}
type_t *type_char_ptr_ptr;
+type_t *type_char16_t; /* atomic type of the kind returned by find_unsigned_int_atomic_type_kind_for_size(2) */
+type_t *type_char32_t; /* atomic type of the kind returned by find_unsigned_int_atomic_type_kind_for_size(4) */
+type_t *type_char16_t_const; /* same kind as type_char16_t, created with TYPE_QUALIFIER_CONST */
+type_t *type_char32_t_const; /* same kind as type_char32_t, created with TYPE_QUALIFIER_CONST */
type_t *type_intmax_t;
type_t *type_ptrdiff_t;
type_t *type_size_t;
type_t *type_int32_t;
type_t *type_int64_t;
+type_t *type_char16_t_ptr; /* pointer to type_char16_t */
+type_t *type_char32_t_ptr; /* pointer to type_char32_t */
+type_t *type_char16_t_const_ptr; /* pointer to type_char16_t_const */
+type_t *type_char32_t_const_ptr; /* pointer to type_char32_t_const */
type_t *type_intmax_t_ptr;
type_t *type_ptrdiff_t_ptr;
type_t *type_ssize_t_ptr;
type_wchar_t_ptr = make_pointer_type(type_wchar_t, TYPE_QUALIFIER_NONE);
type_const_wchar_t_ptr
= make_pointer_type(type_const_wchar_t, TYPE_QUALIFIER_NONE);
+
+ atomic_type_kind_t const u2 = find_unsigned_int_atomic_type_kind_for_size(2);
+ type_char16_t = make_atomic_type(u2, TYPE_QUALIFIER_NONE);
+ type_char16_t_const = make_atomic_type(u2, TYPE_QUALIFIER_CONST);
+ type_char16_t_ptr = make_pointer_type(type_char16_t, TYPE_QUALIFIER_NONE);
+ type_char16_t_const_ptr = make_pointer_type(type_char16_t_const, TYPE_QUALIFIER_NONE);
+
+ atomic_type_kind_t const u4 = find_unsigned_int_atomic_type_kind_for_size(4);
+ type_char32_t = make_atomic_type(u4, TYPE_QUALIFIER_NONE);
+ type_char32_t_const = make_atomic_type(u4, TYPE_QUALIFIER_CONST);
+ type_char32_t_ptr = make_pointer_type(type_char32_t, TYPE_QUALIFIER_NONE);
+ type_char32_t_const_ptr = make_pointer_type(type_char32_t_const, TYPE_QUALIFIER_NONE);
}
extern type_t *type_char_ptr_ptr;
+extern type_t *type_char16_t; /* used for STRING_ENCODING_CHAR16 literals */
+extern type_t *type_char32_t; /* used for STRING_ENCODING_CHAR32 literals */
+extern type_t *type_char16_t_const; /* const-qualified type_char16_t */
+extern type_t *type_char32_t_const; /* const-qualified type_char32_t */
extern type_t *type_intmax_t;
extern type_t *type_ptrdiff_t;
extern type_t *type_size_t;
extern type_t *type_int32_t;
extern type_t *type_int64_t;
+extern type_t *type_char16_t_ptr; /* pointer to type_char16_t */
+extern type_t *type_char32_t_ptr; /* pointer to type_char32_t */
+extern type_t *type_char16_t_const_ptr; /* pointer to type_char16_t_const */
+extern type_t *type_char32_t_const_ptr; /* pointer to type_char32_t_const */
extern type_t *type_intmax_t_ptr;
extern type_t *type_ptrdiff_t_ptr;
extern type_t *type_ssize_t_ptr;