/**
* Parse a string literal and set lexer_token.
*/
-static void parse_string_literal(void)
+static void parse_string_literal(string_encoding_t const enc)
{
eat('"');
const size_t size = (size_t)obstack_object_size(&symbol_obstack);
char *string = obstack_finish(&symbol_obstack);
- lexer_token.kind = T_STRING_LITERAL;
- lexer_token.string.string = identify_string(string, size);
+ lexer_token.kind = T_STRING_LITERAL;
+ lexer_token.string.encoding = enc;
+ lexer_token.string.string = identify_string(string, size);
}
/**
}
}
-/**
- * Parse a wide string literal and set lexer_token.
- */
-static void parse_wide_string_literal(void)
-{
- parse_string_literal();
- if (lexer_token.kind == T_STRING_LITERAL)
- lexer_token.kind = T_WIDE_STRING_LITERAL;
-}
-
/**
* Parse a character constant and set lexer_token.
*/
lexer_pos.lineno = atoi(pp_token.number.number.begin) - 1;
next_pp_token();
}
- if (pp_token.kind == T_STRING_LITERAL) {
+ if (pp_token.kind == T_STRING_LITERAL && pp_token.string.encoding == STRING_ENCODING_CHAR) {
lexer_pos.input_name = pp_token.string.string.begin;
lexer_pos.is_system_header = false;
next_pp_token();
return;
)
- SYMBOL_CHARS
+ SYMBOL_CHARS {
parse_symbol();
/* might be a wide string ( L"string" ) */
+ string_encoding_t const enc = STRING_ENCODING_WIDE;
if (lexer_token.base.symbol == symbol_L) {
switch (c) {
- case '"': parse_wide_string_literal(); break;
+ case '"': parse_string_literal(enc); break;
case '\'': parse_wide_character_constant(); break;
}
}
return;
+ }
DIGITS
parse_number();
return;
case '"':
- parse_string_literal();
+ parse_string_literal(STRING_ENCODING_CHAR);
return;
case '\'':
case T_PLUSPLUS: \
case T_STRING_LITERAL: \
case T_WIDE_CHARACTER_CONSTANT: \
- case T_WIDE_STRING_LITERAL: \
case T___FUNCDNAME__: \
case T___FUNCSIG__: \
case T___FUNCTION__: \
return (string_t){ string, size };
}
-static string_t concat_string_literals(bool *const out_is_wide)
+static string_t concat_string_literals(string_encoding_t *const out_enc) /* joins the run of string literal tokens at the current position; stores the resulting encoding in *out_enc */
{
- assert(token.kind == T_STRING_LITERAL || token.kind == T_WIDE_STRING_LITERAL);
+ assert(token.kind == T_STRING_LITERAL);
- string_t result;
- bool is_wide = token.kind == T_WIDE_STRING_LITERAL;
- token_kind_t const la1 = (token_kind_t)look_ahead(1)->kind;
- if (la1 == T_STRING_LITERAL || la1 == T_WIDE_STRING_LITERAL) {
+ string_t result;
+ string_encoding_t enc = token.string.encoding; /* begin with the encoding of the first literal */
+ if (look_ahead(1)->kind == T_STRING_LITERAL) { /* at least two literals in a row: build a combined string */
append_string(&token.string.string);
- next_token();
+ eat(T_STRING_LITERAL);
warningf(WARN_TRADITIONAL, HERE, "traditional C rejects string constant concatenation");
do {
- is_wide |= token.kind == T_WIDE_STRING_LITERAL;
+ if (token.string.encoding != STRING_ENCODING_CHAR) { /* a non-char piece overrides the encoding of the result */
+ enc = token.string.encoding;
+ }
append_string(&token.string.string);
- next_token();
- } while (token.kind == T_STRING_LITERAL || token.kind == T_WIDE_STRING_LITERAL);
+ eat(T_STRING_LITERAL);
+ } while (token.kind == T_STRING_LITERAL);
result = finish_string();
} else {
result = token.string.string;
- next_token();
+ eat(T_STRING_LITERAL);
}
- *out_is_wide = is_wide;
+ *out_enc = enc; /* hand the final encoding back to the caller */
return result;
}
static string_t parse_string_literals(void)
{
- bool is_wide;
+ string_encoding_t enc;
source_position_t const pos = *HERE;
- string_t const res = concat_string_literals(&is_wide);
+ string_t const res = concat_string_literals(&enc);
- if (is_wide) {
+ if (enc != STRING_ENCODING_CHAR) {
errorf(&pos, "expected plain string literal, got wide string literal");
}
*/
static expression_t *parse_string_literal(void)
{
- bool is_wide;
+ string_encoding_t enc;
source_position_t const pos = *HERE;
- string_t const res = concat_string_literals(&is_wide);
+ string_t const res = concat_string_literals(&enc);
expression_t *literal;
- if (is_wide) {
+ if (enc != STRING_ENCODING_CHAR) {
literal = allocate_expression_zero(EXPR_WIDE_STRING_LITERAL);
literal->base.type = get_wide_string_type();
} else {
case T_FLOATINGPOINT: return parse_number_literal();
case T_CHARACTER_CONSTANT: return parse_character_constant();
case T_WIDE_CHARACTER_CONSTANT: return parse_wide_character_constant();
- case T_STRING_LITERAL:
- case T_WIDE_STRING_LITERAL: return parse_string_literal();
+ case T_STRING_LITERAL: return parse_string_literal();
case T___FUNCTION__:
case T___func__: return parse_function_keyword(FUNCNAME_FUNCTION);
case T___PRETTY_FUNCTION__: return parse_function_keyword(FUNCNAME_PRETTY_FUNCTION);
add_anchor_token(T_PLUSPLUS);
add_anchor_token(T_STRING_LITERAL);
add_anchor_token(T_WIDE_CHARACTER_CONSTANT);
- add_anchor_token(T_WIDE_STRING_LITERAL);
add_anchor_token(T__Bool);
add_anchor_token(T__Complex);
add_anchor_token(T__Imaginary);
rem_anchor_token(T__Imaginary);
rem_anchor_token(T__Complex);
rem_anchor_token(T__Bool);
- rem_anchor_token(T_WIDE_STRING_LITERAL);
rem_anchor_token(T_WIDE_CHARACTER_CONSTANT);
rem_anchor_token(T_STRING_LITERAL);
rem_anchor_token(T_PLUSPLUS);
#include "symbol.h"
#include "lang_features.h"
#include "adt/array.h"
+#include "adt/error.h"
#include "adt/util.h"
static symbol_t *token_symbols[T_LAST_TOKEN];
}
}
+char const *get_string_encoding_prefix(string_encoding_t const enc) /* text to print in front of the opening quote of a literal with encoding enc */
+{
+ switch (enc) {
+ case STRING_ENCODING_CHAR: return ""; /* plain string literal: no prefix */
+ case STRING_ENCODING_WIDE: return "L"; /* wide string literal: L"..." */
+ }
+ panic("invalid string encoding"); /* reached only when enc matched none of the cases above */
+}
+
static void print_stringrep(const string_t *string, FILE *f)
{
for (size_t i = 0; i < string->size; ++i) {
print_stringrep(&token->number.suffix, f);
fputc('\'', f);
break;
- case T_WIDE_STRING_LITERAL:
+
case T_STRING_LITERAL:
print_token_kind(f, (token_kind_t)token->kind);
- fprintf(f, " \"%s\"", token->string.string.begin);
+ fprintf(f, " %s\"%s\"", get_string_encoding_prefix(token->string.encoding), token->string.string.begin);
break;
+
case T_CHARACTER_CONSTANT:
case T_WIDE_CHARACTER_CONSTANT:
print_token_kind(f, (token_kind_t)token->kind);
symbol_t *symbol;
};
+enum string_encoding_t {
+ STRING_ENCODING_CHAR, /* literal written without a prefix: "..." */
+ STRING_ENCODING_WIDE /* literal written with the L prefix: L"..." */
+};
+typedef enum string_encoding_t string_encoding_t;
+
struct string_literal_t {
- token_base_t base;
- string_t string;
+ token_base_t base;
+ string_encoding_t encoding; /* char or wide; the lexer stores it together with the string */
+ string_t string; /* literal contents, as returned by identify_string() in the lexer */
};
struct number_literal_t {
number_literal_t number;
};
+char const *get_string_encoding_prefix(string_encoding_t);
+
void init_tokens(void);
void exit_tokens(void);
void print_token_kind(FILE *out, token_kind_t token_kind);
TS(CHARACTER_CONSTANT, "character constant",)
TS(WIDE_CHARACTER_CONSTANT, "wide character constant",)
TS(STRING_LITERAL, "string literal",)
-TS(WIDE_STRING_LITERAL, "wide string literal",)
#define S(mode,x) T(mode,x,#x,)
S(_ALL, auto)