Add the field encoding to struct string_literal_t and merge T_WIDE_STRING_LITERAL into T_STRING_LITERAL.
author Christoph Mallon <christoph.mallon@gmx.de>
Mon, 21 May 2012 06:29:49 +0000 (08:29 +0200)
committer Christoph Mallon <christoph.mallon@gmx.de>
Wed, 23 May 2012 04:48:13 +0000 (06:48 +0200)
lexer.c
parser.c
token.c
token_t.h
tokens.inc

diff --git a/lexer.c b/lexer.c
index 7acabb5..3b9c4cb 100644 (file)
--- a/lexer.c
+++ b/lexer.c
@@ -644,7 +644,7 @@ string_t make_string(const char *string)
 /**
  * Parse a string literal and set lexer_token.
  */
-static void parse_string_literal(void)
+static void parse_string_literal(string_encoding_t const enc)
 {
        eat('"');
 
@@ -683,8 +683,9 @@ end_of_string:
        const size_t  size   = (size_t)obstack_object_size(&symbol_obstack);
        char         *string = obstack_finish(&symbol_obstack);
 
-       lexer_token.kind          = T_STRING_LITERAL;
-       lexer_token.string.string = identify_string(string, size);
+       lexer_token.kind            = T_STRING_LITERAL;
+       lexer_token.string.encoding = enc;
+       lexer_token.string.string   = identify_string(string, size);
 }
 
 /**
@@ -735,16 +736,6 @@ end_of_wide_char_constant:;
        }
 }
 
-/**
- * Parse a wide string literal and set lexer_token.
- */
-static void parse_wide_string_literal(void)
-{
-       parse_string_literal();
-       if (lexer_token.kind == T_STRING_LITERAL)
-               lexer_token.kind = T_WIDE_STRING_LITERAL;
-}
-
 /**
  * Parse a character constant and set lexer_token.
  */
@@ -897,7 +888,7 @@ static void parse_line_directive(void)
                lexer_pos.lineno = atoi(pp_token.number.number.begin) - 1;
                next_pp_token();
        }
-       if (pp_token.kind == T_STRING_LITERAL) {
+       if (pp_token.kind == T_STRING_LITERAL && pp_token.string.encoding == STRING_ENCODING_CHAR) {
                lexer_pos.input_name = pp_token.string.string.begin;
                lexer_pos.is_system_header = false;
                next_pp_token();
@@ -1083,23 +1074,25 @@ void lexer_next_preprocessing_token(void)
                        return;
                )
 
-               SYMBOL_CHARS
+               SYMBOL_CHARS {
                        parse_symbol();
                        /* might be a wide string ( L"string" ) */
+                       string_encoding_t const enc = STRING_ENCODING_WIDE;
                        if (lexer_token.base.symbol == symbol_L) {
                                switch (c) {
-                                       case '"':  parse_wide_string_literal();     break;
+                                       case '"':  parse_string_literal(enc);       break;
                                        case '\'': parse_wide_character_constant(); break;
                                }
                        }
                        return;
+               }
 
                DIGITS
                        parse_number();
                        return;
 
                case '"':
-                       parse_string_literal();
+                       parse_string_literal(STRING_ENCODING_CHAR);
                        return;
 
                case '\'':
diff --git a/parser.c b/parser.c
index 4f8292d..7c5e554 100644 (file)
--- a/parser.c
+++ b/parser.c
@@ -253,7 +253,6 @@ static void semantic_comparison(binary_expression_t *expression);
        case T_PLUSPLUS:                  \
        case T_STRING_LITERAL:            \
        case T_WIDE_CHARACTER_CONSTANT:   \
-       case T_WIDE_STRING_LITERAL:       \
        case T___FUNCDNAME__:             \
        case T___FUNCSIG__:               \
        case T___FUNCTION__:              \
@@ -1052,39 +1051,40 @@ static string_t finish_string(void)
        return (string_t){ string, size };
 }
 
-static string_t concat_string_literals(bool *const out_is_wide)
+static string_t concat_string_literals(string_encoding_t *const out_enc)
 {
-       assert(token.kind == T_STRING_LITERAL || token.kind == T_WIDE_STRING_LITERAL);
+       assert(token.kind == T_STRING_LITERAL);
 
-       string_t           result;
-       bool               is_wide = token.kind == T_WIDE_STRING_LITERAL;
-       token_kind_t const la1     = (token_kind_t)look_ahead(1)->kind;
-       if (la1 == T_STRING_LITERAL || la1 == T_WIDE_STRING_LITERAL) {
+       string_t          result;
+       string_encoding_t enc = token.string.encoding;
+       if (look_ahead(1)->kind == T_STRING_LITERAL) {
                append_string(&token.string.string);
-               next_token();
+               eat(T_STRING_LITERAL);
                warningf(WARN_TRADITIONAL, HERE, "traditional C rejects string constant concatenation");
                do {
-                       is_wide |= token.kind == T_WIDE_STRING_LITERAL;
+                       if (token.string.encoding != STRING_ENCODING_CHAR) {
+                               enc = token.string.encoding;
+                       }
                        append_string(&token.string.string);
-                       next_token();
-               } while (token.kind == T_STRING_LITERAL || token.kind == T_WIDE_STRING_LITERAL);
+                       eat(T_STRING_LITERAL);
+               } while (token.kind == T_STRING_LITERAL);
                result = finish_string();
        } else {
                result = token.string.string;
-               next_token();
+               eat(T_STRING_LITERAL);
        }
 
-       *out_is_wide = is_wide;
+       *out_enc = enc;
        return result;
 }
 
 static string_t parse_string_literals(void)
 {
-       bool                    is_wide;
+       string_encoding_t       enc;
        source_position_t const pos = *HERE;
-       string_t          const res = concat_string_literals(&is_wide);
+       string_t          const res = concat_string_literals(&enc);
 
-       if (is_wide) {
+       if (enc != STRING_ENCODING_CHAR) {
                errorf(&pos, "expected plain string literal, got wide string literal");
        }
 
@@ -5729,12 +5729,12 @@ static type_t *get_wide_string_type(void)
  */
 static expression_t *parse_string_literal(void)
 {
-       bool                    is_wide;
+       string_encoding_t       enc;
        source_position_t const pos = *HERE;
-       string_t          const res = concat_string_literals(&is_wide);
+       string_t          const res = concat_string_literals(&enc);
 
        expression_t *literal;
-       if (is_wide) {
+       if (enc != STRING_ENCODING_CHAR) {
                literal = allocate_expression_zero(EXPR_WIDE_STRING_LITERAL);
                literal->base.type = get_wide_string_type();
        } else {
@@ -6695,8 +6695,7 @@ static expression_t *parse_primary_expression(void)
        case T_FLOATINGPOINT:                return parse_number_literal();
        case T_CHARACTER_CONSTANT:           return parse_character_constant();
        case T_WIDE_CHARACTER_CONSTANT:      return parse_wide_character_constant();
-       case T_STRING_LITERAL:
-       case T_WIDE_STRING_LITERAL:          return parse_string_literal();
+       case T_STRING_LITERAL:               return parse_string_literal();
        case T___FUNCTION__:
        case T___func__:                     return parse_function_keyword(FUNCNAME_FUNCTION);
        case T___PRETTY_FUNCTION__:          return parse_function_keyword(FUNCNAME_PRETTY_FUNCTION);
@@ -9835,7 +9834,6 @@ static statement_t *parse_compound_statement(bool inside_expression_statement)
        add_anchor_token(T_PLUSPLUS);
        add_anchor_token(T_STRING_LITERAL);
        add_anchor_token(T_WIDE_CHARACTER_CONSTANT);
-       add_anchor_token(T_WIDE_STRING_LITERAL);
        add_anchor_token(T__Bool);
        add_anchor_token(T__Complex);
        add_anchor_token(T__Imaginary);
@@ -10011,7 +10009,6 @@ static statement_t *parse_compound_statement(bool inside_expression_statement)
        rem_anchor_token(T__Imaginary);
        rem_anchor_token(T__Complex);
        rem_anchor_token(T__Bool);
-       rem_anchor_token(T_WIDE_STRING_LITERAL);
        rem_anchor_token(T_WIDE_CHARACTER_CONSTANT);
        rem_anchor_token(T_STRING_LITERAL);
        rem_anchor_token(T_PLUSPLUS);
diff --git a/token.c b/token.c
index c9b0de8..ac2b0d7 100644 (file)
--- a/token.c
+++ b/token.c
@@ -28,6 +28,7 @@
 #include "symbol.h"
 #include "lang_features.h"
 #include "adt/array.h"
+#include "adt/error.h"
 #include "adt/util.h"
 
 static symbol_t *token_symbols[T_LAST_TOKEN];
@@ -123,6 +124,15 @@ void print_token_kind(FILE *f, token_kind_t token_kind)
        }
 }
 
+char const *get_string_encoding_prefix(string_encoding_t const enc)
+{
+       switch (enc) {
+       case STRING_ENCODING_CHAR: return "";
+       case STRING_ENCODING_WIDE: return "L";
+       }
+       panic("invalid string encoding");
+}
+
 static void print_stringrep(const string_t *string, FILE *f)
 {
        for (size_t i = 0; i < string->size; ++i) {
@@ -145,11 +155,12 @@ void print_token(FILE *f, const token_t *token)
                        print_stringrep(&token->number.suffix, f);
                fputc('\'', f);
                break;
-       case T_WIDE_STRING_LITERAL:
+
        case T_STRING_LITERAL:
                print_token_kind(f, (token_kind_t)token->kind);
-               fprintf(f, " \"%s\"", token->string.string.begin);
+               fprintf(f, " %s\"%s\"", get_string_encoding_prefix(token->string.encoding), token->string.string.begin);
                break;
+
        case T_CHARACTER_CONSTANT:
        case T_WIDE_CHARACTER_CONSTANT:
                print_token_kind(f, (token_kind_t)token->kind);
diff --git a/token_t.h b/token_t.h
index b58531f..9a68f81 100644 (file)
--- a/token_t.h
+++ b/token_t.h
@@ -70,9 +70,16 @@ struct token_base_t {
        symbol_t         *symbol;
 };
 
+enum string_encoding_t {
+       STRING_ENCODING_CHAR,
+       STRING_ENCODING_WIDE
+};
+typedef enum string_encoding_t string_encoding_t;
+
 struct string_literal_t {
-       token_base_t  base;
-       string_t      string;
+       token_base_t      base;
+       string_encoding_t encoding;
+       string_t          string;
 };
 
 struct number_literal_t {
@@ -88,6 +95,8 @@ union token_t {
        number_literal_t  number;
 };
 
+char const *get_string_encoding_prefix(string_encoding_t);
+
 void init_tokens(void);
 void exit_tokens(void);
 void print_token_kind(FILE *out, token_kind_t token_kind);
diff --git a/tokens.inc b/tokens.inc
index 1e21ee1..162e73b 100644 (file)
--- a/tokens.inc
+++ b/tokens.inc
@@ -15,7 +15,6 @@ TS(FLOATINGPOINT,           "floatingpoint number",)
 TS(CHARACTER_CONSTANT,      "character constant",)
 TS(WIDE_CHARACTER_CONSTANT, "wide character constant",)
 TS(STRING_LITERAL,          "string literal",)
-TS(WIDE_STRING_LITERAL,     "wide string literal",)
 
 #define S(mode,x)   T(mode,x,#x,)
 S(_ALL, auto)