Merge T_WIDE_CHARACTER_CONSTANT into T_CHARACTER_CONSTANT.
author Christoph Mallon <christoph.mallon@gmx.de>
Mon, 21 May 2012 06:43:24 +0000 (08:43 +0200)
committer Christoph Mallon <christoph.mallon@gmx.de>
Wed, 23 May 2012 04:53:40 +0000 (06:53 +0200)
lexer.c
parser.c
token.c

diff --git a/lexer.c b/lexer.c
index 3b9c4cb..0d09927 100644 (file)
--- a/lexer.c
+++ b/lexer.c
@@ -688,58 +688,10 @@ end_of_string:
        lexer_token.string.string   = identify_string(string, size);
 }
 
-/**
- * Parse a wide character constant and set lexer_token.
- */
-static void parse_wide_character_constant(void)
-{
-       eat('\'');
-
-       while (true) {
-               switch (c) {
-               case '\\': {
-                       const utf32 tc = parse_escape_sequence();
-                       obstack_grow_symbol(&symbol_obstack, tc);
-                       break;
-               }
-
-               MATCH_NEWLINE(
-                       parse_error("newline while parsing character constant");
-                       break;
-               )
-
-               case '\'':
-                       next_char();
-                       goto end_of_wide_char_constant;
-
-               case EOF:
-                       errorf(&lexer_token.base.source_position, "EOF while parsing character constant");
-                       goto end_of_wide_char_constant;
-
-               default:
-                       obstack_grow_symbol(&symbol_obstack, c);
-                       next_char();
-                       break;
-               }
-       }
-
-end_of_wide_char_constant:;
-       obstack_1grow(&symbol_obstack, '\0');
-       size_t  size   = (size_t) obstack_object_size(&symbol_obstack) - 1;
-       char   *string = obstack_finish(&symbol_obstack);
-
-       lexer_token.kind          = T_WIDE_CHARACTER_CONSTANT;
-       lexer_token.string.string = identify_string(string, size);
-
-       if (size == 0) {
-               errorf(&lexer_token.base.source_position, "empty character constant");
-       }
-}
-
 /**
  * Parse a character constant and set lexer_token.
  */
-static void parse_character_constant(void)
+static void parse_character_constant(string_encoding_t const enc)
 {
        eat('\'');
 
@@ -747,10 +699,14 @@ static void parse_character_constant(void)
                switch (c) {
                case '\\': {
                        utf32 const tc = parse_escape_sequence();
-                       if (tc >= 0x100) {
-                               warningf(WARN_OTHER, &lexer_pos, "escape sequence out of range");
+                       if (enc == STRING_ENCODING_CHAR) {
+                               if (tc >= 0x100) {
+                                       warningf(WARN_OTHER, &lexer_pos, "escape sequence out of range");
+                               }
+                               obstack_1grow(&symbol_obstack, tc);
+                       } else {
+                               obstack_grow_symbol(&symbol_obstack, tc);
                        }
-                       obstack_1grow(&symbol_obstack, tc);
                        break;
                }
 
@@ -780,8 +736,9 @@ end_of_char_constant:;
        const size_t        size   = (size_t)obstack_object_size(&symbol_obstack)-1;
        char         *const string = obstack_finish(&symbol_obstack);
 
-       lexer_token.kind          = T_CHARACTER_CONSTANT;
-       lexer_token.string.string = identify_string(string, size);
+       lexer_token.kind            = T_CHARACTER_CONSTANT;
+       lexer_token.string.encoding = enc;
+       lexer_token.string.string   = identify_string(string, size);
 
        if (size == 0) {
                errorf(&lexer_token.base.source_position, "empty character constant");
@@ -1080,8 +1037,8 @@ void lexer_next_preprocessing_token(void)
                        string_encoding_t const enc = STRING_ENCODING_WIDE;
                        if (lexer_token.base.symbol == symbol_L) {
                                switch (c) {
-                                       case '"':  parse_string_literal(enc);       break;
-                                       case '\'': parse_wide_character_constant(); break;
+                               case '"':  parse_string_literal(enc);     break;
+                               case '\'': parse_character_constant(enc); break;
                                }
                        }
                        return;
@@ -1096,7 +1053,7 @@ void lexer_next_preprocessing_token(void)
                        return;
 
                case '\'':
-                       parse_character_constant();
+                       parse_character_constant(STRING_ENCODING_CHAR);
                        return;
 
                case '.':
diff --git a/parser.c b/parser.c
index 7c5e554..c542874 100644 (file)
--- a/parser.c
+++ b/parser.c
@@ -252,7 +252,6 @@ static void semantic_comparison(binary_expression_t *expression);
        case T_MINUSMINUS:                \
        case T_PLUSPLUS:                  \
        case T_STRING_LITERAL:            \
-       case T_WIDE_CHARACTER_CONSTANT:   \
        case T___FUNCDNAME__:             \
        case T___FUNCSIG__:               \
        case T___FUNCTION__:              \
@@ -5872,39 +5871,39 @@ static expression_t *parse_number_literal(void)
  */
 static expression_t *parse_character_constant(void)
 {
-       expression_t *literal = allocate_expression_zero(EXPR_LITERAL_CHARACTER);
-       literal->base.type     = c_mode & _CXX ? type_char : type_int;
-       literal->literal.value = token.string.string;
-
-       size_t len = literal->literal.value.size;
-       if (len > 1) {
-               if (!GNU_MODE && !(c_mode & _C99)) {
-                       errorf(HERE, "more than 1 character in character constant");
-               } else {
-                       literal->base.type = type_int;
-                       warningf(WARN_MULTICHAR, HERE, "multi-character character constant");
+       expression_t *literal;
+       switch (token.string.encoding) {
+       case STRING_ENCODING_CHAR: {
+               literal = allocate_expression_zero(EXPR_LITERAL_CHARACTER);
+               literal->base.type     = c_mode & _CXX ? type_char : type_int;
+               literal->literal.value = token.string.string;
+
+               size_t len = literal->literal.value.size;
+               if (len > 1) {
+                       if (!GNU_MODE && !(c_mode & _C99)) {
+                               errorf(HERE, "more than 1 character in character constant");
+                       } else {
+                               literal->base.type = type_int;
+                               warningf(WARN_MULTICHAR, HERE, "multi-character character constant");
+                       }
                }
+               break;
        }
 
-       eat(T_CHARACTER_CONSTANT);
-       return literal;
-}
+       case STRING_ENCODING_WIDE: {
+               literal = allocate_expression_zero(EXPR_LITERAL_WIDE_CHARACTER);
+               literal->base.type     = type_int;
+               literal->literal.value = token.string.string;
 
-/**
- * Parse a wide character constant.
- */
-static expression_t *parse_wide_character_constant(void)
-{
-       expression_t *literal = allocate_expression_zero(EXPR_LITERAL_WIDE_CHARACTER);
-       literal->base.type     = type_int;
-       literal->literal.value = token.string.string;
-
-       size_t len = wstrlen(&literal->literal.value);
-       if (len > 1) {
-               warningf(WARN_MULTICHAR, HERE, "multi-character character constant");
+               size_t len = wstrlen(&literal->literal.value);
+               if (len > 1) {
+                       warningf(WARN_MULTICHAR, HERE, "multi-character character constant");
+               }
+               break;
+       }
        }
 
-       eat(T_WIDE_CHARACTER_CONSTANT);
+       eat(T_CHARACTER_CONSTANT);
        return literal;
 }
 
@@ -6694,7 +6693,6 @@ static expression_t *parse_primary_expression(void)
        case T_INTEGER:
        case T_FLOATINGPOINT:                return parse_number_literal();
        case T_CHARACTER_CONSTANT:           return parse_character_constant();
-       case T_WIDE_CHARACTER_CONSTANT:      return parse_wide_character_constant();
        case T_STRING_LITERAL:               return parse_string_literal();
        case T___FUNCTION__:
        case T___func__:                     return parse_function_keyword(FUNCNAME_FUNCTION);
@@ -9833,7 +9831,6 @@ static statement_t *parse_compound_statement(bool inside_expression_statement)
        add_anchor_token(T_MINUSMINUS);
        add_anchor_token(T_PLUSPLUS);
        add_anchor_token(T_STRING_LITERAL);
-       add_anchor_token(T_WIDE_CHARACTER_CONSTANT);
        add_anchor_token(T__Bool);
        add_anchor_token(T__Complex);
        add_anchor_token(T__Imaginary);
@@ -10009,7 +10006,6 @@ static statement_t *parse_compound_statement(bool inside_expression_statement)
        rem_anchor_token(T__Imaginary);
        rem_anchor_token(T__Complex);
        rem_anchor_token(T__Bool);
-       rem_anchor_token(T_WIDE_CHARACTER_CONSTANT);
        rem_anchor_token(T_STRING_LITERAL);
        rem_anchor_token(T_PLUSPLUS);
        rem_anchor_token(T_MINUSMINUS);
diff --git a/token.c b/token.c
index ac2b0d7..755b2c9 100644 (file)
--- a/token.c
+++ b/token.c
@@ -162,9 +162,8 @@ void print_token(FILE *f, const token_t *token)
                break;
 
        case T_CHARACTER_CONSTANT:
-       case T_WIDE_CHARACTER_CONSTANT:
                print_token_kind(f, (token_kind_t)token->kind);
-               fputs(" \'", f);
+               fprintf(f, " %s'", get_string_encoding_prefix(token->string.encoding));
                print_stringrep(&token->string.string, f);
                fputs("'", f);
                break;