- Rework the way literals are handled, these are now kept as strings until
authorMatthias Braun <matze@braunis.de>
Wed, 24 Feb 2010 15:06:07 +0000 (15:06 +0000)
committerMatthias Braun <matze@braunis.de>
Wed, 24 Feb 2010 15:06:07 +0000 (15:06 +0000)
  code generation
- Don't work with wide-strings inside the compiler but always use utf-8
  encoded "normal" strings. This simplifies some places (like the printf
  format checker) and avoids code duplication.

[r27212]

21 files changed:
ast.c
ast.h
ast2firm.c
ast_t.h
attribute.c
diagnostic.c
diagnostic.h
format_check.c
lexer.c
lexer.h
parser.c
preprocessor.c
printer.c
printer.h
string_rep.h
token.c
token_t.h
tokens.inc
walk_statements.c
wrappergen/write_fluffy.c
wrappergen/write_jna.c

diff --git a/ast.c b/ast.c
index 5dc9b4c..fa3cd71 100644 (file)
--- a/ast.c
+++ b/ast.c
@@ -26,6 +26,7 @@
 #include "lang_features.h"
 #include "entity_t.h"
 #include "printer.h"
+#include "types.h"
 
 #include <assert.h>
 #include <stdio.h>
@@ -68,6 +69,18 @@ void print_indent(void)
                print_string("\t");
 }
 
+static void print_symbol(const symbol_t *symbol)
+{
+       print_string(symbol->string);
+}
+
+static void print_stringrep(const string_t *string)
+{
+       for (size_t i = 0; i < string->size; ++i) {
+               print_char(string->begin[i]);
+       }
+}
+
 /**
  * Returns 1 if a given precedence level has right-to-left
  * associativity, else 0.
@@ -95,88 +108,93 @@ static int right_to_left(unsigned precedence)
 static unsigned get_expression_precedence(expression_kind_t kind)
 {
        static const unsigned prec[] = {
-               [EXPR_UNKNOWN]                    = PREC_PRIMARY,
-               [EXPR_INVALID]                    = PREC_PRIMARY,
-               [EXPR_REFERENCE]                  = PREC_PRIMARY,
-               [EXPR_REFERENCE_ENUM_VALUE]       = PREC_PRIMARY,
-               [EXPR_CHARACTER_CONSTANT]         = PREC_PRIMARY,
-               [EXPR_WIDE_CHARACTER_CONSTANT]    = PREC_PRIMARY,
-               [EXPR_CONST]                      = PREC_PRIMARY,
-               [EXPR_STRING_LITERAL]             = PREC_PRIMARY,
-               [EXPR_WIDE_STRING_LITERAL]        = PREC_PRIMARY,
-               [EXPR_COMPOUND_LITERAL]           = PREC_UNARY,
-               [EXPR_CALL]                       = PREC_POSTFIX,
-               [EXPR_CONDITIONAL]                = PREC_CONDITIONAL,
-               [EXPR_SELECT]                     = PREC_POSTFIX,
-               [EXPR_ARRAY_ACCESS]               = PREC_POSTFIX,
-               [EXPR_SIZEOF]                     = PREC_UNARY,
-               [EXPR_CLASSIFY_TYPE]              = PREC_UNARY,
-               [EXPR_ALIGNOF]                    = PREC_UNARY,
-
-               [EXPR_FUNCNAME]                   = PREC_PRIMARY,
-               [EXPR_BUILTIN_CONSTANT_P]         = PREC_PRIMARY,
-               [EXPR_BUILTIN_TYPES_COMPATIBLE_P] = PREC_PRIMARY,
-               [EXPR_OFFSETOF]                   = PREC_PRIMARY,
-               [EXPR_VA_START]                   = PREC_PRIMARY,
-               [EXPR_VA_ARG]                     = PREC_PRIMARY,
-               [EXPR_VA_COPY]                    = PREC_PRIMARY,
-               [EXPR_STATEMENT]                  = PREC_PRIMARY,
-               [EXPR_LABEL_ADDRESS]              = PREC_PRIMARY,
-
-               [EXPR_UNARY_NEGATE]               = PREC_UNARY,
-               [EXPR_UNARY_PLUS]                 = PREC_UNARY,
-               [EXPR_UNARY_BITWISE_NEGATE]       = PREC_UNARY,
-               [EXPR_UNARY_NOT]                  = PREC_UNARY,
-               [EXPR_UNARY_DEREFERENCE]          = PREC_UNARY,
-               [EXPR_UNARY_TAKE_ADDRESS]         = PREC_UNARY,
-               [EXPR_UNARY_POSTFIX_INCREMENT]    = PREC_POSTFIX,
-               [EXPR_UNARY_POSTFIX_DECREMENT]    = PREC_POSTFIX,
-               [EXPR_UNARY_PREFIX_INCREMENT]     = PREC_UNARY,
-               [EXPR_UNARY_PREFIX_DECREMENT]     = PREC_UNARY,
-               [EXPR_UNARY_CAST]                 = PREC_UNARY,
-               [EXPR_UNARY_CAST_IMPLICIT]        = PREC_UNARY,
-               [EXPR_UNARY_ASSUME]               = PREC_PRIMARY,
-               [EXPR_UNARY_DELETE]               = PREC_UNARY,
-               [EXPR_UNARY_DELETE_ARRAY]         = PREC_UNARY,
-               [EXPR_UNARY_THROW]                = PREC_ASSIGNMENT,
-
-               [EXPR_BINARY_ADD]                 = PREC_ADDITIVE,
-               [EXPR_BINARY_SUB]                 = PREC_ADDITIVE,
-               [EXPR_BINARY_MUL]                 = PREC_MULTIPLICATIVE,
-               [EXPR_BINARY_DIV]                 = PREC_MULTIPLICATIVE,
-               [EXPR_BINARY_MOD]                 = PREC_MULTIPLICATIVE,
-               [EXPR_BINARY_EQUAL]               = PREC_EQUALITY,
-               [EXPR_BINARY_NOTEQUAL]            = PREC_EQUALITY,
-               [EXPR_BINARY_LESS]                = PREC_RELATIONAL,
-               [EXPR_BINARY_LESSEQUAL]           = PREC_RELATIONAL,
-               [EXPR_BINARY_GREATER]             = PREC_RELATIONAL,
-               [EXPR_BINARY_GREATEREQUAL]        = PREC_RELATIONAL,
-               [EXPR_BINARY_BITWISE_AND]         = PREC_AND,
-               [EXPR_BINARY_BITWISE_OR]          = PREC_OR,
-               [EXPR_BINARY_BITWISE_XOR]         = PREC_XOR,
-               [EXPR_BINARY_LOGICAL_AND]         = PREC_LOGICAL_AND,
-               [EXPR_BINARY_LOGICAL_OR]          = PREC_LOGICAL_OR,
-               [EXPR_BINARY_SHIFTLEFT]           = PREC_SHIFT,
-               [EXPR_BINARY_SHIFTRIGHT]          = PREC_SHIFT,
-               [EXPR_BINARY_ASSIGN]              = PREC_ASSIGNMENT,
-               [EXPR_BINARY_MUL_ASSIGN]          = PREC_ASSIGNMENT,
-               [EXPR_BINARY_DIV_ASSIGN]          = PREC_ASSIGNMENT,
-               [EXPR_BINARY_MOD_ASSIGN]          = PREC_ASSIGNMENT,
-               [EXPR_BINARY_ADD_ASSIGN]          = PREC_ASSIGNMENT,
-               [EXPR_BINARY_SUB_ASSIGN]          = PREC_ASSIGNMENT,
-               [EXPR_BINARY_SHIFTLEFT_ASSIGN]    = PREC_ASSIGNMENT,
-               [EXPR_BINARY_SHIFTRIGHT_ASSIGN]   = PREC_ASSIGNMENT,
-               [EXPR_BINARY_BITWISE_AND_ASSIGN]  = PREC_ASSIGNMENT,
-               [EXPR_BINARY_BITWISE_XOR_ASSIGN]  = PREC_ASSIGNMENT,
-               [EXPR_BINARY_BITWISE_OR_ASSIGN]   = PREC_ASSIGNMENT,
-               [EXPR_BINARY_COMMA]               = PREC_EXPRESSION,
-
-               [EXPR_BINARY_ISGREATER]           = PREC_PRIMARY,
-               [EXPR_BINARY_ISGREATEREQUAL]      = PREC_PRIMARY,
-               [EXPR_BINARY_ISLESS]              = PREC_PRIMARY,
-               [EXPR_BINARY_ISLESSEQUAL]         = PREC_PRIMARY,
-               [EXPR_BINARY_ISLESSGREATER]       = PREC_PRIMARY,
-               [EXPR_BINARY_ISUNORDERED]         = PREC_PRIMARY
+               [EXPR_UNKNOWN]                           = PREC_PRIMARY,
+               [EXPR_INVALID]                           = PREC_PRIMARY,
+               [EXPR_REFERENCE]                         = PREC_PRIMARY,
+               [EXPR_REFERENCE_ENUM_VALUE]              = PREC_PRIMARY,
+               [EXPR_LITERAL_INTEGER]                   = PREC_PRIMARY,
+               [EXPR_LITERAL_INTEGER_OCTAL]             = PREC_PRIMARY,
+               [EXPR_LITERAL_INTEGER_HEXADECIMAL]       = PREC_PRIMARY,
+               [EXPR_LITERAL_FLOATINGPOINT]             = PREC_PRIMARY,
+               [EXPR_LITERAL_FLOATINGPOINT_HEXADECIMAL] = PREC_PRIMARY,
+               [EXPR_LITERAL_CHARACTER]                 = PREC_PRIMARY,
+               [EXPR_LITERAL_WIDE_CHARACTER]            = PREC_PRIMARY,
+               [EXPR_LITERAL_MS_NOOP]                   = PREC_PRIMARY,
+               [EXPR_STRING_LITERAL]                    = PREC_PRIMARY,
+               [EXPR_WIDE_STRING_LITERAL]               = PREC_PRIMARY,
+               [EXPR_COMPOUND_LITERAL]                  = PREC_UNARY,
+               [EXPR_CALL]                              = PREC_POSTFIX,
+               [EXPR_CONDITIONAL]                       = PREC_CONDITIONAL,
+               [EXPR_SELECT]                            = PREC_POSTFIX,
+               [EXPR_ARRAY_ACCESS]                      = PREC_POSTFIX,
+               [EXPR_SIZEOF]                            = PREC_UNARY,
+               [EXPR_CLASSIFY_TYPE]                     = PREC_UNARY,
+               [EXPR_ALIGNOF]                           = PREC_UNARY,
+
+               [EXPR_FUNCNAME]                          = PREC_PRIMARY,
+               [EXPR_BUILTIN_CONSTANT_P]                = PREC_PRIMARY,
+               [EXPR_BUILTIN_TYPES_COMPATIBLE_P]        = PREC_PRIMARY,
+               [EXPR_OFFSETOF]                          = PREC_PRIMARY,
+               [EXPR_VA_START]                          = PREC_PRIMARY,
+               [EXPR_VA_ARG]                            = PREC_PRIMARY,
+               [EXPR_VA_COPY]                           = PREC_PRIMARY,
+               [EXPR_STATEMENT]                         = PREC_PRIMARY,
+               [EXPR_LABEL_ADDRESS]                     = PREC_PRIMARY,
+
+               [EXPR_UNARY_NEGATE]                      = PREC_UNARY,
+               [EXPR_UNARY_PLUS]                        = PREC_UNARY,
+               [EXPR_UNARY_BITWISE_NEGATE]              = PREC_UNARY,
+               [EXPR_UNARY_NOT]                         = PREC_UNARY,
+               [EXPR_UNARY_DEREFERENCE]                 = PREC_UNARY,
+               [EXPR_UNARY_TAKE_ADDRESS]                = PREC_UNARY,
+               [EXPR_UNARY_POSTFIX_INCREMENT]           = PREC_POSTFIX,
+               [EXPR_UNARY_POSTFIX_DECREMENT]           = PREC_POSTFIX,
+               [EXPR_UNARY_PREFIX_INCREMENT]            = PREC_UNARY,
+               [EXPR_UNARY_PREFIX_DECREMENT]            = PREC_UNARY,
+               [EXPR_UNARY_CAST]                        = PREC_UNARY,
+               [EXPR_UNARY_CAST_IMPLICIT]               = PREC_UNARY,
+               [EXPR_UNARY_ASSUME]                      = PREC_PRIMARY,
+               [EXPR_UNARY_DELETE]                      = PREC_UNARY,
+               [EXPR_UNARY_DELETE_ARRAY]                = PREC_UNARY,
+               [EXPR_UNARY_THROW]                       = PREC_ASSIGNMENT,
+
+               [EXPR_BINARY_ADD]                        = PREC_ADDITIVE,
+               [EXPR_BINARY_SUB]                        = PREC_ADDITIVE,
+               [EXPR_BINARY_MUL]                        = PREC_MULTIPLICATIVE,
+               [EXPR_BINARY_DIV]                        = PREC_MULTIPLICATIVE,
+               [EXPR_BINARY_MOD]                        = PREC_MULTIPLICATIVE,
+               [EXPR_BINARY_EQUAL]                      = PREC_EQUALITY,
+               [EXPR_BINARY_NOTEQUAL]                   = PREC_EQUALITY,
+               [EXPR_BINARY_LESS]                       = PREC_RELATIONAL,
+               [EXPR_BINARY_LESSEQUAL]                  = PREC_RELATIONAL,
+               [EXPR_BINARY_GREATER]                    = PREC_RELATIONAL,
+               [EXPR_BINARY_GREATEREQUAL]               = PREC_RELATIONAL,
+               [EXPR_BINARY_BITWISE_AND]                = PREC_AND,
+               [EXPR_BINARY_BITWISE_OR]                 = PREC_OR,
+               [EXPR_BINARY_BITWISE_XOR]                = PREC_XOR,
+               [EXPR_BINARY_LOGICAL_AND]                = PREC_LOGICAL_AND,
+               [EXPR_BINARY_LOGICAL_OR]                 = PREC_LOGICAL_OR,
+               [EXPR_BINARY_SHIFTLEFT]                  = PREC_SHIFT,
+               [EXPR_BINARY_SHIFTRIGHT]                 = PREC_SHIFT,
+               [EXPR_BINARY_ASSIGN]                     = PREC_ASSIGNMENT,
+               [EXPR_BINARY_MUL_ASSIGN]                 = PREC_ASSIGNMENT,
+               [EXPR_BINARY_DIV_ASSIGN]                 = PREC_ASSIGNMENT,
+               [EXPR_BINARY_MOD_ASSIGN]                 = PREC_ASSIGNMENT,
+               [EXPR_BINARY_ADD_ASSIGN]                 = PREC_ASSIGNMENT,
+               [EXPR_BINARY_SUB_ASSIGN]                 = PREC_ASSIGNMENT,
+               [EXPR_BINARY_SHIFTLEFT_ASSIGN]           = PREC_ASSIGNMENT,
+               [EXPR_BINARY_SHIFTRIGHT_ASSIGN]          = PREC_ASSIGNMENT,
+               [EXPR_BINARY_BITWISE_AND_ASSIGN]         = PREC_ASSIGNMENT,
+               [EXPR_BINARY_BITWISE_XOR_ASSIGN]         = PREC_ASSIGNMENT,
+               [EXPR_BINARY_BITWISE_OR_ASSIGN]          = PREC_ASSIGNMENT,
+               [EXPR_BINARY_COMMA]                      = PREC_EXPRESSION,
+
+               [EXPR_BINARY_ISGREATER]                  = PREC_PRIMARY,
+               [EXPR_BINARY_ISGREATEREQUAL]             = PREC_PRIMARY,
+               [EXPR_BINARY_ISLESS]                     = PREC_PRIMARY,
+               [EXPR_BINARY_ISLESSEQUAL]                = PREC_PRIMARY,
+               [EXPR_BINARY_ISLESSGREATER]              = PREC_PRIMARY,
+               [EXPR_BINARY_ISUNORDERED]                = PREC_PRIMARY
        };
        assert((size_t)kind < lengthof(prec));
        unsigned res = prec[kind];
@@ -185,51 +203,6 @@ static unsigned get_expression_precedence(expression_kind_t kind)
        return res;
 }
 
-/**
- * Print a constant expression.
- *
- * @param cnst  the constant expression
- */
-static void print_const(const const_expression_t *cnst)
-{
-       if (cnst->base.type == NULL)
-               return;
-
-       const type_t *const type = skip_typeref(cnst->base.type);
-
-       if (is_type_atomic(type, ATOMIC_TYPE_BOOL)) {
-               print_string(cnst->v.int_value ? "true" : "false");
-       } else if (is_type_integer(type)) {
-               print_format("%lld", cnst->v.int_value);
-       } else if (is_type_float(type)) {
-               long double const val = cnst->v.float_value;
-#ifdef _WIN32
-               /* ARG, no way to print long double */
-               print_format("%.20g", (double)val);
-#else
-               print_format("%.20Lg", val);
-#endif
-               if (isfinite(val) && truncl(val) == val)
-                       print_string(".0");
-       } else {
-               panic("unknown constant");
-       }
-
-       char const* suffix;
-       switch (type->atomic.akind) {
-               case ATOMIC_TYPE_UINT:        suffix = "U";   break;
-               case ATOMIC_TYPE_LONG:        suffix = "L";   break;
-               case ATOMIC_TYPE_ULONG:       suffix = "UL";  break;
-               case ATOMIC_TYPE_LONGLONG:    suffix = "LL";  break;
-               case ATOMIC_TYPE_ULONGLONG:   suffix = "ULL"; break;
-               case ATOMIC_TYPE_FLOAT:       suffix = "F";   break;
-               case ATOMIC_TYPE_LONG_DOUBLE: suffix = "L";   break;
-
-               default: return;
-       }
-       print_string(suffix);
-}
-
 /**
  * Print a quoted string constant.
  *
@@ -237,7 +210,8 @@ static void print_const(const const_expression_t *cnst)
  * @param border  the border char
  * @param skip    number of chars to skip at the end
  */
-static void print_quoted_string(const string_t *const string, char border, int skip)
+static void print_quoted_string(const string_t *const string, char border,
+                                int skip)
 {
        print_char(border);
        const char *end = string->begin + string->size - skip;
@@ -247,15 +221,15 @@ static void print_quoted_string(const string_t *const string, char border, int s
                        print_string("\\");
                }
                switch (tc) {
-               case '\\':  print_string("\\\\"); break;
-               case '\a':  print_string("\\a"); break;
-               case '\b':  print_string("\\b"); break;
-               case '\f':  print_string("\\f"); break;
-               case '\n':  print_string("\\n"); break;
-               case '\r':  print_string("\\r"); break;
-               case '\t':  print_string("\\t"); break;
-               case '\v':  print_string("\\v"); break;
-               case '\?':  print_string("\\?"); break;
+               case '\\': print_string("\\\\"); break;
+               case '\a': print_string("\\a"); break;
+               case '\b': print_string("\\b"); break;
+               case '\f': print_string("\\f"); break;
+               case '\n': print_string("\\n"); break;
+               case '\r': print_string("\\r"); break;
+               case '\t': print_string("\\t"); break;
+               case '\v': print_string("\\v"); break;
+               case '\?': print_string("\\?"); break;
                case 27:
                        if (c_mode & _GNUC) {
                                print_string("\\e"); break;
@@ -273,77 +247,42 @@ static void print_quoted_string(const string_t *const string, char border, int s
        print_char(border);
 }
 
-/**
- * Prints a wide string literal expression.
- *
- * @param wstr    the wide string literal expression
- * @param border  the border char
- * @param skip    number of chars to skip at the end
- */
-static void print_quoted_wide_string(const wide_string_t *const wstr,
-                                     char border, int skip)
+static void print_string_literal(const string_literal_expression_t *literal)
 {
-       print_string("L");
-       print_char(border);
-       const wchar_rep_t *end = wstr->begin + wstr->size - skip;
-       for (const wchar_rep_t *c = wstr->begin; c != end; ++c) {
-               switch (*c) {
-                       case L'\"':  print_string("\\\""); break;
-                       case L'\\':  print_string("\\\\"); break;
-                       case L'\a':  print_string("\\a");  break;
-                       case L'\b':  print_string("\\b");  break;
-                       case L'\f':  print_string("\\f");  break;
-                       case L'\n':  print_string("\\n");  break;
-                       case L'\r':  print_string("\\r");  break;
-                       case L'\t':  print_string("\\t");  break;
-                       case L'\v':  print_string("\\v");  break;
-                       case L'\?':  print_string("\\?");  break;
-                       case 27:
-                               if (c_mode & _GNUC) {
-                                       print_string("\\e"); break;
-                               }
-                               /* FALLTHROUGH */
-                       default: {
-                               const unsigned tc = *c;
-                               if (tc < 0x80U) {
-                                       if (isprint(*c)) {
-                                               print_char(*c);
-                                       } else {
-                                               print_format("\\%03o", tc);
-                                       }
-                               } else {
-                                       print_char(tc);
-                               }
-                       }
-               }
+       if (literal->base.kind == EXPR_WIDE_STRING_LITERAL) {
+               print_char('L');
        }
-       print_char(border);
+       print_quoted_string(&literal->value, '"', 1);
 }
 
-/**
- * Print a constant character expression.
- *
- * @param cnst  the constant character expression
- */
-static void print_character_constant(const const_expression_t *cnst)
-{
-       print_quoted_string(&cnst->v.character, '\'', 0);
-}
-
-static void print_wide_character_constant(const const_expression_t *cnst)
+static void print_literal(const literal_expression_t *literal)
 {
-       print_quoted_wide_string(&cnst->v.wide_character, '\'', 0);
-}
-
-/**
- * Prints a string literal expression.
- *
- * @param string_literal  the string literal expression
- */
-static void print_string_literal(
-               const string_literal_expression_t *string_literal)
-{
-       print_quoted_string(&string_literal->value, '"', 1);
+       switch (literal->base.kind) {
+       case EXPR_LITERAL_MS_NOOP:
+               print_string("__noop");
+               return;
+       case EXPR_LITERAL_INTEGER_HEXADECIMAL:
+       case EXPR_LITERAL_FLOATINGPOINT_HEXADECIMAL:
+               print_string("0x");
+               /* FALLTHROUGH */
+       case EXPR_LITERAL_BOOLEAN:
+       case EXPR_LITERAL_INTEGER:
+       case EXPR_LITERAL_INTEGER_OCTAL:
+       case EXPR_LITERAL_FLOATINGPOINT:
+               print_stringrep(&literal->value);
+               if (literal->suffix != NULL)
+                       print_symbol(literal->suffix);
+               return;
+       case EXPR_LITERAL_WIDE_CHARACTER:
+               print_char('L');
+               /* FALLTHROUGH */
+       case EXPR_LITERAL_CHARACTER:
+               print_quoted_string(&literal->value, '\'', 0);
+               return;
+       default:
+               break;
+       }
+       print_string("INVALID LITERAL KIND");
 }
 
 /**
@@ -361,12 +300,6 @@ static void print_funcname(const funcname_expression_t *funcname)
        print_string(s);
 }
 
-static void print_wide_string_literal(
-       const wide_string_literal_expression_t *const wstr)
-{
-       print_quoted_wide_string(&wstr->value, '"', 1);
-}
-
 static void print_compound_literal(
                const compound_literal_expression_t *expression)
 {
@@ -758,24 +691,16 @@ static void print_expression_prec(const expression_t *expression, unsigned top_p
        case EXPR_INVALID:
                print_string("$invalid expression$");
                break;
-       case EXPR_CHARACTER_CONSTANT:
-               print_character_constant(&expression->conste);
-               break;
-       case EXPR_WIDE_CHARACTER_CONSTANT:
-               print_wide_character_constant(&expression->conste);
+       case EXPR_WIDE_STRING_LITERAL:
+       case EXPR_STRING_LITERAL:
+               print_string_literal(&expression->string_literal);
                break;
-       case EXPR_CONST:
-               print_const(&expression->conste);
+       EXPR_LITERAL_CASES
+               print_literal(&expression->literal);
                break;
        case EXPR_FUNCNAME:
                print_funcname(&expression->funcname);
                break;
-       case EXPR_STRING_LITERAL:
-               print_string_literal(&expression->string);
-               break;
-       case EXPR_WIDE_STRING_LITERAL:
-               print_wide_string_literal(&expression->wide_string);
-               break;
        case EXPR_COMPOUND_LITERAL:
                print_compound_literal(&expression->compound_literal);
                break;
@@ -833,10 +758,12 @@ static void print_expression_prec(const expression_t *expression, unsigned top_p
                print_statement_expression(&expression->statement);
                break;
 
+#if 0
        default:
                /* TODO */
                print_format("some expression of type %d", (int)expression->kind);
                break;
+#endif
        }
        if (parenthesized)
                print_string(")");
@@ -1328,7 +1255,7 @@ void print_initializer(const initializer_t *initializer)
                print_quoted_string(&initializer->string.string, '"', 1);
                return;
        case INITIALIZER_WIDE_STRING:
-               print_quoted_wide_string(&initializer->wide_string.string, '"', 1);
+               print_quoted_string(&initializer->string.string, '"', 1);
                return;
        case INITIALIZER_DESIGNATOR:
                print_designator(initializer->designator.designator);
@@ -1851,10 +1778,7 @@ static bool is_object_with_constant_address(const expression_t *expression)
 bool is_constant_expression(const expression_t *expression)
 {
        switch (expression->kind) {
-
-       case EXPR_CONST:
-       case EXPR_CHARACTER_CONSTANT:
-       case EXPR_WIDE_CHARACTER_CONSTANT:
+       EXPR_LITERAL_CASES
        case EXPR_CLASSIFY_TYPE:
        case EXPR_OFFSETOF:
        case EXPR_ALIGNOF:
diff --git a/ast.h b/ast.h
index 9d8c9b7..0f70f3b 100644 (file)
--- a/ast.h
+++ b/ast.h
 #include "entity.h"
 
 typedef struct expression_base_t                     expression_base_t;
-typedef struct const_expression_t                    const_expression_t;
+typedef struct literal_expression_t                  literal_expression_t;
 typedef struct string_literal_expression_t           string_literal_expression_t;
 typedef struct funcname_expression_t                 funcname_expression_t;
-typedef struct wide_string_literal_expression_t      wide_string_literal_expression_t;
 typedef struct compound_literal_expression_t         compound_literal_expression_t;
 typedef struct reference_expression_t                reference_expression_t;
 typedef struct cast_expression_t                     cast_expression_t;
@@ -125,4 +124,11 @@ bool is_address_constant(const expression_t *expression);
 long fold_constant_to_int(const expression_t *expression);
 bool fold_constant_to_bool(const expression_t *expression);
 
+/**
+ * the type of a literal is usually the biggest type that can hold the value.
+ * Since this is backend dependent, the parser needs this call exposed.
+ * Works for EXPR_LITERAL_* expressions.
+ */
+void determine_literal_type(literal_expression_t *literal);
+
 #endif
diff --git a/ast2firm.c b/ast2firm.c
index 5e7f363..47430b5 100644 (file)
--- a/ast2firm.c
+++ b/ast2firm.c
@@ -1043,6 +1043,22 @@ entity_created:
        return irentity;
 }
 
+/**
+ * Creates a SymConst for a given entity.
+ *
+ * @param dbgi    debug info
+ * @param mode    the (reference) mode for the SymConst
+ * @param entity  the entity
+ */
+static ir_node *create_symconst(dbg_info *dbgi, ir_mode *mode,
+                                ir_entity *entity)
+{
+       assert(entity != NULL);
+       union symconst_symbol sym;
+       sym.entity_p = entity;
+       return new_d_SymConst(dbgi, mode, sym, symconst_addr_ent);
+}
+
 static ir_node *create_conv(dbg_info *dbgi, ir_node *value, ir_mode *dest_mode)
 {
        ir_mode *value_mode = get_irn_mode(value);
@@ -1061,74 +1077,251 @@ static ir_node *create_conv(dbg_info *dbgi, ir_node *value, ir_mode *dest_mode)
 }
 
 /**
- * Creates a Const node representing a constant.
+ * Creates a SymConst node representing a wide string literal.
+ *
+ * @param literal   the wide string literal
  */
-static ir_node *const_to_firm(const const_expression_t *cnst)
+static ir_node *wide_string_literal_to_firm(
+               const string_literal_expression_t *literal)
 {
-       dbg_info *dbgi = get_dbg_info(&cnst->base.source_position);
-       type_t   *type = skip_typeref(cnst->base.type);
-       ir_mode  *mode = get_ir_mode_storage(type);
+       ir_type  *const global_type = get_glob_type();
+       ir_type  *const elem_type   = ir_type_wchar_t;
+       dbg_info *const dbgi        = get_dbg_info(&literal->base.source_position);
+       ir_type  *const type        = new_type_array(1, elem_type);
 
-       char    buf[128];
-       tarval *tv;
-       size_t  len;
-       if (mode_is_float(mode)) {
-               tv = new_tarval_from_double(cnst->v.float_value, mode);
-       } else {
-               if (mode_is_signed(mode)) {
-                       len = snprintf(buf, sizeof(buf), "%lld", cnst->v.int_value);
-               } else {
-                       len = snprintf(buf, sizeof(buf), "%llu",
-                                      (unsigned long long) cnst->v.int_value);
-               }
-               tv = new_tarval_from_str(buf, len, mode);
+       ident     *const id     = id_unique("str.%u");
+       ir_entity *const entity = new_d_entity(global_type, id, type, dbgi);
+       set_entity_ld_ident(entity, id);
+       set_entity_visibility(entity, ir_visibility_private);
+       add_entity_linkage(entity, IR_LINKAGE_CONSTANT);
+
+       ir_mode      *const mode = get_type_mode(elem_type);
+       const size_t        slen = wstrlen(&literal->value);
+
+       set_array_lower_bound_int(type, 0, 0);
+       set_array_upper_bound_int(type, 0, slen);
+       set_type_size_bytes(type, slen * get_mode_size_bytes(mode));
+       set_type_state(type, layout_fixed);
+
+       ir_initializer_t *initializer = create_initializer_compound(slen);
+       const char              *p    = literal->value.begin;
+       for (size_t i = 0; i < slen; ++i) {
+               assert(p < literal->value.begin + literal->value.size);
+               utf32              v  = read_utf8_char(&p);
+               tarval           *tv  = new_tarval_from_long(v, mode);
+               ir_initializer_t *val = create_initializer_tarval(tv);
+               set_initializer_compound_value(initializer, i, val);
        }
+       set_entity_initializer(entity, initializer);
 
-       ir_node *res        = new_d_Const(dbgi, tv);
-       ir_mode *mode_arith = get_ir_mode_arithmetic(type);
-       return create_conv(dbgi, res, mode_arith);
+       return create_symconst(dbgi, mode_P_data, entity);
 }
 
 /**
- * Creates a Const node representing a character constant.
+ * Creates a SymConst node representing a string constant.
+ *
+ * @param src_pos    the source position of the string constant
+ * @param id_prefix  a prefix for the name of the generated string constant
+ * @param value      the value of the string constant
  */
-static ir_node *character_constant_to_firm(const const_expression_t *cnst)
+static ir_node *string_to_firm(const source_position_t *const src_pos,
+                               const char *const id_prefix,
+                               const string_t *const value)
 {
-       dbg_info *dbgi = get_dbg_info(&cnst->base.source_position);
-       ir_mode  *mode = get_ir_mode_arithmetic(cnst->base.type);
+       ir_type  *const global_type = get_glob_type();
+       dbg_info *const dbgi        = get_dbg_info(src_pos);
+       ir_type  *const type        = new_type_array(1, ir_type_const_char);
 
-       long long int v;
-       size_t const  size = cnst->v.character.size;
-       if (size == 1 && char_is_signed) {
-               v = (signed char)cnst->v.character.begin[0];
-       } else {
-               v = 0;
-               for (size_t i = 0; i < size; ++i) {
-                       v = (v << 8) | ((unsigned char)cnst->v.character.begin[i]);
+       ident     *const id     = id_unique(id_prefix);
+       ir_entity *const entity = new_d_entity(global_type, id, type, dbgi);
+       set_entity_ld_ident(entity, id);
+       set_entity_visibility(entity, ir_visibility_private);
+       add_entity_linkage(entity, IR_LINKAGE_CONSTANT);
+
+       ir_type *const elem_type = ir_type_const_char;
+       ir_mode *const mode      = get_type_mode(elem_type);
+
+       const char* const string = value->begin;
+       const size_t      slen   = value->size;
+
+       set_array_lower_bound_int(type, 0, 0);
+       set_array_upper_bound_int(type, 0, slen);
+       set_type_size_bytes(type, slen);
+       set_type_state(type, layout_fixed);
+
+       ir_initializer_t *initializer = create_initializer_compound(slen);
+       for (size_t i = 0; i < slen; ++i) {
+               tarval           *tv  = new_tarval_from_long(string[i], mode);
+               ir_initializer_t *val = create_initializer_tarval(tv);
+               set_initializer_compound_value(initializer, i, val);
+       }
+       set_entity_initializer(entity, initializer);
+
+       return create_symconst(dbgi, mode_P_data, entity);
+}
+
+static bool try_create_integer(literal_expression_t *literal,
+                               type_t *type, unsigned char base)
+{
+       const char *string = literal->value.begin;
+       size_t      size   = literal->value.size;
+
+       assert(type->kind == TYPE_ATOMIC);
+       atomic_type_kind_t akind = type->atomic.akind;
+
+       ir_mode *mode = atomic_modes[akind];
+       tarval  *tv   = new_integer_tarval_from_str(string, size, 1, base, mode);
+       if (tv == tarval_bad)
+               return false;
+
+       literal->base.type    = type;
+       literal->target_value = tv;
+       return true;
+}
+
+static void create_integer_tarval(literal_expression_t *literal)
+{
+       unsigned  us     = 0;
+       unsigned  ls     = 0;
+       symbol_t *suffix = literal->suffix;
+       /* parse suffix */
+       if (suffix != NULL) {
+               for (const char *c = suffix->string; *c != '\0'; ++c) {
+                       if (*c == 'u' || *c == 'U') { ++us; }
+                       if (*c == 'l' || *c == 'L') { ++ls; }
                }
        }
-       char    buf[128];
-       size_t  len = snprintf(buf, sizeof(buf), "%lld", v);
-       tarval *tv = new_tarval_from_str(buf, len, mode);
 
-       return new_d_Const(dbgi, tv);
+       unsigned char base = 10;
+       if (literal->base.kind == EXPR_LITERAL_INTEGER_OCTAL) {
+               base = 8;
+       } else if (literal->base.kind == EXPR_LITERAL_INTEGER_HEXADECIMAL) {
+               base = 16;
+       } else {
+               assert(literal->base.kind == EXPR_LITERAL_INTEGER);
+       }
+
+       tarval_int_overflow_mode_t old_mode = tarval_get_integer_overflow_mode();
+
+       /* now try if the constant is small enough for some types */
+       tarval_set_integer_overflow_mode(TV_OVERFLOW_BAD);
+       if (ls < 1) {
+               if (us == 0 && try_create_integer(literal, type_int, base))
+                       goto finished;
+               if ((us == 1 || base != 10)
+                               && try_create_integer(literal, type_unsigned_int, base))
+                       goto finished;
+       }
+       if (ls < 2) {
+               if (us == 0 && try_create_integer(literal, type_long, base))
+                       goto finished;
+               if ((us == 1 || base != 10)
+                               && try_create_integer(literal, type_unsigned_long, base))
+                       goto finished;
+       }
+       /* last try? then we should not report tarval_bad */
+       if (us != 1 && base == 10)
+               tarval_set_integer_overflow_mode(TV_OVERFLOW_WRAP);
+       if (us == 0 && try_create_integer(literal, type_long_long, base))
+               goto finished;
+
+       /* last try */
+       assert(us == 1 || base != 10);
+       tarval_set_integer_overflow_mode(TV_OVERFLOW_WRAP);
+       bool res = try_create_integer(literal, type_unsigned_long_long, base);
+       if (res == false)
+               panic("internal error when parsing number literal");
+
+finished:
+       tarval_set_integer_overflow_mode(old_mode);
+}
+
+void determine_literal_type(literal_expression_t *literal)
+{
+       switch (literal->base.kind) {
+       case EXPR_LITERAL_INTEGER:
+       case EXPR_LITERAL_INTEGER_OCTAL:
+       case EXPR_LITERAL_INTEGER_HEXADECIMAL:
+               create_integer_tarval(literal);
+               return;
+       default:
+               break;
+       }
 }
 
 /**
- * Creates a Const node representing a wide character constant.
+ * Creates a Const node representing a constant.
  */
-static ir_node *wide_character_constant_to_firm(const const_expression_t *cnst)
+static ir_node *literal_to_firm(const literal_expression_t *literal)
 {
-       dbg_info *dbgi = get_dbg_info(&cnst->base.source_position);
-       ir_mode  *mode = get_ir_mode_arithmetic(cnst->base.type);
+       type_t     *type   = skip_typeref(literal->base.type);
+       ir_mode    *mode   = get_ir_mode_storage(type);
+       const char *string = literal->value.begin;
+       size_t      size   = literal->value.size;
+       tarval     *tv;
 
-       long long int v = cnst->v.wide_character.begin[0];
+       switch (literal->base.kind) {
+       case EXPR_LITERAL_WIDE_CHARACTER: {
+               utf32  v = read_utf8_char(&string);
+               char   buf[128];
+               size_t len = snprintf(buf, sizeof(buf), UTF32_PRINTF_FORMAT, v);
 
-       char    buf[128];
-       size_t  len = snprintf(buf, sizeof(buf), "%lld", v);
-       tarval *tv = new_tarval_from_str(buf, len, mode);
+               tv = new_tarval_from_str(buf, len, mode);
+               goto make_const;
+       }
+       case EXPR_LITERAL_CHARACTER: {
+               long long int v;
+               if (size == 1 && char_is_signed) {
+                       v = (signed char)string[0];
+               } else {
+                       v = 0;
+                       for (size_t i = 0; i < size; ++i) {
+                               v = (v << 8) | ((unsigned char)string[i]);
+                       }
+               }
+               char   buf[128];
+               size_t len = snprintf(buf, sizeof(buf), "%lld", v);
 
-       return new_d_Const(dbgi, tv);
+               tv = new_tarval_from_str(buf, len, mode);
+               goto make_const;
+       }
+       case EXPR_LITERAL_INTEGER:
+       case EXPR_LITERAL_INTEGER_OCTAL:
+       case EXPR_LITERAL_INTEGER_HEXADECIMAL:
+               assert(literal->target_value != NULL);
+               tv = literal->target_value;
+               goto make_const;
+       case EXPR_LITERAL_FLOATINGPOINT:
+               tv = new_tarval_from_str(string, size, mode);
+               goto make_const;
+       case EXPR_LITERAL_FLOATINGPOINT_HEXADECIMAL: {
+               char *buffer = alloca(size + 2);
+               memcpy(buffer, "0x", 2);
+               memcpy(buffer+2, string, size);
+               tv = new_tarval_from_str(buffer, size+2, mode);
+               goto make_const;
+       }
+       case EXPR_LITERAL_BOOLEAN:
+               if (string[0] == 't') {
+                       tv = get_mode_one(mode);
+               } else {
+                       assert(string[0] == 'f');
+                       tv = get_mode_null(mode);
+               }
+               goto make_const;
+       case EXPR_LITERAL_MS_NOOP:
+               tv = get_mode_null(mode);
+               goto make_const;
+       default:
+               break;
+       }
+       panic("Invalid literal kind found");
+
+make_const: ;
+       dbg_info *dbgi       = get_dbg_info(&literal->base.source_position);
+       ir_node  *res        = new_d_Const(dbgi, tv);
+       ir_mode  *mode_arith = get_ir_mode_arithmetic(type);
+       return create_conv(dbgi, res, mode_arith);
 }
 
 /*
@@ -1192,23 +1385,6 @@ static ir_node *get_trampoline_region(dbg_info *dbgi, ir_entity *entity)
                               region);
 }
 
-
-/**
- * Creates a SymConst for a given entity.
- *
- * @param dbgi    debug info
- * @param mode    the (reference) mode for the SymConst
- * @param entity  the entity
- */
-static ir_node *create_symconst(dbg_info *dbgi, ir_mode *mode,
-                                ir_entity *entity)
-{
-       assert(entity != NULL);
-       union symconst_symbol sym;
-       sym.entity_p = entity;
-       return new_d_SymConst(dbgi, mode, sym, symconst_addr_ent);
-}
-
 /**
  * Creates a trampoline for a function represented by an entity.
  *
@@ -1230,101 +1406,6 @@ static ir_node *create_trampoline(dbg_info *dbgi, ir_mode *mode,
        return new_Proj(irn, mode, pn_Builtin_1_result);
 }
 
-/**
- * Creates a SymConst node representing a string constant.
- *
- * @param src_pos    the source position of the string constant
- * @param id_prefix  a prefix for the name of the generated string constant
- * @param value      the value of the string constant
- */
-static ir_node *string_to_firm(const source_position_t *const src_pos,
-                               const char *const id_prefix,
-                               const string_t *const value)
-{
-       ir_type  *const global_type = get_glob_type();
-       dbg_info *const dbgi        = get_dbg_info(src_pos);
-       ir_type  *const type        = new_type_array(1, ir_type_const_char);
-
-       ident     *const id     = id_unique(id_prefix);
-       ir_entity *const entity = new_d_entity(global_type, id, type, dbgi);
-       set_entity_ld_ident(entity, id);
-       set_entity_visibility(entity, ir_visibility_private);
-       add_entity_linkage(entity, IR_LINKAGE_CONSTANT);
-
-       ir_type *const elem_type = ir_type_const_char;
-       ir_mode *const mode      = get_type_mode(elem_type);
-
-       const char* const string = value->begin;
-       const size_t      slen   = value->size;
-
-       set_array_lower_bound_int(type, 0, 0);
-       set_array_upper_bound_int(type, 0, slen);
-       set_type_size_bytes(type, slen);
-       set_type_state(type, layout_fixed);
-
-       ir_initializer_t *initializer = create_initializer_compound(slen);
-       for (size_t i = 0; i < slen; ++i) {
-               tarval           *tv  = new_tarval_from_long(string[i], mode);
-               ir_initializer_t *val = create_initializer_tarval(tv);
-               set_initializer_compound_value(initializer, i, val);
-       }
-       set_entity_initializer(entity, initializer);
-
-       return create_symconst(dbgi, mode_P_data, entity);
-}
-
-/**
- * Creates a SymConst node representing a string literal.
- *
- * @param literal   the string literal
- */
-static ir_node *string_literal_to_firm(
-               const string_literal_expression_t* literal)
-{
-       return string_to_firm(&literal->base.source_position, "str.%u",
-                             &literal->value);
-}
-
-/**
- * Creates a SymConst node representing a wide string literal.
- *
- * @param literal   the wide string literal
- */
-static ir_node *wide_string_literal_to_firm(
-       const wide_string_literal_expression_t* const literal)
-{
-       ir_type *const global_type = get_glob_type();
-       ir_type *const elem_type   = ir_type_wchar_t;
-       dbg_info *const dbgi       = get_dbg_info(&literal->base.source_position);
-       ir_type *const type        = new_type_array(1, elem_type);
-
-       ident     *const id     = id_unique("str.%u");
-       ir_entity *const entity = new_d_entity(global_type, id, type, dbgi);
-       set_entity_ld_ident(entity, id);
-       set_entity_visibility(entity, ir_visibility_private);
-       add_entity_linkage(entity, IR_LINKAGE_CONSTANT);
-
-       ir_mode *const mode      = get_type_mode(elem_type);
-
-       const wchar_rep_t *const string = literal->value.begin;
-       const size_t             slen   = literal->value.size;
-
-       set_array_lower_bound_int(type, 0, 0);
-       set_array_upper_bound_int(type, 0, slen);
-       set_type_size_bytes(type, slen * get_mode_size_bytes(mode));
-       set_type_state(type, layout_fixed);
-
-       ir_initializer_t *initializer = create_initializer_compound(slen);
-       for (size_t i = 0; i < slen; ++i) {
-               tarval           *tv  = new_tarval_from_long(string[i], mode);
-               ir_initializer_t *val = create_initializer_tarval(tv);
-               set_initializer_compound_value(initializer, i, val);
-       }
-       set_entity_initializer(entity, initializer);
-
-       return create_symconst(dbgi, mode_P_data, entity);
-}
-
 /**
  * Dereference an address.
  *
@@ -3449,16 +3530,13 @@ static ir_node *_expression_to_firm(const expression_t *expression)
 #endif
 
        switch (expression->kind) {
-       case EXPR_CHARACTER_CONSTANT:
-               return character_constant_to_firm(&expression->conste);
-       case EXPR_WIDE_CHARACTER_CONSTANT:
-               return wide_character_constant_to_firm(&expression->conste);
-       case EXPR_CONST:
-               return const_to_firm(&expression->conste);
+       EXPR_LITERAL_CASES
+               return literal_to_firm(&expression->literal);
        case EXPR_STRING_LITERAL:
-               return string_literal_to_firm(&expression->string);
+               return string_to_firm(&expression->base.source_position, "str.%u",
+                                     &expression->literal.value);
        case EXPR_WIDE_STRING_LITERAL:
-               return wide_string_literal_to_firm(&expression->wide_string);
+               return wide_string_literal_to_firm(&expression->string_literal);
        case EXPR_REFERENCE:
                return reference_expression_to_firm(&expression->reference);
        case EXPR_REFERENCE_ENUM_VALUE:
@@ -4084,19 +4162,19 @@ static ir_initializer_t *create_ir_initializer_string(
 static ir_initializer_t *create_ir_initializer_wide_string(
                const initializer_wide_string_t *initializer, type_t *type)
 {
-       size_t            string_len    = initializer->string.size;
        assert(type->kind == TYPE_ARRAY);
        assert(type->array.size_constant);
        size_t            len           = type->array.size;
+       size_t            string_len    = wstrlen(&initializer->string);
        ir_initializer_t *irinitializer = create_initializer_compound(len);
 
-       const wchar_rep_t *string = initializer->string.begin;
-       ir_mode           *mode   = get_type_mode(ir_type_wchar_t);
+       const char *p    = initializer->string.begin;
+       ir_mode    *mode = get_type_mode(ir_type_wchar_t);
 
        for (size_t i = 0; i < len; ++i) {
-               wchar_rep_t c = 0;
+               utf32 c = 0;
                if (i < string_len) {
-                       c = string[i];
+                       c = read_utf8_char(&p);
                }
                tarval *tv = new_tarval_from_long(c, mode);
                ir_initializer_t *char_initializer = create_initializer_tarval(tv);
@@ -5981,6 +6059,9 @@ static void global_asm_to_firm(statement_t *s)
 
 void translation_unit_to_firm(translation_unit_t *unit)
 {
+       /* initialize firm arithmetic */
+       tarval_set_integer_overflow_mode(TV_OVERFLOW_WRAP);
+
        /* just to be sure */
        continue_label           = NULL;
        break_label              = NULL;
diff --git a/ast_t.h b/ast_t.h
index e1611fb..2e389c0 100644 (file)
--- a/ast_t.h
+++ b/ast_t.h
@@ -29,6 +29,7 @@
 #include "type.h"
 #include "entity_t.h"
 #include "adt/obst.h"
+#include "target_value.h"
 
 /** The AST obstack contains all data that must stay in the AST. */
 extern struct obstack ast_obstack;
@@ -66,9 +67,15 @@ typedef enum expression_kind_t {
        EXPR_INVALID,
        EXPR_REFERENCE,
        EXPR_REFERENCE_ENUM_VALUE,
-       EXPR_CONST,
-       EXPR_CHARACTER_CONSTANT,
-       EXPR_WIDE_CHARACTER_CONSTANT,
+       EXPR_LITERAL_BOOLEAN,
+       EXPR_LITERAL_INTEGER,
+       EXPR_LITERAL_INTEGER_OCTAL,
+       EXPR_LITERAL_INTEGER_HEXADECIMAL,
+       EXPR_LITERAL_FLOATINGPOINT,
+       EXPR_LITERAL_FLOATINGPOINT_HEXADECIMAL,
+       EXPR_LITERAL_CHARACTER,
+       EXPR_LITERAL_WIDE_CHARACTER,
+       EXPR_LITERAL_MS_NOOP, /**< MS __noop extension */
        EXPR_STRING_LITERAL,
        EXPR_WIDE_STRING_LITERAL,
        EXPR_COMPOUND_LITERAL,
@@ -226,6 +233,17 @@ typedef enum funcname_kind_t {
        EXPR_UNARY_CASES_MANDATORY \
        EXPR_UNARY_CASES_OPTIONAL
 
+#define EXPR_LITERAL_CASES                        \
+       case EXPR_LITERAL_BOOLEAN:                    \
+       case EXPR_LITERAL_INTEGER:                    \
+       case EXPR_LITERAL_INTEGER_OCTAL:              \
+       case EXPR_LITERAL_INTEGER_HEXADECIMAL:        \
+       case EXPR_LITERAL_FLOATINGPOINT:              \
+       case EXPR_LITERAL_FLOATINGPOINT_HEXADECIMAL:  \
+       case EXPR_LITERAL_CHARACTER:                  \
+       case EXPR_LITERAL_WIDE_CHARACTER:             \
+       case EXPR_LITERAL_MS_NOOP:
+
 /**
  * The base class of every expression.
  */
@@ -240,18 +258,15 @@ struct expression_base_t {
 };
 
 /**
- * A constant.
+ * integer/float constants, character and string literals
  */
-struct const_expression_t {
+struct literal_expression_t {
        expression_base_t  base;
-       union {
-               long long      int_value;
-               long double    float_value;
-               string_t       character;
-               wide_string_t  wide_character;
-       } v;
-       bool               is_ms_noop;  /**< True, if this constant is the result
-                                            of an microsoft __noop operator */
+       string_t           value;
+       symbol_t          *suffix;
+
+       /* ast2firm data */
+       tarval            *target_value;
 };
 
 struct string_literal_expression_t {
@@ -265,11 +280,6 @@ struct funcname_expression_t {
        string_t           value;     /**< the value once assigned. */
 };
 
-struct wide_string_literal_expression_t {
-       expression_base_t  base;
-       wide_string_t      value;
-};
-
 struct compound_literal_expression_t {
        expression_base_t  base;
        type_t            *type;
@@ -395,10 +405,9 @@ struct label_address_expression_t {
 union expression_t {
        expression_kind_t                     kind;
        expression_base_t                     base;
-       const_expression_t                    conste;
+       literal_expression_t                  literal;
+       string_literal_expression_t           string_literal;
        funcname_expression_t                 funcname;
-       string_literal_expression_t           string;
-       wide_string_literal_expression_t      wide_string;
        compound_literal_expression_t         compound_literal;
        builtin_constant_expression_t         builtin_constant;
        builtin_types_compatible_expression_t builtin_types_compatible;
@@ -449,7 +458,7 @@ struct initializer_string_t {
 
 struct initializer_wide_string_t {
        initializer_base_t  base;
-       wide_string_t       string;
+       string_t            string;
 };
 
 struct initializer_designator_t {
index eca3f61..e709e61 100644 (file)
@@ -420,7 +420,7 @@ const char *get_deprecated_string(const attribute_t *attribute)
                expression_t *expression = argument->v.expression;
                if (expression->kind != EXPR_STRING_LITERAL)
                        return NULL;
-               return expression->string.value.begin;
+               return expression->literal.value.begin;
        }
        return NULL;
 }
index ad81055..0b92ae5 100644 (file)
@@ -67,12 +67,6 @@ static void diagnosticvf(const char *const fmt, va_list ap)
                                        fputc(*f, stderr);
                                        break;
 
-                               case 'C': {
-                                       const wint_t val = va_arg(ap, wint_t);
-                                       fprintf(stderr, "%lc", val);
-                                       break;
-                               }
-
                                case 'c': {
                                        const unsigned char val = (unsigned char) va_arg(ap, int);
                                        fputc(val, stderr);
@@ -91,6 +85,14 @@ static void diagnosticvf(const char *const fmt, va_list ap)
                                        break;
                                }
 
+                               case 'S': {
+                                       const string_t *str = va_arg(ap, const string_t*);
+                                       for (size_t i = 0; i < str->size; ++i) {
+                                               fputc(str->begin[i], stderr);
+                                       }
+                                       break;
+                               }
+
                                case 'u': {
                                        const unsigned int val = va_arg(ap, unsigned int);
                                        fprintf(stderr, "%u", val);
index 4c12e4c..113481c 100644 (file)
@@ -51,7 +51,7 @@
  *  %K  const token_t*
  *  %k  token_kind_t
  *  %P  const source_position_t *
- *
+ *  %S  const string_t *
  */
 void diagnosticf(const char *fmt, ...);
 void errorf(const source_position_t *pos, const char *fmt, ...);
index 7c8304e..7609018 100644 (file)
@@ -17,8 +17,9 @@
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
  * 02111-1307, USA.
  */
+#include <config.h>
+
 #include <ctype.h>
-#include <wctype.h>
 
 #include "adt/util.h"
 #include "format_check.h"
@@ -93,7 +94,7 @@ static const char* get_length_modifier_name(const format_length_modifier_t mod)
 
 static void warn_invalid_length_modifier(const source_position_t *pos,
                                          const format_length_modifier_t mod,
-                                         const wchar_rep_t conversion)
+                                         const utf32 conversion)
 {
        warningf(pos,
                "invalid length modifier '%s' for conversion specifier '%%%c'",
@@ -101,113 +102,51 @@ static void warn_invalid_length_modifier(const source_position_t *pos,
        );
 }
 
-typedef struct vchar_t vchar_t;
-struct vchar_t {
-       const void *string;   /**< the string */
-       size_t     position;  /**< current position */
-       size_t     size;      /**< size of the string */
-
-       /** return the first character of the string and setthe position to 0. */
-       unsigned (*first)(vchar_t *self);
-       /** return the next character of the string */
-       unsigned (*next)(vchar_t *self);
-       /** return non_zero if the given character is a digit */
-       int (*is_digit)(unsigned vchar);
-};
-
-static unsigned string_first(vchar_t *self)
-{
-       self->position = 0;
-       const string_t *string = self->string;
-       return string->begin[0];
-}
-
-static unsigned string_next(vchar_t *self)
-{
-       ++self->position;
-       const string_t *string = self->string;
-       return string->begin[self->position];
-}
-
-static int string_isdigit(unsigned vchar)
-{
-       return isdigit(vchar);
-}
-
-static unsigned wstring_first(vchar_t *self)
-{
-       self->position = 0;
-       const wide_string_t *wstring = self->string;
-       return wstring->begin[0];
-}
-
-static unsigned wstring_next(vchar_t *self)
-{
-       ++self->position;
-       const wide_string_t *wstring = self->string;
-       return wstring->begin[self->position];
-}
-
-static int wstring_isdigit(unsigned vchar)
-{
-       return iswdigit(vchar);
-}
-
-static bool atend(vchar_t *self)
-{
-       return self->position + 1 == self->size;
-}
-
 /**
  * Check printf-style format.
  */
 static int internal_check_printf_format(const expression_t *fmt_expr,
-    const call_argument_t *arg, const format_spec_t *spec)
+                                        const call_argument_t *arg,
+                                        const format_spec_t *spec)
 {
-       if (fmt_expr->kind == EXPR_UNARY_CAST_IMPLICIT) {
+       while (fmt_expr->kind == EXPR_UNARY_CAST_IMPLICIT) {
                fmt_expr = fmt_expr->unary.value;
        }
 
-       vchar_t vchar;
-       switch (fmt_expr->kind) {
-               case EXPR_STRING_LITERAL:
-                       vchar.string   = &fmt_expr->string.value;
-                       vchar.size     = fmt_expr->string.value.size;
-                       vchar.first    = string_first;
-                       vchar.next     = string_next;
-                       vchar.is_digit = string_isdigit;
-                       break;
-
-               case EXPR_WIDE_STRING_LITERAL:
-                       vchar.string   = &fmt_expr->wide_string.value;
-                       vchar.size     = fmt_expr->wide_string.value.size;
-                       vchar.first    = wstring_first;
-                       vchar.next     = wstring_next;
-                       vchar.is_digit = wstring_isdigit;
-                       break;
+       /*
+        * gettext results in expressions like (X ? "format_string" : Y)
+        * we assume the left part is the format string
+        */
+       if (fmt_expr->kind == EXPR_CONDITIONAL) {
+               conditional_expression_t const *const c = &fmt_expr->conditional;
+               expression_t             const *      t = c->true_expression;
+               if (t == NULL)
+                       t = c->condition;
+               int const nt = internal_check_printf_format(t,                   arg, spec);
+               int const nf = internal_check_printf_format(c->false_expression, arg, spec);
+               return nt > nf ? nt : nf;
+       }
 
-               case EXPR_CONDITIONAL: {
-                       conditional_expression_t const *const c = &fmt_expr->conditional;
-                       expression_t             const *      t = c->true_expression;
-                       if (t == NULL)
-                               t = c->condition;
-                       int const nt = internal_check_printf_format(t,                   arg, spec);
-                       int const nf = internal_check_printf_format(c->false_expression, arg, spec);
-                       return nt > nf ? nt : nf;
-               }
+       if (fmt_expr->kind != EXPR_STRING_LITERAL
+                       && fmt_expr->kind != EXPR_WIDE_STRING_LITERAL)
+               return -1;
 
-               default:
-                       return -1;
-       }
+       const char *string = fmt_expr->literal.value.begin;
+       size_t      size   = fmt_expr->literal.value.size;
+       const char *c      = string;
 
        const source_position_t *pos = &fmt_expr->base.source_position;
-       unsigned fmt     = vchar.first(&vchar);
        unsigned num_fmt = 0;
-       for (; fmt != '\0'; fmt = vchar.next(&vchar)) {
+       char     fmt;
+       for (fmt = *c; fmt != '\0'; fmt = *(++c)) {
                if (fmt != '%')
                        continue;
-               fmt = vchar.next(&vchar);
+               fmt = *(++c);
 
+               if (fmt == '\0') {
+                       warningf(pos, "dangling %% in format string");
+                       break;
+               }
                if (fmt == '%')
                        continue;
 
@@ -215,15 +154,15 @@ static int internal_check_printf_format(const expression_t *fmt_expr,
 
                format_flags_t fmt_flags = FMT_FLAG_NONE;
                if (fmt == '0') {
-                       fmt = vchar.next(&vchar);
+                       fmt = *(++c);
                        fmt_flags |= FMT_FLAG_ZERO;
                }
 
                /* argument selector or minimum field width */
-               if (vchar.is_digit(fmt)) {
+               if (isdigit(fmt)) {
                        do {
-                               fmt = vchar.next(&vchar);
-                       } while (vchar.is_digit(fmt));
+                               fmt = *(++c);
+                       } while (isdigit(fmt));
 
                        /* digit string was ... */
                        if (fmt == '$') {
@@ -263,13 +202,13 @@ static int internal_check_printf_format(const expression_t *fmt_expr,
                                        warningf(pos, "repeated flag '%c' in conversion specification %u", (char)fmt, num_fmt);
                                }
                                fmt_flags |= flag;
-                               fmt = vchar.next(&vchar);
+                               fmt = *(++c);
                        }
 break_fmt_flags:
 
                        /* minimum field width */
                        if (fmt == '*') {
-                               fmt = vchar.next(&vchar);
+                               fmt = *(++c);
                                if (arg == NULL) {
                                        warningf(pos, "missing argument for '*' field width in conversion specification %u", num_fmt);
                                        return -1;
@@ -280,17 +219,17 @@ break_fmt_flags:
                                }
                                arg = arg->next;
                        } else {
-                               while (vchar.is_digit(fmt)) {
-                                       fmt = vchar.next(&vchar);
+                               while (isdigit(fmt)) {
+                                       fmt = *(++c);
                                }
                        }
                }
 
                /* precision */
                if (fmt == '.') {
-                       fmt = vchar.next(&vchar);
+                       fmt = *(++c);
                        if (fmt == '*') {
-                               fmt = vchar.next(&vchar);
+                               fmt = *(++c);
                                if (arg == NULL) {
                                        warningf(pos, "missing argument for '*' precision in conversion specification %u", num_fmt);
                                        return -1;
@@ -302,8 +241,8 @@ break_fmt_flags:
                                arg = arg->next;
                        } else {
                                /* digit string may be omitted */
-                               while (vchar.is_digit(fmt)) {
-                                       fmt = vchar.next(&vchar);
+                               while (isdigit(fmt)) {
+                                       fmt = *(++c);
                                }
                        }
                }
@@ -312,9 +251,9 @@ break_fmt_flags:
                format_length_modifier_t fmt_mod;
                switch (fmt) {
                        case 'h':
-                               fmt = vchar.next(&vchar);
+                               fmt = *(++c);
                                if (fmt == 'h') {
-                                       fmt = vchar.next(&vchar);
+                                       fmt = *(++c);
                                        fmt_mod = FMT_MOD_hh;
                                } else {
                                        fmt_mod = FMT_MOD_h;
@@ -322,48 +261,48 @@ break_fmt_flags:
                                break;
 
                        case 'l':
-                               fmt = vchar.next(&vchar);
+                               fmt = *(++c);
                                if (fmt == 'l') {
-                                       fmt = vchar.next(&vchar);
+                                       fmt = *(++c);
                                        fmt_mod = FMT_MOD_ll;
                                } else {
                                        fmt_mod = FMT_MOD_l;
                                }
                                break;
 
-                       case 'L': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_L;    break;
-                       case 'j': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_j;    break;
-                       case 't': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_t;    break;
-                       case 'z': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_z;    break;
-                       case 'q': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_q;    break;
+                       case 'L': fmt = *(++c); fmt_mod = FMT_MOD_L;    break;
+                       case 'j': fmt = *(++c); fmt_mod = FMT_MOD_j;    break;
+                       case 't': fmt = *(++c); fmt_mod = FMT_MOD_t;    break;
+                       case 'z': fmt = *(++c); fmt_mod = FMT_MOD_z;    break;
+                       case 'q': fmt = *(++c); fmt_mod = FMT_MOD_q;    break;
                        /* microsoft mode */
                        case 'w':
                                if (c_mode & _MS) {
-                                       fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_w;
+                                       fmt = *(++c); fmt_mod = FMT_MOD_w;
                                } else {
                                        fmt_mod = FMT_MOD_NONE;
                                }
                                break;
                        case 'I':
                                if (c_mode & _MS) {
-                                       fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_I;
+                                       fmt = *(++c); fmt_mod = FMT_MOD_I;
                                        if (fmt == '3') {
-                                               fmt = vchar.next(&vchar);
+                                               fmt = *(++c);
                                                if (fmt == '2') {
-                                                       fmt = vchar.next(&vchar);
+                                                       fmt = *(++c);
                                                        fmt_mod = FMT_MOD_I32;
                                                } else {
                                                        /* rewind */
-                                                       --vchar.position;
+                                                       fmt = *(--c);
                                                }
                                        } else if (fmt == '6') {
-                                               fmt = vchar.next(&vchar);
+                                               fmt = *(++c);
                                                if (fmt == '4') {
-                                                       fmt = vchar.next(&vchar);
+                                                       fmt = *(++c);
                                                        fmt_mod = FMT_MOD_I64;
                                                } else {
                                                        /* rewind */
-                                                       --vchar.position;
+                                                       fmt = *(--c);
                                                }
                                        }
                                } else {
@@ -375,10 +314,6 @@ break_fmt_flags:
                                break;
                }
 
-               if (fmt == '\0') {
-                       warningf(pos, "dangling %% in format string");
-                       break;
-               }
 
                type_t            *expected_type;
                type_qualifiers_t  expected_qual = TYPE_QUALIFIER_NONE;
@@ -529,7 +464,7 @@ eval_fmt_mod_unsigned:
                                break;
 
                        default:
-                               warningf(pos, "encountered unknown conversion specifier '%%%C' at position %u", (wint_t)fmt, num_fmt);
+                               warningf(pos, "encountered unknown conversion specifier '%%%c' at position %u", fmt, num_fmt);
                                if (arg == NULL) {
                                        warningf(pos, "too few arguments for format string");
                                        return -1;
@@ -577,10 +512,8 @@ eval_fmt_mod_unsigned:
                                                goto next_arg;
                                        }
                                }
-                       } else {
-                               if (get_unqualified_type(arg_skip) == expected_type_skip) {
-                                       goto next_arg;
-                               }
+                       } else if (get_unqualified_type(arg_skip) == expected_type_skip) {
+                               goto next_arg;
                        }
                        if (is_type_valid(arg_skip)) {
                                warningf(pos,
@@ -591,7 +524,8 @@ eval_fmt_mod_unsigned:
 next_arg:
                arg = arg->next;
        }
-       if (!atend(&vchar)) {
+       assert(fmt == '\0');
+       if (c+1 < string + size) {
                warningf(pos, "format string contains '\\0'");
        }
        return num_fmt;
@@ -600,7 +534,8 @@ next_arg:
 /**
  * Check printf-style format.
  */
-static void check_printf_format(call_argument_t const *arg, format_spec_t const *const spec)
+static void check_printf_format(call_argument_t const *arg,
+                                format_spec_t const *const spec)
 {
        /* find format arg */
        size_t idx = 0;
@@ -625,16 +560,17 @@ static void check_printf_format(call_argument_t const *arg, format_spec_t const
                ++num_args;
        if (num_args > (size_t)num_fmt) {
                warningf(&fmt_expr->base.source_position,
-                       "%u argument%s but only %u format specifier%s",
-                       num_args, num_args != 1 ? "s" : "",
-                       num_fmt,  num_fmt  != 1 ? "s" : "");
+                        "%u argument%s but only %u format specifier%s",
+                        num_args, num_args != 1 ? "s" : "",
+                        num_fmt,  num_fmt  != 1 ? "s" : "");
        }
 }
 
 /**
  * Check scanf-style format.
  */
-static void check_scanf_format(const call_argument_t *arg, const format_spec_t *spec)
+static void check_scanf_format(const call_argument_t *arg,
+                               const format_spec_t *spec)
 {
        /* find format arg */
        unsigned idx = 0;
@@ -649,252 +585,240 @@ static void check_scanf_format(const call_argument_t *arg, const format_spec_t *
                fmt_expr = fmt_expr->unary.value;
        }
 
-       vchar_t vchar;
-       if (fmt_expr->kind == EXPR_WIDE_STRING_LITERAL) {
-               vchar.string   = &fmt_expr->wide_string.value;
-               vchar.size     = fmt_expr->wide_string.value.size;
-               vchar.first    = wstring_first;
-               vchar.next     = wstring_next;
-               vchar.is_digit = wstring_isdigit;
-       } else if (fmt_expr->kind == EXPR_STRING_LITERAL) {
-               vchar.string   = &fmt_expr->string.value;
-               vchar.size     = fmt_expr->string.value.size;
-               vchar.first    = string_first;
-               vchar.next     = string_next;
-               vchar.is_digit = string_isdigit;
-       } else {
+       if (fmt_expr->kind != EXPR_STRING_LITERAL
+                       && fmt_expr->kind != EXPR_WIDE_STRING_LITERAL)
                return;
-       }
+
+       const char *string = fmt_expr->literal.value.begin;
+       size_t      size   = fmt_expr->literal.value.size;
+       const char *c      = string;
+
        /* find the real args */
        for (; idx < spec->arg_idx && arg != NULL; ++idx)
                arg = arg->next;
 
        const source_position_t *pos = &fmt_expr->base.source_position;
-       unsigned fmt     = vchar.first(&vchar);
        unsigned num_fmt = 0;
-       for (; fmt != '\0'; fmt = vchar.next(&vchar)) {
+       char     fmt;
+       for (fmt = *c; fmt != '\0'; fmt = *(++c)) {
                if (fmt != '%')
                        continue;
-               fmt = vchar.next(&vchar);
-
+               fmt = *(++c);
+               if (fmt == '\0') {
+                       warningf(pos, "dangling '%%' in format string");
+                       break;
+               }
                if (fmt == '%')
                        continue;
 
                ++num_fmt;
 
-               /* length modifier */
-               format_length_modifier_t fmt_mod;
+               /* look for length modifiers */
+               format_length_modifier_t fmt_mod = FMT_MOD_NONE;
                switch (fmt) {
-                       case 'h':
-                               fmt = vchar.next(&vchar);
-                               if (fmt == 'h') {
-                                       fmt = vchar.next(&vchar);
-                                       fmt_mod = FMT_MOD_hh;
-                               } else {
-                                       fmt_mod = FMT_MOD_h;
-                               }
-                               break;
+               case 'h':
+                       fmt = *(++c);
+                       if (fmt == 'h') {
+                               fmt = *(++c);
+                               fmt_mod = FMT_MOD_hh;
+                       } else {
+                               fmt_mod = FMT_MOD_h;
+                       }
+                       break;
 
-                       case 'l':
-                               fmt = vchar.next(&vchar);
-                               if (fmt == 'l') {
-                                       fmt = vchar.next(&vchar);
-                                       fmt_mod = FMT_MOD_ll;
-                               } else {
-                                       fmt_mod = FMT_MOD_l;
-                               }
-                               break;
+               case 'l':
+                       fmt = *(++c);
+                       if (fmt == 'l') {
+                               fmt = *(++c);
+                               fmt_mod = FMT_MOD_ll;
+                       } else {
+                               fmt_mod = FMT_MOD_l;
+                       }
+                       break;
 
-                       case 'L': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_L;    break;
-                       case 'j': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_j;    break;
-                       case 't': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_t;    break;
-                       case 'z': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_z;    break;
-                       /* microsoft mode */
-                       case 'w':
-                               if (c_mode & _MS) {
-                                       fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_w;
-                               } else {
-                                       fmt_mod = FMT_MOD_NONE;
-                               }
-                               break;
-                       case 'I':
-                               if (c_mode & _MS) {
-                                       fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_I;
-                                       if (fmt == '3') {
-                                               fmt = vchar.next(&vchar);
-                                               if (fmt == '2') {
-                                                       fmt = vchar.next(&vchar);
-                                                       fmt_mod = FMT_MOD_I32;
-                                               } else {
-                                                       /* rewind */
-                                                       --vchar.position;
-                                               }
-                                       } else if (fmt == '6') {
-                                               fmt = vchar.next(&vchar);
-                                               if (fmt == '4') {
-                                                       fmt = vchar.next(&vchar);
-                                                       fmt_mod = FMT_MOD_I64;
-                                               } else {
-                                                       /* rewind */
-                                                       --vchar.position;
-                                               }
+               case 'L': fmt = *(++c); fmt_mod = FMT_MOD_L; break;
+               case 'j': fmt = *(++c); fmt_mod = FMT_MOD_j; break;
+               case 't': fmt = *(++c); fmt_mod = FMT_MOD_t; break;
+               case 'z': fmt = *(++c); fmt_mod = FMT_MOD_z; break;
+               /* microsoft mode */
+               case 'w':
+                       if (c_mode & _MS) {
+                               fmt = *(++c);
+                               fmt_mod = FMT_MOD_w;
+                       }
+                       break;
+               case 'I':
+                       if (c_mode & _MS) {
+                               fmt = *(++c);
+                               fmt_mod = FMT_MOD_I;
+                               if (fmt == '3') {
+                                       fmt = *(++c);
+                                       if (fmt == '2') {
+                                               fmt = *(++c);
+                                               fmt_mod = FMT_MOD_I32;
+                                       } else {
+                                               /* rewind */
+                                               fmt = *(--c);
+                                       }
+                               } else if (fmt == '6') {
+                                       fmt = *(++c);
+                                       if (fmt == '4') {
+                                               fmt = *(++c);
+                                               fmt_mod = FMT_MOD_I64;
+                                       } else {
+                                               /* rewind */
+                                               fmt = *(--c);
                                        }
-                               } else {
-                                       fmt_mod = FMT_MOD_NONE;
                                }
-                               break;
-                       default:
-                               fmt_mod = FMT_MOD_NONE;
-                               break;
+                       }
+                       break;
                }
 
                if (fmt == '\0') {
-                       warningf(pos, "dangling %% in format string");
+                       warningf(pos, "dangling %% with conversion specifier in format string");
                        break;
                }
 
-               type_t            *expected_type;
+               type_t *expected_type;
                switch (fmt) {
-                       case 'd':
-                       case 'i':
-                               switch (fmt_mod) {
-                                       case FMT_MOD_NONE: expected_type = type_int;         break;
-                                       case FMT_MOD_hh:   expected_type = type_signed_char; break;
-                                       case FMT_MOD_h:    expected_type = type_short;       break;
-                                       case FMT_MOD_l:    expected_type = type_long;        break;
-                                       case FMT_MOD_ll:   expected_type = type_long_long;   break;
-                                       case FMT_MOD_j:    expected_type = type_intmax_t;    break;
-                                       case FMT_MOD_z:    expected_type = type_ssize_t;     break;
-                                       case FMT_MOD_t:    expected_type = type_ptrdiff_t;   break;
-                                       case FMT_MOD_I:    expected_type = type_ptrdiff_t;   break;
-                                       case FMT_MOD_I32:  expected_type = type_int32;       break;
-                                       case FMT_MOD_I64:  expected_type = type_int64;       break;
+               case 'd':
+               case 'i':
+                       switch (fmt_mod) {
+                       case FMT_MOD_NONE: expected_type = type_int;         break;
+                       case FMT_MOD_hh:   expected_type = type_signed_char; break;
+                       case FMT_MOD_h:    expected_type = type_short;       break;
+                       case FMT_MOD_l:    expected_type = type_long;        break;
+                       case FMT_MOD_ll:   expected_type = type_long_long;   break;
+                       case FMT_MOD_j:    expected_type = type_intmax_t;    break;
+                       case FMT_MOD_z:    expected_type = type_ssize_t;     break;
+                       case FMT_MOD_t:    expected_type = type_ptrdiff_t;   break;
+                       case FMT_MOD_I:    expected_type = type_ptrdiff_t;   break;
+                       case FMT_MOD_I32:  expected_type = type_int32;       break;
+                       case FMT_MOD_I64:  expected_type = type_int64;       break;
 
-                                       default:
-                                               warn_invalid_length_modifier(pos, fmt_mod, fmt);
-                                               goto next_arg;
-                               }
-                               break;
-
-                       case 'o':
-                       case 'X':
-                       case 'x':
-                               goto eval_fmt_mod_unsigned;
-
-                       case 'u':
-eval_fmt_mod_unsigned:
-                               switch (fmt_mod) {
-                                       case FMT_MOD_NONE: expected_type = type_unsigned_int;       break;
-                                       case FMT_MOD_hh:   expected_type = type_unsigned_char;      break;
-                                       case FMT_MOD_h:    expected_type = type_unsigned_short;     break;
-                                       case FMT_MOD_l:    expected_type = type_unsigned_long;      break;
-                                       case FMT_MOD_ll:   expected_type = type_unsigned_long_long; break;
-                                       case FMT_MOD_j:    expected_type = type_uintmax_t;          break;
-                                       case FMT_MOD_z:    expected_type = type_size_t;             break;
-                                       case FMT_MOD_t:    expected_type = type_uptrdiff_t;         break;
-                                       case FMT_MOD_I:    expected_type = type_size_t;             break;
-                                       case FMT_MOD_I32:  expected_type = type_unsigned_int32;     break;
-                                       case FMT_MOD_I64:  expected_type = type_unsigned_int64;     break;
+                       default:
+                               warn_invalid_length_modifier(pos, fmt_mod, fmt);
+                               goto next_arg;
+                       }
+                       break;
 
-                                       default:
-                                               warn_invalid_length_modifier(pos, fmt_mod, fmt);
-                                               goto next_arg;
-                               }
-                               break;
+               case 'o':
+               case 'X':
+               case 'x':
+               case 'u':
+                       switch (fmt_mod) {
+                       case FMT_MOD_NONE: expected_type = type_unsigned_int;       break;
+                       case FMT_MOD_hh:   expected_type = type_unsigned_char;      break;
+                       case FMT_MOD_h:    expected_type = type_unsigned_short;     break;
+                       case FMT_MOD_l:    expected_type = type_unsigned_long;      break;
+                       case FMT_MOD_ll:   expected_type = type_unsigned_long_long; break;
+                       case FMT_MOD_j:    expected_type = type_uintmax_t;          break;
+                       case FMT_MOD_z:    expected_type = type_size_t;             break;
+                       case FMT_MOD_t:    expected_type = type_uptrdiff_t;         break;
+                       case FMT_MOD_I:    expected_type = type_size_t;             break;
+                       case FMT_MOD_I32:  expected_type = type_unsigned_int32;     break;
+                       case FMT_MOD_I64:  expected_type = type_unsigned_int64;     break;
 
-                       case 'A':
-                       case 'a':
-                       case 'E':
-                       case 'e':
-                       case 'F':
-                       case 'f':
-                       case 'G':
-                       case 'g':
-                               switch (fmt_mod) {
-                                       case FMT_MOD_l:    /* l modifier is ignored */
-                                       case FMT_MOD_NONE: expected_type = type_double;      break;
-                                       case FMT_MOD_L:    expected_type = type_long_double; break;
+                       default:
+                               warn_invalid_length_modifier(pos, fmt_mod, fmt);
+                               goto next_arg;
+                       }
+                       break;
 
-                                       default:
-                                               warn_invalid_length_modifier(pos, fmt_mod, fmt);
-                                               goto next_arg;
-                               }
-                               break;
+               case 'A':
+               case 'a':
+               case 'E':
+               case 'e':
+               case 'F':
+               case 'f':
+               case 'G':
+               case 'g':
+                       switch (fmt_mod) {
+                       case FMT_MOD_l:    expected_type = type_double;      break;
+                       case FMT_MOD_NONE: expected_type = type_float;       break;
+                       case FMT_MOD_L:    expected_type = type_long_double; break;
 
-                       case 'C':
-                               if (fmt_mod != FMT_MOD_NONE) {
-                                       warn_invalid_length_modifier(pos, fmt_mod, fmt);
-                                       goto next_arg;
-                               }
-                               expected_type = type_wchar_t;
-                               break;
+                       default:
+                               warn_invalid_length_modifier(pos, fmt_mod, fmt);
+                               goto next_arg;
+                       }
+                       break;
 
-                       case 'c':
-                               expected_type = type_int;
-                               switch (fmt_mod) {
-                                       case FMT_MOD_NONE: expected_type = type_int;     break; /* TODO promoted char */
-                                       case FMT_MOD_l:    expected_type = type_wint_t;  break;
-                                       case FMT_MOD_w:    expected_type = type_wchar_t; break;
+               case 'C':
+                       if (fmt_mod != FMT_MOD_NONE) {
+                               warn_invalid_length_modifier(pos, fmt_mod, fmt);
+                               goto next_arg;
+                       }
+                       expected_type = type_wchar_t;
+                       break;
 
-                                       default:
-                                               warn_invalid_length_modifier(pos, fmt_mod, fmt);
-                                               goto next_arg;
-                               }
-                               break;
+               case 'c':
+                       expected_type = type_int;
+                       switch (fmt_mod) {
+                       case FMT_MOD_NONE: expected_type = type_int;     break; /* TODO promoted char */
+                       case FMT_MOD_l:    expected_type = type_wint_t;  break;
+                       case FMT_MOD_w:    expected_type = type_wchar_t; break;
 
-                       case 'S':
-                               if (fmt_mod != FMT_MOD_NONE) {
-                                       warn_invalid_length_modifier(pos, fmt_mod, fmt);
-                                       goto next_arg;
-                               }
-                               expected_type = type_wchar_t;
-                               break;
+                       default:
+                               warn_invalid_length_modifier(pos, fmt_mod, fmt);
+                               goto next_arg;
+                       }
+                       break;
 
-                       case 's':
-                       case '[':
-                               switch (fmt_mod) {
-                                       case FMT_MOD_NONE: expected_type = type_char;    break;
-                                       case FMT_MOD_l:    expected_type = type_wchar_t; break;
-                                       case FMT_MOD_w:    expected_type = type_wchar_t; break;
+               case 'S':
+                       if (fmt_mod != FMT_MOD_NONE) {
+                               warn_invalid_length_modifier(pos, fmt_mod, fmt);
+                               goto next_arg;
+                       }
+                       expected_type = type_wchar_t;
+                       break;
 
-                                       default:
-                                               warn_invalid_length_modifier(pos, fmt_mod, fmt);
-                                               goto next_arg;
-                               }
-                               break;
+               case 's':
+               case '[':
+                       switch (fmt_mod) {
+                               case FMT_MOD_NONE: expected_type = type_char;    break;
+                               case FMT_MOD_l:    expected_type = type_wchar_t; break;
+                               case FMT_MOD_w:    expected_type = type_wchar_t; break;
 
-                       case 'p':
-                               if (fmt_mod != FMT_MOD_NONE) {
+                               default:
                                        warn_invalid_length_modifier(pos, fmt_mod, fmt);
                                        goto next_arg;
-                               }
-                               expected_type = type_void_ptr;
-                               break;
+                       }
+                       break;
 
-                       case 'n':
-                               switch (fmt_mod) {
-                                       case FMT_MOD_NONE: expected_type = type_int;         break;
-                                       case FMT_MOD_hh:   expected_type = type_signed_char; break;
-                                       case FMT_MOD_h:    expected_type = type_short;       break;
-                                       case FMT_MOD_l:    expected_type = type_long;        break;
-                                       case FMT_MOD_ll:   expected_type = type_long_long;   break;
-                                       case FMT_MOD_j:    expected_type = type_intmax_t;    break;
-                                       case FMT_MOD_z:    expected_type = type_ssize_t;     break;
-                                       case FMT_MOD_t:    expected_type = type_ptrdiff_t;   break;
+               case 'p':
+                       if (fmt_mod != FMT_MOD_NONE) {
+                               warn_invalid_length_modifier(pos, fmt_mod, fmt);
+                               goto next_arg;
+                       }
+                       expected_type = type_void_ptr;
+                       break;
 
-                                       default:
-                                               warn_invalid_length_modifier(pos, fmt_mod, fmt);
-                                               goto next_arg;
-                               }
-                               break;
+               case 'n':
+                       switch (fmt_mod) {
+                       case FMT_MOD_NONE: expected_type = type_int;         break;
+                       case FMT_MOD_hh:   expected_type = type_signed_char; break;
+                       case FMT_MOD_h:    expected_type = type_short;       break;
+                       case FMT_MOD_l:    expected_type = type_long;        break;
+                       case FMT_MOD_ll:   expected_type = type_long_long;   break;
+                       case FMT_MOD_j:    expected_type = type_intmax_t;    break;
+                       case FMT_MOD_z:    expected_type = type_ssize_t;     break;
+                       case FMT_MOD_t:    expected_type = type_ptrdiff_t;   break;
 
                        default:
-                               warningf(pos, "encountered unknown conversion specifier '%%%C' at position %u", (wint_t)fmt, num_fmt);
-                               if (arg == NULL) {
-                                       warningf(pos, "too few arguments for format string");
-                                       return;
-                               }
+                               warn_invalid_length_modifier(pos, fmt_mod, fmt);
                                goto next_arg;
+                       }
+                       break;
+
+               default:
+                       warningf(pos, "encountered unknown conversion specifier '%%%c' at format %u",
+                                fmt, num_fmt);
+                       if (arg == NULL) {
+                               warningf(pos, "too few arguments for format string");
+                               return;
+                       }
+                       goto next_arg;
                }
 
                if (arg == NULL) {
@@ -938,7 +862,8 @@ error_arg_type:
 next_arg:
                arg = arg->next;
        }
-       if (!atend(&vchar)) {
+       assert(fmt == '\0');
+       if (c+1 < string + size) {
                warningf(pos, "format string contains '\\0'");
        }
        if (arg != NULL) {
@@ -948,8 +873,8 @@ next_arg:
                        arg = arg->next;
                }
                warningf(pos, "%u argument%s but only %u format specifier%s",
-                       num_args, num_args != 1 ? "s" : "",
-                       num_fmt, num_fmt != 1 ? "s" : "");
+                        num_args, num_args != 1 ? "s" : "",
+                        num_fmt, num_fmt != 1 ? "s" : "");
        }
 }
 
@@ -1018,8 +943,9 @@ void check_format(const call_expression_t *const call)
                /* the declaration has a GNU format attribute, check it */
        } else {
                /*
-                * For some functions we always check the format, even if it was not specified.
-                * This allows to check format even in MS mode or without header included.
+                * For some functions we always check the format, even if it was not
+                * specified. This allows to check format even in MS mode or without
+                * header included.
                 */
                const char *const name = entity->base.symbol->string;
                for (size_t i = 0; i < lengthof(builtin_table); ++i) {
diff --git a/lexer.c b/lexer.c
index 247083c..c3f86b3 100644 (file)
--- a/lexer.c
+++ b/lexer.c
@@ -53,8 +53,6 @@
 #define strtold(s, e) strtod(s, e)
 #endif
 
-typedef unsigned int utf32;
-
 static utf32        c;
 token_t             lexer_token;
 symbol_t           *symbol_L;
@@ -379,7 +377,7 @@ static inline void next_char(void);
 #define MATCH_NEWLINE(code)                   \
        case '\r':                                \
                next_char();                          \
-               if(c == '\n') {                       \
+               if (c == '\n') {                      \
                        next_char();                      \
                }                                     \
                lexer_token.source_position.linenr++; \
@@ -389,13 +387,13 @@ static inline void next_char(void);
                lexer_token.source_position.linenr++; \
                code
 
-#define eat(c_type)  do { assert(c == c_type); next_char(); } while(0)
+#define eat(c_type)  do { assert(c == c_type); next_char(); } while (0)
 
 static void maybe_concat_lines(void)
 {
        eat('\\');
 
-       switch(c) {
+       switch (c) {
        MATCH_NEWLINE(return;)
 
        default:
@@ -415,23 +413,23 @@ static inline void next_char(void)
        next_real_char();
 
        /* filter trigraphs */
-       if(UNLIKELY(c == '\\')) {
+       if (UNLIKELY(c == '\\')) {
                maybe_concat_lines();
                goto end_of_next_char;
        }
 
-       if(LIKELY(c != '?'))
+       if (LIKELY(c != '?'))
                goto end_of_next_char;
 
        next_real_char();
-       if(LIKELY(c != '?')) {
+       if (LIKELY(c != '?')) {
                put_back(c);
                c = '?';
                goto end_of_next_char;
        }
 
        next_real_char();
-       switch(c) {
+       switch (c) {
        case '=': c = '#'; break;
        case '(': c = '['; break;
        case '/': c = '\\'; maybe_concat_lines(); break;
@@ -528,14 +526,11 @@ end_of_next_char:;
  */
 static void parse_symbol(void)
 {
-       symbol_t *symbol;
-       char     *string;
-
        obstack_1grow(&symbol_obstack, (char) c);
        next_char();
 
-       while(1) {
-               switch(c) {
+       while (true) {
+               switch (c) {
                DIGITS
                SYMBOL_CHARS
                        obstack_1grow(&symbol_obstack, (char) c);
@@ -551,221 +546,62 @@ dollar_sign:
 end_symbol:
        obstack_1grow(&symbol_obstack, '\0');
 
-       string = obstack_finish(&symbol_obstack);
-       symbol = symbol_table_insert(string);
+       char     *string = obstack_finish(&symbol_obstack);
+       symbol_t *symbol = symbol_table_insert(string);
 
-       lexer_token.type     = symbol->ID;
-       lexer_token.v.symbol = symbol;
+       lexer_token.type   = symbol->ID;
+       lexer_token.symbol = symbol;
 
-       if(symbol->string != string) {
+       if (symbol->string != string) {
                obstack_free(&symbol_obstack, string);
        }
 }
 
-static void parse_integer_suffix(bool is_oct_hex)
+/**
+ * parse suffixes like 'LU' or 'f' after numbers
+ */
+static void parse_number_suffix(void)
 {
-       bool is_unsigned     = false;
-       bool min_long        = false;
-       bool min_longlong    = false;
-       bool not_traditional = false;
-       int  pos             = 0;
-       char suffix[4];
-
-       if (c == 'U' || c == 'u') {
-               not_traditional = true;
-               suffix[pos++]   = toupper(c);
-               is_unsigned     = true;
-               next_char();
-               if (c == 'L' || c == 'l') {
-                       suffix[pos++] = toupper(c);
-                       min_long = true;
-                       next_char();
-                       if (c == 'L' || c == 'l') {
-                               suffix[pos++] = toupper(c);
-                               min_longlong = true;
-                               next_char();
-                       }
-               }
-       } else if (c == 'l' || c == 'L') {
-               suffix[pos++] = toupper(c);
-               min_long = true;
-               next_char();
-               if (c == 'l' || c == 'L') {
-                       not_traditional = true;
-                       suffix[pos++]   = toupper(c);
-                       min_longlong    = true;
-                       next_char();
-                       if (c == 'u' || c == 'U') {
-                               suffix[pos++] = toupper(c);
-                               is_unsigned   = true;
-                               next_char();
-                       }
-               } else if (c == 'u' || c == 'U') {
-                       not_traditional = true;
-                       suffix[pos++]   = toupper(c);
-                       is_unsigned     = true;
+       assert(obstack_object_size(&symbol_obstack) == 0);
+       while (true) {
+               switch (c) {
+               SYMBOL_CHARS
+                       obstack_1grow(&symbol_obstack, (char) c);
                        next_char();
-                       lexer_token.datatype = type_unsigned_long;
+                       break;
+               default:
+               dollar_sign:
+                       goto finish_suffix;
                }
        }
-
-       if (warning.traditional && not_traditional) {
-               suffix[pos] = '\0';
-               warningf(&lexer_token.source_position,
-                       "traditional C rejects the '%s' suffix", suffix);
+finish_suffix:
+       if (obstack_object_size(&symbol_obstack) == 0) {
+               lexer_token.symbol = NULL;
+               return;
        }
-       if (!is_unsigned) {
-               long long v = lexer_token.v.intvalue;
-               if (!min_long) {
-                       if (v >= TARGET_INT_MIN && v <= TARGET_INT_MAX) {
-                               lexer_token.datatype = type_int;
-                               return;
-                       } else if (is_oct_hex && v >= 0 && v <= TARGET_UINT_MAX) {
-                               lexer_token.datatype = type_unsigned_int;
-                               return;
-                       }
-               }
-               if (!min_longlong) {
-                       if (v >= TARGET_LONG_MIN && v <= TARGET_LONG_MAX) {
-                               lexer_token.datatype = type_long;
-                               return;
-                       } else if (is_oct_hex && v >= 0 && (unsigned long long)v <= (unsigned long long)TARGET_ULONG_MAX) {
-                               lexer_token.datatype = type_unsigned_long;
-                               return;
-                       }
-               }
-               unsigned long long uv = (unsigned long long) v;
-               if (is_oct_hex && uv > (unsigned long long) TARGET_LONGLONG_MAX) {
-                       lexer_token.datatype = type_unsigned_long_long;
-                       return;
-               }
 
-               lexer_token.datatype = type_long_long;
-       } else {
-               unsigned long long v = (unsigned long long) lexer_token.v.intvalue;
-               if (!min_long && v <= TARGET_UINT_MAX) {
-                       lexer_token.datatype = type_unsigned_int;
-                       return;
-               }
-               if (!min_longlong && v <= TARGET_ULONG_MAX) {
-                       lexer_token.datatype = type_unsigned_long;
-                       return;
-               }
-               lexer_token.datatype = type_unsigned_long_long;
-       }
-}
+       obstack_1grow(&symbol_obstack, '\0');
+       char     *string = obstack_finish(&symbol_obstack);
+       symbol_t *symbol = symbol_table_insert(string);
 
-static void parse_floating_suffix(void)
-{
-       switch(c) {
-       /* TODO: do something useful with the suffixes... */
-       case 'f':
-       case 'F':
-               if (warning.traditional) {
-                       warningf(&lexer_token.source_position,
-                               "traditional C rejects the 'F' suffix");
-               }
-               next_char();
-               lexer_token.datatype = type_float;
-               break;
-       case 'l':
-       case 'L':
-               if (warning.traditional) {
-                       warningf(&lexer_token.source_position,
-                               "traditional C rejects the 'F' suffix");
-               }
-               next_char();
-               lexer_token.datatype = type_long_double;
-               break;
-       default:
-               lexer_token.datatype = type_double;
-               break;
+       if (symbol->string != string) {
+               obstack_free(&symbol_obstack, string);
        }
+       lexer_token.symbol = symbol;
 }
 
-/**
- * A replacement for strtoull. Only those parts needed for
- * our parser are implemented.
- */
-static unsigned long long parse_int_string(const char *s, const char **endptr, int base)
+static string_t identify_string(char *string, size_t len)
 {
-       unsigned long long v = 0;
-
-       switch (base) {
-       case 16:
-               for (;; ++s) {
-                       /* check for overrun */
-                       if (v >= 0x1000000000000000ULL)
-                               break;
-                       switch (tolower(*s)) {
-                       case '0': v <<= 4; break;
-                       case '1': v <<= 4; v |= 0x1; break;
-                       case '2': v <<= 4; v |= 0x2; break;
-                       case '3': v <<= 4; v |= 0x3; break;
-                       case '4': v <<= 4; v |= 0x4; break;
-                       case '5': v <<= 4; v |= 0x5; break;
-                       case '6': v <<= 4; v |= 0x6; break;
-                       case '7': v <<= 4; v |= 0x7; break;
-                       case '8': v <<= 4; v |= 0x8; break;
-                       case '9': v <<= 4; v |= 0x9; break;
-                       case 'a': v <<= 4; v |= 0xa; break;
-                       case 'b': v <<= 4; v |= 0xb; break;
-                       case 'c': v <<= 4; v |= 0xc; break;
-                       case 'd': v <<= 4; v |= 0xd; break;
-                       case 'e': v <<= 4; v |= 0xe; break;
-                       case 'f': v <<= 4; v |= 0xf; break;
-                       default:
-                               goto end;
-                       }
-               }
-               break;
-       case 8:
-               for (;; ++s) {
-                       /* check for overrun */
-                       if (v >= 0x2000000000000000ULL)
-                               break;
-                       switch (tolower(*s)) {
-                       case '0': v <<= 3; break;
-                       case '1': v <<= 3; v |= 1; break;
-                       case '2': v <<= 3; v |= 2; break;
-                       case '3': v <<= 3; v |= 3; break;
-                       case '4': v <<= 3; v |= 4; break;
-                       case '5': v <<= 3; v |= 5; break;
-                       case '6': v <<= 3; v |= 6; break;
-                       case '7': v <<= 3; v |= 7; break;
-                       default:
-                               goto end;
-                       }
-               }
-               break;
-       case 10:
-               for (;; ++s) {
-                       /* check for overrun */
-                       if (v > 0x1999999999999999ULL)
-                               break;
-                       switch (tolower(*s)) {
-                       case '0': v *= 10; break;
-                       case '1': v *= 10; v += 1; break;
-                       case '2': v *= 10; v += 2; break;
-                       case '3': v *= 10; v += 3; break;
-                       case '4': v *= 10; v += 4; break;
-                       case '5': v *= 10; v += 5; break;
-                       case '6': v *= 10; v += 6; break;
-                       case '7': v *= 10; v += 7; break;
-                       case '8': v *= 10; v += 8; break;
-                       case '9': v *= 10; v += 9; break;
-                       default:
-                               goto end;
-                       }
-               }
-               break;
-       default:
-               assert(0);
-               break;
+       /* TODO hash */
+#if 0
+       const char *result = strset_insert(&stringset, concat);
+       if (result != concat) {
+               obstack_free(&symbol_obstack, concat);
        }
-end:
-       *endptr = s;
-       return v;
+#else
+       const char *result = string;
+#endif
+       return (string_t) {result, len};
 }
 
 /**
@@ -774,29 +610,29 @@ end:
  */
 static void parse_number_hex(void)
 {
-       bool is_float = false;
-       assert(c == 'x' || c == 'X');
-       next_char();
+       bool is_float   = false;
+       bool has_digits = false;
 
-       obstack_1grow(&symbol_obstack, '0');
-       obstack_1grow(&symbol_obstack, 'x');
-
-       while(isxdigit(c)) {
+       assert(obstack_object_size(&symbol_obstack) == 0);
+       while (isxdigit(c)) {
+               has_digits = true;
                obstack_1grow(&symbol_obstack, (char) c);
                next_char();
        }
 
        if (c == '.') {
+               is_float = true;
                obstack_1grow(&symbol_obstack, (char) c);
                next_char();
 
                while (isxdigit(c)) {
+                       has_digits = true;
                        obstack_1grow(&symbol_obstack, (char) c);
                        next_char();
                }
-               is_float = true;
        }
        if (c == 'p' || c == 'P') {
+               is_float = true;
                obstack_1grow(&symbol_obstack, (char) c);
                next_char();
 
@@ -809,39 +645,26 @@ static void parse_number_hex(void)
                        obstack_1grow(&symbol_obstack, (char) c);
                        next_char();
                }
-               is_float = true;
+       } else if (is_float) {
+               errorf(&lexer_token.source_position,
+                      "hexadecimal floatingpoint constant requires an exponent");
        }
 
-       obstack_1grow(&symbol_obstack, '\0');
-       char *string = obstack_finish(&symbol_obstack);
-       if(*string == '\0') {
-               parse_error("invalid hex number");
-               lexer_token.type = T_ERROR;
-               obstack_free(&symbol_obstack, string);
-               return;
-       }
+       size_t  size   = obstack_object_size(&symbol_obstack);
+       char   *string = obstack_finish(&symbol_obstack);
+       lexer_token.literal = identify_string(string, size);
 
-       if (is_float) {
-               char *endptr;
-               lexer_token.type         = T_FLOATINGPOINT;
-               lexer_token.v.floatvalue = strtold(string, &endptr);
-
-               if(*endptr != '\0') {
-                       parse_error("invalid hex float literal");
-               }
+       lexer_token.type    =
+               is_float ? T_FLOATINGPOINT_HEXADECIMAL : T_INTEGER_HEXADECIMAL;
 
-               parse_floating_suffix();
-       } else {
-               const char *endptr;
-               lexer_token.type       = T_INTEGER;
-               lexer_token.v.intvalue = parse_int_string(string + 2, &endptr, 16);
-               if(*endptr != '\0') {
-                       parse_error("hex number literal too long");
-               }
-               parse_integer_suffix(true);
+       if (!has_digits) {
+               errorf(&lexer_token.source_position, "invalid number literal '0x%S'",
+                      &lexer_token.literal);
+               lexer_token.literal.begin = "0";
+               lexer_token.literal.size  = 1;
        }
 
-       obstack_free(&symbol_obstack, string);
+       parse_number_suffix();
 }
 
 /**
@@ -849,9 +672,9 @@ static void parse_number_hex(void)
  *
  * @param char  the character to check
  */
-static inline bool is_octal_digit(utf32 chr)
+static bool is_octal_digit(utf32 chr)
 {
-       switch(chr) {
+       switch (chr) {
        case '0':
        case '1':
        case '2':
@@ -867,132 +690,86 @@ static inline bool is_octal_digit(utf32 chr)
 }
 
 /**
- * Parses a octal number and set the lexer_token.
+ * Parses a number and sets the lexer_token.
  */
-static void parse_number_oct(void)
+static void parse_number(void)
 {
-       while(is_octal_digit(c)) {
-               obstack_1grow(&symbol_obstack, (char) c);
-               next_char();
-       }
-       obstack_1grow(&symbol_obstack, '\0');
-       char *string = obstack_finish(&symbol_obstack);
+       bool is_float   = false;
+       bool has_digits = false;
 
-       const char *endptr;
-       lexer_token.type       = T_INTEGER;
-       lexer_token.v.intvalue = parse_int_string(string, &endptr, 8);
-       if(*endptr != '\0') {
-               parse_error("octal number literal too long");
+       assert(obstack_object_size(&symbol_obstack) == 0);
+       if (c == '0') {
+               next_char();
+               if (c == 'x' || c == 'X') {
+                       next_char();
+                       parse_number_hex();
+                       return;
+               } else {
+                       has_digits = true;
+               }
+               obstack_1grow(&symbol_obstack, '0');
        }
 
-       obstack_free(&symbol_obstack, string);
-       parse_integer_suffix(true);
-}
-
-/**
- * Parses a decimal including float number and set the
- * lexer_token.
- */
-static void parse_number_dec(void)
-{
-       bool is_float = false;
        while (isdigit(c)) {
+               has_digits = true;
                obstack_1grow(&symbol_obstack, (char) c);
                next_char();
        }
 
        if (c == '.') {
+               is_float = true;
                obstack_1grow(&symbol_obstack, '.');
                next_char();
 
                while (isdigit(c)) {
+                       has_digits = true;
                        obstack_1grow(&symbol_obstack, (char) c);
                        next_char();
                }
-               is_float = true;
        }
-       if(c == 'e' || c == 'E') {
-               obstack_1grow(&symbol_obstack, (char) c);
+       if (c == 'e' || c == 'E') {
+               is_float = true;
+               obstack_1grow(&symbol_obstack, 'e');
                next_char();
 
-               if(c == '-' || c == '+') {
+               if (c == '-' || c == '+') {
                        obstack_1grow(&symbol_obstack, (char) c);
                        next_char();
                }
 
-               while(isdigit(c)) {
+               while (isdigit(c)) {
                        obstack_1grow(&symbol_obstack, (char) c);
                        next_char();
                }
-               is_float = true;
        }
 
-       obstack_1grow(&symbol_obstack, '\0');
-       char *string = obstack_finish(&symbol_obstack);
-
-       if(is_float) {
-               char *endptr;
-               lexer_token.type         = T_FLOATINGPOINT;
-               lexer_token.v.floatvalue = strtold(string, &endptr);
+       size_t  size   = obstack_object_size(&symbol_obstack);
+       char   *string = obstack_finish(&symbol_obstack);
+       lexer_token.literal = identify_string(string, size);
 
-               if(*endptr != '\0') {
-                       parse_error("invalid number literal");
+       /* is it an octal number? */
+       if (is_float) {
+               lexer_token.type = T_FLOATINGPOINT;
+       } else if (string[0] == '0') {
+               lexer_token.type = T_INTEGER_OCTAL;
+
+               /* check for invalid octal digits */
+               for (size_t i= 0; i < size; ++i) {
+                       char t = string[i];
+                       if (t == '8' || t == '9')
+                               errorf(&lexer_token.source_position,
+                                      "invalid digit '%c' in octal number", t);
                }
-
-               parse_floating_suffix();
        } else {
-               const char *endptr;
-               lexer_token.type       = T_INTEGER;
-               lexer_token.v.intvalue = parse_int_string(string, &endptr, 10);
-
-               if(*endptr != '\0') {
-                       parse_error("invalid number literal");
-               }
-
-               parse_integer_suffix(false);
+               lexer_token.type = T_INTEGER;
        }
-       obstack_free(&symbol_obstack, string);
-}
 
-/**
- * Parses a number and sets the lexer_token.
- */
-static void parse_number(void)
-{
-       if (c == '0') {
-               next_char();
-               switch (c) {
-                       case 'X':
-                       case 'x':
-                               parse_number_hex();
-                               break;
-                       case '0':
-                       case '1':
-                       case '2':
-                       case '3':
-                       case '4':
-                       case '5':
-                       case '6':
-                       case '7':
-                               parse_number_oct();
-                               break;
-                       case '8':
-                       case '9':
-                               next_char();
-                               parse_error("invalid octal number");
-                               lexer_token.type = T_ERROR;
-                               return;
-                       case '.':
-                       case 'e':
-                       case 'E':
-                       default:
-                               obstack_1grow(&symbol_obstack, '0');
-                               parse_number_dec();
-                               return;
-               }
-       } else {
-               parse_number_dec();
+       if (!has_digits) {
+               errorf(&lexer_token.source_position, "invalid number literal '%S'",
+                      &lexer_token.literal);
        }
+
+       parse_number_suffix();
 }
 
 /**
@@ -1053,7 +830,7 @@ static utf32 parse_octal_sequence(utf32 const first_digit)
 static utf32 parse_hex_sequence(void)
 {
        utf32 value = 0;
-       while(isxdigit(c)) {
+       while (isxdigit(c)) {
                value = 16 * value + digit_value(c);
                next_char();
        }
@@ -1102,12 +879,17 @@ static utf32 parse_escape_sequence(void)
        case 'e':
                if (c_mode & _GNUC)
                        return 27;   /* hopefully 27 is ALWAYS the code for ESCAPE */
-               /* FALLTHROUGH */
-       default:
-               /* §6.4.4.4:8 footnote 64 */
-               parse_error("unknown escape sequence");
+               break;
+       case 'u':
+       case 'U':
+               parse_error("universal character parsing not implemented yet");
                return EOF;
+       default:
+               break;
        }
+       /* §6.4.4.4:8 footnote 64 */
+       parse_error("unknown escape sequence");
+       return EOF;
 }
 
 /**
@@ -1122,84 +904,16 @@ string_t concat_strings(const string_t *const s1, const string_t *const s2)
        memcpy(concat, s1->begin, len1);
        memcpy(concat + len1, s2->begin, len2 + 1);
 
-       if (warning.traditional) {
-               warningf(&lexer_token.source_position,
-                       "traditional C rejects string constant concatenation");
-       }
-#if 0 /* TODO hash */
-       const char *result = strset_insert(&stringset, concat);
-       if(result != concat) {
-               obstack_free(&symbol_obstack, concat);
-       }
-
-       return result;
-#else
-       return (string_t){ concat, len1 + len2 + 1 };
-#endif
-}
-
-/**
- * Concatenate a string and a wide string.
- */
-wide_string_t concat_string_wide_string(const string_t *const s1, const wide_string_t *const s2)
-{
-       const size_t len1 = s1->size - 1;
-       const size_t len2 = s2->size - 1;
-
-       wchar_rep_t *const concat = obstack_alloc(&symbol_obstack, (len1 + len2 + 1) * sizeof(*concat));
-       const char *const src = s1->begin;
-       for (size_t i = 0; i != len1; ++i) {
-               concat[i] = src[i];
-       }
-       memcpy(concat + len1, s2->begin, (len2 + 1) * sizeof(*concat));
-       if (warning.traditional) {
-               warningf(&lexer_token.source_position,
-                       "traditional C rejects string constant concatenation");
-       }
-
-       return (wide_string_t){ concat, len1 + len2 + 1 };
-}
-
-/**
- * Concatenate two wide strings.
- */
-wide_string_t concat_wide_strings(const wide_string_t *const s1, const wide_string_t *const s2)
-{
-       const size_t len1 = s1->size - 1;
-       const size_t len2 = s2->size - 1;
-
-       wchar_rep_t *const concat = obstack_alloc(&symbol_obstack, (len1 + len2 + 1) * sizeof(*concat));
-       memcpy(concat,        s1->begin, len1       * sizeof(*concat));
-       memcpy(concat + len1, s2->begin, (len2 + 1) * sizeof(*concat));
-       if (warning.traditional) {
-               warningf(&lexer_token.source_position,
-                       "traditional C rejects string constant concatenation");
-       }
-
-       return (wide_string_t){ concat, len1 + len2 + 1 };
+       return identify_string(concat, len1 + len2 + 1);
 }
 
-/**
- * Concatenate a wide string and a string.
- */
-wide_string_t concat_wide_string_string(const wide_string_t *const s1, const string_t *const s2)
+string_t make_string(const char *string)
 {
-       const size_t len1 = s1->size - 1;
-       const size_t len2 = s2->size - 1;
-
-       wchar_rep_t *const concat = obstack_alloc(&symbol_obstack, (len1 + len2 + 1) * sizeof(*concat));
-       memcpy(concat, s1->begin, len1 * sizeof(*concat));
-       const char  *const src = s2->begin;
-       wchar_rep_t *const dst = concat + len1;
-       for (size_t i = 0; i != len2 + 1; ++i) {
-               dst[i] = src[i];
-       }
-       if (warning.traditional) {
-               warningf(&lexer_token.source_position,
-                       "traditional C rejects string constant concatenation");
-       }
+       size_t      len   = strlen(string) + 1;
+       char *const space = obstack_alloc(&symbol_obstack, len);
+       memcpy(space, string, len);
 
-       return (wide_string_t){ concat, len1 + len2 + 1 };
+       return identify_string(space, len);
 }
 
 static void grow_symbol(utf32 const tc)
@@ -1231,8 +945,8 @@ static void parse_string_literal(void)
 
        eat('"');
 
-       while(1) {
-               switch(c) {
+       while (true) {
+               switch (c) {
                case '\\': {
                        utf32 const tc = parse_escape_sequence();
                        if (tc >= 0x100) {
@@ -1269,22 +983,11 @@ end_of_string:
 
        /* add finishing 0 to the string */
        obstack_1grow(&symbol_obstack, '\0');
-       const size_t      size   = (size_t)obstack_object_size(&symbol_obstack);
-       const char *const string = obstack_finish(&symbol_obstack);
+       const size_t  size   = (size_t)obstack_object_size(&symbol_obstack);
+       char         *string = obstack_finish(&symbol_obstack);
 
-#if 0 /* TODO hash */
-       /* check if there is already a copy of the string */
-       result = strset_insert(&stringset, string);
-       if(result != string) {
-               obstack_free(&symbol_obstack, string);
-       }
-#else
-       const char *const result = string;
-#endif
-
-       lexer_token.type           = T_STRING_LITERAL;
-       lexer_token.v.string.begin = result;
-       lexer_token.v.string.size  = size;
+       lexer_token.type    = T_STRING_LITERAL;
+       lexer_token.literal = identify_string(string, size);
 }
 
 /**
@@ -1296,11 +999,11 @@ static void parse_wide_character_constant(void)
 
        eat('\'');
 
-       while(1) {
-               switch(c) {
+       while (true) {
+               switch (c) {
                case '\\': {
-                       wchar_rep_t tc = parse_escape_sequence();
-                       obstack_grow(&symbol_obstack, &tc, sizeof(tc));
+                       const utf32 tc = parse_escape_sequence();
+                       grow_symbol(tc);
                        break;
                }
 
@@ -1321,26 +1024,19 @@ static void parse_wide_character_constant(void)
                        return;
                }
 
-               default: {
-                       wchar_rep_t tc = (wchar_rep_t) c;
-                       obstack_grow(&symbol_obstack, &tc, sizeof(tc));
+               default:
+                       grow_symbol(c);
                        next_char();
                        break;
                }
-               }
        }
 
 end_of_wide_char_constant:;
-       size_t             size   = (size_t) obstack_object_size(&symbol_obstack);
-       assert(size % sizeof(wchar_rep_t) == 0);
-       size /= sizeof(wchar_rep_t);
+       size_t  size   = (size_t) obstack_object_size(&symbol_obstack);
+       char   *string = obstack_finish(&symbol_obstack);
 
-       const wchar_rep_t *string = obstack_finish(&symbol_obstack);
-
-       lexer_token.type                = T_WIDE_CHARACTER_CONSTANT;
-       lexer_token.v.wide_string.begin = string;
-       lexer_token.v.wide_string.size  = size;
-       lexer_token.datatype            = type_wchar_t;
+       lexer_token.type     = T_WIDE_CHARACTER_CONSTANT;
+       lexer_token.literal  = identify_string(string, size);
 }
 
 /**
@@ -1348,64 +1044,9 @@ end_of_wide_char_constant:;
  */
 static void parse_wide_string_literal(void)
 {
-       const unsigned start_linenr = lexer_token.source_position.linenr;
-
-       assert(c == '"');
-       next_char();
-
-       while(1) {
-               switch(c) {
-               case '\\': {
-                       wchar_rep_t tc = parse_escape_sequence();
-                       obstack_grow(&symbol_obstack, &tc, sizeof(tc));
-                       break;
-               }
-
-               case EOF: {
-                       source_position_t source_position;
-                       source_position.input_name = lexer_token.source_position.input_name;
-                       source_position.linenr     = start_linenr;
-                       errorf(&source_position, "string has no end");
-                       lexer_token.type = T_ERROR;
-                       return;
-               }
-
-               case '"':
-                       next_char();
-                       goto end_of_string;
-
-               default: {
-                       wchar_rep_t tc = c;
-                       obstack_grow(&symbol_obstack, &tc, sizeof(tc));
-                       next_char();
-                       break;
-               }
-               }
-       }
-
-end_of_string:;
-
-       /* TODO: concatenate multiple strings separated by whitespace... */
-
-       /* add finishing 0 to the string */
-       wchar_rep_t nul = L'\0';
-       obstack_grow(&symbol_obstack, &nul, sizeof(nul));
-       const size_t             size   = (size_t)obstack_object_size(&symbol_obstack) / sizeof(wchar_rep_t);
-       const wchar_rep_t *const string = obstack_finish(&symbol_obstack);
-
-#if 0 /* TODO hash */
-       /* check if there is already a copy of the string */
-       const wchar_rep_t *const result = strset_insert(&stringset, string);
-       if(result != string) {
-               obstack_free(&symbol_obstack, string);
-       }
-#else
-       const wchar_rep_t *const result = string;
-#endif
-
-       lexer_token.type                = T_WIDE_STRING_LITERAL;
-       lexer_token.v.wide_string.begin = result;
-       lexer_token.v.wide_string.size  = size;
+       parse_string_literal();
+       if (lexer_token.type == T_STRING_LITERAL)
+               lexer_token.type = T_WIDE_STRING_LITERAL;
 }
 
 /**
@@ -1417,8 +1058,8 @@ static void parse_character_constant(void)
 
        eat('\'');
 
-       while(1) {
-               switch(c) {
+       while (true) {
+               switch (c) {
                case '\\': {
                        utf32 const tc = parse_escape_sequence();
                        if (tc >= 0x100) {
@@ -1456,13 +1097,11 @@ static void parse_character_constant(void)
        }
 
 end_of_char_constant:;
-       const size_t      size   = (size_t)obstack_object_size(&symbol_obstack);
-       const char *const string = obstack_finish(&symbol_obstack);
+       const size_t        size   = (size_t)obstack_object_size(&symbol_obstack);
+       char         *const string = obstack_finish(&symbol_obstack);
 
-       lexer_token.type           = T_CHARACTER_CONSTANT;
-       lexer_token.v.string.begin = string;
-       lexer_token.v.string.size  = size;
-       lexer_token.datatype       = c_mode & _CXX && size == 1 ? type_char : type_int;
+       lexer_token.type    = T_CHARACTER_CONSTANT;
+       lexer_token.literal = identify_string(string, size);
 }
 
 /**
@@ -1472,8 +1111,8 @@ static void skip_multiline_comment(void)
 {
        unsigned start_linenr = lexer_token.source_position.linenr;
 
-       while(1) {
-               switch(c) {
+       while (true) {
+               switch (c) {
                case '/':
                        next_char();
                        if (c == '*') {
@@ -1485,7 +1124,7 @@ static void skip_multiline_comment(void)
                        break;
                case '*':
                        next_char();
-                       if(c == '/') {
+                       if (c == '/') {
                                next_char();
                                return;
                        }
@@ -1513,8 +1152,8 @@ static void skip_multiline_comment(void)
  */
 static void skip_line_comment(void)
 {
-       while(1) {
-               switch(c) {
+       while (true) {
+               switch (c) {
                case EOF:
                        return;
 
@@ -1555,7 +1194,7 @@ static inline void next_pp_token(void)
  */
 static void eat_until_newline(void)
 {
-       while(pp_token.type != '\n' && pp_token.type != T_EOF) {
+       while (pp_token.type != '\n' && pp_token.type != T_EOF) {
                next_pp_token();
        }
 }
@@ -1566,7 +1205,7 @@ static void eat_until_newline(void)
 static void define_directive(void)
 {
        lexer_next_preprocessing_token();
-       if(lexer_token.type != T_IDENTIFIER) {
+       if (lexer_token.type != T_IDENTIFIER) {
                parse_error("expected identifier after #define\n");
                eat_until_newline();
        }
@@ -1596,14 +1235,14 @@ static void endif_directive(void)
  */
 static void parse_line_directive(void)
 {
-       if(pp_token.type != T_INTEGER) {
+       if (pp_token.type != T_INTEGER) {
                parse_error("expected integer");
        } else {
-               lexer_token.source_position.linenr = (unsigned int)(pp_token.v.intvalue - 1);
+               lexer_token.source_position.linenr = atoi(pp_token.literal.begin);
                next_pp_token();
        }
-       if(pp_token.type == T_STRING_LITERAL) {
-               lexer_token.source_position.input_name = pp_token.v.string.begin;
+       if (pp_token.type == T_STRING_LITERAL) {
+               lexer_token.source_position.input_name = pp_token.literal.begin;
                next_pp_token();
        }
 
@@ -1638,13 +1277,13 @@ static void parse_pragma(void)
        bool unknown_pragma = true;
 
        next_pp_token();
-       if (pp_token.v.symbol->pp_ID == TP_STDC) {
+       if (pp_token.symbol->pp_ID == TP_STDC) {
                stdc_pragma_kind_t kind = STDC_UNKNOWN;
                /* a STDC pragma */
                if (c_mode & _C99) {
                        next_pp_token();
 
-                       switch (pp_token.v.symbol->pp_ID) {
+                       switch (pp_token.symbol->pp_ID) {
                        case TP_FP_CONTRACT:
                                kind = STDC_FP_CONTRACT;
                                break;
@@ -1660,7 +1299,7 @@ static void parse_pragma(void)
                        if (kind != STDC_UNKNOWN) {
                                stdc_pragma_value_kind_t value = STDC_VALUE_UNKNOWN;
                                next_pp_token();
-                               switch (pp_token.v.symbol->pp_ID) {
+                               switch (pp_token.symbol->pp_ID) {
                                case TP_ON:
                                        value = STDC_VALUE_ON;
                                        break;
@@ -1695,9 +1334,9 @@ static void parse_pragma(void)
 static void parse_preprocessor_identifier(void)
 {
        assert(pp_token.type == T_IDENTIFIER);
-       symbol_t *symbol = pp_token.v.symbol;
+       symbol_t *symbol = pp_token.symbol;
 
-       switch(symbol->pp_ID) {
+       switch (symbol->pp_ID) {
        case TP_include:
                printf("include - enable header name parsing!\n");
                break;
@@ -1738,7 +1377,7 @@ static void parse_preprocessor_directive(void)
 {
        next_pp_token();
 
-       switch(pp_token.type) {
+       switch (pp_token.type) {
        case T_IDENTIFIER:
                parse_preprocessor_identifier();
                break;
@@ -1757,8 +1396,8 @@ static void parse_preprocessor_directive(void)
 
 #define MAYBE_PROLOG                                       \
                        next_char();                                   \
-                       while(1) {                                     \
-                               switch(c) {
+                       while (true) {                                 \
+                               switch (c) {
 
 #define MAYBE(ch, set_type)                                \
                                case ch:                                   \
@@ -1780,7 +1419,7 @@ static void parse_preprocessor_directive(void)
                                default:                                   \
                                        code                                   \
                                }                                          \
-                       } /* end of while(1) */                        \
+                       } /* end of while (true) */                    \
                        break;
 
 #define ELSE(set_type)                                     \
@@ -1791,8 +1430,8 @@ static void parse_preprocessor_directive(void)
 
 void lexer_next_preprocessing_token(void)
 {
-       while(1) {
-               switch(c) {
+       while (true) {
+               switch (c) {
                case ' ':
                case '\t':
                        next_char();
@@ -1806,7 +1445,7 @@ void lexer_next_preprocessing_token(void)
                SYMBOL_CHARS
                        parse_symbol();
                        /* might be a wide string ( L"string" ) */
-                       if (lexer_token.v.symbol == symbol_L) {
+                       if (lexer_token.symbol == symbol_L) {
                                switch (c) {
                                        case '"':  parse_wide_string_literal();     break;
                                        case '\'': parse_wide_character_constant(); break;
@@ -1831,7 +1470,7 @@ void lexer_next_preprocessing_token(void)
                                DIGITS
                                        put_back(c);
                                        c = '.';
-                                       parse_number_dec();
+                                       parse_number();
                                        return;
 
                                case '.':
diff --git a/lexer.h b/lexer.h
index 3f83f11..a34d591 100644 (file)
--- a/lexer.h
+++ b/lexer.h
@@ -39,9 +39,7 @@ void select_input_encoding(char const* encoding);
 void lexer_open_stream(FILE *stream, const char *input_name);
 void lexer_open_buffer(const char *buffer, size_t len, const char *input_name);
 
-string_t      concat_strings(           const string_t      *s1, const string_t      *s2);
-wide_string_t concat_string_wide_string(const string_t      *s1, const wide_string_t *s2);
-wide_string_t concat_wide_strings(      const wide_string_t *s1, const wide_string_t *s2);
-wide_string_t concat_wide_string_string(const wide_string_t *s1, const string_t      *s2);
+string_t concat_strings(const string_t *s1, const string_t *s2);
+string_t make_string(const char *str);
 
 #endif
index 502dcc6..101b404 100644 (file)
--- a/parser.c
+++ b/parser.c
@@ -305,11 +305,15 @@ static size_t get_expression_struct_size(expression_kind_t kind)
                [EXPR_INVALID]                    = sizeof(expression_base_t),
                [EXPR_REFERENCE]                  = sizeof(reference_expression_t),
                [EXPR_REFERENCE_ENUM_VALUE]       = sizeof(reference_expression_t),
-               [EXPR_CONST]                      = sizeof(const_expression_t),
-               [EXPR_CHARACTER_CONSTANT]         = sizeof(const_expression_t),
-               [EXPR_WIDE_CHARACTER_CONSTANT]    = sizeof(const_expression_t),
+               [EXPR_LITERAL_INTEGER]            = sizeof(literal_expression_t),
+               [EXPR_LITERAL_INTEGER_OCTAL]      = sizeof(literal_expression_t),
+               [EXPR_LITERAL_INTEGER_HEXADECIMAL]= sizeof(literal_expression_t),
+               [EXPR_LITERAL_FLOATINGPOINT]      = sizeof(literal_expression_t),
+               [EXPR_LITERAL_FLOATINGPOINT_HEXADECIMAL] = sizeof(literal_expression_t),
+               [EXPR_LITERAL_CHARACTER]          = sizeof(literal_expression_t),
+               [EXPR_LITERAL_WIDE_CHARACTER]     = sizeof(literal_expression_t),
                [EXPR_STRING_LITERAL]             = sizeof(string_literal_expression_t),
-               [EXPR_WIDE_STRING_LITERAL]        = sizeof(wide_string_literal_expression_t),
+               [EXPR_WIDE_STRING_LITERAL]        = sizeof(string_literal_expression_t),
                [EXPR_COMPOUND_LITERAL]           = sizeof(compound_literal_expression_t),
                [EXPR_CALL]                       = sizeof(call_expression_t),
                [EXPR_UNARY_FIRST]                = sizeof(unary_expression_t),
@@ -1085,15 +1089,23 @@ static expression_t *parse_assignment_expression(void)
        return parse_sub_expression(PREC_ASSIGNMENT);
 }
 
+static void warn_string_concat(const source_position_t *pos)
+{
+       if (!warning.traditional)
+               return; /* diagnostic is only wanted when warning.traditional is set */
+       warningf(pos, "traditional C rejects string constant concatenation");
+}
+
 static string_t parse_string_literals(void)
 {
        assert(token.type == T_STRING_LITERAL);
-       string_t result = token.v.string;
+       string_t result = token.literal;
 
        next_token();
 
        while (token.type == T_STRING_LITERAL) {
-               result = concat_strings(&result, &token.v.string);
+               warn_string_concat(&token.source_position);
+               result = concat_strings(&result, &token.literal);
                next_token();
        }
 
@@ -1161,7 +1173,7 @@ static attribute_argument_t *parse_attribute_arguments(void)
                /* is it an identifier */
                if (token.type == T_IDENTIFIER
                                && (look_ahead(1)->type == ',' || look_ahead(1)->type == ')')) {
-                       symbol_t *symbol   = token.v.symbol;
+                       symbol_t *symbol   = token.symbol;
                        argument->kind     = ATTRIBUTE_ARGUMENT_SYMBOL;
                        argument->v.symbol = symbol;
                        next_token();
@@ -1204,7 +1216,7 @@ static symbol_t *get_symbol_from_token(void)
 {
        switch(token.type) {
        case T_IDENTIFIER:
-               return token.v.symbol;
+               return token.symbol;
        case T_auto:
        case T_char:
        case T_double:
@@ -1570,11 +1582,9 @@ unary:
                        determine_lhs_ent(expr->va_starte.ap, lhs_ent);
                        return;
 
+               EXPR_LITERAL_CASES
                case EXPR_UNKNOWN:
                case EXPR_INVALID:
-               case EXPR_CONST:
-               case EXPR_CHARACTER_CONSTANT:
-               case EXPR_WIDE_CHARACTER_CONSTANT:
                case EXPR_STRING_LITERAL:
                case EXPR_WIDE_STRING_LITERAL:
                case EXPR_COMPOUND_LITERAL: // TODO init?
@@ -1620,7 +1630,7 @@ static designator_t *parse_designation(void)
                                                     T_IDENTIFIER, NULL);
                                return NULL;
                        }
-                       designator->symbol = token.v.symbol;
+                       designator->symbol = token.symbol;
                        next_token();
                        break;
                default:
@@ -1636,7 +1646,7 @@ end_error:
        return NULL;
 }
 
-static initializer_t *initializer_from_string(array_type_t *type,
+static initializer_t *initializer_from_string(array_type_t *const type,
                                               const string_t *const string)
 {
        /* TODO: check len vs. size of array type */
@@ -1649,7 +1659,7 @@ static initializer_t *initializer_from_string(array_type_t *type,
 }
 
 static initializer_t *initializer_from_wide_string(array_type_t *const type,
-                                                   wide_string_t *const string)
+                                                   const string_t *const string)
 {
        /* TODO: check len vs. size of array type */
        (void) type;
@@ -1673,6 +1683,7 @@ static initializer_t *initializer_from_expression(type_t *orig_type,
        type_t *type           = skip_typeref(orig_type);
        type_t *expr_type_orig = expression->base.type;
        type_t *expr_type      = skip_typeref(expr_type_orig);
+
        if (is_type_array(type) && expr_type->kind == TYPE_POINTER) {
                array_type_t *const array_type   = &type->array;
                type_t       *const element_type = skip_typeref(array_type->element_type);
@@ -1680,26 +1691,26 @@ static initializer_t *initializer_from_expression(type_t *orig_type,
                if (element_type->kind == TYPE_ATOMIC) {
                        atomic_type_kind_t akind = element_type->atomic.akind;
                        switch (expression->kind) {
-                               case EXPR_STRING_LITERAL:
-                                       if (akind == ATOMIC_TYPE_CHAR
-                                                       || akind == ATOMIC_TYPE_SCHAR
-                                                       || akind == ATOMIC_TYPE_UCHAR) {
-                                               return initializer_from_string(array_type,
-                                                       &expression->string.value);
-                                       }
-                                       break;
+                       case EXPR_STRING_LITERAL:
+                               if (akind == ATOMIC_TYPE_CHAR
+                                               || akind == ATOMIC_TYPE_SCHAR
+                                               || akind == ATOMIC_TYPE_UCHAR) {
+                                       return initializer_from_string(array_type,
+                                                       &expression->string_literal.value);
+                               }
+                               break;
 
-                               case EXPR_WIDE_STRING_LITERAL: {
-                                       type_t *bare_wchar_type = skip_typeref(type_wchar_t);
-                                       if (get_unqualified_type(element_type) == bare_wchar_type) {
-                                               return initializer_from_wide_string(array_type,
-                                                       &expression->wide_string.value);
-                                       }
-                                       break;
+                       case EXPR_WIDE_STRING_LITERAL: {
+                               type_t *bare_wchar_type = skip_typeref(type_wchar_t);
+                               if (get_unqualified_type(element_type) == bare_wchar_type) {
+                                       return initializer_from_wide_string(array_type,
+                                                       &expression->string_literal.value);
                                }
+                               break;
+                       }
 
-                               default:
-                                       break;
+                       default:
+                               break;
                        }
                }
        }
@@ -2113,7 +2124,7 @@ static initializer_t *parse_sub_initializer(type_path_t *path,
                        /* GNU-style designator ("identifier: value") */
                        designator = allocate_ast_zero(sizeof(designator[0]));
                        designator->source_position = token.source_position;
-                       designator->symbol          = token.v.symbol;
+                       designator->symbol          = token.symbol;
                        eat(T_IDENTIFIER);
                        eat(':');
 
@@ -2292,6 +2303,18 @@ end_error:
        return NULL;
 }
 
+static expression_t *make_size_literal(size_t value)
+{
+       /* takes the expression kind; allocate_ast_zero() expects a byte count */
+       expression_t *literal = allocate_expression_zero(EXPR_LITERAL_INTEGER);
+       literal->base.type    = type_size_t;
+       /* literals are kept as text; %zu prints the full size_t range */
+       char buf[128];
+       snprintf(buf, sizeof(buf), "%zu", value);
+       literal->literal.value = make_string(buf);
+       return literal;
+}
+
 /**
  * Parses an initializer. Parsers either a compound literal
  * (env->declaration == NULL) or an initializer of a declaration.
@@ -2357,13 +2380,9 @@ static initializer_t *parse_initializer(parse_initializer_env_t *env)
                        internal_errorf(HERE, "invalid initializer type");
                }
 
-               expression_t *cnst       = allocate_expression_zero(EXPR_CONST);
-               cnst->base.type          = type_size_t;
-               cnst->conste.v.int_value = size;
-
                type_t *new_type = duplicate_type(type);
 
-               new_type->array.size_expression   = cnst;
+               new_type->array.size_expression   = make_size_literal(size);
                new_type->array.size_constant     = true;
                new_type->array.has_implicit_size = true;
                new_type->array.size              = size;
@@ -2402,7 +2421,7 @@ static compound_t *parse_compound_type_specifier(bool is_struct)
        entity_kind_tag_t const kind = is_struct ? ENTITY_STRUCT : ENTITY_UNION;
        if (token.type == T_IDENTIFIER) {
                /* the compound has a name, check if we have seen it already */
-               symbol = token.v.symbol;
+               symbol = token.symbol;
                next_token();
 
                entity_t *entity = get_tag(symbol, kind);
@@ -2487,7 +2506,7 @@ static void parse_enum_entries(type_t *const enum_type)
 
                entity_t *entity             = allocate_entity_zero(ENTITY_ENUM_VALUE);
                entity->enum_value.enum_type = enum_type;
-               entity->base.symbol          = token.v.symbol;
+               entity->base.symbol          = token.symbol;
                entity->base.source_position = token.source_position;
                next_token();
 
@@ -2518,7 +2537,7 @@ static type_t *parse_enum_specifier(void)
        eat(T_enum);
        switch (token.type) {
                case T_IDENTIFIER:
-                       symbol = token.v.symbol;
+                       symbol = token.symbol;
                        next_token();
 
                        entity = get_tag(symbol, ENTITY_ENUM);
@@ -2611,7 +2630,7 @@ static type_t *parse_typeof(void)
        }
        switch (token.type) {
        case T_IDENTIFIER:
-               if (is_typedef_symbol(token.v.symbol)) {
+               if (is_typedef_symbol(token.symbol)) {
                        type = parse_typename();
                } else {
                        expression = parse_expression();
@@ -2701,7 +2720,7 @@ static attribute_t *parse_attribute_ms_property(attribute_t *attribute)
                }
 
                bool is_put;
-               symbol_t *symbol = token.v.symbol;
+               symbol_t *symbol = token.symbol;
                next_token();
                if (strcmp(symbol->string, "put") == 0) {
                        is_put = true;
@@ -2718,9 +2737,9 @@ static attribute_t *parse_attribute_ms_property(attribute_t *attribute)
                        goto end_error;
                }
                if (is_put) {
-                       property->put_symbol = token.v.symbol;
+                       property->put_symbol = token.symbol;
                } else {
-                       property->get_symbol = token.v.symbol;
+                       property->get_symbol = token.symbol;
                }
                next_token();
        } while (next_if(','));
@@ -2739,7 +2758,7 @@ static attribute_t *parse_microsoft_extended_decl_modifier_single(void)
        if (next_if(T_restrict)) {
                kind = ATTRIBUTE_MS_RESTRICT;
        } else if (token.type == T_IDENTIFIER) {
-               const char *name = token.v.symbol->string;
+               const char *name = token.symbol->string;
                next_token();
                for (attribute_kind_t k = ATTRIBUTE_MS_FIRST; k <= ATTRIBUTE_MS_LAST;
                     ++k) {
@@ -3013,7 +3032,7 @@ wrong_thread_stoarge_class:
                                }
                        }
 
-                       type_t *const typedef_type = get_typedef_type(token.v.symbol);
+                       type_t *const typedef_type = get_typedef_type(token.symbol);
                        if (typedef_type == NULL) {
                                /* Be somewhat resilient to typos like 'vodi f()' at the beginning of a
                                 * declaration, so it doesn't generate 'implicit int' followed by more
@@ -3027,7 +3046,7 @@ wrong_thread_stoarge_class:
                                                errorf(HERE, "%K does not name a type", &token);
 
                                                entity_t *entity =
-                                                       create_error_entity(token.v.symbol, ENTITY_TYPEDEF);
+                                                       create_error_entity(token.symbol, ENTITY_TYPEDEF);
 
                                                type = allocate_type_zero(TYPE_TYPEDEF);
                                                type->typedeft.typedefe = &entity->typedefe;
@@ -3290,7 +3309,7 @@ static void parse_identifier_list(scope_t *scope)
                entity_t *entity = allocate_entity_zero(ENTITY_PARAMETER);
                entity->base.source_position = token.source_position;
                entity->base.namespc         = NAMESPACE_NORMAL;
-               entity->base.symbol          = token.v.symbol;
+               entity->base.symbol          = token.symbol;
                /* a K&R parameter has no type, yet */
                next_token();
 
@@ -3332,7 +3351,7 @@ static bool has_parameters(void)
 {
        /* func(void) is not a parameter */
        if (token.type == T_IDENTIFIER) {
-               entity_t const *const entity = get_entity(token.v.symbol, NAMESPACE_NORMAL);
+               entity_t const *const entity = get_entity(token.symbol, NAMESPACE_NORMAL);
                if (entity == NULL)
                        return true;
                if (entity->kind != ENTITY_TYPEDEF)
@@ -3359,7 +3378,7 @@ static void parse_parameters(function_type_t *type, scope_t *scope)
        int saved_comma_state = save_and_reset_anchor_state(',');
 
        if (token.type == T_IDENTIFIER &&
-           !is_typedef_symbol(token.v.symbol)) {
+           !is_typedef_symbol(token.symbol)) {
                token_type_t la1_type = (token_type_t)look_ahead(1)->type;
                if (la1_type == ',' || la1_type == ')') {
                        type->kr_style_parameters = true;
@@ -3617,7 +3636,7 @@ ptr_operator_end: ;
                if (env->must_be_abstract) {
                        errorf(HERE, "no identifier expected in typename");
                } else {
-                       env->symbol          = token.v.symbol;
+                       env->symbol          = token.symbol;
                        env->source_position = token.source_position;
                }
                next_token();
@@ -4378,7 +4397,7 @@ static bool is_declaration_specifier(const token_t *token,
                TYPE_QUALIFIERS
                        return true;
                case T_IDENTIFIER:
-                       return is_typedef_symbol(token->v.symbol);
+                       return is_typedef_symbol(token->symbol);
 
                case T___extension__:
                STORAGE_CLASSES
@@ -4890,9 +4909,7 @@ static bool expression_returns(expression_t const *const expr)
 
                case EXPR_REFERENCE:
                case EXPR_REFERENCE_ENUM_VALUE:
-               case EXPR_CONST:
-               case EXPR_CHARACTER_CONSTANT:
-               case EXPR_WIDE_CHARACTER_CONSTANT:
+               EXPR_LITERAL_CASES
                case EXPR_STRING_LITERAL:
                case EXPR_WIDE_STRING_LITERAL:
                case EXPR_COMPOUND_LITERAL: // TODO descend into initialisers
@@ -5958,82 +5975,181 @@ static expression_t *expected_expression_error(void)
        return create_invalid_expression();
 }
 
+static type_t *get_string_type(void)
+{
+       return warning.write_strings ? type_const_char_ptr : type_char_ptr;
+}
+
+static type_t *get_wide_string_type(void)
+{
+       return warning.write_strings ? type_const_wchar_t_ptr : type_wchar_t_ptr;
+}
+
 /**
  * Parse a string constant.
  */
-static expression_t *parse_string_const(void)
+static expression_t *parse_string_literal(void)
 {
-       wide_string_t wres;
-       if (token.type == T_STRING_LITERAL) {
-               string_t res = token.v.string;
+       source_position_t begin   = token.source_position;
+       string_t          res     = token.literal;
+       bool              is_wide = (token.type == T_WIDE_STRING_LITERAL);
+       /* one wide part makes the whole concatenation wide, so test every part */
+       next_token();
+       while (token.type == T_STRING_LITERAL
+                       || token.type == T_WIDE_STRING_LITERAL) {
+               warn_string_concat(&token.source_position);
+               res      = concat_strings(&res, &token.literal);
+               is_wide |= token.type == T_WIDE_STRING_LITERAL;
                next_token();
-               while (token.type == T_STRING_LITERAL) {
-                       res = concat_strings(&res, &token.v.string);
-                       next_token();
-               }
-               if (token.type != T_WIDE_STRING_LITERAL) {
-                       expression_t *const cnst = allocate_expression_zero(EXPR_STRING_LITERAL);
-                       /* note: that we use type_char_ptr here, which is already the
-                        * automatic converted type. revert_automatic_type_conversion
-                        * will construct the array type */
-                       cnst->base.type    = warning.write_strings ? type_const_char_ptr : type_char_ptr;
-                       cnst->string.value = res;
-                       return cnst;
-               }
+       }
 
-               wres = concat_string_wide_string(&res, &token.v.wide_string);
+       expression_t *literal;
+       if (is_wide) {
+               literal = allocate_expression_zero(EXPR_WIDE_STRING_LITERAL);
+               literal->base.type = get_wide_string_type();
        } else {
-               wres = token.v.wide_string;
+               literal = allocate_expression_zero(EXPR_STRING_LITERAL);
+               literal->base.type = get_string_type();
        }
+       literal->base.source_position = begin;
+       literal->string_literal.value = res;
+
+       return literal;
+}
+
+/**
+ * Parse a boolean constant.
+ */
+static expression_t *parse_boolean_literal(bool value)
+{
+       expression_t *literal = allocate_expression_zero(EXPR_LITERAL_BOOLEAN);
+       literal->base.source_position = token.source_position;
+       literal->base.type            = type_bool;
+       literal->literal.value.begin  = value ? "true" : "false";
+       literal->literal.value.size   = value ? 4 : 5;
+
        next_token();
+       return literal;
+}
 
-       for (;;) {
-               switch (token.type) {
-                       case T_WIDE_STRING_LITERAL:
-                               wres = concat_wide_strings(&wres, &token.v.wide_string);
-                               break;
+static void warn_traditional_suffix(void)
+{
+       if (!warning.traditional)
+               return;
+       warningf(&token.source_position, "traditional C rejects the '%Y' suffix",
+                token.symbol);
+}
 
-                       case T_STRING_LITERAL:
-                               wres = concat_wide_string_string(&wres, &token.v.string);
-                               break;
+static void check_integer_suffix(void)
+{
+       symbol_t *suffix = token.symbol;
+       if (suffix == NULL)
+               return;
 
-                       default: {
-                               expression_t *const cnst = allocate_expression_zero(EXPR_WIDE_STRING_LITERAL);
-                               cnst->base.type         = warning.write_strings ? type_const_wchar_t_ptr : type_wchar_t_ptr;
-                               cnst->wide_string.value = wres;
-                               return cnst;
+       bool not_traditional = false;
+       const char *c = suffix->string;
+       if (*c == 'l' || *c == 'L') {
+               ++c;
+               if (*c == *(c-1)) {
+                       not_traditional = true;
+                       ++c;
+                       if (*c == 'u' || *c == 'U') {
+                               ++c;
+                       }
+               } else if (*c == 'u' || *c == 'U') {
+                       not_traditional = true;
+                       ++c;
+               }
+       } else if (*c == 'u' || *c == 'U') {
+               not_traditional = true;
+               ++c;
+               if (*c == 'l' || *c == 'L') {
+                       ++c;
+                       if (*c == *(c-1)) {
+                               ++c;
                        }
                }
-               next_token();
+       }
+       if (*c != '\0') {
+               errorf(&token.source_position,
+                      "invalid suffix '%s' on integer constant", suffix->string);
+       } else if (not_traditional) {
+               warn_traditional_suffix();
        }
 }
 
-/**
- * Parse a boolean constant.
- */
-static expression_t *parse_bool_const(bool value)
+static type_t *check_floatingpoint_suffix(void)
 {
-       expression_t *cnst       = allocate_expression_zero(EXPR_CONST);
-       cnst->base.type          = type_bool;
-       cnst->conste.v.int_value = value;
+       symbol_t *suffix = token.symbol;
+       type_t   *type   = type_double;
+       if (suffix == NULL)
+               return type;
 
-       next_token();
+       const char *c = suffix->string;
+       /* traditional C has neither the f/F nor the l/L floatingpoint suffix */
+       if (*c == 'f' || *c == 'F') {
+               ++c;
+               type = type_float;
+       } else if (*c == 'l' || *c == 'L') {
+               ++c;
+               type = type_long_double;
+       }
+       if (*c != '\0') {
+               errorf(&token.source_position,
+                      "invalid suffix '%s' on floatingpoint constant", suffix->string);
+       } else if (c != suffix->string) { /* a suffix character was accepted */
+               warn_traditional_suffix();
+       }
 
-       return cnst;
+       return type;
 }
 
 /**
  * Parse an integer constant.
  */
-static expression_t *parse_int_const(void)
+static expression_t *parse_number_literal(void)
 {
-       expression_t *cnst       = allocate_expression_zero(EXPR_CONST);
-       cnst->base.type          = token.datatype;
-       cnst->conste.v.int_value = token.v.intvalue;
+       expression_kind_t  kind;
+       type_t            *type = type_int;
+       /* integer cases keep the int placeholder; float cases overwrite it */
+       switch (token.type) {
+       case T_INTEGER:
+               kind = EXPR_LITERAL_INTEGER;
+               check_integer_suffix();
+               break;
+       case T_INTEGER_OCTAL:
+               kind = EXPR_LITERAL_INTEGER_OCTAL;
+               check_integer_suffix();
+               break;
+       case T_INTEGER_HEXADECIMAL:
+               kind = EXPR_LITERAL_INTEGER_HEXADECIMAL;
+               check_integer_suffix();
+               break;
+       case T_FLOATINGPOINT:
+               kind = EXPR_LITERAL_FLOATINGPOINT;
+               type = check_floatingpoint_suffix();
+               break;
+       case T_FLOATINGPOINT_HEXADECIMAL:
+               kind = EXPR_LITERAL_FLOATINGPOINT_HEXADECIMAL;
+               type = check_floatingpoint_suffix();
+               break;
+       default:
+               panic("unexpected token type in parse_number_literal");
+       }
 
+       expression_t *literal = allocate_expression_zero(kind);
+       literal->base.source_position = token.source_position;
+       literal->base.type            = type;
+       literal->literal.value        = token.literal;
+       literal->literal.suffix       = token.symbol;
        next_token();
 
-       return cnst;
+       /* The type of an integer literal depends on the magnitude of the number
+        * and on the sizes the types can represent. The backend/code generation
+        * has to determine that.
+        */
+       determine_literal_type(&literal->literal);
+       return literal;
 }
 
 /**
@@ -6041,20 +6157,23 @@ static expression_t *parse_int_const(void)
  */
 static expression_t *parse_character_constant(void)
 {
-       expression_t *cnst = allocate_expression_zero(EXPR_CHARACTER_CONSTANT);
-       cnst->base.type          = token.datatype;
-       cnst->conste.v.character = token.v.string;
+       expression_t *literal = allocate_expression_zero(EXPR_LITERAL_CHARACTER);
+       literal->base.source_position = token.source_position;
+       literal->base.type            = c_mode & _CXX ? type_char : type_int;
+       literal->literal.value        = token.literal;
 
-       if (cnst->conste.v.character.size != 1) {
-               if (!GNU_MODE) {
+       size_t len = literal->literal.value.size;
+       if (len != 1) {
+               if (!GNU_MODE && !(c_mode & _C99)) {
                        errorf(HERE, "more than 1 character in character constant");
                } else if (warning.multichar) {
+                       literal->base.type = type_int;
                        warningf(HERE, "multi-character character constant");
                }
        }
-       next_token();
 
-       return cnst;
+       next_token();
+       return literal;
 }
 
 /**
@@ -6062,34 +6181,18 @@ static expression_t *parse_character_constant(void)
  */
 static expression_t *parse_wide_character_constant(void)
 {
-       expression_t *cnst = allocate_expression_zero(EXPR_WIDE_CHARACTER_CONSTANT);
-       cnst->base.type               = token.datatype;
-       cnst->conste.v.wide_character = token.v.wide_string;
+       expression_t *literal = allocate_expression_zero(EXPR_LITERAL_WIDE_CHARACTER);
+       literal->base.source_position = token.source_position;
+       literal->base.type            = type_int;
+       literal->literal.value        = token.literal;
 
-       if (cnst->conste.v.wide_character.size != 1) {
-               if (!GNU_MODE) {
-                       errorf(HERE, "more than 1 character in character constant");
-               } else if (warning.multichar) {
-                       warningf(HERE, "multi-character character constant");
-               }
+       size_t len = wstrlen(&literal->literal.value);
+       if (len != 1) {
+               warningf(HERE, "multi-character character constant");
        }
-       next_token();
-
-       return cnst;
-}
-
-/**
- * Parse a float constant.
- */
-static expression_t *parse_float_const(void)
-{
-       expression_t *cnst         = allocate_expression_zero(EXPR_CONST);
-       cnst->base.type            = token.datatype;
-       cnst->conste.v.float_value = token.v.floatvalue;
 
        next_token();
-
-       return cnst;
+       return literal;
 }
 
 static entity_t *create_implicit_function(symbol_t *symbol,
@@ -6149,57 +6252,58 @@ static type_t *automatic_type_conversion(type_t *orig_type)
 type_t *revert_automatic_type_conversion(const expression_t *expression)
 {
        switch (expression->kind) {
-               case EXPR_REFERENCE: {
-                       entity_t *entity = expression->reference.entity;
-                       if (is_declaration(entity)) {
-                               return entity->declaration.type;
-                       } else if (entity->kind == ENTITY_ENUM_VALUE) {
-                               return entity->enum_value.enum_type;
-                       } else {
-                               panic("no declaration or enum in reference");
-                       }
+       case EXPR_REFERENCE: {
+               entity_t *entity = expression->reference.entity;
+               if (is_declaration(entity)) {
+                       return entity->declaration.type;
+               } else if (entity->kind == ENTITY_ENUM_VALUE) {
+                       return entity->enum_value.enum_type;
+               } else {
+                       panic("no declaration or enum in reference");
                }
+       }
 
-               case EXPR_SELECT: {
-                       entity_t *entity = expression->select.compound_entry;
-                       assert(is_declaration(entity));
-                       type_t   *type   = entity->declaration.type;
-                       return get_qualified_type(type,
-                                       expression->base.type->base.qualifiers);
-               }
+       case EXPR_SELECT: {
+               entity_t *entity = expression->select.compound_entry;
+               assert(is_declaration(entity));
+               type_t   *type   = entity->declaration.type;
+               return get_qualified_type(type,
+                               expression->base.type->base.qualifiers);
+       }
 
-               case EXPR_UNARY_DEREFERENCE: {
-                       const expression_t *const value = expression->unary.value;
-                       type_t             *const type  = skip_typeref(value->base.type);
-                       if (!is_type_pointer(type))
-                               return type_error_type;
-                       return type->pointer.points_to;
-               }
+       case EXPR_UNARY_DEREFERENCE: {
+               const expression_t *const value = expression->unary.value;
+               type_t             *const type  = skip_typeref(value->base.type);
+               if (!is_type_pointer(type))
+                       return type_error_type;
+               return type->pointer.points_to;
+       }
 
-               case EXPR_ARRAY_ACCESS: {
-                       const expression_t *array_ref = expression->array_access.array_ref;
-                       type_t             *type_left = skip_typeref(array_ref->base.type);
-                       if (!is_type_pointer(type_left))
-                               return type_error_type;
-                       return type_left->pointer.points_to;
-               }
+       case EXPR_ARRAY_ACCESS: {
+               const expression_t *array_ref = expression->array_access.array_ref;
+               type_t             *type_left = skip_typeref(array_ref->base.type);
+               if (!is_type_pointer(type_left))
+                       return type_error_type;
+               return type_left->pointer.points_to;
+       }
 
-               case EXPR_STRING_LITERAL: {
-                       size_t size = expression->string.value.size;
-                       return make_array_type(type_char, size, TYPE_QUALIFIER_NONE);
-               }
+       case EXPR_STRING_LITERAL: {
+               size_t size = expression->string_literal.value.size;
+               return make_array_type(type_char, size, TYPE_QUALIFIER_NONE);
+       }
 
-               case EXPR_WIDE_STRING_LITERAL: {
-                       size_t size = expression->wide_string.value.size;
-                       return make_array_type(type_wchar_t, size, TYPE_QUALIFIER_NONE);
-               }
+       case EXPR_WIDE_STRING_LITERAL: {
+               size_t size = wstrlen(&expression->string_literal.value);
+               return make_array_type(type_wchar_t, size, TYPE_QUALIFIER_NONE);
+       }
 
-               case EXPR_COMPOUND_LITERAL:
-                       return expression->compound_literal.type;
+       case EXPR_COMPOUND_LITERAL:
+               return expression->compound_literal.type;
 
-               default:
-                       return expression->base.type;
+       default:
+               break;
        }
+       return expression->base.type;
 }
 
 /**
@@ -6240,7 +6344,7 @@ static entity_t *parse_qualified_identifier(void)
                        parse_error_expected("while parsing identifier", T_IDENTIFIER, NULL);
                        return create_error_entity(sym_anonymous, ENTITY_VARIABLE);
                }
-               symbol = token.v.symbol;
+               symbol = token.symbol;
                pos    = *HERE;
                next_token();
 
@@ -6497,7 +6601,7 @@ static expression_t *parse_parenthesized_expression(void)
        TYPE_SPECIFIERS
                return parse_cast();
        case T_IDENTIFIER:
-               if (is_typedef_symbol(token.v.symbol)) {
+               if (is_typedef_symbol(token.symbol)) {
                        return parse_cast();
                }
        }
@@ -6584,7 +6688,7 @@ static designator_t *parse_designator(void)
                                     T_IDENTIFIER, NULL);
                return NULL;
        }
-       result->symbol = token.v.symbol;
+       result->symbol = token.symbol;
        next_token();
 
        designator_t *last_designator = result;
@@ -6597,7 +6701,7 @@ static designator_t *parse_designator(void)
                        }
                        designator_t *designator    = allocate_ast_zero(sizeof(result[0]));
                        designator->source_position = *HERE;
-                       designator->symbol          = token.v.symbol;
+                       designator->symbol          = token.symbol;
                        next_token();
 
                        last_designator->next = designator;
@@ -6930,7 +7034,7 @@ static expression_t *parse_label_address(void)
                parse_error_expected("while parsing label address", T_IDENTIFIER, NULL);
                goto end_error;
        }
-       symbol_t *symbol = token.v.symbol;
+       symbol_t *symbol = token.symbol;
        next_token();
 
        label_t *label       = get_label(symbol);
@@ -6954,10 +7058,11 @@ end_error:
 static expression_t *parse_noop_expression(void)
 {
        /* the result is a (int)0 */
-       expression_t *cnst         = allocate_expression_zero(EXPR_CONST);
-       cnst->base.type            = type_int;
-       cnst->conste.v.int_value   = 0;
-       cnst->conste.is_ms_noop    = true;
+       expression_t *literal = allocate_expression_zero(EXPR_LITERAL_MS_NOOP);
+       literal->base.type            = type_int;
+       literal->base.source_position = token.source_position;
+       literal->literal.value.begin  = "__noop";
+       literal->literal.value.size   = 6;
 
        eat(T___noop);
 
@@ -6976,7 +7081,7 @@ static expression_t *parse_noop_expression(void)
        expect(')', end_error);
 
 end_error:
-       return cnst;
+       return literal;
 }
 
 /**
@@ -6985,54 +7090,57 @@ end_error:
 static expression_t *parse_primary_expression(void)
 {
        switch (token.type) {
-               case T_false:                        return parse_bool_const(false);
-               case T_true:                         return parse_bool_const(true);
-               case T_INTEGER:                      return parse_int_const();
-               case T_CHARACTER_CONSTANT:           return parse_character_constant();
-               case T_WIDE_CHARACTER_CONSTANT:      return parse_wide_character_constant();
-               case T_FLOATINGPOINT:                return parse_float_const();
-               case T_STRING_LITERAL:
-               case T_WIDE_STRING_LITERAL:          return parse_string_const();
-               case T___FUNCTION__:
-               case T___func__:                     return parse_function_keyword();
-               case T___PRETTY_FUNCTION__:          return parse_pretty_function_keyword();
-               case T___FUNCSIG__:                  return parse_funcsig_keyword();
-               case T___FUNCDNAME__:                return parse_funcdname_keyword();
-               case T___builtin_offsetof:           return parse_offsetof();
-               case T___builtin_va_start:           return parse_va_start();
-               case T___builtin_va_arg:             return parse_va_arg();
-               case T___builtin_va_copy:            return parse_va_copy();
-               case T___builtin_isgreater:
-               case T___builtin_isgreaterequal:
-               case T___builtin_isless:
-               case T___builtin_islessequal:
-               case T___builtin_islessgreater:
-               case T___builtin_isunordered:        return parse_compare_builtin();
-               case T___builtin_constant_p:         return parse_builtin_constant();
-               case T___builtin_types_compatible_p: return parse_builtin_types_compatible();
-               case T__assume:                      return parse_assume();
-               case T_ANDAND:
-                       if (GNU_MODE)
-                               return parse_label_address();
-                       break;
+       case T_false:                        return parse_boolean_literal(false);
+       case T_true:                         return parse_boolean_literal(true);
+       case T_INTEGER:
+       case T_INTEGER_OCTAL:
+       case T_INTEGER_HEXADECIMAL:
+       case T_FLOATINGPOINT:
+       case T_FLOATINGPOINT_HEXADECIMAL:    return parse_number_literal();
+       case T_CHARACTER_CONSTANT:           return parse_character_constant();
+       case T_WIDE_CHARACTER_CONSTANT:      return parse_wide_character_constant();
+       case T_STRING_LITERAL:
+       case T_WIDE_STRING_LITERAL:          return parse_string_literal();
+       case T___FUNCTION__:
+       case T___func__:                     return parse_function_keyword();
+       case T___PRETTY_FUNCTION__:          return parse_pretty_function_keyword();
+       case T___FUNCSIG__:                  return parse_funcsig_keyword();
+       case T___FUNCDNAME__:                return parse_funcdname_keyword();
+       case T___builtin_offsetof:           return parse_offsetof();
+       case T___builtin_va_start:           return parse_va_start();
+       case T___builtin_va_arg:             return parse_va_arg();
+       case T___builtin_va_copy:            return parse_va_copy();
+       case T___builtin_isgreater:
+       case T___builtin_isgreaterequal:
+       case T___builtin_isless:
+       case T___builtin_islessequal:
+       case T___builtin_islessgreater:
+       case T___builtin_isunordered:        return parse_compare_builtin();
+       case T___builtin_constant_p:         return parse_builtin_constant();
+       case T___builtin_types_compatible_p: return parse_builtin_types_compatible();
+       case T__assume:                      return parse_assume();
+       case T_ANDAND:
+               if (GNU_MODE)
+                       return parse_label_address();
+               break;
 
-               case '(':                            return parse_parenthesized_expression();
-               case T___noop:                       return parse_noop_expression();
+       case '(':                            return parse_parenthesized_expression();
+       case T___noop:                       return parse_noop_expression();
 
-               /* Gracefully handle type names while parsing expressions. */
-               case T_COLONCOLON:
+       /* Gracefully handle type names while parsing expressions. */
+       case T_COLONCOLON:
+               return parse_reference();
+       case T_IDENTIFIER:
+               if (!is_typedef_symbol(token.symbol)) {
                        return parse_reference();
-               case T_IDENTIFIER:
-                       if (!is_typedef_symbol(token.v.symbol)) {
-                               return parse_reference();
-                       }
-                       /* FALLTHROUGH */
-               TYPENAME_START {
-                       source_position_t  const pos  = *HERE;
-                       type_t const      *const type = parse_typename();
-                       errorf(&pos, "encountered type '%T' while parsing expression", type);
-                       return create_invalid_expression();
                }
+               /* FALLTHROUGH */
+       TYPENAME_START {
+               source_position_t  const pos  = *HERE;
+               type_t const      *const type = parse_typename();
+               errorf(&pos, "encountered type '%T' while parsing expression", type);
+               return create_invalid_expression();
+       }
        }
 
        errorf(HERE, "unexpected token %K, expected an expression", &token);
@@ -7177,7 +7285,7 @@ static expression_t *parse_select_expression(expression_t *addr)
                parse_error_expected("while parsing select", T_IDENTIFIER, NULL);
                return create_invalid_expression();
        }
-       symbol_t *symbol = token.v.symbol;
+       symbol_t *symbol = token.symbol;
        next_token();
 
        type_t *const orig_type = addr->base.type;
@@ -8277,8 +8385,8 @@ static void warn_string_literal_address(expression_t const* expr)
                expr = expr->unary.value;
        }
 
-       if (expr->kind == EXPR_STRING_LITERAL ||
-           expr->kind == EXPR_WIDE_STRING_LITERAL) {
+       if (expr->kind == EXPR_STRING_LITERAL
+                       || expr->kind == EXPR_WIDE_STRING_LITERAL) {
                warningf(&expr->base.source_position,
                        "comparison with string literal results in unspecified behaviour");
        }
@@ -8628,13 +8736,20 @@ static bool expression_has_effect(const expression_t *const expr)
                case EXPR_INVALID:                    return true; /* do NOT warn */
                case EXPR_REFERENCE:                  return false;
                case EXPR_REFERENCE_ENUM_VALUE:       return false;
+               case EXPR_LABEL_ADDRESS:              return false;
+
                /* suppress the warning for microsoft __noop operations */
-               case EXPR_CONST:                      return expr->conste.is_ms_noop;
-               case EXPR_CHARACTER_CONSTANT:         return false;
-               case EXPR_WIDE_CHARACTER_CONSTANT:    return false;
+               case EXPR_LITERAL_MS_NOOP:            return true;
+               case EXPR_LITERAL_BOOLEAN:
+               case EXPR_LITERAL_CHARACTER:
+               case EXPR_LITERAL_WIDE_CHARACTER:
+               case EXPR_LITERAL_INTEGER:
+               case EXPR_LITERAL_INTEGER_OCTAL:
+               case EXPR_LITERAL_INTEGER_HEXADECIMAL:
+               case EXPR_LITERAL_FLOATINGPOINT:
+               case EXPR_LITERAL_FLOATINGPOINT_HEXADECIMAL: return false;
                case EXPR_STRING_LITERAL:             return false;
                case EXPR_WIDE_STRING_LITERAL:        return false;
-               case EXPR_LABEL_ADDRESS:              return false;
 
                case EXPR_CALL: {
                        const call_expression_t *const call = &expr->call;
@@ -8974,7 +9089,7 @@ static asm_argument_t *parse_asm_arguments(bool is_out)
                                                     T_IDENTIFIER, NULL);
                                return NULL;
                        }
-                       argument->symbol = token.v.symbol;
+                       argument->symbol = token.symbol;
 
                        expect(']', end_error);
                }
@@ -9276,7 +9391,7 @@ end_error:
 static statement_t *parse_label_statement(void)
 {
        assert(token.type == T_IDENTIFIER);
-       symbol_t *symbol = token.v.symbol;
+       symbol_t *symbol = token.symbol;
        label_t  *label  = get_label(symbol);
 
        statement_t *const statement = allocate_statement_zero(STATEMENT_LABEL);
@@ -9658,7 +9773,7 @@ static statement_t *parse_goto(void)
 
                statement->gotos.expression = expression;
        } else if (token.type == T_IDENTIFIER) {
-               symbol_t *symbol = token.v.symbol;
+               symbol_t *symbol = token.symbol;
                next_token();
                statement->gotos.label = get_label(symbol);
        } else {
@@ -9972,7 +10087,7 @@ static statement_t *parse_local_label_declaration(void)
                                T_IDENTIFIER, NULL);
                        goto end_error;
                }
-               symbol_t *symbol = token.v.symbol;
+               symbol_t *symbol = token.symbol;
                entity_t *entity = get_entity(symbol, NAMESPACE_LABEL);
                if (entity != NULL && entity->base.parent_scope == current_scope) {
                        errorf(HERE, "multiple definitions of '__label__ %Y' (previous definition %P)",
@@ -10010,7 +10125,7 @@ static void parse_namespace_definition(void)
        symbol_t *symbol = NULL;
 
        if (token.type == T_IDENTIFIER) {
-               symbol = token.v.symbol;
+               symbol = token.symbol;
                next_token();
 
                entity = get_entity(symbol, NAMESPACE_NORMAL);
@@ -10075,7 +10190,7 @@ static statement_t *intern_parse_statement(void)
                token_type_t la1_type = (token_type_t)look_ahead(1)->type;
                if (la1_type == ':') {
                        statement = parse_label_statement();
-               } else if (is_typedef_symbol(token.v.symbol)) {
+               } else if (is_typedef_symbol(token.symbol)) {
                        statement = parse_declaration_statement();
                } else {
                        /* it's an identifier, the grammar says this must be an
@@ -10085,7 +10200,7 @@ static statement_t *intern_parse_statement(void)
                        switch (la1_type) {
                        case '&':
                        case '*':
-                               if (get_entity(token.v.symbol, NAMESPACE_NORMAL) != NULL)
+                               if (get_entity(token.symbol, NAMESPACE_NORMAL) != NULL)
                                        goto expression_statment;
                                /* FALLTHROUGH */
 
index 570182c..9c4dd66 100644 (file)
@@ -484,9 +484,9 @@ end_of_string:
        const char *const result = string;
 #endif
 
-       pp_token.type           = TP_STRING_LITERAL;
-       pp_token.v.string.begin = result;
-       pp_token.v.string.size  = size;
+       pp_token.type          = TP_STRING_LITERAL;
+       pp_token.literal.begin = result;
+       pp_token.literal.size  = size;
 }
 
 static void parse_wide_character_constant(void)
@@ -532,67 +532,6 @@ end_of_wide_char_constant:
        /* TODO... */
 }
 
-static void parse_wide_string_literal(void)
-{
-       const unsigned start_linenr = input.position.linenr;
-
-       assert(CC == '"');
-       next_char();
-
-       while(1) {
-               switch(CC) {
-               case '\\': {
-                       wchar_rep_t tc = parse_escape_sequence();
-                       obstack_grow(&symbol_obstack, &tc, sizeof(tc));
-                       break;
-               }
-
-               case EOF: {
-                       source_position_t source_position;
-                       source_position.input_name = pp_token.source_position.input_name;
-                       source_position.linenr     = start_linenr;
-                       errorf(&source_position, "string has no end");
-                       pp_token.type = TP_ERROR;
-                       return;
-               }
-
-               case '"':
-                       next_char();
-                       goto end_of_string;
-
-               default: {
-                       wchar_rep_t tc = CC;
-                       obstack_grow(&symbol_obstack, &tc, sizeof(tc));
-                       next_char();
-                       break;
-               }
-               }
-       }
-
-end_of_string:;
-       /* add finishing 0 to the string */
-       static const wchar_rep_t nul = L'\0';
-       obstack_grow(&symbol_obstack, &nul, sizeof(nul));
-
-       const size_t size
-               = (size_t)obstack_object_size(&symbol_obstack) / sizeof(wchar_rep_t);
-       const wchar_rep_t *const string = obstack_finish(&symbol_obstack);
-
-#if 0 /* TODO hash */
-       /* check if there is already a copy of the string */
-       const wchar_rep_t *const result = strset_insert(&stringset, string);
-       if(result != string) {
-               obstack_free(&symbol_obstack, string);
-       }
-#else
-       const wchar_rep_t *const result = string;
-#endif
-
-       pp_token.type                = TP_WIDE_STRING_LITERAL;
-       pp_token.v.wide_string.begin = result;
-       pp_token.v.wide_string.size  = size;
-}
-
 static void parse_character_constant(void)
 {
        const unsigned start_linenr = input.position.linenr;
@@ -637,9 +576,9 @@ end_of_char_constant:;
        const size_t      size   = (size_t)obstack_object_size(&symbol_obstack);
        const char *const string = obstack_finish(&symbol_obstack);
 
-       pp_token.type           = TP_CHARACTER_CONSTANT;
-       pp_token.v.string.begin = string;
-       pp_token.v.string.size  = size;
+       pp_token.type          = TP_CHARACTER_CONSTANT;
+       pp_token.literal.begin = string;
+       pp_token.literal.size  = size;
 }
 
 #define SYMBOL_CHARS_WITHOUT_E_P \
@@ -747,7 +686,7 @@ restart:
                return;
 
        /* if it was an identifier then we might need to expand again */
-       pp_definition_t *symbol_definition = pp_token.v.symbol->pp_definition;
+       pp_definition_t *symbol_definition = pp_token.symbol->pp_definition;
        if(symbol_definition != NULL && !symbol_definition->is_expanding) {
                symbol_definition->parent_expansion = definition;
                symbol_definition->expand_pos       = 0;
@@ -912,7 +851,7 @@ end_symbol:
        /* might be a wide string or character constant ( L"string"/L'c' ) */
        if (CC == '"' && string[0] == 'L' && string[1] == '\0') {
                obstack_free(&symbol_obstack, string);
-               parse_wide_string_literal();
+               /* TODO */
                return;
        } else if (CC == '\'' && string[0] == 'L' && string[1] == '\0') {
                obstack_free(&symbol_obstack, string);
@@ -922,8 +861,8 @@ end_symbol:
 
        symbol_t *symbol = symbol_table_insert(string);
 
-       pp_token.type     = symbol->pp_ID;
-       pp_token.v.symbol = symbol;
+       pp_token.type   = symbol->pp_ID;
+       pp_token.symbol = symbol;
 
        /* we can free the memory from symbol obstack if we already had an entry in
         * the symbol table */
@@ -993,13 +932,12 @@ end_number:
        size_t  size   = obstack_object_size(&symbol_obstack);
        char   *string = obstack_finish(&symbol_obstack);
 
-       pp_token.type           = TP_NUMBER;
-       pp_token.v.string.begin = string;
-       pp_token.v.string.size  = size;
+       pp_token.type          = TP_NUMBER;
+       pp_token.literal.begin = string;
+       pp_token.literal.size  = size;
 }
 
 
-
 #define MAYBE_PROLOG                                       \
                        next_char();                                   \
                        while(1) {                                     \
@@ -1292,14 +1230,14 @@ static void emit_pp_token(void)
 
        switch(pp_token.type) {
        case TP_IDENTIFIER:
-               fputs(pp_token.v.symbol->string, out);
+               fputs(pp_token.symbol->string, out);
                break;
        case TP_NUMBER:
-               fputs(pp_token.v.string.begin, out);
+               fputs(pp_token.literal.begin, out);
                break;
        case TP_STRING_LITERAL:
                fputc('"', out);
-               fputs(pp_token.v.string.begin, out);
+               fputs(pp_token.literal.begin, out);
                fputc('"', out);
                break;
        case '\n':
@@ -1332,22 +1270,6 @@ static bool strings_equal(const string_t *string1, const string_t *string2)
        return true;
 }
 
-static bool wide_strings_equal(const wide_string_t *string1,
-                               const wide_string_t *string2)
-{
-       size_t size = string1->size;
-       if(size != string2->size)
-               return false;
-
-       const wchar_rep_t *c1 = string1->begin;
-       const wchar_rep_t *c2 = string2->begin;
-       for(size_t i = 0; i < size; ++i, ++c1, ++c2) {
-               if(*c1 != *c2)
-                       return false;
-       }
-       return true;
-}
-
 static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
 {
        if(token1->type != token2->type)
@@ -1358,16 +1280,12 @@ static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
                /* TODO */
                return false;
        case TP_IDENTIFIER:
-               return token1->v.symbol == token2->v.symbol;
+               return token1->symbol == token2->symbol;
        case TP_NUMBER:
        case TP_CHARACTER_CONSTANT:
        case TP_STRING_LITERAL:
-               return strings_equal(&token1->v.string, &token2->v.string);
+               return strings_equal(&token1->literal, &token2->literal);
 
-       case TP_WIDE_CHARACTER_CONSTANT:
-       case TP_WIDE_STRING_LITERAL:
-               return wide_strings_equal(&token1->v.wide_string,
-                                         &token2->v.wide_string);
        default:
                return true;
        }
@@ -1399,7 +1317,7 @@ static void parse_define_directive(void)
                       "expected identifier after #define, got '%t'", &pp_token);
                goto error_out;
        }
-       symbol_t *symbol = pp_token.v.symbol;
+       symbol_t *symbol = pp_token.symbol;
 
        pp_definition_t *new_definition
                = obstack_alloc(&pp_obstack, sizeof(new_definition[0]));
@@ -1427,7 +1345,7 @@ static void parse_define_directive(void)
                                }
                                break;
                        case TP_IDENTIFIER:
-                               obstack_ptr_grow(&pp_obstack, pp_token.v.symbol);
+                               obstack_ptr_grow(&pp_obstack, pp_token.symbol);
                                next_preprocessing_token();
 
                                if (pp_token.type == ',') {
@@ -1508,7 +1426,7 @@ static void parse_undef_directive(void)
                return;
        }
 
-       symbol_t *symbol = pp_token.v.symbol;
+       symbol_t *symbol = pp_token.symbol;
        symbol->pp_definition = NULL;
        next_preprocessing_token();
 
@@ -1696,7 +1614,7 @@ static void parse_ifdef_ifndef_directive(void)
                /* just take the true case in the hope to avoid further errors */
                condition = true;
        } else {
-               symbol_t        *symbol        = pp_token.v.symbol;
+               symbol_t        *symbol        = pp_token.symbol;
                pp_definition_t *pp_definition = symbol->pp_definition;
                next_preprocessing_token();
 
index 71d48f8..7e6e8f5 100644 (file)
--- a/printer.c
+++ b/printer.c
 
 static FILE* out;
 
+static void print_char_file(const char c)
+{
+       fputc(c, out);
+}
+
 static void print_string_file(const char *str)
 {
        fputs(str, out);
@@ -36,26 +41,6 @@ static void print_vformat_file(const char *format, va_list ap)
        vfprintf(out, format, ap);
 }
 
-static void print_char_file(wchar_rep_t c)
-{
-       const unsigned tc = (unsigned) c;
-       if (tc < 0x80) {
-               fputc(tc, out);
-       } else if (tc < 0x800) {
-               fputc(0xC0 | (tc >> 6),   out);
-               fputc(0x80 | (tc & 0x3F), out);
-       } else if (tc < 0x10000) {
-               fputc(0xE0 | ( tc >> 12),         out);
-               fputc(0x80 | ((tc >>  6) & 0x3F), out);
-               fputc(0x80 | ( tc        & 0x3F), out);
-       } else {
-               fputc(0xF0 | ( tc >> 18),         out);
-               fputc(0x80 | ((tc >> 12) & 0x3F), out);
-               fputc(0x80 | ((tc >>  6) & 0x3F), out);
-               fputc(0x80 | ( tc        & 0x3F), out);
-       }
-}
-
 void print_to_file(FILE *new_out)
 {
        out = new_out;
@@ -68,6 +53,11 @@ void print_to_file(FILE *new_out)
 
 static struct obstack *obst;
 
+static void print_char_obstack(const char c)
+{
+       obstack_1grow(obst, c);
+}
+
 static void print_string_obstack(const char *str)
 {
        size_t len = strlen(str);
@@ -79,26 +69,6 @@ static void print_vformat_obstack(const char *format, va_list ap)
        obstack_vprintf(obst, format, ap);
 }
 
-static void print_char_obstack(wchar_rep_t c)
-{
-       const unsigned tc = (unsigned) c;
-       if (tc < 0x80) {
-               obstack_1grow(obst, tc);
-       } else if (tc < 0x800) {
-               obstack_1grow(obst, 0xC0 | (tc >> 6));
-               obstack_1grow(obst, 0x80 | (tc & 0x3F));
-       } else if (tc < 0x10000) {
-               obstack_1grow(obst, 0xE0 | ( tc >> 12));
-               obstack_1grow(obst, 0x80 | ((tc >>  6) & 0x3F));
-               obstack_1grow(obst, 0x80 | ( tc        & 0x3F));
-       } else {
-               obstack_1grow(obst, 0xF0 | ( tc >> 18));
-               obstack_1grow(obst, 0x80 | ((tc >> 12) & 0x3F));
-               obstack_1grow(obst, 0x80 | ((tc >>  6) & 0x3F));
-               obstack_1grow(obst, 0x80 | ( tc        & 0x3F));
-       }
-}
-
 void print_to_obstack(struct obstack *new_obst)
 {
        obst = new_obst;
@@ -112,7 +82,7 @@ void print_to_obstack(struct obstack *new_obst)
 static char *buffer_pos;
 static char *buffer_end;
 
-static inline void buffer_add_char(int c)
+static void print_char_buffer(const char c)
 {
        if (buffer_pos == buffer_end)
                return;
@@ -122,7 +92,7 @@ static inline void buffer_add_char(int c)
 static void print_string_buffer(const char *str)
 {
        for (const char *c = str; *c != '\0'; ++c) {
-               buffer_add_char(*c);
+               print_char_buffer(*c);
        }
 }
 
@@ -133,26 +103,6 @@ static void print_vformat_buffer(const char *format, va_list ap)
        buffer_pos    += written < size ? written : size;
 }
 
-static void print_char_buffer(wchar_rep_t c)
-{
-       const unsigned tc = (unsigned) c;
-       if (tc < 0x80) {
-               buffer_add_char(tc);
-       } else if (tc < 0x800) {
-               buffer_add_char(0xC0 | (tc >> 6));
-               buffer_add_char(0x80 | (tc & 0x3F));
-       } else if (tc < 0x10000) {
-               buffer_add_char(0xE0 | ( tc >> 12));
-               buffer_add_char(0x80 | ((tc >>  6) & 0x3F));
-               buffer_add_char(0x80 | ( tc        & 0x3F));
-       } else {
-               buffer_add_char(0xF0 | ( tc >> 18));
-               buffer_add_char(0x80 | ((tc >> 12) & 0x3F));
-               buffer_add_char(0x80 | ((tc >>  6) & 0x3F));
-               buffer_add_char(0x80 | ( tc        & 0x3F));
-       }
-}
-
 void print_to_buffer(char *buffer, size_t buffer_size)
 {
        buffer_pos = buffer;
@@ -173,7 +123,7 @@ void finish_print_to_buffer(void)
 
 void (*print_string)(const char *str) = print_string_file;
 void (*print_vformat)(const char *format, va_list ap) = print_vformat_file;
-void (*print_char)(wchar_rep_t c) = print_char_file;
+void (*print_char)(const char c) = print_char_file;
 
 void printer_push(void)
 {
index e30a4b9..f57d3cf 100644 (file)
--- a/printer.h
+++ b/printer.h
@@ -35,8 +35,7 @@
 /** print a string into current output */
 extern void (*print_string)(const char *str);
 extern void (*print_vformat)(const char *format, va_list ap);
-/** print a single unicode character to current output (encoded as UTF-8) */
-extern void (*print_char)(wchar_rep_t c);
+extern void (*print_char)(const char c); /**< print a single byte to current output */
 
 /** print a printf style format string to current output */
 static inline void __attribute__((format(printf,1,2))) print_format(const char *format, ...)
index e24b40c..873563b 100644 (file)
 #ifndef STRING_REP_H
 #define STRING_REP_H
 
-#include <wchar.h>
-
-typedef wchar_t wchar_rep_t;
+#include <assert.h>
+#include <stdlib.h>
 
 typedef struct string_t {
-       const char *begin;
-       size_t      size;
+       const char *begin; /**< UTF-8 encoded string, the last character is
+                                               * guaranteed to be 0 */
+       size_t      size;  /**< size of string in bytes (not characters) */
 } string_t;
 
-typedef struct wide_string_t {
-       const wchar_rep_t *begin;
-       size_t             size;
-} wide_string_t;
+typedef unsigned int utf32;
+#define UTF32_PRINTF_FORMAT "%u"
+
+/**
+ * Decodes one UTF-8 encoded character (code point) from a string.
+ *
+ * The result is only meaningful for valid UTF-8 input. For malformed input
+ * the returned value is unspecified, but continuation bytes are only
+ * consumed while they really have the form 0b10??????. A sequence that is
+ * cut short therefore stops at the first non-continuation byte (in
+ * particular at a terminating 0) instead of reading beyond it.
+ * A byte that is no valid lead byte triggers the assertion in builds with
+ * assertions enabled. At least one byte is consumed in every case.
+ *
+ * @param p    A pointer to a pointer into the string. The pointer
+ *             is advanced past all consumed bytes.
+ * @return     The decoded code point.
+ */
+static inline utf32 read_utf8_char(const char **p)
+{
+       const unsigned char *c = (const unsigned char *) *p;
+       utf32                result;
+       unsigned             n_continuation;
+
+       if ((*c & 0x80) == 0) {
+               /* 1 byte encoding: 0b0??????? */
+               result         = *c++;
+               n_continuation = 0;
+       } else if ((*c & 0xE0) == 0xC0) {
+               /* 2 byte encoding: 0b110?????, 0b10?????? */
+               result         = *c++ & 0x1F;
+               n_continuation = 1;
+       } else if ((*c & 0xF0) == 0xE0) {
+               /* 3 byte encoding: 0b1110????, 0b10??????, 0b10?????? */
+               result         = *c++ & 0x0F;
+               n_continuation = 2;
+       } else {
+               /* 4 byte enc.: 0b11110???, 0b10??????, 0b10??????, 0b10?????? */
+               assert((*c & 0xF8) == 0xF0);
+               result         = *c++ & 0x07;
+               n_continuation = 3;
+       }
+
+       /* Only swallow real continuation bytes: a truncated sequence must not
+        * drag the read position over the terminating 0 of the string. */
+       for (; n_continuation > 0 && (*c & 0xC0) == 0x80; --n_continuation) {
+               result = (result << 6) | (*c++ & 0x3F);
+       }
+
+       *p = (const char*) c;
+       return result;
+}
+
+/**
+ * Counts the UTF-8 encoded characters of a string by decoding one character
+ * after the other until string->size bytes have been consumed.
+ *
+ * @param string  the string to measure
+ * @return        number of decoded characters (not bytes)
+ */
+static inline size_t wstrlen(const string_t *string)
+{
+       const char *const limit = string->begin + string->size;
+       size_t            count = 0;
+
+       for (const char *pos = string->begin; pos < limit; ++count) {
+               /* advances pos past one encoded character */
+               read_utf8_char(&pos);
+       }
+       return count;
+}
 
 #endif
diff --git a/token.c b/token.c
index 3145d79..1fbaafd 100644 (file)
--- a/token.c
+++ b/token.c
@@ -124,20 +124,42 @@ symbol_t *get_token_symbol(const token_t *token)
        return token_symbols[token->type];
 }
 
+static void print_stringrep(const string_t *string, FILE *f)
+{
+       const char *p = string->begin; /* size-driven: embedded 0 bytes are emitted too */
+       for (size_t left = string->size; left > 0; --left)
+               fputc(*p++, f);
+}
+
 void print_token(FILE *f, const token_t *token)
 {
        switch(token->type) {
        case T_IDENTIFIER:
-               fprintf(f, "identifier '%s'", token->v.symbol->string);
+               fprintf(f, "identifier '%s'", token->symbol->string);
                break;
        case T_INTEGER:
-               fprintf(f, "integer number '%lld'", token->v.intvalue);
-               break;
+       case T_INTEGER_OCTAL:
+       case T_INTEGER_HEXADECIMAL:
        case T_FLOATINGPOINT:
-               fprintf(f, "floating-point number '%LF'", token->v.floatvalue);
+       case T_FLOATINGPOINT_HEXADECIMAL:
+               print_token_type(f, (token_type_t)token->type);
+               fputs(" '", f);
+               print_stringrep(&token->literal, f);
+               if (token->symbol != NULL)
+                       fputs(token->symbol->string, f);
+               fputc('\'', f);
                break;
+       case T_WIDE_STRING_LITERAL:
        case T_STRING_LITERAL:
-               fprintf(f, "string \"%s\"", token->v.string.begin);
+               print_token_type(f, (token_type_t)token->type);
+               fprintf(f, " \"%s\"", token->literal.begin);
+               break;
+       case T_CHARACTER_CONSTANT:
+       case T_WIDE_CHARACTER_CONSTANT:
+               print_token_type(f, (token_type_t)token->type);
+               fputs(" \'", f);
+               print_stringrep(&token->literal, f);
+               fputs("'", f);
                break;
        default:
                fputc('\'', f);
@@ -180,13 +202,13 @@ void print_pp_token(FILE *f, const token_t *token)
 {
        switch((preprocessor_token_type_t) token->type) {
        case TP_IDENTIFIER:
-               fprintf(f, "identifier '%s'", token->v.symbol->string);
+               fprintf(f, "identifier '%s'", token->symbol->string);
                break;
        case TP_NUMBER:
-               fprintf(f, "number '%s'", token->v.string.begin);
+               fprintf(f, "number '%s'", token->literal.begin);
                break;
        case TP_STRING_LITERAL:
-               fprintf(f, "string \"%s\"", token->v.string.begin);
+               fprintf(f, "string \"%s\"", token->literal.begin);
                break;
        default:
                print_pp_token_type(f, (preprocessor_token_type_t) token->type);
index 10d2248..7bdc8d8 100644 (file)
--- a/token_t.h
+++ b/token_t.h
@@ -60,15 +60,9 @@ struct source_position_t {
 extern const source_position_t builtin_source_position;
 
 typedef struct {
-       int type;
-       union {
-               symbol_t      *symbol;
-               long long      intvalue;
-               long double    floatvalue;
-               string_t       string;
-               wide_string_t  wide_string;
-       } v;
-       type_t            *datatype;
+       int                type;
+       symbol_t          *symbol;  /**< contains identifier. Contains number suffix for numbers */
+       string_t           literal; /**< string value/literal value */
        source_position_t  source_position;
 } token_t;
 
index d801de9..39c5a6a 100644 (file)
@@ -2,13 +2,16 @@
 #define TS(x,str,val)
 #endif
 
-TS(IDENTIFIER,              "identifier", = 256)
-TS(INTEGER,                 "integer number",)
-TS(CHARACTER_CONSTANT,      "character constant",)
-TS(WIDE_CHARACTER_CONSTANT, "wide character constant",)
-TS(FLOATINGPOINT,           "floatingpoint number",)
-TS(STRING_LITERAL,          "string literal",)
-TS(WIDE_STRING_LITERAL,     "wide string literal",)
+TS(IDENTIFIER,                "identifier", = 256)
+TS(INTEGER,                   "integer number",)
+TS(INTEGER_OCTAL,             "octal integer number",)
+TS(INTEGER_HEXADECIMAL,       "hexadecimal integer number",)
+TS(FLOATINGPOINT,             "floatingpoint number",)
+TS(FLOATINGPOINT_HEXADECIMAL, "hexadecimal floatingpoint number",)
+TS(CHARACTER_CONSTANT,        "character constant",)
+TS(WIDE_CHARACTER_CONSTANT,   "wide character constant",)
+TS(STRING_LITERAL,            "string literal",)
+TS(WIDE_STRING_LITERAL,       "wide string literal",)
 
 #define ALTERNATE(name, val)          T(_CXX, name, #name,  val)
 #define PUNCTUATOR(name, string, val) T(_ALL, name, string, val)
index 4c8a6c6..883a889 100644 (file)
@@ -107,13 +107,11 @@ static void walk_expression(expression_t const *const expr,
                walk_expression(expr->va_copye.dst, callback, env);
                return;
 
+       EXPR_LITERAL_CASES
        case EXPR_INVALID:
        case EXPR_OFFSETOF:
        case EXPR_REFERENCE:
        case EXPR_REFERENCE_ENUM_VALUE:
-       case EXPR_CONST:
-       case EXPR_CHARACTER_CONSTANT:
-       case EXPR_WIDE_CHARACTER_CONSTANT:
        case EXPR_STRING_LITERAL:
        case EXPR_WIDE_STRING_LITERAL:
        case EXPR_FUNCNAME:
index 236e625..05763d4 100644 (file)
@@ -223,16 +223,9 @@ static void write_unary_expression(const unary_expression_t *expression)
 
 static void write_expression(const expression_t *expression)
 {
-       const const_expression_t *constant;
-       /* TODO */
        switch(expression->kind) {
-       case EXPR_CONST:
-               constant = &expression->conste;
-               if(is_type_integer(expression->base.type)) {
-                       fprintf(out, "%lld", constant->v.int_value);
-               } else {
-                       fprintf(out, "%Lf", constant->v.float_value);
-               }
+       case EXPR_LITERAL_INTEGER:
+               fprintf(out, "%s", expression->literal.value.begin);
                break;
        EXPR_UNARY_CASES
                write_unary_expression((const unary_expression_t*) expression);
index 5ab41fe..26bfd4e 100644 (file)
@@ -298,16 +298,10 @@ static void write_binary_expression(const binary_expression_t *expression)
 
 static void write_expression(const expression_t *expression)
 {
-       const const_expression_t *constant;
        /* TODO */
        switch(expression->kind) {
-       case EXPR_CONST:
-               constant = &expression->conste;
-               if(is_type_integer(expression->base.type)) {
-                       fprintf(out, "%lld", constant->v.int_value);
-               } else {
-                       fprintf(out, "%Lf", constant->v.float_value);
-               }
+       case EXPR_LITERAL_INTEGER:
+               fprintf(out, "%s", expression->literal.value.begin);
                break;
        case EXPR_REFERENCE_ENUM_VALUE: {
                /* UHOH... hacking */