From: Matthias Braun <matze@braunis.de>
Date: Wed, 24 Feb 2010 15:06:07 +0000 (+0000)
Subject: - Rework the way literals are handled, these are now kept as strings until
X-Git-Url: http://nsz.repo.hu/git/?a=commitdiff_plain;h=2beaa4f65961fe297663e1cec9e5632b7f3e1cba;hp=373f63fc99fa81faf78910353564fda6ce44d7c1;p=cparser

- Rework the way literals are handled; these are now kept as strings until
  code generation
- Don't work with wide strings inside the compiler; always use UTF-8
  encoded "normal" strings instead. This simplifies some places (like the
  printf format checker) and avoids code duplication.

[r27212]
---

diff --git a/ast.c b/ast.c
index 5dc9b4c..fa3cd71 100644
--- a/ast.c
+++ b/ast.c
@@ -26,6 +26,7 @@
 #include "lang_features.h"
 #include "entity_t.h"
 #include "printer.h"
+#include "types.h"
 
 #include <assert.h>
 #include <stdio.h>
@@ -68,6 +69,18 @@ void print_indent(void)
 		print_string("\t");
 }
 
+static void print_symbol(const symbol_t *symbol)
+{
+	print_string(symbol->string);
+}
+
+static void print_stringrep(const string_t *string)
+{
+	for (size_t i = 0; i < string->size; ++i) {
+		print_char(string->begin[i]);
+	}
+}
+
 /**
  * Returns 1 if a given precedence level has right-to-left
  * associativity, else 0.
@@ -95,88 +108,93 @@ static int right_to_left(unsigned precedence)
 static unsigned get_expression_precedence(expression_kind_t kind)
 {
 	static const unsigned prec[] = {
-		[EXPR_UNKNOWN]                    = PREC_PRIMARY,
-		[EXPR_INVALID]                    = PREC_PRIMARY,
-		[EXPR_REFERENCE]                  = PREC_PRIMARY,
-		[EXPR_REFERENCE_ENUM_VALUE]       = PREC_PRIMARY,
-		[EXPR_CHARACTER_CONSTANT]         = PREC_PRIMARY,
-		[EXPR_WIDE_CHARACTER_CONSTANT]    = PREC_PRIMARY,
-		[EXPR_CONST]                      = PREC_PRIMARY,
-		[EXPR_STRING_LITERAL]             = PREC_PRIMARY,
-		[EXPR_WIDE_STRING_LITERAL]        = PREC_PRIMARY,
-		[EXPR_COMPOUND_LITERAL]           = PREC_UNARY,
-		[EXPR_CALL]                       = PREC_POSTFIX,
-		[EXPR_CONDITIONAL]                = PREC_CONDITIONAL,
-		[EXPR_SELECT]                     = PREC_POSTFIX,
-		[EXPR_ARRAY_ACCESS]               = PREC_POSTFIX,
-		[EXPR_SIZEOF]                     = PREC_UNARY,
-		[EXPR_CLASSIFY_TYPE]              = PREC_UNARY,
-		[EXPR_ALIGNOF]                    = PREC_UNARY,
-
-		[EXPR_FUNCNAME]                   = PREC_PRIMARY,
-		[EXPR_BUILTIN_CONSTANT_P]         = PREC_PRIMARY,
-		[EXPR_BUILTIN_TYPES_COMPATIBLE_P] = PREC_PRIMARY,
-		[EXPR_OFFSETOF]                   = PREC_PRIMARY,
-		[EXPR_VA_START]                   = PREC_PRIMARY,
-		[EXPR_VA_ARG]                     = PREC_PRIMARY,
-		[EXPR_VA_COPY]                    = PREC_PRIMARY,
-		[EXPR_STATEMENT]                  = PREC_PRIMARY,
-		[EXPR_LABEL_ADDRESS]              = PREC_PRIMARY,
-
-		[EXPR_UNARY_NEGATE]               = PREC_UNARY,
-		[EXPR_UNARY_PLUS]                 = PREC_UNARY,
-		[EXPR_UNARY_BITWISE_NEGATE]       = PREC_UNARY,
-		[EXPR_UNARY_NOT]                  = PREC_UNARY,
-		[EXPR_UNARY_DEREFERENCE]          = PREC_UNARY,
-		[EXPR_UNARY_TAKE_ADDRESS]         = PREC_UNARY,
-		[EXPR_UNARY_POSTFIX_INCREMENT]    = PREC_POSTFIX,
-		[EXPR_UNARY_POSTFIX_DECREMENT]    = PREC_POSTFIX,
-		[EXPR_UNARY_PREFIX_INCREMENT]     = PREC_UNARY,
-		[EXPR_UNARY_PREFIX_DECREMENT]     = PREC_UNARY,
-		[EXPR_UNARY_CAST]                 = PREC_UNARY,
-		[EXPR_UNARY_CAST_IMPLICIT]        = PREC_UNARY,
-		[EXPR_UNARY_ASSUME]               = PREC_PRIMARY,
-		[EXPR_UNARY_DELETE]               = PREC_UNARY,
-		[EXPR_UNARY_DELETE_ARRAY]         = PREC_UNARY,
-		[EXPR_UNARY_THROW]                = PREC_ASSIGNMENT,
-
-		[EXPR_BINARY_ADD]                 = PREC_ADDITIVE,
-		[EXPR_BINARY_SUB]                 = PREC_ADDITIVE,
-		[EXPR_BINARY_MUL]                 = PREC_MULTIPLICATIVE,
-		[EXPR_BINARY_DIV]                 = PREC_MULTIPLICATIVE,
-		[EXPR_BINARY_MOD]                 = PREC_MULTIPLICATIVE,
-		[EXPR_BINARY_EQUAL]               = PREC_EQUALITY,
-		[EXPR_BINARY_NOTEQUAL]            = PREC_EQUALITY,
-		[EXPR_BINARY_LESS]                = PREC_RELATIONAL,
-		[EXPR_BINARY_LESSEQUAL]           = PREC_RELATIONAL,
-		[EXPR_BINARY_GREATER]             = PREC_RELATIONAL,
-		[EXPR_BINARY_GREATEREQUAL]        = PREC_RELATIONAL,
-		[EXPR_BINARY_BITWISE_AND]         = PREC_AND,
-		[EXPR_BINARY_BITWISE_OR]          = PREC_OR,
-		[EXPR_BINARY_BITWISE_XOR]         = PREC_XOR,
-		[EXPR_BINARY_LOGICAL_AND]         = PREC_LOGICAL_AND,
-		[EXPR_BINARY_LOGICAL_OR]          = PREC_LOGICAL_OR,
-		[EXPR_BINARY_SHIFTLEFT]           = PREC_SHIFT,
-		[EXPR_BINARY_SHIFTRIGHT]          = PREC_SHIFT,
-		[EXPR_BINARY_ASSIGN]              = PREC_ASSIGNMENT,
-		[EXPR_BINARY_MUL_ASSIGN]          = PREC_ASSIGNMENT,
-		[EXPR_BINARY_DIV_ASSIGN]          = PREC_ASSIGNMENT,
-		[EXPR_BINARY_MOD_ASSIGN]          = PREC_ASSIGNMENT,
-		[EXPR_BINARY_ADD_ASSIGN]          = PREC_ASSIGNMENT,
-		[EXPR_BINARY_SUB_ASSIGN]          = PREC_ASSIGNMENT,
-		[EXPR_BINARY_SHIFTLEFT_ASSIGN]    = PREC_ASSIGNMENT,
-		[EXPR_BINARY_SHIFTRIGHT_ASSIGN]   = PREC_ASSIGNMENT,
-		[EXPR_BINARY_BITWISE_AND_ASSIGN]  = PREC_ASSIGNMENT,
-		[EXPR_BINARY_BITWISE_XOR_ASSIGN]  = PREC_ASSIGNMENT,
-		[EXPR_BINARY_BITWISE_OR_ASSIGN]   = PREC_ASSIGNMENT,
-		[EXPR_BINARY_COMMA]               = PREC_EXPRESSION,
-
-		[EXPR_BINARY_ISGREATER]           = PREC_PRIMARY,
-		[EXPR_BINARY_ISGREATEREQUAL]      = PREC_PRIMARY,
-		[EXPR_BINARY_ISLESS]              = PREC_PRIMARY,
-		[EXPR_BINARY_ISLESSEQUAL]         = PREC_PRIMARY,
-		[EXPR_BINARY_ISLESSGREATER]       = PREC_PRIMARY,
-		[EXPR_BINARY_ISUNORDERED]         = PREC_PRIMARY
+		[EXPR_UNKNOWN]                           = PREC_PRIMARY,
+		[EXPR_INVALID]                           = PREC_PRIMARY,
+		[EXPR_REFERENCE]                         = PREC_PRIMARY,
+		[EXPR_REFERENCE_ENUM_VALUE]              = PREC_PRIMARY,
+		[EXPR_LITERAL_INTEGER]                   = PREC_PRIMARY,
+		[EXPR_LITERAL_INTEGER_OCTAL]             = PREC_PRIMARY,
+		[EXPR_LITERAL_INTEGER_HEXADECIMAL]       = PREC_PRIMARY,
+		[EXPR_LITERAL_FLOATINGPOINT]             = PREC_PRIMARY,
+		[EXPR_LITERAL_FLOATINGPOINT_HEXADECIMAL] = PREC_PRIMARY,
+		[EXPR_LITERAL_CHARACTER]                 = PREC_PRIMARY,
+		[EXPR_LITERAL_WIDE_CHARACTER]            = PREC_PRIMARY,
+		[EXPR_LITERAL_MS_NOOP]                   = PREC_PRIMARY,
+		[EXPR_STRING_LITERAL]                    = PREC_PRIMARY,
+		[EXPR_WIDE_STRING_LITERAL]               = PREC_PRIMARY,
+		[EXPR_COMPOUND_LITERAL]                  = PREC_UNARY,
+		[EXPR_CALL]                              = PREC_POSTFIX,
+		[EXPR_CONDITIONAL]                       = PREC_CONDITIONAL,
+		[EXPR_SELECT]                            = PREC_POSTFIX,
+		[EXPR_ARRAY_ACCESS]                      = PREC_POSTFIX,
+		[EXPR_SIZEOF]                            = PREC_UNARY,
+		[EXPR_CLASSIFY_TYPE]                     = PREC_UNARY,
+		[EXPR_ALIGNOF]                           = PREC_UNARY,
+
+		[EXPR_FUNCNAME]                          = PREC_PRIMARY,
+		[EXPR_BUILTIN_CONSTANT_P]                = PREC_PRIMARY,
+		[EXPR_BUILTIN_TYPES_COMPATIBLE_P]        = PREC_PRIMARY,
+		[EXPR_OFFSETOF]                          = PREC_PRIMARY,
+		[EXPR_VA_START]                          = PREC_PRIMARY,
+		[EXPR_VA_ARG]                            = PREC_PRIMARY,
+		[EXPR_VA_COPY]                           = PREC_PRIMARY,
+		[EXPR_STATEMENT]                         = PREC_PRIMARY,
+		[EXPR_LABEL_ADDRESS]                     = PREC_PRIMARY,
+
+		[EXPR_UNARY_NEGATE]                      = PREC_UNARY,
+		[EXPR_UNARY_PLUS]                        = PREC_UNARY,
+		[EXPR_UNARY_BITWISE_NEGATE]              = PREC_UNARY,
+		[EXPR_UNARY_NOT]                         = PREC_UNARY,
+		[EXPR_UNARY_DEREFERENCE]                 = PREC_UNARY,
+		[EXPR_UNARY_TAKE_ADDRESS]                = PREC_UNARY,
+		[EXPR_UNARY_POSTFIX_INCREMENT]           = PREC_POSTFIX,
+		[EXPR_UNARY_POSTFIX_DECREMENT]           = PREC_POSTFIX,
+		[EXPR_UNARY_PREFIX_INCREMENT]            = PREC_UNARY,
+		[EXPR_UNARY_PREFIX_DECREMENT]            = PREC_UNARY,
+		[EXPR_UNARY_CAST]                        = PREC_UNARY,
+		[EXPR_UNARY_CAST_IMPLICIT]               = PREC_UNARY,
+		[EXPR_UNARY_ASSUME]                      = PREC_PRIMARY,
+		[EXPR_UNARY_DELETE]                      = PREC_UNARY,
+		[EXPR_UNARY_DELETE_ARRAY]                = PREC_UNARY,
+		[EXPR_UNARY_THROW]                       = PREC_ASSIGNMENT,
+
+		[EXPR_BINARY_ADD]                        = PREC_ADDITIVE,
+		[EXPR_BINARY_SUB]                        = PREC_ADDITIVE,
+		[EXPR_BINARY_MUL]                        = PREC_MULTIPLICATIVE,
+		[EXPR_BINARY_DIV]                        = PREC_MULTIPLICATIVE,
+		[EXPR_BINARY_MOD]                        = PREC_MULTIPLICATIVE,
+		[EXPR_BINARY_EQUAL]                      = PREC_EQUALITY,
+		[EXPR_BINARY_NOTEQUAL]                   = PREC_EQUALITY,
+		[EXPR_BINARY_LESS]                       = PREC_RELATIONAL,
+		[EXPR_BINARY_LESSEQUAL]                  = PREC_RELATIONAL,
+		[EXPR_BINARY_GREATER]                    = PREC_RELATIONAL,
+		[EXPR_BINARY_GREATEREQUAL]               = PREC_RELATIONAL,
+		[EXPR_BINARY_BITWISE_AND]                = PREC_AND,
+		[EXPR_BINARY_BITWISE_OR]                 = PREC_OR,
+		[EXPR_BINARY_BITWISE_XOR]                = PREC_XOR,
+		[EXPR_BINARY_LOGICAL_AND]                = PREC_LOGICAL_AND,
+		[EXPR_BINARY_LOGICAL_OR]                 = PREC_LOGICAL_OR,
+		[EXPR_BINARY_SHIFTLEFT]                  = PREC_SHIFT,
+		[EXPR_BINARY_SHIFTRIGHT]                 = PREC_SHIFT,
+		[EXPR_BINARY_ASSIGN]                     = PREC_ASSIGNMENT,
+		[EXPR_BINARY_MUL_ASSIGN]                 = PREC_ASSIGNMENT,
+		[EXPR_BINARY_DIV_ASSIGN]                 = PREC_ASSIGNMENT,
+		[EXPR_BINARY_MOD_ASSIGN]                 = PREC_ASSIGNMENT,
+		[EXPR_BINARY_ADD_ASSIGN]                 = PREC_ASSIGNMENT,
+		[EXPR_BINARY_SUB_ASSIGN]                 = PREC_ASSIGNMENT,
+		[EXPR_BINARY_SHIFTLEFT_ASSIGN]           = PREC_ASSIGNMENT,
+		[EXPR_BINARY_SHIFTRIGHT_ASSIGN]          = PREC_ASSIGNMENT,
+		[EXPR_BINARY_BITWISE_AND_ASSIGN]         = PREC_ASSIGNMENT,
+		[EXPR_BINARY_BITWISE_XOR_ASSIGN]         = PREC_ASSIGNMENT,
+		[EXPR_BINARY_BITWISE_OR_ASSIGN]          = PREC_ASSIGNMENT,
+		[EXPR_BINARY_COMMA]                      = PREC_EXPRESSION,
+
+		[EXPR_BINARY_ISGREATER]                  = PREC_PRIMARY,
+		[EXPR_BINARY_ISGREATEREQUAL]             = PREC_PRIMARY,
+		[EXPR_BINARY_ISLESS]                     = PREC_PRIMARY,
+		[EXPR_BINARY_ISLESSEQUAL]                = PREC_PRIMARY,
+		[EXPR_BINARY_ISLESSGREATER]              = PREC_PRIMARY,
+		[EXPR_BINARY_ISUNORDERED]                = PREC_PRIMARY
 	};
 	assert((size_t)kind < lengthof(prec));
 	unsigned res = prec[kind];
@@ -185,51 +203,6 @@ static unsigned get_expression_precedence(expression_kind_t kind)
 	return res;
 }
 
-/**
- * Print a constant expression.
- *
- * @param cnst  the constant expression
- */
-static void print_const(const const_expression_t *cnst)
-{
-	if (cnst->base.type == NULL)
-		return;
-
-	const type_t *const type = skip_typeref(cnst->base.type);
-
-	if (is_type_atomic(type, ATOMIC_TYPE_BOOL)) {
-		print_string(cnst->v.int_value ? "true" : "false");
-	} else if (is_type_integer(type)) {
-		print_format("%lld", cnst->v.int_value);
-	} else if (is_type_float(type)) {
-		long double const val = cnst->v.float_value;
-#ifdef _WIN32
-		/* ARG, no way to print long double */
-		print_format("%.20g", (double)val);
-#else
-		print_format("%.20Lg", val);
-#endif
-		if (isfinite(val) && truncl(val) == val)
-			print_string(".0");
-	} else {
-		panic("unknown constant");
-	}
-
-	char const* suffix;
-	switch (type->atomic.akind) {
-		case ATOMIC_TYPE_UINT:        suffix = "U";   break;
-		case ATOMIC_TYPE_LONG:        suffix = "L";   break;
-		case ATOMIC_TYPE_ULONG:       suffix = "UL";  break;
-		case ATOMIC_TYPE_LONGLONG:    suffix = "LL";  break;
-		case ATOMIC_TYPE_ULONGLONG:   suffix = "ULL"; break;
-		case ATOMIC_TYPE_FLOAT:       suffix = "F";   break;
-		case ATOMIC_TYPE_LONG_DOUBLE: suffix = "L";   break;
-
-		default: return;
-	}
-	print_string(suffix);
-}
-
 /**
  * Print a quoted string constant.
  *
@@ -237,7 +210,8 @@ static void print_const(const const_expression_t *cnst)
  * @param border  the border char
  * @param skip    number of chars to skip at the end
  */
-static void print_quoted_string(const string_t *const string, char border, int skip)
+static void print_quoted_string(const string_t *const string, char border,
+                                int skip)
 {
 	print_char(border);
 	const char *end = string->begin + string->size - skip;
@@ -247,15 +221,15 @@ static void print_quoted_string(const string_t *const string, char border, int s
 			print_string("\\");
 		}
 		switch (tc) {
-		case '\\':  print_string("\\\\"); break;
-		case '\a':  print_string("\\a"); break;
-		case '\b':  print_string("\\b"); break;
-		case '\f':  print_string("\\f"); break;
-		case '\n':  print_string("\\n"); break;
-		case '\r':  print_string("\\r"); break;
-		case '\t':  print_string("\\t"); break;
-		case '\v':  print_string("\\v"); break;
-		case '\?':  print_string("\\?"); break;
+		case '\\': print_string("\\\\"); break;
+		case '\a': print_string("\\a"); break;
+		case '\b': print_string("\\b"); break;
+		case '\f': print_string("\\f"); break;
+		case '\n': print_string("\\n"); break;
+		case '\r': print_string("\\r"); break;
+		case '\t': print_string("\\t"); break;
+		case '\v': print_string("\\v"); break;
+		case '\?': print_string("\\?"); break;
 		case 27:
 			if (c_mode & _GNUC) {
 				print_string("\\e"); break;
@@ -273,77 +247,42 @@ static void print_quoted_string(const string_t *const string, char border, int s
 	print_char(border);
 }
 
-/**
- * Prints a wide string literal expression.
- *
- * @param wstr    the wide string literal expression
- * @param border  the border char
- * @param skip    number of chars to skip at the end
- */
-static void print_quoted_wide_string(const wide_string_t *const wstr,
-                                     char border, int skip)
+static void print_string_literal(const string_literal_expression_t *literal)
 {
-	print_string("L");
-	print_char(border);
-	const wchar_rep_t *end = wstr->begin + wstr->size - skip;
-	for (const wchar_rep_t *c = wstr->begin; c != end; ++c) {
-		switch (*c) {
-			case L'\"':  print_string("\\\""); break;
-			case L'\\':  print_string("\\\\"); break;
-			case L'\a':  print_string("\\a");  break;
-			case L'\b':  print_string("\\b");  break;
-			case L'\f':  print_string("\\f");  break;
-			case L'\n':  print_string("\\n");  break;
-			case L'\r':  print_string("\\r");  break;
-			case L'\t':  print_string("\\t");  break;
-			case L'\v':  print_string("\\v");  break;
-			case L'\?':  print_string("\\?");  break;
-			case 27:
-				if (c_mode & _GNUC) {
-					print_string("\\e"); break;
-				}
-				/* FALLTHROUGH */
-			default: {
-				const unsigned tc = *c;
-				if (tc < 0x80U) {
-					if (isprint(*c)) {
-						print_char(*c);
-					} else {
-						print_format("\\%03o", tc);
-					}
-				} else {
-					print_char(tc);
-				}
-			}
-		}
+	if (literal->base.kind == EXPR_WIDE_STRING_LITERAL) {
+		print_char('L');
 	}
-	print_char(border);
+	print_quoted_string(&literal->value, '"', 1);
 }
 
-/**
- * Print a constant character expression.
- *
- * @param cnst  the constant character expression
- */
-static void print_character_constant(const const_expression_t *cnst)
-{
-	print_quoted_string(&cnst->v.character, '\'', 0);
-}
-
-static void print_wide_character_constant(const const_expression_t *cnst)
+static void print_literal(const literal_expression_t *literal)
 {
-	print_quoted_wide_string(&cnst->v.wide_character, '\'', 0);
-}
-
-/**
- * Prints a string literal expression.
- *
- * @param string_literal  the string literal expression
- */
-static void print_string_literal(
-		const string_literal_expression_t *string_literal)
-{
-	print_quoted_string(&string_literal->value, '"', 1);
+	switch (literal->base.kind) {
+	case EXPR_LITERAL_MS_NOOP:
+		print_string("__noop");
+		return;
+	case EXPR_LITERAL_INTEGER_HEXADECIMAL:
+	case EXPR_LITERAL_FLOATINGPOINT_HEXADECIMAL:
+		print_string("0x");
+		/* FALLTHROUGH */
+	case EXPR_LITERAL_BOOLEAN:
+	case EXPR_LITERAL_INTEGER:
+	case EXPR_LITERAL_INTEGER_OCTAL:
+	case EXPR_LITERAL_FLOATINGPOINT:
+		print_stringrep(&literal->value);
+		if (literal->suffix != NULL)
+			print_symbol(literal->suffix);
+		return;
+	case EXPR_LITERAL_WIDE_CHARACTER:
+		print_char('L');
+		/* FALLTHROUGH */
+	case EXPR_LITERAL_CHARACTER:
+		print_quoted_string(&literal->value, '\'', 0);
+		return;
+	default:
+		break;
+	}
+	print_string("INVALID LITERAL KIND");
 }
 
 /**
@@ -361,12 +300,6 @@ static void print_funcname(const funcname_expression_t *funcname)
 	print_string(s);
 }
 
-static void print_wide_string_literal(
-	const wide_string_literal_expression_t *const wstr)
-{
-	print_quoted_wide_string(&wstr->value, '"', 1);
-}
-
 static void print_compound_literal(
 		const compound_literal_expression_t *expression)
 {
@@ -758,24 +691,16 @@ static void print_expression_prec(const expression_t *expression, unsigned top_p
 	case EXPR_INVALID:
 		print_string("$invalid expression$");
 		break;
-	case EXPR_CHARACTER_CONSTANT:
-		print_character_constant(&expression->conste);
-		break;
-	case EXPR_WIDE_CHARACTER_CONSTANT:
-		print_wide_character_constant(&expression->conste);
+	case EXPR_WIDE_STRING_LITERAL:
+	case EXPR_STRING_LITERAL:
+		print_string_literal(&expression->string_literal);
 		break;
-	case EXPR_CONST:
-		print_const(&expression->conste);
+	EXPR_LITERAL_CASES
+		print_literal(&expression->literal);
 		break;
 	case EXPR_FUNCNAME:
 		print_funcname(&expression->funcname);
 		break;
-	case EXPR_STRING_LITERAL:
-		print_string_literal(&expression->string);
-		break;
-	case EXPR_WIDE_STRING_LITERAL:
-		print_wide_string_literal(&expression->wide_string);
-		break;
 	case EXPR_COMPOUND_LITERAL:
 		print_compound_literal(&expression->compound_literal);
 		break;
@@ -833,10 +758,12 @@ static void print_expression_prec(const expression_t *expression, unsigned top_p
 		print_statement_expression(&expression->statement);
 		break;
 
+#if 0
 	default:
 		/* TODO */
 		print_format("some expression of type %d", (int)expression->kind);
 		break;
+#endif
 	}
 	if (parenthesized)
 		print_string(")");
@@ -1328,7 +1255,7 @@ void print_initializer(const initializer_t *initializer)
 		print_quoted_string(&initializer->string.string, '"', 1);
 		return;
 	case INITIALIZER_WIDE_STRING:
-		print_quoted_wide_string(&initializer->wide_string.string, '"', 1);
+		print_quoted_string(&initializer->string.string, '"', 1);
 		return;
 	case INITIALIZER_DESIGNATOR:
 		print_designator(initializer->designator.designator);
@@ -1851,10 +1778,7 @@ static bool is_object_with_constant_address(const expression_t *expression)
 bool is_constant_expression(const expression_t *expression)
 {
 	switch (expression->kind) {
-
-	case EXPR_CONST:
-	case EXPR_CHARACTER_CONSTANT:
-	case EXPR_WIDE_CHARACTER_CONSTANT:
+	EXPR_LITERAL_CASES
 	case EXPR_CLASSIFY_TYPE:
 	case EXPR_OFFSETOF:
 	case EXPR_ALIGNOF:
diff --git a/ast.h b/ast.h
index 9d8c9b7..0f70f3b 100644
--- a/ast.h
+++ b/ast.h
@@ -25,10 +25,9 @@
 #include "entity.h"
 
 typedef struct expression_base_t                     expression_base_t;
-typedef struct const_expression_t                    const_expression_t;
+typedef struct literal_expression_t                  literal_expression_t;
 typedef struct string_literal_expression_t           string_literal_expression_t;
 typedef struct funcname_expression_t                 funcname_expression_t;
-typedef struct wide_string_literal_expression_t      wide_string_literal_expression_t;
 typedef struct compound_literal_expression_t         compound_literal_expression_t;
 typedef struct reference_expression_t                reference_expression_t;
 typedef struct cast_expression_t                     cast_expression_t;
@@ -125,4 +124,11 @@ bool is_address_constant(const expression_t *expression);
 long fold_constant_to_int(const expression_t *expression);
 bool fold_constant_to_bool(const expression_t *expression);
 
+/**
+ * The type of a literal is usually the smallest type that can hold the value.
+ * Since this is backend-dependent, the parser needs this call exposed.
+ * Works for EXPR_LITERAL_* expressions.
+ */
+void determine_literal_type(literal_expression_t *literal);
+
 #endif
diff --git a/ast2firm.c b/ast2firm.c
index 5e7f363..47430b5 100644
--- a/ast2firm.c
+++ b/ast2firm.c
@@ -1043,6 +1043,22 @@ entity_created:
 	return irentity;
 }
 
+/**
+ * Creates a SymConst for a given entity.
+ *
+ * @param dbgi    debug info
+ * @param mode    the (reference) mode for the SymConst
+ * @param entity  the entity
+ */
+static ir_node *create_symconst(dbg_info *dbgi, ir_mode *mode,
+                                ir_entity *entity)
+{
+	assert(entity != NULL);
+	union symconst_symbol sym;
+	sym.entity_p = entity;
+	return new_d_SymConst(dbgi, mode, sym, symconst_addr_ent);
+}
+
 static ir_node *create_conv(dbg_info *dbgi, ir_node *value, ir_mode *dest_mode)
 {
 	ir_mode *value_mode = get_irn_mode(value);
@@ -1061,74 +1077,251 @@ static ir_node *create_conv(dbg_info *dbgi, ir_node *value, ir_mode *dest_mode)
 }
 
 /**
- * Creates a Const node representing a constant.
+ * Creates a SymConst node representing a wide string literal.
+ *
+ * @param literal   the wide string literal
  */
-static ir_node *const_to_firm(const const_expression_t *cnst)
+static ir_node *wide_string_literal_to_firm(
+		const string_literal_expression_t *literal)
 {
-	dbg_info *dbgi = get_dbg_info(&cnst->base.source_position);
-	type_t   *type = skip_typeref(cnst->base.type);
-	ir_mode  *mode = get_ir_mode_storage(type);
+	ir_type  *const global_type = get_glob_type();
+	ir_type  *const elem_type   = ir_type_wchar_t;
+	dbg_info *const dbgi        = get_dbg_info(&literal->base.source_position);
+	ir_type  *const type        = new_type_array(1, elem_type);
 
-	char    buf[128];
-	tarval *tv;
-	size_t  len;
-	if (mode_is_float(mode)) {
-		tv = new_tarval_from_double(cnst->v.float_value, mode);
-	} else {
-		if (mode_is_signed(mode)) {
-			len = snprintf(buf, sizeof(buf), "%lld", cnst->v.int_value);
-		} else {
-			len = snprintf(buf, sizeof(buf), "%llu",
-			               (unsigned long long) cnst->v.int_value);
-		}
-		tv = new_tarval_from_str(buf, len, mode);
+	ident     *const id     = id_unique("str.%u");
+	ir_entity *const entity = new_d_entity(global_type, id, type, dbgi);
+	set_entity_ld_ident(entity, id);
+	set_entity_visibility(entity, ir_visibility_private);
+	add_entity_linkage(entity, IR_LINKAGE_CONSTANT);
+
+	ir_mode      *const mode = get_type_mode(elem_type);
+	const size_t        slen = wstrlen(&literal->value);
+
+	set_array_lower_bound_int(type, 0, 0);
+	set_array_upper_bound_int(type, 0, slen);
+	set_type_size_bytes(type, slen * get_mode_size_bytes(mode));
+	set_type_state(type, layout_fixed);
+
+	ir_initializer_t *initializer = create_initializer_compound(slen);
+	const char              *p    = literal->value.begin;
+	for (size_t i = 0; i < slen; ++i) {
+		assert(p < literal->value.begin + literal->value.size);
+		utf32              v  = read_utf8_char(&p);
+		tarval           *tv  = new_tarval_from_long(v, mode);
+		ir_initializer_t *val = create_initializer_tarval(tv);
+		set_initializer_compound_value(initializer, i, val);
 	}
+	set_entity_initializer(entity, initializer);
 
-	ir_node *res        = new_d_Const(dbgi, tv);
-	ir_mode *mode_arith = get_ir_mode_arithmetic(type);
-	return create_conv(dbgi, res, mode_arith);
+	return create_symconst(dbgi, mode_P_data, entity);
 }
 
 /**
- * Creates a Const node representing a character constant.
+ * Creates a SymConst node representing a string constant.
+ *
+ * @param src_pos    the source position of the string constant
+ * @param id_prefix  a prefix for the name of the generated string constant
+ * @param value      the value of the string constant
  */
-static ir_node *character_constant_to_firm(const const_expression_t *cnst)
+static ir_node *string_to_firm(const source_position_t *const src_pos,
+                               const char *const id_prefix,
+                               const string_t *const value)
 {
-	dbg_info *dbgi = get_dbg_info(&cnst->base.source_position);
-	ir_mode  *mode = get_ir_mode_arithmetic(cnst->base.type);
+	ir_type  *const global_type = get_glob_type();
+	dbg_info *const dbgi        = get_dbg_info(src_pos);
+	ir_type  *const type        = new_type_array(1, ir_type_const_char);
 
-	long long int v;
-	size_t const  size = cnst->v.character.size;
-	if (size == 1 && char_is_signed) {
-		v = (signed char)cnst->v.character.begin[0];
-	} else {
-		v = 0;
-		for (size_t i = 0; i < size; ++i) {
-			v = (v << 8) | ((unsigned char)cnst->v.character.begin[i]);
+	ident     *const id     = id_unique(id_prefix);
+	ir_entity *const entity = new_d_entity(global_type, id, type, dbgi);
+	set_entity_ld_ident(entity, id);
+	set_entity_visibility(entity, ir_visibility_private);
+	add_entity_linkage(entity, IR_LINKAGE_CONSTANT);
+
+	ir_type *const elem_type = ir_type_const_char;
+	ir_mode *const mode      = get_type_mode(elem_type);
+
+	const char* const string = value->begin;
+	const size_t      slen   = value->size;
+
+	set_array_lower_bound_int(type, 0, 0);
+	set_array_upper_bound_int(type, 0, slen);
+	set_type_size_bytes(type, slen);
+	set_type_state(type, layout_fixed);
+
+	ir_initializer_t *initializer = create_initializer_compound(slen);
+	for (size_t i = 0; i < slen; ++i) {
+		tarval           *tv  = new_tarval_from_long(string[i], mode);
+		ir_initializer_t *val = create_initializer_tarval(tv);
+		set_initializer_compound_value(initializer, i, val);
+	}
+	set_entity_initializer(entity, initializer);
+
+	return create_symconst(dbgi, mode_P_data, entity);
+}
+
+static bool try_create_integer(literal_expression_t *literal,
+                               type_t *type, unsigned char base)
+{
+	const char *string = literal->value.begin;
+	size_t      size   = literal->value.size;
+
+	assert(type->kind == TYPE_ATOMIC);
+	atomic_type_kind_t akind = type->atomic.akind;
+
+	ir_mode *mode = atomic_modes[akind];
+	tarval  *tv   = new_integer_tarval_from_str(string, size, 1, base, mode);
+	if (tv == tarval_bad)
+		return false;
+
+	literal->base.type    = type;
+	literal->target_value = tv;
+	return true;
+}
+
+static void create_integer_tarval(literal_expression_t *literal)
+{
+	unsigned  us     = 0;
+	unsigned  ls     = 0;
+	symbol_t *suffix = literal->suffix;
+	/* parse suffix */
+	if (suffix != NULL) {
+		for (const char *c = suffix->string; *c != '\0'; ++c) {
+			if (*c == 'u' || *c == 'U') { ++us; }
+			if (*c == 'l' || *c == 'L') { ++ls; }
 		}
 	}
-	char    buf[128];
-	size_t  len = snprintf(buf, sizeof(buf), "%lld", v);
-	tarval *tv = new_tarval_from_str(buf, len, mode);
 
-	return new_d_Const(dbgi, tv);
+	unsigned char base = 10;
+	if (literal->base.kind == EXPR_LITERAL_INTEGER_OCTAL) {
+		base = 8;
+	} else if (literal->base.kind == EXPR_LITERAL_INTEGER_HEXADECIMAL) {
+		base = 16;
+	} else {
+		assert(literal->base.kind == EXPR_LITERAL_INTEGER);
+	}
+
+	tarval_int_overflow_mode_t old_mode = tarval_get_integer_overflow_mode();
+
+	/* now try if the constant is small enough for some types */
+	tarval_set_integer_overflow_mode(TV_OVERFLOW_BAD);
+	if (ls < 1) {
+		if (us == 0 && try_create_integer(literal, type_int, base))
+			goto finished;
+		if ((us == 1 || base != 10)
+				&& try_create_integer(literal, type_unsigned_int, base))
+			goto finished;
+	}
+	if (ls < 2) {
+		if (us == 0 && try_create_integer(literal, type_long, base))
+			goto finished;
+		if ((us == 1 || base != 10)
+				&& try_create_integer(literal, type_unsigned_long, base))
+			goto finished;
+	}
+	/* last try? then we should not report tarval_bad */
+	if (us != 1 && base == 10)
+		tarval_set_integer_overflow_mode(TV_OVERFLOW_WRAP);
+	if (us == 0 && try_create_integer(literal, type_long_long, base))
+		goto finished;
+
+	/* last try */
+	assert(us == 1 || base != 10);
+	tarval_set_integer_overflow_mode(TV_OVERFLOW_WRAP);
+	bool res = try_create_integer(literal, type_unsigned_long_long, base);
+	if (res == false)
+		panic("internal error when parsing number literal");
+
+finished:
+	tarval_set_integer_overflow_mode(old_mode);
+}
+
+void determine_literal_type(literal_expression_t *literal)
+{
+	switch (literal->base.kind) {
+	case EXPR_LITERAL_INTEGER:
+	case EXPR_LITERAL_INTEGER_OCTAL:
+	case EXPR_LITERAL_INTEGER_HEXADECIMAL:
+		create_integer_tarval(literal);
+		return;
+	default:
+		break;
+	}
 }
 
 /**
- * Creates a Const node representing a wide character constant.
+ * Creates a Const node representing a constant.
  */
-static ir_node *wide_character_constant_to_firm(const const_expression_t *cnst)
+static ir_node *literal_to_firm(const literal_expression_t *literal)
 {
-	dbg_info *dbgi = get_dbg_info(&cnst->base.source_position);
-	ir_mode  *mode = get_ir_mode_arithmetic(cnst->base.type);
+	type_t     *type   = skip_typeref(literal->base.type);
+	ir_mode    *mode   = get_ir_mode_storage(type);
+	const char *string = literal->value.begin;
+	size_t      size   = literal->value.size;
+	tarval     *tv;
 
-	long long int v = cnst->v.wide_character.begin[0];
+	switch (literal->base.kind) {
+	case EXPR_LITERAL_WIDE_CHARACTER: {
+		utf32  v = read_utf8_char(&string);
+		char   buf[128];
+		size_t len = snprintf(buf, sizeof(buf), UTF32_PRINTF_FORMAT, v);
 
-	char    buf[128];
-	size_t  len = snprintf(buf, sizeof(buf), "%lld", v);
-	tarval *tv = new_tarval_from_str(buf, len, mode);
+		tv = new_tarval_from_str(buf, len, mode);
+		goto make_const;
+	}
+	case EXPR_LITERAL_CHARACTER: {
+		long long int v;
+		if (size == 1 && char_is_signed) {
+			v = (signed char)string[0];
+		} else {
+			v = 0;
+			for (size_t i = 0; i < size; ++i) {
+				v = (v << 8) | ((unsigned char)string[i]);
+			}
+		}
+		char   buf[128];
+		size_t len = snprintf(buf, sizeof(buf), "%lld", v);
 
-	return new_d_Const(dbgi, tv);
+		tv = new_tarval_from_str(buf, len, mode);
+		goto make_const;
+	}
+	case EXPR_LITERAL_INTEGER:
+	case EXPR_LITERAL_INTEGER_OCTAL:
+	case EXPR_LITERAL_INTEGER_HEXADECIMAL:
+		assert(literal->target_value != NULL);
+		tv = literal->target_value;
+		goto make_const;
+	case EXPR_LITERAL_FLOATINGPOINT:
+		tv = new_tarval_from_str(string, size, mode);
+		goto make_const;
+	case EXPR_LITERAL_FLOATINGPOINT_HEXADECIMAL: {
+		char *buffer = alloca(size + 2);
+		memcpy(buffer, "0x", 2);
+		memcpy(buffer+2, string, size);
+		tv = new_tarval_from_str(buffer, size+2, mode);
+		goto make_const;
+	}
+	case EXPR_LITERAL_BOOLEAN:
+		if (string[0] == 't') {
+			tv = get_mode_one(mode);
+		} else {
+			assert(string[0] == 'f');
+			tv = get_mode_null(mode);
+		}
+		goto make_const;
+	case EXPR_LITERAL_MS_NOOP:
+		tv = get_mode_null(mode);
+		goto make_const;
+	default:
+		break;
+	}
+	panic("Invalid literal kind found");
+
+make_const: ;
+	dbg_info *dbgi       = get_dbg_info(&literal->base.source_position);
+	ir_node  *res        = new_d_Const(dbgi, tv);
+	ir_mode  *mode_arith = get_ir_mode_arithmetic(type);
+	return create_conv(dbgi, res, mode_arith);
 }
 
 /*
@@ -1192,23 +1385,6 @@ static ir_node *get_trampoline_region(dbg_info *dbgi, ir_entity *entity)
 	                       region);
 }
 
-
-/**
- * Creates a SymConst for a given entity.
- *
- * @param dbgi    debug info
- * @param mode    the (reference) mode for the SymConst
- * @param entity  the entity
- */
-static ir_node *create_symconst(dbg_info *dbgi, ir_mode *mode,
-                                ir_entity *entity)
-{
-	assert(entity != NULL);
-	union symconst_symbol sym;
-	sym.entity_p = entity;
-	return new_d_SymConst(dbgi, mode, sym, symconst_addr_ent);
-}
-
 /**
  * Creates a trampoline for a function represented by an entity.
  *
@@ -1230,101 +1406,6 @@ static ir_node *create_trampoline(dbg_info *dbgi, ir_mode *mode,
 	return new_Proj(irn, mode, pn_Builtin_1_result);
 }
 
-/**
- * Creates a SymConst node representing a string constant.
- *
- * @param src_pos    the source position of the string constant
- * @param id_prefix  a prefix for the name of the generated string constant
- * @param value      the value of the string constant
- */
-static ir_node *string_to_firm(const source_position_t *const src_pos,
-                               const char *const id_prefix,
-                               const string_t *const value)
-{
-	ir_type  *const global_type = get_glob_type();
-	dbg_info *const dbgi        = get_dbg_info(src_pos);
-	ir_type  *const type        = new_type_array(1, ir_type_const_char);
-
-	ident     *const id     = id_unique(id_prefix);
-	ir_entity *const entity = new_d_entity(global_type, id, type, dbgi);
-	set_entity_ld_ident(entity, id);
-	set_entity_visibility(entity, ir_visibility_private);
-	add_entity_linkage(entity, IR_LINKAGE_CONSTANT);
-
-	ir_type *const elem_type = ir_type_const_char;
-	ir_mode *const mode      = get_type_mode(elem_type);
-
-	const char* const string = value->begin;
-	const size_t      slen   = value->size;
-
-	set_array_lower_bound_int(type, 0, 0);
-	set_array_upper_bound_int(type, 0, slen);
-	set_type_size_bytes(type, slen);
-	set_type_state(type, layout_fixed);
-
-	ir_initializer_t *initializer = create_initializer_compound(slen);
-	for (size_t i = 0; i < slen; ++i) {
-		tarval           *tv  = new_tarval_from_long(string[i], mode);
-		ir_initializer_t *val = create_initializer_tarval(tv);
-		set_initializer_compound_value(initializer, i, val);
-	}
-	set_entity_initializer(entity, initializer);
-
-	return create_symconst(dbgi, mode_P_data, entity);
-}
-
-/**
- * Creates a SymConst node representing a string literal.
- *
- * @param literal   the string literal
- */
-static ir_node *string_literal_to_firm(
-		const string_literal_expression_t* literal)
-{
-	return string_to_firm(&literal->base.source_position, "str.%u",
-	                      &literal->value);
-}
-
-/**
- * Creates a SymConst node representing a wide string literal.
- *
- * @param literal   the wide string literal
- */
-static ir_node *wide_string_literal_to_firm(
-	const wide_string_literal_expression_t* const literal)
-{
-	ir_type *const global_type = get_glob_type();
-	ir_type *const elem_type   = ir_type_wchar_t;
-	dbg_info *const dbgi       = get_dbg_info(&literal->base.source_position);
-	ir_type *const type        = new_type_array(1, elem_type);
-
-	ident     *const id     = id_unique("str.%u");
-	ir_entity *const entity = new_d_entity(global_type, id, type, dbgi);
-	set_entity_ld_ident(entity, id);
-	set_entity_visibility(entity, ir_visibility_private);
-	add_entity_linkage(entity, IR_LINKAGE_CONSTANT);
-
-	ir_mode *const mode      = get_type_mode(elem_type);
-
-	const wchar_rep_t *const string = literal->value.begin;
-	const size_t             slen   = literal->value.size;
-
-	set_array_lower_bound_int(type, 0, 0);
-	set_array_upper_bound_int(type, 0, slen);
-	set_type_size_bytes(type, slen * get_mode_size_bytes(mode));
-	set_type_state(type, layout_fixed);
-
-	ir_initializer_t *initializer = create_initializer_compound(slen);
-	for (size_t i = 0; i < slen; ++i) {
-		tarval           *tv  = new_tarval_from_long(string[i], mode);
-		ir_initializer_t *val = create_initializer_tarval(tv);
-		set_initializer_compound_value(initializer, i, val);
-	}
-	set_entity_initializer(entity, initializer);
-
-	return create_symconst(dbgi, mode_P_data, entity);
-}
-
 /**
  * Dereference an address.
  *
@@ -3449,16 +3530,13 @@ static ir_node *_expression_to_firm(const expression_t *expression)
 #endif
 
 	switch (expression->kind) {
-	case EXPR_CHARACTER_CONSTANT:
-		return character_constant_to_firm(&expression->conste);
-	case EXPR_WIDE_CHARACTER_CONSTANT:
-		return wide_character_constant_to_firm(&expression->conste);
-	case EXPR_CONST:
-		return const_to_firm(&expression->conste);
+	EXPR_LITERAL_CASES
+		return literal_to_firm(&expression->literal);
 	case EXPR_STRING_LITERAL:
-		return string_literal_to_firm(&expression->string);
+		return string_to_firm(&expression->base.source_position, "str.%u",
+		                      &expression->literal.value);
 	case EXPR_WIDE_STRING_LITERAL:
-		return wide_string_literal_to_firm(&expression->wide_string);
+		return wide_string_literal_to_firm(&expression->string_literal);
 	case EXPR_REFERENCE:
 		return reference_expression_to_firm(&expression->reference);
 	case EXPR_REFERENCE_ENUM_VALUE:
@@ -4084,19 +4162,19 @@ static ir_initializer_t *create_ir_initializer_string(
 static ir_initializer_t *create_ir_initializer_wide_string(
 		const initializer_wide_string_t *initializer, type_t *type)
 {
-	size_t            string_len    = initializer->string.size;
 	assert(type->kind == TYPE_ARRAY);
 	assert(type->array.size_constant);
 	size_t            len           = type->array.size;
+	size_t            string_len    = wstrlen(&initializer->string);
 	ir_initializer_t *irinitializer = create_initializer_compound(len);
 
-	const wchar_rep_t *string = initializer->string.begin;
-	ir_mode           *mode   = get_type_mode(ir_type_wchar_t);
+	const char *p    = initializer->string.begin;
+	ir_mode    *mode = get_type_mode(ir_type_wchar_t);
 
 	for (size_t i = 0; i < len; ++i) {
-		wchar_rep_t c = 0;
+		utf32 c = 0;
 		if (i < string_len) {
-			c = string[i];
+			c = read_utf8_char(&p);
 		}
 		tarval *tv = new_tarval_from_long(c, mode);
 		ir_initializer_t *char_initializer = create_initializer_tarval(tv);
@@ -5981,6 +6059,9 @@ static void global_asm_to_firm(statement_t *s)
 
 void translation_unit_to_firm(translation_unit_t *unit)
 {
+	/* initialize firm arithmetic */
+	tarval_set_integer_overflow_mode(TV_OVERFLOW_WRAP);
+
 	/* just to be sure */
 	continue_label           = NULL;
 	break_label              = NULL;
diff --git a/ast_t.h b/ast_t.h
index e1611fb..2e389c0 100644
--- a/ast_t.h
+++ b/ast_t.h
@@ -29,6 +29,7 @@
 #include "type.h"
 #include "entity_t.h"
 #include "adt/obst.h"
+#include "target_value.h"
 
 /** The AST obstack contains all data that must stay in the AST. */
 extern struct obstack ast_obstack;
@@ -66,9 +67,15 @@ typedef enum expression_kind_t {
 	EXPR_INVALID,
 	EXPR_REFERENCE,
 	EXPR_REFERENCE_ENUM_VALUE,
-	EXPR_CONST,
-	EXPR_CHARACTER_CONSTANT,
-	EXPR_WIDE_CHARACTER_CONSTANT,
+	EXPR_LITERAL_BOOLEAN,
+	EXPR_LITERAL_INTEGER,
+	EXPR_LITERAL_INTEGER_OCTAL,
+	EXPR_LITERAL_INTEGER_HEXADECIMAL,
+	EXPR_LITERAL_FLOATINGPOINT,
+	EXPR_LITERAL_FLOATINGPOINT_HEXADECIMAL,
+	EXPR_LITERAL_CHARACTER,
+	EXPR_LITERAL_WIDE_CHARACTER,
+	EXPR_LITERAL_MS_NOOP, /**< MS __noop extension */
 	EXPR_STRING_LITERAL,
 	EXPR_WIDE_STRING_LITERAL,
 	EXPR_COMPOUND_LITERAL,
@@ -226,6 +233,17 @@ typedef enum funcname_kind_t {
 	EXPR_UNARY_CASES_MANDATORY \
 	EXPR_UNARY_CASES_OPTIONAL
 
+#define EXPR_LITERAL_CASES                        \
+	case EXPR_LITERAL_BOOLEAN:                    \
+	case EXPR_LITERAL_INTEGER:                    \
+	case EXPR_LITERAL_INTEGER_OCTAL:              \
+	case EXPR_LITERAL_INTEGER_HEXADECIMAL:        \
+	case EXPR_LITERAL_FLOATINGPOINT:              \
+	case EXPR_LITERAL_FLOATINGPOINT_HEXADECIMAL:  \
+	case EXPR_LITERAL_CHARACTER:                  \
+	case EXPR_LITERAL_WIDE_CHARACTER:             \
+	case EXPR_LITERAL_MS_NOOP:
+
 /**
  * The base class of every expression.
  */
@@ -240,18 +258,15 @@ struct expression_base_t {
 };
 
 /**
- * A constant.
+ * integer/float constants, character and string literals
  */
-struct const_expression_t {
+struct literal_expression_t {
 	expression_base_t  base;
-	union {
-		long long      int_value;
-		long double    float_value;
-		string_t       character;
-		wide_string_t  wide_character;
-	} v;
-	bool               is_ms_noop;  /**< True, if this constant is the result
-	                                     of an microsoft __noop operator */
+	string_t           value;
+	symbol_t          *suffix;
+
+	/* ast2firm data */
+	tarval            *target_value;
 };
 
 struct string_literal_expression_t {
@@ -265,11 +280,6 @@ struct funcname_expression_t {
 	string_t           value;     /**< the value once assigned. */
 };
 
-struct wide_string_literal_expression_t {
-	expression_base_t  base;
-	wide_string_t      value;
-};
-
 struct compound_literal_expression_t {
 	expression_base_t  base;
 	type_t            *type;
@@ -395,10 +405,9 @@ struct label_address_expression_t {
 union expression_t {
 	expression_kind_t                     kind;
 	expression_base_t                     base;
-	const_expression_t                    conste;
+	literal_expression_t                  literal;
+	string_literal_expression_t           string_literal;
 	funcname_expression_t                 funcname;
-	string_literal_expression_t           string;
-	wide_string_literal_expression_t      wide_string;
 	compound_literal_expression_t         compound_literal;
 	builtin_constant_expression_t         builtin_constant;
 	builtin_types_compatible_expression_t builtin_types_compatible;
@@ -449,7 +458,7 @@ struct initializer_string_t {
 
 struct initializer_wide_string_t {
 	initializer_base_t  base;
-	wide_string_t       string;
+	string_t            string;
 };
 
 struct initializer_designator_t {
diff --git a/attribute.c b/attribute.c
index eca3f61..e709e61 100644
--- a/attribute.c
+++ b/attribute.c
@@ -420,7 +420,7 @@ const char *get_deprecated_string(const attribute_t *attribute)
 		expression_t *expression = argument->v.expression;
 		if (expression->kind != EXPR_STRING_LITERAL)
 			return NULL;
-		return expression->string.value.begin;
+		return expression->literal.value.begin;
 	}
 	return NULL;
 }
diff --git a/diagnostic.c b/diagnostic.c
index ad81055..0b92ae5 100644
--- a/diagnostic.c
+++ b/diagnostic.c
@@ -67,12 +67,6 @@ static void diagnosticvf(const char *const fmt, va_list ap)
 					fputc(*f, stderr);
 					break;
 
-				case 'C': {
-					const wint_t val = va_arg(ap, wint_t);
-					fprintf(stderr, "%lc", val);
-					break;
-				}
-
 				case 'c': {
 					const unsigned char val = (unsigned char) va_arg(ap, int);
 					fputc(val, stderr);
@@ -91,6 +85,14 @@ static void diagnosticvf(const char *const fmt, va_list ap)
 					break;
 				}
 
+				case 'S': {
+					const string_t *str = va_arg(ap, const string_t*);
+					for (size_t i = 0; i < str->size; ++i) {
+						fputc(str->begin[i], stderr);
+					}
+					break;
+				}
+
 				case 'u': {
 					const unsigned int val = va_arg(ap, unsigned int);
 					fprintf(stderr, "%u", val);
diff --git a/diagnostic.h b/diagnostic.h
index 4c12e4c..113481c 100644
--- a/diagnostic.h
+++ b/diagnostic.h
@@ -51,7 +51,7 @@
  *  %K  const token_t*
  *  %k  token_kind_t
  *  %P  const source_position_t *
- *
+ *  %S  const string_t *
  */
 void diagnosticf(const char *fmt, ...);
 void errorf(const source_position_t *pos, const char *fmt, ...);
diff --git a/format_check.c b/format_check.c
index 7c8304e..7609018 100644
--- a/format_check.c
+++ b/format_check.c
@@ -17,8 +17,9 @@
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
  * 02111-1307, USA.
  */
+#include <config.h>
+
 #include <ctype.h>
-#include <wctype.h>
 
 #include "adt/util.h"
 #include "format_check.h"
@@ -93,7 +94,7 @@ static const char* get_length_modifier_name(const format_length_modifier_t mod)
 
 static void warn_invalid_length_modifier(const source_position_t *pos,
                                          const format_length_modifier_t mod,
-                                         const wchar_rep_t conversion)
+                                         const utf32 conversion)
 {
 	warningf(pos,
 		"invalid length modifier '%s' for conversion specifier '%%%c'",
@@ -101,113 +102,51 @@ static void warn_invalid_length_modifier(const source_position_t *pos,
 	);
 }
 
-typedef struct vchar_t vchar_t;
-struct vchar_t {
-	const void *string;   /**< the string */
-	size_t     position;  /**< current position */
-	size_t     size;      /**< size of the string */
-
-	/** return the first character of the string and setthe position to 0. */
-	unsigned (*first)(vchar_t *self);
-	/** return the next character of the string */
-	unsigned (*next)(vchar_t *self);
-	/** return non_zero if the given character is a digit */
-	int (*is_digit)(unsigned vchar);
-};
-
-static unsigned string_first(vchar_t *self)
-{
-	self->position = 0;
-	const string_t *string = self->string;
-	return string->begin[0];
-}
-
-static unsigned string_next(vchar_t *self)
-{
-	++self->position;
-	const string_t *string = self->string;
-	return string->begin[self->position];
-}
-
-static int string_isdigit(unsigned vchar)
-{
-	return isdigit(vchar);
-}
-
-static unsigned wstring_first(vchar_t *self)
-{
-	self->position = 0;
-	const wide_string_t *wstring = self->string;
-	return wstring->begin[0];
-}
-
-static unsigned wstring_next(vchar_t *self)
-{
-	++self->position;
-	const wide_string_t *wstring = self->string;
-	return wstring->begin[self->position];
-}
-
-static int wstring_isdigit(unsigned vchar)
-{
-	return iswdigit(vchar);
-}
-
-static bool atend(vchar_t *self)
-{
-	return self->position + 1 == self->size;
-}
-
 /**
  * Check printf-style format.
  */
 static int internal_check_printf_format(const expression_t *fmt_expr,
-    const call_argument_t *arg, const format_spec_t *spec)
+                                        const call_argument_t *arg,
+                                        const format_spec_t *spec)
 {
-	if (fmt_expr->kind == EXPR_UNARY_CAST_IMPLICIT) {
+	while (fmt_expr->kind == EXPR_UNARY_CAST_IMPLICIT) {
 		fmt_expr = fmt_expr->unary.value;
 	}
 
-	vchar_t vchar;
-	switch (fmt_expr->kind) {
-		case EXPR_STRING_LITERAL:
-			vchar.string   = &fmt_expr->string.value;
-			vchar.size     = fmt_expr->string.value.size;
-			vchar.first    = string_first;
-			vchar.next     = string_next;
-			vchar.is_digit = string_isdigit;
-			break;
-
-		case EXPR_WIDE_STRING_LITERAL:
-			vchar.string   = &fmt_expr->wide_string.value;
-			vchar.size     = fmt_expr->wide_string.value.size;
-			vchar.first    = wstring_first;
-			vchar.next     = wstring_next;
-			vchar.is_digit = wstring_isdigit;
-			break;
+	/*
+	 * gettext results in expressions like (X ? "format_string" : Y);
+	 * check both branches and return the larger of the two results
+	 */
+	if (fmt_expr->kind == EXPR_CONDITIONAL) {
+		conditional_expression_t const *const c = &fmt_expr->conditional;
+		expression_t             const *      t = c->true_expression;
+		if (t == NULL)
+			t = c->condition;
+		int const nt = internal_check_printf_format(t,                   arg, spec);
+		int const nf = internal_check_printf_format(c->false_expression, arg, spec);
+		return nt > nf ? nt : nf;
+	}
 
-		case EXPR_CONDITIONAL: {
-			conditional_expression_t const *const c = &fmt_expr->conditional;
-			expression_t             const *      t = c->true_expression;
-			if (t == NULL)
-				t = c->condition;
-			int const nt = internal_check_printf_format(t,                   arg, spec);
-			int const nf = internal_check_printf_format(c->false_expression, arg, spec);
-			return nt > nf ? nt : nf;
-		}
+	if (fmt_expr->kind != EXPR_STRING_LITERAL
+			&& fmt_expr->kind != EXPR_WIDE_STRING_LITERAL)
+		return -1;
 
-		default:
-			return -1;
-	}
+	const char *string = fmt_expr->literal.value.begin;
+	size_t      size   = fmt_expr->literal.value.size;
+	const char *c      = string;
 
 	const source_position_t *pos = &fmt_expr->base.source_position;
-	unsigned fmt     = vchar.first(&vchar);
 	unsigned num_fmt = 0;
-	for (; fmt != '\0'; fmt = vchar.next(&vchar)) {
+	char     fmt;
+	for (fmt = *c; fmt != '\0'; fmt = *(++c)) {
 		if (fmt != '%')
 			continue;
-		fmt = vchar.next(&vchar);
+		fmt = *(++c);
 
+		if (fmt == '\0') {
+			warningf(pos, "dangling %% in format string");
+			break;
+		}
 		if (fmt == '%')
 			continue;
 
@@ -215,15 +154,15 @@ static int internal_check_printf_format(const expression_t *fmt_expr,
 
 		format_flags_t fmt_flags = FMT_FLAG_NONE;
 		if (fmt == '0') {
-			fmt = vchar.next(&vchar);
+			fmt = *(++c);
 			fmt_flags |= FMT_FLAG_ZERO;
 		}
 
 		/* argument selector or minimum field width */
-		if (vchar.is_digit(fmt)) {
+		if (isdigit((unsigned char)fmt)) { /* ctype needs unsigned char values */
 			do {
-				fmt = vchar.next(&vchar);
-			} while (vchar.is_digit(fmt));
+				fmt = *(++c);
+			} while (isdigit((unsigned char)fmt));
 
 			/* digit string was ... */
 			if (fmt == '$') {
@@ -263,13 +202,13 @@ static int internal_check_printf_format(const expression_t *fmt_expr,
 					warningf(pos, "repeated flag '%c' in conversion specification %u", (char)fmt, num_fmt);
 				}
 				fmt_flags |= flag;
-				fmt = vchar.next(&vchar);
+				fmt = *(++c);
 			}
 break_fmt_flags:
 
 			/* minimum field width */
 			if (fmt == '*') {
-				fmt = vchar.next(&vchar);
+				fmt = *(++c);
 				if (arg == NULL) {
 					warningf(pos, "missing argument for '*' field width in conversion specification %u", num_fmt);
 					return -1;
@@ -280,17 +219,17 @@ break_fmt_flags:
 				}
 				arg = arg->next;
 			} else {
-				while (vchar.is_digit(fmt)) {
-					fmt = vchar.next(&vchar);
+				while (isdigit((unsigned char)fmt)) { /* avoid UB on bytes >= 0x80 */
+					fmt = *(++c);
 				}
 			}
 		}
 
 		/* precision */
 		if (fmt == '.') {
-			fmt = vchar.next(&vchar);
+			fmt = *(++c);
 			if (fmt == '*') {
-				fmt = vchar.next(&vchar);
+				fmt = *(++c);
 				if (arg == NULL) {
 					warningf(pos, "missing argument for '*' precision in conversion specification %u", num_fmt);
 					return -1;
@@ -302,8 +241,8 @@ break_fmt_flags:
 				arg = arg->next;
 			} else {
 				/* digit string may be omitted */
-				while (vchar.is_digit(fmt)) {
-					fmt = vchar.next(&vchar);
+				while (isdigit((unsigned char)fmt)) { /* avoid UB on bytes >= 0x80 */
+					fmt = *(++c);
 				}
 			}
 		}
@@ -312,9 +251,9 @@ break_fmt_flags:
 		format_length_modifier_t fmt_mod;
 		switch (fmt) {
 			case 'h':
-				fmt = vchar.next(&vchar);
+				fmt = *(++c);
 				if (fmt == 'h') {
-					fmt = vchar.next(&vchar);
+					fmt = *(++c);
 					fmt_mod = FMT_MOD_hh;
 				} else {
 					fmt_mod = FMT_MOD_h;
@@ -322,48 +261,48 @@ break_fmt_flags:
 				break;
 
 			case 'l':
-				fmt = vchar.next(&vchar);
+				fmt = *(++c);
 				if (fmt == 'l') {
-					fmt = vchar.next(&vchar);
+					fmt = *(++c);
 					fmt_mod = FMT_MOD_ll;
 				} else {
 					fmt_mod = FMT_MOD_l;
 				}
 				break;
 
-			case 'L': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_L;    break;
-			case 'j': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_j;    break;
-			case 't': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_t;    break;
-			case 'z': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_z;    break;
-			case 'q': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_q;    break;
+			case 'L': fmt = *(++c); fmt_mod = FMT_MOD_L;    break;
+			case 'j': fmt = *(++c); fmt_mod = FMT_MOD_j;    break;
+			case 't': fmt = *(++c); fmt_mod = FMT_MOD_t;    break;
+			case 'z': fmt = *(++c); fmt_mod = FMT_MOD_z;    break;
+			case 'q': fmt = *(++c); fmt_mod = FMT_MOD_q;    break;
 			/* microsoft mode */
 			case 'w':
 				if (c_mode & _MS) {
-					fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_w;
+					fmt = *(++c); fmt_mod = FMT_MOD_w;
 				} else {
 					fmt_mod = FMT_MOD_NONE;
 				}
 				break;
 			case 'I':
 				if (c_mode & _MS) {
-					fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_I;
+					fmt = *(++c); fmt_mod = FMT_MOD_I;
 					if (fmt == '3') {
-						fmt = vchar.next(&vchar);
+						fmt = *(++c);
 						if (fmt == '2') {
-							fmt = vchar.next(&vchar);
+							fmt = *(++c);
 							fmt_mod = FMT_MOD_I32;
 						} else {
 							/* rewind */
-							--vchar.position;
+							fmt = *(--c);
 						}
 					} else if (fmt == '6') {
-						fmt = vchar.next(&vchar);
+						fmt = *(++c);
 						if (fmt == '4') {
-							fmt = vchar.next(&vchar);
+							fmt = *(++c);
 							fmt_mod = FMT_MOD_I64;
 						} else {
 							/* rewind */
-							--vchar.position;
+							fmt = *(--c);
 						}
 					}
 				} else {
@@ -375,10 +314,12 @@ break_fmt_flags:
 				break;
 		}
 
-		if (fmt == '\0') {
-			warningf(pos, "dangling %% in format string");
-			break;
-		}
+		/* the string may end inside a conversion specification (e.g. "%l");
+		 * stop here so the loop increment does not step past the terminator */
+		if (fmt == '\0') {
+			warningf(pos, "dangling %% in format string");
+			break;
+		}
 
 		type_t            *expected_type;
 		type_qualifiers_t  expected_qual = TYPE_QUALIFIER_NONE;
@@ -529,7 +464,7 @@ eval_fmt_mod_unsigned:
 				break;
 
 			default:
-				warningf(pos, "encountered unknown conversion specifier '%%%C' at position %u", (wint_t)fmt, num_fmt);
+				warningf(pos, "encountered unknown conversion specifier '%%%c' at position %u", fmt, num_fmt);
 				if (arg == NULL) {
 					warningf(pos, "too few arguments for format string");
 					return -1;
@@ -577,10 +512,8 @@ eval_fmt_mod_unsigned:
 						goto next_arg;
 					}
 				}
-			} else {
-				if (get_unqualified_type(arg_skip) == expected_type_skip) {
-					goto next_arg;
-				}
+			} else if (get_unqualified_type(arg_skip) == expected_type_skip) {
+				goto next_arg;
 			}
 			if (is_type_valid(arg_skip)) {
 				warningf(pos,
@@ -591,7 +524,8 @@ eval_fmt_mod_unsigned:
 next_arg:
 		arg = arg->next;
 	}
-	if (!atend(&vchar)) {
+	assert(fmt == '\0');
+	if (c+1 < string + size) {
 		warningf(pos, "format string contains '\\0'");
 	}
 	return num_fmt;
@@ -600,7 +534,8 @@ next_arg:
 /**
  * Check printf-style format.
  */
-static void check_printf_format(call_argument_t const *arg, format_spec_t const *const spec)
+static void check_printf_format(call_argument_t const *arg,
+                                format_spec_t const *const spec)
 {
 	/* find format arg */
 	size_t idx = 0;
@@ -625,16 +560,17 @@ static void check_printf_format(call_argument_t const *arg, format_spec_t const
 		++num_args;
 	if (num_args > (size_t)num_fmt) {
 		warningf(&fmt_expr->base.source_position,
-			"%u argument%s but only %u format specifier%s",
-			num_args, num_args != 1 ? "s" : "",
-			num_fmt,  num_fmt  != 1 ? "s" : "");
+		         "%u argument%s but only %u format specifier%s",
+		         num_args, num_args != 1 ? "s" : "",
+		         num_fmt,  num_fmt  != 1 ? "s" : "");
 	}
 }
 
 /**
  * Check scanf-style format.
  */
-static void check_scanf_format(const call_argument_t *arg, const format_spec_t *spec)
+static void check_scanf_format(const call_argument_t *arg,
+                               const format_spec_t *spec)
 {
 	/* find format arg */
 	unsigned idx = 0;
@@ -649,252 +585,240 @@ static void check_scanf_format(const call_argument_t *arg, const format_spec_t *
 		fmt_expr = fmt_expr->unary.value;
 	}
 
-	vchar_t vchar;
-	if (fmt_expr->kind == EXPR_WIDE_STRING_LITERAL) {
-		vchar.string   = &fmt_expr->wide_string.value;
-		vchar.size     = fmt_expr->wide_string.value.size;
-		vchar.first    = wstring_first;
-		vchar.next     = wstring_next;
-		vchar.is_digit = wstring_isdigit;
-	} else if (fmt_expr->kind == EXPR_STRING_LITERAL) {
-		vchar.string   = &fmt_expr->string.value;
-		vchar.size     = fmt_expr->string.value.size;
-		vchar.first    = string_first;
-		vchar.next     = string_next;
-		vchar.is_digit = string_isdigit;
-	} else {
+	if (fmt_expr->kind != EXPR_STRING_LITERAL
+			&& fmt_expr->kind != EXPR_WIDE_STRING_LITERAL)
 		return;
-	}
+
+	const char *string = fmt_expr->literal.value.begin;
+	size_t      size   = fmt_expr->literal.value.size;
+	const char *c      = string;
+
 	/* find the real args */
 	for (; idx < spec->arg_idx && arg != NULL; ++idx)
 		arg = arg->next;
 
 	const source_position_t *pos = &fmt_expr->base.source_position;
-	unsigned fmt     = vchar.first(&vchar);
 	unsigned num_fmt = 0;
-	for (; fmt != '\0'; fmt = vchar.next(&vchar)) {
+	char     fmt;
+	for (fmt = *c; fmt != '\0'; fmt = *(++c)) {
 		if (fmt != '%')
 			continue;
-		fmt = vchar.next(&vchar);
-
+		fmt = *(++c);
+		if (fmt == '\0') {
+			warningf(pos, "dangling '%%' in format string");
+			break;
+		}
 		if (fmt == '%')
 			continue;
 
 		++num_fmt;
 
-		/* length modifier */
-		format_length_modifier_t fmt_mod;
+		/* look for length modifiers */
+		format_length_modifier_t fmt_mod = FMT_MOD_NONE;
 		switch (fmt) {
-			case 'h':
-				fmt = vchar.next(&vchar);
-				if (fmt == 'h') {
-					fmt = vchar.next(&vchar);
-					fmt_mod = FMT_MOD_hh;
-				} else {
-					fmt_mod = FMT_MOD_h;
-				}
-				break;
+		case 'h':
+			fmt = *(++c);
+			if (fmt == 'h') {
+				fmt = *(++c);
+				fmt_mod = FMT_MOD_hh;
+			} else {
+				fmt_mod = FMT_MOD_h;
+			}
+			break;
 
-			case 'l':
-				fmt = vchar.next(&vchar);
-				if (fmt == 'l') {
-					fmt = vchar.next(&vchar);
-					fmt_mod = FMT_MOD_ll;
-				} else {
-					fmt_mod = FMT_MOD_l;
-				}
-				break;
+		case 'l':
+			fmt = *(++c);
+			if (fmt == 'l') {
+				fmt = *(++c);
+				fmt_mod = FMT_MOD_ll;
+			} else {
+				fmt_mod = FMT_MOD_l;
+			}
+			break;
 
-			case 'L': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_L;    break;
-			case 'j': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_j;    break;
-			case 't': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_t;    break;
-			case 'z': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_z;    break;
-			/* microsoft mode */
-			case 'w':
-				if (c_mode & _MS) {
-					fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_w;
-				} else {
-					fmt_mod = FMT_MOD_NONE;
-				}
-				break;
-			case 'I':
-				if (c_mode & _MS) {
-					fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_I;
-					if (fmt == '3') {
-						fmt = vchar.next(&vchar);
-						if (fmt == '2') {
-							fmt = vchar.next(&vchar);
-							fmt_mod = FMT_MOD_I32;
-						} else {
-							/* rewind */
-							--vchar.position;
-						}
-					} else if (fmt == '6') {
-						fmt = vchar.next(&vchar);
-						if (fmt == '4') {
-							fmt = vchar.next(&vchar);
-							fmt_mod = FMT_MOD_I64;
-						} else {
-							/* rewind */
-							--vchar.position;
-						}
+		case 'L': fmt = *(++c); fmt_mod = FMT_MOD_L; break;
+		case 'j': fmt = *(++c); fmt_mod = FMT_MOD_j; break;
+		case 't': fmt = *(++c); fmt_mod = FMT_MOD_t; break;
+		case 'z': fmt = *(++c); fmt_mod = FMT_MOD_z; break;
+		/* microsoft mode */
+		case 'w':
+			if (c_mode & _MS) {
+				fmt = *(++c);
+				fmt_mod = FMT_MOD_w;
+			}
+			break;
+		case 'I':
+			if (c_mode & _MS) {
+				fmt = *(++c);
+				fmt_mod = FMT_MOD_I;
+				if (fmt == '3') {
+					fmt = *(++c);
+					if (fmt == '2') {
+						fmt = *(++c);
+						fmt_mod = FMT_MOD_I32;
+					} else {
+						/* rewind */
+						fmt = *(--c);
+					}
+				} else if (fmt == '6') {
+					fmt = *(++c);
+					if (fmt == '4') {
+						fmt = *(++c);
+						fmt_mod = FMT_MOD_I64;
+					} else {
+						/* rewind */
+						fmt = *(--c);
 					}
-				} else {
-					fmt_mod = FMT_MOD_NONE;
 				}
-				break;
-			default:
-				fmt_mod = FMT_MOD_NONE;
-				break;
+			}
+			break;
 		}
 
 		if (fmt == '\0') {
-			warningf(pos, "dangling %% in format string");
+			warningf(pos, "dangling %% with conversion specifier in format string");
 			break;
 		}
 
-		type_t            *expected_type;
+		type_t *expected_type;
 		switch (fmt) {
-			case 'd':
-			case 'i':
-				switch (fmt_mod) {
-					case FMT_MOD_NONE: expected_type = type_int;         break;
-					case FMT_MOD_hh:   expected_type = type_signed_char; break;
-					case FMT_MOD_h:    expected_type = type_short;       break;
-					case FMT_MOD_l:    expected_type = type_long;        break;
-					case FMT_MOD_ll:   expected_type = type_long_long;   break;
-					case FMT_MOD_j:    expected_type = type_intmax_t;    break;
-					case FMT_MOD_z:    expected_type = type_ssize_t;     break;
-					case FMT_MOD_t:    expected_type = type_ptrdiff_t;   break;
-					case FMT_MOD_I:    expected_type = type_ptrdiff_t;   break;
-					case FMT_MOD_I32:  expected_type = type_int32;       break;
-					case FMT_MOD_I64:  expected_type = type_int64;       break;
+		case 'd':
+		case 'i':
+			switch (fmt_mod) {
+			case FMT_MOD_NONE: expected_type = type_int;         break;
+			case FMT_MOD_hh:   expected_type = type_signed_char; break;
+			case FMT_MOD_h:    expected_type = type_short;       break;
+			case FMT_MOD_l:    expected_type = type_long;        break;
+			case FMT_MOD_ll:   expected_type = type_long_long;   break;
+			case FMT_MOD_j:    expected_type = type_intmax_t;    break;
+			case FMT_MOD_z:    expected_type = type_ssize_t;     break;
+			case FMT_MOD_t:    expected_type = type_ptrdiff_t;   break;
+			case FMT_MOD_I:    expected_type = type_ptrdiff_t;   break;
+			case FMT_MOD_I32:  expected_type = type_int32;       break;
+			case FMT_MOD_I64:  expected_type = type_int64;       break;
 
-					default:
-						warn_invalid_length_modifier(pos, fmt_mod, fmt);
-						goto next_arg;
-				}
-				break;
-
-			case 'o':
-			case 'X':
-			case 'x':
-				goto eval_fmt_mod_unsigned;
-
-			case 'u':
-eval_fmt_mod_unsigned:
-				switch (fmt_mod) {
-					case FMT_MOD_NONE: expected_type = type_unsigned_int;       break;
-					case FMT_MOD_hh:   expected_type = type_unsigned_char;      break;
-					case FMT_MOD_h:    expected_type = type_unsigned_short;     break;
-					case FMT_MOD_l:    expected_type = type_unsigned_long;      break;
-					case FMT_MOD_ll:   expected_type = type_unsigned_long_long; break;
-					case FMT_MOD_j:    expected_type = type_uintmax_t;          break;
-					case FMT_MOD_z:    expected_type = type_size_t;             break;
-					case FMT_MOD_t:    expected_type = type_uptrdiff_t;         break;
-					case FMT_MOD_I:    expected_type = type_size_t;             break;
-					case FMT_MOD_I32:  expected_type = type_unsigned_int32;     break;
-					case FMT_MOD_I64:  expected_type = type_unsigned_int64;     break;
+			default:
+				warn_invalid_length_modifier(pos, fmt_mod, fmt);
+				goto next_arg;
+			}
+			break;
 
-					default:
-						warn_invalid_length_modifier(pos, fmt_mod, fmt);
-						goto next_arg;
-				}
-				break;
+		case 'o':
+		case 'X':
+		case 'x':
+		case 'u':
+			switch (fmt_mod) {
+			case FMT_MOD_NONE: expected_type = type_unsigned_int;       break;
+			case FMT_MOD_hh:   expected_type = type_unsigned_char;      break;
+			case FMT_MOD_h:    expected_type = type_unsigned_short;     break;
+			case FMT_MOD_l:    expected_type = type_unsigned_long;      break;
+			case FMT_MOD_ll:   expected_type = type_unsigned_long_long; break;
+			case FMT_MOD_j:    expected_type = type_uintmax_t;          break;
+			case FMT_MOD_z:    expected_type = type_size_t;             break;
+			case FMT_MOD_t:    expected_type = type_uptrdiff_t;         break;
+			case FMT_MOD_I:    expected_type = type_size_t;             break;
+			case FMT_MOD_I32:  expected_type = type_unsigned_int32;     break;
+			case FMT_MOD_I64:  expected_type = type_unsigned_int64;     break;
 
-			case 'A':
-			case 'a':
-			case 'E':
-			case 'e':
-			case 'F':
-			case 'f':
-			case 'G':
-			case 'g':
-				switch (fmt_mod) {
-					case FMT_MOD_l:    /* l modifier is ignored */
-					case FMT_MOD_NONE: expected_type = type_double;      break;
-					case FMT_MOD_L:    expected_type = type_long_double; break;
+			default:
+				warn_invalid_length_modifier(pos, fmt_mod, fmt);
+				goto next_arg;
+			}
+			break;
 
-					default:
-						warn_invalid_length_modifier(pos, fmt_mod, fmt);
-						goto next_arg;
-				}
-				break;
+		case 'A':
+		case 'a':
+		case 'E':
+		case 'e':
+		case 'F':
+		case 'f':
+		case 'G':
+		case 'g':
+			switch (fmt_mod) {
+			case FMT_MOD_l:    expected_type = type_double;      break;
+			case FMT_MOD_NONE: expected_type = type_float;       break;
+			case FMT_MOD_L:    expected_type = type_long_double; break;
 
-			case 'C':
-				if (fmt_mod != FMT_MOD_NONE) {
-					warn_invalid_length_modifier(pos, fmt_mod, fmt);
-					goto next_arg;
-				}
-				expected_type = type_wchar_t;
-				break;
+			default:
+				warn_invalid_length_modifier(pos, fmt_mod, fmt);
+				goto next_arg;
+			}
+			break;
 
-			case 'c':
-				expected_type = type_int;
-				switch (fmt_mod) {
-					case FMT_MOD_NONE: expected_type = type_int;     break; /* TODO promoted char */
-					case FMT_MOD_l:    expected_type = type_wint_t;  break;
-					case FMT_MOD_w:    expected_type = type_wchar_t; break;
+		case 'C':
+			if (fmt_mod != FMT_MOD_NONE) {
+				warn_invalid_length_modifier(pos, fmt_mod, fmt);
+				goto next_arg;
+			}
+			expected_type = type_wchar_t;
+			break;
 
-					default:
-						warn_invalid_length_modifier(pos, fmt_mod, fmt);
-						goto next_arg;
-				}
-				break;
+		case 'c':
+			expected_type = type_int;
+			switch (fmt_mod) {
+			case FMT_MOD_NONE: expected_type = type_int;     break; /* TODO promoted char */
+			case FMT_MOD_l:    expected_type = type_wint_t;  break;
+			case FMT_MOD_w:    expected_type = type_wchar_t; break;
 
-			case 'S':
-				if (fmt_mod != FMT_MOD_NONE) {
-					warn_invalid_length_modifier(pos, fmt_mod, fmt);
-					goto next_arg;
-				}
-				expected_type = type_wchar_t;
-				break;
+			default:
+				warn_invalid_length_modifier(pos, fmt_mod, fmt);
+				goto next_arg;
+			}
+			break;
 
-			case 's':
-			case '[':
-				switch (fmt_mod) {
-					case FMT_MOD_NONE: expected_type = type_char;    break;
-					case FMT_MOD_l:    expected_type = type_wchar_t; break;
-					case FMT_MOD_w:    expected_type = type_wchar_t; break;
+		case 'S':
+			if (fmt_mod != FMT_MOD_NONE) {
+				warn_invalid_length_modifier(pos, fmt_mod, fmt);
+				goto next_arg;
+			}
+			expected_type = type_wchar_t;
+			break;
 
-					default:
-						warn_invalid_length_modifier(pos, fmt_mod, fmt);
-						goto next_arg;
-				}
-				break;
+		case 's':
+		case '[':
+			switch (fmt_mod) {
+				case FMT_MOD_NONE: expected_type = type_char;    break;
+				case FMT_MOD_l:    expected_type = type_wchar_t; break;
+				case FMT_MOD_w:    expected_type = type_wchar_t; break;
 
-			case 'p':
-				if (fmt_mod != FMT_MOD_NONE) {
+				default:
 					warn_invalid_length_modifier(pos, fmt_mod, fmt);
 					goto next_arg;
-				}
-				expected_type = type_void_ptr;
-				break;
+			}
+			break;
 
-			case 'n':
-				switch (fmt_mod) {
-					case FMT_MOD_NONE: expected_type = type_int;         break;
-					case FMT_MOD_hh:   expected_type = type_signed_char; break;
-					case FMT_MOD_h:    expected_type = type_short;       break;
-					case FMT_MOD_l:    expected_type = type_long;        break;
-					case FMT_MOD_ll:   expected_type = type_long_long;   break;
-					case FMT_MOD_j:    expected_type = type_intmax_t;    break;
-					case FMT_MOD_z:    expected_type = type_ssize_t;     break;
-					case FMT_MOD_t:    expected_type = type_ptrdiff_t;   break;
+		case 'p':
+			if (fmt_mod != FMT_MOD_NONE) {
+				warn_invalid_length_modifier(pos, fmt_mod, fmt);
+				goto next_arg;
+			}
+			expected_type = type_void_ptr;
+			break;
 
-					default:
-						warn_invalid_length_modifier(pos, fmt_mod, fmt);
-						goto next_arg;
-				}
-				break;
+		case 'n':
+			switch (fmt_mod) {
+			case FMT_MOD_NONE: expected_type = type_int;         break;
+			case FMT_MOD_hh:   expected_type = type_signed_char; break;
+			case FMT_MOD_h:    expected_type = type_short;       break;
+			case FMT_MOD_l:    expected_type = type_long;        break;
+			case FMT_MOD_ll:   expected_type = type_long_long;   break;
+			case FMT_MOD_j:    expected_type = type_intmax_t;    break;
+			case FMT_MOD_z:    expected_type = type_ssize_t;     break;
+			case FMT_MOD_t:    expected_type = type_ptrdiff_t;   break;
 
 			default:
-				warningf(pos, "encountered unknown conversion specifier '%%%C' at position %u", (wint_t)fmt, num_fmt);
-				if (arg == NULL) {
-					warningf(pos, "too few arguments for format string");
-					return;
-				}
+				warn_invalid_length_modifier(pos, fmt_mod, fmt);
 				goto next_arg;
+			}
+			break;
+
+		default:
+			warningf(pos, "encountered unknown conversion specifier '%%%c' at format %u",
+			         fmt, num_fmt);
+			if (arg == NULL) {
+				warningf(pos, "too few arguments for format string");
+				return;
+			}
+			goto next_arg;
 		}
 
 		if (arg == NULL) {
@@ -938,7 +862,8 @@ error_arg_type:
 next_arg:
 		arg = arg->next;
 	}
-	if (!atend(&vchar)) {
+	assert(fmt == '\0');
+	if (c+1 < string + size) {
 		warningf(pos, "format string contains '\\0'");
 	}
 	if (arg != NULL) {
@@ -948,8 +873,8 @@ next_arg:
 			arg = arg->next;
 		}
 		warningf(pos, "%u argument%s but only %u format specifier%s",
-			num_args, num_args != 1 ? "s" : "",
-			num_fmt, num_fmt != 1 ? "s" : "");
+		         num_args, num_args != 1 ? "s" : "",
+		         num_fmt, num_fmt != 1 ? "s" : "");
 	}
 }
 
@@ -1018,8 +943,9 @@ void check_format(const call_expression_t *const call)
 		/* the declaration has a GNU format attribute, check it */
 	} else {
 		/*
-		 * For some functions we always check the format, even if it was not specified.
-		 * This allows to check format even in MS mode or without header included.
+		 * For some functions we always check the format, even if it was not
+		 * specified. This allows checking the format even in MS mode or
+		 * without the header being included.
 		 */
 		const char *const name = entity->base.symbol->string;
 		for (size_t i = 0; i < lengthof(builtin_table); ++i) {
diff --git a/lexer.c b/lexer.c
index 247083c..c3f86b3 100644
--- a/lexer.c
+++ b/lexer.c
@@ -53,8 +53,6 @@
 #define strtold(s, e) strtod(s, e)
 #endif
 
-typedef unsigned int utf32;
-
 static utf32        c;
 token_t             lexer_token;
 symbol_t           *symbol_L;
@@ -379,7 +377,7 @@ static inline void next_char(void);
 #define MATCH_NEWLINE(code)                   \
 	case '\r':                                \
 		next_char();                          \
-		if(c == '\n') {                       \
+		if (c == '\n') {                      \
 			next_char();                      \
 		}                                     \
 		lexer_token.source_position.linenr++; \
@@ -389,13 +387,13 @@ static inline void next_char(void);
 		lexer_token.source_position.linenr++; \
 		code
 
-#define eat(c_type)  do { assert(c == c_type); next_char(); } while(0)
+#define eat(c_type)  do { assert(c == c_type); next_char(); } while (0)
 
 static void maybe_concat_lines(void)
 {
 	eat('\\');
 
-	switch(c) {
+	switch (c) {
 	MATCH_NEWLINE(return;)
 
 	default:
@@ -415,23 +413,23 @@ static inline void next_char(void)
 	next_real_char();
 
 	/* filter trigraphs */
-	if(UNLIKELY(c == '\\')) {
+	if (UNLIKELY(c == '\\')) {
 		maybe_concat_lines();
 		goto end_of_next_char;
 	}
 
-	if(LIKELY(c != '?'))
+	if (LIKELY(c != '?'))
 		goto end_of_next_char;
 
 	next_real_char();
-	if(LIKELY(c != '?')) {
+	if (LIKELY(c != '?')) {
 		put_back(c);
 		c = '?';
 		goto end_of_next_char;
 	}
 
 	next_real_char();
-	switch(c) {
+	switch (c) {
 	case '=': c = '#'; break;
 	case '(': c = '['; break;
 	case '/': c = '\\'; maybe_concat_lines(); break;
@@ -528,14 +526,11 @@ end_of_next_char:;
  */
 static void parse_symbol(void)
 {
-	symbol_t *symbol;
-	char     *string;
-
 	obstack_1grow(&symbol_obstack, (char) c);
 	next_char();
 
-	while(1) {
-		switch(c) {
+	while (true) {
+		switch (c) {
 		DIGITS
 		SYMBOL_CHARS
 			obstack_1grow(&symbol_obstack, (char) c);
@@ -551,221 +546,62 @@ dollar_sign:
 end_symbol:
 	obstack_1grow(&symbol_obstack, '\0');
 
-	string = obstack_finish(&symbol_obstack);
-	symbol = symbol_table_insert(string);
+	char     *string = obstack_finish(&symbol_obstack);
+	symbol_t *symbol = symbol_table_insert(string);
 
-	lexer_token.type     = symbol->ID;
-	lexer_token.v.symbol = symbol;
+	lexer_token.type   = symbol->ID;
+	lexer_token.symbol = symbol;
 
-	if(symbol->string != string) {
+	if (symbol->string != string) {
 		obstack_free(&symbol_obstack, string);
 	}
 }
 
-static void parse_integer_suffix(bool is_oct_hex)
+/**
+ * Parses a number suffix like 'LU' or 'f'; lexer_token.symbol is NULL if none.
+ */
+static void parse_number_suffix(void)
 {
-	bool is_unsigned     = false;
-	bool min_long        = false;
-	bool min_longlong    = false;
-	bool not_traditional = false;
-	int  pos             = 0;
-	char suffix[4];
-
-	if (c == 'U' || c == 'u') {
-		not_traditional = true;
-		suffix[pos++]   = toupper(c);
-		is_unsigned     = true;
-		next_char();
-		if (c == 'L' || c == 'l') {
-			suffix[pos++] = toupper(c);
-			min_long = true;
-			next_char();
-			if (c == 'L' || c == 'l') {
-				suffix[pos++] = toupper(c);
-				min_longlong = true;
-				next_char();
-			}
-		}
-	} else if (c == 'l' || c == 'L') {
-		suffix[pos++] = toupper(c);
-		min_long = true;
-		next_char();
-		if (c == 'l' || c == 'L') {
-			not_traditional = true;
-			suffix[pos++]   = toupper(c);
-			min_longlong    = true;
-			next_char();
-			if (c == 'u' || c == 'U') {
-				suffix[pos++] = toupper(c);
-				is_unsigned   = true;
-				next_char();
-			}
-		} else if (c == 'u' || c == 'U') {
-			not_traditional = true;
-			suffix[pos++]   = toupper(c);
-			is_unsigned     = true;
+	assert(obstack_object_size(&symbol_obstack) == 0);
+	while (true) {
+		switch (c) {
+		SYMBOL_CHARS
+			obstack_1grow(&symbol_obstack, (char) c);
 			next_char();
-			lexer_token.datatype = type_unsigned_long;
+			break;
+		default:
+		dollar_sign:
+			goto finish_suffix;
 		}
 	}
-
-	if (warning.traditional && not_traditional) {
-		suffix[pos] = '\0';
-		warningf(&lexer_token.source_position,
-			"traditional C rejects the '%s' suffix", suffix);
+finish_suffix:
+	if (obstack_object_size(&symbol_obstack) == 0) {
+		lexer_token.symbol = NULL;
+		return;
 	}
-	if (!is_unsigned) {
-		long long v = lexer_token.v.intvalue;
-		if (!min_long) {
-			if (v >= TARGET_INT_MIN && v <= TARGET_INT_MAX) {
-				lexer_token.datatype = type_int;
-				return;
-			} else if (is_oct_hex && v >= 0 && v <= TARGET_UINT_MAX) {
-				lexer_token.datatype = type_unsigned_int;
-				return;
-			}
-		}
-		if (!min_longlong) {
-			if (v >= TARGET_LONG_MIN && v <= TARGET_LONG_MAX) {
-				lexer_token.datatype = type_long;
-				return;
-			} else if (is_oct_hex && v >= 0 && (unsigned long long)v <= (unsigned long long)TARGET_ULONG_MAX) {
-				lexer_token.datatype = type_unsigned_long;
-				return;
-			}
-		}
-		unsigned long long uv = (unsigned long long) v;
-		if (is_oct_hex && uv > (unsigned long long) TARGET_LONGLONG_MAX) {
-			lexer_token.datatype = type_unsigned_long_long;
-			return;
-		}
 
-		lexer_token.datatype = type_long_long;
-	} else {
-		unsigned long long v = (unsigned long long) lexer_token.v.intvalue;
-		if (!min_long && v <= TARGET_UINT_MAX) {
-			lexer_token.datatype = type_unsigned_int;
-			return;
-		}
-		if (!min_longlong && v <= TARGET_ULONG_MAX) {
-			lexer_token.datatype = type_unsigned_long;
-			return;
-		}
-		lexer_token.datatype = type_unsigned_long_long;
-	}
-}
+	obstack_1grow(&symbol_obstack, '\0');
+	char     *string = obstack_finish(&symbol_obstack);
+	symbol_t *symbol = symbol_table_insert(string);
 
-static void parse_floating_suffix(void)
-{
-	switch(c) {
-	/* TODO: do something useful with the suffixes... */
-	case 'f':
-	case 'F':
-		if (warning.traditional) {
-			warningf(&lexer_token.source_position,
-				"traditional C rejects the 'F' suffix");
-		}
-		next_char();
-		lexer_token.datatype = type_float;
-		break;
-	case 'l':
-	case 'L':
-		if (warning.traditional) {
-			warningf(&lexer_token.source_position,
-				"traditional C rejects the 'F' suffix");
-		}
-		next_char();
-		lexer_token.datatype = type_long_double;
-		break;
-	default:
-		lexer_token.datatype = type_double;
-		break;
+	if (symbol->string != string) {
+		obstack_free(&symbol_obstack, string);
 	}
+	lexer_token.symbol = symbol;
 }
 
-/**
- * A replacement for strtoull. Only those parts needed for
- * our parser are implemented.
- */
-static unsigned long long parse_int_string(const char *s, const char **endptr, int base)
+static string_t identify_string(char *string, size_t len)
 {
-	unsigned long long v = 0;
-
-	switch (base) {
-	case 16:
-		for (;; ++s) {
-			/* check for overrun */
-			if (v >= 0x1000000000000000ULL)
-				break;
-			switch (tolower(*s)) {
-			case '0': v <<= 4; break;
-			case '1': v <<= 4; v |= 0x1; break;
-			case '2': v <<= 4; v |= 0x2; break;
-			case '3': v <<= 4; v |= 0x3; break;
-			case '4': v <<= 4; v |= 0x4; break;
-			case '5': v <<= 4; v |= 0x5; break;
-			case '6': v <<= 4; v |= 0x6; break;
-			case '7': v <<= 4; v |= 0x7; break;
-			case '8': v <<= 4; v |= 0x8; break;
-			case '9': v <<= 4; v |= 0x9; break;
-			case 'a': v <<= 4; v |= 0xa; break;
-			case 'b': v <<= 4; v |= 0xb; break;
-			case 'c': v <<= 4; v |= 0xc; break;
-			case 'd': v <<= 4; v |= 0xd; break;
-			case 'e': v <<= 4; v |= 0xe; break;
-			case 'f': v <<= 4; v |= 0xf; break;
-			default:
-				goto end;
-			}
-		}
-		break;
-	case 8:
-		for (;; ++s) {
-			/* check for overrun */
-			if (v >= 0x2000000000000000ULL)
-				break;
-			switch (tolower(*s)) {
-			case '0': v <<= 3; break;
-			case '1': v <<= 3; v |= 1; break;
-			case '2': v <<= 3; v |= 2; break;
-			case '3': v <<= 3; v |= 3; break;
-			case '4': v <<= 3; v |= 4; break;
-			case '5': v <<= 3; v |= 5; break;
-			case '6': v <<= 3; v |= 6; break;
-			case '7': v <<= 3; v |= 7; break;
-			default:
-				goto end;
-			}
-		}
-		break;
-	case 10:
-		for (;; ++s) {
-			/* check for overrun */
-			if (v > 0x1999999999999999ULL)
-				break;
-			switch (tolower(*s)) {
-			case '0': v *= 10; break;
-			case '1': v *= 10; v += 1; break;
-			case '2': v *= 10; v += 2; break;
-			case '3': v *= 10; v += 3; break;
-			case '4': v *= 10; v += 4; break;
-			case '5': v *= 10; v += 5; break;
-			case '6': v *= 10; v += 6; break;
-			case '7': v *= 10; v += 7; break;
-			case '8': v *= 10; v += 8; break;
-			case '9': v *= 10; v += 9; break;
-			default:
-				goto end;
-			}
-		}
-		break;
-	default:
-		assert(0);
-		break;
+	/* TODO hash */
+#if 0
+	const char *result = strset_insert(&stringset, concat);
+	if (result != concat) {
+		obstack_free(&symbol_obstack, concat);
 	}
-end:
-	*endptr = s;
-	return v;
+#else
+	const char *result = string;
+#endif
+	return (string_t) {result, len};
 }
 
 /**
@@ -774,29 +610,29 @@ end:
  */
 static void parse_number_hex(void)
 {
-	bool is_float = false;
-	assert(c == 'x' || c == 'X');
-	next_char();
+	bool is_float   = false;
+	bool has_digits = false;
 
-	obstack_1grow(&symbol_obstack, '0');
-	obstack_1grow(&symbol_obstack, 'x');
-
-	while(isxdigit(c)) {
+	assert(obstack_object_size(&symbol_obstack) == 0);
+	while (isxdigit(c)) {
+		has_digits = true;
 		obstack_1grow(&symbol_obstack, (char) c);
 		next_char();
 	}
 
 	if (c == '.') {
+		is_float = true;
 		obstack_1grow(&symbol_obstack, (char) c);
 		next_char();
 
 		while (isxdigit(c)) {
+			has_digits = true;
 			obstack_1grow(&symbol_obstack, (char) c);
 			next_char();
 		}
-		is_float = true;
 	}
 	if (c == 'p' || c == 'P') {
+		is_float = true;
 		obstack_1grow(&symbol_obstack, (char) c);
 		next_char();
 
@@ -809,39 +645,26 @@ static void parse_number_hex(void)
 			obstack_1grow(&symbol_obstack, (char) c);
 			next_char();
 		}
-		is_float = true;
+	} else if (is_float) {
+		errorf(&lexer_token.source_position,
+		       "hexadecimal floatingpoint constant requires an exponent");
 	}
 
-	obstack_1grow(&symbol_obstack, '\0');
-	char *string = obstack_finish(&symbol_obstack);
-	if(*string == '\0') {
-		parse_error("invalid hex number");
-		lexer_token.type = T_ERROR;
-		obstack_free(&symbol_obstack, string);
-		return;
-	}
+	size_t  size   = obstack_object_size(&symbol_obstack);
+	char   *string = obstack_finish(&symbol_obstack);
+	lexer_token.literal = identify_string(string, size);
 
-	if (is_float) {
-		char *endptr;
-		lexer_token.type         = T_FLOATINGPOINT;
-		lexer_token.v.floatvalue = strtold(string, &endptr);
-
-		if(*endptr != '\0') {
-			parse_error("invalid hex float literal");
-		}
+	lexer_token.type    =
+		is_float ? T_FLOATINGPOINT_HEXADECIMAL : T_INTEGER_HEXADECIMAL;
 
-		parse_floating_suffix();
-	} else {
-		const char *endptr;
-		lexer_token.type       = T_INTEGER;
-		lexer_token.v.intvalue = parse_int_string(string + 2, &endptr, 16);
-		if(*endptr != '\0') {
-			parse_error("hex number literal too long");
-		}
-		parse_integer_suffix(true);
+	if (!has_digits) {
+		errorf(&lexer_token.source_position, "invalid number literal '0x%S'",
+		       &lexer_token.literal);
+		lexer_token.literal.begin = "0";
+		lexer_token.literal.size  = 1;
 	}
 
-	obstack_free(&symbol_obstack, string);
+	parse_number_suffix();
 }
 
 /**
@@ -849,9 +672,9 @@ static void parse_number_hex(void)
  *
  * @param char  the character to check
  */
-static inline bool is_octal_digit(utf32 chr)
+static bool is_octal_digit(utf32 chr)
 {
-	switch(chr) {
+	switch (chr) {
 	case '0':
 	case '1':
 	case '2':
@@ -867,132 +690,86 @@ static inline bool is_octal_digit(utf32 chr)
 }
 
 /**
- * Parses a octal number and set the lexer_token.
+ * Parses a number and sets the lexer_token.
  */
-static void parse_number_oct(void)
+static void parse_number(void)
 {
-	while(is_octal_digit(c)) {
-		obstack_1grow(&symbol_obstack, (char) c);
-		next_char();
-	}
-	obstack_1grow(&symbol_obstack, '\0');
-	char *string = obstack_finish(&symbol_obstack);
+	bool is_float   = false;
+	bool has_digits = false;
 
-	const char *endptr;
-	lexer_token.type       = T_INTEGER;
-	lexer_token.v.intvalue = parse_int_string(string, &endptr, 8);
-	if(*endptr != '\0') {
-		parse_error("octal number literal too long");
+	assert(obstack_object_size(&symbol_obstack) == 0);
+	if (c == '0') {
+		next_char();
+		if (c == 'x' || c == 'X') {
+			next_char();
+			parse_number_hex();
+			return;
+		} else {
+			has_digits = true;
+		}
+		obstack_1grow(&symbol_obstack, '0');
 	}
 
-	obstack_free(&symbol_obstack, string);
-	parse_integer_suffix(true);
-}
-
-/**
- * Parses a decimal including float number and set the
- * lexer_token.
- */
-static void parse_number_dec(void)
-{
-	bool is_float = false;
 	while (isdigit(c)) {
+		has_digits = true;
 		obstack_1grow(&symbol_obstack, (char) c);
 		next_char();
 	}
 
 	if (c == '.') {
+		is_float = true;
 		obstack_1grow(&symbol_obstack, '.');
 		next_char();
 
 		while (isdigit(c)) {
+			has_digits = true;
 			obstack_1grow(&symbol_obstack, (char) c);
 			next_char();
 		}
-		is_float = true;
 	}
-	if(c == 'e' || c == 'E') {
-		obstack_1grow(&symbol_obstack, (char) c);
+	if (c == 'e' || c == 'E') {
+		is_float = true;
+		obstack_1grow(&symbol_obstack, 'e');
 		next_char();
 
-		if(c == '-' || c == '+') {
+		if (c == '-' || c == '+') {
 			obstack_1grow(&symbol_obstack, (char) c);
 			next_char();
 		}
 
-		while(isdigit(c)) {
+		while (isdigit(c)) {
 			obstack_1grow(&symbol_obstack, (char) c);
 			next_char();
 		}
-		is_float = true;
 	}
 
-	obstack_1grow(&symbol_obstack, '\0');
-	char *string = obstack_finish(&symbol_obstack);
-
-	if(is_float) {
-		char *endptr;
-		lexer_token.type         = T_FLOATINGPOINT;
-		lexer_token.v.floatvalue = strtold(string, &endptr);
+	size_t  size   = obstack_object_size(&symbol_obstack);
+	char   *string = obstack_finish(&symbol_obstack);
+	lexer_token.literal = identify_string(string, size);
 
-		if(*endptr != '\0') {
-			parse_error("invalid number literal");
+	/* classify the literal: floating point, octal or decimal integer */
+	if (is_float) {
+		lexer_token.type = T_FLOATINGPOINT;
+	} else if (string[0] == '0') {
+		lexer_token.type = T_INTEGER_OCTAL;
+
+		/* check for invalid octal digits */
+		for (size_t i= 0; i < size; ++i) {
+			char t = string[i];
+			if (t == '8' || t == '9')
+				errorf(&lexer_token.source_position,
+				       "invalid digit '%c' in octal number", t);
 		}
-
-		parse_floating_suffix();
 	} else {
-		const char *endptr;
-		lexer_token.type       = T_INTEGER;
-		lexer_token.v.intvalue = parse_int_string(string, &endptr, 10);
-
-		if(*endptr != '\0') {
-			parse_error("invalid number literal");
-		}
-
-		parse_integer_suffix(false);
+		lexer_token.type = T_INTEGER;
 	}
-	obstack_free(&symbol_obstack, string);
-}
 
-/**
- * Parses a number and sets the lexer_token.
- */
-static void parse_number(void)
-{
-	if (c == '0') {
-		next_char();
-		switch (c) {
-			case 'X':
-			case 'x':
-				parse_number_hex();
-				break;
-			case '0':
-			case '1':
-			case '2':
-			case '3':
-			case '4':
-			case '5':
-			case '6':
-			case '7':
-				parse_number_oct();
-				break;
-			case '8':
-			case '9':
-				next_char();
-				parse_error("invalid octal number");
-				lexer_token.type = T_ERROR;
-				return;
-			case '.':
-			case 'e':
-			case 'E':
-			default:
-				obstack_1grow(&symbol_obstack, '0');
-				parse_number_dec();
-				return;
-		}
-	} else {
-		parse_number_dec();
+	if (!has_digits) {
+		errorf(&lexer_token.source_position, "invalid number literal '%S'",
+		       &lexer_token.literal);
 	}
+
+	parse_number_suffix();
 }
 
 /**
@@ -1053,7 +830,7 @@ static utf32 parse_octal_sequence(utf32 const first_digit)
 static utf32 parse_hex_sequence(void)
 {
 	utf32 value = 0;
-	while(isxdigit(c)) {
+	while (isxdigit(c)) {
 		value = 16 * value + digit_value(c);
 		next_char();
 	}
@@ -1102,12 +879,17 @@ static utf32 parse_escape_sequence(void)
 	case 'e':
 		if (c_mode & _GNUC)
 			return 27;   /* hopefully 27 is ALWAYS the code for ESCAPE */
-		/* FALLTHROUGH */
-	default:
-		/* §6.4.4.4:8 footnote 64 */
-		parse_error("unknown escape sequence");
+		break;
+	case 'u':
+	case 'U':
+		parse_error("universal character parsing not implemented yet");
 		return EOF;
+	default:
+		break;
 	}
+	/* §6.4.4.4:8 footnote 64 */
+	parse_error("unknown escape sequence");
+	return EOF;
 }
 
 /**
@@ -1122,84 +904,16 @@ string_t concat_strings(const string_t *const s1, const string_t *const s2)
 	memcpy(concat, s1->begin, len1);
 	memcpy(concat + len1, s2->begin, len2 + 1);
 
-	if (warning.traditional) {
-		warningf(&lexer_token.source_position,
-			"traditional C rejects string constant concatenation");
-	}
-#if 0 /* TODO hash */
-	const char *result = strset_insert(&stringset, concat);
-	if(result != concat) {
-		obstack_free(&symbol_obstack, concat);
-	}
-
-	return result;
-#else
-	return (string_t){ concat, len1 + len2 + 1 };
-#endif
-}
-
-/**
- * Concatenate a string and a wide string.
- */
-wide_string_t concat_string_wide_string(const string_t *const s1, const wide_string_t *const s2)
-{
-	const size_t len1 = s1->size - 1;
-	const size_t len2 = s2->size - 1;
-
-	wchar_rep_t *const concat = obstack_alloc(&symbol_obstack, (len1 + len2 + 1) * sizeof(*concat));
-	const char *const src = s1->begin;
-	for (size_t i = 0; i != len1; ++i) {
-		concat[i] = src[i];
-	}
-	memcpy(concat + len1, s2->begin, (len2 + 1) * sizeof(*concat));
-	if (warning.traditional) {
-		warningf(&lexer_token.source_position,
-			"traditional C rejects string constant concatenation");
-	}
-
-	return (wide_string_t){ concat, len1 + len2 + 1 };
-}
-
-/**
- * Concatenate two wide strings.
- */
-wide_string_t concat_wide_strings(const wide_string_t *const s1, const wide_string_t *const s2)
-{
-	const size_t len1 = s1->size - 1;
-	const size_t len2 = s2->size - 1;
-
-	wchar_rep_t *const concat = obstack_alloc(&symbol_obstack, (len1 + len2 + 1) * sizeof(*concat));
-	memcpy(concat,        s1->begin, len1       * sizeof(*concat));
-	memcpy(concat + len1, s2->begin, (len2 + 1) * sizeof(*concat));
-	if (warning.traditional) {
-		warningf(&lexer_token.source_position,
-			"traditional C rejects string constant concatenation");
-	}
-
-	return (wide_string_t){ concat, len1 + len2 + 1 };
+	return identify_string(concat, len1 + len2 + 1);
 }
 
-/**
- * Concatenate a wide string and a string.
- */
-wide_string_t concat_wide_string_string(const wide_string_t *const s1, const string_t *const s2)
+string_t make_string(const char *string)
 {
-	const size_t len1 = s1->size - 1;
-	const size_t len2 = s2->size - 1;
-
-	wchar_rep_t *const concat = obstack_alloc(&symbol_obstack, (len1 + len2 + 1) * sizeof(*concat));
-	memcpy(concat, s1->begin, len1 * sizeof(*concat));
-	const char  *const src = s2->begin;
-	wchar_rep_t *const dst = concat + len1;
-	for (size_t i = 0; i != len2 + 1; ++i) {
-		dst[i] = src[i];
-	}
-	if (warning.traditional) {
-		warningf(&lexer_token.source_position,
-			"traditional C rejects string constant concatenation");
-	}
+	size_t      len   = strlen(string) + 1;
+	char *const space = obstack_alloc(&symbol_obstack, len);
+	memcpy(space, string, len);
 
-	return (wide_string_t){ concat, len1 + len2 + 1 };
+	return identify_string(space, len);
 }
 
 static void grow_symbol(utf32 const tc)
@@ -1231,8 +945,8 @@ static void parse_string_literal(void)
 
 	eat('"');
 
-	while(1) {
-		switch(c) {
+	while (true) {
+		switch (c) {
 		case '\\': {
 			utf32 const tc = parse_escape_sequence();
 			if (tc >= 0x100) {
@@ -1269,22 +983,11 @@ end_of_string:
 
 	/* add finishing 0 to the string */
 	obstack_1grow(&symbol_obstack, '\0');
-	const size_t      size   = (size_t)obstack_object_size(&symbol_obstack);
-	const char *const string = obstack_finish(&symbol_obstack);
+	const size_t  size   = (size_t)obstack_object_size(&symbol_obstack);
+	char         *string = obstack_finish(&symbol_obstack);
 
-#if 0 /* TODO hash */
-	/* check if there is already a copy of the string */
-	result = strset_insert(&stringset, string);
-	if(result != string) {
-		obstack_free(&symbol_obstack, string);
-	}
-#else
-	const char *const result = string;
-#endif
-
-	lexer_token.type           = T_STRING_LITERAL;
-	lexer_token.v.string.begin = result;
-	lexer_token.v.string.size  = size;
+	lexer_token.type    = T_STRING_LITERAL;
+	lexer_token.literal = identify_string(string, size);
 }
 
 /**
@@ -1296,11 +999,11 @@ static void parse_wide_character_constant(void)
 
 	eat('\'');
 
-	while(1) {
-		switch(c) {
+	while (true) {
+		switch (c) {
 		case '\\': {
-			wchar_rep_t tc = parse_escape_sequence();
-			obstack_grow(&symbol_obstack, &tc, sizeof(tc));
+			const utf32 tc = parse_escape_sequence();
+			grow_symbol(tc);
 			break;
 		}
 
@@ -1321,26 +1024,19 @@ static void parse_wide_character_constant(void)
 			return;
 		}
 
-		default: {
-			wchar_rep_t tc = (wchar_rep_t) c;
-			obstack_grow(&symbol_obstack, &tc, sizeof(tc));
+		default:
+			grow_symbol(c);
 			next_char();
 			break;
 		}
-		}
 	}
 
 end_of_wide_char_constant:;
-	size_t             size   = (size_t) obstack_object_size(&symbol_obstack);
-	assert(size % sizeof(wchar_rep_t) == 0);
-	size /= sizeof(wchar_rep_t);
+	size_t  size   = (size_t) obstack_object_size(&symbol_obstack);
+	char   *string = obstack_finish(&symbol_obstack);
 
-	const wchar_rep_t *string = obstack_finish(&symbol_obstack);
-
-	lexer_token.type                = T_WIDE_CHARACTER_CONSTANT;
-	lexer_token.v.wide_string.begin = string;
-	lexer_token.v.wide_string.size  = size;
-	lexer_token.datatype            = type_wchar_t;
+	lexer_token.type     = T_WIDE_CHARACTER_CONSTANT;
+	lexer_token.literal  = identify_string(string, size);
 }
 
 /**
@@ -1348,64 +1044,9 @@ end_of_wide_char_constant:;
  */
 static void parse_wide_string_literal(void)
 {
-	const unsigned start_linenr = lexer_token.source_position.linenr;
-
-	assert(c == '"');
-	next_char();
-
-	while(1) {
-		switch(c) {
-		case '\\': {
-			wchar_rep_t tc = parse_escape_sequence();
-			obstack_grow(&symbol_obstack, &tc, sizeof(tc));
-			break;
-		}
-
-		case EOF: {
-			source_position_t source_position;
-			source_position.input_name = lexer_token.source_position.input_name;
-			source_position.linenr     = start_linenr;
-			errorf(&source_position, "string has no end");
-			lexer_token.type = T_ERROR;
-			return;
-		}
-
-		case '"':
-			next_char();
-			goto end_of_string;
-
-		default: {
-			wchar_rep_t tc = c;
-			obstack_grow(&symbol_obstack, &tc, sizeof(tc));
-			next_char();
-			break;
-		}
-		}
-	}
-
-end_of_string:;
-
-	/* TODO: concatenate multiple strings separated by whitespace... */
-
-	/* add finishing 0 to the string */
-	wchar_rep_t nul = L'\0';
-	obstack_grow(&symbol_obstack, &nul, sizeof(nul));
-	const size_t             size   = (size_t)obstack_object_size(&symbol_obstack) / sizeof(wchar_rep_t);
-	const wchar_rep_t *const string = obstack_finish(&symbol_obstack);
-
-#if 0 /* TODO hash */
-	/* check if there is already a copy of the string */
-	const wchar_rep_t *const result = strset_insert(&stringset, string);
-	if(result != string) {
-		obstack_free(&symbol_obstack, string);
-	}
-#else
-	const wchar_rep_t *const result = string;
-#endif
-
-	lexer_token.type                = T_WIDE_STRING_LITERAL;
-	lexer_token.v.wide_string.begin = result;
-	lexer_token.v.wide_string.size  = size;
+	parse_string_literal();
+	if (lexer_token.type == T_STRING_LITERAL)
+		lexer_token.type = T_WIDE_STRING_LITERAL;
 }
 
 /**
@@ -1417,8 +1058,8 @@ static void parse_character_constant(void)
 
 	eat('\'');
 
-	while(1) {
-		switch(c) {
+	while (true) {
+		switch (c) {
 		case '\\': {
 			utf32 const tc = parse_escape_sequence();
 			if (tc >= 0x100) {
@@ -1456,13 +1097,11 @@ static void parse_character_constant(void)
 	}
 
 end_of_char_constant:;
-	const size_t      size   = (size_t)obstack_object_size(&symbol_obstack);
-	const char *const string = obstack_finish(&symbol_obstack);
+	const size_t        size   = (size_t)obstack_object_size(&symbol_obstack);
+	char         *const string = obstack_finish(&symbol_obstack);
 
-	lexer_token.type           = T_CHARACTER_CONSTANT;
-	lexer_token.v.string.begin = string;
-	lexer_token.v.string.size  = size;
-	lexer_token.datatype       = c_mode & _CXX && size == 1 ? type_char : type_int;
+	lexer_token.type    = T_CHARACTER_CONSTANT;
+	lexer_token.literal = identify_string(string, size);
 }
 
 /**
@@ -1472,8 +1111,8 @@ static void skip_multiline_comment(void)
 {
 	unsigned start_linenr = lexer_token.source_position.linenr;
 
-	while(1) {
-		switch(c) {
+	while (true) {
+		switch (c) {
 		case '/':
 			next_char();
 			if (c == '*') {
@@ -1485,7 +1124,7 @@ static void skip_multiline_comment(void)
 			break;
 		case '*':
 			next_char();
-			if(c == '/') {
+			if (c == '/') {
 				next_char();
 				return;
 			}
@@ -1513,8 +1152,8 @@ static void skip_multiline_comment(void)
  */
 static void skip_line_comment(void)
 {
-	while(1) {
-		switch(c) {
+	while (true) {
+		switch (c) {
 		case EOF:
 			return;
 
@@ -1555,7 +1194,7 @@ static inline void next_pp_token(void)
  */
 static void eat_until_newline(void)
 {
-	while(pp_token.type != '\n' && pp_token.type != T_EOF) {
+	while (pp_token.type != '\n' && pp_token.type != T_EOF) {
 		next_pp_token();
 	}
 }
@@ -1566,7 +1205,7 @@ static void eat_until_newline(void)
 static void define_directive(void)
 {
 	lexer_next_preprocessing_token();
-	if(lexer_token.type != T_IDENTIFIER) {
+	if (lexer_token.type != T_IDENTIFIER) {
 		parse_error("expected identifier after #define\n");
 		eat_until_newline();
 	}
@@ -1596,14 +1235,14 @@ static void endif_directive(void)
  */
 static void parse_line_directive(void)
 {
-	if(pp_token.type != T_INTEGER) {
+	if (pp_token.type != T_INTEGER) {
 		parse_error("expected integer");
 	} else {
-		lexer_token.source_position.linenr = (unsigned int)(pp_token.v.intvalue - 1);
+		lexer_token.source_position.linenr = atoi(pp_token.literal.begin) - 1; /* N names the following line */
 		next_pp_token();
 	}
-	if(pp_token.type == T_STRING_LITERAL) {
+	if (pp_token.type == T_STRING_LITERAL) {
-		lexer_token.source_position.input_name = pp_token.v.string.begin;
+		lexer_token.source_position.input_name = pp_token.literal.begin;
 		next_pp_token();
 	}
 
@@ -1638,13 +1277,13 @@ static void parse_pragma(void)
 	bool unknown_pragma = true;
 
 	next_pp_token();
-	if (pp_token.v.symbol->pp_ID == TP_STDC) {
+	if (pp_token.symbol->pp_ID == TP_STDC) {
 		stdc_pragma_kind_t kind = STDC_UNKNOWN;
 		/* a STDC pragma */
 		if (c_mode & _C99) {
 			next_pp_token();
 
-			switch (pp_token.v.symbol->pp_ID) {
+			switch (pp_token.symbol->pp_ID) {
 			case TP_FP_CONTRACT:
 				kind = STDC_FP_CONTRACT;
 				break;
@@ -1660,7 +1299,7 @@ static void parse_pragma(void)
 			if (kind != STDC_UNKNOWN) {
 				stdc_pragma_value_kind_t value = STDC_VALUE_UNKNOWN;
 				next_pp_token();
-				switch (pp_token.v.symbol->pp_ID) {
+				switch (pp_token.symbol->pp_ID) {
 				case TP_ON:
 					value = STDC_VALUE_ON;
 					break;
@@ -1695,9 +1334,9 @@ static void parse_pragma(void)
 static void parse_preprocessor_identifier(void)
 {
 	assert(pp_token.type == T_IDENTIFIER);
-	symbol_t *symbol = pp_token.v.symbol;
+	symbol_t *symbol = pp_token.symbol;
 
-	switch(symbol->pp_ID) {
+	switch (symbol->pp_ID) {
 	case TP_include:
 		printf("include - enable header name parsing!\n");
 		break;
@@ -1738,7 +1377,7 @@ static void parse_preprocessor_directive(void)
 {
 	next_pp_token();
 
-	switch(pp_token.type) {
+	switch (pp_token.type) {
 	case T_IDENTIFIER:
 		parse_preprocessor_identifier();
 		break;
@@ -1757,8 +1396,8 @@ static void parse_preprocessor_directive(void)
 
 #define MAYBE_PROLOG                                       \
 			next_char();                                   \
-			while(1) {                                     \
-				switch(c) {
+			while (true) {                                 \
+				switch (c) {
 
 #define MAYBE(ch, set_type)                                \
 				case ch:                                   \
@@ -1780,7 +1419,7 @@ static void parse_preprocessor_directive(void)
 				default:                                   \
 					code                                   \
 				}                                          \
-			} /* end of while(1) */                        \
+			} /* end of while (true) */                    \
 			break;
 
 #define ELSE(set_type)                                     \
@@ -1791,8 +1430,8 @@ static void parse_preprocessor_directive(void)
 
 void lexer_next_preprocessing_token(void)
 {
-	while(1) {
-		switch(c) {
+	while (true) {
+		switch (c) {
 		case ' ':
 		case '\t':
 			next_char();
@@ -1806,7 +1445,7 @@ void lexer_next_preprocessing_token(void)
 		SYMBOL_CHARS
 			parse_symbol();
 			/* might be a wide string ( L"string" ) */
-			if (lexer_token.v.symbol == symbol_L) {
+			if (lexer_token.symbol == symbol_L) {
 				switch (c) {
 					case '"':  parse_wide_string_literal();     break;
 					case '\'': parse_wide_character_constant(); break;
@@ -1831,7 +1470,7 @@ void lexer_next_preprocessing_token(void)
 				DIGITS
 					put_back(c);
 					c = '.';
-					parse_number_dec();
+					parse_number();
 					return;
 
 				case '.':
diff --git a/lexer.h b/lexer.h
index 3f83f11..a34d591 100644
--- a/lexer.h
+++ b/lexer.h
@@ -39,9 +39,7 @@ void select_input_encoding(char const* encoding);
 void lexer_open_stream(FILE *stream, const char *input_name);
 void lexer_open_buffer(const char *buffer, size_t len, const char *input_name);
 
-string_t      concat_strings(           const string_t      *s1, const string_t      *s2);
-wide_string_t concat_string_wide_string(const string_t      *s1, const wide_string_t *s2);
-wide_string_t concat_wide_strings(      const wide_string_t *s1, const wide_string_t *s2);
-wide_string_t concat_wide_string_string(const wide_string_t *s1, const string_t      *s2);
+string_t concat_strings(const string_t *s1, const string_t *s2);
+string_t make_string(const char *str);
 
 #endif
diff --git a/parser.c b/parser.c
index 502dcc6..101b404 100644
--- a/parser.c
+++ b/parser.c
@@ -305,11 +305,15 @@ static size_t get_expression_struct_size(expression_kind_t kind)
 		[EXPR_INVALID]                    = sizeof(expression_base_t),
 		[EXPR_REFERENCE]                  = sizeof(reference_expression_t),
 		[EXPR_REFERENCE_ENUM_VALUE]       = sizeof(reference_expression_t),
-		[EXPR_CONST]                      = sizeof(const_expression_t),
-		[EXPR_CHARACTER_CONSTANT]         = sizeof(const_expression_t),
-		[EXPR_WIDE_CHARACTER_CONSTANT]    = sizeof(const_expression_t),
+		[EXPR_LITERAL_INTEGER]            = sizeof(literal_expression_t),
+		[EXPR_LITERAL_INTEGER_OCTAL]      = sizeof(literal_expression_t),
+		[EXPR_LITERAL_INTEGER_HEXADECIMAL]= sizeof(literal_expression_t),
+		[EXPR_LITERAL_FLOATINGPOINT]      = sizeof(literal_expression_t),
+		[EXPR_LITERAL_FLOATINGPOINT_HEXADECIMAL] = sizeof(literal_expression_t),
+		[EXPR_LITERAL_CHARACTER]          = sizeof(literal_expression_t),
+		[EXPR_LITERAL_WIDE_CHARACTER]     = sizeof(literal_expression_t),
 		[EXPR_STRING_LITERAL]             = sizeof(string_literal_expression_t),
-		[EXPR_WIDE_STRING_LITERAL]        = sizeof(wide_string_literal_expression_t),
+		[EXPR_WIDE_STRING_LITERAL]        = sizeof(string_literal_expression_t),
 		[EXPR_COMPOUND_LITERAL]           = sizeof(compound_literal_expression_t),
 		[EXPR_CALL]                       = sizeof(call_expression_t),
 		[EXPR_UNARY_FIRST]                = sizeof(unary_expression_t),
@@ -1085,15 +1089,23 @@ static expression_t *parse_assignment_expression(void)
 	return parse_sub_expression(PREC_ASSIGNMENT);
 }
 
+static void warn_string_concat(const source_position_t *pos)
+{
+	if (!warning.traditional)
+		return;
+	warningf(pos, "traditional C rejects string constant concatenation");
+}
+
 static string_t parse_string_literals(void)
 {
 	assert(token.type == T_STRING_LITERAL);
-	string_t result = token.v.string;
+	string_t result = token.literal;
 
 	next_token();
 
 	while (token.type == T_STRING_LITERAL) {
-		result = concat_strings(&result, &token.v.string);
+		warn_string_concat(&token.source_position);
+		result = concat_strings(&result, &token.literal);
 		next_token();
 	}
 
@@ -1161,7 +1173,7 @@ static attribute_argument_t *parse_attribute_arguments(void)
 		/* is it an identifier */
 		if (token.type == T_IDENTIFIER
 				&& (look_ahead(1)->type == ',' || look_ahead(1)->type == ')')) {
-			symbol_t *symbol   = token.v.symbol;
+			symbol_t *symbol   = token.symbol;
 			argument->kind     = ATTRIBUTE_ARGUMENT_SYMBOL;
 			argument->v.symbol = symbol;
 			next_token();
@@ -1204,7 +1216,7 @@ static symbol_t *get_symbol_from_token(void)
 {
 	switch(token.type) {
 	case T_IDENTIFIER:
-		return token.v.symbol;
+		return token.symbol;
 	case T_auto:
 	case T_char:
 	case T_double:
@@ -1570,11 +1582,9 @@ unary:
 			determine_lhs_ent(expr->va_starte.ap, lhs_ent);
 			return;
 
+		EXPR_LITERAL_CASES
 		case EXPR_UNKNOWN:
 		case EXPR_INVALID:
-		case EXPR_CONST:
-		case EXPR_CHARACTER_CONSTANT:
-		case EXPR_WIDE_CHARACTER_CONSTANT:
 		case EXPR_STRING_LITERAL:
 		case EXPR_WIDE_STRING_LITERAL:
 		case EXPR_COMPOUND_LITERAL: // TODO init?
@@ -1620,7 +1630,7 @@ static designator_t *parse_designation(void)
 				                     T_IDENTIFIER, NULL);
 				return NULL;
 			}
-			designator->symbol = token.v.symbol;
+			designator->symbol = token.symbol;
 			next_token();
 			break;
 		default:
@@ -1636,7 +1646,7 @@ end_error:
 	return NULL;
 }
 
-static initializer_t *initializer_from_string(array_type_t *type,
+static initializer_t *initializer_from_string(array_type_t *const type,
                                               const string_t *const string)
 {
 	/* TODO: check len vs. size of array type */
@@ -1649,7 +1659,7 @@ static initializer_t *initializer_from_string(array_type_t *type,
 }
 
 static initializer_t *initializer_from_wide_string(array_type_t *const type,
-                                                   wide_string_t *const string)
+                                                   const string_t *const string)
 {
 	/* TODO: check len vs. size of array type */
 	(void) type;
@@ -1673,6 +1683,7 @@ static initializer_t *initializer_from_expression(type_t *orig_type,
 	type_t *type           = skip_typeref(orig_type);
 	type_t *expr_type_orig = expression->base.type;
 	type_t *expr_type      = skip_typeref(expr_type_orig);
+
 	if (is_type_array(type) && expr_type->kind == TYPE_POINTER) {
 		array_type_t *const array_type   = &type->array;
 		type_t       *const element_type = skip_typeref(array_type->element_type);
@@ -1680,26 +1691,26 @@ static initializer_t *initializer_from_expression(type_t *orig_type,
 		if (element_type->kind == TYPE_ATOMIC) {
 			atomic_type_kind_t akind = element_type->atomic.akind;
 			switch (expression->kind) {
-				case EXPR_STRING_LITERAL:
-					if (akind == ATOMIC_TYPE_CHAR
-							|| akind == ATOMIC_TYPE_SCHAR
-							|| akind == ATOMIC_TYPE_UCHAR) {
-						return initializer_from_string(array_type,
-							&expression->string.value);
-					}
-					break;
+			case EXPR_STRING_LITERAL:
+				if (akind == ATOMIC_TYPE_CHAR
+						|| akind == ATOMIC_TYPE_SCHAR
+						|| akind == ATOMIC_TYPE_UCHAR) {
+					return initializer_from_string(array_type,
+							&expression->string_literal.value);
+				}
+				break;
 
-				case EXPR_WIDE_STRING_LITERAL: {
-					type_t *bare_wchar_type = skip_typeref(type_wchar_t);
-					if (get_unqualified_type(element_type) == bare_wchar_type) {
-						return initializer_from_wide_string(array_type,
-							&expression->wide_string.value);
-					}
-					break;
+			case EXPR_WIDE_STRING_LITERAL: {
+				type_t *bare_wchar_type = skip_typeref(type_wchar_t);
+				if (get_unqualified_type(element_type) == bare_wchar_type) {
+					return initializer_from_wide_string(array_type,
+							&expression->string_literal.value);
 				}
+				break;
+			}
 
-				default:
-					break;
+			default:
+				break;
 			}
 		}
 	}
@@ -2113,7 +2124,7 @@ static initializer_t *parse_sub_initializer(type_path_t *path,
 			/* GNU-style designator ("identifier: value") */
 			designator = allocate_ast_zero(sizeof(designator[0]));
 			designator->source_position = token.source_position;
-			designator->symbol          = token.v.symbol;
+			designator->symbol          = token.symbol;
 			eat(T_IDENTIFIER);
 			eat(':');
 
@@ -2292,6 +2303,18 @@ end_error:
 	return NULL;
 }
 
+static expression_t *make_size_literal(size_t value)
+{
+	expression_t *literal = allocate_expression_zero(EXPR_LITERAL_INTEGER);
+	literal->base.type    = type_size_t;
+
+	/* literals are stored as text, so keep the decimal spelling of value */
+	char buf[128];
+	snprintf(buf, sizeof(buf), "%lu", (unsigned long) value);
+	literal->literal.value = make_string(buf);
+	return literal;
+}
+
 /**
  * Parses an initializer. Parsers either a compound literal
  * (env->declaration == NULL) or an initializer of a declaration.
@@ -2357,13 +2380,9 @@ static initializer_t *parse_initializer(parse_initializer_env_t *env)
 			internal_errorf(HERE, "invalid initializer type");
 		}
 
-		expression_t *cnst       = allocate_expression_zero(EXPR_CONST);
-		cnst->base.type          = type_size_t;
-		cnst->conste.v.int_value = size;
-
 		type_t *new_type = duplicate_type(type);
 
-		new_type->array.size_expression   = cnst;
+		new_type->array.size_expression   = make_size_literal(size);
 		new_type->array.size_constant     = true;
 		new_type->array.has_implicit_size = true;
 		new_type->array.size              = size;
@@ -2402,7 +2421,7 @@ static compound_t *parse_compound_type_specifier(bool is_struct)
 	entity_kind_tag_t const kind = is_struct ? ENTITY_STRUCT : ENTITY_UNION;
 	if (token.type == T_IDENTIFIER) {
 		/* the compound has a name, check if we have seen it already */
-		symbol = token.v.symbol;
+		symbol = token.symbol;
 		next_token();
 
 		entity_t *entity = get_tag(symbol, kind);
@@ -2487,7 +2506,7 @@ static void parse_enum_entries(type_t *const enum_type)
 
 		entity_t *entity             = allocate_entity_zero(ENTITY_ENUM_VALUE);
 		entity->enum_value.enum_type = enum_type;
-		entity->base.symbol          = token.v.symbol;
+		entity->base.symbol          = token.symbol;
 		entity->base.source_position = token.source_position;
 		next_token();
 
@@ -2518,7 +2537,7 @@ static type_t *parse_enum_specifier(void)
 	eat(T_enum);
 	switch (token.type) {
 		case T_IDENTIFIER:
-			symbol = token.v.symbol;
+			symbol = token.symbol;
 			next_token();
 
 			entity = get_tag(symbol, ENTITY_ENUM);
@@ -2611,7 +2630,7 @@ static type_t *parse_typeof(void)
 	}
 	switch (token.type) {
 	case T_IDENTIFIER:
-		if (is_typedef_symbol(token.v.symbol)) {
+		if (is_typedef_symbol(token.symbol)) {
 			type = parse_typename();
 		} else {
 			expression = parse_expression();
@@ -2701,7 +2720,7 @@ static attribute_t *parse_attribute_ms_property(attribute_t *attribute)
 		}
 
 		bool is_put;
-		symbol_t *symbol = token.v.symbol;
+		symbol_t *symbol = token.symbol;
 		next_token();
 		if (strcmp(symbol->string, "put") == 0) {
 			is_put = true;
@@ -2718,9 +2737,9 @@ static attribute_t *parse_attribute_ms_property(attribute_t *attribute)
 			goto end_error;
 		}
 		if (is_put) {
-			property->put_symbol = token.v.symbol;
+			property->put_symbol = token.symbol;
 		} else {
-			property->get_symbol = token.v.symbol;
+			property->get_symbol = token.symbol;
 		}
 		next_token();
 	} while (next_if(','));
@@ -2739,7 +2758,7 @@ static attribute_t *parse_microsoft_extended_decl_modifier_single(void)
 	if (next_if(T_restrict)) {
 		kind = ATTRIBUTE_MS_RESTRICT;
 	} else if (token.type == T_IDENTIFIER) {
-		const char *name = token.v.symbol->string;
+		const char *name = token.symbol->string;
 		next_token();
 		for (attribute_kind_t k = ATTRIBUTE_MS_FIRST; k <= ATTRIBUTE_MS_LAST;
 		     ++k) {
@@ -3013,7 +3032,7 @@ wrong_thread_stoarge_class:
 				}
 			}
 
-			type_t *const typedef_type = get_typedef_type(token.v.symbol);
+			type_t *const typedef_type = get_typedef_type(token.symbol);
 			if (typedef_type == NULL) {
 				/* Be somewhat resilient to typos like 'vodi f()' at the beginning of a
 				 * declaration, so it doesn't generate 'implicit int' followed by more
@@ -3027,7 +3046,7 @@ wrong_thread_stoarge_class:
 						errorf(HERE, "%K does not name a type", &token);
 
 						entity_t *entity =
-							create_error_entity(token.v.symbol, ENTITY_TYPEDEF);
+							create_error_entity(token.symbol, ENTITY_TYPEDEF);
 
 						type = allocate_type_zero(TYPE_TYPEDEF);
 						type->typedeft.typedefe = &entity->typedefe;
@@ -3290,7 +3309,7 @@ static void parse_identifier_list(scope_t *scope)
 		entity_t *entity = allocate_entity_zero(ENTITY_PARAMETER);
 		entity->base.source_position = token.source_position;
 		entity->base.namespc         = NAMESPACE_NORMAL;
-		entity->base.symbol          = token.v.symbol;
+		entity->base.symbol          = token.symbol;
 		/* a K&R parameter has no type, yet */
 		next_token();
 
@@ -3332,7 +3351,7 @@ static bool has_parameters(void)
 {
 	/* func(void) is not a parameter */
 	if (token.type == T_IDENTIFIER) {
-		entity_t const *const entity = get_entity(token.v.symbol, NAMESPACE_NORMAL);
+		entity_t const *const entity = get_entity(token.symbol, NAMESPACE_NORMAL);
 		if (entity == NULL)
 			return true;
 		if (entity->kind != ENTITY_TYPEDEF)
@@ -3359,7 +3378,7 @@ static void parse_parameters(function_type_t *type, scope_t *scope)
 	int saved_comma_state = save_and_reset_anchor_state(',');
 
 	if (token.type == T_IDENTIFIER &&
-	    !is_typedef_symbol(token.v.symbol)) {
+	    !is_typedef_symbol(token.symbol)) {
 		token_type_t la1_type = (token_type_t)look_ahead(1)->type;
 		if (la1_type == ',' || la1_type == ')') {
 			type->kr_style_parameters = true;
@@ -3617,7 +3636,7 @@ ptr_operator_end: ;
 		if (env->must_be_abstract) {
 			errorf(HERE, "no identifier expected in typename");
 		} else {
-			env->symbol          = token.v.symbol;
+			env->symbol          = token.symbol;
 			env->source_position = token.source_position;
 		}
 		next_token();
@@ -4378,7 +4397,7 @@ static bool is_declaration_specifier(const token_t *token,
 		TYPE_QUALIFIERS
 			return true;
 		case T_IDENTIFIER:
-			return is_typedef_symbol(token->v.symbol);
+			return is_typedef_symbol(token->symbol);
 
 		case T___extension__:
 		STORAGE_CLASSES
@@ -4890,9 +4909,7 @@ static bool expression_returns(expression_t const *const expr)
 
 		case EXPR_REFERENCE:
 		case EXPR_REFERENCE_ENUM_VALUE:
-		case EXPR_CONST:
-		case EXPR_CHARACTER_CONSTANT:
-		case EXPR_WIDE_CHARACTER_CONSTANT:
+		EXPR_LITERAL_CASES
 		case EXPR_STRING_LITERAL:
 		case EXPR_WIDE_STRING_LITERAL:
 		case EXPR_COMPOUND_LITERAL: // TODO descend into initialisers
@@ -5958,82 +5975,181 @@ static expression_t *expected_expression_error(void)
 	return create_invalid_expression();
 }
 
+static type_t *get_string_type(void)
+{
+	return warning.write_strings ? type_const_char_ptr : type_char_ptr;
+}
+
+static type_t *get_wide_string_type(void)
+{
+	return warning.write_strings ? type_const_wchar_t_ptr : type_wchar_t_ptr;
+}
+
 /**
  * Parse a string constant.
  */
-static expression_t *parse_string_const(void)
+static expression_t *parse_string_literal(void)
 {
-	wide_string_t wres;
-	if (token.type == T_STRING_LITERAL) {
-		string_t res = token.v.string;
+	source_position_t begin   = token.source_position;
+	string_t          res     = token.literal;
+	bool              is_wide = (token.type == T_WIDE_STRING_LITERAL);
+
+	next_token();
+	while (token.type == T_STRING_LITERAL
+			|| token.type == T_WIDE_STRING_LITERAL) {
+		warn_string_concat(&token.source_position);
+		/* classify the piece being appended before it is consumed */
+		is_wide |= token.type == T_WIDE_STRING_LITERAL;
+		res = concat_strings(&res, &token.literal);
 		next_token();
-		while (token.type == T_STRING_LITERAL) {
-			res = concat_strings(&res, &token.v.string);
-			next_token();
-		}
-		if (token.type != T_WIDE_STRING_LITERAL) {
-			expression_t *const cnst = allocate_expression_zero(EXPR_STRING_LITERAL);
-			/* note: that we use type_char_ptr here, which is already the
-			 * automatic converted type. revert_automatic_type_conversion
-			 * will construct the array type */
-			cnst->base.type    = warning.write_strings ? type_const_char_ptr : type_char_ptr;
-			cnst->string.value = res;
-			return cnst;
-		}
+	}
 
-		wres = concat_string_wide_string(&res, &token.v.wide_string);
+	expression_t *literal;
+	if (is_wide) {
+		literal = allocate_expression_zero(EXPR_WIDE_STRING_LITERAL);
+		literal->base.type = get_wide_string_type();
 	} else {
-		wres = token.v.wide_string;
+		literal = allocate_expression_zero(EXPR_STRING_LITERAL);
+		literal->base.type = get_string_type();
 	}
+	literal->base.source_position = begin;
+	literal->string_literal.value = res;
+	return literal;
+}
+
+/**
+ * Parse a boolean constant.
+ */
+static expression_t *parse_boolean_literal(bool value)
+{
+	expression_t *literal = allocate_expression_zero(EXPR_LITERAL_BOOLEAN);
+	literal->base.source_position = token.source_position;
+	literal->base.type            = type_bool;
+	literal->literal.value.begin  = value ? "true" : "false";
+	literal->literal.value.size   = value ? 4 : 5;
+
 	next_token();
+	return literal;
+}
 
-	for (;;) {
-		switch (token.type) {
-			case T_WIDE_STRING_LITERAL:
-				wres = concat_wide_strings(&wres, &token.v.wide_string);
-				break;
+static void warn_traditional_suffix(void)
+{
+	if (!warning.traditional)
+		return;
+	warningf(&token.source_position, "traditional C rejects the '%Y' suffix",
+	         token.symbol);
+}
 
-			case T_STRING_LITERAL:
-				wres = concat_wide_string_string(&wres, &token.v.string);
-				break;
+static void check_integer_suffix(void)
+{
+	symbol_t *suffix = token.symbol;
+	if (suffix == NULL)
+		return;
 
-			default: {
-				expression_t *const cnst = allocate_expression_zero(EXPR_WIDE_STRING_LITERAL);
-				cnst->base.type         = warning.write_strings ? type_const_wchar_t_ptr : type_wchar_t_ptr;
-				cnst->wide_string.value = wres;
-				return cnst;
+	bool not_traditional = false;
+	const char *c = suffix->string;
+	if (*c == 'l' || *c == 'L') {
+		++c;
+		if (*c == *(c-1)) {
+			not_traditional = true;
+			++c;
+			if (*c == 'u' || *c == 'U') {
+				++c;
+			}
+		} else if (*c == 'u' || *c == 'U') {
+			not_traditional = true;
+			++c;
+		}
+	} else if (*c == 'u' || *c == 'U') {
+		not_traditional = true;
+		++c;
+		if (*c == 'l' || *c == 'L') {
+			++c;
+			if (*c == *(c-1)) {
+				++c;
 			}
 		}
-		next_token();
+	}
+	if (*c != '\0') {
+		errorf(&token.source_position,
+		       "invalid suffix '%s' on integer constant", suffix->string);
+	} else if (not_traditional) {
+		warn_traditional_suffix();
 	}
 }
 
-/**
- * Parse a boolean constant.
- */
-static expression_t *parse_bool_const(bool value)
+static type_t *check_floatingpoint_suffix(void)
 {
-	expression_t *cnst       = allocate_expression_zero(EXPR_CONST);
-	cnst->base.type          = type_bool;
-	cnst->conste.v.int_value = value;
+	symbol_t *suffix = token.symbol;
+	type_t   *type   = type_double;
+	if (suffix == NULL)
+		return type;
 
-	next_token();
+	const char *c = suffix->string;
+	if (*c == 'f' || *c == 'F') {
+		++c;
+		type = type_float;
+	} else if (*c == 'l' || *c == 'L') {
+		++c;
+		type = type_long_double;
+	}
+	/* a consumed 'f'/'l' suffix is something traditional C does not know */
+	if (*c != '\0') {
+		errorf(&token.source_position,
+		       "invalid suffix '%s' on floatingpoint constant", suffix->string);
+	} else if (c != suffix->string) {
+		warn_traditional_suffix();
+	}
 
-	return cnst;
+	return type;
 }
 
 /**
  * Parse an integer constant.
  */
-static expression_t *parse_int_const(void)
+static expression_t *parse_number_literal(void)
 {
-	expression_t *cnst       = allocate_expression_zero(EXPR_CONST);
-	cnst->base.type          = token.datatype;
-	cnst->conste.v.int_value = token.v.intvalue;
+	expression_kind_t  kind;
+	type_t            *type = type_int; /* provisional for integer tokens */
+
+	switch (token.type) {
+	case T_INTEGER:
+		kind = EXPR_LITERAL_INTEGER;
+		check_integer_suffix();
+		break;
+	case T_INTEGER_OCTAL:
+		kind = EXPR_LITERAL_INTEGER_OCTAL;
+		check_integer_suffix();
+		break;
+	case T_INTEGER_HEXADECIMAL:
+		kind = EXPR_LITERAL_INTEGER_HEXADECIMAL;
+		check_integer_suffix();
+		break;
+	case T_FLOATINGPOINT:
+		kind = EXPR_LITERAL_FLOATINGPOINT;
+		type = check_floatingpoint_suffix();
+		break;
+	case T_FLOATINGPOINT_HEXADECIMAL:
+		kind = EXPR_LITERAL_FLOATINGPOINT_HEXADECIMAL;
+		type = check_floatingpoint_suffix();
+		break;
+	default:
+		panic("unexpected token type in parse_number_literal");
+	}
 
+	expression_t *literal = allocate_expression_zero(kind);
+	literal->base.source_position = token.source_position;
+	literal->base.type            = type;
+	literal->literal.value        = token.literal;
+	literal->literal.suffix       = token.symbol;
 	next_token();
 
-	return cnst;
+	/* integer type depends on the size of the number and the size
+	 * representable by the types. The backend/codegeneration has to determine
+	 * that
+	 */
+	determine_literal_type(&literal->literal);
+	return literal;
 }
 
 /**
@@ -6041,20 +6157,23 @@ static expression_t *parse_int_const(void)
  */
 static expression_t *parse_character_constant(void)
 {
-	expression_t *cnst = allocate_expression_zero(EXPR_CHARACTER_CONSTANT);
-	cnst->base.type          = token.datatype;
-	cnst->conste.v.character = token.v.string;
+	expression_t *literal = allocate_expression_zero(EXPR_LITERAL_CHARACTER);
+	literal->base.source_position = token.source_position;
+	literal->base.type            = c_mode & _CXX ? type_char : type_int;
+	literal->literal.value        = token.literal;
 
-	if (cnst->conste.v.character.size != 1) {
-		if (!GNU_MODE) {
+	size_t len = literal->literal.value.size;
+	if (len != 1) {
+		literal->base.type = type_int; /* must not depend on warning.multichar */
+		if (!GNU_MODE && !(c_mode & _C99)) {
 			errorf(HERE, "more than 1 character in character constant");
 		} else if (warning.multichar) {
 			warningf(HERE, "multi-character character constant");
 		}
 	}
-	next_token();
 
-	return cnst;
+	next_token();
+	return literal;
 }
 
 /**
@@ -6062,34 +6181,18 @@ static expression_t *parse_character_constant(void)
  */
 static expression_t *parse_wide_character_constant(void)
 {
-	expression_t *cnst = allocate_expression_zero(EXPR_WIDE_CHARACTER_CONSTANT);
-	cnst->base.type               = token.datatype;
-	cnst->conste.v.wide_character = token.v.wide_string;
+	expression_t *literal = allocate_expression_zero(EXPR_LITERAL_WIDE_CHARACTER);
+	literal->base.source_position = token.source_position;
+	literal->base.type            = type_int;
+	literal->literal.value        = token.literal;
 
-	if (cnst->conste.v.wide_character.size != 1) {
-		if (!GNU_MODE) {
-			errorf(HERE, "more than 1 character in character constant");
-		} else if (warning.multichar) {
-			warningf(HERE, "multi-character character constant");
-		}
+	size_t len = wstrlen(&literal->literal.value);
+	if (len != 1) {
+		warningf(HERE, "multi-character character constant");
 	}
-	next_token();
-
-	return cnst;
-}
-
-/**
- * Parse a float constant.
- */
-static expression_t *parse_float_const(void)
-{
-	expression_t *cnst         = allocate_expression_zero(EXPR_CONST);
-	cnst->base.type            = token.datatype;
-	cnst->conste.v.float_value = token.v.floatvalue;
 
 	next_token();
-
-	return cnst;
+	return literal;
 }
 
 static entity_t *create_implicit_function(symbol_t *symbol,
@@ -6149,57 +6252,58 @@ static type_t *automatic_type_conversion(type_t *orig_type)
 type_t *revert_automatic_type_conversion(const expression_t *expression)
 {
 	switch (expression->kind) {
-		case EXPR_REFERENCE: {
-			entity_t *entity = expression->reference.entity;
-			if (is_declaration(entity)) {
-				return entity->declaration.type;
-			} else if (entity->kind == ENTITY_ENUM_VALUE) {
-				return entity->enum_value.enum_type;
-			} else {
-				panic("no declaration or enum in reference");
-			}
+	case EXPR_REFERENCE: {
+		entity_t *entity = expression->reference.entity;
+		if (is_declaration(entity)) {
+			return entity->declaration.type;
+		} else if (entity->kind == ENTITY_ENUM_VALUE) {
+			return entity->enum_value.enum_type;
+		} else {
+			panic("no declaration or enum in reference");
 		}
+	}
 
-		case EXPR_SELECT: {
-			entity_t *entity = expression->select.compound_entry;
-			assert(is_declaration(entity));
-			type_t   *type   = entity->declaration.type;
-			return get_qualified_type(type,
-					expression->base.type->base.qualifiers);
-		}
+	case EXPR_SELECT: {
+		entity_t *entity = expression->select.compound_entry;
+		assert(is_declaration(entity));
+		type_t   *type   = entity->declaration.type;
+		return get_qualified_type(type,
+				expression->base.type->base.qualifiers);
+	}
 
-		case EXPR_UNARY_DEREFERENCE: {
-			const expression_t *const value = expression->unary.value;
-			type_t             *const type  = skip_typeref(value->base.type);
-			if (!is_type_pointer(type))
-				return type_error_type;
-			return type->pointer.points_to;
-		}
+	case EXPR_UNARY_DEREFERENCE: {
+		const expression_t *const value = expression->unary.value;
+		type_t             *const type  = skip_typeref(value->base.type);
+		if (!is_type_pointer(type))
+			return type_error_type;
+		return type->pointer.points_to;
+	}
 
-		case EXPR_ARRAY_ACCESS: {
-			const expression_t *array_ref = expression->array_access.array_ref;
-			type_t             *type_left = skip_typeref(array_ref->base.type);
-			if (!is_type_pointer(type_left))
-				return type_error_type;
-			return type_left->pointer.points_to;
-		}
+	case EXPR_ARRAY_ACCESS: {
+		const expression_t *array_ref = expression->array_access.array_ref;
+		type_t             *type_left = skip_typeref(array_ref->base.type);
+		if (!is_type_pointer(type_left))
+			return type_error_type;
+		return type_left->pointer.points_to;
+	}
 
-		case EXPR_STRING_LITERAL: {
-			size_t size = expression->string.value.size;
-			return make_array_type(type_char, size, TYPE_QUALIFIER_NONE);
-		}
+	case EXPR_STRING_LITERAL: {
+		size_t size = expression->string_literal.value.size;
+		return make_array_type(type_char, size, TYPE_QUALIFIER_NONE);
+	}
 
-		case EXPR_WIDE_STRING_LITERAL: {
-			size_t size = expression->wide_string.value.size;
-			return make_array_type(type_wchar_t, size, TYPE_QUALIFIER_NONE);
-		}
+	case EXPR_WIDE_STRING_LITERAL: {
+		size_t size = wstrlen(&expression->string_literal.value);
+		return make_array_type(type_wchar_t, size, TYPE_QUALIFIER_NONE);
+	}
 
-		case EXPR_COMPOUND_LITERAL:
-			return expression->compound_literal.type;
+	case EXPR_COMPOUND_LITERAL:
+		return expression->compound_literal.type;
 
-		default:
-			return expression->base.type;
+	default:
+		break;
 	}
+	return expression->base.type;
 }
 
 /**
@@ -6240,7 +6344,7 @@ static entity_t *parse_qualified_identifier(void)
 			parse_error_expected("while parsing identifier", T_IDENTIFIER, NULL);
 			return create_error_entity(sym_anonymous, ENTITY_VARIABLE);
 		}
-		symbol = token.v.symbol;
+		symbol = token.symbol;
 		pos    = *HERE;
 		next_token();
 
@@ -6497,7 +6601,7 @@ static expression_t *parse_parenthesized_expression(void)
 	TYPE_SPECIFIERS
 		return parse_cast();
 	case T_IDENTIFIER:
-		if (is_typedef_symbol(token.v.symbol)) {
+		if (is_typedef_symbol(token.symbol)) {
 			return parse_cast();
 		}
 	}
@@ -6584,7 +6688,7 @@ static designator_t *parse_designator(void)
 		                     T_IDENTIFIER, NULL);
 		return NULL;
 	}
-	result->symbol = token.v.symbol;
+	result->symbol = token.symbol;
 	next_token();
 
 	designator_t *last_designator = result;
@@ -6597,7 +6701,7 @@ static designator_t *parse_designator(void)
 			}
 			designator_t *designator    = allocate_ast_zero(sizeof(result[0]));
 			designator->source_position = *HERE;
-			designator->symbol          = token.v.symbol;
+			designator->symbol          = token.symbol;
 			next_token();
 
 			last_designator->next = designator;
@@ -6930,7 +7034,7 @@ static expression_t *parse_label_address(void)
 		parse_error_expected("while parsing label address", T_IDENTIFIER, NULL);
 		goto end_error;
 	}
-	symbol_t *symbol = token.v.symbol;
+	symbol_t *symbol = token.symbol;
 	next_token();
 
 	label_t *label       = get_label(symbol);
@@ -6954,10 +7058,11 @@ end_error:
 static expression_t *parse_noop_expression(void)
 {
 	/* the result is a (int)0 */
-	expression_t *cnst         = allocate_expression_zero(EXPR_CONST);
-	cnst->base.type            = type_int;
-	cnst->conste.v.int_value   = 0;
-	cnst->conste.is_ms_noop    = true;
+	expression_t *literal = allocate_expression_zero(EXPR_LITERAL_MS_NOOP);
+	literal->base.type            = type_int;
+	literal->base.source_position = token.source_position;
+	literal->literal.value.begin  = "__noop";
+	literal->literal.value.size   = 6;
 
 	eat(T___noop);
 
@@ -6976,7 +7081,7 @@ static expression_t *parse_noop_expression(void)
 	expect(')', end_error);
 
 end_error:
-	return cnst;
+	return literal;
 }
 
 /**
@@ -6985,54 +7090,57 @@ end_error:
 static expression_t *parse_primary_expression(void)
 {
 	switch (token.type) {
-		case T_false:                        return parse_bool_const(false);
-		case T_true:                         return parse_bool_const(true);
-		case T_INTEGER:                      return parse_int_const();
-		case T_CHARACTER_CONSTANT:           return parse_character_constant();
-		case T_WIDE_CHARACTER_CONSTANT:      return parse_wide_character_constant();
-		case T_FLOATINGPOINT:                return parse_float_const();
-		case T_STRING_LITERAL:
-		case T_WIDE_STRING_LITERAL:          return parse_string_const();
-		case T___FUNCTION__:
-		case T___func__:                     return parse_function_keyword();
-		case T___PRETTY_FUNCTION__:          return parse_pretty_function_keyword();
-		case T___FUNCSIG__:                  return parse_funcsig_keyword();
-		case T___FUNCDNAME__:                return parse_funcdname_keyword();
-		case T___builtin_offsetof:           return parse_offsetof();
-		case T___builtin_va_start:           return parse_va_start();
-		case T___builtin_va_arg:             return parse_va_arg();
-		case T___builtin_va_copy:            return parse_va_copy();
-		case T___builtin_isgreater:
-		case T___builtin_isgreaterequal:
-		case T___builtin_isless:
-		case T___builtin_islessequal:
-		case T___builtin_islessgreater:
-		case T___builtin_isunordered:        return parse_compare_builtin();
-		case T___builtin_constant_p:         return parse_builtin_constant();
-		case T___builtin_types_compatible_p: return parse_builtin_types_compatible();
-		case T__assume:                      return parse_assume();
-		case T_ANDAND:
-			if (GNU_MODE)
-				return parse_label_address();
-			break;
+	case T_false:                        return parse_boolean_literal(false);
+	case T_true:                         return parse_boolean_literal(true);
+	case T_INTEGER:
+	case T_INTEGER_OCTAL:
+	case T_INTEGER_HEXADECIMAL:
+	case T_FLOATINGPOINT:
+	case T_FLOATINGPOINT_HEXADECIMAL:    return parse_number_literal();
+	case T_CHARACTER_CONSTANT:           return parse_character_constant();
+	case T_WIDE_CHARACTER_CONSTANT:      return parse_wide_character_constant();
+	case T_STRING_LITERAL:
+	case T_WIDE_STRING_LITERAL:          return parse_string_literal();
+	case T___FUNCTION__:
+	case T___func__:                     return parse_function_keyword();
+	case T___PRETTY_FUNCTION__:          return parse_pretty_function_keyword();
+	case T___FUNCSIG__:                  return parse_funcsig_keyword();
+	case T___FUNCDNAME__:                return parse_funcdname_keyword();
+	case T___builtin_offsetof:           return parse_offsetof();
+	case T___builtin_va_start:           return parse_va_start();
+	case T___builtin_va_arg:             return parse_va_arg();
+	case T___builtin_va_copy:            return parse_va_copy();
+	case T___builtin_isgreater:
+	case T___builtin_isgreaterequal:
+	case T___builtin_isless:
+	case T___builtin_islessequal:
+	case T___builtin_islessgreater:
+	case T___builtin_isunordered:        return parse_compare_builtin();
+	case T___builtin_constant_p:         return parse_builtin_constant();
+	case T___builtin_types_compatible_p: return parse_builtin_types_compatible();
+	case T__assume:                      return parse_assume();
+	case T_ANDAND:
+		if (GNU_MODE)
+			return parse_label_address();
+		break;
 
-		case '(':                            return parse_parenthesized_expression();
-		case T___noop:                       return parse_noop_expression();
+	case '(':                            return parse_parenthesized_expression();
+	case T___noop:                       return parse_noop_expression();
 
-		/* Gracefully handle type names while parsing expressions. */
-		case T_COLONCOLON:
+	/* Gracefully handle type names while parsing expressions. */
+	case T_COLONCOLON:
+		return parse_reference();
+	case T_IDENTIFIER:
+		if (!is_typedef_symbol(token.symbol)) {
 			return parse_reference();
-		case T_IDENTIFIER:
-			if (!is_typedef_symbol(token.v.symbol)) {
-				return parse_reference();
-			}
-			/* FALLTHROUGH */
-		TYPENAME_START {
-			source_position_t  const pos  = *HERE;
-			type_t const      *const type = parse_typename();
-			errorf(&pos, "encountered type '%T' while parsing expression", type);
-			return create_invalid_expression();
 		}
+		/* FALLTHROUGH */
+	TYPENAME_START {
+		source_position_t  const pos  = *HERE;
+		type_t const      *const type = parse_typename();
+		errorf(&pos, "encountered type '%T' while parsing expression", type);
+		return create_invalid_expression();
+	}
 	}
 
 	errorf(HERE, "unexpected token %K, expected an expression", &token);
@@ -7177,7 +7285,7 @@ static expression_t *parse_select_expression(expression_t *addr)
 		parse_error_expected("while parsing select", T_IDENTIFIER, NULL);
 		return create_invalid_expression();
 	}
-	symbol_t *symbol = token.v.symbol;
+	symbol_t *symbol = token.symbol;
 	next_token();
 
 	type_t *const orig_type = addr->base.type;
@@ -8277,8 +8385,8 @@ static void warn_string_literal_address(expression_t const* expr)
 		expr = expr->unary.value;
 	}
 
-	if (expr->kind == EXPR_STRING_LITERAL ||
-	    expr->kind == EXPR_WIDE_STRING_LITERAL) {
+	if (expr->kind == EXPR_STRING_LITERAL
+			|| expr->kind == EXPR_WIDE_STRING_LITERAL) {
 		warningf(&expr->base.source_position,
 			"comparison with string literal results in unspecified behaviour");
 	}
@@ -8628,13 +8736,20 @@ static bool expression_has_effect(const expression_t *const expr)
 		case EXPR_INVALID:                    return true; /* do NOT warn */
 		case EXPR_REFERENCE:                  return false;
 		case EXPR_REFERENCE_ENUM_VALUE:       return false;
+		case EXPR_LABEL_ADDRESS:              return false;
+
 		/* suppress the warning for microsoft __noop operations */
-		case EXPR_CONST:                      return expr->conste.is_ms_noop;
-		case EXPR_CHARACTER_CONSTANT:         return false;
-		case EXPR_WIDE_CHARACTER_CONSTANT:    return false;
+		case EXPR_LITERAL_MS_NOOP:            return true;
+		case EXPR_LITERAL_BOOLEAN:
+		case EXPR_LITERAL_CHARACTER:
+		case EXPR_LITERAL_WIDE_CHARACTER:
+		case EXPR_LITERAL_INTEGER:
+		case EXPR_LITERAL_INTEGER_OCTAL:
+		case EXPR_LITERAL_INTEGER_HEXADECIMAL:
+		case EXPR_LITERAL_FLOATINGPOINT:
+		case EXPR_LITERAL_FLOATINGPOINT_HEXADECIMAL: return false;
 		case EXPR_STRING_LITERAL:             return false;
 		case EXPR_WIDE_STRING_LITERAL:        return false;
-		case EXPR_LABEL_ADDRESS:              return false;
 
 		case EXPR_CALL: {
 			const call_expression_t *const call = &expr->call;
@@ -8974,7 +9089,7 @@ static asm_argument_t *parse_asm_arguments(bool is_out)
 				                     T_IDENTIFIER, NULL);
 				return NULL;
 			}
-			argument->symbol = token.v.symbol;
+			argument->symbol = token.symbol;
 
 			expect(']', end_error);
 		}
@@ -9276,7 +9391,7 @@ end_error:
 static statement_t *parse_label_statement(void)
 {
 	assert(token.type == T_IDENTIFIER);
-	symbol_t *symbol = token.v.symbol;
+	symbol_t *symbol = token.symbol;
 	label_t  *label  = get_label(symbol);
 
 	statement_t *const statement = allocate_statement_zero(STATEMENT_LABEL);
@@ -9658,7 +9773,7 @@ static statement_t *parse_goto(void)
 
 		statement->gotos.expression = expression;
 	} else if (token.type == T_IDENTIFIER) {
-		symbol_t *symbol = token.v.symbol;
+		symbol_t *symbol = token.symbol;
 		next_token();
 		statement->gotos.label = get_label(symbol);
 	} else {
@@ -9972,7 +10087,7 @@ static statement_t *parse_local_label_declaration(void)
 				T_IDENTIFIER, NULL);
 			goto end_error;
 		}
-		symbol_t *symbol = token.v.symbol;
+		symbol_t *symbol = token.symbol;
 		entity_t *entity = get_entity(symbol, NAMESPACE_LABEL);
 		if (entity != NULL && entity->base.parent_scope == current_scope) {
 			errorf(HERE, "multiple definitions of '__label__ %Y' (previous definition %P)",
@@ -10010,7 +10125,7 @@ static void parse_namespace_definition(void)
 	symbol_t *symbol = NULL;
 
 	if (token.type == T_IDENTIFIER) {
-		symbol = token.v.symbol;
+		symbol = token.symbol;
 		next_token();
 
 		entity = get_entity(symbol, NAMESPACE_NORMAL);
@@ -10075,7 +10190,7 @@ static statement_t *intern_parse_statement(void)
 		token_type_t la1_type = (token_type_t)look_ahead(1)->type;
 		if (la1_type == ':') {
 			statement = parse_label_statement();
-		} else if (is_typedef_symbol(token.v.symbol)) {
+		} else if (is_typedef_symbol(token.symbol)) {
 			statement = parse_declaration_statement();
 		} else {
 			/* it's an identifier, the grammar says this must be an
@@ -10085,7 +10200,7 @@ static statement_t *intern_parse_statement(void)
 			switch (la1_type) {
 			case '&':
 			case '*':
-				if (get_entity(token.v.symbol, NAMESPACE_NORMAL) != NULL)
+				if (get_entity(token.symbol, NAMESPACE_NORMAL) != NULL)
 					goto expression_statment;
 				/* FALLTHROUGH */
 
diff --git a/preprocessor.c b/preprocessor.c
index 570182c..9c4dd66 100644
--- a/preprocessor.c
+++ b/preprocessor.c
@@ -484,9 +484,9 @@ end_of_string:
 	const char *const result = string;
 #endif
 
-	pp_token.type           = TP_STRING_LITERAL;
-	pp_token.v.string.begin = result;
-	pp_token.v.string.size  = size;
+	pp_token.type          = TP_STRING_LITERAL;
+	pp_token.literal.begin = result;
+	pp_token.literal.size  = size;
 }
 
 static void parse_wide_character_constant(void)
@@ -532,67 +532,6 @@ end_of_wide_char_constant:
 	/* TODO... */
 }
 
-static void parse_wide_string_literal(void)
-{
-	const unsigned start_linenr = input.position.linenr;
-
-	assert(CC == '"');
-	next_char();
-
-	while(1) {
-		switch(CC) {
-		case '\\': {
-			wchar_rep_t tc = parse_escape_sequence();
-			obstack_grow(&symbol_obstack, &tc, sizeof(tc));
-			break;
-		}
-
-		case EOF: {
-			source_position_t source_position;
-			source_position.input_name = pp_token.source_position.input_name;
-			source_position.linenr     = start_linenr;
-			errorf(&source_position, "string has no end");
-			pp_token.type = TP_ERROR;
-			return;
-		}
-
-		case '"':
-			next_char();
-			goto end_of_string;
-
-		default: {
-			wchar_rep_t tc = CC;
-			obstack_grow(&symbol_obstack, &tc, sizeof(tc));
-			next_char();
-			break;
-		}
-		}
-	}
-
-end_of_string:;
-	/* add finishing 0 to the string */
-	static const wchar_rep_t nul = L'\0';
-	obstack_grow(&symbol_obstack, &nul, sizeof(nul));
-
-	const size_t size
-		= (size_t)obstack_object_size(&symbol_obstack) / sizeof(wchar_rep_t);
-	const wchar_rep_t *const string = obstack_finish(&symbol_obstack);
-
-#if 0 /* TODO hash */
-	/* check if there is already a copy of the string */
-	const wchar_rep_t *const result = strset_insert(&stringset, string);
-	if(result != string) {
-		obstack_free(&symbol_obstack, string);
-	}
-#else
-	const wchar_rep_t *const result = string;
-#endif
-
-	pp_token.type                = TP_WIDE_STRING_LITERAL;
-	pp_token.v.wide_string.begin = result;
-	pp_token.v.wide_string.size  = size;
-}
-
 static void parse_character_constant(void)
 {
 	const unsigned start_linenr = input.position.linenr;
@@ -637,9 +576,9 @@ end_of_char_constant:;
 	const size_t      size   = (size_t)obstack_object_size(&symbol_obstack);
 	const char *const string = obstack_finish(&symbol_obstack);
 
-	pp_token.type           = TP_CHARACTER_CONSTANT;
-	pp_token.v.string.begin = string;
-	pp_token.v.string.size  = size;
+	pp_token.type          = TP_CHARACTER_CONSTANT;
+	pp_token.literal.begin = string;
+	pp_token.literal.size  = size;
 }
 
 #define SYMBOL_CHARS_WITHOUT_E_P \
@@ -747,7 +686,7 @@ restart:
 		return;
 
 	/* if it was an identifier then we might need to expand again */
-	pp_definition_t *symbol_definition = pp_token.v.symbol->pp_definition;
+	pp_definition_t *symbol_definition = pp_token.symbol->pp_definition;
 	if(symbol_definition != NULL && !symbol_definition->is_expanding) {
 		symbol_definition->parent_expansion = definition;
 		symbol_definition->expand_pos       = 0;
@@ -912,7 +851,7 @@ end_symbol:
 	/* might be a wide string or character constant ( L"string"/L'c' ) */
 	if (CC == '"' && string[0] == 'L' && string[1] == '\0') {
 		obstack_free(&symbol_obstack, string);
-		parse_wide_string_literal();
+		/* TODO */
 		return;
 	} else if (CC == '\'' && string[0] == 'L' && string[1] == '\0') {
 		obstack_free(&symbol_obstack, string);
@@ -922,8 +861,8 @@ end_symbol:
 
 	symbol_t *symbol = symbol_table_insert(string);
 
-	pp_token.type     = symbol->pp_ID;
-	pp_token.v.symbol = symbol;
+	pp_token.type   = symbol->pp_ID;
+	pp_token.symbol = symbol;
 
 	/* we can free the memory from symbol obstack if we already had an entry in
 	 * the symbol table */
@@ -993,13 +932,12 @@ end_number:
 	size_t  size   = obstack_object_size(&symbol_obstack);
 	char   *string = obstack_finish(&symbol_obstack);
 
-	pp_token.type           = TP_NUMBER;
-	pp_token.v.string.begin = string;
-	pp_token.v.string.size  = size;
+	pp_token.type          = TP_NUMBER;
+	pp_token.literal.begin = string;
+	pp_token.literal.size  = size;
 }
 
 
-
 #define MAYBE_PROLOG                                       \
 			next_char();                                   \
 			while(1) {                                     \
@@ -1292,14 +1230,14 @@ static void emit_pp_token(void)
 
 	switch(pp_token.type) {
 	case TP_IDENTIFIER:
-		fputs(pp_token.v.symbol->string, out);
+		fputs(pp_token.symbol->string, out);
 		break;
 	case TP_NUMBER:
-		fputs(pp_token.v.string.begin, out);
+		fputs(pp_token.literal.begin, out);
 		break;
 	case TP_STRING_LITERAL:
 		fputc('"', out);
-		fputs(pp_token.v.string.begin, out);
+		fputs(pp_token.literal.begin, out);
 		fputc('"', out);
 		break;
 	case '\n':
@@ -1332,22 +1270,6 @@ static bool strings_equal(const string_t *string1, const string_t *string2)
 	return true;
 }
 
-static bool wide_strings_equal(const wide_string_t *string1,
-                               const wide_string_t *string2)
-{
-	size_t size = string1->size;
-	if(size != string2->size)
-		return false;
-
-	const wchar_rep_t *c1 = string1->begin;
-	const wchar_rep_t *c2 = string2->begin;
-	for(size_t i = 0; i < size; ++i, ++c1, ++c2) {
-		if(*c1 != *c2)
-			return false;
-	}
-	return true;
-}
-
 static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
 {
 	if(token1->type != token2->type)
@@ -1358,16 +1280,12 @@ static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
 		/* TODO */
 		return false;
 	case TP_IDENTIFIER:
-		return token1->v.symbol == token2->v.symbol;
+		return token1->symbol == token2->symbol;
 	case TP_NUMBER:
 	case TP_CHARACTER_CONSTANT:
 	case TP_STRING_LITERAL:
-		return strings_equal(&token1->v.string, &token2->v.string);
+		return strings_equal(&token1->literal, &token2->literal);
 
-	case TP_WIDE_CHARACTER_CONSTANT:
-	case TP_WIDE_STRING_LITERAL:
-		return wide_strings_equal(&token1->v.wide_string,
-		                          &token2->v.wide_string);
 	default:
 		return true;
 	}
@@ -1399,7 +1317,7 @@ static void parse_define_directive(void)
 		       "expected identifier after #define, got '%t'", &pp_token);
 		goto error_out;
 	}
-	symbol_t *symbol = pp_token.v.symbol;
+	symbol_t *symbol = pp_token.symbol;
 
 	pp_definition_t *new_definition
 		= obstack_alloc(&pp_obstack, sizeof(new_definition[0]));
@@ -1427,7 +1345,7 @@ static void parse_define_directive(void)
 				}
 				break;
 			case TP_IDENTIFIER:
-				obstack_ptr_grow(&pp_obstack, pp_token.v.symbol);
+				obstack_ptr_grow(&pp_obstack, pp_token.symbol);
 				next_preprocessing_token();
 
 				if (pp_token.type == ',') {
@@ -1508,7 +1426,7 @@ static void parse_undef_directive(void)
 		return;
 	}
 
-	symbol_t *symbol = pp_token.v.symbol;
+	symbol_t *symbol = pp_token.symbol;
 	symbol->pp_definition = NULL;
 	next_preprocessing_token();
 
@@ -1696,7 +1614,7 @@ static void parse_ifdef_ifndef_directive(void)
 		/* just take the true case in the hope to avoid further errors */
 		condition = true;
 	} else {
-		symbol_t        *symbol        = pp_token.v.symbol;
+		symbol_t        *symbol        = pp_token.symbol;
 		pp_definition_t *pp_definition = symbol->pp_definition;
 		next_preprocessing_token();
 
diff --git a/printer.c b/printer.c
index 71d48f8..7e6e8f5 100644
--- a/printer.c
+++ b/printer.c
@@ -26,6 +26,11 @@
 
 static FILE* out;
 
+static void print_char_file(const char c)
+{
+	fputc(c, out); /* out is the file-scope FILE* assigned by print_to_file() */
+}
+
 static void print_string_file(const char *str)
 {
 	fputs(str, out);
@@ -36,26 +41,6 @@ static void print_vformat_file(const char *format, va_list ap)
 	vfprintf(out, format, ap);
 }
 
-static void print_char_file(wchar_rep_t c)
-{
-	const unsigned tc = (unsigned) c;
-	if (tc < 0x80) {
-		fputc(tc, out);
-	} else if (tc < 0x800) {
-		fputc(0xC0 | (tc >> 6),   out);
-		fputc(0x80 | (tc & 0x3F), out);
-	} else if (tc < 0x10000) {
-		fputc(0xE0 | ( tc >> 12),         out);
-		fputc(0x80 | ((tc >>  6) & 0x3F), out);
-		fputc(0x80 | ( tc        & 0x3F), out);
-	} else {
-		fputc(0xF0 | ( tc >> 18),         out);
-		fputc(0x80 | ((tc >> 12) & 0x3F), out);
-		fputc(0x80 | ((tc >>  6) & 0x3F), out);
-		fputc(0x80 | ( tc        & 0x3F), out);
-	}
-}
-
 void print_to_file(FILE *new_out)
 {
 	out = new_out;
@@ -68,6 +53,11 @@ void print_to_file(FILE *new_out)
 
 static struct obstack *obst;
 
+static void print_char_obstack(const char c)
+{
+	obstack_1grow(obst, c); /* obst is the file-scope obstack assigned by print_to_obstack() */
+}
+
 static void print_string_obstack(const char *str)
 {
 	size_t len = strlen(str);
@@ -79,26 +69,6 @@ static void print_vformat_obstack(const char *format, va_list ap)
 	obstack_vprintf(obst, format, ap);
 }
 
-static void print_char_obstack(wchar_rep_t c)
-{
-	const unsigned tc = (unsigned) c;
-	if (tc < 0x80) {
-		obstack_1grow(obst, tc);
-	} else if (tc < 0x800) {
-		obstack_1grow(obst, 0xC0 | (tc >> 6));
-		obstack_1grow(obst, 0x80 | (tc & 0x3F));
-	} else if (tc < 0x10000) {
-		obstack_1grow(obst, 0xE0 | ( tc >> 12));
-		obstack_1grow(obst, 0x80 | ((tc >>  6) & 0x3F));
-		obstack_1grow(obst, 0x80 | ( tc        & 0x3F));
-	} else {
-		obstack_1grow(obst, 0xF0 | ( tc >> 18));
-		obstack_1grow(obst, 0x80 | ((tc >> 12) & 0x3F));
-		obstack_1grow(obst, 0x80 | ((tc >>  6) & 0x3F));
-		obstack_1grow(obst, 0x80 | ( tc        & 0x3F));
-	}
-}
-
 void print_to_obstack(struct obstack *new_obst)
 {
 	obst = new_obst;
@@ -112,7 +82,7 @@ void print_to_obstack(struct obstack *new_obst)
 static char *buffer_pos;
 static char *buffer_end;
 
-static inline void buffer_add_char(int c)
+static void print_char_buffer(const char c)
 {
 	if (buffer_pos == buffer_end)
 		return;
@@ -122,7 +92,7 @@ static inline void buffer_add_char(int c)
 static void print_string_buffer(const char *str)
 {
 	for (const char *c = str; *c != '\0'; ++c) {
-		buffer_add_char(*c);
+		print_char_buffer(*c);
 	}
 }
 
@@ -133,26 +103,6 @@ static void print_vformat_buffer(const char *format, va_list ap)
 	buffer_pos    += written < size ? written : size;
 }
 
-static void print_char_buffer(wchar_rep_t c)
-{
-	const unsigned tc = (unsigned) c;
-	if (tc < 0x80) {
-		buffer_add_char(tc);
-	} else if (tc < 0x800) {
-		buffer_add_char(0xC0 | (tc >> 6));
-		buffer_add_char(0x80 | (tc & 0x3F));
-	} else if (tc < 0x10000) {
-		buffer_add_char(0xE0 | ( tc >> 12));
-		buffer_add_char(0x80 | ((tc >>  6) & 0x3F));
-		buffer_add_char(0x80 | ( tc        & 0x3F));
-	} else {
-		buffer_add_char(0xF0 | ( tc >> 18));
-		buffer_add_char(0x80 | ((tc >> 12) & 0x3F));
-		buffer_add_char(0x80 | ((tc >>  6) & 0x3F));
-		buffer_add_char(0x80 | ( tc        & 0x3F));
-	}
-}
-
 void print_to_buffer(char *buffer, size_t buffer_size)
 {
 	buffer_pos = buffer;
@@ -173,7 +123,7 @@ void finish_print_to_buffer(void)
 
 void (*print_string)(const char *str) = print_string_file;
 void (*print_vformat)(const char *format, va_list ap) = print_vformat_file;
-void (*print_char)(wchar_rep_t c) = print_char_file;
+void (*print_char)(const char c) = print_char_file;
 
 void printer_push(void)
 {
diff --git a/printer.h b/printer.h
index e30a4b9..f57d3cf 100644
--- a/printer.h
+++ b/printer.h
@@ -35,8 +35,7 @@
 /** print a string into current output */
 extern void (*print_string)(const char *str);
 extern void (*print_vformat)(const char *format, va_list ap);
-/** print a single unicode character to current output (encoded as UTF-8) */
-extern void (*print_char)(wchar_rep_t c);
+extern void (*print_char)(const char c);
 
 /** print a printf style format string to current output */
 static inline void __attribute__((format(printf,1,2))) print_format(const char *format, ...)
diff --git a/string_rep.h b/string_rep.h
index e24b40c..873563b 100644
--- a/string_rep.h
+++ b/string_rep.h
@@ -20,18 +20,66 @@
 #ifndef STRING_REP_H
 #define STRING_REP_H
 
-#include <wchar.h>
-
-typedef wchar_t wchar_rep_t;
+#include <assert.h>
+#include <stdlib.h>
 
 typedef struct string_t {
-	const char *begin;
-	size_t      size;
+	const char *begin; /**< UTF-8 encoded string, the last character is
+						* guaranteed to be 0 */
+	size_t      size;  /**< size of string in bytes (not characters) */
 } string_t;
 
-typedef struct wide_string_t {
-	const wchar_rep_t *begin;
-	size_t             size;
-} wide_string_t;
+typedef unsigned int utf32;
+#define UTF32_PRINTF_FORMAT "%u"
+
+/**
+ * Decode ("parse") one UTF-8 encoded character and return its code point.
+ * Warning: only valid UTF-8 is handled; continuation bytes are not checked
+ * and the behaviour is undefined for invalid UTF-8 input.
+ *
+ * @param p    Pointer to a pointer into the string; the pointed-to pointer
+ *             is advanced past every byte consumed (1 to 4 bytes)
+ */
+static inline utf32 read_utf8_char(const char **p)
+{
+	const unsigned char *c      = (const unsigned char *) *p;
+	utf32                result;
+
+	if ((*c & 0x80) == 0) {
+		/* 1 byte encoding: 0b0??????? */
+ 		result = *c++;
+	} else if ((*c & 0xE0) == 0xC0) {
+		/* 2 byte encoding: 0b110?????, 0b10?????? */
+		result = *c++ & 0x1F;
+		result = (result << 6) | (*c++ & 0x3F);
+	} else if ((*c & 0xF0) == 0xE0) {
+		/* 3 byte encoding: 0b1110????, 0b10??????, 0b10?????? */
+		result = *c++ & 0x0F;
+		result = (result << 6) | (*c++ & 0x3F);
+		result = (result << 6) | (*c++ & 0x3F);
+	} else {
+		/* 4 byte encoding: 0b11110???, 0b10??????, 0b10??????, 0b10?????? */
+		assert((*c & 0xF8) == 0xF0); /* any other lead byte is invalid input */
+		result = *c++ & 0x07;
+		result = (result << 6) | (*c++ & 0x3F);
+		result = (result << 6) | (*c++ & 0x3F);
+		result = (result << 6) | (*c++ & 0x3F);
+	}
+
+	*p = (const char*) c; /* report back how far the input was consumed */
+	return result;
+}
+
+static inline size_t wstrlen(const string_t *string)
+{ /* counts the UTF-8 encoded characters found in the string->size bytes at string->begin */
+	size_t      result = 0; /* characters decoded so far */
+	const char *p      = string->begin;
+	const char *end    = p + string->size; /* one past the last byte to examine */
+	while (p < end) {
+		read_utf8_char(&p); /* decoded value is discarded; only the advance of p matters */
+		++result;
+	}
+	return result;
+}
 
 #endif
diff --git a/token.c b/token.c
index 3145d79..1fbaafd 100644
--- a/token.c
+++ b/token.c
@@ -124,20 +124,42 @@ symbol_t *get_token_symbol(const token_t *token)
 	return token_symbols[token->type];
 }
 
+static void print_stringrep(const string_t *string, FILE *f)
+{ /* writes exactly string->size bytes from string->begin to f */
+	for (size_t i = 0; i < string->size; ++i) { /* bounded by size, not by a 0 byte */
+		fputc(string->begin[i], f);
+	}
+}
+
 void print_token(FILE *f, const token_t *token)
 {
 	switch(token->type) {
 	case T_IDENTIFIER:
-		fprintf(f, "identifier '%s'", token->v.symbol->string);
+		fprintf(f, "identifier '%s'", token->symbol->string);
 		break;
 	case T_INTEGER:
-		fprintf(f, "integer number '%lld'", token->v.intvalue);
-		break;
+	case T_INTEGER_OCTAL:
+	case T_INTEGER_HEXADECIMAL:
 	case T_FLOATINGPOINT:
-		fprintf(f, "floating-point number '%LF'", token->v.floatvalue);
+	case T_FLOATINGPOINT_HEXADECIMAL:
+		print_token_type(f, (token_type_t)token->type);
+		fputs(" '", f);
+		print_stringrep(&token->literal, f);
+		if (token->symbol != NULL)
+			fputs(token->symbol->string, f);
+		fputc('\'', f);
 		break;
+	case T_WIDE_STRING_LITERAL:
 	case T_STRING_LITERAL:
-		fprintf(f, "string \"%s\"", token->v.string.begin);
+		print_token_type(f, (token_type_t)token->type);
+		fprintf(f, " \"%s\"", token->literal.begin);
+		break;
+	case T_CHARACTER_CONSTANT:
+	case T_WIDE_CHARACTER_CONSTANT:
+		print_token_type(f, (token_type_t)token->type);
+		fputs(" \'", f);
+		print_stringrep(&token->literal, f);
+		fputs("'", f);
 		break;
 	default:
 		fputc('\'', f);
@@ -180,13 +202,13 @@ void print_pp_token(FILE *f, const token_t *token)
 {
 	switch((preprocessor_token_type_t) token->type) {
 	case TP_IDENTIFIER:
-		fprintf(f, "identifier '%s'", token->v.symbol->string);
+		fprintf(f, "identifier '%s'", token->symbol->string);
 		break;
 	case TP_NUMBER:
-		fprintf(f, "number '%s'", token->v.string.begin);
+		fprintf(f, "number '%s'", token->literal.begin);
 		break;
 	case TP_STRING_LITERAL:
-		fprintf(f, "string \"%s\"", token->v.string.begin);
+		fprintf(f, "string \"%s\"", token->literal.begin);
 		break;
 	default:
 		print_pp_token_type(f, (preprocessor_token_type_t) token->type);
diff --git a/token_t.h b/token_t.h
index 10d2248..7bdc8d8 100644
--- a/token_t.h
+++ b/token_t.h
@@ -60,15 +60,9 @@ struct source_position_t {
 extern const source_position_t builtin_source_position;
 
 typedef struct {
-	int type;
-	union {
-		symbol_t      *symbol;
-		long long      intvalue;
-		long double    floatvalue;
-		string_t       string;
-		wide_string_t  wide_string;
-	} v;
-	type_t            *datatype;
+	int                type;
+	symbol_t          *symbol;  /**< contains identifier. Contains number suffix for numbers */
+	string_t           literal; /**< string value/literal value */
 	source_position_t  source_position;
 } token_t;
 
diff --git a/tokens.inc b/tokens.inc
index d801de9..39c5a6a 100644
--- a/tokens.inc
+++ b/tokens.inc
@@ -2,13 +2,16 @@
 #define TS(x,str,val)
 #endif
 
-TS(IDENTIFIER,              "identifier", = 256)
-TS(INTEGER,                 "integer number",)
-TS(CHARACTER_CONSTANT,      "character constant",)
-TS(WIDE_CHARACTER_CONSTANT, "wide character constant",)
-TS(FLOATINGPOINT,           "floatingpoint number",)
-TS(STRING_LITERAL,          "string literal",)
-TS(WIDE_STRING_LITERAL,     "wide string literal",)
+TS(IDENTIFIER,                "identifier", = 256)
+TS(INTEGER,                   "integer number",)
+TS(INTEGER_OCTAL,             "octal integer number",)
+TS(INTEGER_HEXADECIMAL,       "hexadecimal integer number",)
+TS(FLOATINGPOINT,             "floatingpoint number",)
+TS(FLOATINGPOINT_HEXADECIMAL, "hexadecimal floatingpoint number",)
+TS(CHARACTER_CONSTANT,        "character constant",)
+TS(WIDE_CHARACTER_CONSTANT,   "wide character constant",)
+TS(STRING_LITERAL,            "string literal",)
+TS(WIDE_STRING_LITERAL,       "wide string literal",)
 
 #define ALTERNATE(name, val)          T(_CXX, name, #name,  val)
 #define PUNCTUATOR(name, string, val) T(_ALL, name, string, val)
diff --git a/walk_statements.c b/walk_statements.c
index 4c8a6c6..883a889 100644
--- a/walk_statements.c
+++ b/walk_statements.c
@@ -107,13 +107,11 @@ static void walk_expression(expression_t const *const expr,
 		walk_expression(expr->va_copye.dst, callback, env);
 		return;
 
+	EXPR_LITERAL_CASES
 	case EXPR_INVALID:
 	case EXPR_OFFSETOF:
 	case EXPR_REFERENCE:
 	case EXPR_REFERENCE_ENUM_VALUE:
-	case EXPR_CONST:
-	case EXPR_CHARACTER_CONSTANT:
-	case EXPR_WIDE_CHARACTER_CONSTANT:
 	case EXPR_STRING_LITERAL:
 	case EXPR_WIDE_STRING_LITERAL:
 	case EXPR_FUNCNAME:
diff --git a/wrappergen/write_fluffy.c b/wrappergen/write_fluffy.c
index 236e625..05763d4 100644
--- a/wrappergen/write_fluffy.c
+++ b/wrappergen/write_fluffy.c
@@ -223,16 +223,9 @@ static void write_unary_expression(const unary_expression_t *expression)
 
 static void write_expression(const expression_t *expression)
 {
-	const const_expression_t *constant;
-	/* TODO */
 	switch(expression->kind) {
-	case EXPR_CONST:
-		constant = &expression->conste;
-		if(is_type_integer(expression->base.type)) {
-			fprintf(out, "%lld", constant->v.int_value);
-		} else {
-			fprintf(out, "%Lf", constant->v.float_value);
-		}
+	case EXPR_LITERAL_INTEGER:
+		fprintf(out, "%s", expression->literal.value.begin);
 		break;
 	EXPR_UNARY_CASES
 		write_unary_expression((const unary_expression_t*) expression);
diff --git a/wrappergen/write_jna.c b/wrappergen/write_jna.c
index 5ab41fe..26bfd4e 100644
--- a/wrappergen/write_jna.c
+++ b/wrappergen/write_jna.c
@@ -298,16 +298,10 @@ static void write_binary_expression(const binary_expression_t *expression)
 
 static void write_expression(const expression_t *expression)
 {
-	const const_expression_t *constant;
 	/* TODO */
 	switch(expression->kind) {
-	case EXPR_CONST:
-		constant = &expression->conste;
-		if(is_type_integer(expression->base.type)) {
-			fprintf(out, "%lld", constant->v.int_value);
-		} else {
-			fprintf(out, "%Lf", constant->v.float_value);
-		}
+	case EXPR_LITERAL_INTEGER:
+		fprintf(out, "%s", expression->literal.value.begin);
 		break;
 	case EXPR_REFERENCE_ENUM_VALUE: {
 		/* UHOH... hacking */