#include "lang_features.h"
#include "entity_t.h"
#include "printer.h"
+#include "types.h"
#include <assert.h>
#include <stdio.h>
print_string("\t");
}
+/**
+ * Print the text stored in a symbol.
+ *
+ * @param symbol  the symbol whose string is handed to print_string()
+ */
+static void print_symbol(const symbol_t *symbol)
+{
+	const char *const name = symbol->string;
+	print_string(name);
+}
+
+/**
+ * Print the raw characters of a string_t without quoting or escaping.
+ *
+ * Exactly string->size characters, starting at string->begin, are emitted
+ * one by one through print_char().
+ *
+ * @param string  the string representation to print
+ */
+static void print_stringrep(const string_t *string)
+{
+	const char *cursor    = string->begin;
+	size_t      remaining = string->size;
+	while (remaining > 0) {
+		print_char(*cursor);
+		++cursor;
+		--remaining;
+	}
+}
+
/**
* Returns 1 if a given precedence level has right-to-left
* associativity, else 0.
static unsigned get_expression_precedence(expression_kind_t kind)
{
static const unsigned prec[] = {
- [EXPR_UNKNOWN] = PREC_PRIMARY,
- [EXPR_INVALID] = PREC_PRIMARY,
- [EXPR_REFERENCE] = PREC_PRIMARY,
- [EXPR_REFERENCE_ENUM_VALUE] = PREC_PRIMARY,
- [EXPR_CHARACTER_CONSTANT] = PREC_PRIMARY,
- [EXPR_WIDE_CHARACTER_CONSTANT] = PREC_PRIMARY,
- [EXPR_CONST] = PREC_PRIMARY,
- [EXPR_STRING_LITERAL] = PREC_PRIMARY,
- [EXPR_WIDE_STRING_LITERAL] = PREC_PRIMARY,
- [EXPR_COMPOUND_LITERAL] = PREC_UNARY,
- [EXPR_CALL] = PREC_POSTFIX,
- [EXPR_CONDITIONAL] = PREC_CONDITIONAL,
- [EXPR_SELECT] = PREC_POSTFIX,
- [EXPR_ARRAY_ACCESS] = PREC_POSTFIX,
- [EXPR_SIZEOF] = PREC_UNARY,
- [EXPR_CLASSIFY_TYPE] = PREC_UNARY,
- [EXPR_ALIGNOF] = PREC_UNARY,
-
- [EXPR_FUNCNAME] = PREC_PRIMARY,
- [EXPR_BUILTIN_CONSTANT_P] = PREC_PRIMARY,
- [EXPR_BUILTIN_TYPES_COMPATIBLE_P] = PREC_PRIMARY,
- [EXPR_OFFSETOF] = PREC_PRIMARY,
- [EXPR_VA_START] = PREC_PRIMARY,
- [EXPR_VA_ARG] = PREC_PRIMARY,
- [EXPR_VA_COPY] = PREC_PRIMARY,
- [EXPR_STATEMENT] = PREC_PRIMARY,
- [EXPR_LABEL_ADDRESS] = PREC_PRIMARY,
-
- [EXPR_UNARY_NEGATE] = PREC_UNARY,
- [EXPR_UNARY_PLUS] = PREC_UNARY,
- [EXPR_UNARY_BITWISE_NEGATE] = PREC_UNARY,
- [EXPR_UNARY_NOT] = PREC_UNARY,
- [EXPR_UNARY_DEREFERENCE] = PREC_UNARY,
- [EXPR_UNARY_TAKE_ADDRESS] = PREC_UNARY,
- [EXPR_UNARY_POSTFIX_INCREMENT] = PREC_POSTFIX,
- [EXPR_UNARY_POSTFIX_DECREMENT] = PREC_POSTFIX,
- [EXPR_UNARY_PREFIX_INCREMENT] = PREC_UNARY,
- [EXPR_UNARY_PREFIX_DECREMENT] = PREC_UNARY,
- [EXPR_UNARY_CAST] = PREC_UNARY,
- [EXPR_UNARY_CAST_IMPLICIT] = PREC_UNARY,
- [EXPR_UNARY_ASSUME] = PREC_PRIMARY,
- [EXPR_UNARY_DELETE] = PREC_UNARY,
- [EXPR_UNARY_DELETE_ARRAY] = PREC_UNARY,
- [EXPR_UNARY_THROW] = PREC_ASSIGNMENT,
-
- [EXPR_BINARY_ADD] = PREC_ADDITIVE,
- [EXPR_BINARY_SUB] = PREC_ADDITIVE,
- [EXPR_BINARY_MUL] = PREC_MULTIPLICATIVE,
- [EXPR_BINARY_DIV] = PREC_MULTIPLICATIVE,
- [EXPR_BINARY_MOD] = PREC_MULTIPLICATIVE,
- [EXPR_BINARY_EQUAL] = PREC_EQUALITY,
- [EXPR_BINARY_NOTEQUAL] = PREC_EQUALITY,
- [EXPR_BINARY_LESS] = PREC_RELATIONAL,
- [EXPR_BINARY_LESSEQUAL] = PREC_RELATIONAL,
- [EXPR_BINARY_GREATER] = PREC_RELATIONAL,
- [EXPR_BINARY_GREATEREQUAL] = PREC_RELATIONAL,
- [EXPR_BINARY_BITWISE_AND] = PREC_AND,
- [EXPR_BINARY_BITWISE_OR] = PREC_OR,
- [EXPR_BINARY_BITWISE_XOR] = PREC_XOR,
- [EXPR_BINARY_LOGICAL_AND] = PREC_LOGICAL_AND,
- [EXPR_BINARY_LOGICAL_OR] = PREC_LOGICAL_OR,
- [EXPR_BINARY_SHIFTLEFT] = PREC_SHIFT,
- [EXPR_BINARY_SHIFTRIGHT] = PREC_SHIFT,
- [EXPR_BINARY_ASSIGN] = PREC_ASSIGNMENT,
- [EXPR_BINARY_MUL_ASSIGN] = PREC_ASSIGNMENT,
- [EXPR_BINARY_DIV_ASSIGN] = PREC_ASSIGNMENT,
- [EXPR_BINARY_MOD_ASSIGN] = PREC_ASSIGNMENT,
- [EXPR_BINARY_ADD_ASSIGN] = PREC_ASSIGNMENT,
- [EXPR_BINARY_SUB_ASSIGN] = PREC_ASSIGNMENT,
- [EXPR_BINARY_SHIFTLEFT_ASSIGN] = PREC_ASSIGNMENT,
- [EXPR_BINARY_SHIFTRIGHT_ASSIGN] = PREC_ASSIGNMENT,
- [EXPR_BINARY_BITWISE_AND_ASSIGN] = PREC_ASSIGNMENT,
- [EXPR_BINARY_BITWISE_XOR_ASSIGN] = PREC_ASSIGNMENT,
- [EXPR_BINARY_BITWISE_OR_ASSIGN] = PREC_ASSIGNMENT,
- [EXPR_BINARY_COMMA] = PREC_EXPRESSION,
-
- [EXPR_BINARY_ISGREATER] = PREC_PRIMARY,
- [EXPR_BINARY_ISGREATEREQUAL] = PREC_PRIMARY,
- [EXPR_BINARY_ISLESS] = PREC_PRIMARY,
- [EXPR_BINARY_ISLESSEQUAL] = PREC_PRIMARY,
- [EXPR_BINARY_ISLESSGREATER] = PREC_PRIMARY,
- [EXPR_BINARY_ISUNORDERED] = PREC_PRIMARY
+ /* Lookup table mapping every expression kind to its PREC_* level.
+  * Kinds without a designated entry are implicitly zero-initialised, so
+  * each kind -- including every EXPR_LITERAL_* kind -- must be listed
+  * explicitly to get the intended level. */
+ [EXPR_UNKNOWN] = PREC_PRIMARY,
+ [EXPR_INVALID] = PREC_PRIMARY,
+ [EXPR_REFERENCE] = PREC_PRIMARY,
+ [EXPR_REFERENCE_ENUM_VALUE] = PREC_PRIMARY,
+ [EXPR_LITERAL_BOOLEAN] = PREC_PRIMARY,
+ [EXPR_LITERAL_INTEGER] = PREC_PRIMARY,
+ [EXPR_LITERAL_INTEGER_OCTAL] = PREC_PRIMARY,
+ [EXPR_LITERAL_INTEGER_HEXADECIMAL] = PREC_PRIMARY,
+ [EXPR_LITERAL_FLOATINGPOINT] = PREC_PRIMARY,
+ [EXPR_LITERAL_FLOATINGPOINT_HEXADECIMAL] = PREC_PRIMARY,
+ [EXPR_LITERAL_CHARACTER] = PREC_PRIMARY,
+ [EXPR_LITERAL_WIDE_CHARACTER] = PREC_PRIMARY,
+ [EXPR_LITERAL_MS_NOOP] = PREC_PRIMARY,
+ [EXPR_STRING_LITERAL] = PREC_PRIMARY,
+ [EXPR_WIDE_STRING_LITERAL] = PREC_PRIMARY,
+ [EXPR_COMPOUND_LITERAL] = PREC_UNARY,
+ [EXPR_CALL] = PREC_POSTFIX,
+ [EXPR_CONDITIONAL] = PREC_CONDITIONAL,
+ [EXPR_SELECT] = PREC_POSTFIX,
+ [EXPR_ARRAY_ACCESS] = PREC_POSTFIX,
+ [EXPR_SIZEOF] = PREC_UNARY,
+ [EXPR_CLASSIFY_TYPE] = PREC_UNARY,
+ [EXPR_ALIGNOF] = PREC_UNARY,
+
+ [EXPR_FUNCNAME] = PREC_PRIMARY,
+ [EXPR_BUILTIN_CONSTANT_P] = PREC_PRIMARY,
+ [EXPR_BUILTIN_TYPES_COMPATIBLE_P] = PREC_PRIMARY,
+ [EXPR_OFFSETOF] = PREC_PRIMARY,
+ [EXPR_VA_START] = PREC_PRIMARY,
+ [EXPR_VA_ARG] = PREC_PRIMARY,
+ [EXPR_VA_COPY] = PREC_PRIMARY,
+ [EXPR_STATEMENT] = PREC_PRIMARY,
+ [EXPR_LABEL_ADDRESS] = PREC_PRIMARY,
+
+ [EXPR_UNARY_NEGATE] = PREC_UNARY,
+ [EXPR_UNARY_PLUS] = PREC_UNARY,
+ [EXPR_UNARY_BITWISE_NEGATE] = PREC_UNARY,
+ [EXPR_UNARY_NOT] = PREC_UNARY,
+ [EXPR_UNARY_DEREFERENCE] = PREC_UNARY,
+ [EXPR_UNARY_TAKE_ADDRESS] = PREC_UNARY,
+ [EXPR_UNARY_POSTFIX_INCREMENT] = PREC_POSTFIX,
+ [EXPR_UNARY_POSTFIX_DECREMENT] = PREC_POSTFIX,
+ [EXPR_UNARY_PREFIX_INCREMENT] = PREC_UNARY,
+ [EXPR_UNARY_PREFIX_DECREMENT] = PREC_UNARY,
+ [EXPR_UNARY_CAST] = PREC_UNARY,
+ [EXPR_UNARY_CAST_IMPLICIT] = PREC_UNARY,
+ [EXPR_UNARY_ASSUME] = PREC_PRIMARY,
+ [EXPR_UNARY_DELETE] = PREC_UNARY,
+ [EXPR_UNARY_DELETE_ARRAY] = PREC_UNARY,
+ [EXPR_UNARY_THROW] = PREC_ASSIGNMENT,
+
+ [EXPR_BINARY_ADD] = PREC_ADDITIVE,
+ [EXPR_BINARY_SUB] = PREC_ADDITIVE,
+ [EXPR_BINARY_MUL] = PREC_MULTIPLICATIVE,
+ [EXPR_BINARY_DIV] = PREC_MULTIPLICATIVE,
+ [EXPR_BINARY_MOD] = PREC_MULTIPLICATIVE,
+ [EXPR_BINARY_EQUAL] = PREC_EQUALITY,
+ [EXPR_BINARY_NOTEQUAL] = PREC_EQUALITY,
+ [EXPR_BINARY_LESS] = PREC_RELATIONAL,
+ [EXPR_BINARY_LESSEQUAL] = PREC_RELATIONAL,
+ [EXPR_BINARY_GREATER] = PREC_RELATIONAL,
+ [EXPR_BINARY_GREATEREQUAL] = PREC_RELATIONAL,
+ [EXPR_BINARY_BITWISE_AND] = PREC_AND,
+ [EXPR_BINARY_BITWISE_OR] = PREC_OR,
+ [EXPR_BINARY_BITWISE_XOR] = PREC_XOR,
+ [EXPR_BINARY_LOGICAL_AND] = PREC_LOGICAL_AND,
+ [EXPR_BINARY_LOGICAL_OR] = PREC_LOGICAL_OR,
+ [EXPR_BINARY_SHIFTLEFT] = PREC_SHIFT,
+ [EXPR_BINARY_SHIFTRIGHT] = PREC_SHIFT,
+ [EXPR_BINARY_ASSIGN] = PREC_ASSIGNMENT,
+ [EXPR_BINARY_MUL_ASSIGN] = PREC_ASSIGNMENT,
+ [EXPR_BINARY_DIV_ASSIGN] = PREC_ASSIGNMENT,
+ [EXPR_BINARY_MOD_ASSIGN] = PREC_ASSIGNMENT,
+ [EXPR_BINARY_ADD_ASSIGN] = PREC_ASSIGNMENT,
+ [EXPR_BINARY_SUB_ASSIGN] = PREC_ASSIGNMENT,
+ [EXPR_BINARY_SHIFTLEFT_ASSIGN] = PREC_ASSIGNMENT,
+ [EXPR_BINARY_SHIFTRIGHT_ASSIGN] = PREC_ASSIGNMENT,
+ [EXPR_BINARY_BITWISE_AND_ASSIGN] = PREC_ASSIGNMENT,
+ [EXPR_BINARY_BITWISE_XOR_ASSIGN] = PREC_ASSIGNMENT,
+ [EXPR_BINARY_BITWISE_OR_ASSIGN] = PREC_ASSIGNMENT,
+ [EXPR_BINARY_COMMA] = PREC_EXPRESSION,
+
+ [EXPR_BINARY_ISGREATER] = PREC_PRIMARY,
+ [EXPR_BINARY_ISGREATEREQUAL] = PREC_PRIMARY,
+ [EXPR_BINARY_ISLESS] = PREC_PRIMARY,
+ [EXPR_BINARY_ISLESSEQUAL] = PREC_PRIMARY,
+ [EXPR_BINARY_ISLESSGREATER] = PREC_PRIMARY,
+ [EXPR_BINARY_ISUNORDERED] = PREC_PRIMARY
};
assert((size_t)kind < lengthof(prec));
unsigned res = prec[kind];
return res;
}
-/**
- * Print a constant expression.
- *
- * @param cnst the constant expression
- */
-static void print_const(const const_expression_t *cnst)
-{
- if (cnst->base.type == NULL)
- return;
-
- const type_t *const type = skip_typeref(cnst->base.type);
-
- if (is_type_atomic(type, ATOMIC_TYPE_BOOL)) {
- print_string(cnst->v.int_value ? "true" : "false");
- } else if (is_type_integer(type)) {
- print_format("%lld", cnst->v.int_value);
- } else if (is_type_float(type)) {
- long double const val = cnst->v.float_value;
-#ifdef _WIN32
- /* ARG, no way to print long double */
- print_format("%.20g", (double)val);
-#else
- print_format("%.20Lg", val);
-#endif
- if (isfinite(val) && truncl(val) == val)
- print_string(".0");
- } else {
- panic("unknown constant");
- }
-
- char const* suffix;
- switch (type->atomic.akind) {
- case ATOMIC_TYPE_UINT: suffix = "U"; break;
- case ATOMIC_TYPE_LONG: suffix = "L"; break;
- case ATOMIC_TYPE_ULONG: suffix = "UL"; break;
- case ATOMIC_TYPE_LONGLONG: suffix = "LL"; break;
- case ATOMIC_TYPE_ULONGLONG: suffix = "ULL"; break;
- case ATOMIC_TYPE_FLOAT: suffix = "F"; break;
- case ATOMIC_TYPE_LONG_DOUBLE: suffix = "L"; break;
-
- default: return;
- }
- print_string(suffix);
-}
-
/**
* Print a quoted string constant.
*
* @param border the border char
* @param skip number of chars to skip at the end
*/
-static void print_quoted_string(const string_t *const string, char border, int skip)
+static void print_quoted_string(const string_t *const string, char border,
+ int skip)
{
print_char(border);
const char *end = string->begin + string->size - skip;
print_string("\\");
}
switch (tc) {
- case '\\': print_string("\\\\"); break;
- case '\a': print_string("\\a"); break;
- case '\b': print_string("\\b"); break;
- case '\f': print_string("\\f"); break;
- case '\n': print_string("\\n"); break;
- case '\r': print_string("\\r"); break;
- case '\t': print_string("\\t"); break;
- case '\v': print_string("\\v"); break;
- case '\?': print_string("\\?"); break;
+ case '\\': print_string("\\\\"); break;
+ case '\a': print_string("\\a"); break;
+ case '\b': print_string("\\b"); break;
+ case '\f': print_string("\\f"); break;
+ case '\n': print_string("\\n"); break;
+ case '\r': print_string("\\r"); break;
+ case '\t': print_string("\\t"); break;
+ case '\v': print_string("\\v"); break;
+ case '\?': print_string("\\?"); break;
case 27:
if (c_mode & _GNUC) {
print_string("\\e"); break;
print_char(border);
}
-/**
- * Prints a wide string literal expression.
- *
- * @param wstr the wide string literal expression
- * @param border the border char
- * @param skip number of chars to skip at the end
- */
-static void print_quoted_wide_string(const wide_string_t *const wstr,
- char border, int skip)
+/**
+ * Print a string literal expression in double quotes.
+ *
+ * Wide string literals (EXPR_WIDE_STRING_LITERAL) get an 'L' prefix; the
+ * text itself is emitted by print_quoted_string() with a skip count of 1.
+ *
+ * @param literal  the (narrow or wide) string literal expression
+ */
+static void print_string_literal(const string_literal_expression_t *literal)
{
- print_string("L");
- print_char(border);
- const wchar_rep_t *end = wstr->begin + wstr->size - skip;
- for (const wchar_rep_t *c = wstr->begin; c != end; ++c) {
- switch (*c) {
- case L'\"': print_string("\\\""); break;
- case L'\\': print_string("\\\\"); break;
- case L'\a': print_string("\\a"); break;
- case L'\b': print_string("\\b"); break;
- case L'\f': print_string("\\f"); break;
- case L'\n': print_string("\\n"); break;
- case L'\r': print_string("\\r"); break;
- case L'\t': print_string("\\t"); break;
- case L'\v': print_string("\\v"); break;
- case L'\?': print_string("\\?"); break;
- case 27:
- if (c_mode & _GNUC) {
- print_string("\\e"); break;
- }
- /* FALLTHROUGH */
- default: {
- const unsigned tc = *c;
- if (tc < 0x80U) {
- if (isprint(*c)) {
- print_char(*c);
- } else {
- print_format("\\%03o", tc);
- }
- } else {
- print_char(tc);
- }
- }
- }
+	const string_t *const text = &literal->value;
+	if (EXPR_WIDE_STRING_LITERAL == literal->base.kind) {
+		print_char('L');
}
- print_char(border);
+	print_quoted_string(text, '"', 1);
}
-/**
- * Print a constant character expression.
- *
- * @param cnst the constant character expression
- */
-static void print_character_constant(const const_expression_t *cnst)
-{
- print_quoted_string(&cnst->v.character, '\'', 0);
-}
-
-static void print_wide_character_constant(const const_expression_t *cnst)
+/**
+ * Print a literal expression.
+ *
+ * Numeric and boolean literals are written from their stored text (with a
+ * leading "0x" for the hexadecimal kinds) followed by the suffix symbol if
+ * one is present. Character literals are written in single quotes, wide
+ * ones with an 'L' prefix. The MS no-op literal prints as "__noop".
+ *
+ * @param literal  the literal expression to print
+ */
+static void print_literal(const literal_expression_t *literal)
{
- print_quoted_wide_string(&cnst->v.wide_character, '\'', 0);
-}
-
-/**
- * Prints a string literal expression.
- *
- * @param string_literal the string literal expression
- */
-static void print_string_literal(
- const string_literal_expression_t *string_literal)
-{
- print_quoted_string(&string_literal->value, '"', 1);
+	const string_t *const text = &literal->value;
+
+	switch (literal->base.kind) {
+	case EXPR_LITERAL_CHARACTER:
+	case EXPR_LITERAL_WIDE_CHARACTER:
+		if (literal->base.kind == EXPR_LITERAL_WIDE_CHARACTER)
+			print_char('L');
+		print_quoted_string(text, '\'', 0);
+		break;
+
+	case EXPR_LITERAL_BOOLEAN:
+	case EXPR_LITERAL_INTEGER:
+	case EXPR_LITERAL_INTEGER_OCTAL:
+	case EXPR_LITERAL_INTEGER_HEXADECIMAL:
+	case EXPR_LITERAL_FLOATINGPOINT:
+	case EXPR_LITERAL_FLOATINGPOINT_HEXADECIMAL:
+		/* the stored text of hexadecimal literals lacks the prefix */
+		if (literal->base.kind == EXPR_LITERAL_INTEGER_HEXADECIMAL
+		    || literal->base.kind == EXPR_LITERAL_FLOATINGPOINT_HEXADECIMAL)
+			print_string("0x");
+		print_stringrep(text);
+		if (literal->suffix != NULL)
+			print_symbol(literal->suffix);
+		break;
+
+	case EXPR_LITERAL_MS_NOOP:
+		print_string("__noop");
+		break;
+
+	default:
+		print_string("INVALID LITERAL KIND");
+		break;
+	}
}
/**
print_string(s);
}
-static void print_wide_string_literal(
- const wide_string_literal_expression_t *const wstr)
-{
- print_quoted_wide_string(&wstr->value, '"', 1);
-}
-
static void print_compound_literal(
const compound_literal_expression_t *expression)
{
case EXPR_INVALID:
print_string("$invalid expression$");
break;
- case EXPR_CHARACTER_CONSTANT:
- print_character_constant(&expression->conste);
- break;
- case EXPR_WIDE_CHARACTER_CONSTANT:
- print_wide_character_constant(&expression->conste);
+ case EXPR_WIDE_STRING_LITERAL:
+ case EXPR_STRING_LITERAL:
+ print_string_literal(&expression->string_literal);
break;
- case EXPR_CONST:
- print_const(&expression->conste);
+ EXPR_LITERAL_CASES
+ print_literal(&expression->literal);
break;
case EXPR_FUNCNAME:
print_funcname(&expression->funcname);
break;
- case EXPR_STRING_LITERAL:
- print_string_literal(&expression->string);
- break;
- case EXPR_WIDE_STRING_LITERAL:
- print_wide_string_literal(&expression->wide_string);
- break;
case EXPR_COMPOUND_LITERAL:
print_compound_literal(&expression->compound_literal);
break;
print_statement_expression(&expression->statement);
break;
+#if 0
default:
/* TODO */
print_format("some expression of type %d", (int)expression->kind);
break;
+#endif
}
if (parenthesized)
print_string(")");
print_quoted_string(&initializer->string.string, '"', 1);
return;
case INITIALIZER_WIDE_STRING:
- print_quoted_wide_string(&initializer->wide_string.string, '"', 1);
+ print_quoted_string(&initializer->string.string, '"', 1);
return;
case INITIALIZER_DESIGNATOR:
print_designator(initializer->designator.designator);
bool is_constant_expression(const expression_t *expression)
{
switch (expression->kind) {
-
- case EXPR_CONST:
- case EXPR_CHARACTER_CONSTANT:
- case EXPR_WIDE_CHARACTER_CONSTANT:
+ EXPR_LITERAL_CASES
case EXPR_CLASSIFY_TYPE:
case EXPR_OFFSETOF:
case EXPR_ALIGNOF:
#include "entity.h"
typedef struct expression_base_t expression_base_t;
-typedef struct const_expression_t const_expression_t;
+typedef struct literal_expression_t literal_expression_t;
typedef struct string_literal_expression_t string_literal_expression_t;
typedef struct funcname_expression_t funcname_expression_t;
-typedef struct wide_string_literal_expression_t wide_string_literal_expression_t;
typedef struct compound_literal_expression_t compound_literal_expression_t;
typedef struct reference_expression_t reference_expression_t;
typedef struct cast_expression_t cast_expression_t;
long fold_constant_to_int(const expression_t *expression);
bool fold_constant_to_bool(const expression_t *expression);
+/**
+ * The type of a literal is usually the biggest type that can hold the value.
+ * Since this is backend dependent, the parser needs this call exposed.
+ * Works for EXPR_LITERAL_* expressions.
+ */
+void determine_literal_type(literal_expression_t *literal);
+
#endif
return irentity;
}
+/**
+ * Build a SymConst node that refers to an entity (symconst_addr_ent).
+ *
+ * @param dbgi    debug info attached to the new node
+ * @param mode    the (reference) mode of the resulting SymConst
+ * @param entity  the entity to refer to; must not be NULL
+ * @return the node produced by new_d_SymConst()
+ */
+static ir_node *create_symconst(dbg_info *dbgi, ir_mode *mode,
+                                ir_entity *entity)
+{
+	assert(entity != NULL);
+	union symconst_symbol target = { .entity_p = entity };
+	return new_d_SymConst(dbgi, mode, target, symconst_addr_ent);
+}
+
static ir_node *create_conv(dbg_info *dbgi, ir_node *value, ir_mode *dest_mode)
{
ir_mode *value_mode = get_irn_mode(value);
}
/**
- * Creates a Const node representing a constant.
+ * Creates a SymConst node representing a wide string literal.
+ *
+ * A private, constant entity with a unique "str.%u" ident is created in the
+ * global type. Its type is an array of ir_type_wchar_t with wstrlen()
+ * elements, and each element is initialized from one read_utf8_char()
+ * result decoded from the bytes of the literal.
+ *
+ * @param literal the wide string literal
+ * @return the node returned by create_symconst() for the new entity
*/
-static ir_node *const_to_firm(const const_expression_t *cnst)
+static ir_node *wide_string_literal_to_firm(
+ const string_literal_expression_t *literal)
{
- dbg_info *dbgi = get_dbg_info(&cnst->base.source_position);
- type_t *type = skip_typeref(cnst->base.type);
- ir_mode *mode = get_ir_mode_storage(type);
+ ir_type *const global_type = get_glob_type();
+ ir_type *const elem_type = ir_type_wchar_t;
+ dbg_info *const dbgi = get_dbg_info(&literal->base.source_position);
+ ir_type *const type = new_type_array(1, elem_type);
- char buf[128];
- tarval *tv;
- size_t len;
- if (mode_is_float(mode)) {
- tv = new_tarval_from_double(cnst->v.float_value, mode);
- } else {
- if (mode_is_signed(mode)) {
- len = snprintf(buf, sizeof(buf), "%lld", cnst->v.int_value);
- } else {
- len = snprintf(buf, sizeof(buf), "%llu",
- (unsigned long long) cnst->v.int_value);
- }
- tv = new_tarval_from_str(buf, len, mode);
+ ident *const id = id_unique("str.%u");
+ ir_entity *const entity = new_d_entity(global_type, id, type, dbgi);
+ set_entity_ld_ident(entity, id);
+ set_entity_visibility(entity, ir_visibility_private);
+ add_entity_linkage(entity, IR_LINKAGE_CONSTANT);
+
+ ir_mode *const mode = get_type_mode(elem_type);
+ const size_t slen = wstrlen(&literal->value); /* number of array elements; presumably counts decoded characters, not bytes -- confirm in wstrlen() */
+
+ set_array_lower_bound_int(type, 0, 0);
+ set_array_upper_bound_int(type, 0, slen);
+ set_type_size_bytes(type, slen * get_mode_size_bytes(mode));
+ set_type_state(type, layout_fixed);
+
+ ir_initializer_t *initializer = create_initializer_compound(slen);
+ const char *p = literal->value.begin; /* read cursor, advanced by read_utf8_char() */
+ for (size_t i = 0; i < slen; ++i) {
+ assert(p < literal->value.begin + literal->value.size); /* the cursor must stay inside the stored bytes */
+ utf32 v = read_utf8_char(&p);
+ tarval *tv = new_tarval_from_long(v, mode);
+ ir_initializer_t *val = create_initializer_tarval(tv);
+ set_initializer_compound_value(initializer, i, val);
}
+ set_entity_initializer(entity, initializer);
- ir_node *res = new_d_Const(dbgi, tv);
- ir_mode *mode_arith = get_ir_mode_arithmetic(type);
- return create_conv(dbgi, res, mode_arith);
+ return create_symconst(dbgi, mode_P_data, entity);
}
/**
- * Creates a Const node representing a character constant.
+ * Creates a SymConst node representing a string constant.
+ *
+ * A private, constant entity is created in the global type. Its type is an
+ * array of ir_type_const_char with value->size elements and every byte of
+ * the value becomes one element of the compound initializer.
+ *
+ * @param src_pos the source position of the string constant
+ * @param id_prefix a prefix for the name of the generated string constant;
+ *                  passed unchanged to id_unique() (the visible caller
+ *                  passes the pattern "str.%u")
+ * @param value the value of the string constant
+ * @return the node returned by create_symconst() for the new entity
*/
-static ir_node *character_constant_to_firm(const const_expression_t *cnst)
+static ir_node *string_to_firm(const source_position_t *const src_pos,
+ const char *const id_prefix,
+ const string_t *const value)
{
- dbg_info *dbgi = get_dbg_info(&cnst->base.source_position);
- ir_mode *mode = get_ir_mode_arithmetic(cnst->base.type);
+ ir_type *const global_type = get_glob_type();
+ dbg_info *const dbgi = get_dbg_info(src_pos);
+ ir_type *const type = new_type_array(1, ir_type_const_char);
- long long int v;
- size_t const size = cnst->v.character.size;
- if (size == 1 && char_is_signed) {
- v = (signed char)cnst->v.character.begin[0];
- } else {
- v = 0;
- for (size_t i = 0; i < size; ++i) {
- v = (v << 8) | ((unsigned char)cnst->v.character.begin[i]);
+ ident *const id = id_unique(id_prefix);
+ ir_entity *const entity = new_d_entity(global_type, id, type, dbgi);
+ set_entity_ld_ident(entity, id);
+ set_entity_visibility(entity, ir_visibility_private);
+ add_entity_linkage(entity, IR_LINKAGE_CONSTANT);
+
+ ir_type *const elem_type = ir_type_const_char;
+ ir_mode *const mode = get_type_mode(elem_type);
+
+ const char* const string = value->begin;
+ const size_t slen = value->size; /* element count equals the byte count of the value */
+
+ set_array_lower_bound_int(type, 0, 0);
+ set_array_upper_bound_int(type, 0, slen);
+ set_type_size_bytes(type, slen);
+ set_type_state(type, layout_fixed);
+
+ ir_initializer_t *initializer = create_initializer_compound(slen);
+ for (size_t i = 0; i < slen; ++i) {
+ tarval *tv = new_tarval_from_long(string[i], mode); /* one tarval per byte of the string */
+ ir_initializer_t *val = create_initializer_tarval(tv);
+ set_initializer_compound_value(initializer, i, val);
+ }
+ set_entity_initializer(entity, initializer);
+
+ return create_symconst(dbgi, mode_P_data, entity);
+}
+
+/**
+ * Try to represent an integer literal in the given atomic type.
+ *
+ * The text of the literal is converted by new_integer_tarval_from_str()
+ * using the mode that atomic_modes[] holds for the type. On success the
+ * type and target_value of the literal are set.
+ *
+ * @param literal  the integer literal (digits in literal->value)
+ * @param type     the candidate type; must be of kind TYPE_ATOMIC
+ * @param base     the numeric base of the digits
+ * @return true if a tarval was created, false if the conversion yielded
+ *         tarval_bad (the literal is left unmodified in that case)
+ */
+static bool try_create_integer(literal_expression_t *literal,
+                               type_t *type, unsigned char base)
+{
+	assert(type->kind == TYPE_ATOMIC);
+
+	ir_mode *const mode   = atomic_modes[type->atomic.akind];
+	tarval  *const result = new_integer_tarval_from_str(literal->value.begin,
+	                                                    literal->value.size,
+	                                                    1, base, mode);
+	if (result != tarval_bad) {
+		literal->base.type    = type;
+		literal->target_value = result;
+		return true;
+	}
+	return false;
+}
+
+static void create_integer_tarval(literal_expression_t *literal)
+{ /* Determines type and tarval of an integer literal: candidate types are
+   * tried from int up to unsigned long long, filtered by the u/l counts of
+   * the suffix and by the base; try_create_integer() stores the first
+   * candidate that succeeds. */
+ unsigned us = 0; /* number of 'u'/'U' characters in the suffix */
+ unsigned ls = 0; /* number of 'l'/'L' characters in the suffix */
+ symbol_t *suffix = literal->suffix;
+ /* parse suffix: count the unsigned and long markers */
+ if (suffix != NULL) {
+ for (const char *c = suffix->string; *c != '\0'; ++c) {
+ if (*c == 'u' || *c == 'U') { ++us; }
+ if (*c == 'l' || *c == 'L') { ++ls; }
+ }
}
- char buf[128];
- size_t len = snprintf(buf, sizeof(buf), "%lld", v);
- tarval *tv = new_tarval_from_str(buf, len, mode);
- return new_d_Const(dbgi, tv);
+ unsigned char base = 10; /* decimal unless the literal kind says otherwise */
+ if (literal->base.kind == EXPR_LITERAL_INTEGER_OCTAL) {
+ base = 8;
+ } else if (literal->base.kind == EXPR_LITERAL_INTEGER_HEXADECIMAL) {
+ base = 16;
+ } else {
+ assert(literal->base.kind == EXPR_LITERAL_INTEGER);
+ }
+
+ tarval_int_overflow_mode_t old_mode = tarval_get_integer_overflow_mode(); /* restored at "finished" */
+
+ /* now try if the constant is small enough for some types; with
+  * TV_OVERFLOW_BAD a value that does not fit is presumably reported as
+  * tarval_bad, which makes try_create_integer() return false */
+ tarval_set_integer_overflow_mode(TV_OVERFLOW_BAD);
+ if (ls < 1) { /* no 'l' in the suffix: int and unsigned int are candidates */
+ if (us == 0 && try_create_integer(literal, type_int, base))
+ goto finished;
+ if ((us == 1 || base != 10) /* unsigned candidates need a 'u' suffix or a non-decimal base */
+ && try_create_integer(literal, type_unsigned_int, base))
+ goto finished;
+ }
+ if (ls < 2) { /* at most one 'l': long and unsigned long are candidates */
+ if (us == 0 && try_create_integer(literal, type_long, base))
+ goto finished;
+ if ((us == 1 || base != 10)
+ && try_create_integer(literal, type_unsigned_long, base))
+ goto finished;
+ }
+ /* last try? then we should not report tarval_bad */
+ if (us != 1 && base == 10)
+ tarval_set_integer_overflow_mode(TV_OVERFLOW_WRAP);
+ if (us == 0 && try_create_integer(literal, type_long_long, base))
+ goto finished;
+
+ /* last try */
+ assert(us == 1 || base != 10);
+ tarval_set_integer_overflow_mode(TV_OVERFLOW_WRAP);
+ bool res = try_create_integer(literal, type_unsigned_long_long, base);
+ if (res == false)
+ panic("internal error when parsing number literal");
+
+finished:
+ tarval_set_integer_overflow_mode(old_mode); /* undo the temporary overflow mode */
+}
+
+/**
+ * Determine the type of a literal expression.
+ *
+ * Only the integer kinds (decimal, octal, hexadecimal) are processed here:
+ * they are passed on to create_integer_tarval(). Literals of any other
+ * kind are left untouched.
+ *
+ * @param literal  the literal expression to classify
+ */
+void determine_literal_type(literal_expression_t *literal)
+{
+	const bool is_integer_literal
+		=  literal->base.kind == EXPR_LITERAL_INTEGER
+		|| literal->base.kind == EXPR_LITERAL_INTEGER_OCTAL
+		|| literal->base.kind == EXPR_LITERAL_INTEGER_HEXADECIMAL;
+
+	if (is_integer_literal)
+		create_integer_tarval(literal);
}
/**
- * Creates a Const node representing a wide character constant.
+ * Creates a Const node representing a constant.
+ *
+ * The tarval is built according to literal->base.kind:
+ *  - wide character: one read_utf8_char() result, formatted into a buffer
+ *    and parsed in the storage mode of the type
+ *  - character: a single char is sign-extended if char_is_signed; otherwise
+ *    all bytes are packed with the first byte most significant
+ *  - integer kinds: the tarval already stored in literal->target_value
+ *  - floating point kinds: parsed from the literal text ("0x" is prepended
+ *    for the hexadecimal kind)
+ *  - boolean: one if the text starts with 't', null if it starts with 'f'
+ *  - MS __noop: null
+ * Any other kind ends in panic().
+ *
+ * @param literal the literal expression
+ * @return the Const node after create_conv() to the arithmetic mode of the
+ *         literal's type
*/
-static ir_node *wide_character_constant_to_firm(const const_expression_t *cnst)
+static ir_node *literal_to_firm(const literal_expression_t *literal)
{
- dbg_info *dbgi = get_dbg_info(&cnst->base.source_position);
- ir_mode *mode = get_ir_mode_arithmetic(cnst->base.type);
+ type_t *type = skip_typeref(literal->base.type);
+ ir_mode *mode = get_ir_mode_storage(type);
+ const char *string = literal->value.begin;
+ size_t size = literal->value.size;
+ tarval *tv;
- long long int v = cnst->v.wide_character.begin[0];
+ switch (literal->base.kind) {
+ case EXPR_LITERAL_WIDE_CHARACTER: {
+ utf32 v = read_utf8_char(&string); /* only the first decoded character is used */
+ char buf[128];
+ size_t len = snprintf(buf, sizeof(buf), UTF32_PRINTF_FORMAT, v);
- char buf[128];
- size_t len = snprintf(buf, sizeof(buf), "%lld", v);
- tarval *tv = new_tarval_from_str(buf, len, mode);
+ tv = new_tarval_from_str(buf, len, mode);
+ goto make_const;
+ }
+ case EXPR_LITERAL_CHARACTER: {
+ long long int v;
+ if (size == 1 && char_is_signed) { /* single char: keep the sign of a signed char */
+ v = (signed char)string[0];
+ } else {
+ v = 0;
+ for (size_t i = 0; i < size; ++i) {
+ v = (v << 8) | ((unsigned char)string[i]); /* earlier bytes end up in the higher bits */
+ }
+ }
+ char buf[128];
+ size_t len = snprintf(buf, sizeof(buf), "%lld", v);
- return new_d_Const(dbgi, tv);
+ tv = new_tarval_from_str(buf, len, mode);
+ goto make_const;
+ }
+ case EXPR_LITERAL_INTEGER:
+ case EXPR_LITERAL_INTEGER_OCTAL:
+ case EXPR_LITERAL_INTEGER_HEXADECIMAL:
+ assert(literal->target_value != NULL); /* expected to have been set by try_create_integer() */
+ tv = literal->target_value;
+ goto make_const;
+ case EXPR_LITERAL_FLOATINGPOINT:
+ tv = new_tarval_from_str(string, size, mode);
+ goto make_const;
+ case EXPR_LITERAL_FLOATINGPOINT_HEXADECIMAL: {
+ char *buffer = alloca(size + 2); /* room for "0x" plus the literal text, no terminator */
+ memcpy(buffer, "0x", 2);
+ memcpy(buffer+2, string, size);
+ tv = new_tarval_from_str(buffer, size+2, mode);
+ goto make_const;
+ }
+ case EXPR_LITERAL_BOOLEAN:
+ if (string[0] == 't') {
+ tv = get_mode_one(mode);
+ } else {
+ assert(string[0] == 'f');
+ tv = get_mode_null(mode);
+ }
+ goto make_const;
+ case EXPR_LITERAL_MS_NOOP:
+ tv = get_mode_null(mode);
+ goto make_const;
+ default:
+ break;
+ }
+ panic("Invalid literal kind found");
+
+make_const: ; /* empty statement: a label may not be followed directly by a declaration */
+ dbg_info *dbgi = get_dbg_info(&literal->base.source_position);
+ ir_node *res = new_d_Const(dbgi, tv);
+ ir_mode *mode_arith = get_ir_mode_arithmetic(type);
+ return create_conv(dbgi, res, mode_arith);
}
/*
region);
}
-
-/**
- * Creates a SymConst for a given entity.
- *
- * @param dbgi debug info
- * @param mode the (reference) mode for the SymConst
- * @param entity the entity
- */
-static ir_node *create_symconst(dbg_info *dbgi, ir_mode *mode,
- ir_entity *entity)
-{
- assert(entity != NULL);
- union symconst_symbol sym;
- sym.entity_p = entity;
- return new_d_SymConst(dbgi, mode, sym, symconst_addr_ent);
-}
-
/**
* Creates a trampoline for a function represented by an entity.
*
return new_Proj(irn, mode, pn_Builtin_1_result);
}
-/**
- * Creates a SymConst node representing a string constant.
- *
- * @param src_pos the source position of the string constant
- * @param id_prefix a prefix for the name of the generated string constant
- * @param value the value of the string constant
- */
-static ir_node *string_to_firm(const source_position_t *const src_pos,
- const char *const id_prefix,
- const string_t *const value)
-{
- ir_type *const global_type = get_glob_type();
- dbg_info *const dbgi = get_dbg_info(src_pos);
- ir_type *const type = new_type_array(1, ir_type_const_char);
-
- ident *const id = id_unique(id_prefix);
- ir_entity *const entity = new_d_entity(global_type, id, type, dbgi);
- set_entity_ld_ident(entity, id);
- set_entity_visibility(entity, ir_visibility_private);
- add_entity_linkage(entity, IR_LINKAGE_CONSTANT);
-
- ir_type *const elem_type = ir_type_const_char;
- ir_mode *const mode = get_type_mode(elem_type);
-
- const char* const string = value->begin;
- const size_t slen = value->size;
-
- set_array_lower_bound_int(type, 0, 0);
- set_array_upper_bound_int(type, 0, slen);
- set_type_size_bytes(type, slen);
- set_type_state(type, layout_fixed);
-
- ir_initializer_t *initializer = create_initializer_compound(slen);
- for (size_t i = 0; i < slen; ++i) {
- tarval *tv = new_tarval_from_long(string[i], mode);
- ir_initializer_t *val = create_initializer_tarval(tv);
- set_initializer_compound_value(initializer, i, val);
- }
- set_entity_initializer(entity, initializer);
-
- return create_symconst(dbgi, mode_P_data, entity);
-}
-
-/**
- * Creates a SymConst node representing a string literal.
- *
- * @param literal the string literal
- */
-static ir_node *string_literal_to_firm(
- const string_literal_expression_t* literal)
-{
- return string_to_firm(&literal->base.source_position, "str.%u",
- &literal->value);
-}
-
-/**
- * Creates a SymConst node representing a wide string literal.
- *
- * @param literal the wide string literal
- */
-static ir_node *wide_string_literal_to_firm(
- const wide_string_literal_expression_t* const literal)
-{
- ir_type *const global_type = get_glob_type();
- ir_type *const elem_type = ir_type_wchar_t;
- dbg_info *const dbgi = get_dbg_info(&literal->base.source_position);
- ir_type *const type = new_type_array(1, elem_type);
-
- ident *const id = id_unique("str.%u");
- ir_entity *const entity = new_d_entity(global_type, id, type, dbgi);
- set_entity_ld_ident(entity, id);
- set_entity_visibility(entity, ir_visibility_private);
- add_entity_linkage(entity, IR_LINKAGE_CONSTANT);
-
- ir_mode *const mode = get_type_mode(elem_type);
-
- const wchar_rep_t *const string = literal->value.begin;
- const size_t slen = literal->value.size;
-
- set_array_lower_bound_int(type, 0, 0);
- set_array_upper_bound_int(type, 0, slen);
- set_type_size_bytes(type, slen * get_mode_size_bytes(mode));
- set_type_state(type, layout_fixed);
-
- ir_initializer_t *initializer = create_initializer_compound(slen);
- for (size_t i = 0; i < slen; ++i) {
- tarval *tv = new_tarval_from_long(string[i], mode);
- ir_initializer_t *val = create_initializer_tarval(tv);
- set_initializer_compound_value(initializer, i, val);
- }
- set_entity_initializer(entity, initializer);
-
- return create_symconst(dbgi, mode_P_data, entity);
-}
-
/**
* Dereference an address.
*
#endif
switch (expression->kind) {
- case EXPR_CHARACTER_CONSTANT:
- return character_constant_to_firm(&expression->conste);
- case EXPR_WIDE_CHARACTER_CONSTANT:
- return wide_character_constant_to_firm(&expression->conste);
- case EXPR_CONST:
- return const_to_firm(&expression->conste);
+ EXPR_LITERAL_CASES
+ return literal_to_firm(&expression->literal);
case EXPR_STRING_LITERAL:
- return string_literal_to_firm(&expression->string);
+ return string_to_firm(&expression->base.source_position, "str.%u",
+ &expression->literal.value);
case EXPR_WIDE_STRING_LITERAL:
- return wide_string_literal_to_firm(&expression->wide_string);
+ return wide_string_literal_to_firm(&expression->string_literal);
case EXPR_REFERENCE:
return reference_expression_to_firm(&expression->reference);
case EXPR_REFERENCE_ENUM_VALUE:
static ir_initializer_t *create_ir_initializer_wide_string(
const initializer_wide_string_t *initializer, type_t *type)
{
- size_t string_len = initializer->string.size;
assert(type->kind == TYPE_ARRAY);
assert(type->array.size_constant);
size_t len = type->array.size;
+ size_t string_len = wstrlen(&initializer->string);
ir_initializer_t *irinitializer = create_initializer_compound(len);
- const wchar_rep_t *string = initializer->string.begin;
- ir_mode *mode = get_type_mode(ir_type_wchar_t);
+ const char *p = initializer->string.begin;
+ ir_mode *mode = get_type_mode(ir_type_wchar_t);
for (size_t i = 0; i < len; ++i) {
- wchar_rep_t c = 0;
+ utf32 c = 0;
if (i < string_len) {
- c = string[i];
+ c = read_utf8_char(&p);
}
tarval *tv = new_tarval_from_long(c, mode);
ir_initializer_t *char_initializer = create_initializer_tarval(tv);
void translation_unit_to_firm(translation_unit_t *unit)
{
+ /* initialize firm arithmetic */
+ tarval_set_integer_overflow_mode(TV_OVERFLOW_WRAP);
+
/* just to be sure */
continue_label = NULL;
break_label = NULL;
#include "type.h"
#include "entity_t.h"
#include "adt/obst.h"
+#include "target_value.h"
/** The AST obstack contains all data that must stay in the AST. */
extern struct obstack ast_obstack;
EXPR_INVALID,
EXPR_REFERENCE,
EXPR_REFERENCE_ENUM_VALUE,
- EXPR_CONST,
- EXPR_CHARACTER_CONSTANT,
- EXPR_WIDE_CHARACTER_CONSTANT,
+ EXPR_LITERAL_BOOLEAN,
+ EXPR_LITERAL_INTEGER,
+ EXPR_LITERAL_INTEGER_OCTAL,
+ EXPR_LITERAL_INTEGER_HEXADECIMAL,
+ EXPR_LITERAL_FLOATINGPOINT,
+ EXPR_LITERAL_FLOATINGPOINT_HEXADECIMAL,
+ EXPR_LITERAL_CHARACTER,
+ EXPR_LITERAL_WIDE_CHARACTER,
+ EXPR_LITERAL_MS_NOOP, /**< MS __noop extension */
EXPR_STRING_LITERAL,
EXPR_WIDE_STRING_LITERAL,
EXPR_COMPOUND_LITERAL,
EXPR_UNARY_CASES_MANDATORY \
EXPR_UNARY_CASES_OPTIONAL
+#define EXPR_LITERAL_CASES \
+ case EXPR_LITERAL_BOOLEAN: \
+ case EXPR_LITERAL_INTEGER: \
+ case EXPR_LITERAL_INTEGER_OCTAL: \
+ case EXPR_LITERAL_INTEGER_HEXADECIMAL: \
+ case EXPR_LITERAL_FLOATINGPOINT: \
+ case EXPR_LITERAL_FLOATINGPOINT_HEXADECIMAL: \
+ case EXPR_LITERAL_CHARACTER: \
+ case EXPR_LITERAL_WIDE_CHARACTER: \
+ case EXPR_LITERAL_MS_NOOP:
+
/**
* The base class of every expression.
*/
};
/**
- * A constant.
+ * Literal expression: integer/float constants, character and string
+ * literals. Replaces the former const_expression_t with its typed value
+ * union; the literal is now carried as a string_t plus an optional suffix.
*/
-struct const_expression_t {
+struct literal_expression_t {
expression_base_t base;
- union {
- long long int_value;
- long double float_value;
- string_t character;
- wide_string_t wide_character;
- } v;
- bool is_ms_noop; /**< True, if this constant is the result
- of an microsoft __noop operator */
+ string_t value; /**< literal text (begin pointer + size); presumably the spelling as lexed -- TODO confirm */
+ symbol_t *suffix; /**< presumably the number suffix symbol from the lexer, NULL if none -- TODO confirm */
+
+ /* ast2firm data */
+ tarval *target_value; /**< NOTE(review): looks like a value cached by ast2firm -- confirm */
};
struct string_literal_expression_t {
string_t value; /**< the value once assigned. */
};
-struct wide_string_literal_expression_t {
- expression_base_t base;
- wide_string_t value;
-};
-
struct compound_literal_expression_t {
expression_base_t base;
type_t *type;
union expression_t {
expression_kind_t kind;
expression_base_t base;
- const_expression_t conste;
+ literal_expression_t literal;
+ string_literal_expression_t string_literal;
funcname_expression_t funcname;
- string_literal_expression_t string;
- wide_string_literal_expression_t wide_string;
compound_literal_expression_t compound_literal;
builtin_constant_expression_t builtin_constant;
builtin_types_compatible_expression_t builtin_types_compatible;
struct initializer_wide_string_t {
initializer_base_t base;
- wide_string_t string;
+ string_t string;
};
struct initializer_designator_t {
expression_t *expression = argument->v.expression;
if (expression->kind != EXPR_STRING_LITERAL)
return NULL;
- return expression->string.value.begin;
+ return expression->literal.value.begin;
}
return NULL;
}
fputc(*f, stderr);
break;
- case 'C': {
- const wint_t val = va_arg(ap, wint_t);
- fprintf(stderr, "%lc", val);
- break;
- }
-
case 'c': {
const unsigned char val = (unsigned char) va_arg(ap, int);
fputc(val, stderr);
break;
}
+ case 'S': {
+ const string_t *str = va_arg(ap, const string_t*);
+ for (size_t i = 0; i < str->size; ++i) {
+ fputc(str->begin[i], stderr);
+ }
+ break;
+ }
+
case 'u': {
const unsigned int val = va_arg(ap, unsigned int);
fprintf(stderr, "%u", val);
* %K const token_t*
* %k token_kind_t
* %P const source_position_t *
- *
+ * %S const string_t *
*/
void diagnosticf(const char *fmt, ...);
void errorf(const source_position_t *pos, const char *fmt, ...);
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
* 02111-1307, USA.
*/
+#include <config.h>
+
#include <ctype.h>
-#include <wctype.h>
#include "adt/util.h"
#include "format_check.h"
static void warn_invalid_length_modifier(const source_position_t *pos,
const format_length_modifier_t mod,
- const wchar_rep_t conversion)
+ const utf32 conversion)
{
warningf(pos,
"invalid length modifier '%s' for conversion specifier '%%%c'",
);
}
-typedef struct vchar_t vchar_t;
-struct vchar_t {
- const void *string; /**< the string */
- size_t position; /**< current position */
- size_t size; /**< size of the string */
-
- /** return the first character of the string and setthe position to 0. */
- unsigned (*first)(vchar_t *self);
- /** return the next character of the string */
- unsigned (*next)(vchar_t *self);
- /** return non_zero if the given character is a digit */
- int (*is_digit)(unsigned vchar);
-};
-
-static unsigned string_first(vchar_t *self)
-{
- self->position = 0;
- const string_t *string = self->string;
- return string->begin[0];
-}
-
-static unsigned string_next(vchar_t *self)
-{
- ++self->position;
- const string_t *string = self->string;
- return string->begin[self->position];
-}
-
-static int string_isdigit(unsigned vchar)
-{
- return isdigit(vchar);
-}
-
-static unsigned wstring_first(vchar_t *self)
-{
- self->position = 0;
- const wide_string_t *wstring = self->string;
- return wstring->begin[0];
-}
-
-static unsigned wstring_next(vchar_t *self)
-{
- ++self->position;
- const wide_string_t *wstring = self->string;
- return wstring->begin[self->position];
-}
-
-static int wstring_isdigit(unsigned vchar)
-{
- return iswdigit(vchar);
-}
-
-static bool atend(vchar_t *self)
-{
- return self->position + 1 == self->size;
-}
-
/**
* Check printf-style format.
*/
static int internal_check_printf_format(const expression_t *fmt_expr,
- const call_argument_t *arg, const format_spec_t *spec)
+ const call_argument_t *arg,
+ const format_spec_t *spec)
{
- if (fmt_expr->kind == EXPR_UNARY_CAST_IMPLICIT) {
+ while (fmt_expr->kind == EXPR_UNARY_CAST_IMPLICIT) {
fmt_expr = fmt_expr->unary.value;
}
- vchar_t vchar;
- switch (fmt_expr->kind) {
- case EXPR_STRING_LITERAL:
- vchar.string = &fmt_expr->string.value;
- vchar.size = fmt_expr->string.value.size;
- vchar.first = string_first;
- vchar.next = string_next;
- vchar.is_digit = string_isdigit;
- break;
-
- case EXPR_WIDE_STRING_LITERAL:
- vchar.string = &fmt_expr->wide_string.value;
- vchar.size = fmt_expr->wide_string.value.size;
- vchar.first = wstring_first;
- vchar.next = wstring_next;
- vchar.is_digit = wstring_isdigit;
- break;
+ /*
+ * gettext results in expressions like (X ? "format_string" : Y).
+ * Both branches are checked recursively and the larger of the two
+ * results is returned. If true_expression is NULL, the condition
+ * itself is checked in its place.
+ */
+ if (fmt_expr->kind == EXPR_CONDITIONAL) {
+ conditional_expression_t const *const c = &fmt_expr->conditional;
+ expression_t const * t = c->true_expression;
+ if (t == NULL)
+ t = c->condition;
+ int const nt = internal_check_printf_format(t, arg, spec);
+ int const nf = internal_check_printf_format(c->false_expression, arg, spec);
+ return nt > nf ? nt : nf;
+ }
- case EXPR_CONDITIONAL: {
- conditional_expression_t const *const c = &fmt_expr->conditional;
- expression_t const * t = c->true_expression;
- if (t == NULL)
- t = c->condition;
- int const nt = internal_check_printf_format(t, arg, spec);
- int const nf = internal_check_printf_format(c->false_expression, arg, spec);
- return nt > nf ? nt : nf;
- }
+ if (fmt_expr->kind != EXPR_STRING_LITERAL
+ && fmt_expr->kind != EXPR_WIDE_STRING_LITERAL)
+ return -1;
- default:
- return -1;
- }
+ const char *string = fmt_expr->literal.value.begin;
+ size_t size = fmt_expr->literal.value.size;
+ const char *c = string;
const source_position_t *pos = &fmt_expr->base.source_position;
- unsigned fmt = vchar.first(&vchar);
unsigned num_fmt = 0;
- for (; fmt != '\0'; fmt = vchar.next(&vchar)) {
+ char fmt;
+ for (fmt = *c; fmt != '\0'; fmt = *(++c)) {
if (fmt != '%')
continue;
- fmt = vchar.next(&vchar);
+ fmt = *(++c);
+ if (fmt == '\0') {
+ warningf(pos, "dangling %% in format string");
+ break;
+ }
if (fmt == '%')
continue;
format_flags_t fmt_flags = FMT_FLAG_NONE;
if (fmt == '0') {
- fmt = vchar.next(&vchar);
+ fmt = *(++c);
fmt_flags |= FMT_FLAG_ZERO;
}
/* argument selector or minimum field width */
- if (vchar.is_digit(fmt)) {
+ /* fmt is a plain char: cast so bytes >= 0x80 are not passed to
+ * isdigit() as negative values (undefined behaviour) */
+ if (isdigit((unsigned char)fmt)) {
do {
- fmt = vchar.next(&vchar);
- } while (vchar.is_digit(fmt));
+ fmt = *(++c);
+ } while (isdigit((unsigned char)fmt));
/* digit string was ... */
if (fmt == '$') {
warningf(pos, "repeated flag '%c' in conversion specification %u", (char)fmt, num_fmt);
}
fmt_flags |= flag;
- fmt = vchar.next(&vchar);
+ fmt = *(++c);
}
break_fmt_flags:
/* minimum field width */
if (fmt == '*') {
- fmt = vchar.next(&vchar);
+ fmt = *(++c);
if (arg == NULL) {
warningf(pos, "missing argument for '*' field width in conversion specification %u", num_fmt);
return -1;
}
arg = arg->next;
} else {
- while (vchar.is_digit(fmt)) {
- fmt = vchar.next(&vchar);
+ /* skip the field width digits; the cast keeps isdigit() defined
+ * for bytes >= 0x80 when char is signed */
+ while (isdigit((unsigned char)fmt)) {
+ fmt = *(++c);
}
}
}
/* precision */
if (fmt == '.') {
- fmt = vchar.next(&vchar);
+ fmt = *(++c);
if (fmt == '*') {
- fmt = vchar.next(&vchar);
+ fmt = *(++c);
if (arg == NULL) {
warningf(pos, "missing argument for '*' precision in conversion specification %u", num_fmt);
return -1;
arg = arg->next;
} else {
/* digit string may be omitted */
- while (vchar.is_digit(fmt)) {
- fmt = vchar.next(&vchar);
+ /* skip the precision digits; the cast keeps isdigit() defined
+ * for bytes >= 0x80 when char is signed */
+ while (isdigit((unsigned char)fmt)) {
+ fmt = *(++c);
}
}
}
format_length_modifier_t fmt_mod;
switch (fmt) {
case 'h':
- fmt = vchar.next(&vchar);
+ fmt = *(++c);
if (fmt == 'h') {
- fmt = vchar.next(&vchar);
+ fmt = *(++c);
fmt_mod = FMT_MOD_hh;
} else {
fmt_mod = FMT_MOD_h;
break;
case 'l':
- fmt = vchar.next(&vchar);
+ fmt = *(++c);
if (fmt == 'l') {
- fmt = vchar.next(&vchar);
+ fmt = *(++c);
fmt_mod = FMT_MOD_ll;
} else {
fmt_mod = FMT_MOD_l;
}
break;
- case 'L': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_L; break;
- case 'j': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_j; break;
- case 't': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_t; break;
- case 'z': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_z; break;
- case 'q': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_q; break;
+ case 'L': fmt = *(++c); fmt_mod = FMT_MOD_L; break;
+ case 'j': fmt = *(++c); fmt_mod = FMT_MOD_j; break;
+ case 't': fmt = *(++c); fmt_mod = FMT_MOD_t; break;
+ case 'z': fmt = *(++c); fmt_mod = FMT_MOD_z; break;
+ case 'q': fmt = *(++c); fmt_mod = FMT_MOD_q; break;
/* microsoft mode */
case 'w':
if (c_mode & _MS) {
- fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_w;
+ fmt = *(++c); fmt_mod = FMT_MOD_w;
} else {
fmt_mod = FMT_MOD_NONE;
}
break;
case 'I':
if (c_mode & _MS) {
- fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_I;
+ fmt = *(++c); fmt_mod = FMT_MOD_I;
if (fmt == '3') {
- fmt = vchar.next(&vchar);
+ fmt = *(++c);
if (fmt == '2') {
- fmt = vchar.next(&vchar);
+ fmt = *(++c);
fmt_mod = FMT_MOD_I32;
} else {
/* rewind */
- --vchar.position;
+ fmt = *(--c);
}
} else if (fmt == '6') {
- fmt = vchar.next(&vchar);
+ fmt = *(++c);
if (fmt == '4') {
- fmt = vchar.next(&vchar);
+ fmt = *(++c);
fmt_mod = FMT_MOD_I64;
} else {
/* rewind */
- --vchar.position;
+ fmt = *(--c);
}
}
} else {
break;
}
- if (fmt == '\0') {
- warningf(pos, "dangling %% in format string");
- break;
- }
type_t *expected_type;
type_qualifiers_t expected_qual = TYPE_QUALIFIER_NONE;
break;
default:
- warningf(pos, "encountered unknown conversion specifier '%%%C' at position %u", (wint_t)fmt, num_fmt);
+ warningf(pos, "encountered unknown conversion specifier '%%%c' at position %u", fmt, num_fmt);
if (arg == NULL) {
warningf(pos, "too few arguments for format string");
return -1;
goto next_arg;
}
}
- } else {
- if (get_unqualified_type(arg_skip) == expected_type_skip) {
- goto next_arg;
- }
+ } else if (get_unqualified_type(arg_skip) == expected_type_skip) {
+ goto next_arg;
}
if (is_type_valid(arg_skip)) {
warningf(pos,
next_arg:
arg = arg->next;
}
- if (!atend(&vchar)) {
+ assert(fmt == '\0');
+ if (c+1 < string + size) {
warningf(pos, "format string contains '\\0'");
}
return num_fmt;
/**
* Check printf-style format.
*/
-static void check_printf_format(call_argument_t const *arg, format_spec_t const *const spec)
+static void check_printf_format(call_argument_t const *arg,
+ format_spec_t const *const spec)
{
/* find format arg */
size_t idx = 0;
++num_args;
if (num_args > (size_t)num_fmt) {
warningf(&fmt_expr->base.source_position,
- "%u argument%s but only %u format specifier%s",
- num_args, num_args != 1 ? "s" : "",
- num_fmt, num_fmt != 1 ? "s" : "");
+ "%u argument%s but only %u format specifier%s",
+ num_args, num_args != 1 ? "s" : "",
+ num_fmt, num_fmt != 1 ? "s" : "");
}
}
/**
* Check scanf-style format.
*/
-static void check_scanf_format(const call_argument_t *arg, const format_spec_t *spec)
+static void check_scanf_format(const call_argument_t *arg,
+ const format_spec_t *spec)
{
/* find format arg */
unsigned idx = 0;
fmt_expr = fmt_expr->unary.value;
}
- vchar_t vchar;
- if (fmt_expr->kind == EXPR_WIDE_STRING_LITERAL) {
- vchar.string = &fmt_expr->wide_string.value;
- vchar.size = fmt_expr->wide_string.value.size;
- vchar.first = wstring_first;
- vchar.next = wstring_next;
- vchar.is_digit = wstring_isdigit;
- } else if (fmt_expr->kind == EXPR_STRING_LITERAL) {
- vchar.string = &fmt_expr->string.value;
- vchar.size = fmt_expr->string.value.size;
- vchar.first = string_first;
- vchar.next = string_next;
- vchar.is_digit = string_isdigit;
- } else {
+ if (fmt_expr->kind != EXPR_STRING_LITERAL
+ && fmt_expr->kind != EXPR_WIDE_STRING_LITERAL)
return;
- }
+
+ const char *string = fmt_expr->literal.value.begin;
+ size_t size = fmt_expr->literal.value.size;
+ const char *c = string;
+
/* find the real args */
for (; idx < spec->arg_idx && arg != NULL; ++idx)
arg = arg->next;
const source_position_t *pos = &fmt_expr->base.source_position;
- unsigned fmt = vchar.first(&vchar);
unsigned num_fmt = 0;
- for (; fmt != '\0'; fmt = vchar.next(&vchar)) {
+ char fmt;
+ for (fmt = *c; fmt != '\0'; fmt = *(++c)) {
if (fmt != '%')
continue;
- fmt = vchar.next(&vchar);
-
+ fmt = *(++c);
+ if (fmt == '\0') {
+ warningf(pos, "dangling '%%' in format string");
+ break;
+ }
if (fmt == '%')
continue;
++num_fmt;
- /* length modifier */
- format_length_modifier_t fmt_mod;
+ /* look for length modifiers */
+ format_length_modifier_t fmt_mod = FMT_MOD_NONE;
switch (fmt) {
- case 'h':
- fmt = vchar.next(&vchar);
- if (fmt == 'h') {
- fmt = vchar.next(&vchar);
- fmt_mod = FMT_MOD_hh;
- } else {
- fmt_mod = FMT_MOD_h;
- }
- break;
+ case 'h':
+ fmt = *(++c);
+ if (fmt == 'h') {
+ fmt = *(++c);
+ fmt_mod = FMT_MOD_hh;
+ } else {
+ fmt_mod = FMT_MOD_h;
+ }
+ break;
- case 'l':
- fmt = vchar.next(&vchar);
- if (fmt == 'l') {
- fmt = vchar.next(&vchar);
- fmt_mod = FMT_MOD_ll;
- } else {
- fmt_mod = FMT_MOD_l;
- }
- break;
+ case 'l':
+ fmt = *(++c);
+ if (fmt == 'l') {
+ fmt = *(++c);
+ fmt_mod = FMT_MOD_ll;
+ } else {
+ fmt_mod = FMT_MOD_l;
+ }
+ break;
- case 'L': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_L; break;
- case 'j': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_j; break;
- case 't': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_t; break;
- case 'z': fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_z; break;
- /* microsoft mode */
- case 'w':
- if (c_mode & _MS) {
- fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_w;
- } else {
- fmt_mod = FMT_MOD_NONE;
- }
- break;
- case 'I':
- if (c_mode & _MS) {
- fmt = vchar.next(&vchar); fmt_mod = FMT_MOD_I;
- if (fmt == '3') {
- fmt = vchar.next(&vchar);
- if (fmt == '2') {
- fmt = vchar.next(&vchar);
- fmt_mod = FMT_MOD_I32;
- } else {
- /* rewind */
- --vchar.position;
- }
- } else if (fmt == '6') {
- fmt = vchar.next(&vchar);
- if (fmt == '4') {
- fmt = vchar.next(&vchar);
- fmt_mod = FMT_MOD_I64;
- } else {
- /* rewind */
- --vchar.position;
- }
+ case 'L': fmt = *(++c); fmt_mod = FMT_MOD_L; break;
+ case 'j': fmt = *(++c); fmt_mod = FMT_MOD_j; break;
+ case 't': fmt = *(++c); fmt_mod = FMT_MOD_t; break;
+ case 'z': fmt = *(++c); fmt_mod = FMT_MOD_z; break;
+ /* microsoft mode */
+ case 'w':
+ if (c_mode & _MS) {
+ fmt = *(++c);
+ fmt_mod = FMT_MOD_w;
+ }
+ break;
+ case 'I':
+ if (c_mode & _MS) {
+ fmt = *(++c);
+ fmt_mod = FMT_MOD_I;
+ if (fmt == '3') {
+ fmt = *(++c);
+ if (fmt == '2') {
+ fmt = *(++c);
+ fmt_mod = FMT_MOD_I32;
+ } else {
+ /* rewind */
+ fmt = *(--c);
+ }
+ } else if (fmt == '6') {
+ fmt = *(++c);
+ if (fmt == '4') {
+ fmt = *(++c);
+ fmt_mod = FMT_MOD_I64;
+ } else {
+ /* rewind */
+ fmt = *(--c);
}
- } else {
- fmt_mod = FMT_MOD_NONE;
}
- break;
- default:
- fmt_mod = FMT_MOD_NONE;
- break;
+ }
+ break;
}
if (fmt == '\0') {
- warningf(pos, "dangling %% in format string");
+ /* the message is itself a format string for warningf, so a
+ * literal percent sign has to be written as %% */
+ warningf(pos, "dangling %% with conversion specifier in format string");
break;
}
- type_t *expected_type;
+ type_t *expected_type;
switch (fmt) {
- case 'd':
- case 'i':
- switch (fmt_mod) {
- case FMT_MOD_NONE: expected_type = type_int; break;
- case FMT_MOD_hh: expected_type = type_signed_char; break;
- case FMT_MOD_h: expected_type = type_short; break;
- case FMT_MOD_l: expected_type = type_long; break;
- case FMT_MOD_ll: expected_type = type_long_long; break;
- case FMT_MOD_j: expected_type = type_intmax_t; break;
- case FMT_MOD_z: expected_type = type_ssize_t; break;
- case FMT_MOD_t: expected_type = type_ptrdiff_t; break;
- case FMT_MOD_I: expected_type = type_ptrdiff_t; break;
- case FMT_MOD_I32: expected_type = type_int32; break;
- case FMT_MOD_I64: expected_type = type_int64; break;
+ case 'd':
+ case 'i':
+ switch (fmt_mod) {
+ case FMT_MOD_NONE: expected_type = type_int; break;
+ case FMT_MOD_hh: expected_type = type_signed_char; break;
+ case FMT_MOD_h: expected_type = type_short; break;
+ case FMT_MOD_l: expected_type = type_long; break;
+ case FMT_MOD_ll: expected_type = type_long_long; break;
+ case FMT_MOD_j: expected_type = type_intmax_t; break;
+ case FMT_MOD_z: expected_type = type_ssize_t; break;
+ case FMT_MOD_t: expected_type = type_ptrdiff_t; break;
+ case FMT_MOD_I: expected_type = type_ptrdiff_t; break;
+ case FMT_MOD_I32: expected_type = type_int32; break;
+ case FMT_MOD_I64: expected_type = type_int64; break;
- default:
- warn_invalid_length_modifier(pos, fmt_mod, fmt);
- goto next_arg;
- }
- break;
-
- case 'o':
- case 'X':
- case 'x':
- goto eval_fmt_mod_unsigned;
-
- case 'u':
-eval_fmt_mod_unsigned:
- switch (fmt_mod) {
- case FMT_MOD_NONE: expected_type = type_unsigned_int; break;
- case FMT_MOD_hh: expected_type = type_unsigned_char; break;
- case FMT_MOD_h: expected_type = type_unsigned_short; break;
- case FMT_MOD_l: expected_type = type_unsigned_long; break;
- case FMT_MOD_ll: expected_type = type_unsigned_long_long; break;
- case FMT_MOD_j: expected_type = type_uintmax_t; break;
- case FMT_MOD_z: expected_type = type_size_t; break;
- case FMT_MOD_t: expected_type = type_uptrdiff_t; break;
- case FMT_MOD_I: expected_type = type_size_t; break;
- case FMT_MOD_I32: expected_type = type_unsigned_int32; break;
- case FMT_MOD_I64: expected_type = type_unsigned_int64; break;
+ default:
+ warn_invalid_length_modifier(pos, fmt_mod, fmt);
+ goto next_arg;
+ }
+ break;
- default:
- warn_invalid_length_modifier(pos, fmt_mod, fmt);
- goto next_arg;
- }
- break;
+ case 'o':
+ case 'X':
+ case 'x':
+ case 'u':
+ switch (fmt_mod) {
+ case FMT_MOD_NONE: expected_type = type_unsigned_int; break;
+ case FMT_MOD_hh: expected_type = type_unsigned_char; break;
+ case FMT_MOD_h: expected_type = type_unsigned_short; break;
+ case FMT_MOD_l: expected_type = type_unsigned_long; break;
+ case FMT_MOD_ll: expected_type = type_unsigned_long_long; break;
+ case FMT_MOD_j: expected_type = type_uintmax_t; break;
+ case FMT_MOD_z: expected_type = type_size_t; break;
+ case FMT_MOD_t: expected_type = type_uptrdiff_t; break;
+ case FMT_MOD_I: expected_type = type_size_t; break;
+ case FMT_MOD_I32: expected_type = type_unsigned_int32; break;
+ case FMT_MOD_I64: expected_type = type_unsigned_int64; break;
- case 'A':
- case 'a':
- case 'E':
- case 'e':
- case 'F':
- case 'f':
- case 'G':
- case 'g':
- switch (fmt_mod) {
- case FMT_MOD_l: /* l modifier is ignored */
- case FMT_MOD_NONE: expected_type = type_double; break;
- case FMT_MOD_L: expected_type = type_long_double; break;
+ default:
+ warn_invalid_length_modifier(pos, fmt_mod, fmt);
+ goto next_arg;
+ }
+ break;
- default:
- warn_invalid_length_modifier(pos, fmt_mod, fmt);
- goto next_arg;
- }
- break;
+ case 'A':
+ case 'a':
+ case 'E':
+ case 'e':
+ case 'F':
+ case 'f':
+ case 'G':
+ case 'g':
+ switch (fmt_mod) {
+ case FMT_MOD_l: expected_type = type_double; break;
+ case FMT_MOD_NONE: expected_type = type_float; break;
+ case FMT_MOD_L: expected_type = type_long_double; break;
- case 'C':
- if (fmt_mod != FMT_MOD_NONE) {
- warn_invalid_length_modifier(pos, fmt_mod, fmt);
- goto next_arg;
- }
- expected_type = type_wchar_t;
- break;
+ default:
+ warn_invalid_length_modifier(pos, fmt_mod, fmt);
+ goto next_arg;
+ }
+ break;
- case 'c':
- expected_type = type_int;
- switch (fmt_mod) {
- case FMT_MOD_NONE: expected_type = type_int; break; /* TODO promoted char */
- case FMT_MOD_l: expected_type = type_wint_t; break;
- case FMT_MOD_w: expected_type = type_wchar_t; break;
+ case 'C':
+ if (fmt_mod != FMT_MOD_NONE) {
+ warn_invalid_length_modifier(pos, fmt_mod, fmt);
+ goto next_arg;
+ }
+ expected_type = type_wchar_t;
+ break;
- default:
- warn_invalid_length_modifier(pos, fmt_mod, fmt);
- goto next_arg;
- }
- break;
+ case 'c':
+ expected_type = type_int;
+ switch (fmt_mod) {
+ case FMT_MOD_NONE: expected_type = type_int; break; /* TODO promoted char */
+ case FMT_MOD_l: expected_type = type_wint_t; break;
+ case FMT_MOD_w: expected_type = type_wchar_t; break;
- case 'S':
- if (fmt_mod != FMT_MOD_NONE) {
- warn_invalid_length_modifier(pos, fmt_mod, fmt);
- goto next_arg;
- }
- expected_type = type_wchar_t;
- break;
+ default:
+ warn_invalid_length_modifier(pos, fmt_mod, fmt);
+ goto next_arg;
+ }
+ break;
- case 's':
- case '[':
- switch (fmt_mod) {
- case FMT_MOD_NONE: expected_type = type_char; break;
- case FMT_MOD_l: expected_type = type_wchar_t; break;
- case FMT_MOD_w: expected_type = type_wchar_t; break;
+ case 'S':
+ if (fmt_mod != FMT_MOD_NONE) {
+ warn_invalid_length_modifier(pos, fmt_mod, fmt);
+ goto next_arg;
+ }
+ expected_type = type_wchar_t;
+ break;
- default:
- warn_invalid_length_modifier(pos, fmt_mod, fmt);
- goto next_arg;
- }
- break;
+ case 's':
+ case '[':
+ switch (fmt_mod) {
+ case FMT_MOD_NONE: expected_type = type_char; break;
+ case FMT_MOD_l: expected_type = type_wchar_t; break;
+ case FMT_MOD_w: expected_type = type_wchar_t; break;
- case 'p':
- if (fmt_mod != FMT_MOD_NONE) {
+ default:
warn_invalid_length_modifier(pos, fmt_mod, fmt);
goto next_arg;
- }
- expected_type = type_void_ptr;
- break;
+ }
+ break;
- case 'n':
- switch (fmt_mod) {
- case FMT_MOD_NONE: expected_type = type_int; break;
- case FMT_MOD_hh: expected_type = type_signed_char; break;
- case FMT_MOD_h: expected_type = type_short; break;
- case FMT_MOD_l: expected_type = type_long; break;
- case FMT_MOD_ll: expected_type = type_long_long; break;
- case FMT_MOD_j: expected_type = type_intmax_t; break;
- case FMT_MOD_z: expected_type = type_ssize_t; break;
- case FMT_MOD_t: expected_type = type_ptrdiff_t; break;
+ case 'p':
+ if (fmt_mod != FMT_MOD_NONE) {
+ warn_invalid_length_modifier(pos, fmt_mod, fmt);
+ goto next_arg;
+ }
+ expected_type = type_void_ptr;
+ break;
- default:
- warn_invalid_length_modifier(pos, fmt_mod, fmt);
- goto next_arg;
- }
- break;
+ case 'n':
+ switch (fmt_mod) {
+ case FMT_MOD_NONE: expected_type = type_int; break;
+ case FMT_MOD_hh: expected_type = type_signed_char; break;
+ case FMT_MOD_h: expected_type = type_short; break;
+ case FMT_MOD_l: expected_type = type_long; break;
+ case FMT_MOD_ll: expected_type = type_long_long; break;
+ case FMT_MOD_j: expected_type = type_intmax_t; break;
+ case FMT_MOD_z: expected_type = type_ssize_t; break;
+ case FMT_MOD_t: expected_type = type_ptrdiff_t; break;
default:
- warningf(pos, "encountered unknown conversion specifier '%%%C' at position %u", (wint_t)fmt, num_fmt);
- if (arg == NULL) {
- warningf(pos, "too few arguments for format string");
- return;
- }
+ warn_invalid_length_modifier(pos, fmt_mod, fmt);
goto next_arg;
+ }
+ break;
+
+ default:
+ warningf(pos, "encountered unknown conversion specifier '%%%c' at format %u",
+ fmt, num_fmt);
+ if (arg == NULL) {
+ warningf(pos, "too few arguments for format string");
+ return;
+ }
+ goto next_arg;
}
if (arg == NULL) {
next_arg:
arg = arg->next;
}
- if (!atend(&vchar)) {
+ assert(fmt == '\0');
+ if (c+1 < string + size) {
warningf(pos, "format string contains '\\0'");
}
if (arg != NULL) {
arg = arg->next;
}
warningf(pos, "%u argument%s but only %u format specifier%s",
- num_args, num_args != 1 ? "s" : "",
- num_fmt, num_fmt != 1 ? "s" : "");
+ num_args, num_args != 1 ? "s" : "",
+ num_fmt, num_fmt != 1 ? "s" : "");
}
}
/* the declaration has a GNU format attribute, check it */
} else {
/*
- * For some functions we always check the format, even if it was not specified.
- * This allows to check format even in MS mode or without header included.
+ * For some functions we always check the format, even if it was not
+ * specified. This allows to check format even in MS mode or without
+ * header included.
*/
const char *const name = entity->base.symbol->string;
for (size_t i = 0; i < lengthof(builtin_table); ++i) {
#define strtold(s, e) strtod(s, e)
#endif
-typedef unsigned int utf32;
-
static utf32 c;
token_t lexer_token;
symbol_t *symbol_L;
#define MATCH_NEWLINE(code) \
case '\r': \
next_char(); \
- if(c == '\n') { \
+ if (c == '\n') { \
next_char(); \
} \
lexer_token.source_position.linenr++; \
lexer_token.source_position.linenr++; \
code
-#define eat(c_type) do { assert(c == c_type); next_char(); } while(0)
+#define eat(c_type) do { assert(c == c_type); next_char(); } while (0)
static void maybe_concat_lines(void)
{
eat('\\');
- switch(c) {
+ switch (c) {
MATCH_NEWLINE(return;)
default:
next_real_char();
/* filter trigraphs */
- if(UNLIKELY(c == '\\')) {
+ if (UNLIKELY(c == '\\')) {
maybe_concat_lines();
goto end_of_next_char;
}
- if(LIKELY(c != '?'))
+ if (LIKELY(c != '?'))
goto end_of_next_char;
next_real_char();
- if(LIKELY(c != '?')) {
+ if (LIKELY(c != '?')) {
put_back(c);
c = '?';
goto end_of_next_char;
}
next_real_char();
- switch(c) {
+ switch (c) {
case '=': c = '#'; break;
case '(': c = '['; break;
case '/': c = '\\'; maybe_concat_lines(); break;
*/
static void parse_symbol(void)
{
- symbol_t *symbol;
- char *string;
-
obstack_1grow(&symbol_obstack, (char) c);
next_char();
- while(1) {
- switch(c) {
+ while (true) {
+ switch (c) {
DIGITS
SYMBOL_CHARS
obstack_1grow(&symbol_obstack, (char) c);
end_symbol:
obstack_1grow(&symbol_obstack, '\0');
- string = obstack_finish(&symbol_obstack);
- symbol = symbol_table_insert(string);
+ char *string = obstack_finish(&symbol_obstack);
+ symbol_t *symbol = symbol_table_insert(string);
- lexer_token.type = symbol->ID;
- lexer_token.v.symbol = symbol;
+ lexer_token.type = symbol->ID;
+ lexer_token.symbol = symbol;
- if(symbol->string != string) {
+ if (symbol->string != string) {
obstack_free(&symbol_obstack, string);
}
}
-static void parse_integer_suffix(bool is_oct_hex)
+/**
+ * Parse a suffix like 'LU' or 'f' that directly follows a number.
+ *
+ * Characters matched by SYMBOL_CHARS are collected on symbol_obstack
+ * (which must be empty on entry). If none are found, lexer_token.symbol
+ * is set to NULL. Otherwise the collected text is interned with
+ * symbol_table_insert() and the resulting symbol is stored in
+ * lexer_token.symbol. The suffix is not validated here.
+ */
+static void parse_number_suffix(void)
{
- bool is_unsigned = false;
- bool min_long = false;
- bool min_longlong = false;
- bool not_traditional = false;
- int pos = 0;
- char suffix[4];
-
- if (c == 'U' || c == 'u') {
- not_traditional = true;
- suffix[pos++] = toupper(c);
- is_unsigned = true;
- next_char();
- if (c == 'L' || c == 'l') {
- suffix[pos++] = toupper(c);
- min_long = true;
- next_char();
- if (c == 'L' || c == 'l') {
- suffix[pos++] = toupper(c);
- min_longlong = true;
- next_char();
- }
- }
- } else if (c == 'l' || c == 'L') {
- suffix[pos++] = toupper(c);
- min_long = true;
- next_char();
- if (c == 'l' || c == 'L') {
- not_traditional = true;
- suffix[pos++] = toupper(c);
- min_longlong = true;
- next_char();
- if (c == 'u' || c == 'U') {
- suffix[pos++] = toupper(c);
- is_unsigned = true;
- next_char();
- }
- } else if (c == 'u' || c == 'U') {
- not_traditional = true;
- suffix[pos++] = toupper(c);
- is_unsigned = true;
+ assert(obstack_object_size(&symbol_obstack) == 0);
+ while (true) {
+ switch (c) {
+ SYMBOL_CHARS
+ obstack_1grow(&symbol_obstack, (char) c);
next_char();
- lexer_token.datatype = type_unsigned_long;
+ break;
+ default:
+ dollar_sign:
+ goto finish_suffix;
}
}
-
- if (warning.traditional && not_traditional) {
- suffix[pos] = '\0';
- warningf(&lexer_token.source_position,
- "traditional C rejects the '%s' suffix", suffix);
+finish_suffix:
+ if (obstack_object_size(&symbol_obstack) == 0) {
+ lexer_token.symbol = NULL;
+ return;
}
- if (!is_unsigned) {
- long long v = lexer_token.v.intvalue;
- if (!min_long) {
- if (v >= TARGET_INT_MIN && v <= TARGET_INT_MAX) {
- lexer_token.datatype = type_int;
- return;
- } else if (is_oct_hex && v >= 0 && v <= TARGET_UINT_MAX) {
- lexer_token.datatype = type_unsigned_int;
- return;
- }
- }
- if (!min_longlong) {
- if (v >= TARGET_LONG_MIN && v <= TARGET_LONG_MAX) {
- lexer_token.datatype = type_long;
- return;
- } else if (is_oct_hex && v >= 0 && (unsigned long long)v <= (unsigned long long)TARGET_ULONG_MAX) {
- lexer_token.datatype = type_unsigned_long;
- return;
- }
- }
- unsigned long long uv = (unsigned long long) v;
- if (is_oct_hex && uv > (unsigned long long) TARGET_LONGLONG_MAX) {
- lexer_token.datatype = type_unsigned_long_long;
- return;
- }
- lexer_token.datatype = type_long_long;
- } else {
- unsigned long long v = (unsigned long long) lexer_token.v.intvalue;
- if (!min_long && v <= TARGET_UINT_MAX) {
- lexer_token.datatype = type_unsigned_int;
- return;
- }
- if (!min_longlong && v <= TARGET_ULONG_MAX) {
- lexer_token.datatype = type_unsigned_long;
- return;
- }
- lexer_token.datatype = type_unsigned_long_long;
- }
-}
+ obstack_1grow(&symbol_obstack, '\0');
+ char *string = obstack_finish(&symbol_obstack);
+ symbol_t *symbol = symbol_table_insert(string);
-static void parse_floating_suffix(void)
-{
- switch(c) {
- /* TODO: do something useful with the suffixes... */
- case 'f':
- case 'F':
- if (warning.traditional) {
- warningf(&lexer_token.source_position,
- "traditional C rejects the 'F' suffix");
- }
- next_char();
- lexer_token.datatype = type_float;
- break;
- case 'l':
- case 'L':
- if (warning.traditional) {
- warningf(&lexer_token.source_position,
- "traditional C rejects the 'F' suffix");
- }
- next_char();
- lexer_token.datatype = type_long_double;
- break;
- default:
- lexer_token.datatype = type_double;
- break;
+ if (symbol->string != string) {
+ obstack_free(&symbol_obstack, string);
}
+ lexer_token.symbol = symbol;
}
-/**
- * A replacement for strtoull. Only those parts needed for
- * our parser are implemented.
- */
-static unsigned long long parse_int_string(const char *s, const char **endptr, int base)
+static string_t identify_string(char *string, size_t len)
{
- unsigned long long v = 0;
-
- switch (base) {
- case 16:
- for (;; ++s) {
- /* check for overrun */
- if (v >= 0x1000000000000000ULL)
- break;
- switch (tolower(*s)) {
- case '0': v <<= 4; break;
- case '1': v <<= 4; v |= 0x1; break;
- case '2': v <<= 4; v |= 0x2; break;
- case '3': v <<= 4; v |= 0x3; break;
- case '4': v <<= 4; v |= 0x4; break;
- case '5': v <<= 4; v |= 0x5; break;
- case '6': v <<= 4; v |= 0x6; break;
- case '7': v <<= 4; v |= 0x7; break;
- case '8': v <<= 4; v |= 0x8; break;
- case '9': v <<= 4; v |= 0x9; break;
- case 'a': v <<= 4; v |= 0xa; break;
- case 'b': v <<= 4; v |= 0xb; break;
- case 'c': v <<= 4; v |= 0xc; break;
- case 'd': v <<= 4; v |= 0xd; break;
- case 'e': v <<= 4; v |= 0xe; break;
- case 'f': v <<= 4; v |= 0xf; break;
- default:
- goto end;
- }
- }
- break;
- case 8:
- for (;; ++s) {
- /* check for overrun */
- if (v >= 0x2000000000000000ULL)
- break;
- switch (tolower(*s)) {
- case '0': v <<= 3; break;
- case '1': v <<= 3; v |= 1; break;
- case '2': v <<= 3; v |= 2; break;
- case '3': v <<= 3; v |= 3; break;
- case '4': v <<= 3; v |= 4; break;
- case '5': v <<= 3; v |= 5; break;
- case '6': v <<= 3; v |= 6; break;
- case '7': v <<= 3; v |= 7; break;
- default:
- goto end;
- }
- }
- break;
- case 10:
- for (;; ++s) {
- /* check for overrun */
- if (v > 0x1999999999999999ULL)
- break;
- switch (tolower(*s)) {
- case '0': v *= 10; break;
- case '1': v *= 10; v += 1; break;
- case '2': v *= 10; v += 2; break;
- case '3': v *= 10; v += 3; break;
- case '4': v *= 10; v += 4; break;
- case '5': v *= 10; v += 5; break;
- case '6': v *= 10; v += 6; break;
- case '7': v *= 10; v += 7; break;
- case '8': v *= 10; v += 8; break;
- case '9': v *= 10; v += 9; break;
- default:
- goto end;
- }
- }
- break;
- default:
- assert(0);
- break;
+ /* TODO hash: for now the string is passed through unchanged and only
+ * wrapped together with its length. NOTE(review): the disabled branch
+ * below uses a variable named concat, which is not declared in this
+ * function -- it needs adapting before it can be enabled. */
+#if 0
+ const char *result = strset_insert(&stringset, concat);
+ if (result != concat) {
+ obstack_free(&symbol_obstack, concat);
}
-end:
- *endptr = s;
- return v;
+#else
+ const char *result = string;
+#endif
+ return (string_t) {result, len};
}
/**
*/
static void parse_number_hex(void)
{
- bool is_float = false;
- assert(c == 'x' || c == 'X');
- next_char();
+ bool is_float = false;
+ bool has_digits = false;
- obstack_1grow(&symbol_obstack, '0');
- obstack_1grow(&symbol_obstack, 'x');
-
- while(isxdigit(c)) {
+ assert(obstack_object_size(&symbol_obstack) == 0);
+ while (isxdigit(c)) {
+ has_digits = true;
obstack_1grow(&symbol_obstack, (char) c);
next_char();
}
if (c == '.') {
+ is_float = true;
obstack_1grow(&symbol_obstack, (char) c);
next_char();
while (isxdigit(c)) {
+ has_digits = true;
obstack_1grow(&symbol_obstack, (char) c);
next_char();
}
- is_float = true;
}
if (c == 'p' || c == 'P') {
+ is_float = true;
obstack_1grow(&symbol_obstack, (char) c);
next_char();
obstack_1grow(&symbol_obstack, (char) c);
next_char();
}
- is_float = true;
+ } else if (is_float) {
+ errorf(&lexer_token.source_position,
+ "hexadecimal floatingpoint constant requires an exponent");
}
- obstack_1grow(&symbol_obstack, '\0');
- char *string = obstack_finish(&symbol_obstack);
- if(*string == '\0') {
- parse_error("invalid hex number");
- lexer_token.type = T_ERROR;
- obstack_free(&symbol_obstack, string);
- return;
- }
+ size_t size = obstack_object_size(&symbol_obstack);
+ char *string = obstack_finish(&symbol_obstack);
+ lexer_token.literal = identify_string(string, size);
- if (is_float) {
- char *endptr;
- lexer_token.type = T_FLOATINGPOINT;
- lexer_token.v.floatvalue = strtold(string, &endptr);
-
- if(*endptr != '\0') {
- parse_error("invalid hex float literal");
- }
+ lexer_token.type =
+ is_float ? T_FLOATINGPOINT_HEXADECIMAL : T_INTEGER_HEXADECIMAL;
- parse_floating_suffix();
- } else {
- const char *endptr;
- lexer_token.type = T_INTEGER;
- lexer_token.v.intvalue = parse_int_string(string + 2, &endptr, 16);
- if(*endptr != '\0') {
- parse_error("hex number literal too long");
- }
- parse_integer_suffix(true);
+ if (!has_digits) {
+ errorf(&lexer_token.source_position, "invalid number literal '0x%S'",
+ &lexer_token.literal);
+ lexer_token.literal.begin = "0";
+ lexer_token.literal.size = 1;
}
- obstack_free(&symbol_obstack, string);
+ parse_number_suffix();
}
/**
*
* @param char the character to check
*/
-static inline bool is_octal_digit(utf32 chr)
+static bool is_octal_digit(utf32 chr)
{
- switch(chr) {
+ switch (chr) {
case '0':
case '1':
case '2':
}
/**
- * Parses a octal number and set the lexer_token.
+ * Parses a number and sets the lexer_token.
+ *
+ * Decimal and octal integers and decimal floating point numbers are handled
+ * here; a leading "0x"/"0X" is handed over to parse_number_hex().
+ * The spelling is stored in lexer_token.literal and the token type becomes
+ * T_FLOATINGPOINT, T_INTEGER_OCTAL or T_INTEGER. A suffix, if any, is left
+ * to parse_number_suffix().
*/
-static void parse_number_oct(void)
+static void parse_number(void)
{
- while(is_octal_digit(c)) {
- obstack_1grow(&symbol_obstack, (char) c);
- next_char();
- }
- obstack_1grow(&symbol_obstack, '\0');
- char *string = obstack_finish(&symbol_obstack);
+ bool is_float = false;
+ bool has_digits = false;
- const char *endptr;
- lexer_token.type = T_INTEGER;
- lexer_token.v.intvalue = parse_int_string(string, &endptr, 8);
- if(*endptr != '\0') {
- parse_error("octal number literal too long");
+ assert(obstack_object_size(&symbol_obstack) == 0);
+ /* a leading 0 either starts a hex prefix or is an ordinary first digit;
+ * it is only added to the literal in the latter case */
+ if (c == '0') {
+ next_char();
+ if (c == 'x' || c == 'X') {
+ next_char();
+ parse_number_hex();
+ return;
+ } else {
+ has_digits = true;
+ }
+ obstack_1grow(&symbol_obstack, '0');
}
- obstack_free(&symbol_obstack, string);
- parse_integer_suffix(true);
-}
-
-/**
- * Parses a decimal including float number and set the
- * lexer_token.
- */
-static void parse_number_dec(void)
-{
- bool is_float = false;
while (isdigit(c)) {
+ has_digits = true;
obstack_1grow(&symbol_obstack, (char) c);
next_char();
}
if (c == '.') {
+ is_float = true;
obstack_1grow(&symbol_obstack, '.');
next_char();
while (isdigit(c)) {
+ has_digits = true;
obstack_1grow(&symbol_obstack, (char) c);
next_char();
}
- is_float = true;
}
- if(c == 'e' || c == 'E') {
- obstack_1grow(&symbol_obstack, (char) c);
+ if (c == 'e' || c == 'E') {
+ is_float = true;
+ /* the exponent marker is stored as lowercase 'e' whatever the input case */
+ obstack_1grow(&symbol_obstack, 'e');
next_char();
- if(c == '-' || c == '+') {
+ if (c == '-' || c == '+') {
obstack_1grow(&symbol_obstack, (char) c);
next_char();
}
- while(isdigit(c)) {
+ while (isdigit(c)) {
obstack_1grow(&symbol_obstack, (char) c);
next_char();
}
- is_float = true;
}
- obstack_1grow(&symbol_obstack, '\0');
- char *string = obstack_finish(&symbol_obstack);
-
- if(is_float) {
- char *endptr;
- lexer_token.type = T_FLOATINGPOINT;
- lexer_token.v.floatvalue = strtold(string, &endptr);
+ /* no terminating NUL is appended here; the length is passed along with the
+ * character data */
+ size_t size = obstack_object_size(&symbol_obstack);
+ char *string = obstack_finish(&symbol_obstack);
+ lexer_token.literal = identify_string(string, size);
- if(*endptr != '\0') {
- parse_error("invalid number literal");
+ /* classify the literal: floating point, octal (leading 0) or decimal */
+ if (is_float) {
+ lexer_token.type = T_FLOATINGPOINT;
+ } else if (string[0] == '0') {
+ lexer_token.type = T_INTEGER_OCTAL;
+
+ /* check for invalid octal digits */
+ for (size_t i= 0; i < size; ++i) {
+ char t = string[i];
+ if (t == '8' || t == '9')
+ errorf(&lexer_token.source_position,
+ "invalid digit '%c' in octal number", t);
}
-
- parse_floating_suffix();
} else {
- const char *endptr;
- lexer_token.type = T_INTEGER;
- lexer_token.v.intvalue = parse_int_string(string, &endptr, 10);
-
- if(*endptr != '\0') {
- parse_error("invalid number literal");
- }
-
- parse_integer_suffix(false);
+ lexer_token.type = T_INTEGER;
}
- obstack_free(&symbol_obstack, string);
-}
-/**
- * Parses a number and sets the lexer_token.
- */
-static void parse_number(void)
-{
- if (c == '0') {
- next_char();
- switch (c) {
- case 'X':
- case 'x':
- parse_number_hex();
- break;
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- parse_number_oct();
- break;
- case '8':
- case '9':
- next_char();
- parse_error("invalid octal number");
- lexer_token.type = T_ERROR;
- return;
- case '.':
- case 'e':
- case 'E':
- default:
- obstack_1grow(&symbol_obstack, '0');
- parse_number_dec();
- return;
- }
- } else {
- parse_number_dec();
+ /* has_digits is only set by the digit loops and the leading-0 case above */
+ if (!has_digits) {
+ errorf(&lexer_token.source_position, "invalid number literal '%S'",
+ &lexer_token.literal);
}
+
+ parse_number_suffix();
}
/**
static utf32 parse_hex_sequence(void)
{
utf32 value = 0;
- while(isxdigit(c)) {
+ while (isxdigit(c)) {
value = 16 * value + digit_value(c);
next_char();
}
case 'e':
if (c_mode & _GNUC)
return 27; /* hopefully 27 is ALWAYS the code for ESCAPE */
- /* FALLTHROUGH */
- default:
- /* §6.4.4.4:8 footnote 64 */
- parse_error("unknown escape sequence");
+ break;
+ case 'u':
+ case 'U':
+ parse_error("universal character parsing not implemented yet");
return EOF;
+ default:
+ break;
}
+ /* §6.4.4.4:8 footnote 64 */
+ parse_error("unknown escape sequence");
+ return EOF;
}
/**
memcpy(concat, s1->begin, len1);
memcpy(concat + len1, s2->begin, len2 + 1);
- if (warning.traditional) {
- warningf(&lexer_token.source_position,
- "traditional C rejects string constant concatenation");
- }
-#if 0 /* TODO hash */
- const char *result = strset_insert(&stringset, concat);
- if(result != concat) {
- obstack_free(&symbol_obstack, concat);
- }
-
- return result;
-#else
- return (string_t){ concat, len1 + len2 + 1 };
-#endif
-}
-
-/**
- * Concatenate a string and a wide string.
- */
-wide_string_t concat_string_wide_string(const string_t *const s1, const wide_string_t *const s2)
-{
- const size_t len1 = s1->size - 1;
- const size_t len2 = s2->size - 1;
-
- wchar_rep_t *const concat = obstack_alloc(&symbol_obstack, (len1 + len2 + 1) * sizeof(*concat));
- const char *const src = s1->begin;
- for (size_t i = 0; i != len1; ++i) {
- concat[i] = src[i];
- }
- memcpy(concat + len1, s2->begin, (len2 + 1) * sizeof(*concat));
- if (warning.traditional) {
- warningf(&lexer_token.source_position,
- "traditional C rejects string constant concatenation");
- }
-
- return (wide_string_t){ concat, len1 + len2 + 1 };
-}
-
-/**
- * Concatenate two wide strings.
- */
-wide_string_t concat_wide_strings(const wide_string_t *const s1, const wide_string_t *const s2)
-{
- const size_t len1 = s1->size - 1;
- const size_t len2 = s2->size - 1;
-
- wchar_rep_t *const concat = obstack_alloc(&symbol_obstack, (len1 + len2 + 1) * sizeof(*concat));
- memcpy(concat, s1->begin, len1 * sizeof(*concat));
- memcpy(concat + len1, s2->begin, (len2 + 1) * sizeof(*concat));
- if (warning.traditional) {
- warningf(&lexer_token.source_position,
- "traditional C rejects string constant concatenation");
- }
-
- return (wide_string_t){ concat, len1 + len2 + 1 };
+ return identify_string(concat, len1 + len2 + 1);
}
-/**
- * Concatenate a wide string and a string.
- */
-wide_string_t concat_wide_string_string(const wide_string_t *const s1, const string_t *const s2)
+/**
+ * Copy a NUL-terminated C string onto symbol_obstack and wrap it in a
+ * string_t via identify_string().
+ *
+ * @param string  the text to copy
+ * @return        the wrapped copy; the size handed to identify_string()
+ *                counts the terminating NUL
+ */
+string_t make_string(const char *string)
{
- const size_t len1 = s1->size - 1;
- const size_t len2 = s2->size - 1;
-
- wchar_rep_t *const concat = obstack_alloc(&symbol_obstack, (len1 + len2 + 1) * sizeof(*concat));
- memcpy(concat, s1->begin, len1 * sizeof(*concat));
- const char *const src = s2->begin;
- wchar_rep_t *const dst = concat + len1;
- for (size_t i = 0; i != len2 + 1; ++i) {
- dst[i] = src[i];
- }
- if (warning.traditional) {
- warningf(&lexer_token.source_position,
- "traditional C rejects string constant concatenation");
- }
+	/* one extra byte so the terminator is copied along with the text */
+	const size_t size_with_nul = strlen(string) + 1;
+	char *const  copy          = obstack_alloc(&symbol_obstack, size_with_nul);
+	memcpy(copy, string, size_with_nul);
- return (wide_string_t){ concat, len1 + len2 + 1 };
+	return identify_string(copy, size_with_nul);
}
static void grow_symbol(utf32 const tc)
eat('"');
- while(1) {
- switch(c) {
+ while (true) {
+ switch (c) {
case '\\': {
utf32 const tc = parse_escape_sequence();
if (tc >= 0x100) {
/* add finishing 0 to the string */
obstack_1grow(&symbol_obstack, '\0');
- const size_t size = (size_t)obstack_object_size(&symbol_obstack);
- const char *const string = obstack_finish(&symbol_obstack);
+ const size_t size = (size_t)obstack_object_size(&symbol_obstack);
+ char *string = obstack_finish(&symbol_obstack);
-#if 0 /* TODO hash */
- /* check if there is already a copy of the string */
- result = strset_insert(&stringset, string);
- if(result != string) {
- obstack_free(&symbol_obstack, string);
- }
-#else
- const char *const result = string;
-#endif
-
- lexer_token.type = T_STRING_LITERAL;
- lexer_token.v.string.begin = result;
- lexer_token.v.string.size = size;
+ lexer_token.type = T_STRING_LITERAL;
+ lexer_token.literal = identify_string(string, size);
}
/**
eat('\'');
- while(1) {
- switch(c) {
+ while (true) {
+ switch (c) {
case '\\': {
- wchar_rep_t tc = parse_escape_sequence();
- obstack_grow(&symbol_obstack, &tc, sizeof(tc));
+ const utf32 tc = parse_escape_sequence();
+ grow_symbol(tc);
break;
}
return;
}
- default: {
- wchar_rep_t tc = (wchar_rep_t) c;
- obstack_grow(&symbol_obstack, &tc, sizeof(tc));
+ default:
+ grow_symbol(c);
next_char();
break;
}
- }
}
end_of_wide_char_constant:;
- size_t size = (size_t) obstack_object_size(&symbol_obstack);
- assert(size % sizeof(wchar_rep_t) == 0);
- size /= sizeof(wchar_rep_t);
+ size_t size = (size_t) obstack_object_size(&symbol_obstack);
+ char *string = obstack_finish(&symbol_obstack);
- const wchar_rep_t *string = obstack_finish(&symbol_obstack);
-
- lexer_token.type = T_WIDE_CHARACTER_CONSTANT;
- lexer_token.v.wide_string.begin = string;
- lexer_token.v.wide_string.size = size;
- lexer_token.datatype = type_wchar_t;
+ lexer_token.type = T_WIDE_CHARACTER_CONSTANT;
+ lexer_token.literal = identify_string(string, size);
}
/**
*/
static void parse_wide_string_literal(void)
{
- const unsigned start_linenr = lexer_token.source_position.linenr;
-
- assert(c == '"');
- next_char();
-
- while(1) {
- switch(c) {
- case '\\': {
- wchar_rep_t tc = parse_escape_sequence();
- obstack_grow(&symbol_obstack, &tc, sizeof(tc));
- break;
- }
-
- case EOF: {
- source_position_t source_position;
- source_position.input_name = lexer_token.source_position.input_name;
- source_position.linenr = start_linenr;
- errorf(&source_position, "string has no end");
- lexer_token.type = T_ERROR;
- return;
- }
-
- case '"':
- next_char();
- goto end_of_string;
-
- default: {
- wchar_rep_t tc = c;
- obstack_grow(&symbol_obstack, &tc, sizeof(tc));
- next_char();
- break;
- }
- }
- }
-
-end_of_string:;
-
- /* TODO: concatenate multiple strings separated by whitespace... */
-
- /* add finishing 0 to the string */
- wchar_rep_t nul = L'\0';
- obstack_grow(&symbol_obstack, &nul, sizeof(nul));
- const size_t size = (size_t)obstack_object_size(&symbol_obstack) / sizeof(wchar_rep_t);
- const wchar_rep_t *const string = obstack_finish(&symbol_obstack);
-
-#if 0 /* TODO hash */
- /* check if there is already a copy of the string */
- const wchar_rep_t *const result = strset_insert(&stringset, string);
- if(result != string) {
- obstack_free(&symbol_obstack, string);
- }
-#else
- const wchar_rep_t *const result = string;
-#endif
-
- lexer_token.type = T_WIDE_STRING_LITERAL;
- lexer_token.v.wide_string.begin = result;
- lexer_token.v.wide_string.size = size;
+	/* the body is lexed by the narrow string routine; only the resulting
+	 * token type is retagged, and any other token type it produced is kept */
+	parse_string_literal();
+	if (lexer_token.type != T_STRING_LITERAL)
+		return;
+	lexer_token.type = T_WIDE_STRING_LITERAL;
}
/**
eat('\'');
- while(1) {
- switch(c) {
+ while (true) {
+ switch (c) {
case '\\': {
utf32 const tc = parse_escape_sequence();
if (tc >= 0x100) {
}
end_of_char_constant:;
- const size_t size = (size_t)obstack_object_size(&symbol_obstack);
- const char *const string = obstack_finish(&symbol_obstack);
+ const size_t size = (size_t)obstack_object_size(&symbol_obstack);
+ char *const string = obstack_finish(&symbol_obstack);
- lexer_token.type = T_CHARACTER_CONSTANT;
- lexer_token.v.string.begin = string;
- lexer_token.v.string.size = size;
- lexer_token.datatype = c_mode & _CXX && size == 1 ? type_char : type_int;
+ lexer_token.type = T_CHARACTER_CONSTANT;
+ lexer_token.literal = identify_string(string, size);
}
/**
{
unsigned start_linenr = lexer_token.source_position.linenr;
- while(1) {
- switch(c) {
+ while (true) {
+ switch (c) {
case '/':
next_char();
if (c == '*') {
break;
case '*':
next_char();
- if(c == '/') {
+ if (c == '/') {
next_char();
return;
}
*/
static void skip_line_comment(void)
{
- while(1) {
- switch(c) {
+ while (true) {
+ switch (c) {
case EOF:
return;
*/
static void eat_until_newline(void)
{
- while(pp_token.type != '\n' && pp_token.type != T_EOF) {
+ while (pp_token.type != '\n' && pp_token.type != T_EOF) {
next_pp_token();
}
}
static void define_directive(void)
{
lexer_next_preprocessing_token();
- if(lexer_token.type != T_IDENTIFIER) {
+ if (lexer_token.type != T_IDENTIFIER) {
parse_error("expected identifier after #define\n");
eat_until_newline();
}
*/
static void parse_line_directive(void)
{
- if(pp_token.type != T_INTEGER) {
+ if (pp_token.type != T_INTEGER) {
parse_error("expected integer");
} else {
- lexer_token.source_position.linenr = (unsigned int)(pp_token.v.intvalue - 1);
+ lexer_token.source_position.linenr = atoi(pp_token.literal.begin);
next_pp_token();
}
- if(pp_token.type == T_STRING_LITERAL) {
- lexer_token.source_position.input_name = pp_token.v.string.begin;
+ if (pp_token.type == T_STRING_LITERAL) {
+ lexer_token.source_position.input_name = pp_token.literal.begin;
next_pp_token();
}
bool unknown_pragma = true;
next_pp_token();
- if (pp_token.v.symbol->pp_ID == TP_STDC) {
+ if (pp_token.symbol->pp_ID == TP_STDC) {
stdc_pragma_kind_t kind = STDC_UNKNOWN;
/* a STDC pragma */
if (c_mode & _C99) {
next_pp_token();
- switch (pp_token.v.symbol->pp_ID) {
+ switch (pp_token.symbol->pp_ID) {
case TP_FP_CONTRACT:
kind = STDC_FP_CONTRACT;
break;
if (kind != STDC_UNKNOWN) {
stdc_pragma_value_kind_t value = STDC_VALUE_UNKNOWN;
next_pp_token();
- switch (pp_token.v.symbol->pp_ID) {
+ switch (pp_token.symbol->pp_ID) {
case TP_ON:
value = STDC_VALUE_ON;
break;
static void parse_preprocessor_identifier(void)
{
assert(pp_token.type == T_IDENTIFIER);
- symbol_t *symbol = pp_token.v.symbol;
+ symbol_t *symbol = pp_token.symbol;
- switch(symbol->pp_ID) {
+ switch (symbol->pp_ID) {
case TP_include:
printf("include - enable header name parsing!\n");
break;
{
next_pp_token();
- switch(pp_token.type) {
+ switch (pp_token.type) {
case T_IDENTIFIER:
parse_preprocessor_identifier();
break;
#define MAYBE_PROLOG \
next_char(); \
- while(1) { \
- switch(c) {
+ while (true) { \
+ switch (c) {
#define MAYBE(ch, set_type) \
case ch: \
default: \
code \
} \
- } /* end of while(1) */ \
+ } /* end of while (true) */ \
break;
#define ELSE(set_type) \
void lexer_next_preprocessing_token(void)
{
- while(1) {
- switch(c) {
+ while (true) {
+ switch (c) {
case ' ':
case '\t':
next_char();
SYMBOL_CHARS
parse_symbol();
/* might be a wide string ( L"string" ) */
- if (lexer_token.v.symbol == symbol_L) {
+ if (lexer_token.symbol == symbol_L) {
switch (c) {
case '"': parse_wide_string_literal(); break;
case '\'': parse_wide_character_constant(); break;
DIGITS
put_back(c);
c = '.';
- parse_number_dec();
+ parse_number();
return;
case '.':
void lexer_open_stream(FILE *stream, const char *input_name);
void lexer_open_buffer(const char *buffer, size_t len, const char *input_name);
-string_t concat_strings( const string_t *s1, const string_t *s2);
-wide_string_t concat_string_wide_string(const string_t *s1, const wide_string_t *s2);
-wide_string_t concat_wide_strings( const wide_string_t *s1, const wide_string_t *s2);
-wide_string_t concat_wide_string_string(const wide_string_t *s1, const string_t *s2);
+string_t concat_strings(const string_t *s1, const string_t *s2);
+string_t make_string(const char *str);
#endif
[EXPR_INVALID] = sizeof(expression_base_t),
[EXPR_REFERENCE] = sizeof(reference_expression_t),
[EXPR_REFERENCE_ENUM_VALUE] = sizeof(reference_expression_t),
- [EXPR_CONST] = sizeof(const_expression_t),
- [EXPR_CHARACTER_CONSTANT] = sizeof(const_expression_t),
- [EXPR_WIDE_CHARACTER_CONSTANT] = sizeof(const_expression_t),
+ [EXPR_LITERAL_INTEGER] = sizeof(literal_expression_t),
+ [EXPR_LITERAL_INTEGER_OCTAL] = sizeof(literal_expression_t),
+ [EXPR_LITERAL_INTEGER_HEXADECIMAL]= sizeof(literal_expression_t),
+ [EXPR_LITERAL_FLOATINGPOINT] = sizeof(literal_expression_t),
+ [EXPR_LITERAL_FLOATINGPOINT_HEXADECIMAL] = sizeof(literal_expression_t),
+ [EXPR_LITERAL_CHARACTER] = sizeof(literal_expression_t),
+ [EXPR_LITERAL_WIDE_CHARACTER] = sizeof(literal_expression_t),
[EXPR_STRING_LITERAL] = sizeof(string_literal_expression_t),
- [EXPR_WIDE_STRING_LITERAL] = sizeof(wide_string_literal_expression_t),
+ [EXPR_WIDE_STRING_LITERAL] = sizeof(string_literal_expression_t),
[EXPR_COMPOUND_LITERAL] = sizeof(compound_literal_expression_t),
[EXPR_CALL] = sizeof(call_expression_t),
[EXPR_UNARY_FIRST] = sizeof(unary_expression_t),
return parse_sub_expression(PREC_ASSIGNMENT);
}
+/**
+ * Report that traditional C does not accept adjacent string constants.
+ * Does nothing unless warning.traditional is enabled.
+ *
+ * @param pos  source position the warning is attached to
+ */
+static void warn_string_concat(const source_position_t *pos)
+{
+	if (!warning.traditional)
+		return;
+
+	warningf(pos, "traditional C rejects string constant concatenation");
+}
+
static string_t parse_string_literals(void)
{
assert(token.type == T_STRING_LITERAL);
- string_t result = token.v.string;
+ string_t result = token.literal;
next_token();
while (token.type == T_STRING_LITERAL) {
- result = concat_strings(&result, &token.v.string);
+ warn_string_concat(&token.source_position);
+ result = concat_strings(&result, &token.literal);
next_token();
}
/* is it an identifier */
if (token.type == T_IDENTIFIER
&& (look_ahead(1)->type == ',' || look_ahead(1)->type == ')')) {
- symbol_t *symbol = token.v.symbol;
+ symbol_t *symbol = token.symbol;
argument->kind = ATTRIBUTE_ARGUMENT_SYMBOL;
argument->v.symbol = symbol;
next_token();
{
switch(token.type) {
case T_IDENTIFIER:
- return token.v.symbol;
+ return token.symbol;
case T_auto:
case T_char:
case T_double:
determine_lhs_ent(expr->va_starte.ap, lhs_ent);
return;
+ EXPR_LITERAL_CASES
case EXPR_UNKNOWN:
case EXPR_INVALID:
- case EXPR_CONST:
- case EXPR_CHARACTER_CONSTANT:
- case EXPR_WIDE_CHARACTER_CONSTANT:
case EXPR_STRING_LITERAL:
case EXPR_WIDE_STRING_LITERAL:
case EXPR_COMPOUND_LITERAL: // TODO init?
T_IDENTIFIER, NULL);
return NULL;
}
- designator->symbol = token.v.symbol;
+ designator->symbol = token.symbol;
next_token();
break;
default:
return NULL;
}
-static initializer_t *initializer_from_string(array_type_t *type,
+static initializer_t *initializer_from_string(array_type_t *const type,
const string_t *const string)
{
/* TODO: check len vs. size of array type */
}
static initializer_t *initializer_from_wide_string(array_type_t *const type,
- wide_string_t *const string)
+ const string_t *const string)
{
/* TODO: check len vs. size of array type */
(void) type;
type_t *type = skip_typeref(orig_type);
type_t *expr_type_orig = expression->base.type;
type_t *expr_type = skip_typeref(expr_type_orig);
+
if (is_type_array(type) && expr_type->kind == TYPE_POINTER) {
array_type_t *const array_type = &type->array;
type_t *const element_type = skip_typeref(array_type->element_type);
if (element_type->kind == TYPE_ATOMIC) {
atomic_type_kind_t akind = element_type->atomic.akind;
switch (expression->kind) {
- case EXPR_STRING_LITERAL:
- if (akind == ATOMIC_TYPE_CHAR
- || akind == ATOMIC_TYPE_SCHAR
- || akind == ATOMIC_TYPE_UCHAR) {
- return initializer_from_string(array_type,
- &expression->string.value);
- }
- break;
+ case EXPR_STRING_LITERAL:
+ if (akind == ATOMIC_TYPE_CHAR
+ || akind == ATOMIC_TYPE_SCHAR
+ || akind == ATOMIC_TYPE_UCHAR) {
+ return initializer_from_string(array_type,
+ &expression->string_literal.value);
+ }
+ break;
- case EXPR_WIDE_STRING_LITERAL: {
- type_t *bare_wchar_type = skip_typeref(type_wchar_t);
- if (get_unqualified_type(element_type) == bare_wchar_type) {
- return initializer_from_wide_string(array_type,
- &expression->wide_string.value);
- }
- break;
+ case EXPR_WIDE_STRING_LITERAL: {
+ type_t *bare_wchar_type = skip_typeref(type_wchar_t);
+ if (get_unqualified_type(element_type) == bare_wchar_type) {
+ return initializer_from_wide_string(array_type,
+ &expression->string_literal.value);
}
+ break;
+ }
- default:
- break;
+ default:
+ break;
}
}
}
/* GNU-style designator ("identifier: value") */
designator = allocate_ast_zero(sizeof(designator[0]));
designator->source_position = token.source_position;
- designator->symbol = token.v.symbol;
+ designator->symbol = token.symbol;
eat(T_IDENTIFIER);
eat(':');
return NULL;
}
+/**
+ * Build an integer literal expression of type size_t for the given value.
+ * The value is stored as its decimal spelling.
+ *
+ * @param value  the number the literal stands for
+ * @return       a new EXPR_LITERAL_INTEGER expression
+ */
+static expression_t *make_size_literal(size_t value)
+{
+	/* allocate_expression_zero() takes an expression kind, whereas
+	 * allocate_ast_zero() is called with a byte count elsewhere in this file */
+	expression_t *literal = allocate_expression_zero(EXPR_LITERAL_INTEGER);
+	literal->base.type    = type_size_t;
+
+	/* %zu covers the whole size_t range; going through unsigned would
+	 * truncate values above UINT_MAX */
+	char buf[128];
+	snprintf(buf, sizeof(buf), "%zu", value);
+	literal->literal.value = make_string(buf);
+
+	return literal;
+}
+
/**
* Parses an initializer. Parsers either a compound literal
* (env->declaration == NULL) or an initializer of a declaration.
internal_errorf(HERE, "invalid initializer type");
}
- expression_t *cnst = allocate_expression_zero(EXPR_CONST);
- cnst->base.type = type_size_t;
- cnst->conste.v.int_value = size;
-
type_t *new_type = duplicate_type(type);
- new_type->array.size_expression = cnst;
+ new_type->array.size_expression = make_size_literal(size);
new_type->array.size_constant = true;
new_type->array.has_implicit_size = true;
new_type->array.size = size;
entity_kind_tag_t const kind = is_struct ? ENTITY_STRUCT : ENTITY_UNION;
if (token.type == T_IDENTIFIER) {
/* the compound has a name, check if we have seen it already */
- symbol = token.v.symbol;
+ symbol = token.symbol;
next_token();
entity_t *entity = get_tag(symbol, kind);
entity_t *entity = allocate_entity_zero(ENTITY_ENUM_VALUE);
entity->enum_value.enum_type = enum_type;
- entity->base.symbol = token.v.symbol;
+ entity->base.symbol = token.symbol;
entity->base.source_position = token.source_position;
next_token();
eat(T_enum);
switch (token.type) {
case T_IDENTIFIER:
- symbol = token.v.symbol;
+ symbol = token.symbol;
next_token();
entity = get_tag(symbol, ENTITY_ENUM);
}
switch (token.type) {
case T_IDENTIFIER:
- if (is_typedef_symbol(token.v.symbol)) {
+ if (is_typedef_symbol(token.symbol)) {
type = parse_typename();
} else {
expression = parse_expression();
}
bool is_put;
- symbol_t *symbol = token.v.symbol;
+ symbol_t *symbol = token.symbol;
next_token();
if (strcmp(symbol->string, "put") == 0) {
is_put = true;
goto end_error;
}
if (is_put) {
- property->put_symbol = token.v.symbol;
+ property->put_symbol = token.symbol;
} else {
- property->get_symbol = token.v.symbol;
+ property->get_symbol = token.symbol;
}
next_token();
} while (next_if(','));
if (next_if(T_restrict)) {
kind = ATTRIBUTE_MS_RESTRICT;
} else if (token.type == T_IDENTIFIER) {
- const char *name = token.v.symbol->string;
+ const char *name = token.symbol->string;
next_token();
for (attribute_kind_t k = ATTRIBUTE_MS_FIRST; k <= ATTRIBUTE_MS_LAST;
++k) {
}
}
- type_t *const typedef_type = get_typedef_type(token.v.symbol);
+ type_t *const typedef_type = get_typedef_type(token.symbol);
if (typedef_type == NULL) {
/* Be somewhat resilient to typos like 'vodi f()' at the beginning of a
* declaration, so it doesn't generate 'implicit int' followed by more
errorf(HERE, "%K does not name a type", &token);
entity_t *entity =
- create_error_entity(token.v.symbol, ENTITY_TYPEDEF);
+ create_error_entity(token.symbol, ENTITY_TYPEDEF);
type = allocate_type_zero(TYPE_TYPEDEF);
type->typedeft.typedefe = &entity->typedefe;
entity_t *entity = allocate_entity_zero(ENTITY_PARAMETER);
entity->base.source_position = token.source_position;
entity->base.namespc = NAMESPACE_NORMAL;
- entity->base.symbol = token.v.symbol;
+ entity->base.symbol = token.symbol;
/* a K&R parameter has no type, yet */
next_token();
{
/* func(void) is not a parameter */
if (token.type == T_IDENTIFIER) {
- entity_t const *const entity = get_entity(token.v.symbol, NAMESPACE_NORMAL);
+ entity_t const *const entity = get_entity(token.symbol, NAMESPACE_NORMAL);
if (entity == NULL)
return true;
if (entity->kind != ENTITY_TYPEDEF)
int saved_comma_state = save_and_reset_anchor_state(',');
if (token.type == T_IDENTIFIER &&
- !is_typedef_symbol(token.v.symbol)) {
+ !is_typedef_symbol(token.symbol)) {
token_type_t la1_type = (token_type_t)look_ahead(1)->type;
if (la1_type == ',' || la1_type == ')') {
type->kr_style_parameters = true;
if (env->must_be_abstract) {
errorf(HERE, "no identifier expected in typename");
} else {
- env->symbol = token.v.symbol;
+ env->symbol = token.symbol;
env->source_position = token.source_position;
}
next_token();
TYPE_QUALIFIERS
return true;
case T_IDENTIFIER:
- return is_typedef_symbol(token->v.symbol);
+ return is_typedef_symbol(token->symbol);
case T___extension__:
STORAGE_CLASSES
case EXPR_REFERENCE:
case EXPR_REFERENCE_ENUM_VALUE:
- case EXPR_CONST:
- case EXPR_CHARACTER_CONSTANT:
- case EXPR_WIDE_CHARACTER_CONSTANT:
+ EXPR_LITERAL_CASES
case EXPR_STRING_LITERAL:
case EXPR_WIDE_STRING_LITERAL:
case EXPR_COMPOUND_LITERAL: // TODO descend into initialisers
return create_invalid_expression();
}
+/**
+ * Select the type given to narrow string literal expressions.
+ *
+ * @return type_const_char_ptr when warning.write_strings is set,
+ *         type_char_ptr otherwise
+ */
+static type_t *get_string_type(void)
+{
+	if (warning.write_strings)
+		return type_const_char_ptr;
+	return type_char_ptr;
+}
+
+/**
+ * Select the type given to wide string literal expressions.
+ *
+ * @return type_const_wchar_t_ptr when warning.write_strings is set,
+ *         type_wchar_t_ptr otherwise
+ */
+static type_t *get_wide_string_type(void)
+{
+	if (warning.write_strings)
+		return type_const_wchar_t_ptr;
+	return type_wchar_t_ptr;
+}
+
/**
* Parse a string constant.
*/
-static expression_t *parse_string_const(void)
+static expression_t *parse_string_literal(void)
{
- wide_string_t wres;
- if (token.type == T_STRING_LITERAL) {
- string_t res = token.v.string;
+	/* Adjacent narrow and wide string literals are merged into a single
+	 * expression. The result is a wide string literal as soon as one of the
+	 * pieces is wide, and carries the position of the first piece. */
+	source_position_t begin   = token.source_position;
+	string_t          res     = token.literal;
+	bool              is_wide = (token.type == T_WIDE_STRING_LITERAL);
+
+	next_token();
+	while (token.type == T_STRING_LITERAL
+	    || token.type == T_WIDE_STRING_LITERAL) {
+		warn_string_concat(&token.source_position);
+		/* classify the piece being appended before it is consumed: after
+		 * next_token() the current token is no longer this literal */
+		is_wide |= token.type == T_WIDE_STRING_LITERAL;
+		res = concat_strings(&res, &token.literal);
next_token();
- while (token.type == T_STRING_LITERAL) {
- res = concat_strings(&res, &token.v.string);
- next_token();
- }
- if (token.type != T_WIDE_STRING_LITERAL) {
- expression_t *const cnst = allocate_expression_zero(EXPR_STRING_LITERAL);
- /* note: that we use type_char_ptr here, which is already the
- * automatic converted type. revert_automatic_type_conversion
- * will construct the array type */
- cnst->base.type = warning.write_strings ? type_const_char_ptr : type_char_ptr;
- cnst->string.value = res;
- return cnst;
- }
+	}
- wres = concat_string_wide_string(&res, &token.v.wide_string);
+	expression_t *literal;
+	if (is_wide) {
+		literal = allocate_expression_zero(EXPR_WIDE_STRING_LITERAL);
+		literal->base.type = get_wide_string_type();
} else {
- wres = token.v.wide_string;
+		literal = allocate_expression_zero(EXPR_STRING_LITERAL);
+		literal->base.type = get_string_type();
}
+	literal->base.source_position = begin;
+	/* string literal expressions are read through the string_literal member
+	 * elsewhere in this file, so store the value through it as well */
+	literal->string_literal.value = res;
+
+	return literal;
+}
+
+/**
+ * Build the literal expression for a boolean constant and consume the
+ * current token.
+ *
+ * @param value  which of the two constants the current token stands for
+ * @return       an EXPR_LITERAL_BOOLEAN expression of type type_bool whose
+ *               value is the spelling "true" or "false"
+ */
+static expression_t *parse_boolean_literal(bool value)
+{
+	expression_t *const result = allocate_expression_zero(EXPR_LITERAL_BOOLEAN);
+	result->base.type            = type_bool;
+	result->base.source_position = token.source_position;
+	/* the size counts the characters only, without a terminating NUL */
+	if (value) {
+		result->literal.value.begin = "true";
+		result->literal.value.size  = 4;
+	} else {
+		result->literal.value.begin = "false";
+		result->literal.value.size  = 5;
+	}
+
next_token();
+	return result;
+}
- for (;;) {
- switch (token.type) {
- case T_WIDE_STRING_LITERAL:
- wres = concat_wide_strings(&wres, &token.v.wide_string);
- break;
+/**
+ * Warn that traditional C rejects the suffix of the current token.
+ * Does nothing unless warning.traditional is enabled.
+ */
+static void warn_traditional_suffix(void)
+{
+	if (warning.traditional) {
+		warningf(&token.source_position,
+		         "traditional C rejects the '%Y' suffix", token.symbol);
+	}
+}
- case T_STRING_LITERAL:
- wres = concat_wide_string_string(&wres, &token.v.string);
- break;
+/**
+ * Validate the suffix attached to the current integer token.
+ *
+ * Accepted spellings are l, ll, u and the combinations lu, llu, ul and ull,
+ * in upper or lower case; the two letters of a doubled 'l' must have the
+ * same case. Anything left over after such a prefix is reported as an
+ * invalid suffix. Every accepted non-empty suffix except a lone 'l'/'L'
+ * is passed on to warn_traditional_suffix().
+ */
+static void check_integer_suffix(void)
+{
+ symbol_t *suffix = token.symbol;
+ /* no suffix symbol on the token: nothing to check */
+ if (suffix == NULL)
+ return;
- default: {
- expression_t *const cnst = allocate_expression_zero(EXPR_WIDE_STRING_LITERAL);
- cnst->base.type = warning.write_strings ? type_const_wchar_t_ptr : type_wchar_t_ptr;
- cnst->wide_string.value = wres;
- return cnst;
+ bool not_traditional = false;
+ /* c walks over the part of the suffix that has been recognised so far */
+ const char *c = suffix->string;
+ if (*c == 'l' || *c == 'L') {
+ ++c;
+ /* comparing with the previous character requires "ll" or "LL" */
+ if (*c == *(c-1)) {
+ not_traditional = true;
+ ++c;
+ if (*c == 'u' || *c == 'U') {
+ ++c;
+ }
+ } else if (*c == 'u' || *c == 'U') {
+ not_traditional = true;
+ ++c;
+ }
+ } else if (*c == 'u' || *c == 'U') {
+ not_traditional = true;
+ ++c;
+ if (*c == 'l' || *c == 'L') {
+ ++c;
+ if (*c == *(c-1)) {
+ ++c;
}
}
- next_token();
+ }
+ /* anything not consumed above makes the whole suffix invalid */
+ if (*c != '\0') {
+ errorf(&token.source_position,
+ "invalid suffix '%s' on integer constant", suffix->string);
+ } else if (not_traditional) {
+ warn_traditional_suffix();
}
}
-/**
- * Parse a boolean constant.
- */
-static expression_t *parse_bool_const(bool value)
+/**
+ * Validate the suffix attached to the current floating point token and
+ * derive the literal's type from it.
+ *
+ * @return type_float for an 'f'/'F' suffix, type_long_double for 'l'/'L',
+ *         type_double when the token carries no suffix symbol or the suffix
+ *         starts with neither letter. Trailing characters after the suffix
+ *         letter are reported as an error.
+ */
+static type_t *check_floatingpoint_suffix(void)
{
- expression_t *cnst = allocate_expression_zero(EXPR_CONST);
- cnst->base.type = type_bool;
- cnst->conste.v.int_value = value;
+	symbol_t *suffix = token.symbol;
+	type_t   *type   = type_double;
+	if (suffix == NULL)
+		return type;
- next_token();
+	/* Neither floating point suffix is accepted by traditional C, so a
+	 * valid one has to reach warn_traditional_suffix(); previously the flag
+	 * was never set and that branch could not be taken. */
+	bool        not_traditional = false;
+	const char *c               = suffix->string;
+	if (*c == 'f' || *c == 'F') {
+		++c;
+		type            = type_float;
+		not_traditional = true;
+	} else if (*c == 'l' || *c == 'L') {
+		++c;
+		type            = type_long_double;
+		not_traditional = true;
+	}
+	if (*c != '\0') {
+		errorf(&token.source_position,
+		       "invalid suffix '%s' on floatingpoint constant", suffix->string);
+	} else if (not_traditional) {
+		warn_traditional_suffix();
+	}
- return cnst;
+	return type;
}
/**
* Parse an integer constant.
*/
-static expression_t *parse_int_const(void)
+static expression_t *parse_number_literal(void)
{
- expression_t *cnst = allocate_expression_zero(EXPR_CONST);
- cnst->base.type = token.datatype;
- cnst->conste.v.int_value = token.v.intvalue;
+	/* Turns the current integer or floating point token into a literal
+	 * expression carrying the token's spelling and suffix, then consumes
+	 * the token. */
+	expression_kind_t kind;
+	/* The integer cases below do not pick a type, so start from NULL rather
+	 * than copying an uninitialized pointer into the expression;
+	 * determine_literal_type() is called on the literal before returning. */
+	type_t *type = NULL;
+
+	switch (token.type) {
+	case T_INTEGER:
+		kind = EXPR_LITERAL_INTEGER;
+		check_integer_suffix();
+		break;
+	case T_INTEGER_OCTAL:
+		kind = EXPR_LITERAL_INTEGER_OCTAL;
+		check_integer_suffix();
+		break;
+	case T_INTEGER_HEXADECIMAL:
+		kind = EXPR_LITERAL_INTEGER_HEXADECIMAL;
+		check_integer_suffix();
+		break;
+	case T_FLOATINGPOINT:
+		kind = EXPR_LITERAL_FLOATINGPOINT;
+		type = check_floatingpoint_suffix();
+		break;
+	case T_FLOATINGPOINT_HEXADECIMAL:
+		kind = EXPR_LITERAL_FLOATINGPOINT_HEXADECIMAL;
+		type = check_floatingpoint_suffix();
+		break;
+	default:
+		panic("unexpected token type in parse_number_literal");
+	}
+	expression_t *literal = allocate_expression_zero(kind);
+	literal->base.source_position = token.source_position;
+	literal->base.type            = type;
+	literal->literal.value        = token.literal;
+	literal->literal.suffix       = token.symbol;
next_token();
- return cnst;
+	/* integer type depends on the size of the number and the size
+	 * representable by the types. The backend/codegeneration has to determine
+	 * that
+	 */
+	determine_literal_type(&literal->literal);
+	return literal;
}
/**
*/
static expression_t *parse_character_constant(void)
{
- expression_t *cnst = allocate_expression_zero(EXPR_CHARACTER_CONSTANT);
- cnst->base.type = token.datatype;
- cnst->conste.v.character = token.v.string;
+ /* a character constant is typed char in C++ mode and int otherwise */
+ expression_t *literal = allocate_expression_zero(EXPR_LITERAL_CHARACTER);
+ literal->base.source_position = token.source_position;
+ literal->base.type = c_mode & _CXX ? type_char : type_int;
+ literal->literal.value = token.literal;
- if (cnst->conste.v.character.size != 1) {
- if (!GNU_MODE) {
+ size_t len = literal->literal.value.size;
+ if (len != 1) {
+ /* a multi-character constant is typed int in every mode; the type
+ * must not depend on whether the multichar warning is enabled */
+ literal->base.type = type_int;
+ if (!GNU_MODE && !(c_mode & _C99)) {
errorf(HERE, "more than 1 character in character constant");
} else if (warning.multichar) {
warningf(HERE, "multi-character character constant");
}
}
- next_token();
- return cnst;
+ next_token();
+ return literal;
}
/**
*/
static expression_t *parse_wide_character_constant(void)
{
- expression_t *cnst = allocate_expression_zero(EXPR_WIDE_CHARACTER_CONSTANT);
- cnst->base.type = token.datatype;
- cnst->conste.v.wide_character = token.v.wide_string;
+ /* The literal keeps the token text and is always typed int here.
+ * NOTE(review): C gives L'x' constants the type wchar_t -- confirm that
+ * int is intended. */
+ expression_t *literal = allocate_expression_zero(EXPR_LITERAL_WIDE_CHARACTER);
+ literal->base.source_position = token.source_position;
+ literal->base.type = type_int;
+ literal->literal.value = token.literal;
- if (cnst->conste.v.wide_character.size != 1) {
- if (!GNU_MODE) {
- errorf(HERE, "more than 1 character in character constant");
- } else if (warning.multichar) {
- warningf(HERE, "multi-character character constant");
- }
+ /* wstrlen() counts UTF-8 encoded characters, not bytes.
+ * NOTE(review): this warning is emitted unconditionally; the narrow
+ * variant checks warning.multichar first -- confirm this is intended. */
+ size_t len = wstrlen(&literal->literal.value);
+ if (len != 1) {
+ warningf(HERE, "multi-character character constant");
}
- next_token();
-
- return cnst;
-}
-
-/**
- * Parse a float constant.
- */
-static expression_t *parse_float_const(void)
-{
- expression_t *cnst = allocate_expression_zero(EXPR_CONST);
- cnst->base.type = token.datatype;
- cnst->conste.v.float_value = token.v.floatvalue;
next_token();
-
- return cnst;
+ return literal;
}
static entity_t *create_implicit_function(symbol_t *symbol,
type_t *revert_automatic_type_conversion(const expression_t *expression)
{
+ /* The kinds handled in the switch get their type recomputed from the
+ * entity or operand they refer to; every other kind keeps the type that
+ * is stored in expression->base.type. */
switch (expression->kind) {
- case EXPR_REFERENCE: {
- entity_t *entity = expression->reference.entity;
- if (is_declaration(entity)) {
- return entity->declaration.type;
- } else if (entity->kind == ENTITY_ENUM_VALUE) {
- return entity->enum_value.enum_type;
- } else {
- panic("no declaration or enum in reference");
- }
+ case EXPR_REFERENCE: {
+ entity_t *entity = expression->reference.entity;
+ if (is_declaration(entity)) {
+ return entity->declaration.type;
+ } else if (entity->kind == ENTITY_ENUM_VALUE) {
+ return entity->enum_value.enum_type;
+ } else {
+ panic("no declaration or enum in reference");
}
+ }
- case EXPR_SELECT: {
- entity_t *entity = expression->select.compound_entry;
- assert(is_declaration(entity));
- type_t *type = entity->declaration.type;
- return get_qualified_type(type,
- expression->base.type->base.qualifiers);
- }
+ case EXPR_SELECT: {
+ entity_t *entity = expression->select.compound_entry;
+ assert(is_declaration(entity));
+ type_t *type = entity->declaration.type;
+ return get_qualified_type(type,
+ expression->base.type->base.qualifiers);
+ }
- case EXPR_UNARY_DEREFERENCE: {
- const expression_t *const value = expression->unary.value;
- type_t *const type = skip_typeref(value->base.type);
- if (!is_type_pointer(type))
- return type_error_type;
- return type->pointer.points_to;
- }
+ case EXPR_UNARY_DEREFERENCE: {
+ const expression_t *const value = expression->unary.value;
+ type_t *const type = skip_typeref(value->base.type);
+ if (!is_type_pointer(type))
+ return type_error_type;
+ return type->pointer.points_to;
+ }
- case EXPR_ARRAY_ACCESS: {
- const expression_t *array_ref = expression->array_access.array_ref;
- type_t *type_left = skip_typeref(array_ref->base.type);
- if (!is_type_pointer(type_left))
- return type_error_type;
- return type_left->pointer.points_to;
- }
+ case EXPR_ARRAY_ACCESS: {
+ const expression_t *array_ref = expression->array_access.array_ref;
+ type_t *type_left = skip_typeref(array_ref->base.type);
+ if (!is_type_pointer(type_left))
+ return type_error_type;
+ return type_left->pointer.points_to;
+ }
- case EXPR_STRING_LITERAL: {
- size_t size = expression->string.value.size;
- return make_array_type(type_char, size, TYPE_QUALIFIER_NONE);
- }
+ case EXPR_STRING_LITERAL: {
+ size_t size = expression->string_literal.value.size;
+ return make_array_type(type_char, size, TYPE_QUALIFIER_NONE);
+ }
- case EXPR_WIDE_STRING_LITERAL: {
- size_t size = expression->wide_string.value.size;
- return make_array_type(type_wchar_t, size, TYPE_QUALIFIER_NONE);
- }
+ case EXPR_WIDE_STRING_LITERAL: {
+ /* the array length is counted in characters, not in UTF-8 bytes */
+ size_t size = wstrlen(&expression->string_literal.value);
+ return make_array_type(type_wchar_t, size, TYPE_QUALIFIER_NONE);
+ }
- case EXPR_COMPOUND_LITERAL:
- return expression->compound_literal.type;
+ case EXPR_COMPOUND_LITERAL:
+ return expression->compound_literal.type;
- default:
- return expression->base.type;
+ default:
+ break;
}
+ return expression->base.type;
}
/**
parse_error_expected("while parsing identifier", T_IDENTIFIER, NULL);
return create_error_entity(sym_anonymous, ENTITY_VARIABLE);
}
- symbol = token.v.symbol;
+ symbol = token.symbol;
pos = *HERE;
next_token();
TYPE_SPECIFIERS
return parse_cast();
case T_IDENTIFIER:
- if (is_typedef_symbol(token.v.symbol)) {
+ if (is_typedef_symbol(token.symbol)) {
return parse_cast();
}
}
T_IDENTIFIER, NULL);
return NULL;
}
- result->symbol = token.v.symbol;
+ result->symbol = token.symbol;
next_token();
designator_t *last_designator = result;
}
designator_t *designator = allocate_ast_zero(sizeof(result[0]));
designator->source_position = *HERE;
- designator->symbol = token.v.symbol;
+ designator->symbol = token.symbol;
next_token();
last_designator->next = designator;
parse_error_expected("while parsing label address", T_IDENTIFIER, NULL);
goto end_error;
}
- symbol_t *symbol = token.v.symbol;
+ symbol_t *symbol = token.symbol;
next_token();
label_t *label = get_label(symbol);
static expression_t *parse_noop_expression(void)
{
/* the result is a (int)0 */
- expression_t *cnst = allocate_expression_zero(EXPR_CONST);
- cnst->base.type = type_int;
- cnst->conste.v.int_value = 0;
- cnst->conste.is_ms_noop = true;
+ expression_t *literal = allocate_expression_zero(EXPR_LITERAL_MS_NOOP);
+ literal->base.type = type_int;
+ literal->base.source_position = token.source_position;
+ literal->literal.value.begin = "__noop";
+ literal->literal.value.size = 6;
eat(T___noop);
expect(')', end_error);
end_error:
- return cnst;
+ return literal;
}
/**
static expression_t *parse_primary_expression(void)
{
switch (token.type) {
- case T_false: return parse_bool_const(false);
- case T_true: return parse_bool_const(true);
- case T_INTEGER: return parse_int_const();
- case T_CHARACTER_CONSTANT: return parse_character_constant();
- case T_WIDE_CHARACTER_CONSTANT: return parse_wide_character_constant();
- case T_FLOATINGPOINT: return parse_float_const();
- case T_STRING_LITERAL:
- case T_WIDE_STRING_LITERAL: return parse_string_const();
- case T___FUNCTION__:
- case T___func__: return parse_function_keyword();
- case T___PRETTY_FUNCTION__: return parse_pretty_function_keyword();
- case T___FUNCSIG__: return parse_funcsig_keyword();
- case T___FUNCDNAME__: return parse_funcdname_keyword();
- case T___builtin_offsetof: return parse_offsetof();
- case T___builtin_va_start: return parse_va_start();
- case T___builtin_va_arg: return parse_va_arg();
- case T___builtin_va_copy: return parse_va_copy();
- case T___builtin_isgreater:
- case T___builtin_isgreaterequal:
- case T___builtin_isless:
- case T___builtin_islessequal:
- case T___builtin_islessgreater:
- case T___builtin_isunordered: return parse_compare_builtin();
- case T___builtin_constant_p: return parse_builtin_constant();
- case T___builtin_types_compatible_p: return parse_builtin_types_compatible();
- case T__assume: return parse_assume();
- case T_ANDAND:
- if (GNU_MODE)
- return parse_label_address();
- break;
+ case T_false: return parse_boolean_literal(false);
+ case T_true: return parse_boolean_literal(true);
+ case T_INTEGER:
+ case T_INTEGER_OCTAL:
+ case T_INTEGER_HEXADECIMAL:
+ case T_FLOATINGPOINT:
+ case T_FLOATINGPOINT_HEXADECIMAL: return parse_number_literal();
+ case T_CHARACTER_CONSTANT: return parse_character_constant();
+ case T_WIDE_CHARACTER_CONSTANT: return parse_wide_character_constant();
+ case T_STRING_LITERAL:
+ case T_WIDE_STRING_LITERAL: return parse_string_literal();
+ case T___FUNCTION__:
+ case T___func__: return parse_function_keyword();
+ case T___PRETTY_FUNCTION__: return parse_pretty_function_keyword();
+ case T___FUNCSIG__: return parse_funcsig_keyword();
+ case T___FUNCDNAME__: return parse_funcdname_keyword();
+ case T___builtin_offsetof: return parse_offsetof();
+ case T___builtin_va_start: return parse_va_start();
+ case T___builtin_va_arg: return parse_va_arg();
+ case T___builtin_va_copy: return parse_va_copy();
+ case T___builtin_isgreater:
+ case T___builtin_isgreaterequal:
+ case T___builtin_isless:
+ case T___builtin_islessequal:
+ case T___builtin_islessgreater:
+ case T___builtin_isunordered: return parse_compare_builtin();
+ case T___builtin_constant_p: return parse_builtin_constant();
+ case T___builtin_types_compatible_p: return parse_builtin_types_compatible();
+ case T__assume: return parse_assume();
+ case T_ANDAND:
+ if (GNU_MODE)
+ return parse_label_address();
+ break;
- case '(': return parse_parenthesized_expression();
- case T___noop: return parse_noop_expression();
+ case '(': return parse_parenthesized_expression();
+ case T___noop: return parse_noop_expression();
- /* Gracefully handle type names while parsing expressions. */
- case T_COLONCOLON:
+ /* Gracefully handle type names while parsing expressions. */
+ case T_COLONCOLON:
+ return parse_reference();
+ case T_IDENTIFIER:
+ if (!is_typedef_symbol(token.symbol)) {
return parse_reference();
- case T_IDENTIFIER:
- if (!is_typedef_symbol(token.v.symbol)) {
- return parse_reference();
- }
- /* FALLTHROUGH */
- TYPENAME_START {
- source_position_t const pos = *HERE;
- type_t const *const type = parse_typename();
- errorf(&pos, "encountered type '%T' while parsing expression", type);
- return create_invalid_expression();
}
+ /* FALLTHROUGH */
+ TYPENAME_START {
+ source_position_t const pos = *HERE;
+ type_t const *const type = parse_typename();
+ errorf(&pos, "encountered type '%T' while parsing expression", type);
+ return create_invalid_expression();
+ }
}
errorf(HERE, "unexpected token %K, expected an expression", &token);
parse_error_expected("while parsing select", T_IDENTIFIER, NULL);
return create_invalid_expression();
}
- symbol_t *symbol = token.v.symbol;
+ symbol_t *symbol = token.symbol;
next_token();
type_t *const orig_type = addr->base.type;
expr = expr->unary.value;
}
- if (expr->kind == EXPR_STRING_LITERAL ||
- expr->kind == EXPR_WIDE_STRING_LITERAL) {
+ if (expr->kind == EXPR_STRING_LITERAL
+ || expr->kind == EXPR_WIDE_STRING_LITERAL) {
warningf(&expr->base.source_position,
"comparison with string literal results in unspecified behaviour");
}
case EXPR_INVALID: return true; /* do NOT warn */
case EXPR_REFERENCE: return false;
case EXPR_REFERENCE_ENUM_VALUE: return false;
+ case EXPR_LABEL_ADDRESS: return false;
+
/* suppress the warning for microsoft __noop operations */
- case EXPR_CONST: return expr->conste.is_ms_noop;
- case EXPR_CHARACTER_CONSTANT: return false;
- case EXPR_WIDE_CHARACTER_CONSTANT: return false;
+ case EXPR_LITERAL_MS_NOOP: return true;
+ case EXPR_LITERAL_BOOLEAN:
+ case EXPR_LITERAL_CHARACTER:
+ case EXPR_LITERAL_WIDE_CHARACTER:
+ case EXPR_LITERAL_INTEGER:
+ case EXPR_LITERAL_INTEGER_OCTAL:
+ case EXPR_LITERAL_INTEGER_HEXADECIMAL:
+ case EXPR_LITERAL_FLOATINGPOINT:
+ case EXPR_LITERAL_FLOATINGPOINT_HEXADECIMAL: return false;
case EXPR_STRING_LITERAL: return false;
case EXPR_WIDE_STRING_LITERAL: return false;
- case EXPR_LABEL_ADDRESS: return false;
case EXPR_CALL: {
const call_expression_t *const call = &expr->call;
T_IDENTIFIER, NULL);
return NULL;
}
- argument->symbol = token.v.symbol;
+ argument->symbol = token.symbol;
expect(']', end_error);
}
static statement_t *parse_label_statement(void)
{
assert(token.type == T_IDENTIFIER);
- symbol_t *symbol = token.v.symbol;
+ symbol_t *symbol = token.symbol;
label_t *label = get_label(symbol);
statement_t *const statement = allocate_statement_zero(STATEMENT_LABEL);
statement->gotos.expression = expression;
} else if (token.type == T_IDENTIFIER) {
- symbol_t *symbol = token.v.symbol;
+ symbol_t *symbol = token.symbol;
next_token();
statement->gotos.label = get_label(symbol);
} else {
T_IDENTIFIER, NULL);
goto end_error;
}
- symbol_t *symbol = token.v.symbol;
+ symbol_t *symbol = token.symbol;
entity_t *entity = get_entity(symbol, NAMESPACE_LABEL);
if (entity != NULL && entity->base.parent_scope == current_scope) {
errorf(HERE, "multiple definitions of '__label__ %Y' (previous definition %P)",
symbol_t *symbol = NULL;
if (token.type == T_IDENTIFIER) {
- symbol = token.v.symbol;
+ symbol = token.symbol;
next_token();
entity = get_entity(symbol, NAMESPACE_NORMAL);
token_type_t la1_type = (token_type_t)look_ahead(1)->type;
if (la1_type == ':') {
statement = parse_label_statement();
- } else if (is_typedef_symbol(token.v.symbol)) {
+ } else if (is_typedef_symbol(token.symbol)) {
statement = parse_declaration_statement();
} else {
/* it's an identifier, the grammar says this must be an
switch (la1_type) {
case '&':
case '*':
- if (get_entity(token.v.symbol, NAMESPACE_NORMAL) != NULL)
+ if (get_entity(token.symbol, NAMESPACE_NORMAL) != NULL)
goto expression_statment;
/* FALLTHROUGH */
const char *const result = string;
#endif
- pp_token.type = TP_STRING_LITERAL;
- pp_token.v.string.begin = result;
- pp_token.v.string.size = size;
+ pp_token.type = TP_STRING_LITERAL;
+ pp_token.literal.begin = result;
+ pp_token.literal.size = size;
}
static void parse_wide_character_constant(void)
/* TODO... */
}
-static void parse_wide_string_literal(void)
-{
- const unsigned start_linenr = input.position.linenr;
-
- assert(CC == '"');
- next_char();
-
- while(1) {
- switch(CC) {
- case '\\': {
- wchar_rep_t tc = parse_escape_sequence();
- obstack_grow(&symbol_obstack, &tc, sizeof(tc));
- break;
- }
-
- case EOF: {
- source_position_t source_position;
- source_position.input_name = pp_token.source_position.input_name;
- source_position.linenr = start_linenr;
- errorf(&source_position, "string has no end");
- pp_token.type = TP_ERROR;
- return;
- }
-
- case '"':
- next_char();
- goto end_of_string;
-
- default: {
- wchar_rep_t tc = CC;
- obstack_grow(&symbol_obstack, &tc, sizeof(tc));
- next_char();
- break;
- }
- }
- }
-
-end_of_string:;
- /* add finishing 0 to the string */
- static const wchar_rep_t nul = L'\0';
- obstack_grow(&symbol_obstack, &nul, sizeof(nul));
-
- const size_t size
- = (size_t)obstack_object_size(&symbol_obstack) / sizeof(wchar_rep_t);
- const wchar_rep_t *const string = obstack_finish(&symbol_obstack);
-
-#if 0 /* TODO hash */
- /* check if there is already a copy of the string */
- const wchar_rep_t *const result = strset_insert(&stringset, string);
- if(result != string) {
- obstack_free(&symbol_obstack, string);
- }
-#else
- const wchar_rep_t *const result = string;
-#endif
-
- pp_token.type = TP_WIDE_STRING_LITERAL;
- pp_token.v.wide_string.begin = result;
- pp_token.v.wide_string.size = size;
-}
-
static void parse_character_constant(void)
{
const unsigned start_linenr = input.position.linenr;
const size_t size = (size_t)obstack_object_size(&symbol_obstack);
const char *const string = obstack_finish(&symbol_obstack);
- pp_token.type = TP_CHARACTER_CONSTANT;
- pp_token.v.string.begin = string;
- pp_token.v.string.size = size;
+ pp_token.type = TP_CHARACTER_CONSTANT;
+ pp_token.literal.begin = string;
+ pp_token.literal.size = size;
}
#define SYMBOL_CHARS_WITHOUT_E_P \
return;
/* if it was an identifier then we might need to expand again */
- pp_definition_t *symbol_definition = pp_token.v.symbol->pp_definition;
+ pp_definition_t *symbol_definition = pp_token.symbol->pp_definition;
if(symbol_definition != NULL && !symbol_definition->is_expanding) {
symbol_definition->parent_expansion = definition;
symbol_definition->expand_pos = 0;
/* might be a wide string or character constant ( L"string"/L'c' ) */
if (CC == '"' && string[0] == 'L' && string[1] == '\0') {
obstack_free(&symbol_obstack, string);
- parse_wide_string_literal();
+ /* TODO */
return;
} else if (CC == '\'' && string[0] == 'L' && string[1] == '\0') {
obstack_free(&symbol_obstack, string);
symbol_t *symbol = symbol_table_insert(string);
- pp_token.type = symbol->pp_ID;
- pp_token.v.symbol = symbol;
+ pp_token.type = symbol->pp_ID;
+ pp_token.symbol = symbol;
/* we can free the memory from symbol obstack if we already had an entry in
* the symbol table */
size_t size = obstack_object_size(&symbol_obstack);
char *string = obstack_finish(&symbol_obstack);
- pp_token.type = TP_NUMBER;
- pp_token.v.string.begin = string;
- pp_token.v.string.size = size;
+ pp_token.type = TP_NUMBER;
+ pp_token.literal.begin = string;
+ pp_token.literal.size = size;
}
-
#define MAYBE_PROLOG \
next_char(); \
while(1) { \
switch(pp_token.type) {
case TP_IDENTIFIER:
- fputs(pp_token.v.symbol->string, out);
+ fputs(pp_token.symbol->string, out);
break;
case TP_NUMBER:
- fputs(pp_token.v.string.begin, out);
+ fputs(pp_token.literal.begin, out);
break;
case TP_STRING_LITERAL:
fputc('"', out);
- fputs(pp_token.v.string.begin, out);
+ fputs(pp_token.literal.begin, out);
fputc('"', out);
break;
case '\n':
return true;
}
-static bool wide_strings_equal(const wide_string_t *string1,
- const wide_string_t *string2)
-{
- size_t size = string1->size;
- if(size != string2->size)
- return false;
-
- const wchar_rep_t *c1 = string1->begin;
- const wchar_rep_t *c2 = string2->begin;
- for(size_t i = 0; i < size; ++i, ++c1, ++c2) {
- if(*c1 != *c2)
- return false;
- }
- return true;
-}
-
static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
{
if(token1->type != token2->type)
/* TODO */
return false;
case TP_IDENTIFIER:
- return token1->v.symbol == token2->v.symbol;
+ return token1->symbol == token2->symbol;
case TP_NUMBER:
case TP_CHARACTER_CONSTANT:
case TP_STRING_LITERAL:
- return strings_equal(&token1->v.string, &token2->v.string);
+ return strings_equal(&token1->literal, &token2->literal);
- case TP_WIDE_CHARACTER_CONSTANT:
- case TP_WIDE_STRING_LITERAL:
- return wide_strings_equal(&token1->v.wide_string,
- &token2->v.wide_string);
default:
return true;
}
"expected identifier after #define, got '%t'", &pp_token);
goto error_out;
}
- symbol_t *symbol = pp_token.v.symbol;
+ symbol_t *symbol = pp_token.symbol;
pp_definition_t *new_definition
= obstack_alloc(&pp_obstack, sizeof(new_definition[0]));
}
break;
case TP_IDENTIFIER:
- obstack_ptr_grow(&pp_obstack, pp_token.v.symbol);
+ obstack_ptr_grow(&pp_obstack, pp_token.symbol);
next_preprocessing_token();
if (pp_token.type == ',') {
return;
}
- symbol_t *symbol = pp_token.v.symbol;
+ symbol_t *symbol = pp_token.symbol;
symbol->pp_definition = NULL;
next_preprocessing_token();
/* just take the true case in the hope to avoid further errors */
condition = true;
} else {
- symbol_t *symbol = pp_token.v.symbol;
+ symbol_t *symbol = pp_token.symbol;
pp_definition_t *pp_definition = symbol->pp_definition;
next_preprocessing_token();
static FILE* out;
+/** Write the single byte c to the file selected with print_to_file(). */
+static void print_char_file(const char c)
+{
+ fputc(c, out);
+}
+
static void print_string_file(const char *str)
{
fputs(str, out);
vfprintf(out, format, ap);
}
-static void print_char_file(wchar_rep_t c)
-{
- const unsigned tc = (unsigned) c;
- if (tc < 0x80) {
- fputc(tc, out);
- } else if (tc < 0x800) {
- fputc(0xC0 | (tc >> 6), out);
- fputc(0x80 | (tc & 0x3F), out);
- } else if (tc < 0x10000) {
- fputc(0xE0 | ( tc >> 12), out);
- fputc(0x80 | ((tc >> 6) & 0x3F), out);
- fputc(0x80 | ( tc & 0x3F), out);
- } else {
- fputc(0xF0 | ( tc >> 18), out);
- fputc(0x80 | ((tc >> 12) & 0x3F), out);
- fputc(0x80 | ((tc >> 6) & 0x3F), out);
- fputc(0x80 | ( tc & 0x3F), out);
- }
-}
-
void print_to_file(FILE *new_out)
{
out = new_out;
static struct obstack *obst;
+/** Append the single byte c to the obstack selected with print_to_obstack(). */
+static void print_char_obstack(const char c)
+{
+ obstack_1grow(obst, c);
+}
+
static void print_string_obstack(const char *str)
{
size_t len = strlen(str);
obstack_vprintf(obst, format, ap);
}
-static void print_char_obstack(wchar_rep_t c)
-{
- const unsigned tc = (unsigned) c;
- if (tc < 0x80) {
- obstack_1grow(obst, tc);
- } else if (tc < 0x800) {
- obstack_1grow(obst, 0xC0 | (tc >> 6));
- obstack_1grow(obst, 0x80 | (tc & 0x3F));
- } else if (tc < 0x10000) {
- obstack_1grow(obst, 0xE0 | ( tc >> 12));
- obstack_1grow(obst, 0x80 | ((tc >> 6) & 0x3F));
- obstack_1grow(obst, 0x80 | ( tc & 0x3F));
- } else {
- obstack_1grow(obst, 0xF0 | ( tc >> 18));
- obstack_1grow(obst, 0x80 | ((tc >> 12) & 0x3F));
- obstack_1grow(obst, 0x80 | ((tc >> 6) & 0x3F));
- obstack_1grow(obst, 0x80 | ( tc & 0x3F));
- }
-}
-
void print_to_obstack(struct obstack *new_obst)
{
obst = new_obst;
static char *buffer_pos;
static char *buffer_end;
-static inline void buffer_add_char(int c)
+static void print_char_buffer(const char c)
{
if (buffer_pos == buffer_end)
return;
static void print_string_buffer(const char *str)
{
for (const char *c = str; *c != '\0'; ++c) {
- buffer_add_char(*c);
+ print_char_buffer(*c);
}
}
buffer_pos += written < size ? written : size;
}
-static void print_char_buffer(wchar_rep_t c)
-{
- const unsigned tc = (unsigned) c;
- if (tc < 0x80) {
- buffer_add_char(tc);
- } else if (tc < 0x800) {
- buffer_add_char(0xC0 | (tc >> 6));
- buffer_add_char(0x80 | (tc & 0x3F));
- } else if (tc < 0x10000) {
- buffer_add_char(0xE0 | ( tc >> 12));
- buffer_add_char(0x80 | ((tc >> 6) & 0x3F));
- buffer_add_char(0x80 | ( tc & 0x3F));
- } else {
- buffer_add_char(0xF0 | ( tc >> 18));
- buffer_add_char(0x80 | ((tc >> 12) & 0x3F));
- buffer_add_char(0x80 | ((tc >> 6) & 0x3F));
- buffer_add_char(0x80 | ( tc & 0x3F));
- }
-}
-
void print_to_buffer(char *buffer, size_t buffer_size)
{
buffer_pos = buffer;
void (*print_string)(const char *str) = print_string_file;
void (*print_vformat)(const char *format, va_list ap) = print_vformat_file;
-void (*print_char)(wchar_rep_t c) = print_char_file;
+void (*print_char)(const char c) = print_char_file;
void printer_push(void)
{
/** print a string into current output */
extern void (*print_string)(const char *str);
extern void (*print_vformat)(const char *format, va_list ap);
-/** print a single unicode character to current output (encoded as UTF-8) */
-extern void (*print_char)(wchar_rep_t c);
+extern void (*print_char)(const char c);
/** print a printf style format string to current output */
static inline void __attribute__((format(printf,1,2))) print_format(const char *format, ...)
#ifndef STRING_REP_H
#define STRING_REP_H
-#include <wchar.h>
-
-typedef wchar_t wchar_rep_t;
+#include <assert.h>
+#include <stdlib.h>
typedef struct string_t {
- const char *begin;
- size_t size;
+ const char *begin; /**< UTF-8 encoded string, the last character is
+ * guaranteed to be 0 */
+ size_t size; /**< size of string in bytes (not characters) */
} string_t;
-typedef struct wide_string_t {
- const wchar_rep_t *begin;
- size_t size;
-} wide_string_t;
+typedef unsigned int utf32;
+#define UTF32_PRINTF_FORMAT "%u"
+
+/**
+ * Decode one UTF-8 encoded character from a string.
+ * Warning: This function only works for valid UTF-8 input. The behaviour
+ * is undefined for invalid UTF-8 input: continuation bytes are consumed
+ * without being checked, and the lead byte of a 4 byte sequence is only
+ * checked by an assert.
+ *
+ * @param p A pointer to a pointer into the string. The pointed-to pointer
+ * is advanced past every byte of the decoded character
+ * @return the decoded character value
+ */
+static inline utf32 read_utf8_char(const char **p)
+{
+ const unsigned char *c = (const unsigned char *) *p;
+ utf32 result;
+
+ if ((*c & 0x80) == 0) {
+ /* 1 byte encoding: 0b0??????? */
+ result = *c++;
+ } else if ((*c & 0xE0) == 0xC0) {
+ /* 2 byte encoding: 0b110?????, 0b10?????? */
+ result = *c++ & 0x1F;
+ result = (result << 6) | (*c++ & 0x3F);
+ } else if ((*c & 0xF0) == 0xE0) {
+ /* 3 byte encoding: 0b1110????, 0b10??????, 0b10?????? */
+ result = *c++ & 0x0F;
+ result = (result << 6) | (*c++ & 0x3F);
+ result = (result << 6) | (*c++ & 0x3F);
+ } else {
+ /* 4 byte encoding: 0b11110???, 0b10??????, 0b10??????, 0b10?????? */
+ assert((*c & 0xF8) == 0xF0);
+ result = *c++ & 0x07;
+ result = (result << 6) | (*c++ & 0x3F);
+ result = (result << 6) | (*c++ & 0x3F);
+ result = (result << 6) | (*c++ & 0x3F);
+ }
+
+ *p = (const char*) c;
+ return result;
+}
+
+/**
+ * Count the UTF-8 encoded characters within the first string->size bytes
+ * of a string. Decoding is done by read_utf8_char(), so the string has to
+ * be valid UTF-8.
+ */
+static inline size_t wstrlen(const string_t *string)
+{
+ size_t result = 0;
+ const char *p = string->begin;
+ const char *end = p + string->size;
+ while (p < end) {
+ read_utf8_char(&p);
+ ++result;
+ }
+ return result;
+}
#endif
return token_symbols[token->type];
}
+/**
+ * Write all string->size bytes of a string to f, one byte at a time; a 0
+ * byte inside the string does not end the output.
+ */
+static void print_stringrep(const string_t *string, FILE *f)
+{
+ for (size_t i = 0; i < string->size; ++i) {
+ fputc(string->begin[i], f);
+ }
+}
+
void print_token(FILE *f, const token_t *token)
{
switch(token->type) {
case T_IDENTIFIER:
- fprintf(f, "identifier '%s'", token->v.symbol->string);
+ fprintf(f, "identifier '%s'", token->symbol->string);
break;
case T_INTEGER:
- fprintf(f, "integer number '%lld'", token->v.intvalue);
- break;
+ case T_INTEGER_OCTAL:
+ case T_INTEGER_HEXADECIMAL:
case T_FLOATINGPOINT:
- fprintf(f, "floating-point number '%LF'", token->v.floatvalue);
+ case T_FLOATINGPOINT_HEXADECIMAL:
+ print_token_type(f, (token_type_t)token->type);
+ fputs(" '", f);
+ print_stringrep(&token->literal, f);
+ if (token->symbol != NULL)
+ fputs(token->symbol->string, f);
+ fputc('\'', f);
break;
+ case T_WIDE_STRING_LITERAL:
case T_STRING_LITERAL:
- fprintf(f, "string \"%s\"", token->v.string.begin);
+ print_token_type(f, (token_type_t)token->type);
+ fprintf(f, " \"%s\"", token->literal.begin);
+ break;
+ case T_CHARACTER_CONSTANT:
+ case T_WIDE_CHARACTER_CONSTANT:
+ print_token_type(f, (token_type_t)token->type);
+ fputs(" \'", f);
+ print_stringrep(&token->literal, f);
+ fputs("'", f);
break;
default:
fputc('\'', f);
{
switch((preprocessor_token_type_t) token->type) {
case TP_IDENTIFIER:
- fprintf(f, "identifier '%s'", token->v.symbol->string);
+ fprintf(f, "identifier '%s'", token->symbol->string);
break;
case TP_NUMBER:
- fprintf(f, "number '%s'", token->v.string.begin);
+ fprintf(f, "number '%s'", token->literal.begin);
break;
case TP_STRING_LITERAL:
- fprintf(f, "string \"%s\"", token->v.string.begin);
+ fprintf(f, "string \"%s\"", token->literal.begin);
break;
default:
print_pp_token_type(f, (preprocessor_token_type_t) token->type);
extern const source_position_t builtin_source_position;
typedef struct {
- int type;
- union {
- symbol_t *symbol;
- long long intvalue;
- long double floatvalue;
- string_t string;
- wide_string_t wide_string;
- } v;
- type_t *datatype;
+ int type; /**< token kind (a T_... or TP_... constant) */
+ symbol_t *symbol; /**< symbol of an identifier; for number tokens the suffix, or NULL if the number has no suffix */
+ string_t literal; /**< text of number, character and string tokens */
source_position_t source_position;
} token_t;
#define TS(x,str,val)
#endif
-TS(IDENTIFIER, "identifier", = 256)
-TS(INTEGER, "integer number",)
-TS(CHARACTER_CONSTANT, "character constant",)
-TS(WIDE_CHARACTER_CONSTANT, "wide character constant",)
-TS(FLOATINGPOINT, "floatingpoint number",)
-TS(STRING_LITERAL, "string literal",)
-TS(WIDE_STRING_LITERAL, "wide string literal",)
+TS(IDENTIFIER, "identifier", = 256)
+TS(INTEGER, "integer number",)
+TS(INTEGER_OCTAL, "octal integer number",)
+TS(INTEGER_HEXADECIMAL, "hexadecimal integer number",)
+TS(FLOATINGPOINT, "floatingpoint number",)
+TS(FLOATINGPOINT_HEXADECIMAL, "hexadecimal floatingpoint number",)
+TS(CHARACTER_CONSTANT, "character constant",)
+TS(WIDE_CHARACTER_CONSTANT, "wide character constant",)
+TS(STRING_LITERAL, "string literal",)
+TS(WIDE_STRING_LITERAL, "wide string literal",)
#define ALTERNATE(name, val) T(_CXX, name, #name, val)
#define PUNCTUATOR(name, string, val) T(_ALL, name, string, val)
walk_expression(expr->va_copye.dst, callback, env);
return;
+ EXPR_LITERAL_CASES
case EXPR_INVALID:
case EXPR_OFFSETOF:
case EXPR_REFERENCE:
case EXPR_REFERENCE_ENUM_VALUE:
- case EXPR_CONST:
- case EXPR_CHARACTER_CONSTANT:
- case EXPR_WIDE_CHARACTER_CONSTANT:
case EXPR_STRING_LITERAL:
case EXPR_WIDE_STRING_LITERAL:
case EXPR_FUNCNAME:
static void write_expression(const expression_t *expression)
{
- const const_expression_t *constant;
- /* TODO */
switch(expression->kind) {
- case EXPR_CONST:
- constant = &expression->conste;
- if(is_type_integer(expression->base.type)) {
- fprintf(out, "%lld", constant->v.int_value);
- } else {
- fprintf(out, "%Lf", constant->v.float_value);
- }
+ case EXPR_LITERAL_INTEGER:
+ fprintf(out, "%s", expression->literal.value.begin);
break;
EXPR_UNARY_CASES
write_unary_expression((const unary_expression_t*) expression);
static void write_expression(const expression_t *expression)
{
- const const_expression_t *constant;
/* TODO */
switch(expression->kind) {
- case EXPR_CONST:
- constant = &expression->conste;
- if(is_type_integer(expression->base.type)) {
- fprintf(out, "%lld", constant->v.int_value);
- } else {
- fprintf(out, "%Lf", constant->v.float_value);
- }
+ case EXPR_LITERAL_INTEGER:
+ fprintf(out, "%s", expression->literal.value.begin);
break;
case EXPR_REFERENCE_ENUM_VALUE: {
/* UHOH... hacking */