#include <config.h>
#include <assert.h>
+#include <ctype.h>
#include <stdarg.h>
#include <stdbool.h>
#include "parser.h"
#include "diagnostic.h"
#include "format_check.h"
-#include "lexer.h"
+#include "preprocessor.h"
#include "symbol_t.h"
#include "token_t.h"
#include "types.h"
#define POP_EXTENSION() \
((void)(in_gcc_extension = old_gcc_extension))
-/** special symbol used for anonymous entities. */
-static symbol_t *sym_anonymous = NULL;
-
/** The token anchor set */
static unsigned short token_anchor_set[T_LAST_TOKEN];
case '~': \
case T_ANDAND: \
case T_CHARACTER_CONSTANT: \
- case T_FLOATINGPOINT: \
- case T_INTEGER: \
+ case T_NUMBER: \
case T_MINUSMINUS: \
case T_PLUSPLUS: \
case T_STRING_LITERAL: \
{
static const size_t sizes[] = {
[INITIALIZER_VALUE] = sizeof(initializer_value_t),
- [INITIALIZER_STRING] = sizeof(initializer_string_t),
+ [INITIALIZER_STRING] = sizeof(initializer_value_t),
[INITIALIZER_LIST] = sizeof(initializer_list_t),
[INITIALIZER_DESIGNATOR] = sizeof(initializer_designator_t)
};
static inline void next_token(void)
{
token = lookahead_buffer[lookahead_bufpos];
- lookahead_buffer[lookahead_bufpos] = lexer_token;
- lexer_next_token();
+ lookahead_buffer[lookahead_bufpos] = pp_token;
+ next_preprocessing_token();
lookahead_bufpos = (lookahead_bufpos + 1) % MAX_LOOKAHEAD;
#endif
}
-#define eat(token_kind) (assert(token.kind == (token_kind)), next_token())
+/**
+ * Advance past the current token, which the caller expects to be of the
+ * given kind. The expectation is checked with assert() only.
+ */
+static inline void eat(token_kind_t const kind)
+{
+ /* Keeps the parameter referenced independently of the assert() below. */
+ (void)kind;
+ assert(token.kind == kind);
+ next_token();
+}
-static inline bool next_if(token_kind_t const type)
+static inline bool next_if(token_kind_t const kind)
{
- if (token.kind == type) {
- eat(type);
+ if (token.kind == kind) {
+ eat(kind);
return true;
} else {
return false;
obstack_grow(&ast_obstack, s->begin, s->size);
}
-static string_t finish_string(void)
+/**
+ * Close the string that is currently being grown on ast_obstack.
+ *
+ * A terminating NUL byte is appended before the object is finished; the
+ * reported size does not count it.
+ *
+ * @param enc  encoding stored in the returned string
+ * @return     the finished string
+ */
+static string_t finish_string(string_encoding_t const enc)
{
obstack_1grow(&ast_obstack, '\0');
size_t const size = obstack_object_size(&ast_obstack) - 1;
char const *const string = obstack_finish(&ast_obstack);
- return (string_t){ string, size };
+ return (string_t){ .begin = string, .size = size, .encoding = enc };
}
-static string_t concat_string_literals(string_encoding_t *const out_enc)
+/**
+ * Consume a run of adjacent string literal tokens and return their
+ * concatenation.
+ *
+ * A single literal is returned as is. For several literals the contents are
+ * joined on ast_obstack; the result gets the encoding of the last literal
+ * that is not STRING_ENCODING_CHAR, or STRING_ENCODING_CHAR if all literals
+ * are plain.
+ *
+ * @return the (possibly concatenated) string including its encoding
+ */
+static string_t concat_string_literals(void)
{
assert(token.kind == T_STRING_LITERAL);
- string_t result;
- string_encoding_t enc = token.string.encoding;
+ string_t result;
if (look_ahead(1)->kind == T_STRING_LITERAL) {
- append_string(&token.string.string);
+ /* Read the encoding of the first literal before eat() replaces the
+ * current token; otherwise a leading wide literal would be forgotten. */
+ string_encoding_t enc = token.literal.string.encoding;
+ append_string(&token.literal.string);
eat(T_STRING_LITERAL);
warningf(WARN_TRADITIONAL, HERE, "traditional C rejects string constant concatenation");
do {
- if (token.string.encoding != STRING_ENCODING_CHAR) {
- enc = token.string.encoding;
+ if (token.literal.string.encoding != STRING_ENCODING_CHAR) {
+ enc = token.literal.string.encoding;
}
- append_string(&token.string.string);
+ append_string(&token.literal.string);
eat(T_STRING_LITERAL);
} while (token.kind == T_STRING_LITERAL);
- result = finish_string();
+ result = finish_string(enc);
} else {
- result = token.string.string;
+ result = token.literal.string;
eat(T_STRING_LITERAL);
}
- *out_enc = enc;
return result;
}
static string_t parse_string_literals(char const *const context)
{
if (!skip_till(T_STRING_LITERAL, context))
- return (string_t){ "", 0 };
+ return (string_t){ "", 0, STRING_ENCODING_CHAR };
- string_encoding_t enc;
source_position_t const pos = *HERE;
- string_t const res = concat_string_literals(&enc);
+ string_t const res = concat_string_literals();
- if (enc != STRING_ENCODING_CHAR) {
+ if (res.encoding != STRING_ENCODING_CHAR) {
errorf(&pos, "expected plain string literal, got wide string literal");
}
}
}
-static initializer_t *initializer_from_string(array_type_t *const type, string_encoding_t const enc, string_t const *const string)
-{
- /* TODO: check len vs. size of array type */
- (void) type;
-
- initializer_t *initializer = allocate_initializer_zero(INITIALIZER_STRING);
- initializer->string.encoding = enc;
- initializer->string.string = *string;
-
- return initializer;
-}
-
/**
* Build an initializer from a given expression.
*/
if (expression->kind == EXPR_STRING_LITERAL && is_type_array(type)) {
array_type_t *const array_type = &type->array;
type_t *const element_type = skip_typeref(array_type->element_type);
- switch (expression->string_literal.encoding) {
+ switch (expression->string_literal.value.encoding) {
case STRING_ENCODING_CHAR: {
if (is_type_atomic(element_type, ATOMIC_TYPE_CHAR) ||
is_type_atomic(element_type, ATOMIC_TYPE_SCHAR) ||
case STRING_ENCODING_WIDE: {
type_t *bare_wchar_type = skip_typeref(type_wchar_t);
if (get_unqualified_type(element_type) == bare_wchar_type) {
-make_string_init:
- return initializer_from_string(array_type, expression->string_literal.encoding, &expression->string_literal.value);
+make_string_init:;
+ initializer_t *const init = allocate_initializer_zero(INITIALIZER_STRING);
+ init->value.value = expression;
+ return init;
}
break;
}
size = max_index + 1;
break;
- case INITIALIZER_STRING:
- size = get_string_len(result->string.encoding, &result->string.string) + 1;
+ case INITIALIZER_STRING: {
+ size = get_string_len(&get_init_string(result)->value) + 1;
break;
+ }
case INITIALIZER_DESIGNATOR:
case INITIALIZER_VALUE:
}
typedef enum specifiers_t {
+ SPECIFIER_NONE = 0,
SPECIFIER_SIGNED = 1 << 0,
SPECIFIER_UNSIGNED = 1 << 1,
SPECIFIER_LONG = 1 << 2,
} else {
errorf(pos, "multiple datatypes in declaration");
}
- goto end_error;
+ specifiers->type = type_error_type;
+ return;
}
}
if (specifiers->attributes != NULL)
type = handle_type_attributes(specifiers->attributes, type);
specifiers->type = type;
- return;
-
-end_error:
- specifiers->type = type_error_type;
}
static type_qualifiers_t parse_type_qualifiers(void)
symbol_t *iter_symbol = iter->base.symbol;
if (iter_symbol == NULL) {
type_t *type = iter->declaration.type;
- if (type->kind != TYPE_COMPOUND_STRUCT
- && type->kind != TYPE_COMPOUND_UNION)
+ if (!is_type_compound(type))
continue;
compound_t *sub_compound = type->compound.compound;
+/**
+ * Parse a (possibly concatenated) string literal into an
+ * EXPR_STRING_LITERAL expression whose type follows the string's encoding.
+ */
static expression_t *parse_string_literal(void)
{
expression_t *const expr = allocate_expression_zero(EXPR_STRING_LITERAL);
- expr->string_literal.value = concat_string_literals(&expr->string_literal.encoding);
- expr->base.type = get_string_type(expr->string_literal.encoding);
+ string_t const value = concat_string_literals();
+ expr->string_literal.value = value;
+ expr->base.type = get_string_type(value.encoding);
return expr;
}
return literal;
}
-static void warn_traditional_suffix(void)
+/**
+ * Emit the WARN_TRADITIONAL diagnostic for a literal suffix.
+ *
+ * @param suffix  suffix text quoted in the message
+ */
+static void warn_traditional_suffix(char const *const suffix)
{
- warningf(WARN_TRADITIONAL, HERE, "traditional C rejects the '%S' suffix",
- &token.number.suffix);
+ warningf(WARN_TRADITIONAL, HERE,
+ "traditional C rejects the '%s' suffix", suffix);
}
-static void check_integer_suffix(void)
+/**
+ * Validate the suffix of an integer literal and set the type of the literal.
+ *
+ * Accepted are 'l'/'L', a doubled 'll'/'LL' of matching case and 'u'/'U', in
+ * any order, each at most once. An invalid suffix is reported with errorf()
+ * and the type of @p expr is left untouched. Any valid suffix other than
+ * none or a plain long triggers the traditional C warning.
+ *
+ * @param expr    literal expression receiving the type
+ * @param suffix  NUL-terminated text following the digits
+ */
+static void check_integer_suffix(expression_t *const expr, char const *const suffix)
{
- const string_t *suffix = &token.number.suffix;
- if (suffix->size == 0)
- return;
-
- bool not_traditional = false;
- const char *c = suffix->begin;
- if (*c == 'l' || *c == 'L') {
- ++c;
- if (*c == *(c-1)) {
- not_traditional = true;
- ++c;
- if (*c == 'u' || *c == 'U') {
+ unsigned seen = SPECIFIER_NONE;
+ char const *c = suffix;
+ while (true) {
+ specifiers_t bits;
+ if (*c == 'L' || *c == 'l') {
+ bits = SPECIFIER_LONG;
+ /* The same letter twice in a row means "long long". */
+ if (c[0] == c[1]) {
+ bits |= SPECIFIER_LONG_LONG;
++c;
}
- } else if (*c == 'u' || *c == 'U') {
- not_traditional = true;
- ++c;
+ } else if (*c == 'U' || *c == 'u') {
+ bits = SPECIFIER_UNSIGNED;
+ } else {
+ break;
}
- } else if (*c == 'u' || *c == 'U') {
- not_traditional = true;
++c;
- if (*c == 'l' || *c == 'L') {
- ++c;
- if (*c == *(c-1)) {
- ++c;
- }
- }
- }
- if (*c != '\0') {
- errorf(HERE, "invalid suffix '%S' on integer constant", suffix);
- } else if (not_traditional) {
- warn_traditional_suffix();
- }
+ /* Each kind of suffix letter may be given only once. */
+ if ((seen & bits) != 0) {
+ goto error;
+ }
+ seen |= bits;
+ }
+
+ if (*c != '\0') {
+error:
+ errorf(HERE, "invalid suffix '%s' on integer constant", suffix);
+ return;
+ }
+
+ type_t *type;
+ switch (seen) {
+ case SPECIFIER_NONE: type = type_int; break;
+ case SPECIFIER_LONG: type = type_long; break;
+ case SPECIFIER_LONG | SPECIFIER_LONG_LONG: type = type_long_long; break;
+ case SPECIFIER_UNSIGNED: type = type_unsigned_int; break;
+ case SPECIFIER_UNSIGNED | SPECIFIER_LONG: type = type_unsigned_long; break;
+ case SPECIFIER_UNSIGNED | SPECIFIER_LONG | SPECIFIER_LONG_LONG: type = type_unsigned_long_long; break;
+ default: panic("inconsistent suffix");
+ }
+ if (seen != SPECIFIER_NONE && seen != SPECIFIER_LONG) {
+ warn_traditional_suffix(suffix);
+ }
+ expr->base.type = type;
+ /* Integer type depends on the size of the number and the size
+ * representable by the types. The backend/codegeneration has to
+ * determine that. */
+ determine_literal_type(&expr->literal);
}
-static type_t *check_floatingpoint_suffix(void)
+/**
+ * Validate the suffix of a floating point literal and set its type.
+ *
+ * 'f'/'F' selects type_float, 'l'/'L' selects type_long_double and an empty
+ * suffix selects type_double. Anything following is reported with errorf()
+ * and the type of @p expr is left untouched. A valid non-empty suffix
+ * triggers the traditional C warning.
+ *
+ * @param expr    literal expression receiving the type
+ * @param suffix  NUL-terminated text following the number
+ */
+static void check_floatingpoint_suffix(expression_t *const expr, char const *const suffix)
{
- const string_t *suffix = &token.number.suffix;
- type_t *type = type_double;
- if (suffix->size == 0)
- return type;
-
- bool not_traditional = false;
- const char *c = suffix->begin;
- if (*c == 'f' || *c == 'F') {
- ++c;
- type = type_float;
- } else if (*c == 'l' || *c == 'L') {
- ++c;
- type = type_long_double;
- }
- if (*c != '\0') {
- errorf(HERE, "invalid suffix '%S' on floatingpoint constant", suffix);
- } else if (not_traditional) {
- warn_traditional_suffix();
+ char const *rest = suffix;
+ type_t *type = type_double;
+ if (*rest == 'F' || *rest == 'f') {
+ type = type_float;
+ ++rest;
+ } else if (*rest == 'L' || *rest == 'l') {
+ type = type_long_double;
+ ++rest;
}
- return type;
+
+ /* Nothing may follow the single suffix letter. */
+ if (*rest != '\0') {
+ errorf(HERE, "invalid suffix '%s' on floatingpoint constant", suffix);
+ return;
+ }
+
+ expr->base.type = type;
+ if (suffix[0] != '\0') {
+ warn_traditional_suffix(suffix);
+ }
}
-/**
- * Parse an integer constant.
- */
+/**
+ * Parse a number token (T_NUMBER) into an integer or floating point literal
+ * expression.
+ *
+ * The spelling of the token is scanned for an optional base prefix (0b/0B,
+ * 0x/0X or a leading 0), the mantissa digits with at most one '.', an
+ * optional exponent (e/E for decimal, p/P for hexadecimal) and a suffix,
+ * which is handed to check_integer_suffix() or check_floatingpoint_suffix().
+ * Malformed numbers are reported through errorf(). Whenever no valid type
+ * can be determined the expression keeps type_error_type.
+ *
+ * @return the newly allocated literal expression; the token is consumed
+ */
static expression_t *parse_number_literal(void)
{
- expression_kind_t kind;
- type_t *type;
+ string_t const *const str = &token.literal.string;
+ char const * i = str->begin;
+ /* Bit n is set once a digit of value n has been seen. */
+ unsigned digits = 0;
+ bool is_float = false;
- switch (token.kind) {
- case T_INTEGER:
- kind = EXPR_LITERAL_INTEGER;
- check_integer_suffix();
- type = type_int;
+ /* Parse base prefix. */
+ unsigned base;
+ if (*i == '0') {
+ switch (*++i) {
+ case 'B': case 'b': base = 2; ++i; break;
+ case 'X': case 'x': base = 16; ++i; break;
+ /* The leading 0 itself counts as a digit of the number. */
+ default: base = 8; digits |= 1U << 0; break;
+ }
+ } else {
+ base = 10;
+ }
+
+ /* Parse mantissa. */
+ for (;; ++i) {
+ unsigned digit;
+ switch (*i) {
+ case '.':
+ if (is_float) {
+ errorf(HERE, "multiple decimal points in %K", &token);
+ /* NULL marks an already reported error, see below. */
+ i = NULL;
+ goto done;
+ }
+ is_float = true;
+ /* A leading 0 does not make a floating point number octal. */
+ if (base == 8)
+ base = 10;
+ continue;
+
+ case '0': digit = 0; break;
+ case '1': digit = 1; break;
+ case '2': digit = 2; break;
+ case '3': digit = 3; break;
+ case '4': digit = 4; break;
+ case '5': digit = 5; break;
+ case '6': digit = 6; break;
+ case '7': digit = 7; break;
+ case '8': digit = 8; break;
+ case '9': digit = 9; break;
+ case 'A': case 'a': digit = 10; break;
+ case 'B': case 'b': digit = 11; break;
+ case 'C': case 'c': digit = 12; break;
+ case 'D': case 'd': digit = 13; break;
+ case 'E': case 'e': digit = 14; break;
+ case 'F': case 'f': digit = 15; break;
+
+ default: goto done_mantissa;
+ }
+
+ /* Outside of hexadecimal numbers letters start the exponent or the
+ * suffix. */
+ if (digit >= 10 && base != 16)
+ goto done_mantissa;
+
+ digits |= 1U << digit;
+ }
+done_mantissa:
+
+ /* Parse exponent. */
+ switch (base) {
+ case 2:
+ if (is_float)
+ errorf(HERE, "binary floating %K not allowed", &token);
break;
- case T_FLOATINGPOINT:
- kind = EXPR_LITERAL_FLOATINGPOINT;
- type = check_floatingpoint_suffix();
+ case 8:
+ case 10:
+ if (*i == 'E' || *i == 'e') {
+ base = 10;
+ goto parse_exponent;
+ }
+ break;
+
+ case 16:
+ if (*i == 'P' || *i == 'p') {
+parse_exponent:
+ ++i;
+ is_float = true;
+
+ if (*i == '-' || *i == '+')
+ ++i;
+
+ /* The <ctype.h> functions require a value representable as
+ * unsigned char, plain char may be negative. */
+ if (isdigit((unsigned char)*i)) {
+ do {
+ ++i;
+ } while (isdigit((unsigned char)*i));
+ } else {
+ errorf(HERE, "exponent of %K has no digits", &token);
+ }
+ } else if (is_float) {
+ errorf(HERE, "hexadecimal floating %K requires an exponent", &token);
+ i = NULL;
+ }
break;
default:
- panic("unexpected token type in parse_number_literal");
+ panic("invalid base");
}
- expression_t *literal = allocate_expression_zero(kind);
- literal->base.type = type;
- literal->literal.value = token.number.number;
- literal->literal.suffix = token.number.suffix;
- next_token();
+done:;
+ expression_t *const expr = allocate_expression_zero(is_float ? EXPR_LITERAL_FLOATINGPOINT : EXPR_LITERAL_INTEGER);
+ /* Default for all error paths, so the expression never carries a NULL
+ * type. The suffix checks replace it on success. */
+ expr->base.type = type_error_type;
+ expr->literal.value = *str;
- /* integer type depends on the size of the number and the size
- * representable by the types. The backend/codegeneration has to determine
- * that
- */
- determine_literal_type(&literal->literal);
- return literal;
+ if (i != NULL) {
+ if (digits == 0) {
+ errorf(HERE, "%K has no digits", &token);
+ } else if (digits & ~((1U << base) - 1)) {
+ /* Some digit with a value >= base was seen. */
+ errorf(HERE, "invalid digit in %K", &token);
+ } else {
+ expr->literal.suffix = i;
+ if (is_float) {
+ check_floatingpoint_suffix(expr, i);
+ } else {
+ check_integer_suffix(expr, i);
+ }
+ }
+ }
+
+ eat(T_NUMBER);
+ return expr;
}
/**
static expression_t *parse_character_constant(void)
{
expression_t *const literal = allocate_expression_zero(EXPR_LITERAL_CHARACTER);
- literal->string_literal.encoding = token.string.encoding;
- literal->string_literal.value = token.string.string;
+ literal->string_literal.value = token.literal.string;
- size_t const size = get_string_len(token.string.encoding, &token.string.string);
- switch (token.string.encoding) {
+ size_t const size = get_string_len(&token.literal.string);
+ switch (token.literal.string.encoding) {
case STRING_ENCODING_CHAR:
literal->base.type = c_mode & _CXX ? type_char : type_int;
if (size > 1) {
}
case EXPR_STRING_LITERAL: {
- size_t const size = get_string_len(expression->string_literal.encoding, &expression->string_literal.value) + 1;
+ size_t const size = get_string_len(&expression->string_literal.value) + 1;
type_t *const elem = get_unqualified_type(expression->base.type->pointer.points_to);
return make_array_type(elem, size, TYPE_QUALIFIER_NONE);
}
switch (token.kind) {
case T_false: return parse_boolean_literal(false);
case T_true: return parse_boolean_literal(true);
- case T_INTEGER:
- case T_FLOATINGPOINT: return parse_number_literal();
+ case T_NUMBER: return parse_number_literal();
case T_CHARACTER_CONSTANT: return parse_character_constant();
case T_STRING_LITERAL: return parse_string_literal();
case T___func__: return parse_function_keyword(FUNCNAME_FUNCTION);
type_left = type;
}
- if (type_left->kind != TYPE_COMPOUND_STRUCT &&
- type_left->kind != TYPE_COMPOUND_UNION) {
-
+ if (!is_type_compound(type_left)) {
if (is_type_valid(type_left) && !saw_error) {
errorf(&pos,
"request for member '%Y' in something not a struct or union, but '%T'",
add_anchor_token('~');
add_anchor_token(T_CHARACTER_CONSTANT);
add_anchor_token(T_COLONCOLON);
- add_anchor_token(T_FLOATINGPOINT);
add_anchor_token(T_IDENTIFIER);
- add_anchor_token(T_INTEGER);
add_anchor_token(T_MINUSMINUS);
+ add_anchor_token(T_NUMBER);
add_anchor_token(T_PLUSPLUS);
add_anchor_token(T_STRING_LITERAL);
add_anchor_token(T__Bool);
rem_anchor_token(T__Bool);
rem_anchor_token(T_STRING_LITERAL);
rem_anchor_token(T_PLUSPLUS);
+ rem_anchor_token(T_NUMBER);
rem_anchor_token(T_MINUSMINUS);
- rem_anchor_token(T_INTEGER);
rem_anchor_token(T_IDENTIFIER);
- rem_anchor_token(T_FLOATINGPOINT);
rem_anchor_token(T_COLONCOLON);
rem_anchor_token(T_CHARACTER_CONSTANT);
rem_anchor_token('~');
*/
void init_parser(void)
{
- sym_anonymous = symbol_table_insert("<anonymous>");
-
memset(token_anchor_set, 0, sizeof(token_anchor_set));
init_expression_parsers();