Let the lexer produce preprocessor numbers T_NUMBER instead of T_INTEGER and T_FLOATINGPOINT.
author Christoph Mallon <christoph.mallon@gmx.de>
Sun, 17 Jun 2012 11:51:56 +0000 (13:51 +0200)
committer Christoph Mallon <christoph.mallon@gmx.de>
Sun, 17 Jun 2012 11:51:56 +0000 (13:51 +0200)
Handle the details of parsing a number in the parser.

ast.c
ast2firm.c
ast_t.h
lexer.c
parser.c
preprocessor.c
token.c
token_t.h
tokens.inc

diff --git a/ast.c b/ast.c
index 0667b97..cb40c3f 100644 (file)
--- a/ast.c
+++ b/ast.c
@@ -67,13 +67,6 @@ void print_indent(void)
                print_char('\t');
 }
 
-static void print_stringrep(const string_t *string)
-{
-       for (size_t i = 0; i < string->size; ++i) {
-               print_char(string->begin[i]);
-       }
-}
-
 /**
  * Returns 1 if a given precedence level has right-to-left
  * associativity, else 0.
@@ -248,8 +241,7 @@ static void print_literal(const literal_expression_t *literal)
        case EXPR_LITERAL_BOOLEAN:
        case EXPR_LITERAL_FLOATINGPOINT:
        case EXPR_LITERAL_INTEGER:
-               print_stringrep(&literal->value);
-               print_stringrep(&literal->suffix);
+               print_string(literal->value.begin);
                return;
 
        default:
diff --git a/ast2firm.c b/ast2firm.c
index c3a1e69..dc0941f 100644 (file)
--- a/ast2firm.c
+++ b/ast2firm.c
@@ -1157,14 +1157,12 @@ finish:;
 
 static bool try_create_integer(literal_expression_t *literal, type_t *type)
 {
-       const char *string = literal->value.begin;
-       size_t      size   = literal->value.size;
-
        assert(type->kind == TYPE_ATOMIC);
        atomic_type_kind_t akind = type->atomic.akind;
 
-       ir_mode   *const mode = atomic_modes[akind];
-       ir_tarval *const tv   = new_tarval_from_str(string, size, mode);
+       ir_mode    *const mode = atomic_modes[akind];
+       char const *const str  = literal->value.begin;
+       ir_tarval  *const tv   = new_tarval_from_str(str, literal->suffix - str, mode);
        if (tv == tarval_bad)
                return false;
 
@@ -1173,36 +1171,29 @@ static bool try_create_integer(literal_expression_t *literal, type_t *type)
        return true;
 }
 
-static void create_integer_tarval(literal_expression_t *literal)
+void determine_literal_type(literal_expression_t *const literal)
 {
+       assert(literal->base.kind == EXPR_LITERAL_INTEGER);
+
        /* -1: signed only, 0: any, 1: unsigned only */
-       int             sign   = literal->value.begin[0] != '0' /* decimal */ ? -1 : 0;
-       unsigned        ls     = 0;
-       const string_t *suffix = &literal->suffix;
-       /* parse suffix */
-       if (suffix->size > 0) {
-               for (const char *c = suffix->begin; *c != '\0'; ++c) {
-                       if (*c == 'u' || *c == 'U') sign = 1;
-                       if (*c == 'l' || *c == 'L') { ++ls; }
-               }
-       }
+       int const sign =
+               !is_type_signed(literal->base.type) ? 1 :
+               literal->value.begin[0] == '0'      ? 0 :
+               -1; /* Decimal literals only try signed types. */
 
        tarval_int_overflow_mode_t old_mode = tarval_get_integer_overflow_mode();
+       tarval_set_integer_overflow_mode(TV_OVERFLOW_BAD);
+
+       if (try_create_integer(literal, literal->base.type))
+               goto finished;
 
        /* now try if the constant is small enough for some types */
-       tarval_set_integer_overflow_mode(TV_OVERFLOW_BAD);
-       if (ls < 1) {
-               if (sign <= 0 && try_create_integer(literal, type_int))
-                       goto finished;
-               if (sign >= 0 && try_create_integer(literal, type_unsigned_int))
-                       goto finished;
-       }
-       if (ls < 2) {
-               if (sign <= 0 && try_create_integer(literal, type_long))
-                       goto finished;
-               if (sign >= 0 && try_create_integer(literal, type_unsigned_long))
-                       goto finished;
-       }
+       if (sign >= 0 && try_create_integer(literal, type_unsigned_int))
+               goto finished;
+       if (sign <= 0 && try_create_integer(literal, type_long))
+               goto finished;
+       if (sign >= 0 && try_create_integer(literal, type_unsigned_long))
+               goto finished;
        /* last try? then we should not report tarval_bad */
        if (sign < 0)
                tarval_set_integer_overflow_mode(TV_OVERFLOW_WRAP);
@@ -1220,17 +1211,6 @@ finished:
        tarval_set_integer_overflow_mode(old_mode);
 }
 
-void determine_literal_type(literal_expression_t *literal)
-{
-       switch (literal->base.kind) {
-       case EXPR_LITERAL_INTEGER:
-               create_integer_tarval(literal);
-               return;
-       default:
-               break;
-       }
-}
-
 /**
  * Creates a Const node representing a constant.
  */
diff --git a/ast_t.h b/ast_t.h
index 74ccf40..10a96b8 100644 (file)
--- a/ast_t.h
+++ b/ast_t.h
@@ -252,12 +252,12 @@ struct expression_base_t {
  * integer, float and boolean constants
  */
 struct literal_expression_t {
-       expression_base_t  base;
-       string_t           value;
-       string_t           suffix;
+       expression_base_t base;
+       string_t          value;
+       char const       *suffix; /**< Start of the suffix in value. */
 
        /* ast2firm data */
-       ir_tarval         *target_value;
+       ir_tarval        *target_value;
 };
 
 /**
diff --git a/lexer.c b/lexer.c
index d368aed..8a2dbdd 100644 (file)
--- a/lexer.c
+++ b/lexer.c
@@ -181,13 +181,12 @@ static inline void next_char(void)
        }
 }
 
-#define SYMBOL_CHARS  \
+#define SYMBOL_CHARS_WITHOUT_E_P \
        case '$': if (!allow_dollar_in_symbol) goto dollar_sign; \
        case 'a':         \
        case 'b':         \
        case 'c':         \
        case 'd':         \
-       case 'e':         \
        case 'f':         \
        case 'g':         \
        case 'h':         \
@@ -198,7 +197,6 @@ static inline void next_char(void)
        case 'm':         \
        case 'n':         \
        case 'o':         \
-       case 'p':         \
        case 'q':         \
        case 'r':         \
        case 's':         \
@@ -213,7 +211,6 @@ static inline void next_char(void)
        case 'B':         \
        case 'C':         \
        case 'D':         \
-       case 'E':         \
        case 'F':         \
        case 'G':         \
        case 'H':         \
@@ -224,7 +221,6 @@ static inline void next_char(void)
        case 'M':         \
        case 'N':         \
        case 'O':         \
-       case 'P':         \
        case 'Q':         \
        case 'R':         \
        case 'S':         \
@@ -237,6 +233,16 @@ static inline void next_char(void)
        case 'Z':         \
        case '_':
 
+#define SYMBOL_CHARS_E_P \
+       case 'E': \
+       case 'P': \
+       case 'e': \
+       case 'p':
+
+#define SYMBOL_CHARS  \
+       SYMBOL_CHARS_WITHOUT_E_P \
+       SYMBOL_CHARS_E_P
+
 #define DIGITS        \
        case '0':         \
        case '1':         \
@@ -420,119 +426,29 @@ static string_t sym_make_string(string_encoding_t const enc)
        return (string_t){ result, len, enc };
 }
 
-/**
- * parse suffixes like 'LU' or 'f' after numbers
- */
-static void parse_number_suffix(void)
+static void parse_pp_number(void)
 {
-       assert(obstack_object_size(&symbol_obstack) == 0);
-       while (true) {
+       for (;;) {
                switch (c) {
-               SYMBOL_CHARS
-                       obstack_1grow(&symbol_obstack, (char) c);
+               SYMBOL_CHARS_E_P
+                       obstack_1grow(&symbol_obstack, (char)c);
                        next_char();
+                       if (c == '+' || c == '-') {
+               case '.':
+               DIGITS
+               SYMBOL_CHARS_WITHOUT_E_P
+                               obstack_1grow(&symbol_obstack, (char)c);
+                               next_char();
+                       }
                        break;
+
                default:
-               dollar_sign:
-                       goto finish_suffix;
+dollar_sign:
+                       lexer_token.kind           = T_NUMBER;
+                       lexer_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
+                       return;
                }
        }
-finish_suffix:
-       if (obstack_object_size(&symbol_obstack) == 0) {
-               lexer_token.number.suffix.begin = NULL;
-               lexer_token.number.suffix.size  = 0;
-               return;
-       }
-
-       lexer_token.number.suffix = sym_make_string(STRING_ENCODING_CHAR);
-}
-
-static void parse_exponent(void)
-{
-       if (c == '-' || c == '+') {
-               obstack_1grow(&symbol_obstack, (char)c);
-               next_char();
-       }
-
-       if (isdigit(c)) {
-               do {
-                       obstack_1grow(&symbol_obstack, (char)c);
-                       next_char();
-               } while (isdigit(c));
-       } else {
-               errorf(&lexer_token.base.source_position, "exponent has no digits");
-       }
-}
-
-/**
- * Parses a hex number including hex floats and set the
- * lexer_token.
- */
-static void parse_number_hex(void)
-{
-       bool is_float   = false;
-       bool has_digits = false;
-
-       while (isxdigit(c)) {
-               has_digits = true;
-               obstack_1grow(&symbol_obstack, (char) c);
-               next_char();
-       }
-
-       if (c == '.') {
-               is_float = true;
-               obstack_1grow(&symbol_obstack, (char) c);
-               next_char();
-
-               while (isxdigit(c)) {
-                       has_digits = true;
-                       obstack_1grow(&symbol_obstack, (char) c);
-                       next_char();
-               }
-       }
-       if (c == 'p' || c == 'P') {
-               is_float = true;
-               obstack_1grow(&symbol_obstack, (char) c);
-               next_char();
-               parse_exponent();
-       } else if (is_float) {
-               errorf(&lexer_token.base.source_position,
-                      "hexadecimal floatingpoint constant requires an exponent");
-       }
-
-       lexer_token.number.number = sym_make_string(STRING_ENCODING_CHAR);
-
-       lexer_token.kind = is_float ? T_FLOATINGPOINT : T_INTEGER;
-
-       if (!has_digits) {
-               errorf(&lexer_token.base.source_position, "invalid number literal '%S'", &lexer_token.number.number);
-               lexer_token.number.number.begin = "0";
-               lexer_token.number.number.size  = 1;
-       }
-
-       parse_number_suffix();
-}
-
-static void parse_number_bin(void)
-{
-       bool has_digits = false;
-
-       while (c == '0' || c == '1') {
-               has_digits = true;
-               obstack_1grow(&symbol_obstack, (char)c);
-               next_char();
-       }
-
-       lexer_token.number.number = sym_make_string(STRING_ENCODING_CHAR);
-       lexer_token.kind          = T_INTEGER;
-
-       if (!has_digits) {
-               errorf(&lexer_token.base.source_position, "invalid number literal '%S'", &lexer_token.number.number);
-               lexer_token.number.number.begin = "0";
-               lexer_token.number.number.size  = 1;
-       }
-
-       parse_number_suffix();
 }
 
 /**
@@ -545,82 +461,6 @@ static bool is_octal_digit(utf32 chr)
        return '0' <= chr && chr <= '7';
 }
 
-/**
- * Parses a number and sets the lexer_token.
- */
-static void parse_number(void)
-{
-       bool is_float   = false;
-       bool has_digits = false;
-
-       assert(obstack_object_size(&symbol_obstack) == 0);
-       if (c == '0') {
-               obstack_1grow(&symbol_obstack, (char)c);
-               next_char();
-               if (c == 'x' || c == 'X') {
-                       obstack_1grow(&symbol_obstack, (char)c);
-                       next_char();
-                       parse_number_hex();
-                       return;
-               } else if (c == 'b' || c == 'B') {
-                       /* GCC extension: binary constant 0x[bB][01]+.  */
-                       obstack_1grow(&symbol_obstack, (char)c);
-                       next_char();
-                       parse_number_bin();
-                       return;
-               }
-               has_digits = true;
-       }
-
-       while (isdigit(c)) {
-               has_digits = true;
-               obstack_1grow(&symbol_obstack, (char) c);
-               next_char();
-       }
-
-       if (c == '.') {
-               is_float = true;
-               obstack_1grow(&symbol_obstack, '.');
-               next_char();
-
-               while (isdigit(c)) {
-                       has_digits = true;
-                       obstack_1grow(&symbol_obstack, (char) c);
-                       next_char();
-               }
-       }
-       if (c == 'e' || c == 'E') {
-               is_float = true;
-               obstack_1grow(&symbol_obstack, 'e');
-               next_char();
-               parse_exponent();
-       }
-
-       lexer_token.number.number = sym_make_string(STRING_ENCODING_CHAR);
-
-       if (is_float) {
-               lexer_token.kind = T_FLOATINGPOINT;
-       } else {
-               lexer_token.kind = T_INTEGER;
-
-               if (lexer_token.number.number.begin[0] == '0') {
-                       /* check for invalid octal digits */
-                       for (size_t i= 0; i < lexer_token.number.number.size; ++i) {
-                               char t = lexer_token.number.number.begin[i];
-                               if (t >= '8')
-                                       errorf(&lexer_token.base.source_position, "invalid digit '%c' in octal number", t);
-                       }
-               }
-       }
-
-       if (!has_digits) {
-               errorf(&lexer_token.base.source_position, "invalid number literal '%S'",
-                      &lexer_token.number.number);
-       }
-
-       parse_number_suffix();
-}
-
 /**
  * Returns the value of a digit.
  * The only portable way to do it ...
@@ -787,8 +627,8 @@ static void parse_string(utf32 const delim, token_kind_t const kind, string_enco
        }
 
 end_of_string:
-       lexer_token.kind          = kind;
-       lexer_token.string.string = sym_make_string(enc);
+       lexer_token.kind           = kind;
+       lexer_token.literal.string = sym_make_string(enc);
 }
 
 /**
@@ -805,7 +645,7 @@ static void parse_string_literal(string_encoding_t const enc)
 static void parse_character_constant(string_encoding_t const enc)
 {
        parse_string('\'', T_CHARACTER_CONSTANT, enc, "character constant");
-       if (lexer_token.string.string.size == 0) {
+       if (lexer_token.literal.string.size == 0) {
                errorf(&lexer_token.base.source_position, "empty character constant");
        }
 }
@@ -904,20 +744,26 @@ static void eat_until_newline(void)
  */
 static void parse_line_directive(void)
 {
-       if (pp_token.kind != T_INTEGER) {
+       if (pp_token.kind != T_NUMBER) {
                parse_error("expected integer");
        } else {
                /* use offset -1 as this is about the next line */
-               lexer_pos.lineno = atoi(pp_token.number.number.begin) - 1;
+               char      *end;
+               long const line = strtol(pp_token.literal.string.begin, &end, 0);
+               if (*end == '\0') {
+                       lexer_pos.lineno = line - 1;
+               } else {
+                       errorf(&lexer_pos, "'%S' is not a valid line number", &pp_token.literal.string);
+               }
                next_pp_token();
        }
-       if (pp_token.kind == T_STRING_LITERAL && pp_token.string.string.encoding == STRING_ENCODING_CHAR) {
-               lexer_pos.input_name       = pp_token.string.string.begin;
+       if (pp_token.kind == T_STRING_LITERAL && pp_token.literal.string.encoding == STRING_ENCODING_CHAR) {
+               lexer_pos.input_name       = pp_token.literal.string.begin;
                lexer_pos.is_system_header = false;
                next_pp_token();
 
                /* attempt to parse numeric flags as outputted by gcc preprocessor */
-               while (pp_token.kind == T_INTEGER) {
+               while (pp_token.kind == T_NUMBER) {
                        /* flags:
                         * 1 - indicates start of a new file
                         * 2 - indicates return from a file
@@ -926,7 +772,7 @@ static void parse_line_directive(void)
                         *
                         * currently we're only interested in "3"
                         */
-                       if (streq(pp_token.number.number.begin, "3")) {
+                       if (streq(pp_token.literal.string.begin, "3")) {
                                lexer_pos.is_system_header = true;
                        }
                        next_pp_token();
@@ -1034,7 +880,7 @@ static void parse_preprocessor_directive(void)
        case T_IDENTIFIER:
                parse_preprocessor_identifier();
                break;
-       case T_INTEGER:
+       case T_NUMBER:
                parse_line_directive();
                break;
        case '\n':
@@ -1110,7 +956,7 @@ void lexer_next_preprocessing_token(void)
                }
 
                DIGITS
-                       parse_number();
+                       parse_pp_number();
                        return;
 
                case '"':
@@ -1126,7 +972,7 @@ void lexer_next_preprocessing_token(void)
                                DIGITS
                                        put_back(c);
                                        c = '.';
-                                       parse_number();
+                                       parse_pp_number();
                                        return;
 
                                case '.':
diff --git a/parser.c b/parser.c
index e2e560a..578087e 100644 (file)
--- a/parser.c
+++ b/parser.c
@@ -20,6 +20,7 @@
 #include <config.h>
 
 #include <assert.h>
+#include <ctype.h>
 #include <stdarg.h>
 #include <stdbool.h>
 
@@ -247,8 +248,7 @@ static void semantic_comparison(binary_expression_t *expression);
        case '~':                         \
        case T_ANDAND:                    \
        case T_CHARACTER_CONSTANT:        \
-       case T_FLOATINGPOINT:             \
-       case T_INTEGER:                   \
+       case T_NUMBER:                    \
        case T_MINUSMINUS:                \
        case T_PLUSPLUS:                  \
        case T_STRING_LITERAL:            \
@@ -1052,20 +1052,20 @@ static string_t concat_string_literals(void)
 
        string_t result;
        if (look_ahead(1)->kind == T_STRING_LITERAL) {
-               append_string(&token.string.string);
+               append_string(&token.literal.string);
                eat(T_STRING_LITERAL);
                warningf(WARN_TRADITIONAL, HERE, "traditional C rejects string constant concatenation");
-               string_encoding_t enc = token.string.string.encoding;
+               string_encoding_t enc = token.literal.string.encoding;
                do {
-                       if (token.string.string.encoding != STRING_ENCODING_CHAR) {
-                               enc = token.string.string.encoding;
+                       if (token.literal.string.encoding != STRING_ENCODING_CHAR) {
+                               enc = token.literal.string.encoding;
                        }
-                       append_string(&token.string.string);
+                       append_string(&token.literal.string);
                        eat(T_STRING_LITERAL);
                } while (token.kind == T_STRING_LITERAL);
                result = finish_string(enc);
        } else {
-               result = token.string.string;
+               result = token.literal.string;
                eat(T_STRING_LITERAL);
        }
 
@@ -2460,6 +2460,7 @@ static type_t *parse_typeof(void)
 }
 
 typedef enum specifiers_t {
+       SPECIFIER_NONE      = 0,
        SPECIFIER_SIGNED    = 1 << 0,
        SPECIFIER_UNSIGNED  = 1 << 1,
        SPECIFIER_LONG      = 1 << 2,
@@ -5703,110 +5704,204 @@ static expression_t *parse_boolean_literal(bool value)
        return literal;
 }
 
-static void warn_traditional_suffix(void)
+static void warn_traditional_suffix(char const *const suffix)
 {
-       warningf(WARN_TRADITIONAL, HERE, "traditional C rejects the '%S' suffix",
-                &token.number.suffix);
+       warningf(WARN_TRADITIONAL, HERE, "traditional C rejects the '%s' suffix", suffix);
 }
 
-static void check_integer_suffix(void)
+static void check_integer_suffix(expression_t *const expr, char const *const suffix)
 {
-       const string_t *suffix = &token.number.suffix;
-       if (suffix->size == 0)
-               return;
-
-       bool not_traditional = false;
-       const char *c = suffix->begin;
-       if (*c == 'l' || *c == 'L') {
-               ++c;
-               if (*c == *(c-1)) {
-                       not_traditional = true;
-                       ++c;
-                       if (*c == 'u' || *c == 'U') {
+       specifiers_t spec = SPECIFIER_NONE;
+       char const  *c    = suffix;
+       for (;;) {
+               specifiers_t add;
+               if (*c == 'L' || *c == 'l') {
+                       add = SPECIFIER_LONG;
+                       if (*c == c[1]) {
+                               add |= SPECIFIER_LONG_LONG;
                                ++c;
                        }
-               } else if (*c == 'u' || *c == 'U') {
-                       not_traditional = true;
-                       ++c;
+               } else if (*c == 'U' || *c == 'u') {
+                       add = SPECIFIER_UNSIGNED;
+               } else {
+                       break;
                }
-       } else if (*c == 'u' || *c == 'U') {
-               not_traditional = true;
                ++c;
-               if (*c == 'l' || *c == 'L') {
-                       ++c;
-                       if (*c == *(c-1)) {
-                               ++c;
-                       }
-               }
-       }
-       if (*c != '\0') {
-               errorf(HERE, "invalid suffix '%S' on integer constant", suffix);
-       } else if (not_traditional) {
-               warn_traditional_suffix();
+               if (spec & add)
+                       goto error;
+               spec |= add;
+       }
+
+       if (*c == '\0') {
+               type_t *type;
+               switch (spec) {
+               case SPECIFIER_NONE:                                            type = type_int;                break;
+               case                      SPECIFIER_LONG:                       type = type_long;               break;
+               case                      SPECIFIER_LONG | SPECIFIER_LONG_LONG: type = type_long_long;          break;
+               case SPECIFIER_UNSIGNED:                                        type = type_unsigned_int;       break;
+               case SPECIFIER_UNSIGNED | SPECIFIER_LONG:                       type = type_unsigned_long;      break;
+               case SPECIFIER_UNSIGNED | SPECIFIER_LONG | SPECIFIER_LONG_LONG: type = type_unsigned_long_long; break;
+               default: panic("inconsistent suffix");
+               }
+               if (spec != SPECIFIER_NONE && spec != SPECIFIER_LONG) {
+                       warn_traditional_suffix(suffix);
+               }
+               expr->base.type = type;
+               /* Integer type depends on the size of the number and the size
+                * representable by the types. The backend/codegeneration has to
+                * determine that. */
+               determine_literal_type(&expr->literal);
+       } else {
+error:
+               errorf(HERE, "invalid suffix '%s' on integer constant", suffix);
        }
 }
 
-static type_t *check_floatingpoint_suffix(void)
+static void check_floatingpoint_suffix(expression_t *const expr, char const *const suffix)
 {
-       const string_t *suffix = &token.number.suffix;
-       type_t         *type   = type_double;
-       if (suffix->size == 0)
-               return type;
-
-       bool not_traditional = false;
-       const char *c = suffix->begin;
-       if (*c == 'f' || *c == 'F') {
-               ++c;
-               type = type_float;
-       } else if (*c == 'l' || *c == 'L') {
-               ++c;
-               type = type_long_double;
-       }
-       if (*c != '\0') {
-               errorf(HERE, "invalid suffix '%S' on floatingpoint constant", suffix);
-       } else if (not_traditional) {
-               warn_traditional_suffix();
+       type_t     *type;
+       char const *c    = suffix;
+       switch (*c) {
+       case 'F':
+       case 'f': type = type_float;       ++c; break;
+       case 'L':
+       case 'l': type = type_long_double; ++c; break;
+       default:  type = type_double;           break;
        }
 
-       return type;
+       if (*c == '\0') {
+               expr->base.type = type;
+               if (suffix[0] != '\0') {
+                       warn_traditional_suffix(suffix);
+               }
+       } else {
+               errorf(HERE, "invalid suffix '%s' on floatingpoint constant", suffix);
+       }
 }
 
-/**
- * Parse an integer constant.
- */
 static expression_t *parse_number_literal(void)
 {
-       expression_kind_t  kind;
-       type_t            *type;
+       string_t const *const str      = &token.literal.string;
+       char     const *      i        = str->begin;
+       unsigned              digits   = 0;
+       bool                  is_float = false;
 
-       switch (token.kind) {
-       case T_INTEGER:
-               kind = EXPR_LITERAL_INTEGER;
-               check_integer_suffix();
-               type = type_int;
+       /* Parse base prefix. */
+       unsigned base;
+       if (*i == '0') {
+               switch (*++i) {
+               case 'B': case 'b': base =  2; ++i;               break;
+               case 'X': case 'x': base = 16; ++i;               break;
+               default:            base =  8; digits |= 1U << 0; break;
+               }
+       } else {
+               base = 10;
+       }
+
+       /* Parse mantissa. */
+       for (;; ++i) {
+               unsigned digit;
+               switch (*i) {
+               case '.':
+                       if (is_float) {
+                               errorf(HERE, "multiple decimal points in %K", &token);
+                               i = 0;
+                               goto done;
+                       }
+                       is_float = true;
+                       if (base == 8)
+                               base = 10;
+                       continue;
+
+               case '0':           digit =  0; break;
+               case '1':           digit =  1; break;
+               case '2':           digit =  2; break;
+               case '3':           digit =  3; break;
+               case '4':           digit =  4; break;
+               case '5':           digit =  5; break;
+               case '6':           digit =  6; break;
+               case '7':           digit =  7; break;
+               case '8':           digit =  8; break;
+               case '9':           digit =  9; break;
+               case 'A': case 'a': digit = 10; break;
+               case 'B': case 'b': digit = 11; break;
+               case 'C': case 'c': digit = 12; break;
+               case 'D': case 'd': digit = 13; break;
+               case 'E': case 'e': digit = 14; break;
+               case 'F': case 'f': digit = 15; break;
+
+               default: goto done_mantissa;
+               }
+
+               if (digit >= 10 && base != 16)
+                       goto done_mantissa;
+
+               digits |= 1U << digit;
+       }
+done_mantissa:
+
+       /* Parse exponent. */
+       switch (base) {
+       case 2:
+               if (is_float)
+                       errorf(HERE, "binary floating %K not allowed", &token);
                break;
 
-       case T_FLOATINGPOINT:
-               kind = EXPR_LITERAL_FLOATINGPOINT;
-               type = check_floatingpoint_suffix();
+       case  8:
+       case 10:
+               if (*i == 'E' || *i == 'e') {
+                       base = 10;
+                       goto parse_exponent;
+               }
+               break;
+
+       case 16:
+               if (*i == 'P' || *i == 'p') {
+parse_exponent:
+                       ++i;
+                       is_float = true;
+
+                       if (*i == '-' || *i == '+')
+                               ++i;
+
+                       if (isdigit(*i)) {
+                               do {
+                                       ++i;
+                               } while (isdigit(*i));
+                       } else {
+                               errorf(HERE, "exponent of %K has no digits", &token);
+                       }
+               } else if (is_float) {
+                       errorf(HERE, "hexadecimal floating %K requires an exponent", &token);
+                       i = 0;
+               }
                break;
 
        default:
-               panic("unexpected token type in parse_number_literal");
+               panic("invalid base");
        }
 
-       expression_t *literal = allocate_expression_zero(kind);
-       literal->base.type      = type;
-       literal->literal.value  = token.number.number;
-       literal->literal.suffix = token.number.suffix;
-       next_token();
+done:;
+       expression_t *const expr = allocate_expression_zero(is_float ? EXPR_LITERAL_FLOATINGPOINT : EXPR_LITERAL_INTEGER);
+       expr->literal.value = *str;
 
-       /* integer type depends on the size of the number and the size
-        * representable by the types. The backend/codegeneration has to determine
-        * that
-        */
-       determine_literal_type(&literal->literal);
-       return literal;
+       if (i) {
+               if (digits == 0) {
+                       errorf(HERE, "%K has no digits", &token);
+               } else if (digits & ~((1U << base) - 1)) {
+                       errorf(HERE, "invalid digit in %K", &token);
+               } else {
+                       expr->literal.suffix = i;
+                       if (is_float) {
+                               check_floatingpoint_suffix(expr, i);
+                       } else {
+                               check_integer_suffix(expr, i);
+                       }
+               }
+       }
+
+       eat(T_NUMBER);
+       return expr;
 }
 
 /**
@@ -5815,10 +5910,10 @@ static expression_t *parse_number_literal(void)
 static expression_t *parse_character_constant(void)
 {
        expression_t *const literal = allocate_expression_zero(EXPR_LITERAL_CHARACTER);
-       literal->string_literal.value = token.string.string;
+       literal->string_literal.value = token.literal.string;
 
-       size_t const size = get_string_len(&token.string.string);
-       switch (token.string.string.encoding) {
+       size_t const size = get_string_len(&token.literal.string);
+       switch (token.literal.string.encoding) {
        case STRING_ENCODING_CHAR:
                literal->base.type = c_mode & _CXX ? type_char : type_int;
                if (size > 1) {
@@ -6622,8 +6717,7 @@ static expression_t *parse_primary_expression(void)
        switch (token.kind) {
        case T_false:                        return parse_boolean_literal(false);
        case T_true:                         return parse_boolean_literal(true);
-       case T_INTEGER:
-       case T_FLOATINGPOINT:                return parse_number_literal();
+       case T_NUMBER:                       return parse_number_literal();
        case T_CHARACTER_CONSTANT:           return parse_character_constant();
        case T_STRING_LITERAL:               return parse_string_literal();
        case T___func__:                     return parse_function_keyword(FUNCNAME_FUNCTION);
@@ -9715,10 +9809,9 @@ static statement_t *parse_compound_statement(bool inside_expression_statement)
        add_anchor_token('~');
        add_anchor_token(T_CHARACTER_CONSTANT);
        add_anchor_token(T_COLONCOLON);
-       add_anchor_token(T_FLOATINGPOINT);
        add_anchor_token(T_IDENTIFIER);
-       add_anchor_token(T_INTEGER);
        add_anchor_token(T_MINUSMINUS);
+       add_anchor_token(T_NUMBER);
        add_anchor_token(T_PLUSPLUS);
        add_anchor_token(T_STRING_LITERAL);
        add_anchor_token(T__Bool);
@@ -9896,10 +9989,9 @@ static statement_t *parse_compound_statement(bool inside_expression_statement)
        rem_anchor_token(T__Bool);
        rem_anchor_token(T_STRING_LITERAL);
        rem_anchor_token(T_PLUSPLUS);
+       rem_anchor_token(T_NUMBER);
        rem_anchor_token(T_MINUSMINUS);
-       rem_anchor_token(T_INTEGER);
        rem_anchor_token(T_IDENTIFIER);
-       rem_anchor_token(T_FLOATINGPOINT);
        rem_anchor_token(T_COLONCOLON);
        rem_anchor_token(T_CHARACTER_CONSTANT);
        rem_anchor_token('~');
index 3829329..90dedc2 100644 (file)
@@ -513,8 +513,8 @@ static void parse_string(utf32 const delimiter, preprocessor_token_kind_t const
        }
 
 end_of_string:
-       pp_token.kind          = kind;
-       pp_token.string.string = sym_make_string(enc);
+       pp_token.kind           = kind;
+       pp_token.literal.string = sym_make_string(enc);
 }
 
 static void parse_string_literal(string_encoding_t const enc)
@@ -525,7 +525,7 @@ static void parse_string_literal(string_encoding_t const enc)
 static void parse_character_constant(string_encoding_t const enc)
 {
        parse_string('\'', TP_CHARACTER_CONSTANT, enc, "character constant");
-       if (pp_token.string.string.size == 0) {
+       if (pp_token.literal.string.size == 0) {
                parse_error("empty character constant");
        }
 }
@@ -822,8 +822,8 @@ static void parse_number(void)
        }
 
 end_number:
-       pp_token.kind          = TP_NUMBER;
-       pp_token.number.number = sym_make_string(STRING_ENCODING_CHAR);
+       pp_token.kind           = TP_NUMBER;
+       pp_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
 }
 
 
@@ -1133,20 +1133,20 @@ static void emit_pp_token(void)
                fputs(pp_token.base.symbol->string, out);
                break;
        case TP_NUMBER:
-               fputs(pp_token.number.number.begin, out);
+               fputs(pp_token.literal.string.begin, out);
                break;
 
        case TP_STRING_LITERAL:
-               fputs(get_string_encoding_prefix(pp_token.string.string.encoding), out);
+               fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
                fputc('"', out);
-               fputs(pp_token.string.string.begin, out);
+               fputs(pp_token.literal.string.begin, out);
                fputc('"', out);
                break;
 
        case TP_CHARACTER_CONSTANT:
-               fputs(get_string_encoding_prefix(pp_token.string.string.encoding), out);
+               fputs(get_string_encoding_prefix(pp_token.literal.string.encoding), out);
                fputc('\'', out);
-               fputs(pp_token.string.string.begin, out);
+               fputs(pp_token.literal.string.begin, out);
                fputc('\'', out);
                break;
        default:
@@ -1190,7 +1190,7 @@ static bool pp_tokens_equal(const token_t *token1, const token_t *token2)
        case TP_NUMBER:
        case TP_CHARACTER_CONSTANT:
        case TP_STRING_LITERAL:
-               return strings_equal(&token1->string.string, &token2->string.string);
+               return strings_equal(&token1->literal.string, &token2->literal.string);
 
        default:
                return true;
@@ -1395,7 +1395,7 @@ finished_headername:
 finish_error:
        pp_token.base.source_position = start_position;
        pp_token.kind                 = TP_HEADERNAME;
-       pp_token.string.string        = string;
+       pp_token.literal.string       = string;
 }
 
 static bool do_include(bool system_include, const char *headername)
@@ -1467,7 +1467,7 @@ static bool parse_include_directive(void)
        skip_whitespace();
        bool system_include = input.c == '<';
        parse_headername();
-       string_t headername = pp_token.string.string;
+       string_t headername = pp_token.literal.string;
        if (headername.begin == NULL) {
                eat_pp_directive();
                return false;
@@ -1490,10 +1490,9 @@ static bool parse_include_directive(void)
        /* switch inputs */
        emit_newlines();
        push_input();
-       bool res = do_include(system_include, pp_token.string.string.begin);
+       bool res = do_include(system_include, pp_token.literal.string.begin);
        if (!res) {
-               errorf(&pp_token.base.source_position,
-                      "failed including '%S': %s", pp_token.string, strerror(errno));
+               errorf(&pp_token.base.source_position, "failed including '%S': %s", &pp_token.literal, strerror(errno));
                pop_restore_input();
                return false;
        }
diff --git a/token.c b/token.c
index 8184554..933c3a3 100644 (file)
--- a/token.c
+++ b/token.c
@@ -146,14 +146,9 @@ void print_token(FILE *f, const token_t *token)
        case T_IDENTIFIER:
                fprintf(f, "identifier '%s'", token->base.symbol->string);
                break;
-       case T_INTEGER:
-       case T_FLOATINGPOINT:
-               print_token_kind(f, (token_kind_t)token->kind);
-               fputs(" '", f);
-               print_stringrep(&token->number.number, f);
-               if (token->number.suffix.size > 0)
-                       print_stringrep(&token->number.suffix, f);
-               fputc('\'', f);
+
+       case T_NUMBER:
+               fprintf(f, "number '%s'", token->literal.string.begin);
                break;
 
                char delim;
@@ -161,8 +156,8 @@ void print_token(FILE *f, const token_t *token)
        case T_CHARACTER_CONSTANT: delim = '\''; goto print_string;
 print_string:
                print_token_kind(f, (token_kind_t)token->kind);
-               fprintf(f, " %s%c", get_string_encoding_prefix(token->string.string.encoding), delim);
-               print_stringrep(&token->string.string, f);
+               fprintf(f, " %s%c", get_string_encoding_prefix(token->literal.string.encoding), delim);
+               print_stringrep(&token->literal.string, f);
                fputc(delim, f);
                break;
 
@@ -210,10 +205,10 @@ void print_pp_token(FILE *f, const token_t *token)
                fprintf(f, "identifier '%s'", token->base.symbol->string);
                break;
        case TP_NUMBER:
-               fprintf(f, "number '%s'", token->number.number.begin);
+               fprintf(f, "number '%s'", token->literal.string.begin);
                break;
        case TP_STRING_LITERAL:
-               fprintf(f, "string \"%s\"", token->string.string.begin);
+               fprintf(f, "string \"%s\"", token->literal.string.begin);
                break;
        default:
                print_pp_token_kind(f, (preprocessor_token_kind_t) token->kind);
index 7de0e6a..741845d 100644 (file)
--- a/token_t.h
+++ b/token_t.h
@@ -59,10 +59,9 @@ struct source_position_t {
 /* position used for "builtin" declarations/types */
 extern const source_position_t builtin_source_position;
 
-typedef struct token_base_t     token_base_t;
-typedef struct string_literal_t string_literal_t;
-typedef struct number_literal_t number_literal_t;
-typedef union  token_t          token_t;
+typedef struct token_base_t token_base_t;
+typedef struct literal_t    literal_t;
+typedef union  token_t      token_t;
 
 struct token_base_t {
        unsigned          kind;
@@ -70,22 +69,15 @@ struct token_base_t {
        symbol_t         *symbol;
 };
 
-struct string_literal_t {
+struct literal_t {
        token_base_t base;
        string_t     string;
 };
 
-struct number_literal_t {
-       token_base_t  base;
-       string_t      number;
-       string_t      suffix;
-};
-
 union token_t {
-       unsigned          kind;
-       token_base_t      base;
-       string_literal_t  string;
-       number_literal_t  number;
+       unsigned     kind;
+       token_base_t base;
+       literal_t    literal;
 };
 
 char const *get_string_encoding_prefix(string_encoding_t);
index 37b2e84..eadbb71 100644 (file)
@@ -10,8 +10,7 @@
 #undef ALTERNATE
 
 TS(IDENTIFIER,         "identifier",)
-TS(INTEGER,            "integer number",)
-TS(FLOATINGPOINT,      "floatingpoint number",)
+TS(NUMBER,             "number constant",)
 TS(CHARACTER_CONSTANT, "character constant",)
 TS(STRING_LITERAL,     "string literal",)