Renamed enum entry.

[cparser] / parser.c
diff --git a/parser.c b/parser.c

index a558d94..53f8fa2 100644 (file)
--- a/parser.c
+++ b/parser.c
@@ -20,6 +20,7 @@
  #include <config.h>
  
  #include <assert.h>
+#include <ctype.h>
  #include <stdarg.h>
  #include <stdbool.h>
  
@@ -27,7 +28,7 @@
  #include "parser.h"
  #include "diagnostic.h"
  #include "format_check.h"
-#include "lexer.h"
+#include "preprocessor.h"
  #include "symbol_t.h"
  #include "token_t.h"
  #include "types.h"
@@ -138,9 +139,6 @@ static elf_visibility_tag_t default_visibility = ELF_VISIBILITY_DEFAULT;
  #define POP_EXTENSION() \
         ((void)(in_gcc_extension = old_gcc_extension))
  
-/** special symbol used for anonymous entities. */
-static symbol_t *sym_anonymous = NULL;
-
  /** The token anchor set */
  static unsigned short token_anchor_set[T_LAST_TOKEN];
  
@@ -247,8 +245,7 @@ static void semantic_comparison(binary_expression_t *expression);
         case '~':                         \
         case T_ANDAND:                    \
         case T_CHARACTER_CONSTANT:        \
-       case T_FLOATINGPOINT:             \
-       case T_INTEGER:                   \
+       case T_NUMBER:                    \
         case T_MINUSMINUS:                \
         case T_PLUSPLUS:                  \
         case T_STRING_LITERAL:            \
@@ -427,7 +424,7 @@ static size_t get_initializer_size(initializer_kind_t kind)
  {
         static const size_t sizes[] = {
                 [INITIALIZER_VALUE]       = sizeof(initializer_value_t),
-               [INITIALIZER_STRING]      = sizeof(initializer_string_t),
+               [INITIALIZER_STRING]      = sizeof(initializer_value_t),
                 [INITIALIZER_LIST]        = sizeof(initializer_list_t),
                 [INITIALIZER_DESIGNATOR]  = sizeof(initializer_designator_t)
         };
@@ -470,8 +467,8 @@ static size_t label_top(void)
  static inline void next_token(void)
  {
         token                              = lookahead_buffer[lookahead_bufpos];
-       lookahead_buffer[lookahead_bufpos] = lexer_token;
-       lexer_next_token();
+       lookahead_buffer[lookahead_bufpos] = pp_token;
+       next_preprocessing_token();
  
         lookahead_bufpos = (lookahead_bufpos + 1) % MAX_LOOKAHEAD;
  
@@ -481,12 +478,17 @@ static inline void next_token(void)
  #endif
  }
  
-#define eat(token_kind) (assert(token.kind == (token_kind)), next_token())
+static inline void eat(token_kind_t const kind)
+{
+       assert(token.kind == kind); /* callers must only eat the token kind that is current */
+       (void)kind; /* keeps 'kind' referenced when assert() is compiled out (NDEBUG) */
+       next_token();
+}
  
-static inline bool next_if(token_kind_t const type)
+static inline bool next_if(token_kind_t const kind)
  {
-       if (token.kind == type) {
-               eat(type);
+       if (token.kind == kind) {
+               eat(kind);
                 return true;
         } else {
                 return false;
@@ -1038,51 +1040,49 @@ static void append_string(string_t const *const s)
         obstack_grow(&ast_obstack, s->begin, s->size);
  }
  
-static string_t finish_string(void)
+static string_t finish_string(string_encoding_t const enc)
  {
         obstack_1grow(&ast_obstack, '\0');
         size_t      const size   = obstack_object_size(&ast_obstack) - 1;
         char const *const string = obstack_finish(&ast_obstack);
-       return (string_t){ string, size };
+       return (string_t){ string, size, enc };
  }
  
-static string_t concat_string_literals(string_encoding_t *const out_enc)
+static string_t concat_string_literals(void)
  {
         assert(token.kind == T_STRING_LITERAL);
  
-       string_t          result;
-       string_encoding_t enc = token.string.encoding;
+       string_t result;
         if (look_ahead(1)->kind == T_STRING_LITERAL) {
+               string_encoding_t enc = token.literal.string.encoding; /* read before eat() so the first literal's encoding counts */
-               append_string(&token.string.string);
+               append_string(&token.literal.string);
                 eat(T_STRING_LITERAL);
                 warningf(WARN_TRADITIONAL, HERE, "traditional C rejects string constant concatenation");
                 do {
-                       if (token.string.encoding != STRING_ENCODING_CHAR) {
-                               enc = token.string.encoding;
+                       if (token.literal.string.encoding != STRING_ENCODING_CHAR) { /* a non-char piece overrides the encoding seen so far */
+                               enc = token.literal.string.encoding;
                         }
-                       append_string(&token.string.string);
+                       append_string(&token.literal.string);
                         eat(T_STRING_LITERAL);
                 } while (token.kind == T_STRING_LITERAL);
-               result = finish_string();
+               result = finish_string(enc);
         } else {
-               result = token.string.string;
+               result = token.literal.string; /* single literal: reuse the token's string, encoding included */
                 eat(T_STRING_LITERAL);
         }
  
-       *out_enc = enc;
         return result;
  }
  
  static string_t parse_string_literals(char const *const context)
  {
         if (!skip_till(T_STRING_LITERAL, context))
-               return (string_t){ "", 0 };
+               return (string_t){ "", 0, STRING_ENCODING_CHAR };
  
-       string_encoding_t       enc;
         source_position_t const pos = *HERE;
-       string_t          const res = concat_string_literals(&enc);
+       string_t          const res = concat_string_literals();
  
-       if (enc != STRING_ENCODING_CHAR) {
+       if (res.encoding != STRING_ENCODING_CHAR) {
                 errorf(&pos, "expected plain string literal, got wide string literal");
         }
  
@@ -1543,18 +1543,6 @@ static designator_t *parse_designation(void)
         }
  }
  
-static initializer_t *initializer_from_string(array_type_t *const type, string_encoding_t const enc, string_t const *const string)
-{
-       /* TODO: check len vs. size of array type */
-       (void) type;
-
-       initializer_t *initializer = allocate_initializer_zero(INITIALIZER_STRING);
-       initializer->string.encoding = enc;
-       initializer->string.string   = *string;
-
-       return initializer;
-}
-
  /**
   * Build an initializer from a given expression.
   */
@@ -1569,7 +1557,7 @@ static initializer_t *initializer_from_expression(type_t *orig_type,
         if (expression->kind == EXPR_STRING_LITERAL && is_type_array(type)) {
                 array_type_t *const array_type   = &type->array;
                 type_t       *const element_type = skip_typeref(array_type->element_type);
-               switch (expression->string_literal.encoding) {
+               switch (expression->string_literal.value.encoding) {
                 case STRING_ENCODING_CHAR: {
                         if (is_type_atomic(element_type, ATOMIC_TYPE_CHAR)  ||
                             is_type_atomic(element_type, ATOMIC_TYPE_SCHAR) ||
@@ -1582,8 +1570,10 @@ static initializer_t *initializer_from_expression(type_t *orig_type,
                 case STRING_ENCODING_WIDE: {
                         type_t *bare_wchar_type = skip_typeref(type_wchar_t);
                         if (get_unqualified_type(element_type) == bare_wchar_type) {
-make_string_init:
-                               return initializer_from_string(array_type, expression->string_literal.encoding, &expression->string_literal.value);
+make_string_init:;
+                               initializer_t *const init = allocate_initializer_zero(INITIALIZER_STRING);
+                               init->value.value = expression;
+                               return init;
                         }
                         break;
                 }
@@ -2215,9 +2205,10 @@ static initializer_t *parse_initializer(parse_initializer_env_t *env)
                         size = max_index + 1;
                         break;
  
-               case INITIALIZER_STRING:
-                       size = get_string_len(result->string.encoding, &result->string.string) + 1;
+               case INITIALIZER_STRING: {
+                       size = get_string_len(&get_init_string(result)->value) + 1;
                         break;
+               }
  
                 case INITIALIZER_DESIGNATOR:
                 case INITIALIZER_VALUE:
@@ -2471,6 +2462,7 @@ static type_t *parse_typeof(void)
  }
  
  typedef enum specifiers_t {
+       SPECIFIER_NONE      = 0,
         SPECIFIER_SIGNED    = 1 << 0,
         SPECIFIER_UNSIGNED  = 1 << 1,
         SPECIFIER_LONG      = 1 << 2,
@@ -3009,7 +3001,8 @@ warn_about_long_long:
                         } else {
                                 errorf(pos, "multiple datatypes in declaration");
                         }
-                       goto end_error;
+                       specifiers->type = type_error_type;
+                       return;
                 }
                 }
  
@@ -3038,10 +3031,6 @@ warn_about_long_long:
         if (specifiers->attributes != NULL)
                 type = handle_type_attributes(specifiers->attributes, type);
         specifiers->type = type;
-       return;
-
-end_error:
-       specifiers->type = type_error_type;
  }
  
  static type_qualifiers_t parse_type_qualifiers(void)
@@ -5475,8 +5464,7 @@ static expression_t *find_create_select(const source_position_t *pos,
                 symbol_t *iter_symbol = iter->base.symbol;
                 if (iter_symbol == NULL) {
                         type_t *type = iter->declaration.type;
-                       if (type->kind != TYPE_COMPOUND_STRUCT
-                                       && type->kind != TYPE_COMPOUND_UNION)
+                       if (!is_type_compound(type))
                                 continue;
  
                         compound_t *sub_compound = type->compound.compound;
@@ -5699,8 +5687,8 @@ static type_t *get_string_type(string_encoding_t const enc)
  static expression_t *parse_string_literal(void)
  {
         expression_t *const expr = allocate_expression_zero(EXPR_STRING_LITERAL);
-       expr->string_literal.value = concat_string_literals(&expr->string_literal.encoding);
-       expr->base.type            = get_string_type(expr->string_literal.encoding);
+       expr->string_literal.value = concat_string_literals();
+       expr->base.type            = get_string_type(expr->string_literal.value.encoding);
         return expr;
  }
  
@@ -5718,110 +5706,204 @@ static expression_t *parse_boolean_literal(bool value)
         return literal;
  }
  
-static void warn_traditional_suffix(void)
+static void warn_traditional_suffix(char const *const suffix)
  {
-       warningf(WARN_TRADITIONAL, HERE, "traditional C rejects the '%S' suffix",
-                &token.number.suffix);
+       warningf(WARN_TRADITIONAL, HERE, "traditional C rejects the '%s' suffix", suffix);
  }
  
-static void check_integer_suffix(void)
+static void check_integer_suffix(expression_t *const expr, char const *const suffix)
  {
-       const string_t *suffix = &token.number.suffix;
-       if (suffix->size == 0)
-               return;
-
-       bool not_traditional = false;
-       const char *c = suffix->begin;
-       if (*c == 'l' || *c == 'L') {
-               ++c;
-               if (*c == *(c-1)) {
-                       not_traditional = true;
-                       ++c;
-                       if (*c == 'u' || *c == 'U') {
+       unsigned     spec = SPECIFIER_NONE; /* SPECIFIER_* flags collected from the suffix so far */
+       char const  *c    = suffix;
+       for (;;) {
+               specifiers_t add;
+               if (*c == 'L' || *c == 'l') {
+                       add = SPECIFIER_LONG;
+                       if (*c == c[1]) { /* same letter in the same case twice: "LL" or "ll" */
+                               add |= SPECIFIER_LONG_LONG;
                                 ++c;
                         }
-               } else if (*c == 'u' || *c == 'U') {
-                       not_traditional = true;
-                       ++c;
+               } else if (*c == 'U' || *c == 'u') {
+                       add = SPECIFIER_UNSIGNED;
+               } else {
+                       break;
                 }
-       } else if (*c == 'u' || *c == 'U') {
-               not_traditional = true;
                 ++c;
-               if (*c == 'l' || *c == 'L') {
-                       ++c;
-                       if (*c == *(c-1)) {
-                               ++c;
-                       }
-               }
-       }
-       if (*c != '\0') {
-               errorf(HERE, "invalid suffix '%S' on integer constant", suffix);
-       } else if (not_traditional) {
-               warn_traditional_suffix();
+               if (spec & add) /* a specifier was given twice, e.g. "UU" or "LUL" */
+                       goto error;
+               spec |= add;
+       }
+
+       if (*c == '\0') {
+               type_t *type;
+               switch (spec) {
+               case SPECIFIER_NONE:                                            type = type_int;                break;
+               case                      SPECIFIER_LONG:                       type = type_long;               break;
+               case                      SPECIFIER_LONG | SPECIFIER_LONG_LONG: type = type_long_long;          break;
+               case SPECIFIER_UNSIGNED:                                        type = type_unsigned_int;       break;
+               case SPECIFIER_UNSIGNED | SPECIFIER_LONG:                       type = type_unsigned_long;      break;
+               case SPECIFIER_UNSIGNED | SPECIFIER_LONG | SPECIFIER_LONG_LONG: type = type_unsigned_long_long; break;
+               default: panic("inconsistent suffix");
+               }
+               if (spec != SPECIFIER_NONE && spec != SPECIFIER_LONG) { /* only no suffix or a lone "L"/"l" stays silent */
+                       warn_traditional_suffix(suffix);
+               }
+               expr->base.type = type;
+               /* Integer type depends on the size of the number and the size
+                * representable by the types. The backend/codegeneration has to
+                * determine that. */
+               determine_literal_type(&expr->literal);
+       } else {
+error:
+               errorf(HERE, "invalid suffix '%s' on integer constant", suffix);
         }
  }
  
-static type_t *check_floatingpoint_suffix(void)
+static void check_floatingpoint_suffix(expression_t *const expr, char const *const suffix)
  {
-       const string_t *suffix = &token.number.suffix;
-       type_t         *type   = type_double;
-       if (suffix->size == 0)
-               return type;
-
-       bool not_traditional = false;
-       const char *c = suffix->begin;
-       if (*c == 'f' || *c == 'F') {
-               ++c;
-               type = type_float;
-       } else if (*c == 'l' || *c == 'L') {
-               ++c;
-               type = type_long_double;
-       }
-       if (*c != '\0') {
-               errorf(HERE, "invalid suffix '%S' on floatingpoint constant", suffix);
-       } else if (not_traditional) {
-               warn_traditional_suffix();
+       type_t     *type;
+       char const *c    = suffix;
+       switch (*c) {
+       case 'F':
+       case 'f': type = type_float;       ++c; break;
+       case 'L':
+       case 'l': type = type_long_double; ++c; break;
+       default:  type = type_double;           break; /* c is not advanced here */
         }
  
-       return type;
+       if (*c == '\0') { /* nothing may follow the optional single suffix letter */
+               expr->base.type = type;
+               if (suffix[0] != '\0') { /* reached only for a lone f/F/l/L suffix */
+                       warn_traditional_suffix(suffix);
+               }
+       } else {
+               errorf(HERE, "invalid suffix '%s' on floatingpoint constant", suffix);
+       }
  }
  
-/**
- * Parse an integer constant.
- */
  static expression_t *parse_number_literal(void)
  {
-       expression_kind_t  kind;
-       type_t            *type;
+       string_t const *const str      = &token.literal.string;
+       char     const *      i        = str->begin;
+       unsigned              digits   = 0;
+       bool                  is_float = false;
  
-       switch (token.kind) {
-       case T_INTEGER:
-               kind = EXPR_LITERAL_INTEGER;
-               check_integer_suffix();
-               type = type_int;
+       /* Parse base prefix. */
+       unsigned base;
+       if (*i == '0') {
+               switch (*++i) {
+               case 'B': case 'b': base =  2; ++i;               break;
+               case 'X': case 'x': base = 16; ++i;               break;
+               default:            base =  8; digits |= 1U << 0; break;
+               }
+       } else {
+               base = 10;
+       }
+
+       /* Parse mantissa. */
+       for (;; ++i) {
+               unsigned digit;
+               switch (*i) {
+               case '.':
+                       if (is_float) {
+                               errorf(HERE, "multiple decimal points in %K", &token);
+                               i = 0;
+                               goto done;
+                       }
+                       is_float = true;
+                       if (base == 8)
+                               base = 10;
+                       continue;
+
+               case '0':           digit =  0; break;
+               case '1':           digit =  1; break;
+               case '2':           digit =  2; break;
+               case '3':           digit =  3; break;
+               case '4':           digit =  4; break;
+               case '5':           digit =  5; break;
+               case '6':           digit =  6; break;
+               case '7':           digit =  7; break;
+               case '8':           digit =  8; break;
+               case '9':           digit =  9; break;
+               case 'A': case 'a': digit = 10; break;
+               case 'B': case 'b': digit = 11; break;
+               case 'C': case 'c': digit = 12; break;
+               case 'D': case 'd': digit = 13; break;
+               case 'E': case 'e': digit = 14; break;
+               case 'F': case 'f': digit = 15; break;
+
+               default: goto done_mantissa;
+               }
+
+               if (digit >= 10 && base != 16)
+                       goto done_mantissa;
+
+               digits |= 1U << digit;
+       }
+done_mantissa:
+
+       /* Parse exponent. */
+       switch (base) {
+       case 2:
+               if (is_float)
+                       errorf(HERE, "binary floating %K not allowed", &token);
                 break;
  
-       case T_FLOATINGPOINT:
-               kind = EXPR_LITERAL_FLOATINGPOINT;
-               type = check_floatingpoint_suffix();
+       case  8:
+       case 10:
+               if (*i == 'E' || *i == 'e') {
+                       base = 10;
+                       goto parse_exponent;
+               }
+               break;
+
+       case 16:
+               if (*i == 'P' || *i == 'p') {
+parse_exponent:
+                       ++i;
+                       is_float = true;
+
+                       if (*i == '-' || *i == '+')
+                               ++i;
+
+                       if (isdigit(*i)) {
+                               do {
+                                       ++i;
+                               } while (isdigit(*i));
+                       } else {
+                               errorf(HERE, "exponent of %K has no digits", &token);
+                       }
+               } else if (is_float) {
+                       errorf(HERE, "hexadecimal floating %K requires an exponent", &token);
+                       i = 0;
+               }
                 break;
  
         default:
-               panic("unexpected token type in parse_number_literal");
+               panic("invalid base");
         }
  
-       expression_t *literal = allocate_expression_zero(kind);
-       literal->base.type      = type;
-       literal->literal.value  = token.number.number;
-       literal->literal.suffix = token.number.suffix;
-       next_token();
+done:;
+       expression_t *const expr = allocate_expression_zero(is_float ? EXPR_LITERAL_FLOATINGPOINT : EXPR_LITERAL_INTEGER);
+       expr->literal.value = *str;
  
-       /* integer type depends on the size of the number and the size
-        * representable by the types. The backend/codegeneration has to determine
-        * that
-        */
-       determine_literal_type(&literal->literal);
-       return literal;
+       if (i) {
+               if (digits == 0) {
+                       errorf(HERE, "%K has no digits", &token);
+               } else if (digits & ~((1U << base) - 1)) {
+                       errorf(HERE, "invalid digit in %K", &token);
+               } else {
+                       expr->literal.suffix = i;
+                       if (is_float) {
+                               check_floatingpoint_suffix(expr, i);
+                       } else {
+                               check_integer_suffix(expr, i);
+                       }
+               }
+       }
+
+       eat(T_NUMBER);
+       return expr;
  }
  
  /**
@@ -5830,11 +5912,10 @@ static expression_t *parse_number_literal(void)
  static expression_t *parse_character_constant(void)
  {
         expression_t *const literal = allocate_expression_zero(EXPR_LITERAL_CHARACTER);
-       literal->string_literal.encoding = token.string.encoding;
-       literal->string_literal.value    = token.string.string;
+       literal->string_literal.value = token.literal.string;
  
-       size_t const size = get_string_len(token.string.encoding, &token.string.string);
-       switch (token.string.encoding) {
+       size_t const size = get_string_len(&token.literal.string);
+       switch (token.literal.string.encoding) {
         case STRING_ENCODING_CHAR:
                 literal->base.type = c_mode & _CXX ? type_char : type_int;
                 if (size > 1) {
@@ -5944,7 +6025,7 @@ type_t *revert_automatic_type_conversion(const expression_t *expression)
         }
  
         case EXPR_STRING_LITERAL: {
-               size_t  const size = get_string_len(expression->string_literal.encoding, &expression->string_literal.value) + 1;
+               size_t  const size = get_string_len(&expression->string_literal.value) + 1;
                 type_t *const elem = get_unqualified_type(expression->base.type->pointer.points_to);
                 return make_array_type(elem, size, TYPE_QUALIFIER_NONE);
         }
@@ -6638,8 +6719,7 @@ static expression_t *parse_primary_expression(void)
         switch (token.kind) {
         case T_false:                        return parse_boolean_literal(false);
         case T_true:                         return parse_boolean_literal(true);
-       case T_INTEGER:
-       case T_FLOATINGPOINT:                return parse_number_literal();
+       case T_NUMBER:                       return parse_number_literal();
         case T_CHARACTER_CONSTANT:           return parse_character_constant();
         case T_STRING_LITERAL:               return parse_string_literal();
         case T___func__:                     return parse_function_keyword(FUNCNAME_FUNCTION);
@@ -6862,9 +6942,7 @@ static expression_t *parse_select_expression(expression_t *addr)
                 type_left = type;
         }
  
-       if (type_left->kind != TYPE_COMPOUND_STRUCT &&
-           type_left->kind != TYPE_COMPOUND_UNION) {
-
+       if (!is_type_compound(type_left)) {
                 if (is_type_valid(type_left) && !saw_error) {
                         errorf(&pos,
                                "request for member '%Y' in something not a struct or union, but '%T'",
@@ -9733,10 +9811,9 @@ static statement_t *parse_compound_statement(bool inside_expression_statement)
         add_anchor_token('~');
         add_anchor_token(T_CHARACTER_CONSTANT);
         add_anchor_token(T_COLONCOLON);
-       add_anchor_token(T_FLOATINGPOINT);
         add_anchor_token(T_IDENTIFIER);
-       add_anchor_token(T_INTEGER);
         add_anchor_token(T_MINUSMINUS);
+       add_anchor_token(T_NUMBER);
         add_anchor_token(T_PLUSPLUS);
         add_anchor_token(T_STRING_LITERAL);
         add_anchor_token(T__Bool);
@@ -9914,10 +9991,9 @@ static statement_t *parse_compound_statement(bool inside_expression_statement)
         rem_anchor_token(T__Bool);
         rem_anchor_token(T_STRING_LITERAL);
         rem_anchor_token(T_PLUSPLUS);
+       rem_anchor_token(T_NUMBER);
         rem_anchor_token(T_MINUSMINUS);
-       rem_anchor_token(T_INTEGER);
         rem_anchor_token(T_IDENTIFIER);
-       rem_anchor_token(T_FLOATINGPOINT);
         rem_anchor_token(T_COLONCOLON);
         rem_anchor_token(T_CHARACTER_CONSTANT);
         rem_anchor_token('~');
@@ -10253,8 +10329,6 @@ void parse(void)
   */
  void init_parser(void)
  {
-       sym_anonymous = symbol_table_insert("<anonymous>");
-
         memset(token_anchor_set, 0, sizeof(token_anchor_set));
  
         init_expression_parsers();