}
}
-#define SYMBOL_CHARS \
- case '$': if (!allow_dollar_in_symbol) goto dollar_sign; \
+#define SYMBOL_CHARS_WITHOUT_E_P \
+ '$': if (!allow_dollar_in_symbol) goto dollar_sign; \
case 'a': \
case 'b': \
case 'c': \
case 'd': \
- case 'e': \
case 'f': \
case 'g': \
case 'h': \
case 'm': \
case 'n': \
case 'o': \
- case 'p': \
case 'q': \
case 'r': \
case 's': \
case 'B': \
case 'C': \
case 'D': \
- case 'E': \
case 'F': \
case 'G': \
case 'H': \
case 'M': \
case 'N': \
case 'O': \
- case 'P': \
case 'Q': \
case 'R': \
case 'S': \
case 'X': \
case 'Y': \
case 'Z': \
- case '_':
+ case '_'
+
+#define SYMBOL_CHARS_E_P \
+ 'E': \
+ case 'P': \
+ case 'e': \
+ case 'p'
+
+#define SYMBOL_CHARS \
+ SYMBOL_CHARS_WITHOUT_E_P: \
+ case SYMBOL_CHARS_E_P
#define DIGITS \
- case '0': \
+ '0': \
case '1': \
case '2': \
case '3': \
case '6': \
case '7': \
case '8': \
- case '9':
+ case '9'
static bool is_universal_char_valid(utf32 const v)
{
{
while (true) {
switch (c) {
- DIGITS
- SYMBOL_CHARS
+ case DIGITS:
+ case SYMBOL_CHARS:
obstack_1grow(&symbol_obstack, (char) c);
next_char();
break;
} else if (obstack_object_size(&symbol_obstack) == 0 && !is_universal_char_valid_identifier_start(v)) {
errorf(&lexer_pos, "universal character \\%c%0*X is not valid as start of an identifier", n == 4 ? 'u' : 'U', (int)n, v);
} else {
- obstack_grow_symbol(&symbol_obstack, v);
+ obstack_grow_utf8(&symbol_obstack, v);
}
break;
}
return (string_t){ result, len, enc };
}
-/**
- * parse suffixes like 'LU' or 'f' after numbers
- */
-static void parse_number_suffix(void)
/**
 * Lexes a number token in "preprocessing number" style: starting at the
 * current character c, every character that may continue the number is
 * appended to symbol_obstack, and the first character that may not ends the
 * token. The result is stored in lexer_token as kind T_NUMBER with
 * literal.string set from sym_make_string(STRING_ENCODING_CHAR).
 *
 * Accepted continuation characters are '.', the DIGITS, the SYMBOL_CHARS, and
 * a '+' or '-' that directly follows one of e, E, p, P.
 *
 * This function emits no diagnostics itself; whether the collected spelling
 * is a well-formed integer or floating point constant is not checked here
 * (presumably left to the consumer of T_NUMBER -- confirm against callers).
 */
+static void parse_pp_number(void)
{
- assert(obstack_object_size(&symbol_obstack) == 0);
- while (true) {
+ for (;;) {
switch (c) {
- SYMBOL_CHARS
- obstack_1grow(&symbol_obstack, (char) c);
/* Exponent markers: append the marker, then look at the following
 * character; a sign there belongs to the number as well. */
+ case SYMBOL_CHARS_E_P:
+ obstack_1grow(&symbol_obstack, (char)c);
next_char();
+ if (c == '+' || c == '-') {
/* The case labels below sit inside the if body on purpose: '.', digits
 * and the remaining symbol characters jump straight to the shared
 * append-and-advance statements, which the exponent path also reaches
 * when a sign follows the marker. If no sign follows, the if body is
 * skipped and the next loop iteration classifies that character.
 * Note that SYMBOL_CHARS_WITHOUT_E_P starts with the '$' case, which
 * jumps to dollar_sign when allow_dollar_in_symbol is false. */
+ case '.':
+ case DIGITS:
+ case SYMBOL_CHARS_WITHOUT_E_P:
+ obstack_1grow(&symbol_obstack, (char)c);
+ next_char();
+ }
break;
+
/* Any other character (or a disallowed '$') terminates the number; the
 * terminating character is not consumed. */
default:
- dollar_sign:
- goto finish_suffix;
+dollar_sign:
+ lexer_token.kind = T_NUMBER;
+ lexer_token.literal.string = sym_make_string(STRING_ENCODING_CHAR);
+ return;
}
}
-finish_suffix:
- if (obstack_object_size(&symbol_obstack) == 0) {
- lexer_token.number.suffix.begin = NULL;
- lexer_token.number.suffix.size = 0;
- return;
- }
-
- lexer_token.number.suffix = sym_make_string(STRING_ENCODING_CHAR);
-}
-
-static void parse_exponent(void)
-{
- if (c == '-' || c == '+') {
- obstack_1grow(&symbol_obstack, (char)c);
- next_char();
- }
-
- if (isdigit(c)) {
- do {
- obstack_1grow(&symbol_obstack, (char)c);
- next_char();
- } while (isdigit(c));
- } else {
- errorf(&lexer_token.base.source_position, "exponent has no digits");
- }
-}
-
-/**
- * Parses a hex number including hex floats and set the
- * lexer_token.
- */
-static void parse_number_hex(void)
-{
- bool is_float = false;
- bool has_digits = false;
-
- while (isxdigit(c)) {
- has_digits = true;
- obstack_1grow(&symbol_obstack, (char) c);
- next_char();
- }
-
- if (c == '.') {
- is_float = true;
- obstack_1grow(&symbol_obstack, (char) c);
- next_char();
-
- while (isxdigit(c)) {
- has_digits = true;
- obstack_1grow(&symbol_obstack, (char) c);
- next_char();
- }
- }
- if (c == 'p' || c == 'P') {
- is_float = true;
- obstack_1grow(&symbol_obstack, (char) c);
- next_char();
- parse_exponent();
- } else if (is_float) {
- errorf(&lexer_token.base.source_position,
- "hexadecimal floatingpoint constant requires an exponent");
- }
-
- lexer_token.number.number = sym_make_string(STRING_ENCODING_CHAR);
-
- lexer_token.kind = is_float ? T_FLOATINGPOINT : T_INTEGER;
-
- if (!has_digits) {
- errorf(&lexer_token.base.source_position, "invalid number literal '%S'", &lexer_token.number.number);
- lexer_token.number.number.begin = "0";
- lexer_token.number.number.size = 1;
- }
-
- parse_number_suffix();
-}
-
-static void parse_number_bin(void)
-{
- bool has_digits = false;
-
- while (c == '0' || c == '1') {
- has_digits = true;
- obstack_1grow(&symbol_obstack, (char)c);
- next_char();
- }
-
- lexer_token.number.number = sym_make_string(STRING_ENCODING_CHAR);
- lexer_token.kind = T_INTEGER;
-
- if (!has_digits) {
- errorf(&lexer_token.base.source_position, "invalid number literal '%S'", &lexer_token.number.number);
- lexer_token.number.number.begin = "0";
- lexer_token.number.number.size = 1;
- }
-
- parse_number_suffix();
}
/**
return '0' <= chr && chr <= '7';
}
-/**
- * Parses a number and sets the lexer_token.
- */
-static void parse_number(void)
-{
- bool is_float = false;
- bool has_digits = false;
-
- assert(obstack_object_size(&symbol_obstack) == 0);
- if (c == '0') {
- obstack_1grow(&symbol_obstack, (char)c);
- next_char();
- if (c == 'x' || c == 'X') {
- obstack_1grow(&symbol_obstack, (char)c);
- next_char();
- parse_number_hex();
- return;
- } else if (c == 'b' || c == 'B') {
- /* GCC extension: binary constant 0x[bB][01]+. */
- obstack_1grow(&symbol_obstack, (char)c);
- next_char();
- parse_number_bin();
- return;
- }
- has_digits = true;
- }
-
- while (isdigit(c)) {
- has_digits = true;
- obstack_1grow(&symbol_obstack, (char) c);
- next_char();
- }
-
- if (c == '.') {
- is_float = true;
- obstack_1grow(&symbol_obstack, '.');
- next_char();
-
- while (isdigit(c)) {
- has_digits = true;
- obstack_1grow(&symbol_obstack, (char) c);
- next_char();
- }
- }
- if (c == 'e' || c == 'E') {
- is_float = true;
- obstack_1grow(&symbol_obstack, 'e');
- next_char();
- parse_exponent();
- }
-
- lexer_token.number.number = sym_make_string(STRING_ENCODING_CHAR);
-
- if (is_float) {
- lexer_token.kind = T_FLOATINGPOINT;
- } else {
- lexer_token.kind = T_INTEGER;
-
- if (lexer_token.number.number.begin[0] == '0') {
- /* check for invalid octal digits */
- for (size_t i= 0; i < lexer_token.number.number.size; ++i) {
- char t = lexer_token.number.number.begin[i];
- if (t >= '8')
- errorf(&lexer_token.base.source_position, "invalid digit '%c' in octal number", t);
- }
- }
- }
-
- if (!has_digits) {
- errorf(&lexer_token.base.source_position, "invalid number literal '%S'",
- &lexer_token.number.number);
- }
-
- parse_number_suffix();
-}
-
/**
* Returns the value of a digit.
* The only portable way to do it ...
}
obstack_1grow(&symbol_obstack, tc);
} else {
- obstack_grow_symbol(&symbol_obstack, tc);
+ obstack_grow_utf8(&symbol_obstack, tc);
}
break;
}
next_char();
goto end_of_string;
} else {
- obstack_grow_symbol(&symbol_obstack, c);
+ obstack_grow_utf8(&symbol_obstack, c);
next_char();
break;
}
}
end_of_string:
- lexer_token.kind = kind;
- lexer_token.string.string = sym_make_string(enc);
+ lexer_token.kind = kind;
+ lexer_token.literal.string = sym_make_string(enc);
}
/**
static void parse_character_constant(string_encoding_t const enc)
{
/* A character constant is lexed by parse_string, using ' as the
 * delimiter, T_CHARACTER_CONSTANT as the token kind and "character
 * constant" as the description passed along (presumably used in
 * parse_string's diagnostics -- confirm). */
parse_string('\'', T_CHARACTER_CONSTANT, enc, "character constant");
/* Report '' (nothing between the quotes): the check runs on the string
 * stored in lexer_token after parse_string has returned. */
- if (lexer_token.string.string.size == 0) {
+ if (lexer_token.literal.string.size == 0) {
errorf(&lexer_token.base.source_position, "empty character constant");
}
}
*/
static void parse_line_directive(void)
{
- if (pp_token.kind != T_INTEGER) {
+ if (pp_token.kind != T_NUMBER) {
parse_error("expected integer");
} else {
/* use offset -1 as this is about the next line */
- lexer_pos.lineno = atoi(pp_token.number.number.begin) - 1;
+ char *end;
+ long const line = strtol(pp_token.literal.string.begin, &end, 0);
+ if (*end == '\0') {
+ lexer_pos.lineno = line - 1;
+ } else {
+ errorf(&lexer_pos, "'%S' is not a valid line number", &pp_token.literal.string);
+ }
next_pp_token();
}
- if (pp_token.kind == T_STRING_LITERAL && pp_token.string.string.encoding == STRING_ENCODING_CHAR) {
- lexer_pos.input_name = pp_token.string.string.begin;
+ if (pp_token.kind == T_STRING_LITERAL && pp_token.literal.string.encoding == STRING_ENCODING_CHAR) {
+ lexer_pos.input_name = pp_token.literal.string.begin;
lexer_pos.is_system_header = false;
next_pp_token();
/* attempt to parse numeric flags as outputted by gcc preprocessor */
- while (pp_token.kind == T_INTEGER) {
+ while (pp_token.kind == T_NUMBER) {
/* flags:
* 1 - indicates start of a new file
* 2 - indicates return from a file
*
* currently we're only interested in "3"
*/
- if (streq(pp_token.number.number.begin, "3")) {
+ if (streq(pp_token.literal.string.begin, "3")) {
lexer_pos.is_system_header = true;
}
next_pp_token();
case T_IDENTIFIER:
parse_preprocessor_identifier();
break;
- case T_INTEGER:
+ case T_NUMBER:
parse_line_directive();
break;
case '\n':
lexer_token.kind = '\n';
return;
- SYMBOL_CHARS {
+ case SYMBOL_CHARS: {
parse_symbol();
/* might be a wide string ( L"string" ) */
string_encoding_t const enc = STRING_ENCODING_WIDE;
return;
}
- DIGITS
- parse_number();
+ case DIGITS:
+ parse_pp_number();
return;
case '"':
case '.':
MAYBE_PROLOG
- DIGITS
+ case DIGITS:
put_back(c);
c = '.';
- parse_number();
+ parse_pp_number();
return;
case '.':
default:
dollar_sign:
- errorf(&lexer_pos, "unknown character '%c' found", c);
+ errorf(&lexer_pos, "unknown character '%lc' found", c);
next_char();
break;
}