X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=lexer.c;h=2d1df82296a1afb8894bdb50ae380da5188a0f76;hb=b63886b188eeedf36d8df12df3fe1f5fd90818d4;hp=faecfe36fd75f32a12ceca0c883bdb597f1e97b1;hpb=506423738bdd438ce4609b37e9b3896189e8c483;p=cparser diff --git a/lexer.c b/lexer.c index faecfe3..2d1df82 100644 --- a/lexer.c +++ b/lexer.c @@ -56,6 +56,7 @@ static char buf[1024 + MAX_PUTBACK]; static const char *bufend; static const char *bufpos; static strset_t stringset; +bool allow_dollar_in_symbol = true; /** * Prints a parse error message at the current token. @@ -64,7 +65,7 @@ static strset_t stringset; */ static void parse_error(const char *msg) { - errorf(&lexer_token.source_position, "%s", msg); + errorf(&lexer_token.source_position, "%s", msg); } /** @@ -74,13 +75,18 @@ static void parse_error(const char *msg) */ static NORETURN internal_error(const char *msg) { - internal_errorf(&lexer_token.source_position, "%s", msg); + internal_errorf(&lexer_token.source_position, "%s", msg); } static inline void next_real_char(void) { assert(bufpos <= bufend); if (bufpos >= bufend) { + if (input == NULL) { + c = EOF; + return; + } + size_t s = fread(buf + MAX_PUTBACK, 1, sizeof(buf) - MAX_PUTBACK, input); if(s == 0) { @@ -189,6 +195,7 @@ end_of_next_char:; } #define SYMBOL_CHARS \ + case '$': if (!allow_dollar_in_symbol) goto dollar_sign; \ case 'a': \ case 'b': \ case 'c': \ @@ -276,6 +283,7 @@ static void parse_symbol(void) break; default: +dollar_sign: goto end_symbol; } } @@ -296,60 +304,78 @@ end_symbol: static void parse_integer_suffix(bool is_oct_hex) { - bool is_unsigned = false; - bool min_long = false; - bool min_longlong = false; - - if(c == 'U' || c == 'u') { - is_unsigned = true; + bool is_unsigned = false; + bool min_long = false; + bool min_longlong = false; + bool not_traditional = false; + int pos = 0; + char suffix[4]; + + if (c == 'U' || c == 'u') { + not_traditional = true; + suffix[pos++] = toupper(c); + is_unsigned = true; next_char(); - if(c == 'L' || c == 'l') { + if (c == 'L' || c == 'l') { + suffix[pos++] = toupper(c); min_long = true; next_char(); - if(c == 'L' || c == 'l') { + if (c == 'L' || c == 'l') { + suffix[pos++] = toupper(c); min_longlong = true; next_char(); } } - } else if(c == 'l' || c == 'L') { + } else if (c == 'l' || c == 'L') { + suffix[pos++] = toupper(c); min_long = true; next_char(); - if(c == 'l' || c == 'L') { - min_longlong = true; + if (c == 'l' || c == 'L') { + not_traditional = true; + suffix[pos++] = toupper(c); + min_longlong = true; next_char(); - if(c == 'u' || c == 'U') { - is_unsigned = true; + if (c == 'u' || c == 'U') { + suffix[pos++] = toupper(c); + is_unsigned = true; next_char(); } - } else if(c == 'u' || c == 'U') { - is_unsigned = true; + } else if (c == 'u' || c == 'U') { + not_traditional = true; + suffix[pos++] = toupper(c); + is_unsigned = true; next_char(); lexer_token.datatype = type_unsigned_long; } } - if(!is_unsigned) { + if (warning.traditional && not_traditional) { + suffix[pos] = '\0'; + warningf(&lexer_token.source_position, + "traditional C rejects the '%s' suffix", suffix); + } + if (!is_unsigned) { long long v = lexer_token.v.intvalue; - if(!min_long) { - if(v >= TARGET_INT_MIN && v <= TARGET_INT_MAX) { + if (!min_long) { + if (v >= TARGET_INT_MIN && v <= TARGET_INT_MAX) { lexer_token.datatype = type_int; return; - } else if(is_oct_hex && v >= 0 && v <= TARGET_UINT_MAX) { + } else if (is_oct_hex && v >= 0 && v <= TARGET_UINT_MAX) { lexer_token.datatype = type_unsigned_int; return; } } - if(!min_longlong) { - if(v >= TARGET_LONG_MIN && v <= TARGET_LONG_MAX) { + if (!min_longlong) { + if (v >= TARGET_LONG_MIN && v <= TARGET_LONG_MAX) { lexer_token.datatype = type_long; return; - } else if(is_oct_hex && v >= 0 && (unsigned long long)v <= (unsigned long long)TARGET_ULONG_MAX) { + } else if (is_oct_hex && v >= 0 && (unsigned long long)v <= (unsigned long long)TARGET_ULONG_MAX) { lexer_token.datatype = type_unsigned_long; return; } } unsigned long long uv = (unsigned long long) v; - if(is_oct_hex && uv > (unsigned long long) TARGET_LONGLONG_MAX) { + if (is_oct_hex && uv > (unsigned long long) TARGET_LONGLONG_MAX) { lexer_token.datatype = type_unsigned_long_long; return; } @@ -357,11 +383,11 @@ static void parse_integer_suffix(bool is_oct_hex) lexer_token.datatype = type_long_long; } else { unsigned long long v = (unsigned long long) lexer_token.v.intvalue; - if(!min_long && v <= TARGET_UINT_MAX) { + if (!min_long && v <= TARGET_UINT_MAX) { lexer_token.datatype = type_unsigned_int; return; } - if(!min_longlong && v <= TARGET_ULONG_MAX) { + if (!min_longlong && v <= TARGET_ULONG_MAX) { lexer_token.datatype = type_unsigned_long; return; } @@ -375,11 +401,19 @@ static void parse_floating_suffix(void) /* TODO: do something useful with the suffixes... */ case 'f': case 'F': + if (warning.traditional) { + warningf(&lexer_token.source_position, + "traditional C rejects the 'F' suffix"); + } next_char(); lexer_token.datatype = type_float; break; case 'l': case 'L': + if (warning.traditional) { + warningf(&lexer_token.source_position, + "traditional C rejects the 'F' suffix"); + } next_char(); lexer_token.datatype = type_long_double; break; @@ -479,34 +513,74 @@ end: */ static void parse_number_hex(void) { + bool is_float = false; assert(c == 'x' || c == 'X'); next_char(); + obstack_1grow(&symbol_obstack, '0'); + obstack_1grow(&symbol_obstack, 'x'); + while(isxdigit(c)) { obstack_1grow(&symbol_obstack, (char) c); next_char(); } - obstack_1grow(&symbol_obstack, '\0'); - char *string = obstack_finish(&symbol_obstack); - if(c == '.' || c == 'p' || c == 'P') { + if (c == '.') { + obstack_1grow(&symbol_obstack, (char) c); + next_char(); + + while (isxdigit(c)) { + obstack_1grow(&symbol_obstack, (char) c); + next_char(); + } + is_float = true; + } + if (c == 'p' || c == 'P') { + obstack_1grow(&symbol_obstack, (char) c); next_char(); - internal_error("Hex floating point numbers not implemented yet"); + + if (c == '-' || c == '+') { + obstack_1grow(&symbol_obstack, (char) c); + next_char(); + } + + while (isxdigit(c)) { + obstack_1grow(&symbol_obstack, (char) c); + next_char(); + } + is_float = true; } + + obstack_1grow(&symbol_obstack, '\0'); + char *string = obstack_finish(&symbol_obstack); if(*string == '\0') { parse_error("invalid hex number"); lexer_token.type = T_ERROR; + obstack_free(&symbol_obstack, string); + return; } - const char *endptr; - lexer_token.type = T_INTEGER; - lexer_token.v.intvalue = parse_int_string(string, &endptr, 16); - if(*endptr != '\0') { - parse_error("hex number literal too long"); + if (is_float) { + char *endptr; + lexer_token.type = T_FLOATINGPOINT; + lexer_token.v.floatvalue = strtold(string, &endptr); + + if(*endptr != '\0') { + parse_error("invalid hex float literal"); + } + + parse_floating_suffix(); + } else { + const char *endptr; + lexer_token.type = T_INTEGER; + lexer_token.v.intvalue = parse_int_string(string + 2, &endptr, 16); + if(*endptr != '\0') { + parse_error("hex number literal too long"); + } + parse_integer_suffix(true); } obstack_free(&symbol_obstack, string); - parse_integer_suffix(true); } /** @@ -561,23 +635,23 @@ static void parse_number_oct(void) static void parse_number_dec(void) { bool is_float = false; - while(isdigit(c)) { + while (isdigit(c)) { obstack_1grow(&symbol_obstack, (char) c); next_char(); } - if(c == '.') { + if (c == '.') { obstack_1grow(&symbol_obstack, '.'); next_char(); - while(isdigit(c)) { + while (isdigit(c)) { obstack_1grow(&symbol_obstack, (char) c); next_char(); } is_float = true; } if(c == 'e' || c == 'E') { - obstack_1grow(&symbol_obstack, 'e'); + obstack_1grow(&symbol_obstack, (char) c); next_char(); if(c == '-' || c == '+') { @@ -744,7 +818,7 @@ static int parse_escape_sequence(void) int ec = c; next_char(); - switch(ec) { + switch (ec) { case '"': return '"'; case '\'': return '\''; case '\\': return '\\'; @@ -770,6 +844,10 @@ static int parse_escape_sequence(void) case EOF: parse_error("reached end of file while parsing escape sequence"); return EOF; + case 'e': + if (c_mode & _GNUC) + return 27; /* hopefully 27 is ALWAYS the code for ESACAPE */ + /*fallthrough*/ default: parse_error("unknown escape sequence"); return EOF; @@ -788,6 +866,10 @@ string_t concat_strings(const string_t *const s1, const string_t *const s2) memcpy(concat, s1->begin, len1); memcpy(concat + len1, s2->begin, len2 + 1); + if (warning.traditional) { + warningf(&lexer_token.source_position, + "traditional C rejects string constant concatenation"); + } #if 0 /* TODO hash */ const char *result = strset_insert(&stringset, concat); if(result != concat) { @@ -814,6 +896,10 @@ wide_string_t concat_string_wide_string(const string_t *const s1, const wide_str concat[i] = src[i]; } memcpy(concat + len1, s2->begin, (len2 + 1) * sizeof(*concat)); + if (warning.traditional) { + warningf(&lexer_token.source_position, + "traditional C rejects string constant concatenation"); + } return (wide_string_t){ concat, len1 + len2 + 1 }; } @@ -829,6 +915,10 @@ wide_string_t concat_wide_strings(const wide_string_t *const s1, const wide_stri wchar_rep_t *const concat = obstack_alloc(&symbol_obstack, (len1 + len2 + 1) * sizeof(*concat)); memcpy(concat, s1->begin, len1 * sizeof(*concat)); memcpy(concat + len1, s2->begin, (len2 + 1) * sizeof(*concat)); + if (warning.traditional) { + warningf(&lexer_token.source_position, + "traditional C rejects string constant concatenation"); + } return (wide_string_t){ concat, len1 + len2 + 1 }; } @@ -847,6 +937,10 @@ wide_string_t concat_wide_string_string(const wide_string_t *const s1, const str for (size_t i = 0; i != len2 + 1; ++i) { concat[i] = src[i]; } + if (warning.traditional) { + warningf(&lexer_token.source_position, + "traditional C rejects string constant concatenation"); + } return (wide_string_t){ concat, len1 + len2 + 1 }; } @@ -1083,7 +1177,7 @@ end_of_char_constant:; lexer_token.type = T_CHARACTER_CONSTANT; lexer_token.v.string.begin = string; lexer_token.v.string.size = size; - lexer_token.datatype = type_int; + lexer_token.datatype = c_mode & _CXX && size == 1 ? type_char : type_int; } /** @@ -1098,7 +1192,10 @@ static void skip_multiline_comment(void) case '/': next_char(); if (c == '*') { - /* TODO: nested comment, warn here */ + /* nested comment, warn here */ + if (warning.comment) { + warningf(&lexer_token.source_position, "'/*' within comment"); + } } break; case '*': @@ -1140,6 +1237,15 @@ static void skip_line_comment(void) case '\r': return; + case '\\': + next_char(); + if (c == '\n' || c == '\r') { + if (warning.comment) + warningf(&lexer_token.source_position, "multi-line comment"); + return; + } + break; + default: next_char(); break; @@ -1222,7 +1328,7 @@ static void parse_line_directive(void) /** * STDC pragmas. */ -typedef enum { +typedef enum stdc_pragma_kind_t { STDC_UNKNOWN, STDC_FP_CONTRACT, STDC_FENV_ACCESS, @@ -1232,7 +1338,7 @@ typedef enum { /** * STDC pragma values. */ -typedef enum { +typedef enum stdc_pragma_value_kind_t { STDC_VALUE_UNKNOWN, STDC_VALUE_ON, STDC_VALUE_OFF, @@ -1428,16 +1534,7 @@ void lexer_next_preprocessing_token(void) case '.': MAYBE_PROLOG - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': + DIGITS put_back(c); c = '.'; parse_number_dec(); @@ -1568,8 +1665,9 @@ void lexer_next_preprocessing_token(void) return; default: +dollar_sign: + errorf(&lexer_token.source_position, "unknown character '%c' found", c); next_char(); - errorf(&lexer_token.source_position, "unknown character '%c' found\n", c); lexer_token.type = T_ERROR; return; } @@ -1579,15 +1677,13 @@ void lexer_next_preprocessing_token(void) void lexer_next_token(void) { lexer_next_preprocessing_token(); - if(lexer_token.type != '\n') - return; + while (lexer_token.type == '\n') { newline_found: - do { lexer_next_preprocessing_token(); - } while(lexer_token.type == '\n'); + } - if(lexer_token.type == '#') { + if (lexer_token.type == '#') { parse_preprocessor_directive(); goto newline_found; } @@ -1596,6 +1692,7 @@ newline_found: void init_lexer(void) { strset_init(&stringset); + symbol_L = symbol_table_insert("L"); } void lexer_open_stream(FILE *stream, const char *input_name) @@ -1604,7 +1701,6 @@ void lexer_open_stream(FILE *stream, const char *input_name) lexer_token.source_position.linenr = 0; lexer_token.source_position.input_name = input_name; - symbol_L = symbol_table_insert("L"); bufpos = NULL; bufend = NULL; @@ -1613,6 +1709,20 @@ void lexer_open_stream(FILE *stream, const char *input_name) c = '\n'; } +void lexer_open_buffer(const char *buffer, size_t len, const char *input_name) +{ + input = NULL; + lexer_token.source_position.linenr = 0; + lexer_token.source_position.input_name = input_name; + + bufpos = buffer; + bufend = buffer + len; + + /* place a virtual \n at the beginning so the lexer knows that we're + * at the beginning of a line */ + c = '\n'; +} + void exit_lexer(void) { strset_destroy(&stringset);