X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=preprocessor.c;h=3829329242d79061e8041e78d24efe954b0a1659;hb=2fb66fd8bd2a5956ab2cad26978ccfb7e105d45f;hp=f829db422ca4591c471cc4dac8235b9b8af04896;hpb=688a7e1d39d4de8a544a815cd71e8fd5a105328a;p=cparser diff --git a/preprocessor.c b/preprocessor.c index f829db4..3829329 100644 --- a/preprocessor.c +++ b/preprocessor.c @@ -218,17 +218,18 @@ static inline void put_back(utf32 const pc) --input.position.colno; } -#define MATCH_NEWLINE(code) \ - case '\r': \ - next_char(); \ - if (input.c == '\n') { \ - case '\n': \ - next_char(); \ - } \ - info.whitespace = 0; \ - ++input.position.lineno; \ - input.position.colno = 1; \ - code +#define NEWLINE \ + '\r': \ + next_char(); \ + if (input.c == '\n') { \ + case '\n': \ + next_char(); \ + } \ + info.whitespace = 0; \ + ++input.position.lineno; \ + input.position.colno = 1; \ + goto newline; \ + newline // Let it look like an ordinary case label. #define eat(c_type) (assert(input.c == c_type), next_char()) @@ -237,9 +238,8 @@ static void maybe_concat_lines(void) eat('\\'); switch (input.c) { - MATCH_NEWLINE( + case NEWLINE: return; - ) default: break; @@ -451,165 +451,81 @@ static const char *identify_string(char *string) return result; } -static string_t make_string(char *string, size_t len) +static string_t sym_make_string(string_encoding_t const enc) { - const char *result = identify_string(string); - return (string_t) {result, len}; + obstack_1grow(&symbol_obstack, '\0'); + size_t const len = obstack_object_size(&symbol_obstack) - 1; + char *const string = obstack_finish(&symbol_obstack); + char const *const result = identify_string(string); + return (string_t){ result, len, enc }; } -static void parse_string_literal(void) +static void parse_string(utf32 const delimiter, preprocessor_token_kind_t const kind, string_encoding_t const enc, char const *const context) { const unsigned start_linenr = input.position.lineno; - eat('"'); + eat(delimiter); while (true) { switch (input.c) { case '\\': { - utf32 tc; if (resolve_escape_sequences) { - tc = parse_escape_sequence(); - obstack_1grow(&symbol_obstack, (char) tc); + utf32 const tc = parse_escape_sequence(); + if (enc == STRING_ENCODING_CHAR) { + if (tc >= 0x100) { + warningf(WARN_OTHER, &pp_token.base.source_position, "escape sequence out of range"); + } + obstack_1grow(&symbol_obstack, tc); + } else { + obstack_grow_symbol(&symbol_obstack, tc); + } } else { - obstack_1grow(&symbol_obstack, (char) input.c); + obstack_1grow(&symbol_obstack, (char)input.c); next_char(); - obstack_1grow(&symbol_obstack, (char) input.c); + obstack_1grow(&symbol_obstack, (char)input.c); next_char(); } break; } + case NEWLINE: + errorf(&pp_token.base.source_position, "newline while parsing %s", context); + break; + case EOF: { source_position_t source_position; source_position.input_name = pp_token.base.source_position.input_name; source_position.lineno = start_linenr; - errorf(&source_position, "string has no end"); + errorf(&source_position, "EOF while parsing %s", context); goto end_of_string; } - case '"': - next_char(); - goto end_of_string; - default: - obstack_grow_symbol(&symbol_obstack, input.c); - next_char(); - break; + if (input.c == delimiter) { + next_char(); + goto end_of_string; + } else { + obstack_grow_symbol(&symbol_obstack, input.c); + next_char(); + break; + } } } end_of_string: - /* add finishing 0 to the string */ - obstack_1grow(&symbol_obstack, '\0'); - const size_t size = (size_t)obstack_object_size(&symbol_obstack); - char *const string = obstack_finish(&symbol_obstack); - - pp_token.kind = TP_STRING_LITERAL; - pp_token.string.string = make_string(string, size); + pp_token.kind = kind; + pp_token.string.string = sym_make_string(enc); } -/** - * Parse a wide string literal and set lexer_token. - */ -static void parse_wide_string_literal(void) +static void parse_string_literal(string_encoding_t const enc) { - parse_string_literal(); - if (pp_token.kind == TP_STRING_LITERAL) - pp_token.kind = TP_WIDE_STRING_LITERAL; + parse_string('"', TP_STRING_LITERAL, enc, "string literal"); } -static void parse_wide_character_constant(void) +static void parse_character_constant(string_encoding_t const enc) { - eat('\''); - - while (true) { - switch (input.c) { - case '\\': { - const utf32 tc = parse_escape_sequence(); - obstack_grow_symbol(&symbol_obstack, tc); - break; - } - - MATCH_NEWLINE( - parse_error("newline while parsing character constant"); - break; - ) - - case '\'': - next_char(); - goto end_of_wide_char_constant; - - case EOF: - parse_error("EOF while parsing character constant"); - goto end_of_wide_char_constant; - - default: - obstack_grow_symbol(&symbol_obstack, input.c); - next_char(); - break; - } - } - -end_of_wide_char_constant: - obstack_1grow(&symbol_obstack, '\0'); - size_t size = (size_t) obstack_object_size(&symbol_obstack)-1; - char *string = obstack_finish(&symbol_obstack); - pp_token.kind = TP_WIDE_CHARACTER_CONSTANT; - pp_token.string.string = make_string(string, size); - - if (size == 0) { - parse_error("empty character constant"); - } -} - -static void parse_character_constant(void) -{ - const unsigned start_linenr = input.position.lineno; - - eat('\''); - - int tc; - while (true) { - switch (input.c) { - case '\\': - tc = parse_escape_sequence(); - obstack_1grow(&symbol_obstack, (char) tc); - break; - - MATCH_NEWLINE( - parse_error("newline while parsing character constant"); - break; - ) - - case EOF: { - source_position_t source_position; - source_position.input_name = pp_token.base.source_position.input_name; - source_position.lineno = start_linenr; - errorf(&source_position, "EOF while parsing character constant"); - goto end_of_char_constant; - } - - case '\'': - next_char(); - goto end_of_char_constant; - - default: - obstack_1grow(&symbol_obstack, (char) input.c); - next_char(); - break; - - } - } - -end_of_char_constant:; - obstack_1grow(&symbol_obstack, '\0'); - const size_t size = (size_t)obstack_object_size(&symbol_obstack); - char *const string = obstack_finish(&symbol_obstack); - - pp_token.kind = TP_CHARACTER_CONSTANT; - pp_token.string.string = make_string(string, size); - - if (size == 0) { + parse_string('\'', TP_CHARACTER_CONSTANT, enc, "character constant"); + if (pp_token.string.string.size == 0) { parse_error("empty character constant"); } } @@ -770,9 +686,8 @@ static void skip_multiline_comment(void) } break; - MATCH_NEWLINE( + case NEWLINE: break; - ) case EOF: { source_position_t source_position; @@ -798,10 +713,9 @@ static void skip_whitespace(void) next_char(); continue; - MATCH_NEWLINE( + case NEWLINE: info.at_line_begin = true; return; - ) case '/': next_char(); @@ -856,11 +770,11 @@ end_symbol: /* might be a wide string or character constant ( L"string"/L'c' ) */ if (input.c == '"' && string[0] == 'L' && string[1] == '\0') { obstack_free(&symbol_obstack, string); - parse_wide_string_literal(); + parse_string_literal(STRING_ENCODING_WIDE); return; } else if (input.c == '\'' && string[0] == 'L' && string[1] == '\0') { obstack_free(&symbol_obstack, string); - parse_wide_character_constant(); + parse_character_constant(STRING_ENCODING_WIDE); return; } @@ -908,12 +822,8 @@ static void parse_number(void) } end_number: - obstack_1grow(&symbol_obstack, '\0'); - size_t size = obstack_object_size(&symbol_obstack); - char *string = obstack_finish(&symbol_obstack); - pp_token.kind = TP_NUMBER; - pp_token.number.number = make_string(string, size); + pp_token.number.number = sym_make_string(STRING_ENCODING_CHAR); } @@ -961,11 +871,10 @@ restart: next_char(); goto restart; - MATCH_NEWLINE( + case NEWLINE: info.at_line_begin = true; info.had_whitespace = true; goto restart; - ) SYMBOL_CHARS parse_symbol(); @@ -976,11 +885,11 @@ restart: return; case '"': - parse_string_literal(); + parse_string_literal(STRING_ENCODING_CHAR); return; case '\'': - parse_character_constant(); + parse_character_constant(STRING_ENCODING_CHAR); return; case '.': @@ -1226,16 +1135,16 @@ static void emit_pp_token(void) case TP_NUMBER: fputs(pp_token.number.number.begin, out); break; - case TP_WIDE_STRING_LITERAL: - fputc('L', out); + case TP_STRING_LITERAL: + fputs(get_string_encoding_prefix(pp_token.string.string.encoding), out); fputc('"', out); fputs(pp_token.string.string.begin, out); fputc('"', out); break; - case TP_WIDE_CHARACTER_CONSTANT: - fputc('L', out); + case TP_CHARACTER_CONSTANT: + fputs(get_string_encoding_prefix(pp_token.string.string.encoding), out); fputc('\'', out); fputs(pp_token.string.string.begin, out); fputc('\'', out); @@ -1434,7 +1343,7 @@ static void parse_undef_directive(void) static void parse_headername(void) { const source_position_t start_position = input.position; - string_t string = {NULL, 0}; + string_t string = { NULL, 0, STRING_ENCODING_CHAR }; assert(obstack_object_size(&symbol_obstack) == 0); /* behind an #include we can have the special headername lexems. @@ -1448,43 +1357,32 @@ static void parse_headername(void) /* check wether we have a "... or <... headername */ switch (input.c) { - case '<': + { + utf32 delimiter; + case '<': delimiter = '>'; goto parse_name; + case '"': delimiter = '"'; goto parse_name; +parse_name: next_char(); while (true) { switch (input.c) { + case NEWLINE: case EOF: - /* fallthrough */ - MATCH_NEWLINE( - parse_error("header name without closing '>'"); + errorf(&pp_token.base.source_position, "header name without closing '%c'", (char)delimiter); goto finish_error; - ) - case '>': - next_char(); - goto finished_headername; - } - obstack_1grow(&symbol_obstack, (char) input.c); - next_char(); - } - /* we should never be here */ - case '"': - next_char(); - while (true) { - switch (input.c) { - case EOF: - /* fallthrough */ - MATCH_NEWLINE( - parse_error("header name without closing '>'"); - goto finish_error; - ) - case '"': - next_char(); - goto finished_headername; + default: + if (input.c == delimiter) { + next_char(); + goto finished_headername; + } else { + obstack_1grow(&symbol_obstack, (char)input.c); + next_char(); + } + break; } - obstack_1grow(&symbol_obstack, (char) input.c); - next_char(); } /* we should never be here */ + } default: /* TODO: do normal pp_token parsing and concatenate results */ @@ -1492,10 +1390,7 @@ static void parse_headername(void) } finished_headername: - obstack_1grow(&symbol_obstack, '\0'); - const size_t size = (size_t)obstack_object_size(&symbol_obstack); - char *const headername = obstack_finish(&symbol_obstack); - string = make_string(headername, size); + string = sym_make_string(STRING_ENCODING_CHAR); finish_error: pp_token.base.source_position = start_position; @@ -1557,9 +1452,7 @@ static void skip_till_newline(void) /* skip till newline */ while (true) { switch (input.c) { - MATCH_NEWLINE( - return; - ) + case NEWLINE: case EOF: return; }