preprocessor: keep unknown chars in preproc mode
author Matthias Braun <matthias.braun@kit.edu>
Fri, 25 May 2012 17:59:03 +0000 (19:59 +0200)
committer Christoph Mallon <christoph.mallon@gmx.de>
Wed, 20 Jun 2012 19:52:08 +0000 (21:52 +0200)
preprocessor.c
token.c
tokens.inc

diff --git a/preprocessor.c b/preprocessor.c
index 897940d..47275a1 100644 (file)
--- a/preprocessor.c
+++ b/preprocessor.c
@@ -90,7 +90,7 @@ static pp_conditional_t *conditional_stack;
 
 static token_t           pp_token;
 static bool              resolve_escape_sequences = false;
-static bool              ignore_unknown_chars     = true;
+static bool              error_on_unknown_chars   = true;
 static bool              skip_mode;
 static FILE             *out;
 static struct obstack    pp_obstack;
@@ -1052,13 +1052,23 @@ restart:
                return;
 
        default:
-               next_char();
-               if (!ignore_unknown_chars) {
+               if (error_on_unknown_chars) {
                        errorf(&pp_token.base.source_position,
                               "unknown character '%lc' found\n", input.c);
+                       next_char();
                        goto restart;
                } else {
-                       pp_token.kind = input.c;
+                       assert(obstack_object_size(&symbol_obstack) == 0);
+                       obstack_grow_utf8(&symbol_obstack, input.c);
+                       obstack_1grow(&symbol_obstack, '\0');
+                       char     *const string = obstack_finish(&symbol_obstack);
+                       symbol_t *const symbol = symbol_table_insert(string);
+                       if (symbol->string != string)
+                               obstack_free(&symbol_obstack, string);
+
+                       pp_token.kind        = T_UNKNOWN_CHAR;
+                       pp_token.base.symbol = symbol;
+                       next_char();
                        return;
                }
        }
@@ -1738,6 +1748,8 @@ int pptest_main(int argc, char **argv)
        obstack_init(&input_obstack);
        strset_init(&stringset);
 
+       error_on_unknown_chars = false;
+
        setup_include_path();
 
        /* simplistic commandline parser */
diff --git a/token.c b/token.c
index 841bbba..e7c7abc 100644 (file)
--- a/token.c
+++ b/token.c
@@ -96,16 +96,7 @@ void print_token_kind(FILE *f, token_kind_t token_kind)
                return;
        }
 
-       const symbol_t *symbol = token_symbols[token_kind];
-       if(symbol != NULL) {
-               fputs(symbol->string, f);
-       } else {
-               if (token_kind < 256) {
-                       fputc(token_kind, f);
-                       return;
-               }
-               fputs("unknown token", f);
-       }
+       fputs(token_symbols[token_kind]->string, f);
 }
 
 char const *get_string_encoding_prefix(string_encoding_t const enc)
@@ -128,7 +119,9 @@ void print_token(FILE *f, const token_t *token)
 {
        switch(token->kind) {
        case T_IDENTIFIER:
-               fprintf(f, "identifier '%s'", token->base.symbol->string);
+       case T_UNKNOWN_CHAR:
+               print_token_kind(f, (token_kind_t)token->kind);
+               fprintf(f, " '%s'", token->base.symbol->string);
                break;
 
        case T_NUMBER:
@@ -159,7 +152,7 @@ print_string:
 
 bool tokens_would_paste(token_kind_t token1, token_kind_t token2)
 {
-       char const c = token2 < 256 ? (char)token2 : token_symbols[token2]->string[0];
+       char const c = token_symbols[token2]->string[0];
 
        switch (token1) {
        case '>': return c == '>' || c == '=';
diff --git a/tokens.inc b/tokens.inc
index 0de24d6..74dc86c 100644 (file)
--- a/tokens.inc
+++ b/tokens.inc
@@ -14,6 +14,7 @@ TS(NUMBER,             "number constant",)
 TS(CHARACTER_CONSTANT, "character constant",)
 TS(STRING_LITERAL,     "string literal",)
 TS(HEADERNAME,         "headername",)
+TS(UNKNOWN_CHAR,       "unknown character",)
 
 #define S(mode,x)   T(mode,x,#x,)
 S(_ALL, auto)