type: Add missing space for printing complex types.
[cparser] / token.c
diff --git a/token.c b/token.c
index c9b0de8..3d69874 100644 (file)
--- a/token.c
+++ b/token.c
 
 #include <assert.h>
 #include <stdio.h>
+#include <stdbool.h>
 
 #include "symbol.h"
 #include "lang_features.h"
 #include "adt/array.h"
+#include "adt/error.h"
 #include "adt/util.h"
 
-static symbol_t *token_symbols[T_LAST_TOKEN];
-static symbol_t *pp_token_symbols[TP_LAST_TOKEN];
+symbol_t *token_symbols[T_LAST_TOKEN];
 
-const source_position_t builtin_source_position = { "<built-in>", 0, 0, true };
+const position_t builtin_position = { "<built-in>", 0, 0, true };
 
 static token_kind_t last_id;
 
@@ -46,15 +47,6 @@ static symbol_t *intern_register_token(token_kind_t id, const char *string)
        return symbol;
 }
 
-static symbol_t *intern_register_pp_token(preprocessor_token_kind_t id, const char *string)
-{
-       assert(id < TP_LAST_TOKEN);
-       symbol_t *symbol = symbol_table_insert(string);
-       if (pp_token_symbols[id] == NULL)
-               pp_token_symbols[id] = symbol;
-       return symbol;
-}
-
 static void register_token(unsigned mode, token_kind_t id, const char *string)
 {
        if (id > 255) {
@@ -67,31 +59,28 @@ static void register_token(unsigned mode, token_kind_t id, const char *string)
        }
 }
 
-static void register_pp_token(unsigned mode, preprocessor_token_kind_t id,
-                              const char *string)
+static void register_pp_token(pp_token_kind_t const id, char const *const string)
 {
-       if (! (c_mode & mode))
-               return;
-
-       symbol_t *symbol = intern_register_pp_token(id, string);
+       assert(id < TP_LAST_TOKEN);
+       symbol_t *const symbol = symbol_table_insert(string);
        symbol->pp_ID = id;
 }
 
 void init_tokens(void)
 {
+       static bool tokens_initialized = false;
+       if (tokens_initialized)
+               return;
+       tokens_initialized = true;
+
        memset(token_symbols, 0, T_LAST_TOKEN * sizeof(token_symbols[0]));
-       memset(pp_token_symbols, 0, TP_LAST_TOKEN * sizeof(pp_token_symbols[0]));
 
-#define T(mode,x,str,val)  register_token(mode, T_##x, str);
-#define TS(x,str,val)      intern_register_token(T_##x, str);
+#define T(mode,x,str,val)  register_token(mode, x, str);
 #include "tokens.inc"
-#undef TS
 #undef T
 
-#define T(mode,x,str,val)  register_pp_token(mode, TP_##x, str);
-#define TS(x,str,val)      intern_register_pp_token(TP_##x, str);
+#define T(token) register_pp_token(TP_##token, #token);
 #include "tokens_preprocessor.inc"
-#undef TS
 #undef T
 }
 
@@ -101,122 +90,60 @@ void exit_tokens(void)
 
 void print_token_kind(FILE *f, token_kind_t token_kind)
 {
-       if(token_kind == T_EOF) {
-               fputs("end of file", f);
-               return;
-       }
-
        if (token_kind >= lengthof(token_symbols)) {
                fputs("invalid token", f);
                return;
        }
 
-       const symbol_t *symbol = token_symbols[token_kind];
-       if(symbol != NULL) {
-               fputs(symbol->string, f);
-       } else {
-               if (token_kind < 256) {
-                       fputc(token_kind, f);
-                       return;
-               }
-               fputs("unknown token", f);
-       }
+       fputs(token_symbols[token_kind]->string, f);
 }
 
-static void print_stringrep(const string_t *string, FILE *f)
+char const *get_string_encoding_prefix(string_encoding_t const enc)
 {
-       for (size_t i = 0; i < string->size; ++i) {
-               fputc(string->begin[i], f);
+       switch (enc) {
+       case STRING_ENCODING_CHAR:   return "";
+       case STRING_ENCODING_CHAR16: return "u";
+       case STRING_ENCODING_CHAR32: return "U";
+       case STRING_ENCODING_UTF8:   return "u8";
+       case STRING_ENCODING_WIDE:   return "L";
        }
+       panic("invalid string encoding");
 }
 
 void print_token(FILE *f, const token_t *token)
 {
-       switch(token->kind) {
+       char        delim = '\'';
+       char const *enc   = "";
+       char const *val;
+       switch (token->kind) {
        case T_IDENTIFIER:
-               fprintf(f, "identifier '%s'", token->base.symbol->string);
-               break;
-       case T_INTEGER:
-       case T_FLOATINGPOINT:
-               print_token_kind(f, (token_kind_t)token->kind);
-               fputs(" '", f);
-               print_stringrep(&token->number.number, f);
-               if (token->number.suffix.size > 0)
-                       print_stringrep(&token->number.suffix, f);
-               fputc('\'', f);
+       case T_MACRO_PARAMETER:
+       case T_UNKNOWN_CHAR:
+               val = token->base.symbol->string;
                break;
-       case T_WIDE_STRING_LITERAL:
+
        case T_STRING_LITERAL:
-               print_token_kind(f, (token_kind_t)token->kind);
-               fprintf(f, " \"%s\"", token->string.string.begin);
-               break;
+               delim = '"';
+               /* FALLTHROUGH */
        case T_CHARACTER_CONSTANT:
-       case T_WIDE_CHARACTER_CONSTANT:
-               print_token_kind(f, (token_kind_t)token->kind);
-               fputs(" \'", f);
-               print_stringrep(&token->string.string, f);
-               fputs("'", f);
-               break;
-
-       default:
-               if (token->base.symbol) {
-                       fprintf(f, "'%s'", token->base.symbol->string);
-               } else {
-                       fputc('\'', f);
-                       print_token_kind(f, (token_kind_t)token->kind);
-                       fputc('\'', f);
-               }
+               enc = get_string_encoding_prefix(token->literal.string.encoding);
+               /* FALLTHROUGH */
+       case T_NUMBER:
+               val = token->literal.string.begin;
                break;
-       }
-}
 
-void print_pp_token_kind(FILE *f, int token_kind)
-{
-       if (token_kind == TP_EOF) {
-               fputs("end of file", f);
+       default: {
+               char const *kind  = (token->base.symbol ? token->base.symbol : token_symbols[token->kind])->string;
+               fprintf(f, "'%s'", kind);
                return;
        }
-
-       int token_symbols_len = TP_LAST_TOKEN;
-       if (token_kind < 0 || token_kind >= token_symbols_len) {
-               fputs("invalid token", f);
-               return;
-       }
-
-       const symbol_t *symbol = pp_token_symbols[token_kind];
-       if (symbol != NULL) {
-               fputs(symbol->string, f);
-       } else {
-               if(token_kind >= 0 && token_kind < 256) {
-                       fputc(token_kind, f);
-                       return;
-               }
-               fputs("unknown token", f);
        }
+       fprintf(f, "%s %s%c%s%c", token_symbols[token->kind]->string, enc, delim, val, delim);
 }
 
-void print_pp_token(FILE *f, const token_t *token)
+bool tokens_would_paste(token_kind_t token1, token_kind_t token2)
 {
-       switch((preprocessor_token_kind_t) token->kind) {
-       case TP_IDENTIFIER:
-               fprintf(f, "identifier '%s'", token->base.symbol->string);
-               break;
-       case TP_NUMBER:
-               fprintf(f, "number '%s'", token->number.number.begin);
-               break;
-       case TP_STRING_LITERAL:
-               fprintf(f, "string \"%s\"", token->string.string.begin);
-               break;
-       default:
-               print_pp_token_kind(f, (preprocessor_token_kind_t) token->kind);
-               break;
-       }
-}
-
-bool tokens_would_paste(preprocessor_token_kind_t token1,
-                        preprocessor_token_kind_t token2)
-{
-       char c = token2 < 256 ? (char) token2 : pp_token_symbols[token2]->string[0];
+       char const c = token_symbols[token2]->string[0];
 
        switch (token1) {
        case '>': return c == '>' || c == '=';
@@ -229,21 +156,23 @@ bool tokens_would_paste(preprocessor_token_kind_t token1,
        case '|': return c == '|' || c == '=';
        case ':': return c == ':' || c == '>';
        case '*': return c == '*' || c == '=';
-       case '.': return c == '.' || c == '%' || token2 == TP_NUMBER;
+       case '.': return c == '.' || c == '%' || token2 == T_NUMBER;
        case '#': return c == '#' || c == '%';
-       case TP_GREATERGREATER: return c == '=';
-       case TP_LESSLESS:       return c == '=';
-       case '^':               return c == '=';
-       case '!':               return c == '=';
-       case TP_IDENTIFIER:
-               return token2 == TP_IDENTIFIER || token2 == TP_NUMBER ||
-                      token2 == TP_CHARACTER_CONSTANT ||
-                      token2 == TP_WIDE_CHARACTER_CONSTANT ||
-                      token2 == TP_WIDE_STRING_LITERAL ||
-                      token2 == TP_STRING_LITERAL; /* L */
-       case TP_NUMBER:
-               return token2 == TP_NUMBER || token2 == TP_IDENTIFIER ||
+       case T_GREATERGREATER: return c == '=';
+       case T_LESSLESS:       return c == '=';
+       case '^':              return c == '=';
+       case '!':              return c == '=';
+
+       case T_IDENTIFIER:
+               return token2 == T_CHARACTER_CONSTANT ||
+                      token2 == T_IDENTIFIER         ||
+                      token2 == T_NUMBER             ||
+                      token2 == T_STRING_LITERAL; /* L */
+
+       case T_NUMBER:
+               return token2 == T_IDENTIFIER || token2 == T_NUMBER ||
                       token2 == '.' || token2 == '+' || token2 == '-';
+
        default:
                return false;
        }