#include "symbol.h"
#include "lang_features.h"
#include "adt/array.h"
+#include "adt/error.h"
+#include "adt/util.h"
/* Symbol for each token kind, indexed by kind. This change drops `static`
 * from the table and removes the separate preprocessor-token table. */
-static symbol_t *token_symbols[T_LAST_TOKEN];
-static symbol_t *pp_token_symbols[TP_LAST_TOKEN];
+symbol_t *token_symbols[T_LAST_TOKEN];
/* Source position labelled "<built-in>"; the new initializer supplies a
 * fourth field (true) whose meaning is not visible in this excerpt. */
-const source_position_t builtin_source_position = { "<built-in>", 0, 0 };
+const source_position_t builtin_source_position = { "<built-in>", 0, 0, true };
/* Compared against by the ordering assert in register_token().
 * NOTE(review): no assignment to last_id is visible in this excerpt once the
 * `last_id = -2` line in init_tokens() is removed -- confirm it is still
 * updated somewhere. */
-static int last_id;
+static token_kind_t last_id;
/*
 * Passes `string` to symbol_table_insert() and records the returned symbol
 * in token_symbols[id], unless that slot is already occupied.
 * Returns the symbol obtained from symbol_table_insert() in either case.
 */
-static symbol_t *intern_register_token(token_type_t id, const char *string)
+static symbol_t *intern_register_token(token_kind_t id, const char *string)
{
/* The lower-bound check is dropped in the new version; presumably
 * token_kind_t cannot be negative -- TODO confirm. */
- assert(0 <= id && id < T_LAST_TOKEN);
+ assert(id < T_LAST_TOKEN);
symbol_t *symbol = symbol_table_insert(string);
/* First registration wins: a later call with the same id leaves the
 * existing table entry untouched. */
if (token_symbols[id] == NULL)
token_symbols[id] = symbol;
return symbol;
}
/* Removed by this change: the preprocessor counterpart of
 * intern_register_token(), which filled pp_token_symbols[] the same way. */
-static symbol_t *intern_register_pp_token(preprocessor_token_type_t id, const char *string)
-{
- assert(0 <= id && id < TP_LAST_TOKEN);
- symbol_t *symbol = symbol_table_insert(string);
- if (pp_token_symbols[id] == NULL)
- pp_token_symbols[id] = symbol;
- return symbol;
-}
-
/*
 * Registration entry point used by the T(...) macro in init_tokens().
 * NOTE(review): only the ordering assertion is visible in this excerpt;
 * `mode` and `string` are not used by the lines shown, so part of the body
 * is presumably outside this view -- confirm against the full file.
 */
-static void register_token(unsigned mode, token_type_t id, const char *string)
+static void register_token(unsigned mode, token_kind_t id, const char *string)
{
/* For ids above 255, check that the id is not smaller than last_id. */
if (id > 255) {
assert(id >= last_id);
}
}
/*
 * Tags the symbol for `string` with the preprocessor token kind `id` by
 * storing `id` in the symbol's pp_ID field.
 *
 * The new version drops the `mode` parameter together with the c_mode
 * filter, and obtains the symbol directly from symbol_table_insert()
 * instead of going through intern_register_pp_token().
 */
-static void register_pp_token(unsigned mode, token_type_t id,
- const char *string)
+static void register_pp_token(pp_token_kind_t const id, char const *const string)
{
- if (! (c_mode & mode))
- return;
-
- symbol_t *symbol = intern_register_pp_token(id, string);
+ assert(id < TP_LAST_TOKEN);
+ symbol_t *const symbol = symbol_table_insert(string);
symbol->pp_ID = id;
}
/*
 * Clears token_symbols[] and then expands the T(...) entries of tokens.inc
 * and tokens_preprocessor.inc into registration calls.
 */
void init_tokens(void)
{
memset(token_symbols, 0, T_LAST_TOKEN * sizeof(token_symbols[0]));
- memset(pp_token_symbols, 0, TP_LAST_TOKEN * sizeof(pp_token_symbols[0]));
-
- last_id = -2;
-#define T(mode,x,str,val) register_token(mode, T_##x, str);
-#define TS(x,str,val) intern_register_token(T_##x, str);
/* The new T() passes `x` through unchanged instead of pasting a T_ prefix
 * onto it, and the TS() variant is no longer defined. */
+#define T(mode,x,str,val) register_token(mode, x, str);
#include "tokens.inc"
-#undef TS
#undef T
-#define T(mode,x,str,val) register_pp_token(mode, TP_##x, str);
-#define TS(x,str,val) intern_register_pp_token(TP_##x, str);
/* Preprocessor entries now take a single argument: the kind is TP_<token>
 * and the registered string is the stringized token name itself. */
+#define T(token) register_pp_token(TP_##token, #token);
#include "tokens_preprocessor.inc"
-#undef TS
#undef T
}
{
}
/*
 * Writes the string of the symbol registered for `token_kind` to `f`, or
 * "invalid token" when the kind lies outside token_symbols[].
 *
 * Renamed from print_token_type(); the special cases for T_EOF, T_ERROR,
 * single-character kinds and unregistered kinds are removed by this change.
 */
-void print_token_type(FILE *f, token_type_t token_type)
+void print_token_kind(FILE *f, token_kind_t token_kind)
{
- if(token_type == T_EOF) {
- fputs("end of file", f);
- return;
- }
- if(token_type == T_ERROR) {
- fputs("error", f);
- return;
- }
-
- int token_symbols_len = T_LAST_TOKEN;
- if(token_type < 0 || token_type >= token_symbols_len) {
+ if (token_kind >= lengthof(token_symbols)) {
fputs("invalid token", f);
return;
}
- const symbol_t *symbol = token_symbols[token_type];
- if(symbol != NULL) {
- fputs(symbol->string, f);
- } else {
- if(token_type >= 0 && token_type < 256) {
- fputc(token_type, f);
- return;
- }
- fputs("unknown token", f);
- }
-}
-
-symbol_t *get_token_symbol(const token_t *token)
-{
- return token_symbols[token->type];
/* get_token_symbol() above is removed by this change.
 * NOTE(review): unlike the removed code, the new line dereferences the table
 * entry without a NULL check; this assumes every in-range kind has a symbol
 * registered by init_tokens() -- confirm against tokens.inc. */
+ fputs(token_symbols[token_kind]->string, f);
}
/*
 * Returns the prefix string for a string encoding: "" for CHAR, "u" for
 * CHAR16, "U" for CHAR32, "u8" for UTF8 and "L" for WIDE. Any value not
 * listed in the switch reaches panic().
 *
 * Replaces the removed print_stringrep() helper, which wrote the bytes of a
 * string_t to a stream one at a time.
 */
-static void print_stringrep(const string_t *string, FILE *f)
+char const *get_string_encoding_prefix(string_encoding_t const enc)
{
- for (size_t i = 0; i < string->size; ++i) {
- fputc(string->begin[i], f);
+ switch (enc) {
+ case STRING_ENCODING_CHAR: return "";
+ case STRING_ENCODING_CHAR16: return "u";
+ case STRING_ENCODING_CHAR32: return "U";
+ case STRING_ENCODING_UTF8: return "u8";
+ case STRING_ENCODING_WIDE: return "L";
}
/* Reached only when no case above returned. */
+ panic("invalid string encoding");
}
/*
 * Writes a description of `token` to `f`.
 *
 * New behaviour: tokens that carry a value (identifier-like kinds, string
 * literals, character constants, numbers) are printed as
 *   <kind string> <encoding prefix><delim><value><delim>
 * where the delimiter is a double quote for string literals and a single
 * quote otherwise. Every other kind is printed as its symbol string in
 * single quotes.
 *
 * The removed lines include the old per-kind printing as well as the whole
 * of print_pp_token_type(), which this change deletes.
 */
void print_token(FILE *f, const token_t *token)
{
- switch(token->type) {
/* Defaults: single-quote delimiter and no encoding prefix. `val` is set by
 * every case that reaches the final fprintf(). */
+ char delim = '\'';
+ char const *enc = "";
+ char const *val;
+ switch (token->kind) {
case T_IDENTIFIER:
- fprintf(f, "identifier '%s'", token->symbol->string);
- break;
- case T_INTEGER:
- case T_INTEGER_OCTAL:
- case T_INTEGER_HEXADECIMAL:
- case T_FLOATINGPOINT:
- case T_FLOATINGPOINT_HEXADECIMAL:
- print_token_type(f, (token_type_t)token->type);
- fputs(" '", f);
- print_stringrep(&token->literal, f);
- if (token->symbol != NULL)
- fputs(token->symbol->string, f);
- fputc('\'', f);
+ case T_MACRO_PARAMETER:
+ case T_UNKNOWN_CHAR:
+ val = token->base.symbol->string;
break;
- case T_WIDE_STRING_LITERAL:
+
case T_STRING_LITERAL:
- print_token_type(f, (token_type_t)token->type);
- fprintf(f, " \"%s\"", token->literal.begin);
- break;
+ delim = '"';
+ /* FALLTHROUGH */
case T_CHARACTER_CONSTANT:
- case T_WIDE_CHARACTER_CONSTANT:
- print_token_type(f, (token_type_t)token->type);
- fputs(" \'", f);
- print_stringrep(&token->literal, f);
- fputs("'", f);
+ enc = get_string_encoding_prefix(token->literal.string.encoding);
+ /* FALLTHROUGH */
+ case T_NUMBER:
+ val = token->literal.string.begin;
break;
- default:
- fputc('\'', f);
- print_token_type(f, (token_type_t)token->type);
- fputc('\'', f);
- break;
- }
-}
-
-void print_pp_token_type(FILE *f, int token_type)
-{
- if (token_type == TP_EOF) {
- fputs("end of file", f);
- return;
- }
- if (token_type == TP_ERROR) {
- fputs("error", f);
- return;
- }
- int token_symbols_len = TP_LAST_TOKEN;
- if (token_type < 0 || token_type >= token_symbols_len) {
- fputs("invalid token", f);
/* Any other kind: prefer the symbol attached to the token itself and fall
 * back to the table entry for its kind; print it quoted and return without
 * reaching the final fprintf(). */
+ default: {
+ char const *kind = (token->base.symbol ? token->base.symbol : token_symbols[token->kind])->string;
+ fprintf(f, "'%s'", kind);
return;
}
-
- const symbol_t *symbol = pp_token_symbols[token_type];
- if (symbol != NULL) {
- fputs(symbol->string, f);
- } else {
- if(token_type >= 0 && token_type < 256) {
- fputc(token_type, f);
- return;
- }
- fputs("unknown token", f);
}
/* Value-carrying kinds only: kind string, then prefix and delimited value. */
+ fprintf(f, "%s %s%c%s%c", token_symbols[token->kind]->string, enc, delim, val, delim);
}
/*
 * Reports whether the pair (token1, token2) is one of the combinations
 * listed below. For punctuator kinds the decision is based on the first
 * character of the string registered for token2 (and, for '.', also on
 * token2 being a number); for identifiers and numbers it is based on
 * token2's kind. Every other token1 yields false.
 *
 * Presumably this tells a caller whether two adjacent tokens would be read
 * as a different token if emitted without separating whitespace -- confirm
 * against the callers. Replaces print_pp_token(), which this change removes.
 *
 * NOTE(review): token_symbols[token2] is dereferenced unconditionally, so
 * this assumes token2 is in range and has a registered symbol -- confirm.
 */
-void print_pp_token(FILE *f, const token_t *token)
+bool tokens_would_paste(token_kind_t token1, token_kind_t token2)
{
- switch((preprocessor_token_type_t) token->type) {
- case TP_IDENTIFIER:
- fprintf(f, "identifier '%s'", token->symbol->string);
- break;
- case TP_NUMBER:
- fprintf(f, "number '%s'", token->literal.begin);
- break;
- case TP_STRING_LITERAL:
- fprintf(f, "string \"%s\"", token->literal.begin);
- break;
/* First character of token2's registered string. */
+ char const c = token_symbols[token2]->string[0];
+
+ switch (token1) {
+ case '>': return c == '>' || c == '=';
+ case '<': return c == '<' || c == '=' || c == '%' || c == ':';
+ case '+': return c == '+' || c == '=';
+ case '-': return c == '-' || c == '>';
+ case '/': return c == '/' || c == '=' || c == '*';
+ case '%': return c == ':' || c == '=' || c == '>';
+ case '&': return c == '&' || c == '=';
+ case '|': return c == '|' || c == '=';
+ case ':': return c == ':' || c == '>';
+ case '*': return c == '*' || c == '=';
+ case '.': return c == '.' || c == '%' || token2 == T_NUMBER;
+ case '#': return c == '#' || c == '%';
+ case T_GREATERGREATER: return c == '=';
+ case T_LESSLESS: return c == '=';
+ case '^': return c == '=';
+ case '!': return c == '=';
+
+ case T_IDENTIFIER:
+ return token2 == T_CHARACTER_CONSTANT ||
+ token2 == T_IDENTIFIER ||
+ token2 == T_NUMBER ||
+ token2 == T_STRING_LITERAL; /* L */
+
+ case T_NUMBER:
+ return token2 == T_IDENTIFIER || token2 == T_NUMBER ||
+ token2 == '.' || token2 == '+' || token2 == '-';
+
default:
- print_pp_token_type(f, (preprocessor_token_type_t) token->type);
- break;
+ return false;
}
}