#include <assert.h>
#include <stdio.h>
+#include <stdbool.h>
#include "symbol.h"
#include "lang_features.h"
#include "adt/error.h"
#include "adt/util.h"
-static symbol_t *token_symbols[T_LAST_TOKEN];
+/*
+ * Symbol of each token kind, indexed by the token kind; the table has
+ * T_LAST_TOKEN entries. print_token_kind(), print_token() and
+ * tokens_would_paste() read the spelling of a kind from here.
+ * NOTE(review): the definition has external linkage -- presumably a header
+ * declares it extern; confirm.
+ */
+symbol_t *token_symbols[T_LAST_TOKEN];
-const source_position_t builtin_source_position = { "<built-in>", 0, 0, true };
+const position_t builtin_position = { "<built-in>", 0, 0, true };
static token_kind_t last_id;
void init_tokens(void)
{
+ static bool tokens_initialized = false;
+ if (tokens_initialized)
+ return;
+ tokens_initialized = true;
+
memset(token_symbols, 0, T_LAST_TOKEN * sizeof(token_symbols[0]));
-#define T(mode,x,str,val) register_token(mode, T_##x, str);
-#define TS(x,str,val) intern_register_token(T_##x, str);
+#define T(mode,x,str,val) register_token(mode, x, str);
#include "tokens.inc"
-#undef TS
#undef T
#define T(token) register_pp_token(TP_##token, #token);
+/*
+ * Writes the spelling of the token kind token_kind to the stream f.
+ *
+ * A kind that lies outside the bounds of token_symbols is printed as
+ * "invalid token". Every other kind is printed through its token_symbols
+ * entry, which is dereferenced without a NULL check.
+ * NOTE(review): this presumes a symbol has been registered for every kind
+ * below T_LAST_TOKEN (including T_EOF and the single-character tokens) --
+ * confirm against tokens.inc.
+ */
void print_token_kind(FILE *f, token_kind_t token_kind)
{
- if(token_kind == T_EOF) {
- fputs("end of file", f);
- return;
- }
-
if (token_kind >= lengthof(token_symbols)) {
fputs("invalid token", f);
return;
}
- const symbol_t *symbol = token_symbols[token_kind];
- if(symbol != NULL) {
- fputs(symbol->string, f);
- } else {
- if (token_kind < 256) {
- fputc(token_kind, f);
- return;
- }
- fputs("unknown token", f);
- }
+ fputs(token_symbols[token_kind]->string, f);
}
+/*
+ * Returns the prefix string that belongs to the string encoding enc:
+ * "" for CHAR, "u" for CHAR16, "U" for CHAR32, "u8" for UTF8 and "L" for
+ * WIDE. The returned pointer refers to a string literal.
+ * Calls panic() if enc matches none of the listed encodings.
+ */
char const *get_string_encoding_prefix(string_encoding_t const enc)
{
switch (enc) {
- case STRING_ENCODING_CHAR: return "";
- case STRING_ENCODING_WIDE: return "L";
+ case STRING_ENCODING_CHAR: return "";
+ case STRING_ENCODING_CHAR16: return "u";
+ case STRING_ENCODING_CHAR32: return "U";
+ case STRING_ENCODING_UTF8: return "u8";
+ case STRING_ENCODING_WIDE: return "L";
}
panic("invalid string encoding");
}
-static void print_stringrep(const string_t *string, FILE *f)
-{
- for (size_t i = 0; i < string->size; ++i) {
- fputc(string->begin[i], f);
- }
-}
-
+/*
+ * Prints a description of token to the stream f.
+ *
+ * Identifiers, macro parameters, unknown characters, numbers, string
+ * literals and character constants are printed as
+ *     <kind name> <encoding prefix><delim><text><delim>
+ * where <kind name> comes from token_symbols, <delim> is a double quote for
+ * string literals and a single quote otherwise, and the encoding prefix is
+ * only looked up for string literals and character constants (it stays
+ * empty for the other kinds).
+ *
+ * Any other token prints just one name in single quotes: the string of its
+ * own symbol if it has one, else the token_symbols entry of its kind.
+ */
void print_token(FILE *f, const token_t *token)
{
- switch(token->kind) {
+ char delim = '\'';
+ char const *enc = "";
+ char const *val;
+ switch (token->kind) {
case T_IDENTIFIER:
- fprintf(f, "identifier '%s'", token->base.symbol->string);
+ case T_MACRO_PARAMETER:
+ case T_UNKNOWN_CHAR:
+ val = token->base.symbol->string;
break;
+ case T_STRING_LITERAL:
+ delim = '"';
+ /* FALLTHROUGH */
+ case T_CHARACTER_CONSTANT:
+ enc = get_string_encoding_prefix(token->literal.string.encoding);
+ /* FALLTHROUGH */
case T_NUMBER:
- fprintf(f, "number '%s'", token->literal.string.begin);
- break;
-
- char delim;
- case T_STRING_LITERAL: delim = '"'; goto print_string;
- case T_CHARACTER_CONSTANT: delim = '\''; goto print_string;
-print_string:
- print_token_kind(f, (token_kind_t)token->kind);
- fprintf(f, " %s%c", get_string_encoding_prefix(token->literal.string.encoding), delim);
- print_stringrep(&token->literal.string, f);
- fputc(delim, f);
+ val = token->literal.string.begin;
break;
- default:
- if (token->base.symbol) {
- fprintf(f, "'%s'", token->base.symbol->string);
- } else {
- fputc('\'', f);
- print_token_kind(f, (token_kind_t)token->kind);
- fputc('\'', f);
- }
- break;
+ default: {
+ /* Returns here, so val is never read on this path. */
+ char const *kind = (token->base.symbol ? token->base.symbol : token_symbols[token->kind])->string;
+ fprintf(f, "'%s'", kind);
+ return;
+ }
}
+ /*
+ * val is written with %s, so output stops at its first NUL byte.
+ * NOTE(review): literal.string also carries a size; confirm that literal
+ * text can never contain an embedded NUL at this stage.
+ */
+ fprintf(f, "%s %s%c%s%c", token_symbols[token->kind]->string, enc, delim, val, delim);
}
bool tokens_would_paste(token_kind_t token1, token_kind_t token2)
{
- char const c = token2 < 256 ? (char)token2 : token_symbols[token2]->string[0];
+ char const c = token_symbols[token2]->string[0];
switch (token1) {
case '>': return c == '>' || c == '=';