case 'Z': \
case '_':
+#define DIGITS /* case labels for the decimal digit characters '0' through '9'; expands inside a switch */ \
+ case '0': \
+ case '1': \
+ case '2': \
+ case '3': \
+ case '4': \
+ case '5': \
+ case '6': \
+ case '7': \
+ case '8': \
+ case '9':
+
static
void parse_symbol(token_t *token)
{
EAT_NEWLINE(break;)
goto end_symbol;
+ DIGITS
SYMBOL_CHARS
obstack_1grow(&symbol_obstack, c);
next_char();
case 'x': /* TODO parse hex number ... */
parse_error("hex escape sequences not implemented yet");
return EOF;
- case 0:
- case 1:
- case 2:
- case 3:
- case 4:
- case 5:
- case 6:
- case 7:
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
/* TODO parse octal number ... */
parse_error("octal escape sequences not implemented yet");
return EOF;
}
}
-static
-void lexer_next_preprocessing_token(token_t *token);
-
static token_t pp_token;
static inline
static
void eat_until_newline(void)
{
- /* TODO */
+ while(pp_token.type != '\n' && pp_token.type != T_EOF) { /* stop once pp_token is a newline token or end of file */
+ next_pp_token(); /* presumably advances pp_token to the next preprocessing token; defined outside this view */
+ }
}
static
next_pp_token();
}
- while(pp_token.type != T_EOF && pp_token.type != '\n') {
- next_pp_token();
- }
+ eat_until_newline();
}
static
}
static
-void parse_preprocessor_directive(token_t *result_token)
+void parse_preprocessor_directive()
{
next_pp_token();
case T_INTEGER:
parse_line_directive();
break;
+ default:
+ parse_error("invalid preprocessor directive");
+ eat_until_newline();
+ break;
}
-
- lexer_next_token(result_token);
}
#define MAYBE_PROLOG \
return; \
)
-static
-void eat_whitespace()
-{
- while(1) {
- switch(c) {
- case ' ':
- case '\t':
- next_char();
- break;
-
- case '\r':
- case '\n':
- return;
-
- case '\\':
- next_char();
- if(c == '\n') {
- next_char();
- source_position.linenr++;
- break;
- }
-
- put_back(c);
- c = '\\';
- return;
-
- SKIP_TRIGRAPHS(,
- return;
- )
-
- case '/':
- next_char();
- while(1) {
- switch(c) {
- case '*':
- next_char();
- skip_multiline_comment();
- eat_whitespace();
- return;
- case '/':
- next_char();
- skip_line_comment();
- eat_whitespace();
- return;
-
- SKIP_TRIGRAPHS(
- put_back('?');
- ,
- c = '/';
- return;
- )
-
- case '\\':
- next_char();
- EAT_NEWLINE(break;)
- /* fallthrough */
- default:
- return;
- }
- }
- break;
-
- default:
- return;
- }
- }
-}
-
-static
void lexer_next_preprocessing_token(token_t *token)
{
while(1) {
break;
MATCH_NEWLINE(
- eat_whitespace();
- if(c == '#') {
- next_char();
- parse_preprocessor_directive(token);
- return;
- }
token->type = '\n';
return;
)
parse_symbol(token);
return;
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
+ DIGITS
parse_number(token);
return;
void lexer_next_token(token_t *token)
{
- do {
+ while(1) { /* iterates again only after a preprocessor directive was handled (the continue below) */
lexer_next_preprocessing_token(token);
- } while(token->type == '\n');
+ if(token->type == '\n') {
+ do { /* skip a run of consecutive newline tokens */
+ lexer_next_preprocessing_token(token);
+ } while(token->type == '\n');
+
+ if(token->type == '#') { /* a '#' as the first token after a newline is handled as a directive */
+ parse_preprocessor_directive();
+ continue; /* NOTE(review): a '#' read right after this continue is returned as an ordinary token unless a newline token comes first; confirm the directive parser leaves the line's newline unread */
+ }
+ }
+ return; /* at this point *token is never a newline token */
+ }
}
void init_lexer(void)
void lexer_next_token(token_t *token);
+/* for debugging */
+void lexer_next_preprocessing_token(token_t *token);
+
#endif
for i in tokenstreams/*; do
if [ "$i" != "tokenstreams/refresults" ]; then
echo "==> Checking $i"
- ../cparser $i > /tmp/tokenstream
- diff -u /tmp/tokenstream tokenstreams/refresults/`basename $i`
+ ../cparser --lextest $i > /tmp/tokenstream # capture the output of the --lextest mode for this input
+ diff -u tokenstreams/refresults/`basename $i` /tmp/tokenstream # reference file first, so '+' lines in the diff are the actual output
fi
done
+'newline'
integer number 97
+'newline'
integer number 98
+'newline'
integer number 99
+'newline'
integer number 92
+'newline'
integer number 92
+'newline'
integer number 92
+'newline'
integer number 92
+'newline'
integer number 92
+'newline'
+'newline'
end of file
+'newline'
+'newline'
+'newline'
+'newline'
+'newline'
+'newline'
+'newline'
+'newline'
end of file
+'newline'
symbol 'hohohohohohoho'
+'newline'
symbol 'ho'
+'newline'
symbol 'ho'
+'newline'
symbol 'ho'
end of file
-++
-++
+'newline'
+'++'
+'++'
'+'
+'newline'
'.'
+'newline'
'.'
'.'
+'newline'
'.'
'.'
-...
-...
+'newline'
+'...'
+'newline'
+'...'
+'newline'
'.'
+'newline'
'.'
-...
+'newline'
+'...'
'.'
+'newline'
'.'
'.'
'.'
-++
-<<=
->>=
+'newline'
+'++'
+'newline'
+'<<='
+'newline'
+'>>='
+'newline'
'%'
+'newline'
'}'
-%=
+'newline'
+'%='
+'newline'
'#'
+'newline'
'#'
'%'
-%:%:
-%:%:
+'newline'
+'%:%:'
+'newline'
+'%:%:'
+'newline'
'<'
-<<
-<<=
-<<
+'newline'
+'<<'
+'newline'
+'<<='
+'newline'
+'<<'
'<'
'='
-<<=
+'newline'
+'<<='
+'newline'
'>'
->>
->>=
->>
+'newline'
+'>>'
+'newline'
+'>>='
+'newline'
+'>>'
'>'
'='
->>=
+'newline'
+'>>='
+'newline'
end of file
+'newline'
string 'bla?'
+'newline'
string 'bla??'
+'newline'
string 'bla???'
+'newline'
string 'bla
'
+'newline'
string 'bla?
'
+'newline'
string 'bla??
'
+'newline'
string 'bla'
+'newline'
string 'bla?'
+'newline'
string 'bla??'
+'newline'
string 'bla'
+'newline'
string 'bla
'
+'newline'
end of file
+'newline'
symbol 't'
+'newline'
symbol 'tt'
+'newline'
symbol 'one'
symbol 'two'
+'newline'
symbol 'symbo'
'?'
'?'
'?'
+'newline'
symbol 'symbo'
'?'
'?'
+'newline'
symbol 'symbo'
'?'
+'newline'
+'newline'
symbol 'one'
symbol 'two'
+'newline'
end of file
+'newline'
'?'
+'newline'
'?'
'?'
+'newline'
'?'
'?'
'?'
+'newline'
integer number 35
+'newline'
'['
+'newline'
']'
+'newline'
'^'
+'newline'
'{'
+'newline'
'|'
+'newline'
'}'
+'newline'
'~'
+'newline'
'?'
'?'
'>'
+'newline'
end of file
lexer_open_stream(in, fname);
-#if 0
+#if 1
token_t token;
do {
lexer_next_token(&token);
fclose(in);
}
+static
+void lextest(const char *fname) /* prints every preprocessing token of the file fname to stdout, each followed by a line break */
+{
+ FILE *in = fopen(fname, "r");
+ if(in == NULL) {
+ fprintf(stderr, "Couldn't open '%s': %s\n", fname, strerror(errno));
+ exit(1); /* an input file that cannot be opened ends the program with status 1 */
+ }
+
+ lexer_open_stream(in, fname);
+
+ token_t token;
+ do {
+ lexer_next_preprocessing_token(&token); /* used instead of lexer_next_token, which skips newline tokens */
+ print_token(stdout, &token);
+ puts(""); /* writes only the line break that ends this token's output */
+ } while(token.type != T_EOF); /* the T_EOF token is printed before the loop ends */
+
+ fclose(in);
+}
+
int main(int argc, char **argv)
{
init_symbol_table();
init_ast();
init_parser();
+ if(argc > 2 && strcmp(argv[1], "--lextest") == 0) {
+ lextest(argv[2]);
+ return 0;
+ }
+
for(int i = 1; i < argc; ++i) {
compile(argv[i]);
}
if(storage_class == STORAGE_CLASS_TYPEDEF) {
symbol->ID = T_TYPENAME;
+ fprintf(stderr, "typedef '%s'\n", symbol->string);
} else {
symbol->ID = T_IDENTIFIER;
}
#define T_LAST_TOKEN (T_HASHHASH+1)
+T(LESSCOLON, "<:", = '[')
+T(COLONGREATER, ":>", = ']')
+T(LESSPERCENT, "<%", = '{')
+T(PERCENTGREATER, "%>", = '}')
+T(PERCENTCOLON, "%:", = '#')
+T(PERCENTCOLONPERCENTCOLON, "%:%:", = T_HASHHASH)
+
T(RBRACK, "[", = '[')
T(LBRACK, "]", = ']')
T(LBRACE, "(", = '(')
T(COMMA, ",", = ',')
T(HASH, "#", = '#')
-T(LESSCOLON, "<:", = '[')
-T(COLONGREATER, ":>", = ']')
-T(LESSPERCENT, "<%", = '{')
-T(PERCENTGREATER, "%>", = '}')
-T(PERCENTCOLON, "%:", = '#')
-T(PERCENTCOLONPERCENTCOLON, "%:%:", = T_HASHHASH)
-
TS(NEWLINE, "newline", = '\n')
switch(type->atype) {
case ATOMIC_TYPE_INVALID: fputs("INVALIDATOMIC", out); break;
+ case ATOMIC_TYPE_VOID: fputs("void", out); break;
case ATOMIC_TYPE_BOOL: fputs("bool", out); break;
case ATOMIC_TYPE_CHAR: fputs("char", out); break;
case ATOMIC_TYPE_SCHAR: fputs("signed char", out); break;