#include <string.h>
#include <ctype.h>
-#define DEBUG_CHARS
+//#define DEBUG_CHARS
#define MAX_PUTBACK 3
static int c;
token_t lexer_token;
+symbol_t *symbol_L;
static FILE *input;
static char buf[1024 + MAX_PUTBACK];
static const char *bufend;
static const char *bufpos;
static strset_t stringset;
-//static FILE **input_stack;
-//static char **buf_stack;
static void error_prefix_at(const char *input_name, unsigned linenr)
{
static inline void put_back(int pc)
{
- char *p = (char*) bufpos - 1;
- bufpos--;
- assert(p >= buf);
+ assert(bufpos >= buf);
+ assert(bufpos < buf+MAX_PUTBACK || *bufpos == pc);
+
+ char *p = buf + (bufpos - buf);
*p = pc;
+ /* going backwards in the buffer is legal as long as it's not more often
+ * than MAX_PUTBACK */
+ bufpos--;
+
#ifdef DEBUG_CHARS
printf("putback '%c'\n", pc);
#endif
lexer_token.source_position.linenr++; \
code;
-static inline void eat(char c_type)
-{
- assert(c == c_type);
- next_char();
-}
+#define eat(c_type) do { assert(c == c_type); next_char(); } while(0) /* asserts
+ * that the current character c equals c_type, then calls next_char().
+ * NOTE(review): c_type is expanded without parentheses inside the
+ * assert, so only simple expressions are safe as the argument. */
static void maybe_concat_lines(void)
{
{
next_real_char();
+#if 0
/* filter trigraphs */
if(UNLIKELY(c == '\\')) {
maybe_concat_lines();
}
end_of_next_char:
+#endif
+ (void) maybe_concat_lines;
#ifdef DEBUG_CHARS
printf("nchar '%c'\n", c);
#else
}
}
+static void parse_floating_suffix(void)
+{
+ switch(c) {
+ /* TODO: do something useful with the suffixes. For now a single
+  * 'f', 'F', 'l' or 'L' in the current character c is skipped with
+  * next_char() and otherwise ignored; any other character is left
+  * untouched for the caller. */
+ case 'f':
+ case 'F':
+ case 'l':
+ case 'L':
+ next_char();
+ break;
+ default:
+ break;
+ }
+}
+
static void parse_number_hex(void)
{
assert(c == 'x' || c == 'X');
lexer_token.type = T_FLOATINGPOINT;
lexer_token.v.floatvalue = value;
+
+ parse_floating_suffix();
}
static void parse_floatingpoint_fract(int integer_part)
lexer_token.type = T_FLOATINGPOINT;
lexer_token.v.floatvalue = value;
+
+ parse_floating_suffix();
}
static void parse_number_dec(void)
}
}
-static int parse_octal_sequence(void)
+static inline int is_octal_digit(int chr)
{
- int value = 0;
- while(1) {
- if(c < '0' || c > '7')
- break;
- value = 8 * value + c - '0';
- next_char();
- }
+ return '0' <= chr && chr <= '7'; /* nonzero iff chr is one of '0'..'7' */
+}
+static int parse_octal_sequence(const int first_digit)
+{ /* first_digit: the leading octal digit character, supplied by the
+   * caller (asserted below). At most two further octal digits are taken
+   * from the current character c, so a sequence is 1 to 3 digits long;
+   * the return value is the number those digits denote. */
+ assert(is_octal_digit(first_digit));
+ int value = first_digit - '0';
+ if (!is_octal_digit(c)) return value; /* one-digit sequence */
+ value = 8 * value + c - '0';
+ next_char();
+ if (!is_octal_digit(c)) return value; /* two-digit sequence */
+ value = 8 * value + c - '0';
+ next_char();
return value;
}
switch(ec) {
case '"': return '"';
- case '\'': return'\'';
+ case '\'': return '\'';
case '\\': return '\\';
case '?': return '\?';
case 'a': return '\a';
case '5':
case '6':
case '7':
- return parse_octal_sequence();
+ return parse_octal_sequence(ec);
case EOF:
parse_error("reached end of file while parsing escape sequence");
return EOF;
}
}
-static void parse_preprocessor_directive()
+static void parse_preprocessor_directive(void)
{
next_pp_token();
SYMBOL_CHARS
parse_symbol();
+ /* might be a wide string ( L"string" ) */
+ if(c == '"' && (lexer_token.type == T_IDENTIFIER &&
+ lexer_token.v.symbol == symbol_L)) {
+ parse_string_literal();
+ return;
+ }
return;
DIGITS
void lexer_open_stream(FILE *stream, const char *input_name)
{
input = stream;
- lexer_token.source_position.linenr = 1;
+ lexer_token.source_position.linenr = 0;
lexer_token.source_position.input_name = input_name;
- next_char();
+ symbol_L = symbol_table_insert("L"); /* compared against identifiers to spot the L"..." wide-string prefix */
+
+ /* place a virtual \n at the beginning so the lexer knows that we're
+ * at the beginning of a line; c is set directly here instead of being
+ * read from the stream. NOTE(review): linenr starts at 0 above,
+ * presumably because consuming this virtual newline increments it to
+ * 1 - confirm against the newline handling. */
+ c = '\n';
}
void exit_lexer(void)