#define strtold(s, e) strtod(s, e)
#endif
-static utf32 c;
-token_t lexer_token;
-symbol_t *symbol_L;
-static FILE *input;
-static utf32 buf[BUF_SIZE + MAX_PUTBACK];
-static const utf32 *bufend;
-static const utf32 *bufpos;
-static strset_t stringset;
-bool allow_dollar_in_symbol = true;
+static utf32 c; /* most recently fetched input character (next_char() loads it from *bufpos) */
+static source_position_t lexer_pos; /* running read position: colno moves with every next_char()/put_back(); copied into lexer_token.source_position when a new token starts */
+token_t lexer_token;
+static symbol_t *symbol_L; /* file-local from now on (gained the static qualifier) */
+static FILE *input;
+static utf32 buf[BUF_SIZE + MAX_PUTBACK]; /* decoded input; read_block() marks end of input at index MAX_PUTBACK */
+static const utf32 *bufend;
+static const utf32 *bufpos; /* next element of buf that next_char() will hand out */
+static strset_t stringset;
+bool allow_dollar_in_symbol = true;
/**
* Prints a parse error message at the current token.
*/
static void parse_error(const char *msg)
{
- errorf(&lexer_token.source_position, "%s", msg);
+ errorf(&lexer_pos, "%s", msg); /* reported at the lexer's current read position (lexer_pos), no longer at the token start; "%s" keeps msg from being parsed as a format string */
}
/**
*/
static NORETURN internal_error(const char *msg)
{
- internal_errorf(&lexer_token.source_position, "%s", msg);
+ internal_errorf(&lexer_pos, "%s", msg); /* internal (lexer bug) diagnostic at the current read position; presumably never returns, as the NORETURN marker suggests - confirm against internal_errorf */
}
static size_t read_block(unsigned char *const read_buf, size_t const n)
{
size_t const s = fread(read_buf, 1, n, input);
if (s == 0) {
- if (ferror(input))
+ /* on OS/X ferror appears to return true on eof as well when running
+ * the application in gdb... */
+ if (!feof(input) && ferror(input))
parse_error("read from input failed");
buf[MAX_PUTBACK] = EOF;
bufpos = buf + MAX_PUTBACK;
{ NULL, NULL }
};
+/**
+ * Case-insensitive comparison of two NUL-terminated strings.
+ *
+ * strcasecmp is not part of C99 so we need our own implementation here.
+ *
+ * Every byte is converted to unsigned char before it is handed to tolower():
+ * passing a negative plain char (any byte >= 0x80 where char is signed) to a
+ * <ctype.h> function is undefined behaviour.
+ *
+ * @param s1  first string
+ * @param s2  second string
+ * @return 0 if the strings are equal ignoring case; otherwise a negative or
+ *         positive value according to the order of the first pair of
+ *         case-folded bytes that differ
+ */
+static int my_strcasecmp(const char *s1, const char *s2)
+{
+	for (;; ++s1, ++s2) {
+		int const c1 = tolower((unsigned char)*s1);
+		int const c2 = tolower((unsigned char)*s2);
+		/* stop at the first difference, or once both strings have ended
+		 * (c1 == c2 == 0); a string that ends early differs from the
+		 * other one at its terminator, so we never read past either end */
+		if (c1 != c2 || c1 == 0)
+			return c1 - c2;
+	}
+}
+
void select_input_encoding(char const* const encoding)
{
for (named_decoder_t const *i = decoders; i->name != NULL; ++i) {
- if (strcasecmp(encoding, i->name) != 0)
+ if (my_strcasecmp(encoding, i->name) != 0)
continue;
decoder = i->decoder;
return;
decoder();
}
c = *bufpos++;
+ ++lexer_pos.colno;
}
/**
{
assert(bufpos > buf);
*(--bufpos - buf + buf) = pc;
+ --lexer_pos.colno;
#ifdef DEBUG_CHARS
printf("putback '%lc'\n", pc);
static inline void next_char(void);
-#define MATCH_NEWLINE(code) \
- case '\r': \
- next_char(); \
- if (c == '\n') { \
- next_char(); \
- } \
- lexer_token.source_position.linenr++; \
- code \
- case '\n': \
- next_char(); \
- lexer_token.source_position.linenr++; \
+#define MATCH_NEWLINE(code) /* switch cases for "\r", "\r\n" and "\n"; each counts as exactly one line break before `code` runs */ \
+ case '\r': \
+ next_char(); \
+ if (c == '\n') { \
+ case '\n': /* label sits inside the if body on purpose: a bare "\n" enters here and shares the "\r\n" path */ \
+ next_char(); \
+ } \
+ lexer_pos.lineno++; \
+ lexer_pos.colno = 1; /* overrides the ++colno done by next_char(): c is now the first character of the new line */ \
code
-#define eat(c_type) do { assert(c == c_type); next_char(); } while (0)
+#define eat(c_type) (assert(c == c_type), next_char()) /* consume the current character, asserting it is c_type; a comma expression, so it can appear wherever an expression is allowed */
static void maybe_concat_lines(void)
{
errorf(&lexer_token.source_position,
"hexadecimal floatingpoint constant requires an exponent");
}
+ obstack_1grow(&symbol_obstack, '\0');
- size_t size = obstack_object_size(&symbol_obstack);
+ size_t size = obstack_object_size(&symbol_obstack) - 1;
char *string = obstack_finish(&symbol_obstack);
lexer_token.literal = identify_string(string, size);
*/
static bool is_octal_digit(utf32 chr)
{
- switch (chr) {
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- return true;
- default:
- return false;
- }
+ return '0' <= chr && chr <= '7'; /* range test replaces the eight-case switch; valid because C guarantees the decimal digit characters have consecutive values */
}
/**
}
}
- size_t size = obstack_object_size(&symbol_obstack);
+ obstack_1grow(&symbol_obstack, '\0');
+ size_t size = obstack_object_size(&symbol_obstack) - 1;
char *string = obstack_finish(&symbol_obstack);
lexer_token.literal = identify_string(string, size);
/* check for invalid octal digits */
for (size_t i= 0; i < size; ++i) {
char t = string[i];
- if (t == '8' || t == '9')
+ if (t >= '8')
errorf(&lexer_token.source_position,
"invalid digit '%c' in octal number", t);
}
*/
static void parse_string_literal(void)
{
- const unsigned start_linenr = lexer_token.source_position.linenr;
-
eat('"');
while (true) {
case '\\': {
utf32 const tc = parse_escape_sequence();
if (tc >= 0x100) {
- warningf(&lexer_token.source_position,
- "escape sequence out of range");
+ warningf(WARN_OTHER, &lexer_pos, "escape sequence out of range");
}
obstack_1grow(&symbol_obstack, tc);
break;
}
case EOF: {
- source_position_t source_position;
- source_position.input_name = lexer_token.source_position.input_name;
- source_position.linenr = start_linenr;
- errorf(&source_position, "string has no end");
+ errorf(&lexer_token.source_position, "string has no end");
lexer_token.type = T_ERROR;
return;
}
*/
static void parse_wide_character_constant(void)
{
- const unsigned start_linenr = lexer_token.source_position.linenr;
-
eat('\'');
while (true) {
goto end_of_wide_char_constant;
case EOF: {
- source_position_t source_position = lexer_token.source_position;
- source_position.linenr = start_linenr;
- errorf(&source_position, "EOF while parsing character constant");
+ errorf(&lexer_token.source_position, "EOF while parsing character constant");
lexer_token.type = T_ERROR;
return;
}
}
end_of_wide_char_constant:;
- size_t size = (size_t) obstack_object_size(&symbol_obstack);
+ obstack_1grow(&symbol_obstack, '\0');
+ size_t size = (size_t) obstack_object_size(&symbol_obstack) - 1;
char *string = obstack_finish(&symbol_obstack);
lexer_token.type = T_WIDE_CHARACTER_CONSTANT;
lexer_token.literal = identify_string(string, size);
+
+ if (size == 0) {
+ errorf(&lexer_token.source_position, "empty character constant");
+ }
}
/**
*/
static void parse_character_constant(void)
{
- const unsigned start_linenr = lexer_token.source_position.linenr;
-
eat('\'');
while (true) {
case '\\': {
utf32 const tc = parse_escape_sequence();
if (tc >= 0x100) {
- warningf(&lexer_token.source_position,
- "escape sequence out of range");
+ warningf(WARN_OTHER, &lexer_pos, "escape sequence out of range");
}
obstack_1grow(&symbol_obstack, tc);
break;
goto end_of_char_constant;
case EOF: {
- source_position_t source_position;
- source_position.input_name = lexer_token.source_position.input_name;
- source_position.linenr = start_linenr;
- errorf(&source_position, "EOF while parsing character constant");
+ errorf(&lexer_token.source_position, "EOF while parsing character constant");
lexer_token.type = T_ERROR;
return;
}
}
end_of_char_constant:;
- const size_t size = (size_t)obstack_object_size(&symbol_obstack);
+ obstack_1grow(&symbol_obstack, '\0');
+ const size_t size = (size_t)obstack_object_size(&symbol_obstack)-1;
char *const string = obstack_finish(&symbol_obstack);
lexer_token.type = T_CHARACTER_CONSTANT;
lexer_token.literal = identify_string(string, size);
+
+ if (size == 0) {
+ errorf(&lexer_token.source_position, "empty character constant");
+ }
}
/**
*/
static void skip_multiline_comment(void)
{
- unsigned start_linenr = lexer_token.source_position.linenr;
-
while (true) {
switch (c) {
case '/':
next_char();
if (c == '*') {
/* nested comment, warn here */
- if (warning.comment) {
- warningf(&lexer_token.source_position, "'/*' within comment");
- }
+ warningf(WARN_COMMENT, &lexer_pos, "'/*' within comment");
}
break;
case '*':
MATCH_NEWLINE(break;)
case EOF: {
- source_position_t source_position;
- source_position.input_name = lexer_token.source_position.input_name;
- source_position.linenr = start_linenr;
- errorf(&source_position, "at end of file while looking for comment end");
+ errorf(&lexer_token.source_position, "at end of file while looking for comment end");
return;
}
case '\\':
next_char();
if (c == '\n' || c == '\r') {
- if (warning.comment)
- warningf(&lexer_token.source_position, "multi-line comment");
+ warningf(WARN_COMMENT, &lexer_pos, "multi-line comment");
return;
}
break;
if (pp_token.type != T_INTEGER) {
parse_error("expected integer");
} else {
- lexer_token.source_position.linenr = atoi(pp_token.literal.begin);
+ /* use offset -1 as this is about the next line */
+ lexer_pos.lineno = atoi(pp_token.literal.begin) - 1;
next_pp_token();
}
if (pp_token.type == T_STRING_LITERAL) {
- lexer_token.source_position.input_name = pp_token.literal.begin;
+ lexer_pos.input_name = pp_token.literal.begin;
next_pp_token();
}
unknown_pragma = true;
}
eat_until_newline();
- if (unknown_pragma && warning.unknown_pragmas) {
- warningf(&pp_token.source_position, "encountered unknown #pragma");
+ if (unknown_pragma) {
+ warningf(WARN_UNKNOWN_PRAGMAS, &pp_token.source_position, "encountered unknown #pragma");
}
}
#define ELSE_CODE(code) \
default: \
code \
+ return; /* the return now lives in ELSE_CODE itself, so ELSE and the explicit ELSE_CODE users no longer spell it out */ \
} \
} /* end of while (true) */ \
- break;
#define ELSE(set_type) \
ELSE_CODE( \
lexer_token.type = set_type; \
- return; \
)
void lexer_next_preprocessing_token(void)
{
while (true) {
+ lexer_token.source_position = lexer_pos;
+
switch (c) {
case ' ':
case '\t':
put_back(c);
c = '.';
lexer_token.type = '.';
- return;
)
ELSE('.')
case '&':
put_back(c);
c = '%';
lexer_token.type = '#';
- return;
)
ELSE('#')
ELSE('%')
default:
dollar_sign:
- errorf(&lexer_token.source_position, "unknown character '%c' found", c);
+ errorf(&lexer_pos, "unknown character '%c' found", c);
next_char();
lexer_token.type = T_ERROR;
return;
void lexer_open_stream(FILE *stream, const char *input_name)
{
- input = stream;
- lexer_token.source_position.linenr = 0;
- lexer_token.source_position.input_name = input_name;
+ input = stream;
+ lexer_pos.lineno = 0;
+ lexer_pos.colno = 0;
+ lexer_pos.input_name = input_name;
bufpos = NULL;
bufend = NULL;
void lexer_open_buffer(const char *buffer, size_t len, const char *input_name)
{
- input = NULL;
- lexer_token.source_position.linenr = 0;
- lexer_token.source_position.input_name = input_name;
+ input = NULL; /* no FILE behind this input */
+ lexer_pos.lineno = 0; /* MATCH_NEWLINE increments lineno on each line break; presumably the virtual '\n' below is meant to turn this into line 1 - confirm */
+ lexer_pos.colno = 0;
+ lexer_pos.input_name = input_name;
#if 0 // TODO
bufpos = buffer;
bufend = buffer + len;
+
+ /* place a virtual \n at the beginning so the lexer knows that we're
+ * at the beginning of a line */
+ c = '\n'; /* moved inside the disabled branch: it only makes sense once buffer lexing exists */
#else
(void)buffer;
(void)len;
panic("builtin lexing not done yet");
#endif
-
- /* place a virtual \n at the beginning so the lexer knows that we're
- * at the beginning of a line */
- c = '\n';
}
void exit_lexer(void)
static __attribute__((unused))
void dbg_pos(const source_position_t source_position)
{
- fprintf(stdout, "%s:%u\n", source_position.input_name,
- source_position.linenr);
+ fprintf(stdout, "%s:%u:%u\n", source_position.input_name, /* debugging aid: prints "name:line:column" */
+ source_position.lineno, source_position.colno); /* NOTE(review): %u assumes lineno and colno are unsigned int - confirm against source_position_t */
fflush(stdout);
}