X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=lexer.c;h=1300b3d26f23cda1c90093658115266e03a941ab;hb=4b6bbffc7e1d9a9ee5a75da79ced92f5bc92a913;hp=7cb82bc4fa8430ac0847d93014d88b8cd315f375;hpb=6734a093fb9d14f6a626293849de8a38b39b9457;p=cparser

diff --git a/lexer.c b/lexer.c
index 7cb82bc..1300b3d 100644
--- a/lexer.c
+++ b/lexer.c
@@ -17,13 +17,12 @@
 
 static int         c;
 token_t            lexer_token;
+symbol_t          *symbol_L;
 static FILE       *input;
 static char        buf[1024 + MAX_PUTBACK];
 static const char *bufend;
 static const char *bufpos;
 static strset_t    stringset;
-//static FILE      **input_stack;
-//static char      **buf_stack;
 
 static void error_prefix_at(const char *input_name, unsigned linenr)
 {
@@ -60,11 +59,16 @@ static inline void next_real_char(void)
 
 static inline void put_back(int pc)
 {
-	char *p = (char*) bufpos - 1;
-	bufpos--;
-	assert(p >= buf);
+	assert(bufpos >= buf);
+	assert(bufpos < buf+MAX_PUTBACK || *bufpos == pc);
+
+	char *p = buf + (bufpos - buf);
 	*p = pc;
 
+	/* going backwards in the buffer is legal as long as it's not more often
+	 * than MAX_PUTBACK */
+	bufpos--;
+
 #ifdef DEBUG_CHARS
 	printf("putback '%c'\n", pc);
 #endif
@@ -85,9 +89,12 @@ static inline void next_char(void);
 		lexer_token.source_position.linenr++; \
 		code;
 
+/* consume the current char; asserts (when assertions are enabled) that it is c_type */
+#define eat(c_type)  do { assert(c == (c_type)); next_char(); } while(0)
 static void maybe_concat_lines(void)
 {
-	next_char();
+	eat('\\');
+
 	switch(c) {
 	MATCH_NEWLINE(return;)
 
@@ -103,6 +110,7 @@ static inline void next_char(void)
 {
 	next_real_char();
 
+#if 0
 	/* filter trigraphs */
 	if(UNLIKELY(c == '\\')) {
 		maybe_concat_lines();
@@ -138,6 +146,8 @@ static inline void next_char(void)
 	}
 
 end_of_next_char:
+#endif
+	(void) maybe_concat_lines;
 #ifdef DEBUG_CHARS
 	printf("nchar '%c'\n", c);
 #else
@@ -247,6 +257,45 @@ end_symbol:
 	}
 }
 
+/* skip an optional integer suffix (u, ul, ull, l, lu, ll, llu; any case); TODO: use it */
+static void parse_integer_suffix(void)
+{
+	if(c == 'u' || c == 'U') {
+		next_char();
+		if(c == 'L' || c == 'l') {
+			next_char();
+			if(c == 'L' || c == 'l') {
+				next_char();
+			}
+		}
+	} else if(c == 'l' || c == 'L') {
+		next_char();
+		if(c == 'l' || c == 'L') {
+			next_char();
+			if(c == 'u' || c == 'U') {
+				next_char();
+			}
+		} else if(c == 'u' || c == 'U') {
+			next_char();
+		}
+	}
+}
+
+static void parse_floating_suffix(void)
+{
+	switch(c) {
+	/* TODO: do something useful with the suffixes (f/F/l/L are only skipped for now) */
+	case 'f':
+	case 'F':
+	case 'l':
+	case 'L':
+		next_char();  /* consume the single suffix character */
+		break;
+	default:
+		break;  /* no suffix: leave c untouched */
+	}
+}
+
 static void parse_number_hex(void)
 {
 	assert(c == 'x' || c == 'X');
@@ -269,71 +318,118 @@ static void parse_number_hex(void)
 		} else if ('a' <= c && c <= 'f') {
 			value = 16 * value + c - 'a' + 10;
 		} else {
-			lexer_token.type     = T_INTEGER;
+			parse_integer_suffix();
+
+			lexer_token.type       = T_INTEGER;
 			lexer_token.v.intvalue = value;
 			return;
 		}
 		next_char();
 	}
+
+	if(c == '.' || c == 'p' || c == 'P') {
+		next_char();
+		panic("Hex floating point numbers not implemented yet");
+	}
 }
 
 static void parse_number_oct(void)
 {
 	int value = 0;
+	while(c >= '0' && c <= '7') {  /* accumulate the run of octal digits */
+		value = 8 * value + c - '0';
+		next_char();
+	}
+	if (c == '8' || c == '9') {  /* decimal digit directly after the octal digits */
+		parse_error("invalid octal number");
+		lexer_token.type = T_ERROR;
+		return;
+	}
+
+	lexer_token.type       = T_INTEGER;
+	lexer_token.v.intvalue = value;
+
+	parse_integer_suffix();  /* skip an optional u/l suffix after the digits */
+}
+
+static void parse_floatingpoint_exponent(long double value)
+{
+	unsigned int expo = 0;  /* magnitude of the decimal exponent */
+	long double  factor = 10.;  /* 10 for a positive exponent, 0.1 for a negative one */
+
+	if(c == '-') {  /* optional sign; c is expected to be the char after the 'e'/'E' */
+		next_char();
+		factor = 0.1;
+	} else if(c == '+') {
+		next_char();
+	}
+
+	while(c >= '0' && c <= '9') {  /* read the exponent digits */
+		expo = 10 * expo + (c - '0');
+		next_char();
+	}
+
 	while(1) {
-		if ('0' <= c && c <= '7') {
-			value = 8 * value + c - '0';
-		} else if (c == '8' || c == '9') {
-			parse_error("invalid octal number");
-			lexer_token.type = T_ERROR;
-			return;
-		} else {
-			lexer_token.type       = T_INTEGER;
-			lexer_token.v.intvalue = value;
-			return;
-		}
+		if(expo & 1)  /* square-and-multiply: scales value by factor^expo */
+			value *= factor;
+		expo >>= 1;
+		if(expo == 0)
+			break;
+		factor *= factor;
+	}
+
+	lexer_token.type         = T_FLOATINGPOINT;
+	lexer_token.v.floatvalue = value;
+
+	parse_floating_suffix();  /* skip an optional f/F/l/L suffix */
+}
+
+static void parse_floatingpoint_fract(int integer_part)
+{
+	long double value  = integer_part;  /* digits already read before the '.' */
+	long double factor = 1.;  /* scaled by 0.1 per digit: weight of the digit being added */
+
+	while(c >= '0' && c <= '9') {  /* c is expected to be the first char after the '.' */
+		factor *= 0.1;
+		value  += (c - '0') * factor;
 		next_char();
 	}
+
+	if(c == 'e' || c == 'E') {
+		next_char();  /* skip the exponent marker */
+		parse_floatingpoint_exponent(value);  /* fills in lexer_token itself */
+		return;
+	}
+
+	lexer_token.type         = T_FLOATINGPOINT;
+	lexer_token.v.floatvalue = value;
+
+	parse_floating_suffix();  /* skip an optional f/F/l/L suffix */
 }
 
 static void parse_number_dec(void)
 {
 	int value = 0;
 
-	for(;;) {
-		if (isdigit(c)) {
-			value = 10 * value + c - '0';
-		} else {
-			lexer_token.type       = T_INTEGER;
-			lexer_token.v.intvalue = value;
-			return;
-		}
+	while(isdigit(c)) {
+		value = 10 * value + c - '0';
 		next_char();
 	}
-}
 
-static void parse_integer_suffix(void)
-{
-	if(c == 'U' || c == 'U') {
-		/* TODO do something with the suffixes... */
+	if(c == '.') {
 		next_char();
-		if(c == 'L' || c == 'l') {
-			next_char();
-			if(c == 'L' || c == 'l') {
-				next_char();
-			}
-		}
-	} else if(c == 'l' || c == 'L') {
+		parse_floatingpoint_fract(value);
+		return;
+	}
+	if(c == 'e' || c == 'E') {
 		next_char();
-		if(c == 'l' || c == 'L') {
-			next_char();
-			if(c == 'u' || c == 'U') {
-				next_char();
-			}
-		} else if(c == 'u' || c == 'U') {
-			next_char();
-		}
+		parse_floatingpoint_exponent(value);
+		return;
 	}
+	parse_integer_suffix();
+
+	lexer_token.type       = T_INTEGER;
+	lexer_token.v.intvalue = value;
 }
 
 static void parse_number(void)
@@ -342,14 +438,42 @@ static void parse_number(void)
 		next_char();
 		switch (c) {
 			case 'X':
-			case 'x': parse_number_hex(); break;
-			default:  parse_number_oct(); break;
+			case 'x':
+				parse_number_hex();
+				break;
+			case '0':
+			case '1':
+			case '2':
+			case '3':
+			case '4':
+			case '5':
+			case '6':
+			case '7':
+				parse_number_oct();
+				break;
+			case '.':
+				next_char();
+				parse_floatingpoint_fract(0);
+				break;
+			case 'e': case 'E':
+				next_char(); /* parse_floatingpoint_exponent expects c past the 'e' */
+				parse_floatingpoint_exponent(0);
+				break;
+			case '8':
+			case '9':
+				next_char();
+				parse_error("invalid octal number");
+				lexer_token.type = T_ERROR;
+				return;
+			default:
+				put_back(c);
+				c = '0';
+				parse_number_dec();
+				return;
 		}
 	} else {
 		parse_number_dec();
 	}
-
-	parse_integer_suffix();
 }
 
 static int parse_octal_sequence(void)
@@ -386,40 +510,40 @@ static int parse_hex_sequence(void)
 
 static int parse_escape_sequence(void)
 {
-	while(1) {
-		int ec = c;
-		next_char();
+	eat('\\');
 
-		switch(ec) {
-		case '"':  return '"';
-		case '\'': return'\'';
-		case '\\': return '\\';
-		case '?': return '\?';
-		case 'a': return '\a';
-		case 'b': return '\b';
-		case 'f': return '\f';
-		case 'n': return '\n';
-		case 'r': return '\r';
-		case 't': return '\t';
-		case 'v': return '\v';
-		case 'x':
-			return parse_hex_sequence();
-		case '0':
-		case '1':
-		case '2':
-		case '3':
-		case '4':
-		case '5':
-		case '6':
-		case '7':
-			return parse_octal_sequence();
-		case EOF:
-			parse_error("reached end of file while parsing escape sequence");
-			return EOF;
-		default:
-			parse_error("unknown escape sequence");
-			return EOF;
-		}
+	int ec = c;
+	next_char();
+
+	switch(ec) {
+	case '"':  return '"';
+	case '\'': return'\'';
+	case '\\': return '\\';
+	case '?': return '\?';
+	case 'a': return '\a';
+	case 'b': return '\b';
+	case 'f': return '\f';
+	case 'n': return '\n';
+	case 'r': return '\r';
+	case 't': return '\t';
+	case 'v': return '\v';
+	case 'x':
+		return parse_hex_sequence();
+	case '0':
+	case '1':
+	case '2':
+	case '3':
+	case '4':
+	case '5':
+	case '6':
+	case '7':
+		return parse_octal_sequence();
+	case EOF:
+		parse_error("reached end of file while parsing escape sequence");
+		return EOF;
+	default:
+		parse_error("unknown escape sequence");
+		return EOF;
 	}
 }
 
@@ -449,12 +573,12 @@ static void parse_string_literal(void)
 	assert(c == '"');
 	next_char();
 
+	int tc;
 	while(1) {
 		switch(c) {
 		case '\\':
-			next_char();
-			int ec = parse_escape_sequence();
-			obstack_1grow(&symbol_obstack, ec);
+			tc = parse_escape_sequence();
+			obstack_1grow(&symbol_obstack, tc);
 			break;
 
 		case EOF:
@@ -495,14 +619,12 @@ end_of_string:
 
 static void parse_character_constant(void)
 {
-	assert(c == '\'');
-	next_char();
+	eat('\'');
 
 	int found_char = 0;
 	while(1) {
 		switch(c) {
 		case '\\':
-			next_char();
 			found_char = parse_escape_sequence();
 			break;
 
@@ -683,7 +805,7 @@ static void parse_preprocessor_identifier(void)
 	}
 }
 
-static void parse_preprocessor_directive()
+static void parse_preprocessor_directive(void)
 {
 	next_pp_token();
 
@@ -741,6 +863,12 @@ void lexer_next_preprocessing_token(void)
 
 		SYMBOL_CHARS
 			parse_symbol();
+			/* might be a wide string ( L"string" ) */
+			if(c == '"' && (lexer_token.type == T_IDENTIFIER &&
+			   lexer_token.v.symbol == symbol_L)) {
+			   	parse_string_literal();
+			   	return;
+			}
 			return;
 
 		DIGITS
@@ -916,10 +1044,14 @@ void init_lexer(void)
 void lexer_open_stream(FILE *stream, const char *input_name)
 {
 	input                                  = stream;
-	lexer_token.source_position.linenr     = 1;
+	lexer_token.source_position.linenr     = 0; /* NOTE(review): presumably bumped to 1 when the virtual '\n' below is lexed - confirm */
 	lexer_token.source_position.input_name = input_name;
 
-	next_char();
+	symbol_L = symbol_table_insert("L");
+
+	/* place a virtual \n at the beginning so the lexer knows that we're
+	 * at the beginning of a line */
+	c = '\n';
 }
 
 void exit_lexer(void)