Handle string literals with embedded \0 correctly.
author Christoph Mallon <christoph.mallon@gmx.de>
Wed, 12 Dec 2007 13:44:44 +0000 (13:44 +0000)
committer Christoph Mallon <christoph.mallon@gmx.de>
Wed, 12 Dec 2007 13:44:44 +0000 (13:44 +0000)
[r18708]

ast.c
ast2firm.c
ast_t.h
lexer.c
lexer.h
parser.c
string_rep.h
token.c
token_t.h

diff --git a/ast.c b/ast.c
index 073ab7d..c29b730 100644 (file)
--- a/ast.c
+++ b/ast.c
@@ -42,10 +42,10 @@ static void print_const(const const_expression_t *cnst)
        }
 }
 
-static void print_quoted_string(const char *string)
+static void print_quoted_string(const string_t *const string)
 {
        fputc('"', out);
-       for(const char *c = string; *c != '\0'; ++c) {
+       for (const char *c = string->begin, *const end = c + string->size; c != end; ++c) {
                switch(*c) {
                case '\"':  fputs("\\\"", out); break;
                case '\\':  fputs("\\\\", out); break;
@@ -59,7 +59,7 @@ static void print_quoted_string(const char *string)
                case '\?':  fputs("\\?", out); break;
                default:
                        if(!isprint(*c)) {
-                               fprintf(out, "\\x%x", *c);
+                               fprintf(out, "\\%03o", *c);
                                break;
                        }
                        fputc(*c, out);
@@ -72,7 +72,7 @@ static void print_quoted_string(const char *string)
 static void print_string_literal(
                const string_literal_expression_t *string_literal)
 {
-       print_quoted_string(string_literal->value);
+       print_quoted_string(&string_literal->value);
 }
 
 static void print_wide_string_literal(
@@ -626,7 +626,7 @@ static void print_asm_constraints(asm_constraint_t *constraints)
                if(constraint->symbol) {
                        fprintf(out, "[%s] ", constraint->symbol->string);
                }
-               print_quoted_string(constraint->constraints);
+               print_quoted_string(&constraint->constraints);
                fputs(" (", out);
                print_expression(constraint->expression);
                fputs(")", out);
@@ -640,7 +640,7 @@ static void print_asm_clobbers(asm_clobber_t *clobbers)
                if(clobber != clobbers)
                        fputs(", ", out);
 
-               print_quoted_string(clobber->clobber);
+               print_quoted_string(&clobber->clobber);
        }
 }
 
@@ -651,7 +651,7 @@ static void print_asm_statement(const asm_statement_t *statement)
                fputs("volatile ", out);
        }
        fputs("(", out);
-       print_quoted_string(statement->asm_text);
+       print_quoted_string(&statement->asm_text);
        if(statement->inputs == NULL && statement->outputs == NULL
                        && statement->clobbers == NULL)
                goto end_of_print_asm_statement;
diff --git a/ast2firm.c b/ast2firm.c
index 0c427b4..3bda245 100644 (file)
--- a/ast2firm.c
+++ b/ast2firm.c
@@ -960,7 +960,7 @@ static ir_node *create_symconst(dbg_info *dbgi, ir_mode *mode,
 
 static ir_node *string_to_firm(const source_position_t *const src_pos,
                                const char *const id_prefix,
-                               const char *const string)
+                               const string_t *const value)
 {
        ir_type *const global_type = get_glob_type();
        ir_type *const type        = new_type_array(unique_ident("strtype"), 1,
@@ -976,7 +976,8 @@ static ir_node *string_to_firm(const source_position_t *const src_pos,
        ir_type *const elem_type = ir_type_const_char;
        ir_mode *const mode      = get_type_mode(elem_type);
 
-       const size_t slen = strlen(string) + 1;
+       const char* const string = value->begin;
+       const size_t      slen   = value->size;
 
        set_array_lower_bound_int(type, 0, 0);
        set_array_upper_bound_int(type, 0, slen);
@@ -998,7 +999,7 @@ static ir_node *string_literal_to_firm(
                const string_literal_expression_t* literal)
 {
        return string_to_firm(&literal->expression.source_position, "Lstr",
-                             literal->value);
+                             &literal->value);
 }
 
 static ir_node *wide_string_literal_to_firm(
@@ -2366,7 +2367,8 @@ static ir_node *function_name_to_firm(
                const source_position_t *const src_pos =
                        &expr->expression.source_position;
                const char *const name = current_function_decl->symbol->string;
-               current_function_name = string_to_firm(src_pos, "__func__", name);
+               const string_t string = { name, strlen(name) + 1 };
+               current_function_name = string_to_firm(src_pos, "__func__", &string);
        }
 
        return current_function_name;
@@ -2780,19 +2782,18 @@ static void create_initializer_string(initializer_string_t *initializer,
        entry.prev = last_entry;
        ++len;
 
-       ir_type    *irtype  = get_entity_type(entity);
-       size_t      arr_len = get_array_type_size(type);
-       const char *p       = initializer->string;
-       size_t      i       = 0;
-       for(i = 0; i < arr_len; ++i, ++p) {
+       ir_type    *const irtype  = get_entity_type(entity);
+       size_t            arr_len = get_array_type_size(type);
+       const char *const p       = initializer->string.begin;
+       if (initializer->string.size < arr_len) {
+               arr_len = initializer->string.size;
+       }
+       for (size_t i = 0; i < arr_len; ++i) {
                entry.v.array_index = i;
 
-               ir_node             *node = new_Const_long(mode_Bs, *p);
+               ir_node             *node = new_Const_long(mode_Bs, p[i]);
                compound_graph_path *path = create_compound_path(irtype, &entry, len);
                add_compound_ent_value_w_path(entity, node, path);
-
-               if(*p == '\0')
-                       break;
        }
 }
 
diff --git a/ast_t.h b/ast_t.h
index daceed6..e65c3c8 100644 (file)
--- a/ast_t.h
+++ b/ast_t.h
@@ -172,7 +172,7 @@ struct const_expression_t {
 
 struct string_literal_expression_t {
        expression_base_t  expression;
-       const char        *value;
+       string_t           value;
 };
 
 struct wide_string_literal_expression_t {
@@ -360,8 +360,8 @@ struct initializer_list_t {
 };
 
 struct initializer_string_t {
-       initializer_base_t  initializer;
-       const char         *string;
+       initializer_base_t initializer;
+       string_t           string;
 };
 
 struct initializer_wide_string_t {
@@ -526,20 +526,20 @@ struct for_statement_t {
 };
 
 struct asm_constraint_t {
-       const char       *constraints;
+       string_t          constraints;
        expression_t     *expression;
        symbol_t         *symbol;
        asm_constraint_t *next;
 };
 
 struct asm_clobber_t {
-       const char    *clobber;
+       string_t       clobber;
        asm_clobber_t *next;
 };
 
 struct asm_statement_t {
        statement_base_t  statement;
-       const char       *asm_text;
+       string_t          asm_text;
        asm_constraint_t *inputs;
        asm_constraint_t *outputs;
        asm_clobber_t    *clobbers;
diff --git a/lexer.c b/lexer.c
index 6a6a945..b00b8a2 100644 (file)
--- a/lexer.c
+++ b/lexer.c
@@ -670,28 +670,30 @@ static int parse_escape_sequence(void)
        }
 }
 
-const char *concat_strings(const char *s1, const char *s2)
+string_t concat_strings(const string_t *const s1, const string_t *const s2)
 {
-       size_t  len1   = strlen(s1);
-       size_t  len2   = strlen(s2);
+       const size_t len1 = s1->size - 1;
+       const size_t len2 = s2->size - 1;
 
-       char   *concat = obstack_alloc(&symbol_obstack, len1 + len2 + 1);
-       memcpy(concat, s1, len1);
-       memcpy(concat + len1, s2, len2 + 1);
+       char *const concat = obstack_alloc(&symbol_obstack, len1 + len2 + 1);
+       memcpy(concat, s1->begin, len1);
+       memcpy(concat + len1, s2->begin, len2 + 1);
 
+#if 0 /* TODO hash */
        const char *result = strset_insert(&stringset, concat);
        if(result != concat) {
                obstack_free(&symbol_obstack, concat);
        }
 
        return result;
+#else
+       return (string_t){ concat, len1 + len2 + 1 };
+#endif
 }
 
 static void parse_string_literal(void)
 {
-       unsigned    start_linenr = lexer_token.source_position.linenr;
-       char       *string;
-       const char *result;
+       const unsigned start_linenr = lexer_token.source_position.linenr;
 
        assert(c == '"');
        next_char();
@@ -728,16 +730,22 @@ end_of_string:
 
        /* add finishing 0 to the string */
        obstack_1grow(&symbol_obstack, '\0');
-       string = obstack_finish(&symbol_obstack);
+       const size_t      size   = (size_t)obstack_object_size(&symbol_obstack);
+       const char *const string = obstack_finish(&symbol_obstack);
 
+#if 0 /* TODO hash */
        /* check if there is already a copy of the string */
        result = strset_insert(&stringset, string);
        if(result != string) {
                obstack_free(&symbol_obstack, string);
        }
+#else
+       const char *const result = string;
+#endif
 
-       lexer_token.type     = T_STRING_LITERAL;
-       lexer_token.v.string = result;
+       lexer_token.type           = T_STRING_LITERAL;
+       lexer_token.v.string.begin = result;
+       lexer_token.v.string.size  = size;
 }
 
 static void parse_wide_character_constant(void)
@@ -989,7 +997,7 @@ static void parse_line_directive(void)
                next_pp_token();
        }
        if(pp_token.type == T_STRING_LITERAL) {
-               lexer_token.source_position.input_name = pp_token.v.string;
+               lexer_token.source_position.input_name = pp_token.v.string.begin;
                next_pp_token();
        }
 
diff --git a/lexer.h b/lexer.h
index 34d965d..35827e9 100644 (file)
--- a/lexer.h
+++ b/lexer.h
@@ -16,6 +16,6 @@ void exit_lexer(void);
 
 void lexer_open_stream(FILE *stream, const char *input_name);
 
-const char *concat_strings(const char *string1, const char *string2);
+string_t concat_strings(const string_t *s1, const string_t *s2);
 
 #endif
diff --git a/parser.c b/parser.c
index 6e95e80..98354ff 100644 (file)
--- a/parser.c
+++ b/parser.c
@@ -868,15 +868,15 @@ static type_t *make_global_typedef(const char *name, type_t *type)
        return typedef_type;
 }
 
-static const char *parse_string_literals(void)
+static string_t parse_string_literals(void)
 {
        assert(token.type == T_STRING_LITERAL);
-       const char *result = token.v.string;
+       string_t result = token.v.string;
 
        next_token();
 
-       while(token.type == T_STRING_LITERAL) {
-               result = concat_strings(result, token.v.string);
+       while (token.type == T_STRING_LITERAL) {
+               result = concat_strings(&result, &token.v.string);
                next_token();
        }
 
@@ -979,13 +979,13 @@ static designator_t *parse_designation(void)
 #endif
 
 static initializer_t *initializer_from_string(array_type_t *type,
-                                              const char *string)
+                                              const string_t *const string)
 {
        /* TODO: check len vs. size of array type */
        (void) type;
 
        initializer_t *initializer = allocate_initializer_zero(INITIALIZER_STRING);
-       initializer->string.string = string;
+       initializer->string.string = *string;
 
        return initializer;
 }
@@ -1019,7 +1019,7 @@ static initializer_t *initializer_from_expression(type_t *type,
                                case EXPR_STRING_LITERAL:
                                        if (element_type->atomic.akind == ATOMIC_TYPE_CHAR) {
                                                return initializer_from_string(array_type,
-                                                       expression->string.value);
+                                                       &expression->string.value);
                                        }
 
                                case EXPR_WIDE_STRING_LITERAL: {
@@ -2472,7 +2472,7 @@ static void parse_init_declarator_rest(declaration_t *declaration)
 
                                case INITIALIZER_STRING: {
                                        initializer_string_t *const string = &initializer->string;
-                                       cnst->conste.v.int_value = strlen(string->string) + 1;
+                                       cnst->conste.v.int_value = string->string.size;
                                        break;
                                }
 
@@ -3284,7 +3284,6 @@ static expression_t *parse_function_keyword(void)
 
        expression->expression.kind     = EXPR_FUNCTION;
        expression->expression.datatype = type_string;
-       expression->value               = current_function->symbol->string;
 
        return (expression_t*) expression;
 }
@@ -3303,7 +3302,6 @@ static expression_t *parse_pretty_function_keyword(void)
 
        expression->expression.kind     = EXPR_PRETTY_FUNCTION;
        expression->expression.datatype = type_string;
-       expression->value               = current_function->symbol->string;
 
        return (expression_t*) expression;
 }
diff --git a/string_rep.h b/string_rep.h
index 1df0b5b..8e2b083 100644 (file)
--- a/string_rep.h
+++ b/string_rep.h
@@ -5,12 +5,10 @@
 
 typedef wchar_t wchar_rep_t;
 
-#if 0 /* TODO */
 typedef struct string_t {
        const char *begin;
-       const char *end;
+       size_t      size;
 } string_t;
-#endif
 
 typedef struct wide_string_t {
        const wchar_rep_t *begin;
diff --git a/token.c b/token.c
index c8aa47d..c2d5345 100644 (file)
--- a/token.c
+++ b/token.c
@@ -98,7 +98,7 @@ void print_token(FILE *f, const token_t *token)
                fprintf(f, "floatingpointer number %LF", token->v.floatvalue);
                break;
        case T_STRING_LITERAL:
-               fprintf(f, "string '%s'", token->v.string);
+               fprintf(f, "string '%s'", token->v.string.begin); /* TODO suboptimal */
                break;
        default:
                print_token_type(f, (token_type_t)token->type);
diff --git a/token_t.h b/token_t.h
index 317cc3b..c8e0e10 100644 (file)
--- a/token_t.h
+++ b/token_t.h
@@ -41,7 +41,7 @@ typedef struct {
                symbol_t      *symbol;
                long long      intvalue;
                long double    floatvalue;
-               const char    *string;
+               string_t       string;
                wide_string_t  wide_string;
        } v;
        type_t            *datatype;