From 114805b7248d659ba51f411951c60b6e7849a071 Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Tue, 27 Feb 2007 15:23:31 +0000 Subject: [PATCH] fix SSE2 float calls + returns --- ir/be/ia32/ia32_emitter.c | 20 ++++ ir/be/ia32/ia32_spec.pl | 29 ++--- ir/be/ia32/ia32_transform.c | 216 +++++++++++++++++------------------- 3 files changed, 135 insertions(+), 130 deletions(-) diff --git a/ir/be/ia32/ia32_emitter.c b/ir/be/ia32/ia32_emitter.c index c5ec338d1..93007645b 100644 --- a/ir/be/ia32/ia32_emitter.c +++ b/ir/be/ia32/ia32_emitter.c @@ -382,6 +382,25 @@ void ia32_emit_x87_mode_suffix(ia32_emit_env_t *env, const ir_node *node) ia32_emit_mode_suffix(env, mode); } +void ia32_emit_xmm_mode_suffix(ia32_emit_env_t *env, const ir_node *node) +{ + ir_mode *mode = get_ia32_ls_mode(node); + ia32_emit_char(env, 's'); + if(mode != NULL) { + assert(mode_is_float(mode)); + switch(get_mode_size_bits(mode)) { + case 32: + ia32_emit_char(env, 's'); + break; + case 64: + ia32_emit_char(env, 'd'); + break; + default: + assert(0); + } + } +} + void ia32_emit_extend_suffix(ia32_emit_env_t *env, const ir_mode *mode) { if(get_mode_size_bits(mode) == 32) @@ -1436,6 +1455,7 @@ static void emit_ia32_Conv_with_FP(ia32_emit_env_t *env, const ir_node *node) { ia32_emit_cstring(env, "ss2sd"); } } + ia32_emit_char(env, ' '); switch(get_ia32_op_type(node)) { case ia32_Normal: diff --git a/ir/be/ia32/ia32_spec.pl b/ir/be/ia32/ia32_spec.pl index 116c9b376..af4fc4870 100644 --- a/ir/be/ia32/ia32_spec.pl +++ b/ir/be/ia32/ia32_spec.pl @@ -200,6 +200,7 @@ $arch = "ia32"; ${arch}_emit_mode_suffix(env, get_ia32_ls_mode(node));", "M" => "${arch}_emit_mode_suffix(env, get_ia32_ls_mode(node));", "XM" => "${arch}_emit_x87_mode_suffix(env, node);", + "XXM" => "${arch}_emit_xmm_mode_suffix(env, node);", "AM" => "${arch}_emit_am(env, node);", "unop" => "${arch}_emit_unop(env, node);", "binop" => "${arch}_emit_binop(env, node);", @@ -938,7 +939,7 @@ if (get_ia32_immop_type(node) == ia32_ImmNone) { "irn_flags" 
=> "R", "comment" => "construct SSE Add: Add(a, b) = Add(b, a) = a + b", "reg_req" => { "in" => [ "gp", "gp", "xmm", "xmm", "none" ], "out" => [ "in_r3" ] }, - "emit" => '. adds%M %binop', + "emit" => '. add%XXM %binop', "latency" => 4, "units" => [ "SSE" ], "mode" => "mode_E", @@ -948,7 +949,7 @@ if (get_ia32_immop_type(node) == ia32_ImmNone) { "irn_flags" => "R", "comment" => "construct SSE Mul: Mul(a, b) = Mul(b, a) = a * b", "reg_req" => { "in" => [ "gp", "gp", "xmm", "xmm", "none" ], "out" => [ "in_r3" ] }, - "emit" => '. muls%M %binop', + "emit" => '. mul%XXM %binop', "latency" => 4, "units" => [ "SSE" ], "mode" => "mode_E", @@ -958,7 +959,7 @@ if (get_ia32_immop_type(node) == ia32_ImmNone) { "irn_flags" => "R", "comment" => "construct SSE Max: Max(a, b) = Max(b, a) = a > b ? a : b", "reg_req" => { "in" => [ "gp", "gp", "xmm", "xmm", "none" ], "out" => [ "in_r3" ] }, - "emit" => '. maxs%M %binop', + "emit" => '. max%XXM %binop', "latency" => 2, "units" => [ "SSE" ], "mode" => "mode_E", @@ -968,7 +969,7 @@ if (get_ia32_immop_type(node) == ia32_ImmNone) { "irn_flags" => "R", "comment" => "construct SSE Min: Min(a, b) = Min(b, a) = a < b ? a : b", "reg_req" => { "in" => [ "gp", "gp", "xmm", "xmm", "none" ], "out" => [ "in_r3" ] }, - "emit" => '. mins%M %binop', + "emit" => '. min%XXM %binop', "latency" => 2, "units" => [ "SSE" ], "mode" => "mode_E", @@ -978,7 +979,7 @@ if (get_ia32_immop_type(node) == ia32_ImmNone) { "irn_flags" => "R", "comment" => "construct SSE And: And(a, b) = a AND b", "reg_req" => { "in" => [ "gp", "gp", "xmm", "xmm", "none" ], "out" => [ "in_r3" ] }, - "emit" => '. andp%M %binop', + "emit" => '. andp%XXM %binop', "latency" => 3, "units" => [ "SSE" ], "mode" => "mode_E", @@ -988,7 +989,7 @@ if (get_ia32_immop_type(node) == ia32_ImmNone) { "irn_flags" => "R", "comment" => "construct SSE Or: Or(a, b) = a OR b", "reg_req" => { "in" => [ "gp", "gp", "xmm", "xmm", "none" ], "out" => [ "in_r3" ] }, - "emit" => '. orp%M %binop', + "emit" => '. 
orp%XXM %binop', "units" => [ "SSE" ], "mode" => "mode_E", }, @@ -997,7 +998,7 @@ if (get_ia32_immop_type(node) == ia32_ImmNone) { "irn_flags" => "R", "comment" => "construct SSE Xor: Xor(a, b) = a XOR b", "reg_req" => { "in" => [ "gp", "gp", "xmm", "xmm", "none" ], "out" => [ "in_r3" ] }, - "emit" => '. xorp%M %binop', + "emit" => '. xorp%XXM %binop', "latency" => 3, "units" => [ "SSE" ], "mode" => "mode_E", @@ -1009,7 +1010,7 @@ if (get_ia32_immop_type(node) == ia32_ImmNone) { "irn_flags" => "R", "comment" => "construct SSE AndNot: AndNot(a, b) = a AND NOT b", "reg_req" => { "in" => [ "gp", "gp", "xmm", "xmm", "none" ], "out" => [ "in_r3 !in_r4" ] }, - "emit" => '. andnp%M %binop', + "emit" => '. andnp%XXM %binop', "latency" => 3, "units" => [ "SSE" ], "mode" => "mode_E", @@ -1019,7 +1020,7 @@ if (get_ia32_immop_type(node) == ia32_ImmNone) { "irn_flags" => "R", "comment" => "construct SSE Sub: Sub(a, b) = a - b", "reg_req" => { "in" => [ "gp", "gp", "xmm", "xmm", "none" ], "out" => [ "in_r3" ] }, - "emit" => '. subs%M %binop', + "emit" => '. sub%XXM %binop', "latency" => 4, "units" => [ "SSE" ], "mode" => "mode_E", @@ -1030,7 +1031,7 @@ if (get_ia32_immop_type(node) == ia32_ImmNone) { "comment" => "construct SSE Div: Div(a, b) = a / b", "reg_req" => { "in" => [ "gp", "gp", "xmm", "xmm", "none" ], "out" => [ "in_r3 !in_r4" ] }, "outs" => [ "res", "M" ], - "emit" => '. divs%M %binop', + "emit" => '. div%XXM %binop', "latency" => 16, "units" => [ "SSE" ], }, @@ -1060,7 +1061,7 @@ if (get_ia32_immop_type(node) == ia32_ImmNone) { "irn_flags" => "R", "comment" => "represents a SSE constant", "reg_req" => { "out" => [ "xmm" ] }, - "emit" => '. movs%M %D1, $%C', + "emit" => '. 
mov%XXM $%C, %D1', "latency" => 2, "units" => [ "SSE" ], "mode" => "mode_E", @@ -1073,7 +1074,7 @@ if (get_ia32_immop_type(node) == ia32_ImmNone) { "state" => "exc_pinned", "comment" => "construct SSE Load: Load(ptr, mem) = LD ptr", "reg_req" => { "in" => [ "gp", "gp", "none" ], "out" => [ "xmm", "none" ] }, - "emit" => '. movs%M %D1, %AM', + "emit" => '. mov%XXM %AM, %D1', "outs" => [ "res", "M" ], "latency" => 2, "units" => [ "SSE" ], @@ -1084,7 +1085,7 @@ if (get_ia32_immop_type(node) == ia32_ImmNone) { "state" => "exc_pinned", "comment" => "construct Store: Store(ptr, val, mem) = ST ptr,val", "reg_req" => { "in" => [ "gp", "gp", "xmm", "none" ] }, - "emit" => '. movs%M %binop', + "emit" => '. mov%XXM %binop', "latency" => 2, "units" => [ "SSE" ], "mode" => "mode_M", @@ -1095,7 +1096,7 @@ if (get_ia32_immop_type(node) == ia32_ImmNone) { "state" => "exc_pinned", "comment" => "construct Store without index: Store(ptr, val, mem) = ST ptr,val", "reg_req" => { "in" => [ "gp", "xmm", "none" ] }, - "emit" => '. movs%M %AM, %S2', + "emit" => '. mov%XXM %S2, %AM', "latency" => 2, "units" => [ "SSE" ], "mode" => "mode_M", diff --git a/ir/be/ia32/ia32_transform.c b/ir/be/ia32/ia32_transform.c index 434d6dd79..be2b26234 100644 --- a/ir/be/ia32/ia32_transform.c +++ b/ir/be/ia32/ia32_transform.c @@ -2567,105 +2567,6 @@ static ir_node *gen_be_FrameStore(ia32_transform_env_t *env, ir_node *node) { return new_op; } -/** - * In case SSE is used we need to copy the result from FPU TOS. - */ -static ir_node *gen_be_Call(ia32_transform_env_t *env, ir_node *node) { - ir_graph *irg = env->irg; - dbg_info *dbg = get_irn_dbg_info(node); - ir_node *block = transform_node(env, get_nodes_block(node)); - ir_node *call_res = be_get_Proj_for_pn(node, pn_be_Call_first_res); - ir_node *call_mem = be_get_Proj_for_pn(node, pn_be_Call_M_regular); - ir_mode *mode; - ir_node *nomem = new_NoMem(); - ir_node *noreg = ia32_new_NoReg_gp(env->cg); - - if (! call_res || ! 
USE_SSE2(env->cg)) { - return duplicate_node(env, node); - } - - mode = get_irn_mode(call_res); - - /* in case there is no memory output: create one to serialize the copy FPU -> SSE */ - if (call_mem == NULL) - call_mem = new_rd_Proj(dbg, irg, block, node, mode_M, pn_be_Call_M_regular); - - if (mode_is_float(mode)) { - /* store st(0) onto stack */ - ir_node *frame = get_irg_frame(irg); - ir_node *fstp = new_rd_ia32_GetST0(dbg, irg, block, frame, noreg, nomem); - ir_entity *ent = frame_alloc_area(get_irg_frame_type(irg), get_mode_size_bytes(mode), 16, 0); - ir_node *sse_load, *p, *bad, *keep; - ir_node *mproj; - ir_node **in_keep; - int keep_arity, i; - - // Matze: TODO, fix this for new transform code... - assert(0); - - set_ia32_ls_mode(fstp, mode); - set_ia32_op_type(fstp, ia32_AddrModeD); - set_ia32_use_frame(fstp); - set_ia32_frame_ent(fstp, ent); - set_ia32_am_flavour(fstp, ia32_am_B); - set_ia32_am_support(fstp, ia32_am_Dest); - - /* load into SSE register */ - sse_load = new_rd_ia32_xLoad(dbg, irg, block, frame, ia32_new_NoReg_gp(env->cg), fstp); - set_ia32_ls_mode(sse_load, mode); - set_ia32_op_type(sse_load, ia32_AddrModeS); - set_ia32_use_frame(sse_load); - set_ia32_frame_ent(sse_load, ent); - set_ia32_am_flavour(sse_load, ia32_am_B); - set_ia32_am_support(sse_load, ia32_am_Source); - mproj = new_rd_Proj(dbg, irg, block, sse_load, mode_M, pn_ia32_xLoad_M); - sse_load = new_rd_Proj(dbg, irg, block, sse_load, mode, pn_ia32_xLoad_res); - - /* reroute all users of the result proj to the sse load */ - edges_reroute(call_res, sse_load, irg); - edges_reroute_kind(call_res, sse_load, EDGE_KIND_DEP, irg); - - /* reroute all users of the old call memory to the sse load memory */ - edges_reroute(call_mem, mproj, irg); - edges_reroute_kind(call_mem, mproj, EDGE_KIND_DEP, irg); - - /* now, we can set the old call mem as input of GetST0 */ - set_irn_n(fstp, 1, call_mem); - - /* now: create new Keep whith all former ins and one additional in - the result Proj */ - - /* 
get a Proj representing a caller save register */ - p = be_get_Proj_for_pn(node, pn_be_Call_first_res + 1); - assert(is_Proj(p) && "Proj expected."); - - /* user of the the proj is the Keep */ - p = get_edge_src_irn(get_irn_out_edge_first(p)); - assert(be_is_Keep(p) && "Keep expected."); - - /* copy in array of the old keep and set the result proj as additional in */ - keep_arity = get_irn_arity(p) + 1; - NEW_ARR_A(ir_node *, in_keep, keep_arity); - in_keep[keep_arity - 1] = call_res; - for (i = 0; i < keep_arity - 1; ++i) - in_keep[i] = get_irn_n(p, i); - - /* create new keep and set the in class requirements properly */ - keep = be_new_Keep(NULL, irg, block, keep_arity, in_keep); - for(i = 0; i < keep_arity; ++i) { - const arch_register_class_t *cls = arch_get_irn_reg_class(env->cg->arch_env, in_keep[i], -1); - be_node_set_reg_class(keep, i, cls); - } - - /* kill the old keep */ - bad = get_irg_bad(irg); - for (i = 0; i < keep_arity - 1; i++) - set_irn_n(p, i, bad); - remove_End_keepalive(get_irg_end(irg), p); - } - - return duplicate_node(env, node); -} - /** * In case SSE is used we need to copy the result from XMM0 to FPU TOS before return. 
*/ @@ -2680,7 +2581,7 @@ static ir_node *gen_be_Return(ia32_transform_env_t *env, ir_node *node) { ir_type *res_type; ir_mode *mode; ir_node *frame, *sse_store, *fld, *mproj, *barrier; - ir_node *new_barrier, *new_frame, *new_ret_val, *new_ret_mem; + ir_node *new_barrier, *new_ret_val, *new_ret_mem; ir_node **in; int pn_ret_val, pn_ret_mem, arity, i; @@ -2701,7 +2602,6 @@ static ir_node *gen_be_Return(ia32_transform_env_t *env, ir_node *node) { } assert(get_method_n_ress(tp) == 1); - mode = mode_E; pn_ret_val = get_Proj_proj(ret_val); pn_ret_mem = get_Proj_proj(ret_mem); @@ -2718,13 +2618,12 @@ static ir_node *gen_be_Return(ia32_transform_env_t *env, ir_node *node) { new_ret_mem = transform_node(env, ret_mem); frame = get_irg_frame(irg); - new_frame = transform_node(env, frame); dbg = get_irn_dbg_info(barrier); block = transform_node(env, get_nodes_block(barrier)); /* store xmm0 onto stack */ - sse_store = new_rd_ia32_xStoreSimple(dbg, irg, block, new_frame, new_ret_val, new_ret_mem); + sse_store = new_rd_ia32_xStoreSimple(dbg, irg, block, frame, new_ret_val, new_ret_mem); set_ia32_ls_mode(sse_store, mode); set_ia32_op_type(sse_store, ia32_AddrModeD); set_ia32_use_frame(sse_store); @@ -2732,7 +2631,7 @@ static ir_node *gen_be_Return(ia32_transform_env_t *env, ir_node *node) { set_ia32_am_support(sse_store, ia32_am_Dest); /* load into st0 */ - fld = new_rd_ia32_SetST0(dbg, irg, block, new_frame, sse_store); + fld = new_rd_ia32_SetST0(dbg, irg, block, frame, sse_store); set_ia32_ls_mode(fld, mode); set_ia32_op_type(fld, ia32_AddrModeS); set_ia32_use_frame(fld); @@ -3613,7 +3512,8 @@ static ir_node *gen_Proj_Quot(ia32_transform_env_t *env, ir_node *node) static ir_node *gen_Proj_tls(ia32_transform_env_t *env, ir_node *node) { ir_graph *irg = env->irg; - dbg_info *dbg = get_irn_dbg_info(node); + //dbg_info *dbg = get_irn_dbg_info(node); + dbg_info *dbg = NULL; ir_node *block = transform_node(env, get_nodes_block(node)); ir_node *res = new_rd_ia32_LdTls(dbg, irg, block, 
mode_Iu); @@ -3624,11 +3524,87 @@ static ir_node *gen_Proj_tls(ia32_transform_env_t *env, ir_node *node) { static ir_node *gen_Proj_be_Call(ia32_transform_env_t *env, ir_node *node) { ir_graph *irg = env->irg; dbg_info *dbg = get_irn_dbg_info(node); - ir_node *pred = get_Proj_pred(node); long proj = get_Proj_proj(node); ir_mode *mode = get_irn_mode(node); ir_node *block = transform_node(env, get_nodes_block(node)); - ir_node *new_pred = transform_node(env, pred); + ir_node *sse_load; + ir_node *call = get_Proj_pred(node); + ir_node *new_call = transform_node(env, call); + + /* The following is kinda tricky: If we're using SSE, then we have to + * move the result value of the call in floating point registers to an + * xmm register, we therefore construct a GetST0 -> xLoad sequence + * after the call, we have to make sure to correctly make the + * MemProj and the result Proj use these 2 nodes + */ + if(proj == pn_be_Call_M_regular) { + // get new node for result, are we doing the sse load/store hack? 
+ ir_node *call_res = be_get_Proj_for_pn(call, pn_be_Call_first_res); + ir_node *call_res_new; + ir_node *call_res_pred = NULL; + + if(call_res != NULL) { + call_res_new = transform_node(env, call_res); + call_res_pred = get_Proj_pred(call_res_new); + } + + if(call_res_pred == NULL || be_is_Call(call_res_pred)) { + return new_rd_Proj(dbg, irg, block, new_call, mode_M, pn_be_Call_M_regular); + } else { + assert(is_ia32_xLoad(call_res_pred)); + return new_rd_Proj(dbg, irg, block, call_res_pred, mode_M, pn_ia32_xLoad_M); + } + } + if(proj == pn_be_Call_first_res && mode_is_float(mode) + && USE_SSE2(env->cg)) { + ir_node *fstp; + ir_node *frame = get_irg_frame(irg); + ir_node *noreg = ia32_new_NoReg_gp(env->cg); + ir_node *p; + ir_node *call_mem = be_get_Proj_for_pn(call, pn_be_Call_M_regular); + ir_node *keepin[1]; + const arch_register_class_t *cls; + + /* in case there is no memory output: create one to serialize the copy FPU -> SSE */ + call_mem = new_rd_Proj(dbg, irg, block, new_call, mode_M, pn_be_Call_M_regular); + + /* store st(0) onto stack */ + fstp = new_rd_ia32_GetST0(dbg, irg, block, frame, noreg, call_mem); + + set_ia32_ls_mode(fstp, mode); + set_ia32_op_type(fstp, ia32_AddrModeD); + set_ia32_use_frame(fstp); + set_ia32_am_flavour(fstp, ia32_am_B); + set_ia32_am_support(fstp, ia32_am_Dest); + + /* load into SSE register */ + sse_load = new_rd_ia32_xLoad(dbg, irg, block, frame, noreg, fstp); + set_ia32_ls_mode(sse_load, mode); + set_ia32_op_type(sse_load, ia32_AddrModeS); + set_ia32_use_frame(sse_load); + set_ia32_am_flavour(sse_load, ia32_am_B); + set_ia32_am_support(sse_load, ia32_am_Source); + + //mproj = new_rd_Proj(dbg, irg, block, sse_load, mode_M, pn_ia32_xLoad_M); + sse_load = new_rd_Proj(dbg, irg, block, sse_load, mode_E, pn_ia32_xLoad_res); + + /* now: create new Keep with all former ins and one additional in - the result Proj */ + + /* get a Proj representing a caller save register */ + p = be_get_Proj_for_pn(call, pn_be_Call_first_res + 1); +
assert(is_Proj(p) && "Proj expected."); + + /* user of the proj is the Keep */ + p = get_edge_src_irn(get_irn_out_edge_first(p)); + assert(be_is_Keep(p) && "Keep expected."); + + /* keep the result */ + cls = arch_get_irn_reg_class(env->cg->arch_env, sse_load, -1); + keepin[0] = sse_load; + be_new_Keep(cls, irg, block, 1, keepin); + + return sse_load; + } /* transform call modes to the mode_Iu or mode_E */ if(mode_is_float(mode)) { @@ -3637,7 +3613,7 @@ static ir_node *gen_Proj_be_Call(ia32_transform_env_t *env, ir_node *node) { mode = mode_Iu; } - return new_rd_Proj(dbg, irg, block, new_pred, mode, proj); + return new_rd_Proj(dbg, irg, block, new_call, mode, proj); } static ir_node *gen_Proj(ia32_transform_env_t *env, ir_node *node) { @@ -3782,7 +3758,7 @@ static void register_transformers(void) { /* handle generic backend nodes */ GEN(be_FrameAddr); - GEN(be_Call); + //GEN(be_Call); GEN(be_Return); GEN(be_FrameLoad); GEN(be_FrameStore); @@ -3830,19 +3806,27 @@ static ir_node *duplicate_node(ia32_transform_env_t *env, ir_node *node) ir_node *block; ir_node *new_node; int i, arity; - ir_node **ins; block = transform_node(env, get_nodes_block(node)); arity = get_irn_arity(node); - ins = alloca(arity * sizeof(ins[0])); - for(i = 0; i < arity; ++i) { - ir_node *in = get_irn_n(node, i); - ins[i] = transform_node(env, in); + if(op->opar == oparity_dynamic) { + new_node = new_ir_node(dbg, irg, block, op, mode, -1, NULL); + for(i = 0; i < arity; ++i) { + ir_node *in = get_irn_n(node, i); + in = transform_node(env, in); + add_irn_n(new_node, in); + } + } else { + ir_node **ins = alloca(arity * sizeof(ins[0])); + for(i = 0; i < arity; ++i) { + ir_node *in = get_irn_n(node, i); + ins[i] = transform_node(env, in); + } + + new_node = new_ir_node(dbg, irg, block, op, mode, arity, ins); } - new_node = new_ir_node(dbg, irg, block, - op, mode, arity, ins); copy_node_attr(node, new_node); duplicate_deps(env, node, new_node); -- 2.20.1