X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=ir%2Fbe%2Fia32%2Fia32_transform.c;h=c8b310dd33b130463c1d15d84264c845b9b54bfe;hb=68d3f5c9bed0971130a8150edafb5e5372c14509;hp=7c8d69ba07564c0cec2f22f251fc072183fbe4e3;hpb=10511be2e42aaa9ee8ac636c0ba8379164a812f3;p=libfirm

diff --git a/ir/be/ia32/ia32_transform.c b/ir/be/ia32/ia32_transform.c
index 7c8d69ba0..c8b310dd3 100644
--- a/ir/be/ia32/ia32_transform.c
+++ b/ir/be/ia32/ia32_transform.c
@@ -90,8 +90,6 @@ DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
 
 static ir_node *initial_fpcw = NULL;
 
-extern ir_op *get_op_Mulh(void);
-
 typedef ir_node *construct_binop_func(dbg_info *db, ir_node *block,
         ir_node *base, ir_node *index, ir_node *mem, ir_node *op1,
         ir_node *op2);
@@ -768,7 +766,7 @@ static void match_arguments(ia32_address_mode_t *am, ir_node *block,
 	assert(use_am || !(flags & match_16bit_am));
 
 	if ((mode_bits == 8 && !(flags & match_8bit_am)) ||
-		(mode_bits == 16 && !(flags & match_16bit_am))) {
+	    (mode_bits == 16 && !(flags & match_16bit_am))) {
 		use_am = 0;
 	}
 
@@ -2947,7 +2945,6 @@ static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
                                  int ins_permuted)
 {
 	ir_node *noreg = ia32_new_NoReg_gp(env_cg);
-	ir_node *nomem = new_NoMem();
 	ir_mode *mode = get_irn_mode(orig_node);
 	ir_node *new_node;
 
@@ -2956,6 +2953,7 @@ static ir_node *create_set_32bit(dbg_info *dbgi, ir_node *new_block,
 
 	/* we might need to conv the result up */
 	if (get_mode_size_bits(mode) > 8) {
+		ir_node *nomem = new_NoMem();
 		new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg, noreg,
 		                                    nomem, new_node, mode_Bu);
 		SET_IA32_ORIG_NODE(new_node, orig_node);
@@ -4520,6 +4518,464 @@ static ir_node *gen_be_Call(ir_node *node)
 	return call;
 }
 
+/**
+ * Transform Builtin return_address
+ */
+static ir_node *gen_return_address(ir_node *node) {
+	ir_node *param = get_Builtin_param(node, 0);
+	ir_node *frame = get_Builtin_param(node, 1);
+	dbg_info *dbgi = get_irn_dbg_info(node);
+	tarval *tv = get_Const_tarval(param);
+	unsigned long value = get_tarval_long(tv);
+
+	ir_node *block = be_transform_node(get_nodes_block(node));
+	ir_node *ptr = be_transform_node(frame);
+	ir_node *noreg = ia32_new_NoReg_gp(env_cg);
+	ir_node *load;
+
+	if (value > 0) {
+		ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
+		ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
+		ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
+	}
+
+	/* load the return address from this frame */
+	load = new_bd_ia32_Load(dbgi, block, ptr, noreg, get_irg_no_mem(current_ir_graph));
+
+	set_irn_pinned(load, get_irn_pinned(node));
+	set_ia32_op_type(load, ia32_AddrModeS);
+	set_ia32_ls_mode(load, mode_Iu);
+
+	set_ia32_am_offs_int(load, 0);
+	set_ia32_use_frame(load);
+	set_ia32_frame_ent(load, ia32_get_return_address_entity());
+
+	if (get_irn_pinned(node) == op_pin_state_floats) {
+		assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
+				&& pn_ia32_vfld_res == pn_ia32_Load_res
+				&& pn_ia32_Load_res == pn_ia32_res);
+		arch_irn_add_flags(load, arch_irn_flags_rematerializable);
+	}
+
+	SET_IA32_ORIG_NODE(load, node);
+	return new_r_Proj(current_ir_graph, block, load, mode_Iu, pn_ia32_Load_res);
+}
+
+/**
+ * Transform Builtin frame_address
+ */
+static ir_node *gen_frame_address(ir_node *node) {
+	ir_node *param = get_Builtin_param(node, 0);
+	ir_node *frame = get_Builtin_param(node, 1);
+	dbg_info *dbgi = get_irn_dbg_info(node);
+	tarval *tv = get_Const_tarval(param);
+	unsigned long value = get_tarval_long(tv);
+
+	ir_node *block = be_transform_node(get_nodes_block(node));
+	ir_node *ptr = be_transform_node(frame);
+	ir_node *noreg = ia32_new_NoReg_gp(env_cg);
+	ir_node *load;
+	ir_entity *ent;
+
+	if (value > 0) {
+		ir_node *cnt = new_bd_ia32_ProduceVal(dbgi, block);
+		ir_node *res = new_bd_ia32_ProduceVal(dbgi, block);
+		ptr = new_bd_ia32_ClimbFrame(dbgi, block, ptr, cnt, res, value);
+	}
+
+	/* load the frame address from this frame */
+	load = new_bd_ia32_Load(dbgi, block, ptr, noreg, get_irg_no_mem(current_ir_graph));
+
+	set_irn_pinned(load, get_irn_pinned(node));
+	set_ia32_op_type(load, ia32_AddrModeS);
+	set_ia32_ls_mode(load, mode_Iu);
+
+	ent = ia32_get_frame_address_entity();
+	if (ent != NULL) {
+		set_ia32_am_offs_int(load, 0);
+		set_ia32_use_frame(load);
+		set_ia32_frame_ent(load, ent);
+	} else {
+		/* will fail anyway, but gcc does this: */
+		set_ia32_am_offs_int(load, 0);
+	}
+
+	if (get_irn_pinned(node) == op_pin_state_floats) {
+		assert(pn_ia32_xLoad_res == pn_ia32_vfld_res
+				&& pn_ia32_vfld_res == pn_ia32_Load_res
+				&& pn_ia32_Load_res == pn_ia32_res);
+		arch_irn_add_flags(load, arch_irn_flags_rematerializable);
+	}
+
+	SET_IA32_ORIG_NODE(load, node);
+	return new_r_Proj(current_ir_graph, block, load, mode_Iu, pn_ia32_Load_res);
+}
+
+/**
+ * Transform Builtin prefetch
+ */
+static ir_node *gen_prefetch(ir_node *node) {
+	dbg_info *dbgi;
+	ir_node *ptr, *block, *mem, *noreg, *base, *index;
+	ir_node *param, *new_node;
+	long rw, locality;
+	tarval *tv;
+	ia32_address_t addr;
+
+	if (!ia32_cg_config.use_sse_prefetch && !ia32_cg_config.use_3dnow_prefetch) {
+		/* no prefetch at all, route memory */
+		return be_transform_node(get_Builtin_mem(node));
+	}
+
+	param = get_Builtin_param(node, 1);
+	tv = get_Const_tarval(param);
+	rw = get_tarval_long(tv);
+
+	/* construct load address */
+	memset(&addr, 0, sizeof(addr));
+	ptr = get_Builtin_param(node, 0);
+	ia32_create_address_mode(&addr, ptr, 0);
+	base = addr.base;
+	index = addr.index;
+
+	noreg = ia32_new_NoReg_gp(env_cg);
+	if (base == NULL) {
+		base = noreg;
+	} else {
+		base = be_transform_node(base);
+	}
+
+	if (index == NULL) {
+		index = noreg;
+	} else {
+		index = be_transform_node(index);
+	}
+
+	dbgi = get_irn_dbg_info(node);
+	block = be_transform_node(get_nodes_block(node));
+	mem = be_transform_node(get_Builtin_mem(node));
+
+	if (rw == 1 && ia32_cg_config.use_3dnow_prefetch) {
+		/* we have 3DNow!, this was already checked above */
+		new_node = new_bd_ia32_PrefetchW(dbgi, block, base, index, mem);
+	} else if (ia32_cg_config.use_sse_prefetch) {
+		/* note: rw == 1 is IGNORED in that case */
+		param = get_Builtin_param(node, 2);
+		tv = get_Const_tarval(param);
+		locality = get_tarval_long(tv);
+
+		/* SSE style prefetch */
+		switch (locality) {
+		case 0:
+			new_node = new_bd_ia32_PrefetchNTA(dbgi, block, base, index, mem);
+			break;
+		case 1:
+			new_node = new_bd_ia32_Prefetch2(dbgi, block, base, index, mem);
+			break;
+		case 2:
+			new_node = new_bd_ia32_Prefetch1(dbgi, block, base, index, mem);
+			break;
+		default:
+			new_node = new_bd_ia32_Prefetch0(dbgi, block, base, index, mem);
+			break;
+		}
+	} else {
+		assert(ia32_cg_config.use_3dnow_prefetch);
+		/* 3DNow! style prefetch */
+		new_node = new_bd_ia32_Prefetch(dbgi, block, base, index, mem);
+	}
+
+	set_irn_pinned(new_node, get_irn_pinned(node));
+	set_ia32_op_type(new_node, ia32_AddrModeS);
+	set_ia32_ls_mode(new_node, mode_Bu);
+	set_address(new_node, &addr);
+
+	SET_IA32_ORIG_NODE(new_node, node);
+
+	be_dep_on_frame(new_node);
+	return new_r_Proj(current_ir_graph, block, new_node, mode_M, pn_ia32_Prefetch_M);
+}
+
+/**
+ * Transform a one-operand Builtin into the node constructed by func (operand may come via address mode)
+ */
+static ir_node *gen_unop_dest(ir_node *node, construct_binop_dest_func *func) {
+	ir_node *param = get_Builtin_param(node, 0);
+	dbg_info *dbgi = get_irn_dbg_info(node);
+
+	ir_node *block = get_nodes_block(node);
+	ir_node *new_block = be_transform_node(block);
+
+	ia32_address_mode_t am;
+	ia32_address_t *addr = &am.addr;
+	ir_node *cnt;
+
+	match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
+
+	cnt = (*func)(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
+	set_am_attributes(cnt, &am);
+	set_ia32_ls_mode(cnt, get_irn_mode(param));
+
+	SET_IA32_ORIG_NODE(cnt, node);
+	return fix_mem_proj(cnt, &am);
+}
+
+/**
+ * Transform builtin ffs.
+ */
+static ir_node *gen_ffs(ir_node *node) {
+	ir_node *bsf = gen_unop_dest(node, new_bd_ia32_Bsf);
+	ir_node *real = skip_Proj(bsf);
+	dbg_info *dbgi = get_irn_dbg_info(real);
+	ir_node *block = get_nodes_block(real);
+	ir_node *imm = create_Immediate(NULL, 0, 31);
+	ir_node *noreg = ia32_new_NoReg_gp(env_cg);
+	ir_node *nomem = new_NoMem();
+	ir_node *flag, *set, *conv, *neg, *or;
+
+	/* bsf x */
+	if (get_irn_mode(real) != mode_T) {
+		set_irn_mode(real, mode_T);
+		bsf = new_r_Proj(current_ir_graph, block, real, mode_Iu, pn_ia32_res);
+	}
+
+	flag = new_r_Proj(current_ir_graph, block, real, mode_b, pn_ia32_flags);
+
+	/* sete */
+	set = new_bd_ia32_Set(dbgi, block, flag, pn_Cmp_Eq, 0);
+	SET_IA32_ORIG_NODE(set, node);
+
+	/* conv to 32bit */
+	conv = new_bd_ia32_Conv_I2I8Bit(dbgi, block, noreg, noreg, nomem, set, mode_Bu);
+	SET_IA32_ORIG_NODE(conv, node);
+
+	/* neg */
+	neg = new_bd_ia32_Neg(dbgi, block, conv);
+
+	/* or */
+	or = new_bd_ia32_Or(dbgi, block, noreg, noreg, nomem, bsf, neg);
+	set_ia32_commutative(or);
+
+	/* add 1 */
+	return new_bd_ia32_Add(dbgi, block, noreg, noreg, nomem, or, create_Immediate(NULL, 0, 1));
+}
+
+/**
+ * Transform builtin clz.
+ */
+static ir_node *gen_clz(ir_node *node) {
+	ir_node *bsr = gen_unop_dest(node, new_bd_ia32_Bsr);
+	ir_node *real = skip_Proj(bsr);
+	dbg_info *dbgi = get_irn_dbg_info(real);
+	ir_node *block = get_nodes_block(real);
+	ir_node *imm = create_Immediate(NULL, 0, 31);
+	ir_node *noreg = ia32_new_NoReg_gp(env_cg);
+
+	return new_bd_ia32_Xor(dbgi, block, noreg, noreg, new_NoMem(), bsr, imm);
+}
+
+/**
+ * Transform builtin ctz.
+ */
+static ir_node *gen_ctz(ir_node *node) {
+	return gen_unop_dest(node, new_bd_ia32_Bsf);
+}
+
+/**
+ * Transform builtin parity.
+ */
+static ir_node *gen_parity(ir_node *node) {
+	ir_node *param = get_Builtin_param(node, 0);
+	dbg_info *dbgi = get_irn_dbg_info(node);
+
+	ir_node *block = get_nodes_block(node);
+
+	ir_node *new_block = be_transform_node(block);
+	ir_node *noreg = ia32_new_NoReg_gp(env_cg);
+	ir_node *imm, *cmp, *new_node;
+
+	ia32_address_mode_t am;
+	ia32_address_t *addr = &am.addr;
+
+
+	/* cmp param, 0 */
+	match_arguments(&am, block, NULL, param, NULL, match_am);
+	imm = create_Immediate(NULL, 0, 0);
+	cmp = new_bd_ia32_Cmp(dbgi, new_block, addr->base, addr->index,
+	                      addr->mem, imm, am.new_op2, am.ins_permuted, 0);
+	set_am_attributes(cmp, &am);
+	set_ia32_ls_mode(cmp, mode_Iu);
+
+	SET_IA32_ORIG_NODE(cmp, node);
+
+	cmp = fix_mem_proj(cmp, &am);
+
+	/* setp */
+	new_node = new_bd_ia32_Set(dbgi, new_block, cmp, ia32_pn_Cmp_parity, 0);
+	SET_IA32_ORIG_NODE(new_node, node);
+
+	/* conv to 32bit */
+	new_node = new_bd_ia32_Conv_I2I8Bit(dbgi, new_block, noreg, noreg,
+	                                    new_NoMem(), new_node, mode_Bu);
+	SET_IA32_ORIG_NODE(new_node, node);
+	return new_node;
+}
+
+/**
+ * Transform builtin popcount
+ */
+static ir_node *gen_popcount(ir_node *node) {
+	ir_node *param = get_Builtin_param(node, 0);
+	dbg_info *dbgi = get_irn_dbg_info(node);
+
+	ir_node *block = get_nodes_block(node);
+	ir_node *new_block = be_transform_node(block);
+
+	ir_node *noreg, *nomem, *new_param;
+	ir_node *imm, *simm, *m1, *s1, *s2, *s3, *s4, *s5, *m2, *m3, *m4, *m5, *m6, *m7, *m8, *m9, *m10, *m11, *m12, *m13;
+
+	/* check for SSE4.2 or SSE4a and use the popcnt instruction */
+	if (ia32_cg_config.use_popcnt) {
+		ia32_address_mode_t am;
+		ia32_address_t *addr = &am.addr;
+		ir_node *cnt;
+
+		match_arguments(&am, block, NULL, param, NULL, match_am | match_16bit_am);
+
+		cnt = new_bd_ia32_Popcnt(dbgi, new_block, addr->base, addr->index, addr->mem, am.new_op2);
+		set_am_attributes(cnt, &am);
+		set_ia32_ls_mode(cnt, get_irn_mode(param));
+
+		SET_IA32_ORIG_NODE(cnt, node);
+		return fix_mem_proj(cnt, &am);
+	}
+
+	noreg = ia32_new_NoReg_gp(env_cg);
+	nomem = new_NoMem();
+	new_param = be_transform_node(param);
+
+	/* do the standard popcount algo */
+
+	/* m1 = x & 0x55555555 */
+	imm = create_Immediate(NULL, 0, 0x55555555);
+	m1 = new_bd_ia32_And(dbgi, new_block, noreg, noreg, nomem, new_param, imm);
+
+	/* s1 = x >> 1 */
+	simm = create_Immediate(NULL, 0, 1);
+	s1 = new_bd_ia32_Shr(dbgi, new_block, new_param, simm);
+
+	/* m2 = s1 & 0x55555555 */
+	m2 = new_bd_ia32_And(dbgi, new_block, noreg, noreg, nomem, s1, imm);
+
+	/* m3 = m1 + m2 */
+	m3 = new_bd_ia32_Lea(dbgi, new_block, m2, m1);
+
+	/* m4 = m3 & 0x33333333 */
+	imm = create_Immediate(NULL, 0, 0x33333333);
+	m4 = new_bd_ia32_And(dbgi, new_block, noreg, noreg, nomem, m3, imm);
+
+	/* s2 = m3 >> 2 */
+	simm = create_Immediate(NULL, 0, 2);
+	s2 = new_bd_ia32_Shr(dbgi, new_block, m3, simm);
+
+	/* m5 = s2 & 0x33333333 */
+	m5 = new_bd_ia32_And(dbgi, new_block, noreg, noreg, nomem, s2, imm);
+
+	/* m6 = m4 + m5 */
+	m6 = new_bd_ia32_Lea(dbgi, new_block, m4, m5);
+
+	/* m7 = m6 & 0x0F0F0F0F */
+	imm = create_Immediate(NULL, 0, 0x0F0F0F0F);
+	m7 = new_bd_ia32_And(dbgi, new_block, noreg, noreg, nomem, m6, imm);
+
+	/* s3 = m6 >> 4 */
+	simm = create_Immediate(NULL, 0, 4);
+	s3 = new_bd_ia32_Shr(dbgi, new_block, m6, simm);
+
+	/* m8 = s3 & 0x0F0F0F0F */
+	m8 = new_bd_ia32_And(dbgi, new_block, noreg, noreg, nomem, s3, imm);
+
+	/* m9 = m7 + m8 */
+	m9 = new_bd_ia32_Lea(dbgi, new_block, m7, m8);
+
+	/* m10 = m9 & 0x00FF00FF */
+	imm = create_Immediate(NULL, 0, 0x00FF00FF);
+	m10 = new_bd_ia32_And(dbgi, new_block, noreg, noreg, nomem, m9, imm);
+
+	/* s4 = m9 >> 8 */
+	simm = create_Immediate(NULL, 0, 8);
+	s4 = new_bd_ia32_Shr(dbgi, new_block, m9, simm);
+
+	/* m11 = s4 & 0x00FF00FF */
+	m11 = new_bd_ia32_And(dbgi, new_block, noreg, noreg, nomem, s4, imm);
+
+	/* m12 = m10 + m11 */
+	m12 = new_bd_ia32_Lea(dbgi, new_block, m10, m11);
+
+	/* m13 = m12 & 0x0000FFFF */
+	imm = create_Immediate(NULL, 0, 0x0000FFFF);
+	m13 = new_bd_ia32_And(dbgi, new_block, noreg, noreg, nomem, m12, imm);
+
+	/* s5 = m12 >> 16 */
+	simm = create_Immediate(NULL, 0, 16);
+	s5 = new_bd_ia32_Shr(dbgi, new_block, m12, simm);
+
+	/* res = m13 + s5 */
+	return new_bd_ia32_Lea(dbgi, new_block, m13, s5);
+}
+
+/**
+ * Transform Builtin node.
+ */
+static ir_node *gen_Builtin(ir_node *node) {
+	ir_builtin_kind kind = get_Builtin_kind(node);
+
+	switch (kind) {
+	case ir_bk_return_address:
+		return gen_return_address(node);
+	case ir_bk_frame_addess:
+		return gen_frame_address(node);
+	case ir_bk_prefetch:
+		return gen_prefetch(node);
+	case ir_bk_ffs:
+		return gen_ffs(node);
+	case ir_bk_clz:
+		return gen_clz(node);
+	case ir_bk_ctz:
+		return gen_ctz(node);
+	case ir_bk_parity:
+		return gen_parity(node);
+	case ir_bk_popcount:
+		return gen_popcount(node);
+	}
+	panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
+}
+
+/**
+ * Transform Proj(Builtin) node.
+ */
+static ir_node *gen_Proj_Builtin(ir_node *proj) {
+	ir_node *node = get_Proj_pred(proj);
+	ir_node *new_node = be_transform_node(node);
+	ir_builtin_kind kind = get_Builtin_kind(node);
+
+	switch (kind) {
+	case ir_bk_return_address:
+	case ir_bk_frame_addess:
+	case ir_bk_ffs:
+	case ir_bk_clz:
+	case ir_bk_ctz:
+	case ir_bk_parity:
+	case ir_bk_popcount:
+		assert(get_Proj_proj(proj) == pn_Builtin_1_result);
+		return new_node;
+	case ir_bk_prefetch:
+		assert(get_Proj_proj(proj) == pn_Builtin_M);
+		return new_node;
+	}
+	panic("Builtin %s not implemented in IA32", get_builtin_kind_name(kind));
+}
+
 static ir_node *gen_be_IncSP(ir_node *node)
 {
 	ir_node *res = be_duplicate_node(node);
@@ -4725,6 +5181,8 @@ static ir_node *gen_Proj(ir_node *node)
 			return gen_Proj_Load(node);
 		case iro_ASM:
 			return gen_Proj_ASM(node);
+		case iro_Builtin:
+			return gen_Proj_Builtin(node);
 		case iro_Div:
 		case iro_Mod:
 		case iro_DivMod:
@@ -4790,8 +5248,6 @@ static ir_node *gen_Proj(ir_node *node)
  */
 static void register_transformers(void)
 {
-	ir_op *op_Mulh;
-
 	/* first clear the generic function pointer for all ops */
 	clear_irp_opcodes_generic_func();
 
@@ -4801,6 +5257,7 @@ static void register_transformers(void)
 	GEN(Add);
 	GEN(Sub);
 	GEN(Mul);
+	GEN(Mulh);
 	GEN(And);
 	GEN(Or);
 	GEN(Eor);
@@ -4868,6 +5325,9 @@ static void register_transformers(void)
 	BAD(EndReg);
 	BAD(EndExcept);
 
+	/* handle builtins */
+	GEN(Builtin);
+
 	/* handle generic backend nodes */
 	GEN(be_FrameAddr);
 	GEN(be_Call);
@@ -4877,10 +5337,6 @@ static void register_transformers(void)
 	GEN(be_SubSP);
 	GEN(be_Copy);
 
-	op_Mulh = get_op_Mulh();
-	if (op_Mulh)
-		GEN(Mulh);
-
 #undef GEN
 #undef BAD
 }