X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=ir%2Fbe%2Fia32%2Fbearch_ia32.c;h=3abf57d17168497c755b0de61bdd4d5a5952c97d;hb=a1a465eb2b3f54027b29f829423fffd0396937f4;hp=8dd18a725e31df61a0bac90b7a6bf34968271a46;hpb=fcf7f6261b95ed16fbd4cb63623eceb471b1850c;p=libfirm diff --git a/ir/be/ia32/bearch_ia32.c b/ir/be/ia32/bearch_ia32.c index 8dd18a725..3abf57d17 100644 --- a/ir/be/ia32/bearch_ia32.c +++ b/ir/be/ia32/bearch_ia32.c @@ -21,6 +21,8 @@ #include #endif /* WITH_LIBCORE */ +#include + #include "pseudo_irg.h" #include "irgwalk.h" #include "irprog.h" @@ -38,6 +40,10 @@ #include "../belower.h" #include "../besched_t.h" #include "../be.h" +#include "../be_t.h" +#include "../beirgmod.h" +#include "../be_dbgout.h" +#include "../beblocksched.h" #include "bearch_ia32_t.h" #include "ia32_new_nodes.h" /* ia32 nodes interface */ @@ -57,9 +63,6 @@ /* TODO: ugly */ static set *cur_reg_set = NULL; -#undef is_Start -#define is_Start(irn) (get_irn_opcode(irn) == iro_Start) - /* Creates the unique per irg GP NoReg node. */ ir_node *ia32_new_NoReg_gp(ia32_code_gen_t *cg) { return be_abi_get_callee_save_irn(cg->birg->abi, &ia32_gp_regs[REG_GP_NOREG]); @@ -71,6 +74,21 @@ ir_node *ia32_new_NoReg_fp(ia32_code_gen_t *cg) { USE_SSE2(cg) ? &ia32_xmm_regs[REG_XMM_NOREG] : &ia32_vfp_regs[REG_VFP_NOREG]); } +/** + * Returns gp_noreg or fp_noreg, depending in input requirements. + */ +ir_node *ia32_get_admissible_noreg(ia32_code_gen_t *cg, ir_node *irn, int pos) { + arch_register_req_t req; + const arch_register_req_t *p_req; + + p_req = arch_get_register_req(cg->arch_env, &req, irn, pos); + assert(p_req && "Missing register requirements"); + if (p_req->cls == &ia32_reg_classes[CLASS_ia32_gp]) + return ia32_new_NoReg_gp(cg); + else + return ia32_new_NoReg_fp(cg); +} + /************************************************** * _ _ _ __ * | | | (_)/ _| @@ -82,13 +100,6 @@ ir_node *ia32_new_NoReg_fp(ia32_code_gen_t *cg) { * |___/ **************************************************/ -static ir_node *my_skip_proj(const ir_node *n) { - while (is_Proj(n)) - n = get_Proj_pred(n); - return (ir_node *)n; -} - - /** * Return register requirements for an ia32 node. * If the node returns a tuple (mode_T) then the proj's @@ -114,24 +125,22 @@ static const arch_register_req_t *ia32_get_irn_reg_req(const void *self, arch_re DBG((mod, LEVEL_1, "get requirements at pos %d for %+F ... ", pos, irn)); if (is_Proj(irn)) { - if (pos == -1) { - node_pos = ia32_translate_proj_pos(irn); - } - else { - node_pos = pos; + if(pos >= 0) { + DBG((mod, LEVEL_1, "ignoring request IN requirements for node %+F\n", irn)); + return NULL; } - irn = my_skip_proj(irn); + node_pos = (pos == -1) ? get_Proj_proj(irn) : pos; + irn = skip_Proj(irn); DB((mod, LEVEL_1, "skipping Proj, going to %+F at pos %d ... ", irn, node_pos)); } if (is_ia32_irn(irn)) { - if (pos >= 0) { - irn_req = get_ia32_in_req(irn, pos); - } - else { - irn_req = get_ia32_out_req(irn, node_pos); + irn_req = (pos >= 0) ? get_ia32_in_req(irn, pos) : get_ia32_out_req(irn, node_pos); + if (irn_req == NULL) { + /* no requirements */ + return NULL; } DB((mod, LEVEL_1, "returning reqs for %+F at pos %d\n", irn, pos)); @@ -187,8 +196,8 @@ static void ia32_set_irn_reg(const void *self, ir_node *irn, const arch_register DBG((ops->cg->mod, LEVEL_1, "ia32 assigned register %s to node %+F\n", reg->name, irn)); if (is_Proj(irn)) { - pos = ia32_translate_proj_pos(irn); - irn = my_skip_proj(irn); + pos = get_Proj_proj(irn); + irn = skip_Proj(irn); } if (is_ia32_irn(irn)) { @@ -212,8 +221,8 @@ static const arch_register_t *ia32_get_irn_reg(const void *self, const ir_node * return NULL; } - pos = ia32_translate_proj_pos(irn); - irn = my_skip_proj(irn); + pos = get_Proj_proj(irn); + irn = skip_Proj(irn); } if (is_ia32_irn(irn)) { @@ -231,7 +240,7 @@ static const arch_register_t *ia32_get_irn_reg(const void *self, const ir_node * static arch_irn_class_t ia32_classify(const void *self, const ir_node *irn) { arch_irn_class_t classification = arch_irn_class_normal; - irn = my_skip_proj(irn); + irn = skip_Proj(irn); if (is_cfop(irn)) classification |= arch_irn_class_branch; @@ -255,16 +264,33 @@ static arch_irn_class_t ia32_classify(const void *self, const ir_node *irn) { } static arch_irn_flags_t ia32_get_flags(const void *self, const ir_node *irn) { - irn = my_skip_proj(irn); - if (is_ia32_irn(irn)) - return get_ia32_flags(irn); + arch_irn_flags_t flags; + ir_node *pred = is_Proj(irn) && mode_is_datab(get_irn_mode(irn)) ? get_Proj_pred(irn) : NULL; + + if (is_Unknown(irn)) + flags = arch_irn_flags_ignore; else { - if (is_Unknown(irn)) - return arch_irn_flags_ignore; - return 0; + /* pred is only set, if we have a Proj */ + flags = pred && is_ia32_irn(pred) ? get_ia32_out_flags(pred, get_Proj_proj(irn)) : arch_irn_flags_none; + + irn = skip_Proj(irn); + if (is_ia32_irn(irn)) + flags |= get_ia32_flags(irn); } + + return flags; } +/** + * The IA32 ABI callback object. + */ +typedef struct { + be_abi_call_flags_bits_t flags; /**< The call flags. */ + const arch_isa_t *isa; /**< The ISA handle. */ + const arch_env_t *aenv; /**< The architecture environment. */ + ir_graph *irg; /**< The associated graph. */ +} ia32_abi_env_t; + static entity *ia32_get_frame_entity(const void *self, const ir_node *irn) { return is_ia32_irn(irn) ? get_ia32_frame_ent(irn) : NULL; } @@ -273,20 +299,30 @@ static void ia32_set_frame_entity(const void *self, ir_node *irn, entity *ent) { set_ia32_frame_ent(irn, ent); } -static void ia32_set_stack_bias(const void *self, ir_node *irn, int bias) { +static void ia32_set_frame_offset(const void *self, ir_node *irn, int bias) { char buf[64]; const ia32_irn_ops_t *ops = self; if (get_ia32_frame_ent(irn)) { ia32_am_flavour_t am_flav = get_ia32_am_flavour(irn); + if(is_ia32_Pop(irn)) { + int omit_fp = be_abi_omit_fp(ops->cg->birg->abi); + if (omit_fp) { + /* Pop nodes modify the stack pointer before calculating the destination + * address, so fix this here + */ + bias -= 4; + } + } + DBG((ops->cg->mod, LEVEL_1, "stack biased %+F with %d\n", irn, bias)); + snprintf(buf, sizeof(buf), "%d", bias); if (get_ia32_op_type(irn) == ia32_Normal) { set_ia32_cnst(irn, buf); - } - else { + } else { add_ia32_am_offs(irn, buf); am_flav |= ia32_O; set_ia32_am_flavour(irn, am_flav); @@ -294,22 +330,18 @@ static void ia32_set_stack_bias(const void *self, ir_node *irn, int bias) { } } -typedef struct { - be_abi_call_flags_bits_t flags; - const arch_isa_t *isa; - const arch_env_t *aenv; - ir_graph *irg; -} ia32_abi_env_t; +static int ia32_get_sp_bias(const void *self, const ir_node *irn) { + if(is_Proj(irn)) { + long proj = get_Proj_proj(irn); + ir_node *pred = get_Proj_pred(irn); -static void *ia32_abi_init(const be_abi_call_t *call, const arch_env_t *aenv, ir_graph *irg) -{ - ia32_abi_env_t *env = xmalloc(sizeof(env[0])); - be_abi_call_flags_t fl = be_abi_call_get_flags(call); - env->flags = fl.bits; - env->irg = irg; - env->aenv = aenv; - env->isa = aenv->isa; - return env; + if (proj == pn_ia32_Push_stack && is_ia32_Push(pred)) + return 4; + if (proj == pn_ia32_Pop_stack && is_ia32_Pop(pred)) + return -4; + } + + return 0; } /** @@ -338,16 +370,17 @@ static void ia32_abi_dont_save_regs(void *self, pset *s) */ static const arch_register_t *ia32_abi_prologue(void *self, ir_node **mem, pmap *reg_map) { - ia32_abi_env_t *env = self; + ia32_abi_env_t *env = self; - if (!env->flags.try_omit_fp) { - ir_node *bl = get_irg_start_block(env->irg); - ir_node *curr_sp = be_abi_reg_map_get(reg_map, env->isa->sp); - ir_node *curr_bp = be_abi_reg_map_get(reg_map, env->isa->bp); + if (! env->flags.try_omit_fp) { + ir_node *bl = get_irg_start_block(env->irg); + ir_node *curr_sp = be_abi_reg_map_get(reg_map, env->isa->sp); + ir_node *curr_bp = be_abi_reg_map_get(reg_map, env->isa->bp); + ir_node *noreg = be_abi_reg_map_get(reg_map, &ia32_gp_regs[REG_GP_NOREG]); ir_node *push; /* push ebp */ - push = new_rd_ia32_Push(NULL, env->irg, bl, curr_sp, curr_bp, *mem); + push = new_rd_ia32_Push(NULL, env->irg, bl, noreg, noreg, curr_bp, curr_sp, *mem); curr_sp = new_r_Proj(env->irg, bl, push, get_irn_mode(curr_sp), pn_ia32_Push_stack); *mem = new_r_Proj(env->irg, bl, push, mode_M, pn_ia32_Push_M); @@ -388,37 +421,39 @@ static const arch_register_t *ia32_abi_prologue(void *self, ir_node **mem, pmap */ static void ia32_abi_epilogue(void *self, ir_node *bl, ir_node **mem, pmap *reg_map) { - ia32_abi_env_t *env = self; - ir_node *curr_sp = be_abi_reg_map_get(reg_map, env->isa->sp); - ir_node *curr_bp = be_abi_reg_map_get(reg_map, env->isa->bp); + ia32_abi_env_t *env = self; + ir_node *curr_sp = be_abi_reg_map_get(reg_map, env->isa->sp); + ir_node *curr_bp = be_abi_reg_map_get(reg_map, env->isa->bp); if (env->flags.try_omit_fp) { /* simply remove the stack frame here */ - curr_sp = be_new_IncSP(env->isa->sp, env->irg, bl, curr_sp, *mem, BE_STACK_FRAME_SIZE, be_stack_dir_shrink); + curr_sp = be_new_IncSP(env->isa->sp, env->irg, bl, curr_sp, BE_STACK_FRAME_SIZE_SHRINK); + add_irn_dep(curr_sp, *mem); } else { - const ia32_isa_t *isa = (ia32_isa_t *)env->isa; - ir_mode *mode_bp = env->isa->bp->reg_class->mode; + const ia32_isa_t *isa = (ia32_isa_t *)env->isa; + ir_mode *mode_bp = env->isa->bp->reg_class->mode; /* gcc always emits a leave at the end of a routine */ if (1 || ARCH_AMD(isa->opt_arch)) { ir_node *leave; /* leave */ - leave = new_rd_ia32_Leave(NULL, env->irg, bl, curr_sp, *mem); + leave = new_rd_ia32_Leave(NULL, env->irg, bl, curr_sp, curr_bp); set_ia32_flags(leave, arch_irn_flags_ignore); curr_bp = new_r_Proj(current_ir_graph, bl, leave, mode_bp, pn_ia32_Leave_frame); curr_sp = new_r_Proj(current_ir_graph, bl, leave, get_irn_mode(curr_sp), pn_ia32_Leave_stack); *mem = new_r_Proj(current_ir_graph, bl, leave, mode_M, pn_ia32_Leave_M); } else { + ir_node *noreg = be_abi_reg_map_get(reg_map, &ia32_gp_regs[REG_GP_NOREG]); ir_node *pop; /* copy ebp to esp */ curr_sp = be_new_SetSP(env->isa->sp, env->irg, bl, curr_sp, curr_bp, *mem); /* pop ebp */ - pop = new_rd_ia32_Pop(NULL, env->irg, bl, curr_sp, *mem); + pop = new_rd_ia32_Pop(NULL, env->irg, bl, noreg, noreg, curr_sp, *mem); set_ia32_flags(pop, arch_irn_flags_ignore); curr_bp = new_r_Proj(current_ir_graph, bl, pop, mode_bp, pn_ia32_Pop_res); curr_sp = new_r_Proj(current_ir_graph, bl, pop, get_irn_mode(curr_sp), pn_ia32_Pop_stack); @@ -432,6 +467,32 @@ static void ia32_abi_epilogue(void *self, ir_node *bl, ir_node **mem, pmap *reg_ be_abi_reg_map_set(reg_map, env->isa->bp, curr_bp); } +/** + * Initialize the callback object. + * @param call The call object. + * @param aenv The architecture environment. + * @param irg The graph with the method. + * @return Some pointer. This pointer is passed to all other callback functions as self object. + */ +static void *ia32_abi_init(const be_abi_call_t *call, const arch_env_t *aenv, ir_graph *irg) +{ + ia32_abi_env_t *env = xmalloc(sizeof(env[0])); + be_abi_call_flags_t fl = be_abi_call_get_flags(call); + env->flags = fl.bits; + env->irg = irg; + env->aenv = aenv; + env->isa = aenv->isa; + return env; +} + +/** + * Destroy the callback object. + * @param self The callback object. + */ +static void ia32_abi_done(void *self) { + free(self); +} + /** * Produces the type which sits between the stack args and the locals on the stack. * it will contain the return address and space to store the old base pointer. @@ -485,45 +546,35 @@ static ir_type *ia32_abi_get_between_type(void *self) static int ia32_get_op_estimated_cost(const void *self, const ir_node *irn) { int cost; + ia32_op_type_t op_tp; + const ia32_irn_ops_t *ops = self; - if(is_Proj(irn)) + if (is_Proj(irn)) return 0; - switch (get_ia32_irn_opcode(irn)) { - case iro_ia32_xDiv: - case iro_ia32_DivMod: - cost = 8; - break; - - case iro_ia32_xLoad: - case iro_ia32_l_Load: - case iro_ia32_Load: - cost = 25; - break; - - case iro_ia32_Push: - case iro_ia32_Pop: - cost = 5; - break; - - case iro_ia32_xStore: - case iro_ia32_l_Store: - case iro_ia32_Store: - case iro_ia32_Store8Bit: - cost = 50; - break; - - case iro_ia32_MulS: - case iro_ia32_Mul: - case iro_ia32_Mulh: - case iro_ia32_xMul: - case iro_ia32_l_MulS: - case iro_ia32_l_Mul: - cost = 2; - break; - - default: - cost = 1; + assert(is_ia32_irn(irn)); + + cost = get_ia32_latency(irn); + op_tp = get_ia32_op_type(irn); + + if (is_ia32_CopyB(irn)) { + cost = 250; + if (ARCH_INTEL(ops->cg->arch)) + cost += 150; + } + else if (is_ia32_CopyB_i(irn)) { + int size = get_tarval_long(get_ia32_Immop_tarval(irn)); + cost = 20 + (int)ceil((4/3) * size); + if (ARCH_INTEL(ops->cg->arch)) + cost += 150; + } + /* in case of address mode operations add additional cycles */ + else if (op_tp == ia32_AddrModeD || op_tp == ia32_AddrModeS) { + /* + In case of stack access add 5 cycles (we assume stack is in cache), + other memory operations cost 20 cycles. + */ + cost += is_ia32_use_frame(irn) ? 5 : 20; } return cost; @@ -677,7 +728,7 @@ static int ia32_possible_memory_operand(const void *self, const ir_node *irn, un get_irn_arity(irn) != 5 || /* must be a binary operation */ get_ia32_op_type(irn) != ia32_Normal || /* must not already be a addressmode irn */ ! (get_ia32_am_support(irn) & ia32_am_Source) || /* must be capable of source addressmode */ - (i != 2 && i != 3) || /* a "real" operand position must be requested */ + (i != 2 && i != 3) || /* a "real" operand position must be requested */ (i == 2 && ! is_ia32_commutative(irn)) || /* if first operand requested irn must be commutative */ is_ia32_use_frame(irn)) /* must not already use frame */ return 0; @@ -685,12 +736,11 @@ static int ia32_possible_memory_operand(const void *self, const ir_node *irn, un return 1; } -static void ia32_perform_memory_operand(const void *self, ir_node *irn, ir_node *reload, unsigned int i) { - assert(ia32_possible_memory_operand(self, irn, i) && "Cannot perform memory operand change"); - assert(get_nodes_block(reload) == get_nodes_block(irn) && "Reload must be in same block as irn."); +static void ia32_perform_memory_operand(const void *self, ir_node *irn, ir_node *spill, unsigned int i) { + const ia32_irn_ops_t *ops = self; + ia32_code_gen_t *cg = ops->cg; - if (get_irn_n_edges(reload) > 1) - return; + assert(ia32_possible_memory_operand(self, irn, i) && "Cannot perform memory operand change"); if (i == 2) { ir_node *tmp = get_irn_n(irn, 3); @@ -701,27 +751,26 @@ static void ia32_perform_memory_operand(const void *self, ir_node *irn, ir_node set_ia32_am_support(irn, ia32_am_Source); set_ia32_op_type(irn, ia32_AddrModeS); set_ia32_am_flavour(irn, ia32_B); - set_ia32_ls_mode(irn, get_irn_mode(reload)); - set_ia32_frame_ent(irn, be_get_frame_entity(reload)); + set_ia32_ls_mode(irn, get_irn_mode(get_irn_n(irn, i))); set_ia32_use_frame(irn); set_ia32_got_reload(irn); - set_irn_n(irn, 0, be_get_Reload_frame(reload)); - set_irn_n(irn, 4, be_get_Reload_mem(reload)); + set_irn_n(irn, 0, get_irg_frame(get_irn_irg(irn))); + set_irn_n(irn, 4, spill); /* Input at position one is index register, which is NoReg. We would need cg object to get a real noreg, but we cannot access it from here. */ - set_irn_n(irn, 3, get_irn_n(irn, 1)); + set_irn_n(irn, 3, ia32_get_admissible_noreg(cg, irn, 3)); - DBG_OPT_AM_S(reload, irn); + //FIXME DBG_OPT_AM_S(reload, irn); } static const be_abi_callbacks_t ia32_abi_callbacks = { ia32_abi_init, - free, + ia32_abi_done, ia32_abi_get_between_type, ia32_abi_dont_save_regs, ia32_abi_prologue, @@ -738,7 +787,8 @@ static const arch_irn_ops_if_t ia32_irn_ops_if = { ia32_get_flags, ia32_get_frame_entity, ia32_set_frame_entity, - ia32_set_stack_bias, + ia32_set_frame_offset, + ia32_get_sp_bias, ia32_get_inverse, ia32_get_op_estimated_cost, ia32_possible_memory_operand, @@ -773,6 +823,22 @@ static void ia32_kill_convs(ia32_code_gen_t *cg) { } } +/** + * Transform the Thread Local Store base. + */ +static void transform_tls(ir_graph *irg) { + ir_node *irn = get_irg_tls(irg); + + if (irn) { + dbg_info *dbg = get_irn_dbg_info(irn); + ir_node *blk = get_nodes_block(irn); + ir_node *newn; + newn = new_rd_ia32_LdTls(dbg, irg, blk, get_irn_mode(irn)); + + exchange(irn, newn); + } +} + /** * Transforms the standard firm graph into * an ia32 firm graph @@ -792,6 +858,7 @@ static void ia32_prepare_graph(void *self) { dom = be_compute_dominance_frontiers(cg->irg); cg->kill_conv = new_nodeset(5); + transform_tls(cg->irg); irg_walk_blkwise_graph(cg->irg, NULL, ia32_transform_node, cg); ia32_kill_convs(cg); del_nodeset(cg->kill_conv); @@ -818,9 +885,9 @@ static void ia32_before_sched(void *self) { } static void remove_unused_nodes(ir_node *irn, bitset_t *already_visited) { - int i; + int i, arity; ir_mode *mode; - ir_node *mem_proj; + ir_node *mem_proj = NULL; if (is_Block(irn)) return; @@ -828,29 +895,38 @@ static void remove_unused_nodes(ir_node *irn, bitset_t *already_visited) { mode = get_irn_mode(irn); /* check if we already saw this node or the node has more than one user */ - if (bitset_contains_irn(already_visited, irn) || get_irn_n_edges(irn) > 1) + if (bitset_contains_irn(already_visited, irn) || get_irn_n_edges(irn) > 1) { return; + }; /* mark irn visited */ bitset_add_irn(already_visited, irn); /* non-Tuple nodes with one user: ok, return */ - if (get_irn_n_edges(irn) >= 1 && mode != mode_T) + if (get_irn_n_edges(irn) >= 1 && mode != mode_T) { return; + } /* tuple node has one user which is not the mem proj-> ok */ if (mode == mode_T && get_irn_n_edges(irn) == 1) { mem_proj = ia32_get_proj_for_mode(irn, mode_M); - if (! mem_proj) + if (mem_proj == NULL) { return; + } } - for (i = get_irn_arity(irn) - 1; i >= 0; i--) { + arity = get_irn_arity(irn); + for (i = 0; i < arity; ++i) { ir_node *pred = get_irn_n(irn, i); /* do not follow memory edges or we will accidentally remove stores */ - if (is_Proj(pred) && get_irn_mode(pred) == mode_M) + if (get_irn_mode(pred) == mode_M) { + if(mem_proj != NULL) { + edges_reroute(mem_proj, pred, get_irn_irg(mem_proj)); + mem_proj = NULL; + } continue; + } set_irn_n(irn, i, new_Bad()); @@ -862,8 +938,15 @@ static void remove_unused_nodes(ir_node *irn, bitset_t *already_visited) { remove_unused_nodes(pred, already_visited); } - if (sched_is_scheduled(irn)) + // we need to set the presd to Bad again to also get the memory edges + arity = get_irn_arity(irn); + for (i = 0; i < arity; ++i) { + set_irn_n(irn, i, new_Bad()); + } + + if (sched_is_scheduled(irn)) { sched_remove(irn); + } } static void remove_unused_loads_walker(ir_node *irn, void *env) { @@ -879,9 +962,7 @@ static void remove_unused_loads_walker(ir_node *irn, void *env) { */ static void ia32_before_ra(void *self) { ia32_code_gen_t *cg = self; - bitset_t *already_visited = bitset_irg_malloc(cg->irg); - - cg->blk_sched = sched_create_block_schedule(cg->irg); + bitset_t *already_visited = bitset_irg_alloca(cg->irg); /* Handle special case: @@ -889,8 +970,7 @@ static void ia32_before_ra(void *self) { We need to remove those Loads and all other nodes which won't be used after removing the Load from schedule. */ - irg_walk_graph(cg->irg, remove_unused_loads_walker, NULL, already_visited); - bitset_free(already_visited); + irg_walk_graph(cg->irg, NULL, remove_unused_loads_walker, already_visited); } @@ -899,7 +979,7 @@ static void ia32_before_ra(void *self) { */ static void transform_to_Load(ia32_transform_env_t *env) { ir_node *irn = env->irn; - entity *ent = arch_get_frame_entity(env->cg->arch_env, irn); + entity *ent = be_get_frame_entity(irn); ir_mode *mode = env->mode; ir_node *noreg = ia32_new_NoReg_gp(env->cg); ir_node *nomem = new_rd_NoMem(env->irg); @@ -919,9 +999,8 @@ static void transform_to_Load(ia32_transform_env_t *env) { else new_op = new_rd_ia32_vfld(env->dbg, env->irg, env->block, ptr, noreg, mem); } - else { + else new_op = new_rd_ia32_Load(env->dbg, env->irg, env->block, ptr, noreg, mem); - } set_ia32_am_support(new_op, ia32_am_Source); set_ia32_op_type(new_op, ia32_AddrModeS); @@ -932,7 +1011,7 @@ static void transform_to_Load(ia32_transform_env_t *env) { DBG_OPT_RELOAD2LD(irn, new_op); - proj = new_rd_Proj(env->dbg, env->irg, env->block, new_op, mode, pn_Load_res); + proj = new_rd_Proj(env->dbg, env->irg, env->block, new_op, mode, pn_ia32_Load_res); if (sched_point) { sched_add_after(sched_point, new_op); @@ -955,7 +1034,7 @@ static void transform_to_Load(ia32_transform_env_t *env) { */ static void transform_to_Store(ia32_transform_env_t *env) { ir_node *irn = env->irn; - entity *ent = arch_get_frame_entity(env->cg->arch_env, irn); + entity *ent = be_get_frame_entity(irn); ir_mode *mode = env->mode; ir_node *noreg = ia32_new_NoReg_gp(env->cg); ir_node *nomem = new_rd_NoMem(env->irg); @@ -1002,60 +1081,60 @@ static void transform_to_Store(ia32_transform_env_t *env) { exchange(irn, proj); } -static ir_node *create_push(ia32_transform_env_t *env, ir_node *schedpoint, ir_node *sp, ir_node *mem, entity *ent, const char *offset) { +static ir_node *create_push(ia32_transform_env_t *env, ir_node *schedpoint, ir_node *sp, ir_node *mem, entity *ent) { ir_node *noreg = ia32_new_NoReg_gp(env->cg); + ir_node *frame = get_irg_frame(env->irg); - ir_node *push = new_rd_ia32_Push(env->dbg, env->irg, env->block, sp, noreg, mem); + ir_node *push = new_rd_ia32_Push(env->dbg, env->irg, env->block, frame, noreg, noreg, sp, mem); set_ia32_frame_ent(push, ent); set_ia32_use_frame(push); set_ia32_op_type(push, ia32_AddrModeS); set_ia32_am_flavour(push, ia32_B); set_ia32_ls_mode(push, mode_Is); - if(offset != NULL) - add_ia32_am_offs(push, offset); sched_add_before(schedpoint, push); return push; } -static ir_node *create_pop(ia32_transform_env_t *env, ir_node *schedpoint, ir_node *sp, entity *ent, const char *offset) { - ir_node *pop = new_rd_ia32_Pop(env->dbg, env->irg, env->block, sp, new_NoMem()); +static ir_node *create_pop(ia32_transform_env_t *env, ir_node *schedpoint, ir_node *sp, entity *ent) { + ir_node *noreg = ia32_new_NoReg_gp(env->cg); + ir_node *frame = get_irg_frame(env->irg); + + ir_node *pop = new_rd_ia32_Pop(env->dbg, env->irg, env->block, frame, noreg, sp, new_NoMem()); set_ia32_frame_ent(pop, ent); set_ia32_use_frame(pop); set_ia32_op_type(pop, ia32_AddrModeD); set_ia32_am_flavour(pop, ia32_B); set_ia32_ls_mode(pop, mode_Is); - if(offset != NULL) - add_ia32_am_offs(pop, offset); sched_add_before(schedpoint, pop); return pop; } -static ir_node* create_spproj(ia32_transform_env_t *env, ir_node *pred, ir_node *schedpoint, const ir_node *oldsp) { - ir_mode *spmode = get_irn_mode(oldsp); - const arch_register_t *spreg = arch_get_irn_register(env->cg->arch_env, oldsp); +static ir_node* create_spproj(ia32_transform_env_t *env, ir_node *pred, int pos, ir_node *schedpoint) { + ir_mode *spmode = mode_Iu; + const arch_register_t *spreg = &ia32_gp_regs[REG_ESP]; ir_node *sp; - sp = new_rd_Proj(env->dbg, env->irg, env->block, pred, spmode, 0); + sp = new_rd_Proj(env->dbg, env->irg, env->block, pred, spmode, pos); arch_set_irn_register(env->cg->arch_env, sp, spreg); sched_add_before(schedpoint, sp); return sp; } +/** + * Transform memperm, currently we do this the ugly way and produce + * push/pop into/from memory cascades. This is possible without using + * any registers. + */ static void transform_MemPerm(ia32_transform_env_t *env) { - /* - * Transform memperm, currently we do this the ugly way and produce - * push/pop into/from memory cascades. This is possible without using - * any registers. - */ ir_node *node = env->irn; int i, arity; - ir_node *sp = get_irn_n(node, 0); + ir_node *sp = be_abi_get_ignore_irn(env->cg->birg->abi, &ia32_gp_regs[REG_ESP]); const ir_edge_t *edge; const ir_edge_t *next; ir_node **pops; @@ -1073,12 +1152,13 @@ static void transform_MemPerm(ia32_transform_env_t *env) { assert( (entbits == 32 || entbits == 64) && "spillslot on x86 should be 32 or 64 bit"); - push = create_push(env, node, sp, mem, ent, NULL); - sp = create_spproj(env, push, node, sp); + push = create_push(env, node, sp, mem, ent); + sp = create_spproj(env, push, 0, node); if(entbits == 64) { // add another push after the first one - push = create_push(env, node, sp, mem, ent, "4"); - sp = create_spproj(env, push, node, sp); + push = create_push(env, node, sp, mem, ent); + add_ia32_am_offs_int(push, 4); + sp = create_spproj(env, push, 0, node); } set_irn_n(node, i, new_Bad()); @@ -1094,15 +1174,14 @@ static void transform_MemPerm(ia32_transform_env_t *env) { assert( (entbits == 32 || entbits == 64) && "spillslot on x86 should be 32 or 64 bit"); - pop = create_pop(env, node, sp, ent, NULL); + pop = create_pop(env, node, sp, ent); if(entbits == 64) { // add another pop after the first one - sp = create_spproj(env, pop, node, sp); - pop = create_pop(env, node, sp, ent, "4"); - } - if(i != 0) { - sp = create_spproj(env, pop, node, sp); + sp = create_spproj(env, pop, 1, node); + pop = create_pop(env, node, sp, ent); + add_ia32_am_offs_int(pop, 4); } + sp = create_spproj(env, pop, 1, node); pops[i] = pop; } @@ -1118,6 +1197,11 @@ static void transform_MemPerm(ia32_transform_env_t *env) { set_Proj_proj(proj, 3); } + // remove memperm + arity = get_irn_arity(node); + for(i = 0; i < arity; ++i) { + set_irn_n(node, i, new_Bad()); + } sched_remove(node); } @@ -1185,13 +1269,11 @@ static void ia32_after_ra_walker(ir_node *block, void *env) { */ static void ia32_after_ra(void *self) { ia32_code_gen_t *cg = self; + ir_graph *irg = cg->irg; - irg_block_walk_graph(cg->irg, NULL, ia32_after_ra_walker, self); + irg_block_walk_graph(irg, NULL, ia32_after_ra_walker, cg); - /* if we do x87 code generation, rewrite all the virtual instructions and registers */ - if (cg->used_fp == fp_x87 || cg->force_sim) { - x87_simulate_graph(cg->arch_env, cg->irg, cg->blk_sched); - } + ia32_finish_irg(irg, cg); } /** @@ -1201,7 +1283,17 @@ static void ia32_finish(void *self) { ia32_code_gen_t *cg = self; ir_graph *irg = cg->irg; - ia32_finish_irg(irg, cg); + //be_remove_empty_blocks(irg); + cg->blk_sched = be_create_block_schedule(irg, cg->birg->execfreqs); + + //cg->blk_sched = sched_create_block_schedule(cg->irg, cg->birg->execfreqs); + + /* if we do x87 code generation, rewrite all the virtual instructions and registers */ + if (cg->used_fp == fp_x87 || cg->force_sim) { + x87_simulate_graph(cg->arch_env, irg, cg->blk_sched); + } + + ia32_peephole_optimization(irg, cg); } /** @@ -1221,7 +1313,7 @@ static void ia32_codegen(void *self) { /* de-allocate code generator */ del_set(cg->reg_set); - free(self); + free(cg); } static void *ia32_cg_init(const be_irg_t *birg); @@ -1305,21 +1397,18 @@ static const tarval_mode_info mo_integer = { }; /* -* set the tarval output mode to C-semantics -*/ + * set the tarval output mode of all integer modes to decimal + */ static void set_tarval_output_modes(void) { - set_tarval_mode_output_option(get_modeLs(), &mo_integer); - set_tarval_mode_output_option(get_modeLu(), &mo_integer); - set_tarval_mode_output_option(get_modeIs(), &mo_integer); - set_tarval_mode_output_option(get_modeIu(), &mo_integer); - set_tarval_mode_output_option(get_modeHs(), &mo_integer); - set_tarval_mode_output_option(get_modeHu(), &mo_integer); - set_tarval_mode_output_option(get_modeBs(), &mo_integer); - set_tarval_mode_output_option(get_modeBu(), &mo_integer); - set_tarval_mode_output_option(get_modeC(), &mo_integer); - set_tarval_mode_output_option(get_modeU(), &mo_integer); - set_tarval_mode_output_option(get_modeIu(), &mo_integer); + int i; + + for (i = get_irp_n_modes() - 1; i >= 0; --i) { + ir_mode *mode = get_irp_mode(i); + + if (mode_is_int(mode)) + set_tarval_mode_output_option(mode, &mo_integer); + } } @@ -1334,6 +1423,7 @@ static ia32_isa_t ia32_isa_template = { &ia32_gp_regs[REG_ESP], /* stack pointer register */ &ia32_gp_regs[REG_EBP], /* base pointer register */ -1, /* stack direction */ + NULL, /* main environment */ }, NULL, /* 16bit register names */ NULL, /* 8bit register names */ @@ -1350,6 +1440,7 @@ static ia32_isa_t ia32_isa_template = { arch_pentium_4, /* optimize for architecture */ fp_sse2, /* use sse2 unit */ NULL, /* current code generator */ + NULL, /* output file */ #ifndef NDEBUG NULL, /* name obstack */ 0 /* name obst size */ @@ -1412,9 +1503,13 @@ static void *ia32_init(FILE *file_handle) { #endif /* NDEBUG */ ia32_handle_intrinsics(); - ia32_switch_section(NULL, NO_SECTION); + ia32_switch_section(isa->out, NO_SECTION); fprintf(isa->out, "\t.intel_syntax\n"); + /* needed for the debug support */ + ia32_switch_section(isa->out, SECTION_TEXT); + fprintf(isa->out, ".Ltext0:\n"); + inited = 1; return isa; @@ -1429,7 +1524,7 @@ static void ia32_done(void *self) { ia32_isa_t *isa = self; /* emit now all global declarations */ - ia32_gen_decls(isa->out); + ia32_gen_decls(isa->out, isa->arch_isa.main_env); pmap_destroy(isa->regs_16bit); pmap_destroy(isa->regs_8bit); @@ -1541,7 +1636,8 @@ static void ia32_get_call_abi(const void *self, ir_type *method_type, be_abi_cal /* set stack parameters */ for (i = stack_idx; i < n; i++) { - be_abi_call_param_stack(abi, i, 1, 0, 0); + /* parameters on the stack are 32 bit aligned */ + be_abi_call_param_stack(abi, i, 4, 0, 0); } @@ -1560,7 +1656,7 @@ static void ia32_get_call_abi(const void *self, ir_type *method_type, be_abi_cal tp = get_method_res_type(method_type, 1); mode = get_type_mode(tp); - assert(!mode_is_float(mode) && "two FP results not supported"); + assert(!mode_is_float(mode) && "mixed INT, FP results not supported"); be_abi_call_res_reg(abi, 0, &ia32_gp_regs[REG_EAX]); be_abi_call_res_reg(abi, 1, &ia32_gp_regs[REG_EDX]); @@ -1572,9 +1668,7 @@ static void ia32_get_call_abi(const void *self, ir_type *method_type, be_abi_cal assert(is_atomic_type(tp)); mode = get_type_mode(tp); - reg = mode_is_float(mode) ? - (USE_SSE2(isa) ? &ia32_xmm_regs[REG_XMM0] : &ia32_vfp_regs[REG_VF0]) : - &ia32_gp_regs[REG_EAX]; + reg = mode_is_float(mode) ? &ia32_vfp_regs[REG_VF0] : &ia32_gp_regs[REG_EAX]; be_abi_call_res_reg(abi, 0, reg); } @@ -1604,14 +1698,22 @@ static const arch_code_generator_if_t *ia32_get_code_generator_if(void *self) { return &ia32_code_gen_if; } +/** + * Returns the estimated execution time of an ia32 irn. + */ +static sched_timestep_t ia32_sched_exectime(void *env, const ir_node *irn) { + const arch_env_t *arch_env = env; + return is_ia32_irn(irn) ? ia32_get_op_estimated_cost(arch_get_irn_ops(arch_env, irn), irn) : 1; +} + list_sched_selector_t ia32_sched_selector; /** * Returns the reg_pressure scheduler with to_appear_in_schedule() overloaded */ -static const list_sched_selector_t *ia32_get_list_sched_selector(const void *self) { -// memcpy(&ia32_sched_selector, reg_pressure_selector, sizeof(list_sched_selector_t)); - memcpy(&ia32_sched_selector, trivial_selector, sizeof(list_sched_selector_t)); +static const list_sched_selector_t *ia32_get_list_sched_selector(const void *self, list_sched_selector_t *selector) { + memcpy(&ia32_sched_selector, selector, sizeof(ia32_sched_selector)); + ia32_sched_selector.exectime = ia32_sched_exectime; ia32_sched_selector.to_appear_in_schedule = ia32_to_appear_in_schedule; return &ia32_sched_selector; }