diff --git a/ir/be/ia32/bearch_ia32.c b/ir/be/ia32/bearch_ia32.c
index fabcfd7c6..5b97324fe 100644
--- a/ir/be/ia32/bearch_ia32.c
+++ b/ir/be/ia32/bearch_ia32.c
@@ -66,6 +66,7 @@
 #include "../bemodule.h"
 #include "../begnuas.h"
 #include "../bestate.h"
+#include "../beflags.h"

 #include "bearch_ia32_t.h"

@@ -90,6 +91,24 @@ static set *cur_reg_set = NULL;
 ir_mode *mode_fpcw = NULL;
 ia32_code_gen_t *ia32_current_cg = NULL;

+/**
+ * The environment for the intrinsic mapping.
+ */
+static ia32_intrinsic_env_t intrinsic_env = {
+ NULL, /* the isa */
+ NULL, /* the irg, these entities belong to */
+ NULL, /* entity for first div operand (move into FPU) */
+ NULL, /* entity for second div operand (move into FPU) */
+ NULL, /* entity for converts ll -> d */
+ NULL, /* entity for converts d -> ll */
+ NULL, /* entity for __divdi3 library call */
+ NULL, /* entity for __moddi3 library call */
+ NULL, /* entity for __udivdi3 library call */
+ NULL, /* entity for __umoddi3 library call */
+ NULL, /* bias value for conversion from float to unsigned 64 */
+};
+
+
 typedef ir_node *(*create_const_node_func) (dbg_info *dbg, ir_graph *irg, ir_node *block);

 static INLINE ir_node *create_const(ia32_code_gen_t *cg, ir_node **place,
@@ -423,7 +442,7 @@ static const arch_register_t *ia32_abi_prologue(void *self, ir_node **mem, pmap
 be_node_set_flags(get_Proj_pred(curr_bp), BE_OUT_POS(get_Proj_proj(curr_bp)), arch_irn_flags_ignore);

 /* push ebp */
- push = new_rd_ia32_Push(NULL, env->irg, bl, noreg, noreg, curr_bp, curr_sp, *mem);
+ push = new_rd_ia32_Push(NULL, env->irg, bl, noreg, noreg, *mem, curr_bp, curr_sp);
 curr_sp = new_r_Proj(env->irg, bl, push, get_irn_mode(curr_sp), pn_ia32_Push_stack);
 *mem = new_r_Proj(env->irg, bl, push, mode_M, pn_ia32_Push_M);

@@ -475,31 +494,37 @@ static void ia32_abi_epilogue(void *self, ir_node *bl, ir_node **mem, pmap *reg_
 } else {
 const ia32_isa_t *isa = (ia32_isa_t *)env->isa;
 ia32_code_gen_t *cg = isa->cg;
- ir_mode *mode_bp = env->isa->bp->reg_class->mode;
+ ir_mode *mode_bp = env->isa->bp->reg_class->mode;
+ ir_graph *irg = current_ir_graph;

- /* gcc always emits a leave at the end of a routine */
- if (1 || ARCH_AMD(isa->opt_arch)) {
+ if (ARCH_AMD(isa->opt_arch)) {
 ir_node *leave;

 /* leave */
- leave = new_rd_ia32_Leave(NULL, env->irg, bl, curr_sp, curr_bp);
+ leave = new_rd_ia32_Leave(NULL, irg, bl, curr_sp, curr_bp);
 set_ia32_flags(leave, arch_irn_flags_ignore);
- curr_bp = new_r_Proj(current_ir_graph, bl, leave, mode_bp, pn_ia32_Leave_frame);
- curr_sp = new_r_Proj(current_ir_graph, bl, leave, get_irn_mode(curr_sp), pn_ia32_Leave_stack);
+ curr_bp = new_r_Proj(irg, bl, leave, mode_bp, pn_ia32_Leave_frame);
+ curr_sp = new_r_Proj(irg, bl, leave, get_irn_mode(curr_sp), pn_ia32_Leave_stack);
 } else {
 ir_node *noreg = ia32_new_NoReg_gp(cg);
 ir_node *pop;

+ /* the old SP is not needed anymore (kill the proj) */
+ assert(is_Proj(curr_sp));
+ be_kill_node(curr_sp);
+
 /* copy ebp to esp */
- curr_sp = be_new_SetSP(env->isa->sp, env->irg, bl, curr_sp, curr_bp, *mem);
+ curr_sp = be_new_Copy(&ia32_reg_classes[CLASS_ia32_gp], irg, bl, curr_bp);
+ arch_set_irn_register(env->aenv, curr_sp, env->isa->sp);
+ be_node_set_flags(curr_sp, BE_OUT_POS(0), arch_irn_flags_ignore);

 /* pop ebp */
- pop = new_rd_ia32_Pop(NULL, env->irg, bl, noreg, noreg, curr_sp, *mem);
+ pop = new_rd_ia32_Pop(NULL, env->irg, bl, noreg, noreg, *mem, curr_sp);
 set_ia32_flags(pop, arch_irn_flags_ignore);
- curr_bp = new_r_Proj(current_ir_graph, bl, pop, mode_bp, pn_ia32_Pop_res);
- curr_sp = new_r_Proj(current_ir_graph, bl, pop, get_irn_mode(curr_sp), pn_ia32_Pop_stack);
+ curr_bp = new_r_Proj(irg, bl, pop, mode_bp, pn_ia32_Pop_res);
+ curr_sp = new_r_Proj(irg, bl, pop, get_irn_mode(curr_sp), pn_ia32_Pop_stack);

- *mem = new_r_Proj(current_ir_graph, bl, pop, mode_M, pn_ia32_Pop_M);
+ *mem = new_r_Proj(irg, bl, pop, mode_M, pn_ia32_Pop_M);
 }
 arch_set_irn_register(env->aenv, curr_sp, env->isa->sp);
 arch_set_irn_register(env->aenv, curr_bp, env->isa->bp);
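
Most of the mechanical churn in this patch follows a single scheme: every ia32 node constructor now takes base, index and mem first, with the real data operands following, and magic operand numbers are replaced by named n_ia32_* indices. A minimal sketch of the in-index layout this implies (the names appear in the hunks below; the concrete values are an assumption for illustration, not taken from this patch):

    enum {
        n_ia32_base         = 0, /* base register of the address mode (or NoReg) */
        n_ia32_index        = 1, /* index register of the address mode (or NoReg) */
        n_ia32_mem          = 2, /* memory dependency */
        n_ia32_unary_op     = 3, /* the single data operand of unary nodes */
        n_ia32_binary_left  = 3, /* left data operand of binary nodes */
        n_ia32_binary_right = 4  /* right data operand of binary nodes */
    };

This is why a call like new_rd_ia32_Push(..., curr_bp, curr_sp, *mem) becomes new_rd_ia32_Push(..., *mem, curr_bp, curr_sp) throughout.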
@@ -653,7 +678,7 @@ static arch_inverse_t *ia32_get_inverse(const void *self, const ir_node *irn, in
 return NULL;

 /* operand must always be a real operand (not base, index or mem) */
- if (i != 2 && i != 3)
+ if (i != n_ia32_binary_left && i != n_ia32_binary_right)
 return NULL;

 /* we don't invert address mode operations */
@@ -684,7 +709,7 @@ static arch_inverse_t *ia32_get_inverse(const void *self, const ir_node *irn, in
 if (get_ia32_immop_type(irn) == ia32_ImmConst) {
 /* we have an add with a const here */
 /* invers == add with negated const */
- inverse->nodes[0] = new_rd_ia32_Add(dbg, irg, block, noreg, noreg, get_irn_n(irn, i), noreg, nomem);
+ inverse->nodes[0] = new_rd_ia32_Add(dbg, irg, block, noreg, noreg, nomem, get_irn_n(irn, i), noreg);
 inverse->costs += 1;
 copy_ia32_Immop_attr(inverse->nodes[0], (ir_node *)irn);
 set_ia32_Immop_tarval(inverse->nodes[0], tarval_neg(get_ia32_Immop_tarval(irn)));
@@ -693,13 +718,13 @@ static arch_inverse_t *ia32_get_inverse(const void *self, const ir_node *irn, in
 else if (get_ia32_immop_type(irn) == ia32_ImmSymConst) {
 /* we have an add with a symconst here */
 /* invers == sub with const */
- inverse->nodes[0] = new_rd_ia32_Sub(dbg, irg, block, noreg, noreg, get_irn_n(irn, i), noreg, nomem);
+ inverse->nodes[0] = new_rd_ia32_Sub(dbg, irg, block, noreg, noreg, nomem, get_irn_n(irn, i), noreg);
 inverse->costs += 2;
 copy_ia32_Immop_attr(inverse->nodes[0], (ir_node *)irn);
 }
 else {
 /* normal add: inverse == sub */
- inverse->nodes[0] = new_rd_ia32_Sub(dbg, irg, block, noreg, noreg, (ir_node*) irn, get_irn_n(irn, i ^ 1), nomem);
+ inverse->nodes[0] = new_rd_ia32_Sub(dbg, irg, block, noreg, noreg, nomem, (ir_node*) irn, get_irn_n(irn, i ^ 1));
 inverse->costs += 2;
 }
 #endif
@@ -709,17 +734,17 @@ static arch_inverse_t *ia32_get_inverse(const void *self, const ir_node *irn, in
 if (get_ia32_immop_type(irn) != ia32_ImmNone) {
 /* we have a sub with a const/symconst here */
 /* invers == add with this const */
- inverse->nodes[0] = new_rd_ia32_Add(dbg, irg, block, noreg, noreg, get_irn_n(irn, i), noreg, nomem);
+ inverse->nodes[0] = new_rd_ia32_Add(dbg, irg, block, noreg, noreg, nomem, get_irn_n(irn, i), noreg);
 inverse->costs += (get_ia32_immop_type(irn) == ia32_ImmSymConst) ? 5 : 1;
 copy_ia32_Immop_attr(inverse->nodes[0], (ir_node *)irn);
 }
 else {
 /* normal sub */
- if (i == 2) {
- inverse->nodes[0] = new_rd_ia32_Add(dbg, irg, block, noreg, noreg, (ir_node*) irn, get_irn_n(irn, 3), nomem);
+ if (i == n_ia32_binary_left) {
+ inverse->nodes[0] = new_rd_ia32_Add(dbg, irg, block, noreg, noreg, nomem, (ir_node*) irn, get_irn_n(irn, 3));
 } else {
- inverse->nodes[0] = new_rd_ia32_Sub(dbg, irg, block, noreg, noreg, get_irn_n(irn, 2), (ir_node*) irn, nomem);
+ inverse->nodes[0] = new_rd_ia32_Sub(dbg, irg, block, noreg, noreg, nomem, get_irn_n(irn, n_ia32_binary_left), (ir_node*) irn);
 }
 inverse->costs += 1;
 }
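
The inverse rules above are plain algebra on the recomputed value: an add with an immediate is undone by adding the negated immediate, a sub with an immediate by adding it back. As a freestanding illustration (not backend code):

    /* y = x + c  =>  x = y + (-c)   (ia32_Add with the negated Immop tarval)
     * y = x - c  =>  x = y + c      (ia32_Add reusing the same Immop)       */
    int undo_add(int y, int c) { return y + (-c); }
    int undo_sub(int y, int c) { return y + c; }

The cost of 5 instead of 1 in the SymConst case presumably reflects that a symbolic address constant is more expensive to rematerialize than a plain immediate.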
@@ -729,13 +754,13 @@ static arch_inverse_t *ia32_get_inverse(const void *self, const ir_node *irn, in
 #if 0
 if (get_ia32_immop_type(irn) != ia32_ImmNone) {
 /* xor with const: inverse = xor */
- inverse->nodes[0] = new_rd_ia32_Xor(dbg, irg, block, noreg, noreg, get_irn_n(irn, i), noreg, nomem);
+ inverse->nodes[0] = new_rd_ia32_Xor(dbg, irg, block, noreg, noreg, nomem, get_irn_n(irn, i), noreg);
 inverse->costs += (get_ia32_immop_type(irn) == ia32_ImmSymConst) ? 5 : 1;
 copy_ia32_Immop_attr(inverse->nodes[0], (ir_node *)irn);
 } else {
 /* normal xor */
- inverse->nodes[0] = new_rd_ia32_Xor(dbg, irg, block, noreg, noreg, (ir_node *) irn, get_irn_n(irn, i), nomem);
+ inverse->nodes[0] = new_rd_ia32_Xor(dbg, irg, block, noreg, noreg, nomem, (ir_node *) irn, get_irn_n(irn, i));
 inverse->costs += 1;
 }
 #endif
@@ -801,23 +826,23 @@ static int ia32_possible_memory_operand(const void *self, const ir_node *irn, un
 const ir_mode *spillmode = get_spill_mode(op);
 (void) self;

- if (! is_ia32_irn(irn) || /* must be an ia32 irn */
- get_irn_arity(irn) != 5 || /* must be a binary operation */
- get_ia32_op_type(irn) != ia32_Normal || /* must not already be an addressmode irn */
- ! (get_ia32_am_support(irn) & ia32_am_Source) || /* must be capable of source addressmode */
- ! ia32_is_spillmode_compatible(mode, spillmode) ||
- (i != 2 && i != 3) || /* a "real" operand position must be requested */
- is_ia32_use_frame(irn)) /* must not already use frame */
+ if (! is_ia32_irn(irn) || /* must be an ia32 irn */
+ get_ia32_am_arity(irn) != 2 || /* must be a binary operation TODO is this necessary? */
+ get_ia32_op_type(irn) != ia32_Normal || /* must not already be an addressmode irn */
+ ! (get_ia32_am_support(irn) & ia32_am_Source) || /* must be capable of source addressmode */
+ ! ia32_is_spillmode_compatible(mode, spillmode) ||
+ (i != n_ia32_binary_left && i != n_ia32_binary_right) || /* a "real" operand position must be requested */
+ is_ia32_use_frame(irn)) /* must not already use frame */
 return 0;

- if(i == 2) {
+ if (i == n_ia32_binary_left) {
 const arch_register_req_t *req;

 if(!is_ia32_commutative(irn))
 return 0;

 /* we can't swap left/right for limited registers
 * (As this (currently) breaks constraint handling copies) */
- req = get_ia32_in_req(irn, 2);
+ req = get_ia32_in_req(irn, n_ia32_binary_left);
 if(req->type & arch_register_req_type_limited) {
 return 0;
 }
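
ia32_possible_memory_operand decides whether a reloaded value may be folded into its user as a source address-mode operand, i.e. whether "reload into register, then use" can become a single instruction that reads the spill slot directly. A compact model of the tests above, reusing the n_ia32_* sketch from earlier (spill-mode compatibility and the frame/limited-register checks are omitted for brevity):

    /* A binary ia32 node can take a memory operand iff it supports source
     * AM, is not in AM form already, and a real data operand is requested.
     * Folding the left operand additionally requires commutativity, since
     * the folded operand is swapped into the right slot. */
    static int can_fold(int supports_src_am, int already_am, int idx,
                        int commutative) {
        if (!supports_src_am || already_am)
            return 0;
        if (idx != n_ia32_binary_left && idx != n_ia32_binary_right)
            return 0;
        return idx == n_ia32_binary_right || commutative;
    }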
@@ -834,7 +859,7 @@ static void ia32_perform_memory_operand(const void *self, ir_node *irn,
 assert(ia32_possible_memory_operand(self, irn, i) && "Cannot perform memory operand change");

- if (i == 2) {
+ if (i == n_ia32_binary_left) {
 ia32_swap_left_right(irn);
 }

@@ -843,12 +868,12 @@ static void ia32_perform_memory_operand(const void *self, ir_node *irn,
 set_ia32_use_frame(irn);
 set_ia32_need_stackent(irn);

- set_irn_n(irn, 0, get_irg_frame(get_irn_irg(irn)));
- set_irn_n(irn, 3, ia32_get_admissible_noreg(cg, irn, 3));
- set_irn_n(irn, 4, spill);
+ set_irn_n(irn, n_ia32_base, get_irg_frame(get_irn_irg(irn)));
+ set_irn_n(irn, n_ia32_binary_right, ia32_get_admissible_noreg(cg, irn, n_ia32_binary_right));
+ set_irn_n(irn, n_ia32_mem, spill);

 /* immediates are only allowed on the right side */
- if(i == 2 && is_ia32_Immediate(get_irn_n(irn, 2))) {
+ if (i == n_ia32_binary_left && is_ia32_Immediate(get_irn_n(irn, n_ia32_binary_left))) {
 ia32_swap_left_right(irn);
 }
 }
@@ -898,6 +923,14 @@ ia32_irn_ops_t ia32_irn_ops = {
 * |___/
 **************************************************/

+static void ia32_before_abi(void *self) {
+ ia32_code_gen_t *cg = self;
+
+ ir_lower_mode_b(cg->irg, mode_Iu, 0);
+ if(cg->dump)
+ be_dump(cg->irg, "-lower_modeb", dump_ir_block_graph_sched);
+}
+
 /**
 * Transforms the standard firm graph into
 * an ia32 firm graph
@@ -905,11 +938,19 @@ static void ia32_prepare_graph(void *self) {
 ia32_code_gen_t *cg = self;

- ir_lower_mode_b(cg->irg, mode_Iu, 0);
 /* do local optimisations */
 optimize_graph_df(cg->irg);
+
+ /* TODO: we often have dead code reachable through out-edges here. So for
+ * now we rebuild edges (as we need correct user count for code selection)
+ */
+#if 1
+ edges_deactivate(cg->irg);
+ edges_activate(cg->irg);
+#endif
+
 if(cg->dump)
- be_dump(cg->irg, "-lower_modeb", dump_ir_block_graph_sched);
+ be_dump(cg->irg, "-pre_transform", dump_ir_block_graph_sched);

 /* transform nodes into assembler instructions */
 ia32_transform_graph(cg);
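
The new ia32_before_abi hook runs ir_lower_mode_b before the ABI is introduced: boolean mode_b values that are used as data are materialized as 0/1 integers in mode_Iu, so they can live in general-purpose registers, while conditional jumps keep consuming the condition directly. In C terms the lowering corresponds to:

    /* a mode_b value used as data becomes an explicit 0/1 in mode_Iu */
    unsigned lowered_less(int x, int y) {
        return x < y ? 1u : 0u;
    }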
@@ -940,16 +981,113 @@ static void ia32_before_sched(void *self) {
 (void) self;
 }

+static void turn_back_am(ir_node *node)
+{
+ ir_graph *irg = current_ir_graph;
+ dbg_info *dbgi = get_irn_dbg_info(node);
+ ir_node *block = get_nodes_block(node);
+ ir_node *base = get_irn_n(node, n_ia32_base);
+ ir_node *index = get_irn_n(node, n_ia32_index);
+ ir_node *mem = get_irn_n(node, n_ia32_mem);
+ ir_node *noreg = ia32_new_NoReg_gp(ia32_current_cg);
+ ir_node *load;
+ ir_node *load_res;
+ ir_node *mem_proj;
+ const ir_edge_t *edge;
+
+ load = new_rd_ia32_Load(dbgi, irg, block, base, index, mem);
+ load_res = new_rd_Proj(dbgi, irg, block, load, mode_Iu, pn_ia32_Load_res);
+
+ ia32_copy_am_attrs(load, node);
+ set_irn_n(node, n_ia32_mem, new_NoMem());
+
+ if(get_ia32_am_arity(node) == ia32_am_unary) {
+ set_irn_n(node, n_ia32_unary_op, load_res);
+ } else if(get_ia32_am_arity(node) == ia32_am_binary) {
+ if(is_ia32_Immediate(get_irn_n(node, n_ia32_Cmp_right))) {
+ assert(is_ia32_Cmp(node) || is_ia32_Cmp8Bit(node)
+ || is_ia32_Test(node) || is_ia32_Test8Bit(node));
+ set_irn_n(node, n_ia32_binary_left, load_res);
+ } else {
+ set_irn_n(node, n_ia32_binary_right, load_res);
+ }
+ } else if(get_ia32_am_arity(node) == ia32_am_ternary) {
+ set_irn_n(node, n_ia32_binary_right, load_res);
+ }
+ set_irn_n(node, n_ia32_base, noreg);
+ set_irn_n(node, n_ia32_index, noreg);
+ set_ia32_am_offs_int(node, 0);
+ set_ia32_am_sc(node, NULL);
+ set_ia32_am_scale(node, 0);
+ clear_ia32_am_sc_sign(node);
+
+ /* rewire mem-proj */
+ if(get_irn_mode(node) == mode_T) {
+ mem_proj = NULL;
+ foreach_out_edge(node, edge) {
+ ir_node *out = get_edge_src_irn(edge);
+ if(get_Proj_proj(out) == pn_ia32_mem) {
+ mem_proj = out;
+ break;
+ }
+ }
+
+ if(mem_proj != NULL) {
+ set_Proj_pred(mem_proj, load);
+ set_Proj_proj(mem_proj, pn_ia32_Load_M);
+ }
+ }
+
+ set_ia32_op_type(node, ia32_Normal);
+ if(sched_is_scheduled(node))
+ sched_add_before(node, load);
+}
+
+static ir_node *flags_remat(ir_node *node, ir_node *after)
+{
+ /* we should turn back source address mode when rematerializing nodes */
+ ia32_op_type_t type = get_ia32_op_type(node);
+ ir_node *block;
+ ir_node *copy;
+
+ if(is_Block(after)) {
+ block = after;
+ } else {
+ block = get_nodes_block(after);
+ }
+
+ if (type == ia32_AddrModeS) {
+ turn_back_am(node);
+ } else if (type == ia32_AddrModeD) {
+ /* TODO implement this later... */
+ panic("found DestAM with flag user %+F this should not happen", node);
+ } else {
+ assert(type == ia32_Normal);
+ }
+
+ copy = exact_copy(node);
+ set_nodes_block(copy, block);
+ sched_add_after(after, copy);
+
+ return copy;
+}
+
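
flags_remat is the rematerialization callback for the new flags fixup pass: rather than trying to preserve EFLAGS across an instruction that clobbers it, the scheduler re-executes the flag-producing node right before its consumer. Rematerializing a node in source address mode would also duplicate its memory access, which is why turn_back_am first splits that access out into an explicit Load. The idea in miniature, as plain C (illustrative only):

    /* a cheap, pure computation is re-evaluated at its second use instead
     * of being kept alive across an operation that would destroy it */
    static int is_less(int a, int b) { return a < b; }

    int remat_demo(int a, int b) {
        int first = is_less(a, b);   /* original computation */
        /* ... intervening code clobbers the "flags" ... */
        int second = is_less(a, b);  /* rematerialized copy */
        return first + second;
    }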
 /**
 * Called before the register allocator.
 * Calculate a block schedule here. We need it for the x87
 * simulator and the emitter.
 */
 static void ia32_before_ra(void *self) {
- ia32_code_gen_t *cg = self;
+ ia32_code_gen_t *cg = self;

 /* setup fpu rounding modes */
 ia32_setup_fpu_mode(cg);
+
+ /* fixup flags */
+ be_sched_fix_flags(cg->birg, &ia32_reg_classes[CLASS_ia32_flags],
+ &flags_remat);
+
+ ia32_add_missing_keeps(cg);
 }

@@ -1045,16 +1183,16 @@ static void transform_to_Store(ia32_code_gen_t *cg, ir_node *node) {

 if (mode_is_float(mode)) {
 if (USE_SSE2(cg))
- store = new_rd_ia32_xStore(dbg, irg, block, ptr, noreg, val, nomem);
+ store = new_rd_ia32_xStore(dbg, irg, block, ptr, noreg, nomem, val);
 else
- store = new_rd_ia32_vfst(dbg, irg, block, ptr, noreg, val, nomem, mode);
+ store = new_rd_ia32_vfst(dbg, irg, block, ptr, noreg, nomem, val, mode);
 } else if (get_mode_size_bits(mode) == 128) {
 // Spill 128 bit SSE registers
- store = new_rd_ia32_xxStore(dbg, irg, block, ptr, noreg, val, nomem);
+ store = new_rd_ia32_xxStore(dbg, irg, block, ptr, noreg, nomem, val);
 } else if (get_mode_size_bits(mode) == 8) {
- store = new_rd_ia32_Store8Bit(dbg, irg, block, ptr, noreg, val, nomem);
+ store = new_rd_ia32_Store8Bit(dbg, irg, block, ptr, noreg, nomem, val);
 } else {
- store = new_rd_ia32_Store(dbg, irg, block, ptr, noreg, val, nomem);
+ store = new_rd_ia32_Store(dbg, irg, block, ptr, noreg, nomem, val);
 }

 set_ia32_op_type(store, ia32_AddrModeD);
@@ -1079,7 +1217,7 @@ static ir_node *create_push(ia32_code_gen_t *cg, ir_node *node, ir_node *schedpo
 ir_node *noreg = ia32_new_NoReg_gp(cg);
 ir_node *frame = get_irg_frame(irg);

- ir_node *push = new_rd_ia32_Push(dbg, irg, block, frame, noreg, noreg, sp, mem);
+ ir_node *push = new_rd_ia32_Push(dbg, irg, block, frame, noreg, mem, noreg, sp);

 set_ia32_frame_ent(push, ent);
 set_ia32_use_frame(push);
@@ -1097,7 +1235,7 @@ static ir_node *create_pop(ia32_code_gen_t *cg, ir_node *node, ir_node *schedpoi
 ir_node *noreg = ia32_new_NoReg_gp(cg);
 ir_node *frame = get_irg_frame(irg);

- ir_node *pop = new_rd_ia32_Pop(dbg, irg, block, frame, noreg, sp, new_NoMem());
+ ir_node *pop = new_rd_ia32_Pop(dbg, irg, block, frame, noreg, new_NoMem(), sp);

 set_ia32_frame_ent(pop, ent);
 set_ia32_use_frame(pop);
@@ -1301,8 +1439,6 @@ static void ia32_after_ra(void *self) {
 be_free_frame_entity_coalescer(fec_env);

 irg_block_walk_graph(irg, NULL, ia32_after_ra_walker, cg);
-
- ia32_finish_irg(irg, cg);
 }

 /**
@@ -1314,6 +1450,8 @@ static void ia32_finish(void *self) {
 ia32_code_gen_t *cg = self;
 ir_graph *irg = cg->irg;

+ ia32_finish_irg(irg, cg);
+
 /* we might have to rewrite x87 virtual registers */
 if (cg->do_x87_sim) {
 x87_simulate_graph(cg->arch_env, cg->birg);
@@ -1354,7 +1492,7 @@ static void *ia32_cg_init(be_irg_t *birg);

 static const arch_code_generator_if_t ia32_code_gen_if = {
 ia32_cg_init,
- NULL, /* before abi introduce hook */
+ ia32_before_abi, /* before abi introduce hook */
 ia32_prepare_graph,
 NULL, /* spill */
 ia32_before_sched, /* before scheduling hook */
@@ -1459,7 +1597,6 @@ static ia32_isa_t ia32_isa_template = {
 7, /* costs for a spill instruction */
 5, /* costs for a reload instruction */
 },
- NULL_EMITTER, /* emitter environment */
 NULL, /* 16bit register names */
 NULL, /* 8bit register names */
 NULL, /* 8bit register names high */
@@ -1482,6 +1619,8 @@ static ia32_isa_t ia32_isa_template = {
 #endif
 };

+static void set_arch_costs(enum cpu_support arch);
+
 /**
 * Initializes the backend ISA.
 */
@@ -1505,6 +1644,8 @@ static void *ia32_init(FILE *file_handle) {
 ia32_register_init();
 ia32_create_opcodes();

+ set_arch_costs(isa->opt_arch);
+
 if ((ARCH_INTEL(isa->arch) && isa->arch < arch_pentium_4) ||
 (ARCH_AMD(isa->arch) && isa->arch < arch_athlon))
 /* no SSE2 for these cpu's */
@@ -1515,7 +1656,7 @@ static void *ia32_init(FILE *file_handle) {
 isa->opt &= ~IA32_OPT_INCDEC;
 }

- be_emit_init_env(&isa->emit, file_handle);
+ be_emit_init(file_handle);
 isa->regs_16bit = pmap_create();
 isa->regs_8bit = pmap_create();
 isa->regs_8bit_high = pmap_create();
@@ -1532,12 +1673,14 @@ static void *ia32_init(FILE *file_handle) {
 obstack_init(isa->name_obst);
 #endif /* NDEBUG */

+ /* enter the ISA object into the intrinsic environment */
+ intrinsic_env.isa = isa;
 ia32_handle_intrinsics();

 /* needed for the debug support */
- be_gas_emit_switch_section(&isa->emit, GAS_SECTION_TEXT);
- be_emit_cstring(&isa->emit, ".Ltext0:\n");
- be_emit_write_line(&isa->emit);
+ be_gas_emit_switch_section(GAS_SECTION_TEXT);
+ be_emit_cstring(".Ltext0:\n");
+ be_emit_write_line();

 /* we mark referenced global entities, so we can only emit those which
 * are actually referenced. (Note: you mustn't use the type visited flag
@@ -1557,7 +1700,7 @@ static void ia32_done(void *self) {
 ia32_isa_t *isa = self;

 /* emit now all global declarations */
- be_gas_emit_decls(&isa->emit, isa->arch_isa.main_env, 1);
+ be_gas_emit_decls(isa->arch_isa.main_env, 1);

 pmap_destroy(isa->regs_16bit);
 pmap_destroy(isa->regs_8bit);
@@ -1569,7 +1712,7 @@ static void ia32_done(void *self) {
 obstack_free(isa->name_obst, NULL);
 #endif /* NDEBUG */

- be_emit_destroy_env(&isa->emit);
+ be_emit_exit();

 free(self);
 }
@@ -1627,11 +1770,9 @@ static void ia32_get_call_abi(const void *self, ir_type *method_type, be_abi_cal
 int n, i, regnum;
 be_abi_call_flags_t call_flags = be_abi_call_get_flags(abi);

- unsigned use_push = !IS_P6_ARCH(isa->opt_arch);
-
 /* set abi flags for calls */
 call_flags.bits.left_to_right = 0; /* always last arg first on stack */
- call_flags.bits.store_args_sequential = use_push;
+ call_flags.bits.store_args_sequential = 0;
 /* call_flags.bits.try_omit_fp not changed: can handle both settings */
 call_flags.bits.fp_free = 0; /* the frame pointer is fixed in IA32 */
 call_flags.bits.call_has_imm = 1; /* IA32 calls can have immediate address */
@@ -1900,18 +2041,172 @@ static int ia32_is_psi_allowed(ir_node *sel, ir_node *phi_list, int i, int j)
 return 1;
 }

-static ia32_intrinsic_env_t intrinsic_env = {
- NULL, /**< the irg, these entities belong to */
- NULL, /**< entity for first div operand (move into FPU) */
- NULL, /**< entity for second div operand (move into FPU) */
- NULL, /**< entity for converts ll -> d */
- NULL, /**< entity for converts d -> ll */
- NULL, /**< entity for __divdi3 library call */
- NULL, /**< entity for __moddi3 library call */
- NULL, /**< entity for __udivdi3 library call */
- NULL, /**< entity for __umoddi3 library call */
+typedef struct insn_const {
+ int add_cost; /**< cost of an add instruction */
+ int lea_cost; /**< cost of a lea instruction */
+ int const_shf_cost; /**< cost of a constant shift instruction */
+ int cost_mul_start; /**< starting cost of a multiply instruction */
+ int cost_mul_bit; /**< cost of multiply for every set bit */
+} insn_const;
+
+/* costs for the i386 */
+static const insn_const i386_cost = {
+ 1, /* cost of an add instruction */
+ 1, /* cost of a lea instruction */
+ 2, /* cost of a constant shift instruction */
+ 6, /* starting cost of a multiply instruction */
+ 1 /* cost of multiply for every set bit */
+};
+
+/* costs for the i486 */
+static const insn_const i486_cost = {
+ 1, /* cost of an add instruction */
+ 1, /* cost of a lea instruction */
+ 2, /* cost of a constant shift instruction */
+ 12, /* starting cost of a multiply instruction */
+ 1 /* cost of multiply for every set bit */
+};
+
+/* costs for the Pentium */
+static const insn_const pentium_cost = {
+ 1, /* cost of an add instruction */
+ 1, /* cost of a lea instruction */
+ 1, /* cost of a constant shift instruction */
+ 11, /* starting cost of a multiply instruction */
+ 0 /* cost of multiply for every set bit */
 };

+/* costs for the Pentium Pro */
+static const insn_const pentiumpro_cost = {
+ 1, /* cost of an add instruction */
+ 1, /* cost of a lea instruction */
+ 1, /* cost of a constant shift instruction */
+ 4, /* starting cost of a multiply instruction */
+ 0 /* cost of multiply for every set bit */
+};
+
+/* costs for the K6 */
+static const insn_const k6_cost = {
+ 1, /* cost of an add instruction */
+ 2, /* cost of a lea instruction */
+ 1, /* cost of a constant shift instruction */
+ 3, /* starting cost of a multiply instruction */
+ 0 /* cost of multiply for every set bit */
+};
+
+/* costs for the Athlon */
+static const insn_const athlon_cost = {
+ 1, /* cost of an add instruction */
+ 2, /* cost of a lea instruction */
+ 1, /* cost of a constant shift instruction */
+ 5, /* starting cost of a multiply instruction */
+ 0 /* cost of multiply for every set bit */
+};
+
+/* costs for the Pentium 4 */
+static const insn_const pentium4_cost = {
+ 1, /* cost of an add instruction */
+ 3, /* cost of a lea instruction */
+ 4, /* cost of a constant shift instruction */
+ 15, /* starting cost of a multiply instruction */
+ 0 /* cost of multiply for every set bit */
+};
+
+/* costs for the Core */
+static const insn_const core_cost = {
+ 1, /* cost of an add instruction */
+ 1, /* cost of a lea instruction */
+ 1, /* cost of a constant shift instruction */
+ 10, /* starting cost of a multiply instruction */
+ 0 /* cost of multiply for every set bit */
+};
+
+/* costs for the generic */
+static const insn_const generic_cost = {
+ 1, /* cost of an add instruction */
+ 2, /* cost of a lea instruction */
+ 1, /* cost of a constant shift instruction */
+ 4, /* starting cost of a multiply instruction */
+ 0 /* cost of multiply for every set bit */
+};
+
+static const insn_const *arch_costs = &generic_cost;
+
+static void set_arch_costs(enum cpu_support arch) {
+ switch (arch) {
+ case arch_i386:
+ arch_costs = &i386_cost;
+ break;
+ case arch_i486:
+ arch_costs = &i486_cost;
+ break;
+ case arch_pentium:
+ case arch_pentium_mmx:
+ arch_costs = &pentium_cost;
+ break;
+ case arch_pentium_pro:
+ case arch_pentium_2:
+ case arch_pentium_3:
+ arch_costs = &pentiumpro_cost;
+ break;
+ case arch_pentium_4:
+ arch_costs = &pentium4_cost;
+ break;
+ case arch_pentium_m:
+ arch_costs = &pentiumpro_cost;
+ break;
+ case arch_core:
+ arch_costs = &core_cost;
+ break;
+ case arch_k6:
+ arch_costs = &k6_cost;
+ break;
+ case arch_athlon:
+ case arch_athlon_64:
+ case arch_opteron:
+ arch_costs = &athlon_cost;
+ break;
+ case arch_generic:
+ default:
+ arch_costs = &generic_cost;
+ }
+}
+
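
The multiply costing implemented by ia32_evaluate_insn below can be checked against these tables by hand: on the i386, where cost_mul_bit is 1, a multiply by 5 (binary 101, two set bits) is rated 6 + 2 * 1 = 8, while the lea computing the same product is rated 1, so the lea/shift replacement wins. A standalone sketch of the bit-counting loop, using a plain unsigned instead of a tarval bit pattern:

    #include <stdio.h>

    static int mul_const_cost(int start_cost, int bit_cost, unsigned v) {
        int cost = start_cost;
        for (; v != 0; v >>= 1)   /* walk the constant's bit pattern */
            if (v & 1u)
                cost += bit_cost;
        return cost;
    }

    int main(void) {
        printf("%d\n", mul_const_cost(6, 1, 5)); /* i386, x * 5: prints 8 */
        return 0;
    }

This hook is what lets the generic arch_dep machinery replace an expensive imul by lea/shift/add sequences exactly on the processors where that pays off.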
+/**
+ * Evaluate a given simple instruction.
+ */
+static int ia32_evaluate_insn(insn_kind kind, tarval *tv) {
+ int cost;
+
+ switch (kind) {
+ case MUL:
+ cost = arch_costs->cost_mul_start;
+ if (arch_costs->cost_mul_bit > 0) {
+ char *bitstr = get_tarval_bitpattern(tv);
+ int i;
+
+ for (i = 0; bitstr[i] != '\0'; ++i) {
+ if (bitstr[i] == '1') {
+ cost += arch_costs->cost_mul_bit;
+ }
+ }
+ free(bitstr);
+ }
+ return cost;
+ case LEA:
+ return arch_costs->lea_cost;
+ case ADD:
+ case SUB:
+ return arch_costs->add_cost;
+ case SHIFT:
+ return arch_costs->const_shf_cost;
+ case ZERO:
+ return arch_costs->add_cost;
+ default:
+ return 1;
+ }
+}
+
 /**
 * Returns the libFirm configuration parameter for this backend.
 */
@@ -1921,9 +2216,10 @@ static const backend_params *ia32_get_libfirm_params(void) {
 ia32_is_psi_allowed /* allows or disallows Psi creation for given selector */
 };
 static const ir_settings_arch_dep_t ad = {
- 1, /* also use subs */
- 4, /* maximum shifts */
- 31, /* maximum shift amount */
+ 1, /* also use subs */
+ 4, /* maximum shifts */
+ 31, /* maximum shift amount */
+ ia32_evaluate_insn, /* evaluate the instruction sequence */

 1, /* allow Mulhs */
 1, /* allow Mulus */
@@ -1966,6 +2262,7 @@ static const lc_opt_enum_int_items_t arch_items[] = {
 { "athlon", arch_athlon, },
 { "athlon64", arch_athlon_64, },
 { "opteron", arch_opteron, },
+ { "generic", arch_generic, },
 { NULL, 0 }
 };