noreg = ia32_new_NoReg_gp(cg);
base = be_get_IncSP_pred(incsp);
val = get_irn_n(node, n_ia32_Store_val);
- push = new_rd_ia32_Push(dbgi, irg, block, noreg, noreg, mem, base, val);
+ push = new_rd_ia32_Push(dbgi, irg, block, noreg, noreg, mem, val, base);
proj = new_r_Proj(irg, block, push, mode_M, pn_ia32_Push_M);
be_peephole_after_exchange(flags_proj);
}
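
For orientation, a sketch of what the Push node computes with the corrected operand order; this is illustrative, the authoritative operand list is the ia32 node specification:

/* ia32_Push(base, index, mem, val, stack) - illustrative semantics:
 *   new_stack = stack - 4;      decrement the stack pointer
 *   M[new_stack] = val;         store the value through it
 * Results: a memory value (pn_ia32_Push_M, projected above) and the
 * new stack pointer value. */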
-// only optimize up to 48 stores behind IncSPs
+/**
+ * The AMD Athlon works faster when a RET is not the destination of a
+ * conditional jump or directly preceded by another jump instruction.
+ * This can be avoided by placing a Rep prefix before the return.
+ */
+static void peephole_ia32_Return(ir_node *node) {
+ ir_node *block, *irn;
+
+ if (!ia32_cg_config.use_pad_return)
+ return;
+
+ block = get_nodes_block(node);
+
+ if (get_Block_n_cfgpreds(block) == 1) {
+ ir_node *pred = get_Block_cfgpred(block, 0);
+
+ if (is_Jmp(pred)) {
+ /* The block of the return has only one predecessor,
+ which jumps directly to this block.
+ This jump will be encoded as a fall through, so we
+ ignore it here.
+ However, the predecessor might be empty, so it must be
+ ensured that empty blocks have already been removed. */
+ return;
+ }
+ }
+
+ /* check whether this return is the first instruction in the block */
+ sched_foreach_reverse_from(node, irn) {
+ switch (be_get_irn_opcode(irn)) {
+ case beo_Return:
+ /* the return node itself, ignore */
+ continue;
+ case beo_Barrier:
+ /* ignore the barrier, no code generated */
+ continue;
+ case beo_IncSP:
+ /* arg, IncSP 0 nodes might occur, ignore these */
+ if (be_get_IncSP_offset(irn) == 0)
+ continue;
+ return;
+ default:
+ if (is_Phi(irn))
+ continue;
+ return;
+ }
+ }
+ /* yep, return is the first real instruction in this block */
+#if 0
+ {
+ /* add a Rep prefix to the return */
+ ir_node *rep = new_rd_ia32_RepPrefix(get_irn_dbg_info(node), current_ir_graph, block);
+ keep_alive(rep);
+ sched_add_before(node, rep);
+ }
+#else
+ /* ensure that the 3-byte return is generated */
+ be_Return_set_emit_pop(node, 1);
+#endif
+}
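
For readers unfamiliar with the quirk, the relevant encodings look roughly as follows, assuming be_Return_set_emit_pop() makes the emitter pick the ret imm16 form, as the "3 byte return" comment above suggests:

/* Illustrative x86 encodings:
 *   c3         ret       1-byte return; slow on Athlon after a jump
 *   f3 c3      rep ret   Rep-prefixed return (the #if 0 variant)
 *   c2 00 00   ret $0    3-byte return, forced by the #else branch */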
+
+/* only optimize up to 48 stores behind IncSPs */
#define MAXPUSH_OPTIMIZE 48
/**
mem = get_irn_n(store, n_ia32_mem);
spreg = arch_get_irn_register(cg->arch_env, curr_sp);
- push = new_rd_ia32_Push(get_irn_dbg_info(store), irg, block, noreg, noreg, mem, curr_sp, val);
+ push = new_rd_ia32_Push(get_irn_dbg_info(store), irg, block, noreg, noreg, mem, val, curr_sp);
sched_add_before(irn, push);
ir_node *keep;
ir_node *val;
ir_node *pop, *pop2;
- ir_node *noreg;
ir_node *stack;
int offset;
/* replace IncSP -4/-8 with Pop(s) and +4/+8 with Push(es) of a free register where the target prefers that */
offset = be_get_IncSP_offset(node);
- if (!(offset == -4 && !ia32_cg_config.use_add_esp_4) &&
- !(offset == -8 && !ia32_cg_config.use_add_esp_8) &&
- !(offset == +4 && !ia32_cg_config.use_sub_esp_4) &&
- !(offset == +8 && !ia32_cg_config.use_sub_esp_8))
+ if ((offset != -8 || !ia32_cg_config.use_add_esp_8) &&
+ (offset != -4 || !ia32_cg_config.use_add_esp_4) &&
+ (offset != +4 || !ia32_cg_config.use_sub_esp_4) &&
+ (offset != +8 || !ia32_cg_config.use_sub_esp_8))
return;
if (offset < 0) {
irg = current_ir_graph;
dbgi = get_irn_dbg_info(node);
block = get_nodes_block(node);
- noreg = ia32_new_NoReg_gp(cg);
stack = be_get_IncSP_pred(node);
- pop = new_rd_ia32_Pop(dbgi, irg, block, noreg, noreg, new_NoMem(), stack);
+ pop = new_rd_ia32_Pop(dbgi, irg, block, new_NoMem(), stack);
stack = new_r_Proj(irg, block, pop, mode_Iu, pn_ia32_Pop_stack);
arch_set_irn_register(arch_env, stack, esp);
}
if (offset == -8) {
- pop2 = new_rd_ia32_Pop(dbgi, irg, block, noreg, noreg, new_NoMem(), stack);
+ pop2 = new_rd_ia32_Pop(dbgi, irg, block, new_NoMem(), stack);
stack = new_r_Proj(irg, block, pop2, mode_Iu, pn_ia32_Pop_stack);
arch_set_irn_register(arch_env, stack, esp);
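
At the instruction level the intent is roughly the following; a sketch, where reg stands for whatever free register is found, and the Push cases for positive offsets are handled analogously in code not shown here:

/* add esp, 4   ->   pop reg               (IncSP offset -4)
 * add esp, 8   ->   pop reg; pop reg      (IncSP offset -8)
 * sub esp, 4   ->   push reg              (IncSP offset +4)
 * sub esp, 8   ->   push reg; push reg    (IncSP offset +8) */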
ir_node *noreg;
/* try to transform a mov 0, reg to xor reg reg */
- if(attr->offset != 0 || attr->symconst != NULL)
+ if (attr->offset != 0 || attr->symconst != NULL)
+ return;
+ if (ia32_cg_config.use_mov_0)
return;
/* xor destroys the flags, so no-one must be using them */
- if(be_peephole_get_value(CLASS_ia32_flags, REG_EFLAGS) != NULL)
+ if (be_peephole_get_value(CLASS_ia32_flags, REG_EFLAGS) != NULL)
return;
reg = arch_get_irn_register(arch_env, node);
be_peephole_after_exchange(res);
}
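
The gain is code size; a sketch of the two encodings for zeroing a register (Intel syntax):

/* b8 00 00 00 00   mov eax, 0      5 bytes, EFLAGS untouched
 * 31 c0            xor eax, eax    2 bytes, but clobbers EFLAGS -
 *                                  hence the flags check above */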
+/**
+ * Split an IMul mem, imm into a Load mem and IMul reg, imm if possible.
+ */
+static void peephole_ia32_Imul_split(ir_node *imul) {
+ const ir_node *right = get_irn_n(imul, n_ia32_IMul_right);
+ const arch_register_t *reg;
+ ir_node *load, *block, *base, *index, *mem, *res;
+ dbg_info *dbgi;
+ ir_graph *irg;
+
+ if (! is_ia32_Immediate(right) || get_ia32_op_type(imul) != ia32_AddrModeS) {
+ /* not an IMul mem, imm form; ignore */
+ return;
+ }
+ /* we need a free register */
+ reg = get_free_gp_reg();
+ if (reg == NULL)
+ return;
+
+ /* fine, we can rebuild it */
+ dbgi = get_irn_dbg_info(imul);
+ block = get_nodes_block(imul);
+ irg = current_ir_graph;
+ base = get_irn_n(imul, n_ia32_IMul_base);
+ index = get_irn_n(imul, n_ia32_IMul_index);
+ mem = get_irn_n(imul, n_ia32_IMul_mem);
+ load = new_rd_ia32_Load(dbgi, irg, block, base, index, mem);
+
+ /* copy all attributes */
+ set_irn_pinned(load, get_irn_pinned(imul));
+ set_ia32_op_type(load, ia32_AddrModeS);
+ set_ia32_ls_mode(load, get_ia32_ls_mode(imul));
+
+ set_ia32_am_scale(load, get_ia32_am_scale(imul));
+ set_ia32_am_sc(load, get_ia32_am_sc(imul));
+ set_ia32_am_offs_int(load, get_ia32_am_offs_int(imul));
+ if (is_ia32_am_sc_sign(imul))
+ set_ia32_am_sc_sign(load);
+ if (is_ia32_use_frame(imul))
+ set_ia32_use_frame(load);
+ set_ia32_frame_ent(load, get_ia32_frame_ent(imul));
+
+ sched_add_before(imul, load);
+
+ mem = new_rd_Proj(dbgi, irg, block, load, mode_M, pn_ia32_Load_M);
+ res = new_rd_Proj(dbgi, irg, block, load, mode_Iu, pn_ia32_Load_res);
+
+ arch_set_irn_register(arch_env, res, reg);
+ be_peephole_after_exchange(res);
+
+ set_irn_n(imul, n_ia32_IMul_mem, mem);
+ set_irn_n(imul, n_ia32_IMul_left, res);
+ set_ia32_op_type(imul, ia32_Normal);
+}
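
In instruction terms the split performs roughly this rewrite; a sketch in Intel syntax, where tmp is the free register found above and the addressing-mode details are carried over by the attribute copies:

/* imul dst, [base + index*scale + offs], imm     memory source form
 *   becomes
 * mov  tmp, [base + index*scale + offs]          the new Load
 * imul dst, tmp, imm                             register source form */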
+
/**
* Register a peephole optimisation function.
*/
static void register_peephole_optimisation(ir_op *op, peephole_opt_func func) {
assert(op->ops.generic == NULL);
- op->ops.generic = (void*) func;
+ op->ops.generic = (op_func)func;
}
/* Perform peephole optimisations. */
register_peephole_optimisation(op_ia32_Lea, peephole_ia32_Lea);
register_peephole_optimisation(op_ia32_Test, peephole_ia32_Test);
register_peephole_optimisation(op_ia32_Test8Bit, peephole_ia32_Test);
+ register_peephole_optimisation(op_be_Return, peephole_ia32_Return);
+ if (! ia32_cg_config.use_imul_mem_imm32)
+ register_peephole_optimisation(op_ia32_IMul, peephole_ia32_Imul_split);
be_peephole_opt(cg->birg);
}
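
For context, a minimal sketch of how the driver side presumably consumes these registrations; the real walker lives in the generic backend peephole module, and dispatch_peephole is a hypothetical name used only for illustration:

/* Hypothetical dispatch, for illustration only. */
static void dispatch_peephole(ir_node *node)
{
	ir_op *op = get_irn_op(node);
	peephole_opt_func func = (peephole_opt_func) op->ops.generic;

	if (func != NULL)
		func(node);  /* run the optimisation registered for this op */
}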