be_node_attr_t node_attr; /**< base attributes of every be node. */
int num_ret_vals; /**< number of return values */
unsigned pop; /**< number of bytes that should be popped */
+	int emit_pop; /**< if set, emit the pop count even if pop == 0 */
} be_return_attr_t;
/** The be_IncSP attribute type. */
typedef struct {
be_node_attr_t node_attr; /**< base attributes of every be node. */
	int offset; /**< The offset by which the stack shall be expanded/shrunk. */
- int align; /**< wether stack should be aligned after the
- IncSP */
+ int align; /**< whether stack should be aligned after the
+ IncSP */
} be_incsp_attr_t;
/** The be_Frame attribute type. */
/** The be_Call attribute type. */
typedef struct {
be_node_attr_t node_attr; /**< base attributes of every be node. */
- ir_entity *ent; /**< The called entity if this is a static call. */
+ ir_entity *ent; /**< The called entity if this is a static call. */
unsigned pop;
- ir_type *call_tp; /**< The call type, copied from the original Call node. */
+ ir_type *call_tp; /**< The call type, copied from the original Call node. */
} be_call_attr_t;
typedef struct {
return 1;
if (a_attr->pop != b_attr->pop)
return 1;
+ if (a_attr->emit_pop != b_attr->emit_pop)
+ return 1;
return _node_cmp_attr(&a_attr->node_attr, &b_attr->node_attr);
}
a = get_irn_attr(res);
a->num_ret_vals = n_res;
a->pop = pop;
+ a->emit_pop = 0;
return res;
}
/* Returns the number of real return values. */
-int be_Return_get_n_rets(const ir_node *ret)
-{
+int be_Return_get_n_rets(const ir_node *ret) {
const be_return_attr_t *a = get_irn_generic_attr_const(ret);
return a->num_ret_vals;
}
-unsigned be_Return_get_pop(const ir_node *ret)
-{
+/* Returns the number of bytes that should be popped from the stack when executing the Return. */
+unsigned be_Return_get_pop(const ir_node *ret) {
const be_return_attr_t *a = get_irn_generic_attr_const(ret);
return a->pop;
}
-int be_Return_append_node(ir_node *ret, ir_node *node)
-{
+/* Returns non-zero if the number of popped bytes must always be emitted. */
+int be_Return_get_emit_pop(const ir_node *ret) {
+ const be_return_attr_t *a = get_irn_generic_attr_const(ret);
+ return a->emit_pop;
+}
+
+/* Set whether the number of popped bytes must always be emitted. */
+void be_Return_set_emit_pop(ir_node *ret, int emit_pop) {
+ be_return_attr_t *a = get_irn_generic_attr(ret);
+ a->emit_pop = emit_pop;
+}
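+
+/* Sketch of how an emitter can consume these attributes (illustrative only,
+ * the real ia32 emitter is not part of this hunk):
+ *
+ *     if (be_Return_get_pop(ret) > 0 || be_Return_get_emit_pop(ret))
+ *         ... emit the 3-byte "ret $imm" (C2 iw), even for an immediate of 0
+ *     else
+ *         ... emit the 1-byte "ret" (C3)
+ */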
+
+int be_Return_append_node(ir_node *ret, ir_node *node) {
int pos;
	pos = add_irn_n(ret, node);

	return pos;
}
}
/* yep, return is the first real instruction in this block */
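+	/* On some AMD processors a plain one-byte "ret" that is the target of
+	 * a branch is predicted badly; prefixing it with "rep" or using the
+	 * 3-byte "ret $0" form avoids the penalty. */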
+#if 0
+	/* add a rep prefix to the return */
rep = new_rd_ia32_RepPrefix(get_irn_dbg_info(node), current_ir_graph, block);
keep_alive(rep);
sched_add_before(node, rep);
+#else
+	/* ensure that the 3-byte return is generated */
+ be_Return_set_emit_pop(node, 1);
+#endif
}
/* only optimize up to 48 stores behind IncSPs */
be_peephole_after_exchange(res);
}
+/**
+ * Split an IMul mem, imm into a Load mem and an IMul reg, imm if possible.
+ * Registered only for CPUs that handle the mem, imm32 form of IMul badly
+ * (ia32_cg_config.use_imul_mem_imm32 unset).
+ */
+static void peephole_ia32_Imul_split(ir_node *imul) {
+ const ir_node *right = get_irn_n(imul, n_ia32_IMul_right);
+ const arch_register_t *reg;
+ ir_node *load, *block, *base, *index, *mem, *res, *noreg;
+ dbg_info *dbgi;
+ ir_graph *irg;
+
+ if (! is_ia32_Immediate(right) || get_ia32_op_type(imul) != ia32_AddrModeS) {
+		/* not a mem, imm form, nothing to split */
+ return;
+ }
+	/* we need a free register for the loaded value; peephole optimisation
+	 * runs after register allocation, so only a currently unused one will do */
+ reg = get_free_gp_reg();
+ if (reg == NULL)
+ return;
+
+ /* fine, we can rebuild it */
+ dbgi = get_irn_dbg_info(imul);
+ block = get_nodes_block(imul);
+ irg = current_ir_graph;
+ base = get_irn_n(imul, n_ia32_IMul_base);
+ index = get_irn_n(imul, n_ia32_IMul_index);
+ mem = get_irn_n(imul, n_ia32_IMul_mem);
+ load = new_rd_ia32_Load(dbgi, irg, block, base, index, mem);
+
+ /* copy all attributes */
+ set_irn_pinned(load, get_irn_pinned(imul));
+ set_ia32_op_type(load, ia32_AddrModeS);
+ set_ia32_ls_mode(load, get_ia32_ls_mode(imul));
+
+ set_ia32_am_scale(load, get_ia32_am_scale(imul));
+ set_ia32_am_sc(load, get_ia32_am_sc(imul));
+ set_ia32_am_offs_int(load, get_ia32_am_offs_int(imul));
+ if (is_ia32_am_sc_sign(imul))
+ set_ia32_am_sc_sign(load);
+ if (is_ia32_use_frame(imul))
+ set_ia32_use_frame(load);
+ set_ia32_frame_ent(load, get_ia32_frame_ent(imul));
+
+ sched_add_before(imul, load);
+
+ mem = new_rd_Proj(dbgi, irg, block, load, mode_M, pn_ia32_Load_M);
+ res = new_rd_Proj(dbgi, irg, block, load, mode_Iu, pn_ia32_Load_res);
+
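+	/* move the loaded value into the free register and let the peephole
+	 * framework update its bookkeeping for the newly created nodes */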
+ arch_set_irn_register(arch_env, res, reg);
+ be_peephole_after_exchange(res);
+
+	/* rewire the IMul: read the value from the new register and drop the
+	 * address mode inputs */
+	set_irn_n(imul, n_ia32_IMul_mem, mem);
+	noreg = get_irn_n(imul, n_ia32_IMul_left);
+	set_irn_n(imul, n_ia32_IMul_base, noreg);
+	set_irn_n(imul, n_ia32_IMul_index, noreg);
+	set_irn_n(imul, n_ia32_IMul_left, res);
+	set_ia32_op_type(imul, ia32_Normal);
+}
+
/**
* Register a peephole optimisation function.
*/
static void register_peephole_optimisation(ir_op *op, peephole_opt_func func) {
assert(op->ops.generic == NULL);
- op->ops.generic = (void*) func;
+ op->ops.generic = (op_func)func;
}
/* Perform peephole-optimizations. */
register_peephole_optimisation(op_ia32_Test, peephole_ia32_Test);
register_peephole_optimisation(op_ia32_Test8Bit, peephole_ia32_Test);
register_peephole_optimisation(op_be_Return, peephole_ia32_Return);
+ if (! ia32_cg_config.use_imul_mem_imm32)
+ register_peephole_optimisation(op_ia32_IMul, peephole_ia32_Imul_split);
be_peephole_opt(cg->birg);
}
ir_node *op1 = get_Mul_left(node);
ir_node *op2 = get_Mul_right(node);
ir_mode *mode = get_irn_mode(node);
- unsigned flags;
	if (mode_is_float(mode)) {
		if (ia32_cg_config.use_sse2)
			return gen_binop(node, op1, op2, new_rd_ia32_xMul,
			                 match_commutative | match_am);
		else
			return gen_binop_x87_float(node, op1, op2, new_rd_ia32_vfmul,
			                           match_commutative | match_am);
	}
-
- /* for the lower 32bit of the result it doesn't matter whether we use
- * signed or unsigned multiplication so we use IMul as it has fewer
- * constraints */
- flags = match_commutative | match_am | match_mode_neutral | match_immediate;
- if (ia32_cg_config.use_imul_mem_imm32)
- flags |= match_am_and_immediates;
- return gen_binop(node, op1, op2, new_rd_ia32_IMul, flags);
+	/* for the lower 32 bits of the result it doesn't matter whether we use
+	 * signed or unsigned multiplication, so we use IMul as it has fewer
+	 * constraints; a mem, imm32 IMul is split up again by the peephole
+	 * pass on CPUs that handle this form badly */
+	return gen_binop(node, op1, op2, new_rd_ia32_IMul,
+	                 match_commutative | match_am | match_mode_neutral |
+	                 match_immediate | match_am_and_immediates);
}
/**