#include "irnode.h"
#include "irprog_t.h"
#include "ircons.h"
+#include "irtools.h"
#include "firm_types.h"
#include "iredges.h"
#include "tv.h"
#include "ia32_new_nodes.h"
#include "bearch_ia32_t.h"
-#include "gen_ia32_regalloc_if_t.h"
+#include "gen_ia32_regalloc_if.h"
#include "ia32_transform.h"
#include "ia32_dbg_stat.h"
#include "ia32_util.h"
DEBUG_ONLY(static firm_dbg_module_t *dbg = NULL;)
-#define AGGRESSIVE_AM
+//#define AGGRESSIVE_AM
typedef enum {
IA32_AM_CAND_NONE = 0, /**< no addressmode possible with irn inputs */
return irn == cg->noreg_gp || irn == cg->noreg_xmm || irn == cg->noreg_vfp;
}
-void ia32_pre_transform_phase(ia32_code_gen_t *cg) {
- /*
- We need to transform the consts twice:
- - the psi condition tree transformer needs existing constants to be ia32 constants
- - the psi condition tree transformer inserts new firm constants which need to be transformed
- */
- irg_walk_graph(cg->irg, NULL, ia32_transform_psi_cond_tree, cg);
-}
-
/********************************************************************************************************
* _____ _ _ ____ _ _ _ _ _
* | __ \ | | | | / __ \ | | (_) (_) | | (_)
* NOTE: THESE PEEPHOLE OPTIMIZATIONS MUST BE CALLED AFTER SCHEDULING AND REGISTER ALLOCATION.
*/
-static int ia32_const_equal(const ir_node *n1, const ir_node *n2) {
- if(get_ia32_immop_type(n1) != get_ia32_immop_type(n2))
- return 0;
-
- if(get_ia32_immop_type(n1) == ia32_ImmConst) {
- return get_ia32_Immop_tarval(n1) == get_ia32_Immop_tarval(n2);
- } else if(get_ia32_immop_type(n1) == ia32_ImmSymConst) {
- return get_ia32_Immop_symconst(n1) == get_ia32_Immop_symconst(n2);
- }
-
- assert(get_ia32_immop_type(n1) == ia32_ImmNone);
- return 1;
-}
-
-/**
- * Checks for potential CJmp/CJmpAM optimization candidates.
- */
-static ir_node *ia32_determine_cjmp_cand(ir_node *irn, is_op_func_t *is_op_func) {
- ir_node *cand = NULL;
- ir_node *prev = sched_prev(irn);
-
- if (is_Block(prev)) {
- if (get_Block_n_cfgpreds(prev) == 1)
- prev = get_Block_cfgpred(prev, 0);
- else
- prev = NULL;
- }
-
- /* The predecessor must be a ProjX. */
- if (prev && is_Proj(prev) && get_irn_mode(prev) == mode_X) {
- prev = get_Proj_pred(prev);
-
- if (is_op_func(prev))
- cand = prev;
- }
-
- return cand;
-}
-
-static int is_TestJmp_cand(const ir_node *irn) {
- return is_ia32_TestJmp(irn) || is_ia32_And(irn);
-}
-
-/**
- * Checks if two consecutive arguments of cand matches
- * the two arguments of irn (TestJmp).
- */
-static int is_TestJmp_replacement(ir_node *cand, ir_node *irn) {
- ir_node *in1 = get_irn_n(irn, 0);
- ir_node *in2 = get_irn_n(irn, 1);
- int i, n = get_irn_arity(cand);
- int same_args = 0;
-
- for (i = 0; i < n - 1; i++) {
- if (get_irn_n(cand, i) == in1 &&
- get_irn_n(cand, i + 1) == in2)
- {
- same_args = 1;
- break;
- }
- }
-
- if (!same_args)
- return 0;
-
- return ia32_const_equal(cand, irn);
-}
-
-/**
- * Tries to replace a TestJmp by a CJmp or CJmpAM (in case of And)
- */
-static void ia32_optimize_TestJmp(ir_node *irn, ia32_code_gen_t *cg) {
- ir_node *cand = ia32_determine_cjmp_cand(irn, is_TestJmp_cand);
- int replace = 0;
-
- /* we found a possible candidate */
- replace = cand ? is_TestJmp_replacement(cand, irn) : 0;
-
- if (replace) {
- DBG((dbg, LEVEL_1, "replacing %+F by ", irn));
-
- if (is_ia32_And(cand))
- set_irn_op(irn, op_ia32_CJmpAM);
- else
- set_irn_op(irn, op_ia32_CJmp);
-
- DB((dbg, LEVEL_1, "%+F\n", irn));
- }
-}
-
-static int is_CondJmp_cand(const ir_node *irn) {
- return is_ia32_CondJmp(irn) || is_ia32_Sub(irn);
-}
-
-/**
- * Checks if the arguments of cand are the same of irn.
- */
-static int is_CondJmp_replacement(ir_node *cand, ir_node *irn) {
- int i, arity;
-
- arity = get_irn_arity(cand);
- for (i = 0; i < arity; i++) {
- if (get_irn_n(cand, i) != get_irn_n(irn, i)) {
- return 0;
- }
- }
-
- return ia32_const_equal(cand, irn);
-}
-
-/**
- * Tries to replace a CondJmp by a CJmpAM
- */
-static void ia32_optimize_CondJmp(ir_node *irn, ia32_code_gen_t *cg) {
- ir_node *cand = ia32_determine_cjmp_cand(irn, is_CondJmp_cand);
- int replace = 0;
-
- /* we found a possible candidate */
- replace = cand ? is_CondJmp_replacement(cand, irn) : 0;
-
- if (replace) {
- DBG((dbg, LEVEL_1, "replacing %+F by ", irn));
- DBG_OPT_CJMP(irn);
-
- set_irn_op(irn, op_ia32_CJmpAM);
-
- DB((dbg, LEVEL_1, "%+F\n", irn));
- }
-}
-
// only optimize up to 48 stores behind IncSPs
#define MAXPUSH_OPTIMIZE 48
mem = get_irn_n(store, 3);
spreg = arch_get_irn_register(cg->arch_env, curr_sp);
- // create a push
- push = new_rd_ia32_Push(NULL, irg, block, noreg, noreg, val, curr_sp, mem);
+ push = new_rd_ia32_Push(get_irn_dbg_info(store), irg, block, noreg, noreg, val, curr_sp, mem);
- set_ia32_am_support(push, ia32_am_Source);
+ set_ia32_am_support(push, ia32_am_Source, ia32_am_unary);
copy_ia32_Immop_attr(push, store);
sched_add_before(irn, push);
// create stackpointer proj
curr_sp = new_r_Proj(irg, block, push, spmode, pn_ia32_Push_stack);
arch_set_irn_register(cg->arch_env, curr_sp, spreg);
- sched_add_before(irn, curr_sp);
// create memory proj
mem_proj = new_r_Proj(irg, block, push, mode_M, pn_ia32_Push_M);
- sched_add_before(irn, mem_proj);
// use the memproj now
exchange(store, mem_proj);
}
}
-#if 0
/**
* Tries to optimize two following IncSP.
*/
-static void ia32_optimize_IncSP(ir_node *irn, ia32_code_gen_t *cg) {
- ir_node *prev = be_get_IncSP_pred(irn);
- int real_uses = get_irn_n_edges(prev);
+static void ia32_optimize_IncSP(ir_node *node)
+{
+ int pred_offs;
+ int curr_offs;
+ int offs;
+ ir_node *pred = be_get_IncSP_pred(node);
+ ir_node *predpred;
- if (be_is_IncSP(prev) && real_uses == 1) {
- /* first IncSP has only one IncSP user, kill the first one */
- int prev_offs = be_get_IncSP_offset(prev);
- int curr_offs = be_get_IncSP_offset(irn);
+ if(!be_is_IncSP(pred))
+ return;
- be_set_IncSP_offset(prev, prev_offs + curr_offs);
+ if(get_irn_n_edges(pred) > 1)
+ return;
- /* Omit the optimized IncSP */
- be_set_IncSP_pred(irn, be_get_IncSP_pred(prev));
+ pred_offs = be_get_IncSP_offset(pred);
+ curr_offs = be_get_IncSP_offset(node);
- set_irn_n(prev, 0, new_Bad());
- sched_remove(prev);
+ if(pred_offs == BE_STACK_FRAME_SIZE_EXPAND) {
+ if(curr_offs != BE_STACK_FRAME_SIZE_SHRINK) {
+ return;
+ }
+ offs = 0;
+ } else if(pred_offs == BE_STACK_FRAME_SIZE_SHRINK) {
+ if(curr_offs != BE_STACK_FRAME_SIZE_EXPAND) {
+ return;
+ }
+ offs = 0;
+ } else if(curr_offs == BE_STACK_FRAME_SIZE_EXPAND
+ || curr_offs == BE_STACK_FRAME_SIZE_SHRINK) {
+ return;
+ } else {
+ offs = curr_offs + pred_offs;
}
+
+ be_set_IncSP_offset(node, offs);
+
+ /* rewire dependency edges */
+ predpred = be_get_IncSP_pred(pred);
+ edges_reroute_kind(pred, predpred, EDGE_KIND_DEP, current_ir_graph);
+
+ /* Omit the IncSP */
+ be_set_IncSP_pred(node, predpred);
+ sched_remove(pred);
+ be_kill_node(pred);
}
-#endif
/**
* Performs Peephole Optimizations.
*/
-static void ia32_peephole_optimize_node(ir_node *irn, void *env) {
+static void ia32_peephole_optimize_node(ir_node *node, void *env) {
ia32_code_gen_t *cg = env;
- /* AMD CPUs want explicit compare before conditional jump */
- if (! ARCH_AMD(cg->opt_arch)) {
- if (is_ia32_TestJmp(irn))
- ia32_optimize_TestJmp(irn, cg);
- else if (is_ia32_CondJmp(irn))
- ia32_optimize_CondJmp(irn, cg);
- }
-
- if (be_is_IncSP(irn)) {
- // optimize_IncSP doesn't respect dependency edges yet...
- //ia32_optimize_IncSP(irn, cg);
+ if (be_is_IncSP(node)) {
+ ia32_optimize_IncSP(node);
if (cg->opt & IA32_OPT_PUSHARGS)
- ia32_create_Pushs(irn, cg);
+ ia32_create_Pushs(node, cg);
}
}
* @param irn The irn to check
* return 1 if irn is a candidate, 0 otherwise
*/
-static int is_addr_candidate(const ir_node *irn) {
+static int is_addr_candidate(const ir_node *irn)
+{
#ifndef AGGRESSIVE_AM
const ir_node *block = get_nodes_block(irn);
ir_node *left, *right;
right = get_irn_n(irn, 3);
if (pred_is_specific_nodeblock(block, left, is_ia32_Ld)) {
- n = ia32_get_irn_n_edges(left);
+ n = ia32_get_irn_n_edges(left);
/* load with only one user: don't create LEA */
if(n == 1)
return 0;
}
if (pred_is_specific_nodeblock(block, right, is_ia32_Ld)) {
- n = ia32_get_irn_n_edges(right);
+ n = ia32_get_irn_n_edges(right);
if(n == 1)
return 0;
}
#endif
+ (void) irn;
return 1;
}
* @param h The height information of the irg
* @param block The block the Loads must/mustnot be in
* @param irn The irn to check
- * return 0 if irn is no candidate, 1 if left load can be used, 2 if right one, 3 for both
+ * @return 0 if irn is no candidate, 1 if left load can be used, 2 if right one, 3 for both
*/
-static ia32_am_cand_t is_am_candidate(ia32_code_gen_t *cg, heights_t *h, const ir_node *block, ir_node *irn) {
+static ia32_am_cand_t is_am_candidate(heights_t *h, const ir_node *block, ir_node *irn) {
ir_node *in, *load, *other, *left, *right;
int is_cand = 0, cand;
- int arity;
+ int arity;
+ int is_binary;
- if (is_ia32_Ld(irn) || is_ia32_St(irn) || is_ia32_Store8Bit(irn) || is_ia32_vfild(irn) || is_ia32_vfist(irn) ||
- is_ia32_GetST0(irn) || is_ia32_SetST0(irn) || is_ia32_xStoreSimple(irn))
+ if (is_ia32_Ld(irn) || is_ia32_St(irn) ||
+ is_ia32_vfild(irn) || is_ia32_vfist(irn) ||
+ is_ia32_xStoreSimple(irn))
return 0;
if(get_ia32_frame_ent(irn) != NULL)
return IA32_AM_CAND_NONE;
- left = get_irn_n(irn, 2);
- arity = get_irn_arity(irn);
- assert(arity == 5 || arity == 4);
- if(arity == 5) {
+ left = get_irn_n(irn, 2);
+ arity = get_irn_arity(irn);
+ is_binary = get_ia32_am_arity(irn) == ia32_am_binary;
+ if(is_binary) {
/* binary op */
right = get_irn_n(irn, 3);
} else {
+ assert(get_ia32_am_arity(irn) == ia32_am_unary);
/* unary op */
right = left;
}
/* 8bit Loads are not supported (for binary ops),
* they cannot be used with every register */
- if (get_irn_arity(irn) != 4 && get_mode_size_bits(get_ia32_ls_mode(load)) < 16) {
- assert(get_irn_arity(irn) == 5);
+ if (get_ia32_am_arity(irn) == ia32_am_binary &&
+ get_mode_size_bits(get_ia32_ls_mode(load)) < 16) {
is_cand = 0;
}
/* If there is a data dependency of other irn from load: cannot use AM */
- if (is_cand && get_nodes_block(other) == block) {
+ if (is_cand && is_binary && get_nodes_block(other) == block) {
other = skip_Proj(other);
is_cand = heights_reachable_in_block(h, other, load) ? 0 : is_cand;
/* this could happen in loops */
other = left;
/* 8bit Loads are not supported, they cannot be used with every register */
- if (get_mode_size_bits(get_ia32_ls_mode(load)) < 16)
+ /* 8bit Loads are not supported (for binary ops),
+ * they cannot be used with every register */
+ if (get_ia32_am_arity(irn) == ia32_am_binary &&
+ get_mode_size_bits(get_ia32_ls_mode(load)) < 16) {
is_cand = 0;
+ }
/* If there is a data dependency of other irn from load: cannot use load */
- if (is_cand && get_nodes_block(other) == block) {
+ if (is_cand && is_binary && get_nodes_block(other) == block) {
other = skip_Proj(other);
is_cand = heights_reachable_in_block(h, other, load) ? 0 : is_cand;
/* this could happen in loops */
* all it's Projs are removed as well.
* @param irn The irn to be removed from schedule
*/
-static INLINE void try_remove_from_sched(ir_node *node) {
- int i, arity;
-
+static INLINE void try_kill(ir_node *node)
+{
if(get_irn_mode(node) == mode_T) {
const ir_edge_t *edge, *next;
foreach_out_edge_safe(node, edge, next) {
ir_node *proj = get_edge_src_irn(edge);
- try_remove_from_sched(proj);
+ try_kill(proj);
}
}
sched_remove(node);
}
- arity = get_irn_arity(node);
- for(i = 0; i < arity; ++i) {
- set_irn_n(node, i, new_Bad());
- }
+ be_kill_node(node);
}
/**
/* check for SHL 1,2,3 */
if (pred_is_specific_node(temp, is_ia32_Shl)) {
+ ir_node *right = get_irn_n(temp, n_ia32_Shl_right);
- if (is_ia32_ImmConst(temp)) {
- long shiftval = get_tarval_long(get_ia32_Immop_tarval(temp));
+ if (is_ia32_Immediate(right)) {
+ const ia32_immediate_attr_t *attr
+ = get_ia32_immediate_attr_const(right);
+ long shiftval = attr->offset;
if (shiftval <= 3) {
index = get_irn_n(temp, 2);
try_add_to_sched(irn, res);
/* exchange the old op with the new LEA */
- try_remove_from_sched(irn);
+ try_kill(irn);
exchange(irn, res);
/* we will exchange it, report here before the Proj is created */
if (shift && lea && lea_o) {
- try_remove_from_sched(shift);
- try_remove_from_sched(lea);
- try_remove_from_sched(lea_o);
+ try_kill(shift);
+ try_kill(lea);
+ try_kill(lea_o);
DBG_OPT_LEA4(irn, lea_o, lea, shift, res);
} else if (shift && lea) {
- try_remove_from_sched(shift);
- try_remove_from_sched(lea);
+ try_kill(shift);
+ try_kill(lea);
DBG_OPT_LEA3(irn, lea, shift, res);
} else if (shift && lea_o) {
- try_remove_from_sched(shift);
- try_remove_from_sched(lea_o);
+ try_kill(shift);
+ try_kill(lea_o);
DBG_OPT_LEA3(irn, lea_o, shift, res);
} else if (lea && lea_o) {
- try_remove_from_sched(lea);
- try_remove_from_sched(lea_o);
+ try_kill(lea);
+ try_kill(lea_o);
DBG_OPT_LEA3(irn, lea_o, lea, res);
} else if (shift) {
- try_remove_from_sched(shift);
+ try_kill(shift);
DBG_OPT_LEA2(irn, shift, res);
} else if (lea) {
- try_remove_from_sched(lea);
+ try_kill(lea);
DBG_OPT_LEA2(irn, lea, res);
} else if (lea_o) {
- try_remove_from_sched(lea_o);
+ try_kill(lea_o);
DBG_OPT_LEA2(irn, lea_o, res);
} else {
DBG_OPT_LEA1(irn, res);
set_irn_n(irn, 0, get_irn_n(lea, 0));
set_irn_n(irn, 1, get_irn_n(lea, 1));
- try_remove_from_sched(lea);
+ try_kill(lea);
/* clear remat flag */
set_ia32_flags(irn, get_ia32_flags(irn) & ~arch_irn_flags_rematerializable);
* Sets new_right index of irn to right and new_left index to left.
* Also exchange left and right
*/
-static void exchange_left_right(ir_node *irn, ir_node **left, ir_node **right, int new_left, int new_right) {
+static void exchange_left_right(ir_node *irn, ir_node **left, ir_node **right,
+ int new_left, int new_right)
+{
ir_node *temp;
+ assert(is_ia32_commutative(irn));
+
set_irn_n(irn, new_right, *right);
set_irn_n(irn, new_left, *left);
DB((dbg, LEVEL_1, "transformed into %+F\n", res));
else
DB((dbg, LEVEL_1, "not transformed\n"));
- } else if (is_ia32_Ld(irn) || is_ia32_St(irn) || is_ia32_Store8Bit(irn)) {
+ } else if (is_ia32_Ld(irn) || is_ia32_St(irn)) {
/* - Load -> LEA into Load } TODO: If the LEA is used by more than one Load/Store */
/* - Store -> LEA into Store } it might be better to keep the LEA */
ir_node *left = get_irn_n(irn, 0);
foreach_out_edge_safe(left, edge, ne) {
src = get_edge_src_irn(edge);
- if (src && (get_edge_src_pos(edge) == 0) && (is_ia32_Ld(src) || is_ia32_St(src) || is_ia32_Store8Bit(src))) {
+ if (src && (get_edge_src_pos(edge) == 0) && (is_ia32_Ld(src) || is_ia32_St(src))) {
DBG((dbg, LEVEL_1, "\nmerging %+F into %+F\n", left, irn));
if (! is_ia32_got_lea(src))
merge_loadstore_lea(src, left);
}
}
-static void optimize_conv_store(ia32_code_gen_t *cg, ir_node *node)
+static void optimize_conv_store(ir_node *node)
{
ir_node *pred;
ir_mode *conv_mode;
}
}
-static void optimize_load_conv(ia32_code_gen_t *cg, ir_node *node)
+static void optimize_load_conv(ir_node *node)
{
ir_node *pred, *predpred;
ir_mode *load_mode;
if(get_mode_size_bits(conv_mode) < get_mode_size_bits(load_mode))
return;
+ if(get_mode_sign(conv_mode) != get_mode_sign(load_mode)) {
+ /* change the load if it has only 1 user */
+ if(get_irn_n_edges(pred) == 1) {
+ ir_mode *newmode;
+ if(get_mode_sign(conv_mode)) {
+ newmode = find_signed_mode(load_mode);
+ } else {
+ newmode = find_unsigned_mode(load_mode);
+ }
+ assert(newmode != NULL);
+ set_ia32_ls_mode(predpred, newmode);
+ } else {
+ /* otherwise we have to keep the conv */
+ return;
+ }
+ }
+
/* kill the conv */
exchange(node, pred);
}
-static void optimize_conv_conv(ia32_code_gen_t *cg, ir_node *node)
+static void optimize_conv_conv(ir_node *node)
{
- ir_node *pred;
- ir_mode *pred_mode;
- ir_mode *conv_mode;
+ ir_node *pred_proj, *pred, *result_conv;
+ ir_mode *pred_mode, *conv_mode;
if (!is_ia32_Conv_I2I(node) && !is_ia32_Conv_I2I8Bit(node))
return;
- pred = get_irn_n(node, 2);
+ assert(n_ia32_Conv_I2I_val == n_ia32_Conv_I2I8Bit_val);
+ pred_proj = get_irn_n(node, n_ia32_Conv_I2I_val);
+ if(is_Proj(pred_proj))
+ pred = get_Proj_pred(pred_proj);
+ else
+ pred = pred_proj;
+
if(!is_ia32_Conv_I2I(pred) && !is_ia32_Conv_I2I8Bit(pred))
return;
* so we only need the 2nd conv if it shrinks the mode */
conv_mode = get_ia32_ls_mode(node);
pred_mode = get_ia32_ls_mode(pred);
- if(get_mode_size_bits(conv_mode) < get_mode_size_bits(pred_mode))
- return;
+ /* if 2nd conv is smaller then first conv, then we can always take the 2nd
+ * conv */
+ if(get_mode_size_bits(conv_mode) <= get_mode_size_bits(pred_mode)) {
+ if(get_irn_n_edges(pred_proj) == 1) {
+ result_conv = pred_proj;
+ set_ia32_ls_mode(pred, conv_mode);
+ } else {
+ /* TODO: construct syncs/stuff here but we'll probably end up with
+ * 2 statements anyway */
+ if(get_irn_mode(pred) == mode_T) {
+ return;
+ }
+
+ result_conv = exact_copy(pred);
+ set_ia32_ls_mode(result_conv, conv_mode);
+ }
+ } else {
+ /* if both convs have the same sign, then we can take the smaller one */
+ if(get_mode_sign(conv_mode) == get_mode_sign(pred_mode)) {
+ result_conv = pred_proj;
+ } else {
+ /* no optimisation possible if smaller conv is sign-extend */
+ if(mode_is_signed(pred_mode)) {
+ return;
+ }
+ /* we can take the smaller conv if it is unsigned */
+ result_conv = pred_proj;
+ }
+ }
/* kill the conv */
- exchange(node, pred);
+ exchange(node, result_conv);
+
+ if(get_irn_n_edges(pred) == 0) {
+ be_kill_node(pred);
+ }
+ optimize_conv_conv(result_conv);
}
/**
 * Walker callback running the local Conv/LEA peephole optimisations on
 * a single node: Load->Conv folding, Conv->Store folding, Conv->Conv
 * merging and LEA creation.
 */
static void optimize_node(ir_node *node, void *env)
{
	ia32_code_gen_t *cg = env;

	optimize_load_conv(node);
	optimize_conv_store(node);
	optimize_conv_conv(node);
	optimize_lea(cg, node);
}
ir_node *addr_b, *addr_i;
int need_exchange_on_fail = 0;
ia32_am_type_t am_support;
+ ia32_am_arity_t am_arity;
ia32_am_cand_t cand;
ia32_am_cand_t orig_cand;
int dest_possible;
&dest_out_reg_req_0
};
- if (!is_ia32_irn(irn) || is_ia32_Ld(irn) || is_ia32_St(irn) || is_ia32_Store8Bit(irn))
+ if (!is_ia32_irn(irn) || is_ia32_Ld(irn) || is_ia32_St(irn))
return;
if (is_ia32_Lea(irn))
return;
am_support = get_ia32_am_support(irn);
+ am_arity = get_ia32_am_arity(irn);
block = get_nodes_block(irn);
- /* fold following patterns: */
- /* - op -> Load into AMop with am_Source */
- /* conditions: */
- /* - op is am_Source capable AND */
- /* - the Load is only used by this op AND */
- /* - the Load is in the same block */
- /* - Store -> op -> Load into AMop with am_Dest */
- /* conditions: */
- /* - op is am_Dest capable AND */
- /* - the Store uses the same address as the Load AND */
- /* - the Load is only used by this op AND */
- /* - the Load and Store are in the same block AND */
- /* - nobody else uses the result of the op */
- if (get_ia32_am_support(irn) == ia32_am_None)
+ /* fold following patterns:
+ * - op -> Load into AMop with am_Source
+ * conditions:
+ * - op is am_Source capable AND
+ * - the Load is only used by this op AND
+ * - the Load is in the same block
+ * - Store -> op -> Load into AMop with am_Dest
+ * conditions:
+ * - op is am_Dest capable AND
+ * - the Store uses the same address as the Load AND
+ * - the Load is only used by this op AND
+ * - the Load and Store are in the same block AND
+ * - nobody else uses the result of the op
+ */
+ if (am_support == ia32_am_None)
return;
- cand = is_am_candidate(cg, h, block, irn);
+ assert(am_arity == ia32_am_unary || am_arity == ia32_am_binary);
+
+ cand = is_am_candidate(h, block, irn);
if (cand == IA32_AM_CAND_NONE)
return;
cand & IA32_AM_CAND_LEFT, cand & IA32_AM_CAND_RIGHT));
left = get_irn_n(irn, 2);
- if (get_irn_arity(irn) == 4) {
- /* it's an "unary" operation */
+ if (am_arity == ia32_am_unary) {
+ assert(get_irn_arity(irn) >= 4);
right = left;
assert(cand == IA32_AM_CAND_BOTH);
} else {
+ assert(get_irn_arity(irn) >= 5);
right = get_irn_n(irn, 3);
}
set_ia32_am_sc_sign(irn);
/* connect to Load memory and disconnect Load */
- if (get_irn_arity(irn) == 5) {
+ if (am_arity == ia32_am_binary) {
/* binary AMop */
set_irn_n(irn, 4, get_irn_n(load, 2));
set_irn_n(irn, 2, ia32_get_admissible_noreg(cg, irn, 2));
/* clear remat flag */
set_ia32_flags(irn, get_ia32_flags(irn) & ~arch_irn_flags_rematerializable);
- try_remove_from_sched(store);
- try_remove_from_sched(load);
+ try_kill(store);
+ try_kill(load);
DBG_OPT_AM_D(load, store, irn);
DB((dbg, LEVEL_1, "merged with %+F and %+F into dest AM\n", load, store));
assert(cand & IA32_AM_CAND_RIGHT);
load = get_Proj_pred(right);
+ if(get_irn_n_edges(right) > 1) {
+ source_possible = 0;
+ }
+#if 1
+ /* TODO: this isn't really needed, but the code below is buggy
+ as heights won't get recomputed when the graph is reconstructed
+ so we can only transform loads with the result proj only */
if(get_irn_n_edges(load) > 1) {
source_possible = 0;
}
+#endif
}
if (source_possible) {
ir_mode *ls_mode = get_ia32_ls_mode(load);
- if(get_mode_size_bits(ls_mode) != 32)
+ if(get_mode_size_bits(ls_mode) != 32 || ls_mode == mode_D)
source_possible = 0;
}
if (source_possible) {
+ const ia32_attr_t *attr_load = get_ia32_attr_const(load);
+ ia32_attr_t *attr_irn = get_ia32_attr(irn);
addr_b = get_irn_n(load, 0);
addr_i = get_irn_n(load, 1);
set_ia32_am_flavour(irn, get_ia32_am_flavour(load));
set_ia32_op_type(irn, ia32_AddrModeS);
set_ia32_frame_ent(irn, get_ia32_frame_ent(load));
- set_ia32_ls_mode(irn, get_ia32_ls_mode(load));
+ attr_irn->data.need_64bit_stackent
+ = attr_load->data.need_64bit_stackent;
+ attr_irn->data.need_32bit_stackent
+ = attr_load->data.need_32bit_stackent;
+
+ /* set ls_mode if not already present (conv nodes already have ls_mode
+ set) */
+ if(get_ia32_ls_mode(irn) == NULL) {
+ set_ia32_ls_mode(irn, get_ia32_ls_mode(load));
+ }
set_ia32_am_sc(irn, get_ia32_am_sc(load));
if (is_ia32_am_sc_sign(load))
}
/* connect to Load memory and disconnect Load */
- if (get_irn_arity(irn) == 5) {
+ if (am_arity == ia32_am_binary) {
/* binary AMop */
- set_irn_n(irn, 3, ia32_get_admissible_noreg(cg, irn, 3));
- set_irn_n(irn, 4, get_irn_n(load, 2));
+ right = ia32_get_admissible_noreg(cg, irn, 3);
+ set_irn_n(irn, 3, right);
+ set_irn_n(irn, 4, get_irn_n(load, n_ia32_Load_mem));
} else {
- assert(get_irn_arity(irn) == 4);
/* unary AMop */
- set_irn_n(irn, 2, ia32_get_admissible_noreg(cg, irn, 2));
- set_irn_n(irn, 3, get_irn_n(load, 2));
+ right = ia32_get_admissible_noreg(cg, irn, 2);
+ set_irn_n(irn, 2, right);
+ set_irn_n(irn, 3, get_irn_n(load, n_ia32_Load_mem));
}
DBG_OPT_AM_S(load, irn);
ir_node *res_proj;
ir_mode *mode = get_irn_mode(irn);
- res_proj = new_rd_Proj(get_irn_dbg_info(irn), irg,
- get_nodes_block(irn), new_Unknown(mode_T),
- mode, 0);
- set_irn_mode(irn, mode_T);
- edges_reroute(irn, res_proj, irg);
- set_Proj_pred(res_proj, irn);
-
- set_Proj_pred(mem_proj, irn);
- set_Proj_proj(mem_proj, 1);
+ if(mode != mode_T) {
+ res_proj = new_rd_Proj(get_irn_dbg_info(irn), irg,
+ get_nodes_block(irn),
+ new_Unknown(mode_T), mode, 0);
+ set_irn_mode(irn, mode_T);
+ edges_reroute(irn, res_proj, irg);
+ set_Proj_pred(res_proj, irn);
- if(sched_is_scheduled(irn)) {
- sched_add_after(irn, res_proj);
- sched_add_after(irn, mem_proj);
+ set_Proj_pred(mem_proj, irn);
+ set_Proj_proj(mem_proj, 1);
+ } else {
+ /* hacky: we need some proj number which is not used yet... */
+ set_Proj_proj(mem_proj, -1);
+ set_Proj_pred(mem_proj, irn);
}
}
- if(get_irn_n_edges(load) == 0) {
- try_remove_from_sched(load);
- }
+ try_kill(load);
need_exchange_on_fail = 0;
+ /* immediate are only allowed on the right side */
+ if(is_ia32_Immediate(left)) {
+ exchange_left_right(irn, &left, &right, 3, 2);
+ }
+
DB((dbg, LEVEL_1, "merged with %+F into source AM\n", load));
}