#include "iropt_t.h"
#include "plist.h"
+/* suggested by GVN-PRE authors */
#define MAX_ANTIC_ITER 10
#define MAX_INSERT_ITER 3
-#define PARTLY_ONLY 0
-#define HOIST_HIGH 1
-#define BETTER_GREED 0
-#define LOADS 0
-#define DIVMODS 1
+/* Stops antic iteration from processing endless loops. */
+#define IGNORE_INF_LOOPS 1
+/* Stops antic information from flowing over the infinite loop backedge. */
#define NO_INF_LOOPS 0
+/* Attempt to reduce register pressure and reduce code size
+ for hoisted nodes. */
+#define HOIST_HIGH 1
+#define COMMON_DOM 1
+
+/* Seamless implementation of handling loads and generally memory
+ dependent nodes with GVN-PRE. */
+#define LOADS 1
+#define DIVMODS 0
+
+/* Experimental */
+
+/* Only optimize node directly after phi node if node is only successor. */
+#define MIN_CUT 0
+
+/* GVN is very limited. This enables optimize_node during value recognition.
+   GVN is still very limited, since a+1+b+1 doesn't equal a+b+2.
+ TODO Broken for yet unknown reasons. */
+#define OPTIMIZE_NODES 0
+
+
/** Additional info we need for every block. */
typedef struct block_info {
ir_valueset_t *exp_gen; /* contains this blocks clean expressions */
ir_valueset_t *avail_out; /* available values at block end */
ir_valueset_t *antic_in; /* clean anticipated values at block entry */
- ir_valueset_t *antic_done; /* keeps elements of antic_in after insert nodes phase*/
+ ir_valueset_t *antic_done; /* keeps elements of antic_in after insert nodes phase */
ir_valueset_t *new_set; /* new by hoisting made available values */
ir_nodehashmap_t *trans; /* contains translated nodes translated into block */
ir_node *avail; /* saves available node for insert node phase */
/**
* A pair of nodes to be exchanged.
- * We have to defer the exchange because our hash-sets cannot
- * find an already replaced node.
+ * We have to defer the exchange because some of the affected
+ * nodes are still referenced.
*/
typedef struct elim_pair {
ir_node *old_node; /* node that will be replaced */
unsigned last_idx; /* last node index of input graph */
char changes; /* flag for fixed point iterations - non-zero if changes occurred */
char first_iter; /* non-zero for first fixed point iteration */
+ int iteration; /* iteration counter */
+#if OPTIMIZE_NODES
+	pset *value_table; /* standard value table */
+ pset *gvnpre_values; /* gvnpre value table */
+#endif
} pre_env;
static pre_env *environ;
+
/* custom GVN value map */
static ir_nodehashmap_t value_map;
int infinite_loops;
} gvnpre_statistics;
-gvnpre_statistics *gvnpre_stats = NULL;
+static gvnpre_statistics *gvnpre_stats = NULL;
-static void init_stats()
+static void init_stats(void)
{
gvnpre_stats = XMALLOCZ(gvnpre_statistics);
}
-static void free_stats()
+static void free_stats(void)
{
free(gvnpre_stats);
gvnpre_stats = NULL;
}
-static void print_stats()
+static void print_stats(void)
{
gvnpre_statistics *stats = gvnpre_stats;
DB((dbg, LEVEL_1, "replaced : %d\n", stats->replaced));
if (is_Phi(a) || is_Phi(b))
return 1;
- //// REM
- assert(! is_Call(a) || is_memop(a));
- assert(! is_Load(a) || is_memop(a));
-
- assert(! is_Call(b) || is_memop(b));
- assert(! is_Load(b) || is_memop(b));
-
- /* memops are not the same, even if we want optimize them
+ /* memops are not the same, even if we want to optimize them
	   we have to take the order into account */
- if (is_memop(a) || is_memop(b))
- return 1;
-
-#if 0
- if (is_Call(a) || is_Call(b))
- return 1;
-#endif
+ if (is_memop(a) || is_memop(b) ||
+ get_irn_mode(a) == mode_T ||
+ get_irn_mode(b) == mode_T) {
+ /* Loads with the same predecessors are the same value;
+ this should only happen after phi translation. */
+ if ((! is_Load(a) || ! is_Load(b)) && (! is_Store(a) || ! is_Store(b)))
+ return 1;
+ }
if ((get_irn_op(a) != get_irn_op(b)) ||
(get_irn_mode(a) != get_irn_mode(b))) return 1;
if (is_Block(a) || is_Block(b))
return 1;
- /* TODO depends on load optimization */
- if (get_irn_pinned(a) == op_pin_state_pinned) {
- /* for pinned nodes, the block inputs must be equal */
- if (get_irn_n(a, -1) != get_irn_n(b, -1))
- return 1;
- } else {
- /* we need global values independently from their blocks */
- assert(get_opt_global_cse());
- }
+ /* should only be used with gcse enabled */
+ assert(get_opt_global_cse());
/* compare a->in[0..ins] with b->in[0..ins] */
for (i = 0; i < irn_arity_a; ++i) {
in[i] = pred_value;
}
- if (changed) {
+ if (changed && !is_memop(irn) && get_irn_mode(irn) != mode_X) {
/* create representative for */
ir_node *nn = new_ir_node(
get_irn_dbg_info(irn),
in);
copy_node_attr(environ->graph, irn, nn);
+#if OPTIMIZE_NODES
+ /* TODO optimize_node() uses the valuetable and thus the custom
+ identify_cmp() and will fail trying. */
+ environ->graph->value_table = environ->value_table;
+ nn = optimize_node(nn);
+ environ->graph->value_table = environ->gvnpre_values;
+#endif
+
/* now the value can be determined because the
predecessors are the leaders */
value = identify_remember(nn);
return value;
}
-/** When the value map has been built we may lookup expressions
- * and remember them if new.
+/**
+ * When the value map has been built we may lookup expressions
+ * and remember them if new.
*/
static ir_node *identify_or_remember(ir_node *irn)
{
ir_free_resources(irg, IR_RESOURCE_BLOCK_MARK);
}
-#if NO_INF_LOOPS
+#if IGNORE_INF_LOOPS || NO_INF_LOOPS
/**
* Returns non-zero if block is part of an infinite loop.
*/
* --------------------------------------------------------
*/
-/**
- * Helper function to get the anti leader of node in block.
- */
-static ir_node *get_anti_leader(ir_node *node, ir_node *block)
-{
- block_info *info = get_block_info(block);
- ir_node *value = identify(node);
- ir_node *leader = ir_valueset_lookup(info->antic_in, value);
-
- if (leader)
- return leader;
- else
- /* this applies in the context of this algorithm */
- return node;
-}
-
/**
* Returns non-zero if a node is movable and a possible candidate for PRE.
*/
#if LOADS
if (is_Load(n))
- return (get_Load_volatility(n) == volatility_non_volatile);
+ return get_Load_volatility(n) == volatility_non_volatile;
+ if (is_Store(n))
+ return get_Store_volatility(n) == volatility_non_volatile;
#endif
if (get_irn_pinned(n) == op_pin_state_pinned)
if (! is_nice_value(n))
return 0;
+#if LOADS
+ /* filter loads with no phi predecessor from antic_in */
+ if (is_Load(n) && ! is_Phi(get_Load_mem(n)))
+ return 0;
+ if (is_Store(n) && ! is_Phi(get_Store_mem(n)))
+ return 0;
+#endif
+
+#if DIVMODS
+ if (is_Div(n)) {
+ ir_node *mem = get_Div_mem(n);
+
+ mem = skip_Pin(mem);
+
+ if (! is_Phi(mem) && ! is_NoMem(mem))
+ return 0;
+ }
+
+ if (is_Mod(n) && ! is_Phi(get_Mod_mem(n)))
+ return 0;
+#endif
+
arity = get_irn_arity(n);
for (i = 0; i < arity; ++i) {
ir_node *pred = get_irn_n(n, i);
if (is_Block(irn))
return;
- if (! is_nice_value(irn))
- return;
+#if OPTIMIZE_NODES
+ environ->graph->value_table = environ->value_table;
+ identify_remember(irn);
+ environ->graph->value_table = environ->gvnpre_values;
+#endif
/* GVN step: remember the value. */
value = remember(irn);
block = get_nodes_block(irn);
info = get_block_info(block);
- ir_valueset_insert(info->avail_out, value, irn);
+ if (get_irn_mode(irn) != mode_X)
+ ir_valueset_insert(info->avail_out, value, irn);
+
+	/* values that are not in antic_in also don't need to be in any other set */
+
+ if (! is_nice_value(irn))
+ return;
if (is_clean_in_block(irn, block, info->exp_gen)) {
DB((dbg, LEVEL_3, "%+F clean in block %+F\n", irn, block));
ir_valueset_insert(info->exp_gen, value, irn);
-#if 0
- /* flag irn as clean*/
- set_irn_link(irn, irn);
- } else {
- /* flag irn as not clean */
- set_irn_link(irn, NULL);
-#endif
}
}
*/
static ir_node *get_translated(ir_node *block, ir_node *node)
{
- ir_node *trans;
if (is_irn_constlike(node))
return node;
- trans = ir_nodehashmap_get(ir_node, get_block_info(block)->trans, node);
- if (trans == NULL)
- return node;
- else
- return trans;
-}
-
-static ir_node *get_translated_pred(ir_node *block, int pos, ir_node *node)
-{
- ir_node *pred_block;
- if (is_irn_constlike(node))
- return node;
-
- pred_block = get_Block_cfgpred_block(block, pos);
- return ir_nodehashmap_get(ir_node, get_block_info(pred_block)->trans, node);
+ return ir_nodehashmap_get(ir_node, get_block_info(block)->trans, node);
}
/**
ir_nodehashmap_insert(map, node, trans);
}
-#if 0
-/**
- * Checks if node is hoistable into block.
- *
- * A clean node in block block can be hoisted above block succ.
- * A node is not clean if its representative is killed in block block.
- * The node can still be hoisted into block block.
- *
- * @param n the node to be checked
- * @param block the block to be hoisted into
- * @returns block which node can be hoisted above
- */
-static unsigned is_hoistable_above(ir_node *node, ir_node *block, unsigned translated)
-{
- int i;
- int arity = get_irn_arity(node);
-
- /* make sure that node can be hoisted above block */
-
- if (is_irn_constlike(node))
- return 1;
-
- for (i = 0; i < arity; ++i) {
- block_info *info = get_block_info(block);
- ir_node *pred = get_irn_n(node, i);
- ir_node *pred_value = identify(pred);
- ir_node *pred_block = get_nodes_block(pred);
-
- /* predecessor strictly dominating */
- if (block_strictly_dominates(pred_block, block))
- continue;
-
- /* if we didn't translate the exact representative we cannot translate */
- if (translated && !get_translated(pred_block, pred))
- return 0;
-
- if (! ir_valueset_lookup(info->antic_in, pred_value))
- return 0;
- }
- return 1;
-}
-#endif
-
-#if LOADS || DIVMODS
-/* Helper function to compare the values of pred and avail_pred. */
-static unsigned match_pred(ir_node *pred, ir_node *avail_pred, ir_node *block, int pos)
-{
- ir_node *avail_value = identify(avail_pred);
- ir_node *trans_pred = get_translated_pred(block, pos, pred);
- ir_node *value;
-
- if (trans_pred == NULL)
- trans_pred = pred;
- value = identify(trans_pred);
-
- DB((dbg, LEVEL_3, "manual compare %+F %+F\n", pred, avail_pred));
-
- return (value == avail_value);
-}
-#endif
-
-#if LOADS
-/**
- * Does phi translation for redundant load nodes only.
- * Returns NULL for non-redundant loads, which need to be phi translated.
- * Loads are compared by comparing their pointer values,
- * and assuring that they are adjacent.
- * This is equivalent to what phi_translation does.
- */
-static ir_node *phi_translate_load(ir_node *load, ir_node *block, int pos)
-{
- ir_node *mem = get_Load_mem(load);
- ir_node *trans = get_translated_pred(block, pos, mem);
-
- if (trans == NULL)
- trans = mem;
-
- /* no partial redundancy if this is a mode_M phi */
- if (is_Proj(trans)) {
- /* The last memory operation in predecessor block */
- ir_node *avail_load = get_Proj_pred(trans);
-
- /* memop is a load with matching type */
- if (is_Load(avail_load) &&
- get_Load_mode(load) == get_Load_mode(avail_load)) {
-
- unsigned match = match_pred(get_Load_ptr(load), get_Load_ptr(avail_load), block, pos);
-
- if (match)
- return avail_load;
- }
- }
- return NULL;
-}
-#endif
-
-#if DIVMODS
-/**
- * Does phi translation for redundant Div/Mod nodes only.
- * Returns NULL for non-redundant node, which needs to be phi translated.
- */
-static ir_node *phi_translate_divmod(ir_node *divmod, ir_node *block, int pos)
-{
- ir_node *mem = get_memop_mem(divmod);
- ir_node *trans = get_translated_pred(block, pos, mem);
-
- if (trans == NULL)
- trans = mem;
-
- /* no partial redundancy if this is a mode_M phi */
- if (is_Proj(trans)) {
- /* The last memory operation in predecessor block */
- ir_node *avail_op = get_Proj_pred(trans);
-
- if (get_irn_op(divmod) == get_irn_op(avail_op)) {
- unsigned left, right;
-
- if (is_Div(avail_op)) {
- if (get_Div_resmode(divmod) == get_Div_resmode(avail_op) &&
- get_Div_no_remainder(divmod) == get_Div_no_remainder(avail_op)) {
-
- left = match_pred(get_Div_left(divmod), get_Div_left(avail_op), block, pos);
- right = match_pred(get_Div_right(divmod), get_Div_right(avail_op), block, pos);
-
- if (left && right)
- return avail_op;
- }
- } else if (is_Mod(avail_op)) {
- if (get_Mod_resmode(divmod) == get_Mod_resmode(avail_op)) {
-
- left = match_pred(get_Mod_left(divmod), get_Mod_left(avail_op), block, pos);
- right = match_pred(get_Mod_right(divmod), get_Mod_right(avail_op), block, pos);
-
- if (left && right)
- return avail_op;
- }
- }
- }
- }
- return NULL;
-}
-#endif
-
/**
* Translates an expression above a Phi.
*
*
* @return a node representing the translated value
*/
-static ir_node *phi_translate(ir_node *node, ir_node *block, int pos, ir_node *pred_block)
+static ir_node *phi_translate(ir_node *node, ir_node *block, int pos, ir_valueset_t *leaderset)
{
int i;
int arity;
ir_node **in;
- ir_node *target_block;
+ ir_node *pred_block = get_Block_cfgpred_block(block, pos);
ir_node *nn;
- int needed;
+ unsigned needed;
if (is_Phi(node)) {
if (get_nodes_block(node) == block)
}
arity = get_irn_arity(node);
-#if LOADS
- if (is_Load(node)) {
- ir_node *avail_load = phi_translate_load(node, block, pos);
- if (avail_load)
- return avail_load;
- }
-#endif
-
-#if DIVMODS
- if (is_Div(node) || is_Mod(node)) {
- ir_node *avail_op = phi_translate_divmod(node, block, pos);
- if (avail_op)
- return avail_op;
- }
-#endif
-
needed = 0;
- in = XMALLOCN(ir_node *, arity);
+ in = ALLOCANZ(ir_node *, arity);
/* A value has several representatives. The anti leader is chosen to be
the main representative. If we access a node as representative of a
value we always use the anti leader. The anti leader can be found by
antic_in(identify(node)). */
for (i = 0; i < arity; ++i) {
- /* get anti leader for pred to lookup its translated value */
- ir_node *pred = get_irn_n(node, i);
- ir_node *anti_leader = get_anti_leader(pred, block);
+ ir_node *pred = get_irn_n(node, i);
+ ir_node *value = identify(pred);
+ /* get leader for pred to lookup its translated value */
+ ir_node *leader = ir_valueset_lookup(leaderset, value);
+ ir_node *pred_trans;
+ ir_node *new_pred;
+
+ if (! leader)
+ leader = pred;
+
/* we cannot find this value in antic_in, because the value
has (possibly) changed! */
- ir_node *pred_trans = get_translated(pred_block, anti_leader);
- ir_node *expr;
+ pred_trans = get_translated(pred_block, leader);
+
+
+#if DIVMODS
+ if (is_Div(node)) {
+ ir_node *mem = get_Div_mem(node);
+
+ mem = skip_Pin(mem);
+
+ if (! is_Phi(mem))
+ pred_trans = get_Div_mem(node);
+ }
+#endif
+ DB((dbg, LEVEL_3, "trans %+F of %+F is %+F\n", leader, pred_block, pred_trans));
if (pred_trans == NULL) {
- expr = pred;
+ new_pred = pred;
} else {
- expr = pred_trans;
- /* predecessor value changed, so translation is needed */
- if (identify(expr) != identify(pred))
+ new_pred = pred_trans;
+
+ /* loads: Predecessor is a memory phi, which translated yields a proj or
+ another phi. In case of projection and a load predecessor,
+ skip them and use the loads memory. */
+ if (is_Proj(pred_trans) && get_irn_mode(pred_trans) == mode_M) {
+#if LOADS || DIVMODS
+ ir_node *loadstore = get_Proj_pred(pred_trans);
+ /* If we do not translate this node, we will get its value wrong. */
needed |= 1;
+
+ if (is_Load(loadstore)) {
+ /* Put new load under the adjacent loads memory edge
+ such that GVN may compare them. */
+ new_pred = get_Load_mem(loadstore);
+ } else if (is_Store(loadstore)) {
+ new_pred = get_Store_mem(loadstore);
+ }
+#endif
+ } else {
+ /* predecessor value changed, so translation is needed */
+ if (identify(new_pred) != identify(pred))
+ needed |= 1;
+ }
}
- DB((dbg, LEVEL_3, "in %+F\n", expr));
- in[i] = expr;
+ DB((dbg, LEVEL_4, "in %+F\n", new_pred));
+ in[i] = new_pred;
}
if (! needed)
DB((dbg, LEVEL_3, "Translate\n"));
- target_block = get_Block_cfgpred_block(block, pos);
if (is_Proj(node))
- target_block = get_nodes_block(in[0]);
+ pred_block = get_nodes_block(in[0]);
/* copy node to represent the new value.
We do not translate nodes that do not need translation,
so we use the newly created nodes as value representatives only.
Their block is not important, because we create new ones during
- insert node phase. */
+ the insert node phase. */
nn = new_ir_node(
get_irn_dbg_info(node),
environ->graph,
- target_block,
+ pred_block,
get_irn_op(node),
get_irn_mode(node),
arity,
in);
- free(in);
/* We need the attribute copy here, because the Hash value of a
node might depend on it. */
copy_node_attr(environ->graph, node, nn);
/* add exp_gen */
if (env->first_iter) {
-#if NO_INF_LOOPS
+#if IGNORE_INF_LOOPS
/* keep antic_in of infinite loops empty */
if (! is_in_infinite_loop(block)) {
foreach_valueset(info->exp_gen, value, expr, iter) {
#endif
}
- // do exp_gen first, because we nee to know which values to kill of with tmpgen.
-
/* successor might have phi nodes */
if (n_succ == 1 && get_irn_arity(get_Block_cfg_out(block, 0)) > 1) {
succ = get_Block_cfg_out(block, 0);
}
foreach_valueset(succ_info->antic_in, value, expr, iter) {
- ir_node *trans = phi_translate(expr, succ, pos, block);
- /* create new value if necessary */
- ir_node *trans_value = identify_or_remember(trans);
+ ir_node *trans = get_translated(block, expr);
+ ir_node *trans_value;
ir_node *represent;
+ if (trans == NULL)
+ trans = phi_translate(expr, succ, pos, get_block_info(succ)->antic_in);
+
+ /* create new value if necessary */
+ trans_value = identify_or_remember(trans);
+
DB((dbg, LEVEL_3, "Translate %+F %+F to %d = %+F (%+F)\n", expr, succ, pos, trans, trans_value));
- /* on value change (phi present) we need the translated node
+ /* On value change (phi present) we need the translated node
to represent the new value for possible further translation. */
if (value != trans_value)
represent = trans;
represent = expr;
if (is_clean_in_block(expr, block, info->antic_in)) {
+#if NO_INF_LOOPS
+ /* Prevent information flow over the backedge of endless loops. */
+ if (env->iteration <= 2 || (is_backedge(succ, pos) && !is_in_infinite_loop(succ))) {
ir_valueset_replace(info->antic_in, trans_value, represent);
+ }
+#else
+ ir_valueset_replace(info->antic_in, trans_value, represent);
+#endif
}
set_translated(info->trans, expr, represent);
-#if 0
- if (is_hoistable_above(expr, block, 1))
- ir_valueset_insert(info->antic_in, trans_value, represent);
- set_translated(info->trans, expr, represent);
-#endif
}
} else if (n_succ > 1) {
break;
}
- //if (common && is_hoistable_above(expr, block, 0) && is_clean_in_block(expr, block, info->antic_in))
if (common && is_clean_in_block(expr, block, info->antic_in))
- //ir_valueset_insert(info->antic_in, value, expr);
ir_valueset_replace(info->antic_in, value, expr);
-#if 0
- if (common && is_hoistable_above(expr, block, 0)) {
- ir_valueset_insert(info->antic_in, value, expr);
- DB((dbg, LEVEL_3, "common and clean %+F(%+F) in %+F\n", expr, value, block));
- }
-#endif
}
}
* --------------------------------------------------------
*/
-#if 0
-/**
- * Flags node irn redundant depending on redundant parameter.
- */
-static void flag_redundant(ir_node *irn, unsigned redundant)
-{
- if (redundant) {
- set_irn_link(irn, irn);
- } else {
- set_irn_link(irn, NULL);
- }
-}
-#endif
-
-#if 0
-/*
- * Returns redundant flag of node irn.
- */
-static unsigned is_redundant(ir_node *irn)
-{
- return (get_irn_link(irn) != NULL);
-}
-#endif
-
/**
* Returns a valid mode if the value of expr is a partially redundant value.
*
ir_node *trans_value;
ir_node *avail_expr;
- /* ignore bad blocks. */
- if (is_Bad(pred_block))
- continue;
-
pred_info = get_block_info(pred_block);
trans_expr = get_translated(pred_block, expr);
trans_value = identify(trans_expr);
- avail_expr = (ir_node*)ir_valueset_lookup(pred_info->avail_out, trans_value);
+ if (is_Const(trans_expr))
+ avail_expr = trans_expr;
+ else
+ avail_expr = (ir_node*)ir_valueset_lookup(pred_info->avail_out, trans_value);
+
/* value might be available through a not yet existing constant */
if (avail_expr == NULL && is_Const(trans_expr)) {
/* limit range of new constants */
ir_tarval *lower = new_tarval_from_long(-127, cmode);
ir_tarval *c = get_Const_tarval(trans_expr);
- if (tarval_cmp(lower, c) != ir_relation_greater_equal &&
- tarval_cmp(c, upper) != ir_relation_greater_equal) {
- avail_expr = NULL;
- } else {
+ /* tarval within range? */
+ if (tarval_cmp(lower, c) == ir_relation_less_equal &&
+ tarval_cmp(c, upper) == ir_relation_less_equal) {
avail_expr = trans_expr;
+ } else {
+ avail_expr = NULL;
}
}
pred_info->found = 0;
fully_redundant = 0;
} else {
- /* expr is available */
+ /* expr is available, use the leader */
pred_info->avail = avail_expr;
pred_info->found = 1;
mode = get_irn_mode(avail_expr);
if (first_avail == NULL)
first_avail = avail_expr;
else if (first_avail != avail_expr)
- /* Multiple different expressions are available */
+ /* Multiple different expressions are available,
+ This is why we need no cut over avail_out sets. */
fully_redundant = 0;
DB((dbg, LEVEL_2, "Found %+F from block %+F as %+F in pred %+F\n", expr, block, avail_expr, pred_block));
}
}
-#if BETTER_GREED
- /* value is redundant from last iteration,
- but has not been removed from antic_in (is not optimized) */
- if (! environ->first_iter && is_redundant(expr))
- return mode;
-#endif
-
/* If it is not the same value already existing along every predecessor
and it is defined by some predecessor then it is partially redundant. */
if (! partially_redundant || fully_redundant)
block_info *idom_info = get_block_info(idom);
int updated = 0;
- //DEBUG_ONLY(dump_value_set(idom_info->new_set, "[New Set]", idom);)
+ DEBUG_ONLY(dump_value_set(idom_info->new_set, "[New Set]", idom);)
foreach_valueset(idom_info->new_set, value, expr, iter) {
/* inherit new_set from immediate dominator */
ir_valueset_insert(curr_info->new_set, value, expr);
int block_arity = get_irn_arity(block);
int arity = get_irn_arity(irn);
int pos, i;
+ block_info *info = get_block_info(block);
- /* As long as the predecessor values is available in all predecessor blocks,
+ /* As long as the predecessor values are available in all predecessor blocks,
we can hoist this value. */
for (pos = 0; pos < block_arity; ++pos) {
ir_node *pred_block = get_Block_cfgpred_block(block, pos);
block_info *pred_info = get_block_info(pred_block);
for (i = 0; i < arity; ++i) {
- ir_node *pred = get_irn_n(irn, i);
- ir_node *value = identify(pred);
- //ir_node *anti_leader = get_anti_leader(pred, block);
+ ir_node *pred = get_irn_n(irn, i);
+ ir_node *value;
+ ir_node *leader;
+ ir_node *trans;
+ ir_node *trans_val;
+ ir_node *avail;
+
+#if MIN_CUT
+ /* Very conservative min cut. Phi might only have 1 user. */
+ if (is_Phi(pred) && get_irn_n_edges(pred) != 1)
+ return 1;
+#endif
- if (is_irn_constlike(value))
+ if (is_Phi(pred) && get_nodes_block(pred) == block)
continue;
- /* predecessor is on current path we have to ensure redundancy */
- //if (block_dominates(block, get_nodes_block(pred)) && ! is_redundant(anti_leader))
- if (ir_valueset_lookup(pred_info->avail_out, value) == NULL)
+ DB((dbg, LEVEL_3, "pred %+F\n", pred));
+ value = identify(pred);
+ leader = ir_valueset_lookup(info->antic_in, value);
+ if (! leader)
+ leader = pred;
+ DB((dbg, LEVEL_3, "lead %+F\n", leader));
+ trans = get_translated(pred_block, leader);
+ if (! trans)
+ trans = pred;
+ DB((dbg, LEVEL_3, "trans %+F\n", trans));
+
+ trans_val = identify(trans);
+ DB((dbg, LEVEL_3, "value %+F\n", trans_val));
+
+ if (is_Const(trans_val) || is_SymConst(trans_val)) {
+ /* existing constant */
+ if (get_irn_idx(trans_val) < environ->last_idx) {
+ continue;
+ } else {
+ /* limit range of new constants */
+ ir_mode *cmode = get_irn_mode(trans);
+ ir_tarval *upper = new_tarval_from_long(128, cmode);
+ ir_tarval *lower = new_tarval_from_long(-128, cmode);
+ ir_tarval *c = get_Const_tarval(trans);
+
+ /* tarval within range? */
+ if (tarval_cmp(lower, c) == ir_relation_less &&
+ tarval_cmp(c, upper) == ir_relation_less) {
+ continue;
+ } else {
+ return 1;
+ }
+ }
+ }
+
+ /* */
+ if (is_irn_constlike(trans_val))
+ continue;
+
+ avail = ir_valueset_lookup(pred_info->avail_out, trans_val);
+
+ DB((dbg, LEVEL_3, "avail %+F\n", avail));
+ if (! avail)
+ return 1;
+#if MIN_CUT
+ /* only optimize if predecessors have been optimized */
+ if (ir_valueset_lookup(info->antic_done, value) == NULL)
return 1;
+#endif
}
}
return 0;
ir_node *idom;
int pos;
ir_valueset_iterator_t iter;
-#if BETTER_GREED
- plist_t *stack;
-#endif
/* only blocks */
if (! is_Block(block))
return;
}
-#if BETTER_GREED
- stack = plist_new();
- foreach_valueset(info->antic_in, value, expr, iter) {
- /* inverse topologic */
- plist_insert_front(stack, expr);
- }
-#endif
-
	/* This is the main reason antic_in is preferred over antic_out;
we may iterate over every anticipated value first and not
over the predecessor blocks. */
continue;
/* filter phi nodes from antic_in */
- if (is_Phi(expr)) {
-#if 0
- flag_redundant(expr, 1);
-#endif
+ if (is_Phi(expr))
continue;
- }
DB((dbg, LEVEL_2, "Insert for %+F (value %+F) in %+F\n", expr, value, block));
DB((dbg, LEVEL_2, "Fully redundant expr %+F value %+F\n", expr, value));
DEBUG_ONLY(inc_stats(gvnpre_stats->fully);)
-#if 0
- flag_redundant(expr, 1);
-#endif
+ ir_valueset_insert(info->antic_done, value, expr);
continue;
}
-#if !BETTER_GREED
if (is_hoisting_greedy(expr, block)) {
- DB((dbg, LEVEL_2, "Greedy\n"));
-#if 0
- flag_redundant(expr, 0);
-#endif
+ DB((dbg, LEVEL_2, "greedy\n"));
continue;
}
-#endif
mode = is_partially_redundant(block, expr, value);
- if (mode == NULL) {
-#if 0
- flag_redundant(expr, 0);
-#endif
+ if (mode == NULL)
continue;
- } else {
-#if 0
- flag_redundant(expr, 1);
-#endif
- }
-
-#if BETTER_GREED
- if (is_hoisting_greedy(expr, block)) {
- DB((dbg, LEVEL_2, "Better greed: greedy\n"));
- continue;
- }
-#endif
#if LOADS || DIVMODS
/* save old mode_M phis to remove keepalive edges later */
inc_stats(gvnpre_stats->first_iter_found);
inc_stats(gvnpre_stats->partially);
}
- if (is_Load(expr))
+ if (is_Load(expr) || is_Store(expr))
inc_stats(gvnpre_stats->loads);
else if (is_Div(expr) || is_Mod(expr))
inc_stats(gvnpre_stats->divmods);
ir_node *pred_block = get_Block_cfgpred_block(block, pos);
block_info *pred_info;
- /* ignore bad blocks. */
- if (is_Bad(pred_block)) {
- ir_graph *irg = get_irn_irg(pred_block);
- phi_in[pos] = new_r_Bad(irg, mode);
- continue;
- }
pred_info = get_block_info(pred_block);
if (! pred_info->found) {
- ir_node *trans = get_translated(pred_block, expr);
- assert(trans);
-
- if (! block_dominates(get_nodes_block(trans), pred_block)) {
- int trans_arity = get_irn_arity(trans);
- int i;
- ir_node **in = XMALLOCN(ir_node *, trans_arity);
- ir_node *nn;
- /* non phi descendants can also be redundant, but have
- not yet been (phi) translated. */
- for (i = 0; i < trans_arity; ++i) {
- ir_node *pred = get_irn_n(trans, i);
- ir_node *avail = ir_valueset_lookup(pred_info->avail_out, identify(pred));
- if (! avail)
- avail = pred;
- /* phi translate uses anti leader, we need the leader */
- in[i] = avail;
+ int i;
+ int node_arity = get_irn_arity(expr);
+ ir_node **in = XMALLOCNZ(ir_node *, node_arity);
+ ir_node *trans;
+ ir_node *new_value, *new_value2;
+ ir_node *target_block = pred_block;
+
+ for (i = 0; i < node_arity; ++i) {
+ ir_node *pred = get_irn_n(expr, i);
+ ir_node *value = identify(pred);
+ ir_node *leader;
+ ir_node *trans;
+ ir_node *trans_val;
+ ir_node *avail;
+
+ /* transform knowledge over the predecessor from
+ anti-leader world into leader world. */
+
+ DB((dbg, LEVEL_3, "pred %+F\n", pred));
+ value = identify(pred);
+
+ /* get leader for pred to lookup its translated value */
+ leader = ir_valueset_lookup(info->antic_in, value);
+ if (! leader)
+ leader = pred;
+ DB((dbg, LEVEL_3, "lead %+F\n", leader));
+
+ trans = get_translated(pred_block, leader);
+ if (!trans)
+ trans = pred;
+ DB((dbg, LEVEL_3, "trans %+F\n", trans));
+
+ /* in case of phi, we are done */
+ if (is_Phi(pred) && get_nodes_block(pred) == block) {
+ in[i] = trans;
+ continue;
+ }
+
+ trans_val = identify(trans);
+ DB((dbg, LEVEL_3, "value %+F\n", trans_val));
+
+ /* constants are always available but not in avail set */
+ if (is_irn_constlike(trans_val)) {
+ in[i] = trans;
+ continue;
}
- nn = new_ir_node(
- get_irn_dbg_info(trans),
- environ->graph,
- pred_block,
- get_irn_op(trans),
- get_irn_mode(trans),
- trans_arity,
- in);
- free(in);
-
- identify_or_remember(nn);
- set_translated(pred_info->trans, expr, nn);
- DB((dbg, LEVEL_3, "Translation during insert: trans %+F\n", nn));
-
- trans = nn;
+
+ /* use the leader
+ In case of loads we need to make sure the hoisted
+ loads are found despite their unique value. */
+ avail = ir_valueset_lookup(pred_info->avail_out, trans_val);
+ DB((dbg, LEVEL_3, "avail %+F\n", avail));
+
+ assert(avail && "predecessor has to be available");
+ in[i] = avail;
}
- DB((dbg, LEVEL_3, "Use new %+F in %+F because expr %+F(%+F) not available\n", trans, pred_block, expr, value));
+ if (is_Proj(expr))
+ target_block = get_nodes_block(in[0]);
+
+ /* Copy node to represent the new value.
+ We use translated nodes as value representatives only.
+ They have anti leaders as predecessors, not leaders!
+ So we have to create a new node using leaders. */
+ trans = new_ir_node(
+ get_irn_dbg_info(expr),
+ environ->graph,
+ target_block,
+ get_irn_op(expr),
+ get_irn_mode(expr),
+ get_irn_arity(expr),
+ in);
+ free(in);
+ /* We need the attribute copy here, because the Hash value of a
+ node might depend on it. */
+ copy_node_attr(environ->graph, expr, trans);
/* value is now available in target block through trans
insert (not replace) because it has not been available */
- ir_valueset_insert(pred_info->avail_out, value, trans);
+ new_value = identify_or_remember(trans);
+ ir_valueset_insert(pred_info->avail_out, new_value, trans);
+ DB((dbg, LEVEL_4, "avail%+F+= trans %+F(%+F)\n", pred_block, trans, new_value));
+
+ new_value2 = identify(get_translated(pred_block, expr));
+ ir_valueset_insert(pred_info->avail_out, new_value2, trans);
+ DB((dbg, LEVEL_4, "avail%+F+= trans %+F(%+F)\n", pred_block, trans, new_value2));
- // REMOVe
- DEBUG_ONLY(dump_value_set(pred_info->antic_in, "Antic_in", block);)
+ DB((dbg, LEVEL_3, "Use new %+F in %+F because %+F(%+F) not available\n", trans, pred_block, expr, value));
phi_in[pos] = trans;
} else {
phi = new_r_Phi(block, arity, phi_in, mode);
DB((dbg, LEVEL_3, "New %+F for redundant %+F created\n", phi, expr));
- /* this value is now available through the new phi */
+ /* This value is now available through the new phi.
+ insert || replace in avail_out */
ir_valueset_replace(info->avail_out, value, phi);
ir_valueset_insert(info->new_set, value, phi);
}
free(phi_in);
-#if 0
- /* remove from antic_in, because expr is not anticipated
- anymore in this block */
- ir_valueset_remove_iterator(info->antic_in, &iter);
-#endif
+ /* already optimized this value in this block */
ir_valueset_insert(info->antic_done, value, expr);
-
env->changes |= 1;
}
-
-#if BETTER_GREED
- if (env->changes) {
- plist_element_t *it;
- /* iterate in inverse topological order */
- foreach_plist(stack, it) {
- ir_node *irn = (ir_node *)plist_element_get_value(it);
- ir_node *block = get_nodes_block(irn);
- int j;
- char redundant = 1;
-
- /* does irn only have redundant successors? */
-
- if (! get_irn_outs_computed(irn))
- continue;
-
- for (j = get_irn_n_outs(irn) - 1; j >= 0; --j) {
- ir_node *succ = get_irn_out(irn, j);
-
- /* if succ and irn are in the same block */
- if (get_nodes_block(succ) == block && is_redundant(succ)) {
- continue;
- } else {
- redundant = 0;
- break;
- }
- }
-
- if (redundant)
- flag_redundant(irn, 1);
- }
- }
- plist_free(stack);
-#endif
-
}
#if HOIST_HIGH
}
/**
- * Dom tree block walker to insert nodes into the highest
- * possible block where their .
- *
+ * Domtree block walker to insert nodes with dying operands
+ * into the highest possible block whilst still being anticipated.
*/
static void hoist_high(ir_node *block, void *ctx)
{
foreach_valueset(curr_info->antic_done, value, expr, iter) {
int pos;
+ /* TODO currently we cannot handle loads and their projections */
if (is_memop(expr) || is_Proj(expr))
continue;
- DB((dbg, LEVEL_2, "leader %+F value %+F\n", expr, value));
+ DB((dbg, LEVEL_4, "leader %+F value %+F\n", expr, value));
/* visit hoisted expressions */
for (pos = 0; pos < arity; ++pos) {
continue;
avail_arity = get_irn_arity(avail);
-
value = identify(avail);
/* anticipation border */
new_target = NULL;
nest_depth = get_loop_depth(get_irn_loop(target));
+
+ /* Either push the hoisted nodes up their path,
+ or try to put them directly into their common dominator. */
+#if COMMON_DOM
+ /* By using block (instead of target) as initial block,
+ we only allow hoisting into a common block of
+ both predecessor blocks. */
+ dom = block;
+#else
dom = target;
+#endif
- while (dom && dom != environ->start_block) {
+ while (dom && dom != get_Block_idom(block)) {
dom = get_Block_idom(dom);
dom_info = get_block_info(dom);
- DB((dbg, LEVEL_2, "testing dom %+F\n", dom));
+ DB((dbg, LEVEL_4, "testing dom %+F\n", dom));
- /* TODO antic_in means hoistable above block,
- but we need 'hoistable into block'. */
+ /* TODO Being in antic_in means hoistable above block,
+ but we need 'hoistable into block'.
+ This could be achieved by a flag for each valueset pair,
+ being set during antic computation. */
/* check if available node is still anticipated and clean */
if (! ir_valueset_lookup(dom_info->antic_in, value)) {
- DB((dbg, LEVEL_2, "%+F not antic in %+F\n", value, dom));
+ DB((dbg, LEVEL_4, "%+F not antic in %+F\n", value, dom));
break;
}
nest_depth = get_loop_depth(get_irn_loop(dom));
/* do not hoist into loops */
- if (! get_loop_depth(get_irn_loop(dom)) <= nest_depth) {
- DB((dbg, LEVEL_2, "%+F deeper nested\n", dom));
+ if (get_loop_depth(get_irn_loop(dom)) > nest_depth) {
+ DB((dbg, LEVEL_4, "%+F deeper nested\n", dom));
/* not a suitable location */
continue;
}
+ /* check if operands die */
- /************* check if value dies */
-
- /* check for uses on current path*/
+ /* check for uses on current path */
for (i = 0; i < avail_arity; i++) {
- ir_node *pred = get_irn_n(avail, i);
- ir_node *pred_value = identify(pred);
- int j;
+ ir_node *pred = get_irn_n(avail, i);
+ ir_node *pred_value = identify(pred);
if (dom == NULL)
break;
- DB((dbg, LEVEL_2, "testing pred %+F\n", pred));
+ DB((dbg, LEVEL_4, "testing pred %+F\n", pred));
if (! ir_valueset_lookup(dom_info->avail_out, pred_value)) {
- DB((dbg, LEVEL_2, "%+F deeper nested\n", dom));
- dom = NULL;
- break;
- }
-
- /* TODO this might be a problem as we mainly operate on new nodes */
- if (! get_irn_outs_computed(pred)) {
- DB((dbg, LEVEL_2, "%+F outs not computed\n", pred));
+ DB((dbg, LEVEL_4, "pred %+F not available\n", pred));
dom = NULL;
break;
}
/* check every successor */
- for (j = get_irn_n_outs(pred) - 1; j >= 0; --j) {
- ir_node *succ = get_irn_out(pred, j);
- DB((dbg, LEVEL_2, "testing succ %+F\n", succ));
+ foreach_out_edge(pred, edge) {
+ ir_node *succ = get_edge_src_irn(edge);
+ DB((dbg, LEVEL_4, "testing succ %+F\n", succ));
/* check only successors on current path to end */
if (block_dominates(dom, get_nodes_block(succ))) {
/* Do we have another user than avail?
Then predecessor is not dead after removal of avail. */
if (succ_value != value) {
- DB((dbg, LEVEL_4, "fail: still used in %+F\n", succ));
+ DB((dbg, LEVEL_4, "still used in %+F\n", succ));
dom = NULL;
break;
}
}
if (dom)
new_target = dom;
- }
-
- if (new_target) {
- DB((dbg, LEVEL_2, "Hoisting %+F into %+F\n", avail, new_target));
- set_nodes_block(avail, new_target);
- ir_valueset_insert(dom_info->new_set, value, avail);
- }
-
- }
- }
-#if 0
- /* No new target or does the available node already dominate the new_target? */
- if (new_target) {
- DB((dbg, LEVEL_2, "leader block %+F\n", get_nodes_block(avail)));
- if (! block_strictly_dominates(new_target, get_nodes_block(avail))) {
- DB((dbg, LEVEL_4, "fail: antic border %+F\n", block));
- new_target = NULL;
- } else {
- assert(0);
- }
- assert(0);
- }
-
- DB((dbg, LEVEL_2, "antic border %+F\n", new_target));
+#if COMMON_DOM
+ /* only try common dominator */
+ break;
#endif
+ }
-
-#if 0
- /* only one usage on our path */
+ /* put node into new target block */
if (new_target) {
- /* push the available node up into */
- set_nodes_block(avail, new_target);
+ block_info *target_info = get_block_info(new_target);
+ int nn_arity = get_irn_arity(avail);
+ ir_node **in = XMALLOCN(ir_node *, nn_arity);
+ ir_node *nn;
+ int i;
+ DB((dbg, LEVEL_2, "Hoisting %+F into %+F\n", avail, new_target));
DEBUG_ONLY(inc_stats(gvnpre_stats->hoist_high);)
+
+ for (i = 0; i < nn_arity; ++i) {
+ ir_node *pred = get_irn_n(avail, i);
+ ir_node *avail_pred = ir_valueset_lookup(target_info->avail_out, identify(pred));
+ assert(avail_pred);
+ in[i] = avail_pred;
+ }
+ nn = new_ir_node(
+ get_irn_dbg_info(avail),
+ environ->graph,
+ new_target,
+ get_irn_op(avail),
+ get_irn_mode(avail),
+ nn_arity,
+ in);
+ free(in);
+
+ identify_or_remember(nn);
+ /* TODO Nodes are inserted into a dominating block and should
+ be available from this point on. Currently we do not push
+ the availability information through during the walk. */
+ ir_valueset_insert(target_info->new_set, value, nn);
+ ir_valueset_insert(target_info->avail_out, value, nn);
}
-#endif
+ }
+ }
}
#endif
if (value != NULL) {
ir_node *expr = (ir_node*)ir_valueset_lookup(info->avail_out, value);
+ DB((dbg, LEVEL_3, "Elim %+F(%+F) avail %+F\n", irn, value, expr));
if (expr != NULL && expr != irn) {
elim_pair *p = OALLOC(env->obst, elim_pair);
-#if PARTLY_ONLY
- if (get_irn_idx(expr) <= env->last_idx)
- return;
-#endif
-
p->old_node = irn;
p->new_node = expr;
p->next = env->pairs;
static void eliminate_nodes(elim_pair *pairs, ir_nodeset_t *keeps)
{
elim_pair *p;
- ir_nodeset_iterator_t iter;
- ir_node *m_phi;
ir_node *end = environ->end_node;
for (p = pairs; p != NULL; p = p->next) {
}
} /* eliminate_nodes */
+
/* --------------------------------------------------------
* GVN PRE pass
* --------------------------------------------------------
* Gvn_Pre algorithm.
*
* @param irg the graph
+ * @param env the environment
*/
static void gvn_pre(ir_graph *irg, pre_env *env)
{
antic_iter = 0;
env->first_iter = 1;
+ env->iteration = 1;
/* antic_in passes */
do {
++antic_iter;
irg_walk_blkwise_graph(irg, compute_antic, NULL, env);
env->first_iter = 0;
DB((dbg, LEVEL_2, "----------------------------------------------\n"));
+ env->iteration ++;
} while (env->changes != 0 && antic_iter < MAX_ANTIC_ITER);
DEBUG_ONLY(set_stats(gvnpre_stats->antic_iterations, antic_iter);)
ir_nodeset_init(env->keeps);
insert_iter = 0;
env->first_iter = 1;
- env->last_idx = get_irg_last_idx(irg);
/* compute redundant expressions */
do {
++insert_iter;
DEBUG_ONLY(set_stats(gvnpre_stats->insert_iterations, insert_iter);)
#if HOIST_HIGH
+ /* An attempt to reduce lifetimes by hoisting already hoisted values
+ even higher if their operands die. */
dom_tree_walk_irg(irg, hoist_high, NULL, env);
+ /* update avail_out for elimination */
dom_tree_walk_irg(irg, update_new_set_walker, NULL, env);
#endif
+ /* Deactivate edges to prevent intelligent removal of nodes,
+ or else we will get deleted nodes which we try to exchange. */
+ edges_deactivate(environ->graph);
-
- /* last step: eliminate nodes */
+ /* eliminate nodes */
irg_walk_graph(irg, NULL, eliminate, env);
eliminate_nodes(env->pairs, env->keeps);
optimization_state_t state;
block_info *block_info;
- /* bads and unreachables cause too much trouble with dominance
- dominance
- loop info for endless loop detection
- no critical edges is GVN-PRE precondition
+ /* bads and unreachables cause too much trouble with dominance,
+ loop info for endless loop detection,
+ no critical edges is a PRE precondition
*/
assure_irg_properties(irg,
IR_GRAPH_PROPERTY_NO_BADS
FIRM_DBG_REGISTER(dbg, "firm.opt.gvn_pre");
save_optimization_state(&state);
- environ = &env;
-
- /* edges will crash if enabled due to our allocate on other obstack trick */
- edges_deactivate(irg);
ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_LOOP_LINK);
+ edges_activate(irg);
+
+ environ = &env;
DEBUG_ONLY(init_stats();)
/* setup environment */
env.end_node = get_irg_end(irg);
env.pairs = NULL;
env.keeps = &keeps;
+ env.last_idx = get_irg_last_idx(irg);
/* Detect and set links of infinite loops to non-zero. */
analyse_loops(irg);
+#if OPTIMIZE_NODES
+ new_identities(irg);
+ env.value_table = irg->value_table;
+ irg->value_table = NULL;
+#endif
+
/* Switch on GCSE. We need it to correctly compute
the value of a node, which is independent from
its block. */
set_opt_global_cse(1);
- /* new_identities */
+ /* new_identities() */
if (irg->value_table != NULL)
del_pset(irg->value_table);
/* initially assumed nodes in pset are 512 */
irg->value_table = new_pset(compare_gvn_identities, 512);
+#if OPTIMIZE_NODES
+ env.gvnpre_values = irg->value_table;
+#endif
/* do GVN-PRE pass */
gvn_pre(irg, &env);
restore_optimization_state(&state);
confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_NONE);
- /* TODO there are optimizations that try to use the existing value_table */
+#if OPTIMIZE_NODES
+ irg->value_table = env.value_table;
+ del_pset(irg->value_table);
+ irg->value_table = env.gvnpre_values;
+#endif
+
+ /* TODO There seem to be optimizations that try to use the existing
+ value_table. */
new_identities(irg);
+
+ /* TODO assure nothing else breaks. */
+ set_opt_global_cse(0);
+ edges_activate(irg);
}
/* Creates an ir_graph pass for do_gvn_pre. */