#include "irpass.h"
#include "valueset.h"
#include "irloop.h"
+#include "firmstat_t.h"
#include "irgraph_t.h"
#include "irnode_t.h"
#include "iropt_t.h"
#include "plist.h"
+/* suggested by GVN-PRE authors */
#define MAX_ANTIC_ITER 10
#define MAX_INSERT_ITER 3
-#define PARTLY_ONLY 0
-#define HOIST_HIGH 0
-#define BETTER_GREED 0
+/* Stops antic iteration from processing endless loops. */
+#define IGNORE_INF_LOOPS 1
+/* Stops antic information to flow over infinite loop backedge */
+#define NO_INF_LOOPS 0
+
+/* Attempt to reduce register pressure and reduce code size
+ for hoisted nodes. */
+#define HOIST_HIGH 1
+#define COMMON_DOM 1
+
+/* Seamless implementation of handling loads and generally memory
+ dependent nodes with GVN-PRE. */
#define LOADS 1
#define DIVMODS 0
-#define NO_INF_LOOPS 0
-#define NO_INF_LOOPS2 0
+
+/* Experimental */
+
+/* Only optimize node directly after phi node if node is only successor. */
+#define MIN_CUT 0
+
+/* GVN is very limited. This enables optimize_node during value recognition.
+   GVN is still very limited, since a+1+b+1 is not recognized as equal to a+b+2.
+ TODO Broken for yet unknown reasons. */
+#define OPTIMIZE_NODES 0
+
/** Additional info we need for every block. */
typedef struct block_info {
ir_valueset_t *exp_gen; /* contains this blocks clean expressions */
ir_valueset_t *avail_out; /* available values at block end */
ir_valueset_t *antic_in; /* clean anticipated values at block entry */
- ir_valueset_t *antic_done; /* keeps elements of antic_in after insert nodes phase*/
+ ir_valueset_t *antic_done; /* keeps elements of antic_in after insert nodes phase */
ir_valueset_t *new_set; /* new by hoisting made available values */
ir_nodehashmap_t *trans; /* contains translated nodes translated into block */
ir_node *avail; /* saves available node for insert node phase */
/**
* A pair of nodes to be exchanged.
- * We have to defer the exchange because our hash-sets cannot
- * find an already replaced node.
+	 * We have to defer the exchange because references to certain
+	 * nodes are still needed.
*/
typedef struct elim_pair {
ir_node *old_node; /* node that will be replaced */
unsigned last_idx; /* last node index of input graph */
char changes; /* flag for fixed point iterations - non-zero if changes occurred */
char first_iter; /* non-zero for first fixed point iteration */
+ int iteration; /* iteration counter */
+#if OPTIMIZE_NODES
+	pset *value_table; /* standard value table */
+ pset *gvnpre_values; /* gvnpre value table */
+#endif
} pre_env;
static pre_env *environ;
+
/* custom GVN value map */
static ir_nodehashmap_t value_map;
int infinite_loops;
} gvnpre_statistics;
-gvnpre_statistics *gvnpre_stats = NULL;
+static gvnpre_statistics *gvnpre_stats = NULL;
-static void init_stats()
+static void init_stats(void)
{
gvnpre_stats = XMALLOCZ(gvnpre_statistics);
}
-static void free_stats()
+static void free_stats(void)
{
free(gvnpre_stats);
gvnpre_stats = NULL;
}
-static void print_stats()
+static void print_stats(void)
{
gvnpre_statistics *stats = gvnpre_stats;
DB((dbg, LEVEL_1, "replaced : %d\n", stats->replaced));
if (is_Phi(a) || is_Phi(b))
return 1;
- /* memops are not the same, even if we want optimize them
+ /* memops are not the same, even if we want to optimize them
we have to take the order in account */
- if (is_memop(a) || is_memop(b)) {
+ if (is_memop(a) || is_memop(b) ||
+ get_irn_mode(a) == mode_T ||
+ get_irn_mode(b) == mode_T) {
/* Loads with the same predecessors are the same value;
this should only happen after phi translation. */
- if (! is_Load(a) || ! is_Load(b))
+ if ((! is_Load(a) || ! is_Load(b)) && (! is_Store(a) || ! is_Store(b)))
return 1;
}
in[i] = pred_value;
}
- /* do not create new value for loads or else it will not
- be found in avail_out during insert phase */
- if (changed && ! is_Load(irn)) {
+ if (changed && !is_memop(irn) && get_irn_mode(irn) != mode_X) {
/* create representative for */
ir_node *nn = new_ir_node(
get_irn_dbg_info(irn),
in);
copy_node_attr(environ->graph, irn, nn);
+#if OPTIMIZE_NODES
+ /* TODO optimize_node() uses the valuetable and thus the custom
+ identify_cmp() and will fail trying. */
+ environ->graph->value_table = environ->value_table;
+ nn = optimize_node(nn);
+ environ->graph->value_table = environ->gvnpre_values;
+#endif
+
/* now the value can be determined because the
predecessors are the leaders */
value = identify_remember(nn);
return value;
}
-/** When the value map has been built we may lookup expressions
- * and remember them if new.
+/**
+ * When the value map has been built we may lookup expressions
+ * and remember them if new.
*/
static ir_node *identify_or_remember(ir_node *irn)
{
ir_free_resources(irg, IR_RESOURCE_BLOCK_MARK);
}
-#if NO_INF_LOOPS || NO_INF_LOOPS2
+#if IGNORE_INF_LOOPS || NO_INF_LOOPS
/**
* Returns non-zero if block is part of an infinite loop.
*/
#if LOADS
if (is_Load(n))
return get_Load_volatility(n) == volatility_non_volatile;
+ if (is_Store(n))
+ return get_Store_volatility(n) == volatility_non_volatile;
#endif
if (get_irn_pinned(n) == op_pin_state_pinned)
if (! is_nice_value(n))
return 0;
- /* filter loads from antic_in */
+#if LOADS
+ /* filter loads with no phi predecessor from antic_in */
if (is_Load(n) && ! is_Phi(get_Load_mem(n)))
return 0;
+ if (is_Store(n) && ! is_Phi(get_Store_mem(n)))
+ return 0;
+#endif
+
+#if DIVMODS
+ if (is_Div(n)) {
+ ir_node *mem = get_Div_mem(n);
+
+ mem = skip_Pin(mem);
+
+ if (! is_Phi(mem) && ! is_NoMem(mem))
+ return 0;
+ }
+
+ if (is_Mod(n) && ! is_Phi(get_Mod_mem(n)))
+ return 0;
+#endif
arity = get_irn_arity(n);
for (i = 0; i < arity; ++i) {
if (is_Block(irn))
return;
+#if OPTIMIZE_NODES
+ environ->graph->value_table = environ->value_table;
+ identify_remember(irn);
+ environ->graph->value_table = environ->gvnpre_values;
+#endif
+
/* GVN step: remember the value. */
value = remember(irn);
- /* values that are not in antic_in also dont't need to be in any other set */
- if (! is_nice_value(irn))
- return;
-
if (is_irn_constlike(irn))
return;
block = get_nodes_block(irn);
info = get_block_info(block);
- ir_valueset_insert(info->avail_out, value, irn);
+ if (get_irn_mode(irn) != mode_X)
+ ir_valueset_insert(info->avail_out, value, irn);
+
+	/* values that are not in antic_in also don't need to be in any other set */
+
+ if (! is_nice_value(irn))
+ return;
if (is_clean_in_block(irn, block, info->exp_gen)) {
DB((dbg, LEVEL_3, "%+F clean in block %+F\n", irn, block));
ir_nodehashmap_insert(map, node, trans);
}
-#if DIVMODS
-/* Helper function to compare the values of pred and avail_pred. */
-static unsigned match_pred(ir_node *pred, ir_node *avail_pred, ir_node *block, int pos)
-{
- ir_node *avail_value = identify(avail_pred);
- ir_node *pred_block = get_Block_cfgpred_block(block, pos);
- ir_node *trans_pred = get_translated(pred_block, pred);
- ir_node *value;
-
- if (trans_pred == NULL)
- trans_pred = pred;
- value = identify(trans_pred);
-
- DB((dbg, LEVEL_3, "manual compare %+F %+F\n", pred, avail_pred));
-
- return (value == avail_value);
-}
-
-/**
- * Does phi translation for redundant Div/Mod nodes only.
- * Returns NULL for non-redundant node, which needs to be phi translated.
- */
-static ir_node *phi_translate_divmod(ir_node *divmod, ir_node *block, int pos)
-{
- ir_node *mem = get_memop_mem(divmod);
- ir_node *trans = get_translated_pred(block, pos, mem);
-
- if (trans == NULL)
- trans = mem;
-
- /* no partial redundancy if this is a mode_M phi */
- if (is_Proj(trans)) {
- /* The last memory operation in predecessor block */
- ir_node *avail_op = get_Proj_pred(trans);
-
- if (get_irn_op(divmod) == get_irn_op(avail_op)) {
- unsigned left, right;
-
- if (is_Div(avail_op)) {
- if (get_Div_resmode(divmod) == get_Div_resmode(avail_op) &&
- get_Div_no_remainder(divmod) == get_Div_no_remainder(avail_op)) {
-
- left = match_pred(get_Div_left(divmod), get_Div_left(avail_op), block, pos);
- right = match_pred(get_Div_right(divmod), get_Div_right(avail_op), block, pos);
-
- if (left && right)
- return avail_op;
- }
- } else if (is_Mod(avail_op)) {
- if (get_Mod_resmode(divmod) == get_Mod_resmode(avail_op)) {
-
- left = match_pred(get_Mod_left(divmod), get_Mod_left(avail_op), block, pos);
- right = match_pred(get_Mod_right(divmod), get_Mod_right(avail_op), block, pos);
-
- if (left && right)
- return avail_op;
- }
- }
- }
- }
- return NULL;
-}
-#endif
-
/**
* Translates an expression above a Phi.
*
}
arity = get_irn_arity(node);
-#if DIVMODS
- if (is_Div(node) || is_Mod(node)) {
- ir_node *avail_op = phi_translate_divmod(node, block, pos);
- if (avail_op)
- return avail_op;
- }
-#endif
-
needed = 0;
in = ALLOCANZ(ir_node *, arity);
if (! leader)
leader = pred;
+
/* we cannot find this value in antic_in, because the value
has (possibly) changed! */
pred_trans = get_translated(pred_block, leader);
+
+#if DIVMODS
+ if (is_Div(node)) {
+ ir_node *mem = get_Div_mem(node);
+
+ mem = skip_Pin(mem);
+
+ if (! is_Phi(mem))
+ pred_trans = get_Div_mem(node);
+ }
+#endif
+
DB((dbg, LEVEL_3, "trans %+F of %+F is %+F\n", leader, pred_block, pred_trans));
if (pred_trans == NULL) {
new_pred = pred;
} else {
new_pred = pred_trans;
- /* loads: in case of memory projection and load,
- skip them and use the loads memory */
+ /* loads: Predecessor is a memory phi, which translated yields a proj or
+ another phi. In case of projection and a load predecessor,
+ skip them and use the loads memory. */
if (is_Proj(pred_trans) && get_irn_mode(pred_trans) == mode_M) {
- ir_node *load = get_Proj_pred(pred_trans);
- /* translated predecessor will be different */
+#if LOADS || DIVMODS
+ ir_node *loadstore = get_Proj_pred(pred_trans);
+ /* If we do not translate this node, we will get its value wrong. */
needed |= 1;
- if (is_Load(load)) {
- /* Put new load under the adjacent loads mem
+ if (is_Load(loadstore)) {
+ /* Put new load under the adjacent loads memory edge
such that GVN may compare them. */
- new_pred = get_Load_mem(load);
+ new_pred = get_Load_mem(loadstore);
+ } else if (is_Store(loadstore)) {
+ new_pred = get_Store_mem(loadstore);
}
+#endif
} else {
/* predecessor value changed, so translation is needed */
if (identify(new_pred) != identify(pred))
/* add exp_gen */
if (env->first_iter) {
-#if NO_INF_LOOPS
+#if IGNORE_INF_LOOPS
/* keep antic_in of infinite loops empty */
if (! is_in_infinite_loop(block)) {
foreach_valueset(info->exp_gen, value, expr, iter) {
if (trans == NULL)
trans = phi_translate(expr, succ, pos, get_block_info(succ)->antic_in);
+
/* create new value if necessary */
trans_value = identify_or_remember(trans);
DB((dbg, LEVEL_3, "Translate %+F %+F to %d = %+F (%+F)\n", expr, succ, pos, trans, trans_value));
- /* on value change (phi present) we need the translated node
+ /* On value change (phi present) we need the translated node
to represent the new value for possible further translation. */
if (value != trans_value)
represent = trans;
else
represent = expr;
- // test only 1 translation, because more failed, because of uncleanliness of load
- if (is_clean_in_block(expr, block, info->antic_in) && ! is_Load(expr)) {
-#if NO_INF_LOOPS2
- /* TODO to be determined why nearly every spec benchmark fails */
- if (! is_in_infinite_loop(succ) || ! is_backedge(succ, pos)) {
+ if (is_clean_in_block(expr, block, info->antic_in)) {
+#if NO_INF_LOOPS
+ /* Prevent information flow over the backedge of endless loops. */
+ if (env->iteration <= 2 || (is_backedge(succ, pos) && !is_in_infinite_loop(succ))) {
ir_valueset_replace(info->antic_in, trans_value, represent);
}
#else
trans_expr = get_translated(pred_block, expr);
trans_value = identify(trans_expr);
- avail_expr = (ir_node*)ir_valueset_lookup(pred_info->avail_out, trans_value);
+ if (is_Const(trans_expr))
+ avail_expr = trans_expr;
+ else
+ avail_expr = (ir_node*)ir_valueset_lookup(pred_info->avail_out, trans_value);
+
/* value might be available through a not yet existing constant */
if (avail_expr == NULL && is_Const(trans_expr)) {
/* limit range of new constants */
ir_tarval *lower = new_tarval_from_long(-127, cmode);
ir_tarval *c = get_Const_tarval(trans_expr);
- if (tarval_cmp(lower, c) != ir_relation_greater_equal &&
- tarval_cmp(c, upper) != ir_relation_greater_equal) {
- avail_expr = NULL;
- } else {
+ /* tarval within range? */
+ if (tarval_cmp(lower, c) == ir_relation_less_equal &&
+ tarval_cmp(c, upper) == ir_relation_less_equal) {
avail_expr = trans_expr;
+ } else {
+ avail_expr = NULL;
}
}
if (first_avail == NULL)
first_avail = avail_expr;
else if (first_avail != avail_expr)
- /* Multiple different expressions are available */
+			/* Multiple different expressions are available;
+			   this is why we need no cut over avail_out sets. */
fully_redundant = 0;
DB((dbg, LEVEL_2, "Found %+F from block %+F as %+F in pred %+F\n", expr, block, avail_expr, pred_block));
}
}
-#if BETTER_GREED
- /* value is redundant from last iteration,
- but has not been removed from antic_in (is not optimized) */
- if (! environ->first_iter && is_redundant(block, expr))
- return mode;
-#endif
-
/* If it is not the same value already existing along every predecessor
and it is defined by some predecessor then it is partially redundant. */
if (! partially_redundant || fully_redundant)
#endif
} /* update_new_set */
-#if BETTER_GREED
-/*
- * Returns redundant flag of node irn in block block.
- */
-static unsigned is_redundant(ir_node *block, ir_node *irn)
-{
- (void) block;
- (void) irn;
-
- /* TODO Needs to use a flag, because antic_done should only be used
- if node is finally processed by insert_nodes. */
- return 0;
-}
-#endif
-
/**
* Checks if hoisting irn is greedy.
* Greedy hoisting means that there are non partially redundant nodes
ir_node *trans_val;
ir_node *avail;
- if (get_loop_depth(get_irn_loop(pred_block)) > get_loop_depth(get_irn_loop(get_nodes_block(irn))))
+#if MIN_CUT
+ /* Very conservative min cut. Phi might only have 1 user. */
+ if (is_Phi(pred) && get_irn_n_edges(pred) != 1)
return 1;
+#endif
if (is_Phi(pred) && get_nodes_block(pred) == block)
continue;
trans = pred;
DB((dbg, LEVEL_3, "trans %+F\n", trans));
- if (is_Load(trans))
- continue;
-
trans_val = identify(trans);
DB((dbg, LEVEL_3, "value %+F\n", trans_val));
+
+ if (is_Const(trans_val) || is_SymConst(trans_val)) {
+ /* existing constant */
+ if (get_irn_idx(trans_val) < environ->last_idx) {
+ continue;
+ } else {
+ /* limit range of new constants */
+ ir_mode *cmode = get_irn_mode(trans);
+ ir_tarval *upper = new_tarval_from_long(128, cmode);
+ ir_tarval *lower = new_tarval_from_long(-128, cmode);
+ ir_tarval *c = get_Const_tarval(trans);
+
+ /* tarval within range? */
+ if (tarval_cmp(lower, c) == ir_relation_less &&
+ tarval_cmp(c, upper) == ir_relation_less) {
+ continue;
+ } else {
+ return 1;
+ }
+ }
+ }
+
+ /* */
if (is_irn_constlike(trans_val))
continue;
+
avail = ir_valueset_lookup(pred_info->avail_out, trans_val);
+ DB((dbg, LEVEL_3, "avail %+F\n", avail));
if (! avail)
return 1;
-#if 0
+#if MIN_CUT
/* only optimize if predecessors have been optimized */
if (ir_valueset_lookup(info->antic_done, value) == NULL)
return 1;
#endif
-
-#if 0
- /* Try to reduce cut. Turned out to be not very effective. */
- /* TODO Use out edges. */
- if (get_irn_outs_computed(irn) && get_irn_n_outs(irn) > 1)
- return 1;
-#endif
}
}
return 0;
ir_node *idom;
int pos;
ir_valueset_iterator_t iter;
-#if BETTER_GREED
- plist_t *stack;
-#endif
/* only blocks */
if (! is_Block(block))
return;
}
-#if BETTER_GREED
- stack = plist_new();
- foreach_valueset(info->antic_in, value, expr, iter) {
- /* inverse topologic */
- plist_insert_front(stack, expr);
- }
-#endif
-
-	/* This is the main reason antic_in is preverred over antic_out;
+	/* This is the main reason antic_in is preferred over antic_out;
we may iterate over every anticipated value first and not
over the predecessor blocks. */
DB((dbg, LEVEL_2, "Fully redundant expr %+F value %+F\n", expr, value));
DEBUG_ONLY(inc_stats(gvnpre_stats->fully);)
-#if ! PARTLY_ONLY
ir_valueset_insert(info->antic_done, value, expr);
-#endif
continue;
}
-#if !BETTER_GREED
if (is_hoisting_greedy(expr, block)) {
DB((dbg, LEVEL_2, "greedy\n"));
continue;
}
-#endif
mode = is_partially_redundant(block, expr, value);
if (mode == NULL)
continue;
-#if BETTER_GREED
- if (is_hoisting_greedy(expr, block)) {
- DB((dbg, LEVEL_2, "Better greed: greedy\n"));
- continue;
- }
-#endif
-
#if LOADS || DIVMODS
/* save old mode_M phis to remove keepalive edges later */
if (is_memop(expr)) {
inc_stats(gvnpre_stats->first_iter_found);
inc_stats(gvnpre_stats->partially);
}
- if (is_Load(expr))
+ if (is_Load(expr) || is_Store(expr))
inc_stats(gvnpre_stats->loads);
else if (is_Div(expr) || is_Mod(expr))
inc_stats(gvnpre_stats->divmods);
int node_arity = get_irn_arity(expr);
ir_node **in = XMALLOCNZ(ir_node *, node_arity);
ir_node *trans;
- ir_node *new_value;
+ ir_node *new_value, *new_value2;
ir_node *target_block = pred_block;
for (i = 0; i < node_arity; ++i) {
ir_node *trans_val;
ir_node *avail;
+ /* transform knowledge over the predecessor from
+ anti-leader world into leader world. */
+
DB((dbg, LEVEL_3, "pred %+F\n", pred));
value = identify(pred);
- /* transform knowledge over anti leader into knowledge over leader */
+
/* get leader for pred to lookup its translated value */
leader = ir_valueset_lookup(info->antic_in, value);
if (! leader)
continue;
}
- /* handle load predecessor */
- if (is_Load(trans)) {
- in[i] = trans;
- continue;
- }
-
trans_val = identify(trans);
DB((dbg, LEVEL_3, "value %+F\n", trans_val));
+
/* constants are always available but not in avail set */
if (is_irn_constlike(trans_val)) {
- in[i] = pred;
+ in[i] = trans;
continue;
}
if (is_Proj(expr))
target_block = get_nodes_block(in[0]);
- /* copy node to represent the new value.
- We do not translate nodes that do not need translation,
- so we use the newly created nodes as value representatives only.
- Their block is not important, because we create new ones during
- the insert node phase. */
+ /* Copy node to represent the new value.
+ We use translated nodes as value representatives only.
+ They have anti leaders as predecessors, not leaders!
+ So we have to create a new node using leaders. */
trans = new_ir_node(
get_irn_dbg_info(expr),
environ->graph,
/* We need the attribute copy here, because the Hash value of a
node might depend on it. */
copy_node_attr(environ->graph, expr, trans);
- new_value = identify_or_remember(trans);
-
- DB((dbg, LEVEL_3, "Use new %+F in %+F because expr %+F(%+F) not available\n", trans, pred_block, expr, value));
/* value is now available in target block through trans
insert (not replace) because it has not been available */
- ir_valueset_insert(pred_info->avail_out, value, trans);
+ new_value = identify_or_remember(trans);
ir_valueset_insert(pred_info->avail_out, new_value, trans);
+ DB((dbg, LEVEL_4, "avail%+F+= trans %+F(%+F)\n", pred_block, trans, new_value));
+
+ new_value2 = identify(get_translated(pred_block, expr));
+ ir_valueset_insert(pred_info->avail_out, new_value2, trans);
+ DB((dbg, LEVEL_4, "avail%+F+= trans %+F(%+F)\n", pred_block, trans, new_value2));
- DB((dbg, LEVEL_4, "SET AVAIL value %+F trans %+F in %+F\n", value, trans, pred_block));
- DB((dbg, LEVEL_4, "SET AVAIL newvalue %+F trans %+F in %+F\n", new_value, trans, pred_block));
+ DB((dbg, LEVEL_3, "Use new %+F in %+F because %+F(%+F) not available\n", trans, pred_block, expr, value));
phi_in[pos] = trans;
} else {
phi = new_r_Phi(block, arity, phi_in, mode);
DB((dbg, LEVEL_3, "New %+F for redundant %+F created\n", phi, expr));
- /* this value is now available through the new phi */
+ /* This value is now available through the new phi.
+ insert || replace in avail_out */
ir_valueset_replace(info->avail_out, value, phi);
ir_valueset_insert(info->new_set, value, phi);
}
ir_valueset_insert(info->antic_done, value, expr);
env->changes |= 1;
}
-
-#if BETTER_GREED
- /* TODO Unfinished
- Better greed first determines which values are redundant
- and decides then which to take.
- insert_nodes needs to be split up for that. The cycle could be
- for each block: flag redundant nodes,
- use heuristic to adjust these flags (also consider antic_done),
- do insert nodes.
- This way we could decide if we hoist a non redundant node,
- if all its successors are redundant.
- Or we might try to minimize the cut along hoisted nodes and their
- non redundant successors.
- */
- if (env->changes) {
- plist_element_t *it;
- /* iterate in inverse topological order */
- foreach_plist(stack, it) {
- ir_node *irn = (ir_node *)plist_element_get_value(it);
- ir_node *block = get_nodes_block(irn);
- int j;
- char redundant = 1;
-
- /* does irn only have redundant successors? */
-
- /* TODO we need to use edges */
- assert(get_irn_outs_computed(irn));
-
- for (j = get_irn_n_outs(irn) - 1; j >= 0; --j) {
- ir_node *succ = get_irn_out(irn, j);
-
- /* if succ and irn are in the same block */
- if (get_nodes_block(succ) == block && is_redundant(block, succ)) {
- continue;
- } else {
- redundant = 0;
- break;
- }
- }
-
- if (redundant)
- flag_redundant(irn, 1);
- }
- }
- plist_free(stack);
-#endif
-
}
#if HOIST_HIGH
}
/**
- * Domtree block walker to insert nodes into the highest
- * possible block. .
- *
+ * Domtree block walker to insert nodes with dying operands
+ * into the highest possible block whilst still being anticipated.
*/
static void hoist_high(ir_node *block, void *ctx)
{
foreach_valueset(curr_info->antic_done, value, expr, iter) {
int pos;
+ /* TODO currently we cannot handle load and their projections */
if (is_memop(expr) || is_Proj(expr))
continue;
/* anticipation border */
new_target = NULL;
nest_depth = get_loop_depth(get_irn_loop(target));
+
+ /* Either push the hoisted nodes up their path,
+ or try to put them directly into their common dominator. */
+#if COMMON_DOM
/* By using block (instead of target) as initial block,
we only allow hoisting into a common block of
both predecessor blocks. */
dom = block;
+#else
+ dom = target;
+#endif
- while (dom && dom != environ->start_block) {
+ while (dom && dom != get_Block_idom(block)) {
dom = get_Block_idom(dom);
dom_info = get_block_info(dom);
DB((dbg, LEVEL_4, "testing dom %+F\n", dom));
- /* TODO antic_in means hoistable above block,
- but we need 'hoistable into block'. */
+ /* TODO Being in antic_in means hoistable above block,
+ but we need 'hoistable into block'.
+ This could be achieved by a flag for each valueset pair,
+ being set during antic computation. */
/* check if available node ist still anticipated and clean */
if (! ir_valueset_lookup(dom_info->antic_in, value)) {
/* check if operands die */
- /* check for uses on current path*/
+ /* check for uses on current path */
for (i = 0; i < avail_arity; i++) {
ir_node *pred = get_irn_n(avail, i);
ir_node *pred_value = identify(pred);
/* Do we have another user than avail?
Then predecessor is not dead after removal of avail. */
if (succ_value != value) {
- DB((dbg, LEVEL_4, "fail: still used in %+F\n", succ));
+ DB((dbg, LEVEL_4, "still used in %+F\n", succ));
dom = NULL;
break;
}
if (dom)
new_target = dom;
- /* only try direct dominator */
+#if COMMON_DOM
+ /* only try common dominator */
break;
+#endif
}
+ /* put node into new target block */
if (new_target) {
block_info *target_info = get_block_info(new_target);
int nn_arity = get_irn_arity(avail);
free(in);
identify_or_remember(nn);
- /* NOTE: Nodes are inserted into a dominating block and should
+ /* TODO Nodes are inserted into a dominating block and should
be available from this point on. Currently we do not push
the availability information through during the walk. */
ir_valueset_insert(target_info->new_set, value, nn);
+ ir_valueset_insert(target_info->avail_out, value, nn);
}
}
}
if (value != NULL) {
ir_node *expr = (ir_node*)ir_valueset_lookup(info->avail_out, value);
+ DB((dbg, LEVEL_3, "Elim %+F(%+F) avail %+F\n", irn, value, expr));
-#if ! PARTLY_ONLY
- if (expr && get_irn_idx(expr) <= env->last_idx)
- return;
-#endif
if (expr != NULL && expr != irn) {
elim_pair *p = OALLOC(env->obst, elim_pair);
static void eliminate_nodes(elim_pair *pairs, ir_nodeset_t *keeps)
{
elim_pair *p;
- ir_nodeset_iterator_t iter;
- ir_node *m_phi;
ir_node *end = environ->end_node;
for (p = pairs; p != NULL; p = p->next) {
/* might be already changed */
p->new_node = skip_Id(p->new_node);
- // TEST
- /* happens with load nodes */
- if (p->new_node == p->old_node)
- continue;
-
DB((dbg, LEVEL_2, "Replacing %+F by %+F\n", p->old_node, p->new_node));
/* PRE tends to create Phi(self, self, ... , x, self, self, ...)
}
} /* eliminate_nodes */
+
/* --------------------------------------------------------
* GVN PRE pass
* --------------------------------------------------------
* Gvn_Pre algorithm.
*
* @param irg the graph
+ * @param env the environment
*/
static void gvn_pre(ir_graph *irg, pre_env *env)
{
antic_iter = 0;
env->first_iter = 1;
+ env->iteration = 1;
/* antic_in passes */
do {
++antic_iter;
irg_walk_blkwise_graph(irg, compute_antic, NULL, env);
env->first_iter = 0;
DB((dbg, LEVEL_2, "----------------------------------------------\n"));
+ env->iteration ++;
} while (env->changes != 0 && antic_iter < MAX_ANTIC_ITER);
DEBUG_ONLY(set_stats(gvnpre_stats->antic_iterations, antic_iter);)
DEBUG_ONLY(set_stats(gvnpre_stats->insert_iterations, insert_iter);)
#if HOIST_HIGH
- /* An attempt to reduce lifetime by hoisting already hoisted values
- even higher, if their operands die. */
+ /* An attempt to reduce lifetimes by hoisting already hoisted values
+ even higher if their operands die. */
dom_tree_walk_irg(irg, hoist_high, NULL, env);
/* update avail_out for elimination */
dom_tree_walk_irg(irg, update_new_set_walker, NULL, env);
#endif
- /* TODO deactivate edges to prevent intelligent removal of nodes */
+ /* Deactivate edges to prevent intelligent removal of nodes,
+ or else we will get deleted nodes which we try to exchange. */
edges_deactivate(environ->graph);
- /* last step: eliminate nodes */
+ /* eliminate nodes */
irg_walk_graph(irg, NULL, eliminate, env);
eliminate_nodes(env->pairs, env->keeps);
optimization_state_t state;
block_info *block_info;
- /* bads and unreachables cause too much trouble with dominance
- loop info for endless loop detection
- no critical edges is GVN-PRE precondition
+ /* bads and unreachables cause too much trouble with dominance,
+ loop info for endless loop detection,
+	   the absence of critical edges is a PRE precondition
*/
assure_irg_properties(irg,
IR_GRAPH_PROPERTY_NO_BADS
env.end_node = get_irg_end(irg);
env.pairs = NULL;
env.keeps = &keeps;
- /* last index is no node */
- env.last_idx = get_irg_last_idx(irg) - 1;
+ env.last_idx = get_irg_last_idx(irg);
/* Detect and set links of infinite loops to non-zero. */
analyse_loops(irg);
+#if OPTIMIZE_NODES
+ new_identities(irg);
+ env.value_table = irg->value_table;
+ irg->value_table = NULL;
+#endif
+
/* Switch on GCSE. We need it to correctly compute
the value of a node, which is independent from
its block. */
set_opt_global_cse(1);
- /* new_identities */
+ /* new_identities() */
if (irg->value_table != NULL)
del_pset(irg->value_table);
/* initially assumed nodes in pset are 512 */
irg->value_table = new_pset(compare_gvn_identities, 512);
+#if OPTIMIZE_NODES
+ env.gvnpre_values = irg->value_table;
+#endif
/* do GVN-PRE pass */
gvn_pre(irg, &env);
restore_optimization_state(&state);
confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_NONE);
- /* TODO there seem to be optimizations that try to use the existing
+#if OPTIMIZE_NODES
+ irg->value_table = env.value_table;
+ del_pset(irg->value_table);
+ irg->value_table = env.gvnpre_values;
+#endif
+
+ /* TODO There seem to be optimizations that try to use the existing
value_table. */
new_identities(irg);
+
+ /* TODO assure nothing else breaks. */
+ set_opt_global_cse(0);
+ edges_activate(irg);
}
/* Creates an ir_graph pass for do_gvn_pre. */