# include "config.h"
#endif
+#ifdef HAVE_MALLOC_H
+# include <malloc.h>
+#endif
+#ifdef HAVE_ALLOCA_H
+# include <alloca.h>
+#endif
+
#include <assert.h>
#include "xmalloc.h"
#include "firmstat.h"
#include "cfopt.h"
+#include "iropt_dbg.h"
/*------------------------------------------------------------------*/
/* Control flow optimization. */
/* semantics of Phi nodes. */
/*------------------------------------------------------------------*/
+/**
+ * Replace binary Conds that jump twice into the same block
+ * by a simple Jmp.
+ * E.g.
+ * @verbatim
+ * Cond Jmp Bad
+ * / \ | /
+ * ProjX True ProjX False ==> | /
+ * \ / | /
+ * Block Block
+ * @endverbatim
+ *
+ * Such patterns are the result of if-conversion.
+ *
+ * Note that the simple case that Block has only these two
+ * predecessors is already handled in equivalent_node_Block().
+ */
+static void remove_senseless_conds(ir_node *bl, void *data)
+{
+ int i, j;
+ int n = get_Block_n_cfgpreds(bl);
+
+ assert(is_Block(bl));
+
+ /* check every pair of control flow predecessors of this block */
+ for (i = 0; i < n; ++i) {
+ ir_node *pred_i = get_Block_cfgpred(bl, i);
+ ir_node *cond_i = skip_Proj(pred_i);
+
+ for (j = i + 1; j < n; ++j) {
+ ir_node *pred_j = get_Block_cfgpred(bl, j);
+ ir_node *cond_j = skip_Proj(pred_j);
+
+ /* both preds are Projs of the same binary (mode_b selector)
+ Cond: both of its outputs end up here, so it is senseless */
+ if (cond_j == cond_i
+ && get_irn_op(cond_i) == op_Cond
+ && get_irn_mode(get_Cond_selector(cond_i)) == mode_b) {
+
+ /* replace the first pred by an unconditional Jmp from the
+ Cond's block and kill the second one with Bad */
+ ir_node *jmp = new_r_Jmp(current_ir_graph, get_nodes_block(cond_i));
+ set_irn_n(bl, i, jmp);
+ set_irn_n(bl, j, new_Bad());
+
+ DBG_OPT_IFSIM2(cond_i, jmp);
+ break;
+ }
+ }
+ }
+}
+
+
/**
* Removes Tuples from Block control flow predecessors.
* Optimizes blocks with equivalent_node(). This is tricky,
/* see below */
new_block = equivalent_node(n);
- if (new_block != n && ! is_Bad(new_block))
+ if (new_block != n && ! is_Block_dead(new_block))
exchange (n, new_block);
} else if (get_opt_optimize() && (get_irn_mode(n) == mode_X)) {
/* We will soon visit a block. Optimize it before visiting! */
ir_node *b = get_nodes_block(skip_Proj(n));
- if (!is_Bad(b)) {
+ if (!is_Block_dead(b)) {
new_block = equivalent_node(b);
- while (irn_not_visited(b) && (!is_Bad(new_block)) && (new_block != b)) {
- /* We would have to run gigo if new is bad, so we
+ while (irn_not_visited(b) && (!is_Block_dead(new_block)) && (new_block != b)) {
+ /* We would have to run gigo() if new is bad, so we
promote it directly below. Nevertheless, we sometimes reach a block
the first time through a dataflow node. In this case we optimized the
block as such and have to promote the Bad here. */
/* normally, we would create a Bad block here, but this must be
* prevented, so just set it's cf to Bad.
*/
- if (is_Bad(new_block))
- exchange(n, new_Bad());
+ if (is_Block_dead(new_block))
+ exchange(n, new_Bad());
}
}
}
/**
* Remove cf from dead block by inspecting dominance info
* Do not replace blocks by Bad. This optimization shall
- * ensure, that all Bad cfg preds are removed, and no new
- * other Bads are introduced.
+ * ensure that all Bad control flow predecessors are
+ * removed, and no new other Bads are introduced.
*
* Must be run in the post walker.
*/
if (! is_Bad(pred_X)) {
ir_node *pred_bl = get_nodes_block(skip_Proj(pred_X));
- if (is_Bad(pred_bl) || (get_Block_dom_depth(pred_bl) == -1))
+ if (is_Block_dead(pred_bl) || (get_Block_dom_depth(pred_bl) < 0))
exchange (pred_X, new_Bad());
}
}
}
-/** Test wether we can optimize away pred block pos of b.
+/** Test whether we can optimize away pred block pos of b.
*
* @param b A block node.
* @param pos The position of the predecessor block to judge about.
* The test is rather tricky.
*
* The situation is something like the following:
- *
+ * @verbatim
* if-block
* / \
* then-b else-b
* \ /
* b
+ * @endverbatim
*
- * b merges the control flow of an if-then-else. We may not remove
- * the 'then' _and_ the 'else' block of an 'if' if there is a Phi
- * node in b, even if both are empty. The destruction of this Phi
- * requires that a copy is added before the merge. We have to
- * keep one of the case blocks to place the copies in.
+ * b merges the control flow of an if-then-else. We may not remove
+ * the 'then' _and_ the 'else' block of an 'if' if there is a Phi
+ * node in b, even if both are empty. The destruction of this Phi
+ * requires that a copy is added before the merge. We have to
+ * keep one of the case blocks to place the copies in.
*
- * To perform the test for pos, we must regard preds before pos
- * as already removed.
+ * To perform the test for pos, we must regard predecessors before pos
+ * as already removed.
**/
static int test_whether_dispensable(ir_node *b, int pos) {
 int i, j, n_preds = 1;
- int dispensable = 1;
- ir_node *cfop = get_Block_cfgpred(b, pos);
- ir_node *pred = get_nodes_block(cfop);
+ ir_node *pred = get_Block_cfgpred_block(b, pos);
 /* Bad blocks will be optimized away, so we don't need space for them */
- if (is_Bad(pred))
+ if (is_Block_dead(pred))
 return 0;
+ /* NOTE(review): a still-set block-visited flag apparently marks pred as
+ non-empty (see the mark phase in optimize_cf()) -- confirm */
 if (get_Block_block_visited(pred) + 1
 n_preds = get_Block_n_cfgpreds(pred);
 } else {
 /* b's pred blocks and pred's pred blocks must be pairwise disjunct.
- Work preds < pos as if they were already removed. */
+ Handle all pred blocks with preds < pos as if they were already removed. */
 for (i = 0; i < pos; i++) {
- ir_node *b_pred = get_nodes_block(get_Block_cfgpred(b, i));
- if (get_Block_block_visited(b_pred) + 1
+ ir_node *b_pred = get_Block_cfgpred_block(b, i);
+ if (! is_Block_dead(b_pred) &&
+ get_Block_block_visited(b_pred) + 1
 < get_irg_block_visited(current_ir_graph)) {
 for (j = 0; j < get_Block_n_cfgpreds(b_pred); j++) {
- ir_node *b_pred_pred = get_nodes_block(get_Block_cfgpred(b_pred, j));
- if (is_pred_of(b_pred_pred, pred)) dispensable = 0;
+ ir_node *b_pred_pred = get_Block_cfgpred_block(b_pred, j);
+ if (is_pred_of(b_pred_pred, pred))
+ goto non_dispensable;
 }
 } else {
- if (is_pred_of(b_pred, pred)) dispensable = 0;
+ if (is_pred_of(b_pred, pred))
+ goto non_dispensable;
 }
 }
 for (i = pos +1; i < get_Block_n_cfgpreds(b); i++) {
- ir_node *b_pred = get_nodes_block(get_Block_cfgpred(b, i));
- if (is_pred_of(b_pred, pred)) dispensable = 0;
- }
- if (!dispensable) {
- set_Block_block_visited(pred, get_irg_block_visited(current_ir_graph)-1);
- n_preds = 1;
- } else {
- n_preds = get_Block_n_cfgpreds(pred);
+ ir_node *b_pred = get_Block_cfgpred_block(b, i);
+ if (is_pred_of(b_pred, pred))
+ goto non_dispensable;
 }
+ /* if we get here, the block is dispensable */
+ n_preds = get_Block_n_cfgpreds(pred);
 }
 }
 return n_preds;
+
+/* pred must be kept: mark it as visited (so it will not be merged away)
+ and report exactly one predecessor slot for it */
+non_dispensable:
+ set_Block_block_visited(pred, get_irg_block_visited(current_ir_graph)-1);
+ return 1;
}
+/**
+ * Record used to defer the exchange of Phi nodes: exchanging them
+ * immediately would break later lookups, so the pairs are collected
+ * first and exchanged after all Phis have been optimized.
+ */
+typedef struct _defer_ex_phi {
+ ir_node *phi_pred; /**< the previous Phi node that will be replaced */
+ ir_node *phi; /**< the new Phi node that replaces phi_pred */
+} defer_ex_phi;
/**
- * This method removed Bad cf preds from Blocks and Phis, and removes
+ * This method removes Bad cf predecessors from Blocks and Phis, and removes
* empty blocks. A block is empty if it only contains Phi and Jmp nodes.
*
* We first adapt Phi nodes, then Block nodes, as we need the old ins
* for all nodes, not regarding whether there is a possibility for optimization.
*
* For each predecessor p of a Block b there are three cases:
- * 1. The predecessor p is a Bad node: just skip it. The in array of b shrinks by one.
- * 2. The predecessor p is empty. Remove p. All predecessors of p are now
- * predecessors of b.
- * 3. The predecessor p is a block containing useful code. Just keep p as is.
+ * -1. The predecessor p is a Bad node: just skip it. The in array of b shrinks by one.
+ * -2. The predecessor p is empty. Remove p. All predecessors of p are now
+ * predecessors of b.
+ * -3. The predecessor p is a block containing useful code. Just keep p as is.
*
* For Phi nodes f we have to check the conditions at the Block of f.
* For cases 1 and 3 we proceed as for Blocks. For case 2 we can have two
* cases:
- * 2a: The old precessor of the Phi f is a Phi pred_f IN THE BLOCK REMOVED. In this
+ * -2a: The old predecessor of the Phi f is a Phi pred_f IN THE BLOCK REMOVED. In this
* case we proceed as for blocks. We remove pred_f. All
* predecessors of pred_f now are predecessors of f.
- * 2b: The old predecessor of f is NOT in the block removed. It might be a Phi, too.
+ * -2b: The old predecessor of f is NOT in the block removed. It might be a Phi, too.
* We have to replicate f for each predecessor of the removed block. Or, with
* other words, the removed predecessor block has exactly one predecessor.
*
* Further there is a special case for self referencing blocks:
+ * @verbatim
*
* then_b else_b then_b else_b
* \ / \ /
* \ / | /
* pred_b | /
- * | ____ | /
+ * | ____ | / ____
* | | | | | | |
* | | | === optimized to ===> \ | | |
* loop_b | loop_b |
* | | | | | |
* | |____| | |____|
* | |
+ * @endverbatim
*
* If there is a Phi in pred_b, but we remove pred_b, we have to generate a
* Phi in loop_b, that has the ins of the Phi in pred_b and a self referencing
int i, j, k, n, max_preds, n_preds, p_preds;
ir_node *pred, *phi;
ir_node **in;
+ defer_ex_phi *defers;
/* Count the number of predecessor if this block is merged with pred blocks
that are empty. */
}
in = xmalloc(max_preds * sizeof(*in));
+ defers = NEW_ARR_F(defer_ex_phi, 0);
+
/*-
printf(" working on "); DDMN(b);
for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
/* Find the new predecessors for the Phi */
p_preds = 0;
for (i = 0, n = get_Block_n_cfgpreds(b); i < n; ++i) {
- pred = get_nodes_block(get_Block_cfgpred(b, i));
+ pred = get_Block_cfgpred_block(b, i);
if (is_Bad(get_Block_cfgpred(b, i))) {
/* case Phi 1: Do nothing */
Somehow the removed Phi node can be used legally in loops.
Therefore we replace the old phi by the new one.
+ This must be done _AFTER_ all Phis are optimized, or
+ it will fail if two Phis use the same pred_Phi.
+
+ FIXME: Is the following true? We ALWAYS replace it by the new one.
Further we have to remove the old Phi node by replacing it
- by Bad. Else it will remain in the keepalive array of End
+ by Bad. Else it will remain in the keep alive array of End
and cause illegal situations. So if there is no loop, we should
replace it by Bad.
*/
if (get_nodes_block(phi_pred) == pred) {
+ int i;
/* remove the Phi as it might be kept alive. Further there
might be other users. */
- exchange(phi_pred, phi); /* geht, ist aber doch semantisch falsch! Warum?? */
+ for (i = ARR_LEN(defers) - 1; i >= 0; --i)
+ if (defers[i].phi_pred == phi_pred)
+ break;
+
+ if (i < 0) {
+ /* we have a new replacement */
+ defer_ex_phi elem;
+
+ elem.phi_pred = phi_pred;
+ elem.phi = phi;
+ ARR_APP1(defer_ex_phi, defers, elem);
+ }
}
} else {
/* case Phi 3: */
phi = get_irn_link(phi);
}
+ /* now, exchange all Phis */
+ for (i = ARR_LEN(defers) - 1; i >= 0; --i) {
+ exchange(defers[i].phi_pred, defers[i].phi);
+ }
+ DEL_ARR_F(defers);
+
/*- This happens only if merge between loop backedge and single loop entry.
See special case above. -*/
for (k = 0, n = get_Block_n_cfgpreds(b); k < n; ++k) {
< get_irg_block_visited(current_ir_graph)) {
/* case 2: It's an empty block and not yet visited. */
assert(get_Block_n_cfgpreds(b) > 1);
- /* Else it should be optimized by equivalent_node. */
+ /* Else it should be optimized by equivalent_node. */
for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
ir_node *pred_block = get_Block_cfgpred(pred, j);
* phase.
* @@@ It would be better to add a struct in the link field
* that keeps the Phi list and the mark. Place it on an obstack, as
- * we will lose blocks and thereby generate mem leaks.
+ * we will lose blocks and thereby generate memory leaks.
*/
void optimize_cf(ir_graph *irg) {
- int i, n;
+ int i, j, n;
ir_node **in;
ir_node *end = get_irg_end(irg);
ir_graph *rem = current_ir_graph;
irg_dom_state dom_state = get_irg_dom_state(current_ir_graph);
current_ir_graph = irg;
+ /* if the graph is not pinned, we cannot determine empty blocks */
+ assert(get_irg_pinned(irg) != op_pin_state_floats &&
+ "Control flow optimization need a pinned graph");
+
/* Handle graph state */
assert(get_irg_phase_state(irg) != phase_building);
- if (get_irg_outs_state(current_ir_graph) == outs_consistent)
- set_irg_outs_inconsistent(current_ir_graph);
- if (get_irg_dom_state(current_ir_graph) == dom_consistent)
- set_irg_dom_inconsistent(current_ir_graph);
+ set_irg_outs_inconsistent(current_ir_graph);
+ set_irg_extblk_inconsistent(current_ir_graph);
+ set_irg_loopinfo_inconsistent(current_ir_graph);
+ set_irg_doms_inconsistent(current_ir_graph);
if (dom_state == dom_consistent && get_opt_optimize() && get_opt_unreachable_code()) {
ir_node *end = get_irg_end(irg);
- /* we have dominace info, we can kill dead block */
+ /* we have dominance info, we can kill dead block */
irg_block_walk_graph(irg, NULL, remove_dead_block_cf, NULL);
/* fix the keep-alives */
for (i = 0, n = get_End_n_keepalives(end); i < n; ++i) {
ir_node *ka = get_End_keepalive(end, i);
- if (is_Block(ka) && (get_Block_dom_depth(ka) == -1))
- set_End_keepalive(end, i, new_Bad());
- if (is_Phi(ka) && (get_Block_dom_depth(get_nodes_block(ka)) == -1))
- set_End_keepalive(end, i, new_Bad());
+ if (is_Block(ka)) {
+ /* do NOT keep dead blocks */
+ if (get_Block_dom_depth(ka) == -1)
+ set_End_keepalive(end, i, new_Bad());
+ }
+ else if (is_Block_dead(get_nodes_block(ka)) ||
+ get_Block_dom_depth(get_nodes_block(ka)) == -1)
+ /* do NOT keep nodes in dead blocks */
+ set_End_keepalive(end, i, new_Bad());
}
}
+ irg_block_walk_graph(current_ir_graph, NULL, remove_senseless_conds, NULL);
/* Use block visited flag to mark non-empty blocks. */
inc_irg_block_visited(irg);
/* Walk all keep alives, optimize them if block, add to new in-array
for end if useful. */
- in = NEW_ARR_F (ir_node *, 1);
- in[0] = get_nodes_block(end);
+ n = get_End_n_keepalives(end);
+ if (n > 0)
+ NEW_ARR_A (ir_node *, in, n);
inc_irg_visited(current_ir_graph);
- for (i = 0; i < get_End_n_keepalives(end); i++) {
+ /* fix the keep alive */
+ for (i = j = 0; i < n; i++) {
ir_node *ka = get_End_keepalive(end, i);
if (irn_not_visited(ka)) {
- if ((get_irn_op(ka) == op_Block) && Block_not_block_visited(ka)) {
+ ir_op *op = get_irn_op(ka);
+
+ if ((op == op_Block) && Block_not_block_visited(ka)) {
set_irg_block_visited(current_ir_graph, /* Don't walk all the way to Start. */
get_irg_block_visited(current_ir_graph)-1);
irg_block_walk(ka, optimize_blocks, NULL, NULL);
mark_irn_visited(ka);
- ARR_APP1 (ir_node *, in, ka);
- } else if (get_irn_op(ka) == op_Phi) {
+ in[j++] = ka;
+ } else if (op == op_Phi) {
+ mark_irn_visited(ka);
+ if (! is_Block_dead(get_nodes_block(ka)))
+ in[j++] = ka;
+ } else if (is_op_keep(op)) {
mark_irn_visited(ka);
- ARR_APP1 (ir_node *, in, ka);
+ if (! is_Block_dead(get_nodes_block(ka)))
+ in[j++] = ka;
}
}
}
- /* DEL_ARR_F(end->in); GL @@@ tut nicht ! */
- end->in = in;
+ if (j != n)
+ set_End_keepalives(end, j, in);
- /* the verifyer doesn't work yet with floating nodes */
+ /* the verifier doesn't work yet with floating nodes */
if (get_irg_pinned(irg) == op_pin_state_pinned) {
/* after optimize_cf(), only Bad data flow may remain. */
if (irg_vrfy_bads(irg, BAD_DF | BAD_BLOCK | TUPLE)) {