X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=ir%2Fopt%2Fcfopt.c;h=7164dfe0346ab747e563b93e3d9e008d4f5c4e73;hb=cc273381cbd00c4afddb1d1626b80d5cd3a636be;hp=03f3aabc1eb9ffea78498df15bda47a1a892e2a0;hpb=5d2f7c6eca2173a427ccf35a4b8ac793706de2e9;p=libfirm diff --git a/ir/opt/cfopt.c b/ir/opt/cfopt.c index 03f3aabc1..7164dfe03 100644 --- a/ir/opt/cfopt.c +++ b/ir/opt/cfopt.c @@ -36,6 +36,7 @@ #include "firmstat.h" #include "cfopt.h" +#include "iropt_dbg.h" /*------------------------------------------------------------------*/ /* Control flow optimization. */ @@ -46,6 +47,54 @@ /* semantics of Phi nodes. */ /*------------------------------------------------------------------*/ +/** + * Replace binary Conds that jumps twice into the same block + * by a simple Jmp. + * E.g. + * @verbatim + * Cond Jmp Bad + * / \ | / + * ProjX True ProjX False ==> | / + * \ / | / + * Block Block + * @endverbatim + * + * Such pattern are the result of if-conversion. + * + * Note that the simple case that Block has only these two + * predecessors are already handled in equivalent_node_Block(). + */ +static void remove_senseless_conds(ir_node *bl, void *data) +{ + int i, j; + int n = get_Block_n_cfgpreds(bl); + + assert(is_Block(bl)); + + for (i = 0; i < n; ++i) { + ir_node *pred_i = get_Block_cfgpred(bl, i); + ir_node *cond_i = skip_Proj(pred_i); + + for (j = i + 1; j < n; ++j) { + ir_node *pred_j = get_Block_cfgpred(bl, j); + ir_node *cond_j = skip_Proj(pred_j); + + if (cond_j == cond_i + && get_irn_op(cond_i) == op_Cond + && get_irn_mode(get_Cond_selector(cond_i)) == mode_b) { + + ir_node *jmp = new_r_Jmp(current_ir_graph, get_nodes_block(cond_i)); + set_irn_n(bl, i, jmp); + set_irn_n(bl, j, new_Bad()); + + DBG_OPT_IFSIM2(cond_i, jmp); + break; + } + } + } +} + + /** * Removes Tuples from Block control flow predecessors. * Optimizes blocks with equivalent_node(). This is tricky, @@ -71,18 +120,18 @@ static void merge_blocks(ir_node *n, void *env) { /* see below */ new_block = equivalent_node(n); - if (new_block != n && ! is_Bad(new_block)) + if (new_block != n && ! is_Block_dead(new_block)) exchange (n, new_block); } else if (get_opt_optimize() && (get_irn_mode(n) == mode_X)) { /* We will soon visit a block. Optimize it before visiting! */ ir_node *b = get_nodes_block(skip_Proj(n)); - if (!is_Bad(b)) { + if (!is_Block_dead(b)) { new_block = equivalent_node(b); - while (irn_not_visited(b) && (!is_Bad(new_block)) && (new_block != b)) { - /* We would have to run gigo if new is bad, so we + while (irn_not_visited(b) && (!is_Block_dead(new_block)) && (new_block != b)) { + /* We would have to run gigo() if new is bad, so we promote it directly below. Nevertheless, we sometimes reach a block the first time through a dataflow node. In this case we optimized the block as such and have to promote the Bad here. */ @@ -97,8 +146,8 @@ static void merge_blocks(ir_node *n, void *env) { /* normally, we would create a Bad block here, but this must be * prevented, so just set it's cf to Bad. */ - if (is_Bad(new_block)) - exchange(n, new_Bad()); + if (is_Block_dead(new_block)) + exchange(n, new_Bad()); } } } @@ -106,8 +155,8 @@ static void merge_blocks(ir_node *n, void *env) { /** * Remove cf from dead block by inspecting dominance info * Do not replace blocks by Bad. This optimization shall - * ensure, that all Bad cfg preds are removed, and no new - * other Bads are introduced. + * ensure, that all Bad control flow predecessors are + * removed, and no new other Bads are introduced. * * Must be run in the post walker. */ @@ -122,7 +171,7 @@ static void remove_dead_block_cf(ir_node *block, void *env) if (! is_Bad(pred_X)) { ir_node *pred_bl = get_nodes_block(skip_Proj(pred_X)); - if (is_Bad(pred_bl) || (get_Block_dom_depth(pred_bl) == -1)) + if (is_Block_dead(pred_bl) || (get_Block_dom_depth(pred_bl) < 0)) exchange (pred_X, new_Bad()); } } @@ -162,7 +211,7 @@ static int is_pred_of(ir_node *pred, ir_node *b) { } -/** Test wether we can optimize away pred block pos of b. +/** Test whether we can optimize away pred block pos of b. * * @param b A block node. * @param pos The position of the predecessor block to judge about. @@ -172,30 +221,29 @@ static int is_pred_of(ir_node *pred, ir_node *b) { * The test is rather tricky. * * The situation is something like the following: - * + * @verbatim * if-block * / \ * then-b else-b * \ / * b + * @endverbatim * - * b merges the control flow of an if-then-else. We may not remove - * the 'then' _and_ the 'else' block of an 'if' if there is a Phi - * node in b, even if both are empty. The destruction of this Phi - * requires that a copy is added before the merge. We have to - * keep one of the case blocks to place the copies in. + * b merges the control flow of an if-then-else. We may not remove + * the 'then' _and_ the 'else' block of an 'if' if there is a Phi + * node in b, even if both are empty. The destruction of this Phi + * requires that a copy is added before the merge. We have to + * keep one of the case blocks to place the copies in. * - * To perform the test for pos, we must regard preds before pos - * as already removed. + * To perform the test for pos, we must regard predecessors before pos + * as already removed. **/ static int test_whether_dispensable(ir_node *b, int pos) { int i, j, n_preds = 1; - int dispensable = 1; - ir_node *cfop = get_Block_cfgpred(b, pos); - ir_node *pred = get_nodes_block(cfop); + ir_node *pred = get_Block_cfgpred_block(b, pos); /* Bad blocks will be optimized away, so we don't need space for them */ - if (is_Bad(pred)) + if (is_Block_dead(pred)) return 0; if (get_Block_block_visited(pred) + 1 @@ -213,38 +261,49 @@ static int test_whether_dispensable(ir_node *b, int pos) { n_preds = get_Block_n_cfgpreds(pred); } else { /* b's pred blocks and pred's pred blocks must be pairwise disjunct. - Work preds < pos as if they were already removed. */ + Handle all pred blocks with preds < pos as if they were already removed. */ for (i = 0; i < pos; i++) { - ir_node *b_pred = get_nodes_block(get_Block_cfgpred(b, i)); - if (get_Block_block_visited(b_pred) + 1 + ir_node *b_pred = get_Block_cfgpred_block(b, i); + if (! is_Block_dead(b_pred) && + get_Block_block_visited(b_pred) + 1 < get_irg_block_visited(current_ir_graph)) { for (j = 0; j < get_Block_n_cfgpreds(b_pred); j++) { - ir_node *b_pred_pred = get_nodes_block(get_Block_cfgpred(b_pred, j)); - if (is_pred_of(b_pred_pred, pred)) dispensable = 0; + ir_node *b_pred_pred = get_Block_cfgpred_block(b_pred, j); + if (is_pred_of(b_pred_pred, pred)) + goto non_dispensable; } } else { - if (is_pred_of(b_pred, pred)) dispensable = 0; + if (is_pred_of(b_pred, pred)) + goto non_dispensable; } } for (i = pos +1; i < get_Block_n_cfgpreds(b); i++) { - ir_node *b_pred = get_nodes_block(get_Block_cfgpred(b, i)); - if (is_pred_of(b_pred, pred)) dispensable = 0; - } - if (!dispensable) { - set_Block_block_visited(pred, get_irg_block_visited(current_ir_graph)-1); - n_preds = 1; - } else { - n_preds = get_Block_n_cfgpreds(pred); + ir_node *b_pred = get_Block_cfgpred_block(b, i); + if (is_pred_of(b_pred, pred)) + goto non_dispensable; } + /* if we get here, the block is dispensable */ + n_preds = get_Block_n_cfgpreds(pred); } } return n_preds; + +non_dispensable: + set_Block_block_visited(pred, get_irg_block_visited(current_ir_graph)-1); + return 1; } +/** + * Store to defer the exchanged of Phi nodes. + */ +typedef struct _defer_ex_phi { + ir_node *phi_pred; /**< the previous Phi node that will be replaced */ + ir_node *phi; /**< the new Phi node that replaces phi_pred */ +} defer_ex_phi; /** - * This method removed Bad cf preds from Blocks and Phis, and removes + * This method removed Bad cf predecessors from Blocks and Phis, and removes * empty blocks. A block is empty if it only contains Phi and Jmp nodes. * * We first adapt Phi nodes, then Block nodes, as we need the old ins @@ -253,34 +312,36 @@ static int test_whether_dispensable(ir_node *b, int pos) { * for all nodes, not regarding whether there is a possibility for optimization. * * For each predecessor p of a Block b there are three cases: - * 1. The predecessor p is a Bad node: just skip it. The in array of b shrinks by one. - * 2. The predecessor p is empty. Remove p. All predecessors of p are now - * predecessors of b. - * 3. The predecessor p is a block containing useful code. Just keep p as is. + * -1. The predecessor p is a Bad node: just skip it. The in array of b shrinks by one. + * -2. The predecessor p is empty. Remove p. All predecessors of p are now + * predecessors of b. + * -3. The predecessor p is a block containing useful code. Just keep p as is. * * For Phi nodes f we have to check the conditions at the Block of f. * For cases 1 and 3 we proceed as for Blocks. For case 2 we can have two * cases: - * 2a: The old precessor of the Phi f is a Phi pred_f IN THE BLOCK REMOVED. In this + * -2a: The old predecessor of the Phi f is a Phi pred_f IN THE BLOCK REMOVED. In this * case we proceed as for blocks. We remove pred_f. All * predecessors of pred_f now are predecessors of f. - * 2b: The old predecessor of f is NOT in the block removed. It might be a Phi, too. + * -2b: The old predecessor of f is NOT in the block removed. It might be a Phi, too. * We have to replicate f for each predecessor of the removed block. Or, with * other words, the removed predecessor block has exactly one predecessor. * * Further there is a special case for self referencing blocks: + * @verbatim * * then_b else_b then_b else_b * \ / \ / * \ / | / * pred_b | / - * | ____ | / + * | ____ | / ____ * | | | | | | | * | | | === optimized to ===> \ | | | * loop_b | loop_b | * | | | | | | * | |____| | |____| * | | + * @endverbatim * * If there is a Phi in pred_b, but we remove pred_b, we have to generate a * Phi in loop_b, that has the ins of the Phi in pred_b and a self referencing @@ -292,6 +353,7 @@ static void optimize_blocks(ir_node *b, void *env) { int i, j, k, n, max_preds, n_preds, p_preds; ir_node *pred, *phi; ir_node **in; + defer_ex_phi *defers; /* Count the number of predecessor if this block is merged with pred blocks that are empty. */ @@ -299,7 +361,9 @@ static void optimize_blocks(ir_node *b, void *env) { for (i = 0, k = get_Block_n_cfgpreds(b); i < k; ++i) { max_preds += test_whether_dispensable(b, i); } - in = (ir_node **) xmalloc(max_preds * sizeof(ir_node *)); + in = xmalloc(max_preds * sizeof(*in)); + + defers = NEW_ARR_F(defer_ex_phi, 0); /*- printf(" working on "); DDMN(b); @@ -321,7 +385,7 @@ static void optimize_blocks(ir_node *b, void *env) { /* Find the new predecessors for the Phi */ p_preds = 0; for (i = 0, n = get_Block_n_cfgpreds(b); i < n; ++i) { - pred = get_nodes_block(get_Block_cfgpred(b, i)); + pred = get_Block_cfgpred_block(b, i); if (is_Bad(get_Block_cfgpred(b, i))) { /* case Phi 1: Do nothing */ @@ -351,16 +415,32 @@ static void optimize_blocks(ir_node *b, void *env) { Somehow the removed Phi node can be used legally in loops. Therefore we replace the old phi by the new one. + This must be done _AFTER_ all Phis are optimized, or + it will fail if two Phis use the same pred_Phi. + + FIXME: Is the following true? We ALWAYS replace it by the new one. Further we have to remove the old Phi node by replacing it - by Bad. Else it will remain in the keepalive array of End + by Bad. Else it will remain in the keep alive array of End and cause illegal situations. So if there is no loop, we should replace it by Bad. */ if (get_nodes_block(phi_pred) == pred) { + int i; /* remove the Phi as it might be kept alive. Further there might be other users. */ - exchange(phi_pred, phi); /* geht, ist aber doch semantisch falsch! Warum?? */ + for (i = ARR_LEN(defers) - 1; i >= 0; --i) + if (defers[i].phi_pred == phi_pred) + break; + + if (i < 0) { + /* we have a new replacement */ + defer_ex_phi elem; + + elem.phi_pred = phi_pred; + elem.phi = phi; + ARR_APP1(defer_ex_phi, defers, elem); + } } } else { /* case Phi 3: */ @@ -379,6 +459,12 @@ static void optimize_blocks(ir_node *b, void *env) { phi = get_irn_link(phi); } + /* now, exchange all Phis */ + for (i = ARR_LEN(defers) - 1; i >= 0; --i) { + exchange(defers[i].phi_pred, defers[i].phi); + } + DEL_ARR_F(defers); + /*- This happens only if merge between loop backedge and single loop entry. See special case above. -*/ for (k = 0, n = get_Block_n_cfgpreds(b); k < n; ++k) { @@ -468,7 +554,7 @@ static void optimize_blocks(ir_node *b, void *env) { < get_irg_block_visited(current_ir_graph)) { /* case 2: It's an empty block and not yet visited. */ assert(get_Block_n_cfgpreds(b) > 1); - /* Else it should be optimized by equivalent_node. */ + /* Else it should be optimized by equivalent_node. */ for (j = 0; j < get_Block_n_cfgpreds(pred); j++) { ir_node *pred_block = get_Block_cfgpred(pred, j); @@ -511,7 +597,7 @@ static void optimize_blocks(ir_node *b, void *env) { * phase. * @@@ It would be better to add a struct in the link field * that keeps the Phi list and the mark. Place it on an obstack, as - * we will lose blocks and thereby generate mem leaks. + * we will lose blocks and thereby generate memory leaks. */ void optimize_cf(ir_graph *irg) { int i, n; @@ -521,6 +607,10 @@ void optimize_cf(ir_graph *irg) { irg_dom_state dom_state = get_irg_dom_state(current_ir_graph); current_ir_graph = irg; + /* if the graph is not pinned, we cannot determine empty blocks */ + assert(get_irg_pinned(irg) != op_pin_state_floats && + "Control flow optimization need a pinned graph"); + /* Handle graph state */ assert(get_irg_phase_state(irg) != phase_building); if (get_irg_outs_state(current_ir_graph) == outs_consistent) @@ -531,20 +621,24 @@ void optimize_cf(ir_graph *irg) { if (dom_state == dom_consistent && get_opt_optimize() && get_opt_unreachable_code()) { ir_node *end = get_irg_end(irg); - /* we have dominace info, we can kill dead block */ + /* we have dominance info, we can kill dead block */ irg_block_walk_graph(irg, NULL, remove_dead_block_cf, NULL); /* fix the keep-alives */ for (i = 0, n = get_End_n_keepalives(end); i < n; ++i) { ir_node *ka = get_End_keepalive(end, i); - if (is_Block(ka) && (get_Block_dom_depth(ka) == -1)) - set_End_keepalive(end, i, new_Bad()); - if (is_Phi(ka) && (get_Block_dom_depth(get_nodes_block(ka)) == -1)) - set_End_keepalive(end, i, new_Bad()); + if (is_Block(ka)) { + if (get_Block_dom_depth(ka) == -1) + set_End_keepalive(end, i, new_Bad()); + } + else if (is_Block_dead(get_nodes_block(ka)) || get_Block_dom_depth(get_nodes_block(ka)) == -1) + set_End_keepalive(end, i, new_Bad()); } } + irg_block_walk_graph(current_ir_graph, NULL, remove_senseless_conds, NULL); + /* Use block visited flag to mark non-empty blocks. */ inc_irg_block_visited(irg); irg_walk(end, merge_blocks, collect_nodes, NULL); @@ -571,13 +665,17 @@ void optimize_cf(ir_graph *irg) { } else if (get_irn_op(ka) == op_Phi) { mark_irn_visited(ka); ARR_APP1 (ir_node *, in, ka); + } else if (get_irn_op(ka) == op_IJmp) { + mark_irn_visited(ka); + ARR_APP1 (ir_node *, in, ka); } } } /* DEL_ARR_F(end->in); GL @@@ tut nicht ! */ end->in = in; - /* the verifyer doesn't work yet with floating nodes */ + + /* the verifier doesn't work yet with floating nodes */ if (get_irg_pinned(irg) == op_pin_state_pinned) { /* after optimize_cf(), only Bad data flow may remain. */ if (irg_vrfy_bads(irg, BAD_DF | BAD_BLOCK | TUPLE)) {