X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=ir%2Fopt%2Fcfopt.c;h=0f0fd84292359b156b94678c1cd3d583529c04b4;hb=8eb0a1256d88d68e762c1f087bffadca68dc5c12;hp=fb7480382cc0411586b366e023390275b9124be5;hpb=537ab2daad45aea2daa8741b64dacccadb1445d5;p=libfirm diff --git a/ir/opt/cfopt.c b/ir/opt/cfopt.c index fb7480382..0f0fd8429 100644 --- a/ir/opt/cfopt.c +++ b/ir/opt/cfopt.c @@ -10,11 +10,12 @@ */ #ifdef HAVE_CONFIG_H -# include +# include "config.h" #endif #include +#include "xmalloc.h" #include "irnode_t.h" #include "irgraph_t.h" #include "irprog_t.h" @@ -22,6 +23,8 @@ #include "ircons.h" #include "iropt_t.h" #include "irgwalk.h" +#include "irgmod.h" +#include "irdump.h" #include "irvrfy.h" #include "array.h" @@ -36,12 +39,44 @@ /*------------------------------------------------------------------*/ /* Control flow optimization. */ +/* */ /* Removes Bad control flow predecessors and empty blocks. A block */ /* is empty if it contains only a Jmp node. */ /* Blocks can only be removed if they are not needed for the */ /* semantics of Phi nodes. */ /*------------------------------------------------------------------*/ + +static void remove_senseless_conds(ir_node *bl, void *data) +{ + int i, j; + int n = get_irn_arity(bl); + + assert(is_Block(bl)); + + for(i = 0; i < n; ++i) { + ir_node *pred_i = get_irn_n(bl, i); + ir_node *cond_i = skip_Proj(pred_i); + + for(j = i + 1; j < n; ++j) { + ir_node *pred_j = get_irn_n(bl, j); + ir_node *cond_j = skip_Proj(pred_j); + + if(cond_j == cond_i + && get_irn_opcode(cond_i) == iro_Cond + && get_irn_mode(get_Cond_selector(cond_i)) == mode_b) { + + ir_node *jmp = new_r_Jmp(current_ir_graph, get_nodes_block(cond_i)); + set_irn_n(bl, i, jmp); + set_irn_n(bl, j, new_Bad()); + + break; + } + } + } +} + + /** * Removes Tuples from Block control flow predecessors. * Optimizes blocks with equivalent_node(). This is tricky, @@ -53,6 +88,7 @@ static void merge_blocks(ir_node *n, void *env) { int i; ir_node *new_block; + /* clear the link field for ALL nodes first */ set_irn_link(n, NULL); if (get_irn_op(n) == op_Block) { @@ -63,40 +99,63 @@ static void merge_blocks(ir_node *n, void *env) { if (get_opt_normalize()) set_Block_cfgpred(n, i, skip_Tuple(get_Block_cfgpred(n, i))); } + + /* see below */ new_block = equivalent_node(n); - if (new_block != n) + if (new_block != n && ! is_Bad(new_block)) exchange (n, new_block); } else if (get_opt_optimize() && (get_irn_mode(n) == mode_X)) { /* We will soon visit a block. Optimize it before visiting! */ - ir_node *b = get_nodes_block(n); + ir_node *b = get_nodes_block(skip_Proj(n)); if (!is_Bad(b)) { new_block = equivalent_node(b); while (irn_not_visited(b) && (!is_Bad(new_block)) && (new_block != b)) { /* We would have to run gigo if new is bad, so we - promote it directly below. Nevertheless, we somtimes reach a block + promote it directly below. Nevertheless, we sometimes reach a block the first time through a dataflow node. In this case we optimized the block as such and have to promote the Bad here. */ - assert(((b == new_block) || - get_opt_control_flow_straightening() || - get_opt_control_flow_weak_simplification()) && - ("strange flag setting")); + assert((get_opt_control_flow_straightening() || + get_opt_control_flow_weak_simplification()) && + ("strange flag setting")); exchange (b, new_block); b = new_block; new_block = equivalent_node(b); } - b = new_block; + + /* normally, we would create a Bad block here, but this must be + * prevented, so just set it's cf to Bad. + */ + if (is_Bad(new_block)) + exchange(n, new_Bad()); } + } +} + +/** + * Remove cf from dead block by inspecting dominance info + * Do not replace blocks by Bad. This optimization shall + * ensure, that all Bad cfg preds are removed, and no new + * other Bads are introduced. + * + * Must be run in the post walker. + */ +static void remove_dead_block_cf(ir_node *block, void *env) +{ + int i, n; + + /* check block predecessors and turn control flow into bad */ + for (i = 0, n = get_Block_n_cfgpreds(block); i < n; ++i) { + ir_node *pred_X = get_Block_cfgpred(block, i); - /* - * BEWARE: do not kill floating notes here as they might be needed in - * valid blocks because of global CSE. - */ - if (is_Bad(b) && get_opt_normalize() && - get_op_pinned(get_irn_op(n)) == op_pin_state_pinned) - exchange(n, new_Bad()); + if (! is_Bad(pred_X)) { + ir_node *pred_bl = get_nodes_block(skip_Proj(pred_X)); + + if (is_Bad(pred_bl) || (get_Block_dom_depth(pred_bl) == -1)) + exchange (pred_X, new_Bad()); + } } } @@ -108,44 +167,25 @@ static void merge_blocks(ir_node *n, void *env) { * in the post walker, so we catch all blocks. */ static void collect_nodes(ir_node *n, void *env) { - irg_dom_state *dom_state = env; - if (is_no_Block(n)) { ir_node *b = get_nodes_block(n); - /* - * BEWARE: do not kill floating notes here as they might be needed in - * valid blocks because of global CSE. - */ - if (is_Bad(b) && - get_op_pinned(get_irn_op(n)) == op_pin_state_pinned) { - /* previous merge_blocks() may have killed dead blocks */ - exchange(n, new_Bad()); - } - else if ((get_irn_op(n) == op_Phi)) { + if ((get_irn_op(n) == op_Phi)) { /* Collect Phi nodes to compact ins along with block's ins. */ set_irn_link(n, get_irn_link(b)); set_irn_link(b, n); - } else if ((get_irn_op(n) != op_Jmp) && !is_Bad(b)) { /* Check for non empty block. */ - mark_Block_block_visited(b); } - } - else { - /* delete dead blocks: if we have dominator information, this can easily be detected - * BEWARE: don't kill the end block */ - if (*dom_state == dom_consistent && - n != get_irg_end_block(current_ir_graph) && - get_Block_dom_depth(n) == -1 && - get_opt_unreachable_code()) { - exchange (n, new_Bad()); + else if ((get_irn_op(n) != op_Jmp) && !is_Bad(b)) { /* Check for non empty block. */ + mark_Block_block_visited(b); } } } /** Returns true if pred is predecessor of block. */ static int is_pred_of(ir_node *pred, ir_node *b) { - int i; - for (i = 0; i < get_Block_n_cfgpreds(b); i++) { + int i, n; + + for (i = 0, n = get_Block_n_cfgpreds(b); i < n; ++i) { ir_node *b_pred = get_nodes_block(get_Block_cfgpred(b, i)); if (b_pred == pred) return 1; } @@ -153,22 +193,54 @@ static int is_pred_of(ir_node *pred, ir_node *b) { } +/** Test wether we can optimize away pred block pos of b. + * + * @param b A block node. + * @param pos The position of the predecessor block to judge about. + * + * @returns The number of predecessors + * + * The test is rather tricky. + * + * The situation is something like the following: + * + * if-block + * / \ + * then-b else-b + * \ / + * b + * + * b merges the control flow of an if-then-else. We may not remove + * the 'then' _and_ the 'else' block of an 'if' if there is a Phi + * node in b, even if both are empty. The destruction of this Phi + * requires that a copy is added before the merge. We have to + * keep one of the case blocks to place the copies in. + * + * To perform the test for pos, we must regard preds before pos + * as already removed. + **/ static int test_whether_dispensable(ir_node *b, int pos) { int i, j, n_preds = 1; int dispensable = 1; ir_node *cfop = get_Block_cfgpred(b, pos); ir_node *pred = get_nodes_block(cfop); + /* Bad blocks will be optimized away, so we don't need space for them */ + if (is_Bad(pred)) + return 0; + if (get_Block_block_visited(pred) + 1 < get_irg_block_visited(current_ir_graph)) { + if (!get_opt_optimize() || !get_opt_control_flow_strong_simplification()) { - /* Mark block so that is will not be removed. */ + /* Mark block so that is will not be removed: optimization is turned off. */ set_Block_block_visited(pred, get_irg_block_visited(current_ir_graph)-1); return 1; } - /* Seems to be empty. */ + + /* Seems to be empty. At least we detected this in collect_nodes. */ if (!get_irn_link(b)) { - /* There are no Phi nodes ==> dispensable. */ + /* There are no Phi nodes ==> all predecessors are dispensable. */ n_preds = get_Block_n_cfgpreds(pred); } else { /* b's pred blocks and pred's pred blocks must be pairwise disjunct. @@ -201,18 +273,64 @@ static int test_whether_dispensable(ir_node *b, int pos) { return n_preds; } + +/** + * This method removed Bad cf preds from Blocks and Phis, and removes + * empty blocks. A block is empty if it only contains Phi and Jmp nodes. + * + * We first adapt Phi nodes, then Block nodes, as we need the old ins + * of the Block to adapt the Phi nodes. We do this by computing new + * in arrays, and then replacing the old ones. So far we compute new in arrays + * for all nodes, not regarding whether there is a possibility for optimization. + * + * For each predecessor p of a Block b there are three cases: + * 1. The predecessor p is a Bad node: just skip it. The in array of b shrinks by one. + * 2. The predecessor p is empty. Remove p. All predecessors of p are now + * predecessors of b. + * 3. The predecessor p is a block containing useful code. Just keep p as is. + * + * For Phi nodes f we have to check the conditions at the Block of f. + * For cases 1 and 3 we proceed as for Blocks. For case 2 we can have two + * cases: + * 2a: The old precessor of the Phi f is a Phi pred_f IN THE BLOCK REMOVED. In this + * case we proceed as for blocks. We remove pred_f. All + * predecessors of pred_f now are predecessors of f. + * 2b: The old predecessor of f is NOT in the block removed. It might be a Phi, too. + * We have to replicate f for each predecessor of the removed block. Or, with + * other words, the removed predecessor block has exactly one predecessor. + * + * Further there is a special case for self referencing blocks: + * + * then_b else_b then_b else_b + * \ / \ / + * \ / | / + * pred_b | / + * | ____ | / + * | | | | | | | + * | | | === optimized to ===> \ | | | + * loop_b | loop_b | + * | | | | | | + * | |____| | |____| + * | | + * + * If there is a Phi in pred_b, but we remove pred_b, we have to generate a + * Phi in loop_b, that has the ins of the Phi in pred_b and a self referencing + * backedge. + * @@@ It is negotiable whether we should do this ... there might end up a copy + * from the Phi in the loop when removing the Phis. + */ static void optimize_blocks(ir_node *b, void *env) { - int i, j, k, max_preds, n_preds; + int i, j, k, n, max_preds, n_preds, p_preds; ir_node *pred, *phi; ir_node **in; /* Count the number of predecessor if this block is merged with pred blocks that are empty. */ max_preds = 0; - for (i = 0; i < get_Block_n_cfgpreds(b); i++) { + for (i = 0, k = get_Block_n_cfgpreds(b); i < k; ++i) { max_preds += test_whether_dispensable(b, i); } - in = (ir_node **) malloc(max_preds * sizeof(ir_node *)); + in = xmalloc(max_preds * sizeof(*in)); /*- printf(" working on "); DDMN(b); @@ -227,109 +345,143 @@ static void optimize_blocks(ir_node *b, void *env) { } * end Debug output -*/ - /*- Fix the Phi nodes -*/ - phi = get_irn_link(b); - while (phi) { + /*- Fix the Phi nodes of the current block -*/ + for (phi = get_irn_link(b); phi; ) { assert(get_irn_op(phi) == op_Phi); + /* Find the new predecessors for the Phi */ - n_preds = 0; - for (i = 0; i < get_Block_n_cfgpreds(b); i++) { + p_preds = 0; + for (i = 0, n = get_Block_n_cfgpreds(b); i < n; ++i) { pred = get_nodes_block(get_Block_cfgpred(b, i)); + if (is_Bad(get_Block_cfgpred(b, i))) { - /* Do nothing */ - } else if (get_Block_block_visited(pred) + 1 + /* case Phi 1: Do nothing */ + } + else if (get_Block_block_visited(pred) + 1 < get_irg_block_visited(current_ir_graph)) { - /* It's an empty block and not yet visited. */ + /* case Phi 2: It's an empty block and not yet visited. */ ir_node *phi_pred = get_Phi_pred(phi, i); - for (j = 0; j < get_Block_n_cfgpreds(pred); j++) { - if (get_nodes_block(phi_pred) == pred) { - assert(get_irn_op(phi_pred) == op_Phi); /* Block is empty!! */ - in[n_preds] = get_Phi_pred(phi_pred, j); - } else { - in[n_preds] = phi_pred; + for (j = 0, k = get_Block_n_cfgpreds(pred); j < k; j++) { + /* because of breaking loops, not all predecessors are Bad-clean, + * so we must check this here again */ + if (! is_Bad(get_Block_cfgpred(pred, j))) { + if (get_nodes_block(phi_pred) == pred) { + /* case Phi 2a: */ + assert(get_irn_op(phi_pred) == op_Phi); /* Block is empty!! */ + + in[p_preds++] = get_Phi_pred(phi_pred, j); + } else { + /* case Phi 2b: */ + in[p_preds++] = phi_pred; + } } - n_preds++; } + /* The Phi_pred node is replaced now if it is a Phi. - In Schleifen kann offenbar der entfernte Phi Knoten legal verwendet werden. - Daher muss der Phiknoten durch den neuen ersetzt werden. - Weiter muss der alte Phiknoten entfernt werden (durch ersetzen oder - durch einen Bad) damit er aus den keep_alive verschwinden kann. - Man sollte also, falls keine Schleife vorliegt, exchange mit new_Bad - aufrufen. */ + + Somehow the removed Phi node can be used legally in loops. + Therefore we replace the old phi by the new one. + + Further we have to remove the old Phi node by replacing it + by Bad. Else it will remain in the keepalive array of End + and cause illegal situations. So if there is no loop, we should + replace it by Bad. + */ if (get_nodes_block(phi_pred) == pred) { /* remove the Phi as it might be kept alive. Further there might be other users. */ exchange(phi_pred, phi); /* geht, ist aber doch semantisch falsch! Warum?? */ } } else { - in[n_preds] = get_Phi_pred(phi, i); - n_preds ++; + /* case Phi 3: */ + in[p_preds++] = get_Phi_pred(phi, i); } } + assert(p_preds <= max_preds); + /* Fix the node */ - if (n_preds == 1) + if (p_preds == 1) + /* By removal of Bad ins the Phi might be degenerated. */ exchange(phi, in[0]); else - set_irn_in(phi, n_preds, in); + set_irn_in(phi, p_preds, in); phi = get_irn_link(phi); } - /*- This happens only if merge between loop backedge and single loop entry. -*/ - for (k = 0; k < get_Block_n_cfgpreds(b); k++) { + /*- This happens only if merge between loop backedge and single loop entry. + See special case above. -*/ + for (k = 0, n = get_Block_n_cfgpreds(b); k < n; ++k) { pred = get_nodes_block(get_Block_cfgpred(b, k)); - if (get_Block_block_visited(pred)+1 < get_irg_block_visited(current_ir_graph)) { - phi = get_irn_link(pred); - while (phi) { + + if (get_Block_block_visited(pred) + 1 < get_irg_block_visited(current_ir_graph)) { + /* we found a predecessor block at position k that will be removed */ + for (phi = get_irn_link(pred); phi;) { + /* + * the previous phase may already changed the phi, and even + * removed it at all, so check here if this node is still a phi + */ if (get_irn_op(phi) == op_Phi) { + int q_preds = 0; + + /* move this phi from the predecessor into the block b */ set_nodes_block(phi, b); - n_preds = 0; + /* first, copy all 0..k-1 predecessors */ for (i = 0; i < k; i++) { pred = get_nodes_block(get_Block_cfgpred(b, i)); + if (is_Bad(get_Block_cfgpred(b, i))) { /* Do nothing */ - } else if (get_Block_block_visited(pred) +1 + } else if (get_Block_block_visited(pred) + 1 < get_irg_block_visited(current_ir_graph)) { /* It's an empty block and not yet visited. */ for (j = 0; j < get_Block_n_cfgpreds(pred); j++) { /* @@@ Hier brauche ich Schleifeninformation!!! Kontrollflusskante - muss Rueckwaertskante sein! (An allen vier in[n_preds] = phi + muss Rueckwaertskante sein! (An allen vier in[q_preds] = phi Anweisungen.) Trotzdem tuts bisher!! */ - in[n_preds] = phi; - n_preds++; + if (! is_Bad(get_Block_cfgpred(pred, j))) + in[q_preds++] = phi; } } else { - in[n_preds] = phi; - n_preds++; + in[q_preds++] = phi; } } + + /* now we are at k, copy the phi predecessors */ + pred = get_nodes_block(get_Block_cfgpred(b, k)); for (i = 0; i < get_Phi_n_preds(phi); i++) { - in[n_preds] = get_Phi_pred(phi, i); - n_preds++; + if (! is_Bad(get_Block_cfgpred(pred, i))) + in[q_preds++] = get_Phi_pred(phi, i); } + + /* and now all the rest */ for (i = k+1; i < get_Block_n_cfgpreds(b); i++) { pred = get_nodes_block(get_Block_cfgpred(b, i)); + if (is_Bad(get_Block_cfgpred(b, i))) { /* Do nothing */ } else if (get_Block_block_visited(pred) +1 < get_irg_block_visited(current_ir_graph)) { /* It's an empty block and not yet visited. */ for (j = 0; j < get_Block_n_cfgpreds(pred); j++) { - in[n_preds] = phi; - n_preds++; + if (! is_Bad(get_Block_cfgpred(pred, j))) + in[q_preds++] = phi; } } else { - in[n_preds] = phi; - n_preds++; + in[q_preds++] = phi; } } - if (n_preds == 1) + + /* Fix the node */ + if (q_preds == 1) exchange(phi, in[0]); else - set_irn_in(phi, n_preds, in); + set_irn_in(phi, q_preds, in); + + assert(q_preds <= max_preds); +// assert(p_preds == q_preds && "Wrong Phi Fix"); } phi = get_irn_link(phi); } @@ -340,30 +492,60 @@ static void optimize_blocks(ir_node *b, void *env) { n_preds = 0; for (i = 0; i < get_Block_n_cfgpreds(b); i++) { pred = get_nodes_block(get_Block_cfgpred(b, i)); + if (is_Bad(get_Block_cfgpred(b, i))) { - /* Do nothing */ + /* case 1: Do nothing */ } else if (get_Block_block_visited(pred) +1 < get_irg_block_visited(current_ir_graph)) { - /* It's an empty block and not yet visited. */ + /* case 2: It's an empty block and not yet visited. */ assert(get_Block_n_cfgpreds(b) > 1); /* Else it should be optimized by equivalent_node. */ for (j = 0; j < get_Block_n_cfgpreds(pred); j++) { - in[n_preds] = get_Block_cfgpred(pred, j); - n_preds++; + ir_node *pred_block = get_Block_cfgpred(pred, j); + + /* because of breaking loops, not all predecessors are Bad-clean, + * so we must check this here again */ + if (! is_Bad(pred_block)) + in[n_preds++] = pred_block; } /* Remove block as it might be kept alive. */ exchange(pred, b/*new_Bad()*/); } else { - in[n_preds] = get_Block_cfgpred(b, i); - n_preds ++; + /* case 3: */ + in[n_preds++] = get_Block_cfgpred(b, i); } } + assert(n_preds <= max_preds); + set_irn_in(b, n_preds, in); - free(in); + + assert(get_irn_link(b) == NULL || (n_preds == p_preds && "Wrong Phi Fix")); + + xfree(in); } + +/* Optimizations of the control flow that also require changes of Phi nodes. + * + * This optimization performs two passes over the graph. + * + * The first pass collects all Phi nodes in a link list in the block + * nodes. Further it performs simple control flow optimizations. + * Finally it marks all blocks that do not contain useful + * computations, i.e., these blocks might be removed. + * + * The second pass performs the optimizations intended by this algorithm. + * It walks only over block nodes and adapts these and the Phi nodes in these blocks, + * which it finds in a linked list computed by the first pass. + * + * We use the block_visited flag to mark empty blocks in the first + * phase. + * @@@ It would be better to add a struct in the link field + * that keeps the Phi list and the mark. Place it on an obstack, as + * we will lose blocks and thereby generate mem leaks. + */ void optimize_cf(ir_graph *irg) { - int i; + int i, n; ir_node **in; ir_node *end = get_irg_end(irg); ir_graph *rem = current_ir_graph; @@ -377,9 +559,27 @@ void optimize_cf(ir_graph *irg) { if (get_irg_dom_state(current_ir_graph) == dom_consistent) set_irg_dom_inconsistent(current_ir_graph); + if (dom_state == dom_consistent && get_opt_optimize() && get_opt_unreachable_code()) { + ir_node *end = get_irg_end(irg); + + /* we have dominace info, we can kill dead block */ + irg_block_walk_graph(irg, NULL, remove_dead_block_cf, NULL); + + /* fix the keep-alives */ + for (i = 0, n = get_End_n_keepalives(end); i < n; ++i) { + ir_node *ka = get_End_keepalive(end, i); + + if (is_Block(ka) && (get_Block_dom_depth(ka) == -1)) + set_End_keepalive(end, i, new_Bad()); + if (is_Phi(ka) && (get_Block_dom_depth(get_nodes_block(ka)) == -1)) + set_End_keepalive(end, i, new_Bad()); + } + } + + irg_block_walk_graph(current_ir_graph, NULL, remove_senseless_conds, NULL); /* Use block visited flag to mark non-empty blocks. */ inc_irg_block_visited(irg); - irg_walk(end, merge_blocks, collect_nodes, &dom_state); + irg_walk(end, merge_blocks, collect_nodes, NULL); /* Optimize the standard code. */ irg_block_walk(get_irg_end_block(irg), optimize_blocks, NULL, NULL); @@ -389,8 +589,10 @@ void optimize_cf(ir_graph *irg) { in = NEW_ARR_F (ir_node *, 1); in[0] = get_nodes_block(end); inc_irg_visited(current_ir_graph); - for(i = 0; i < get_End_n_keepalives(end); i++) { + + for (i = 0; i < get_End_n_keepalives(end); i++) { ir_node *ka = get_End_keepalive(end, i); + if (irn_not_visited(ka)) { if ((get_irn_op(ka) == op_Block) && Block_not_block_visited(ka)) { set_irg_block_visited(current_ir_graph, /* Don't walk all the way to Start. */ @@ -407,11 +609,15 @@ void optimize_cf(ir_graph *irg) { /* DEL_ARR_F(end->in); GL @@@ tut nicht ! */ end->in = in; - /* after optimize_cf(), only Bad data flow may remain. */ - if (irg_vrfy_bads(irg, BAD_DF | BAD_BLOCK | TUPLE)) { - dump_ir_block_graph(irg, "-vrfy-cf"); - dump_ir_graph(irg, "-vrfy-cf"); - fprintf(stderr, "VRFY_BAD in optimize_cf()\n"); + + /* the verifyer doesn't work yet with floating nodes */ + if (get_irg_pinned(irg) == op_pin_state_pinned) { + /* after optimize_cf(), only Bad data flow may remain. */ + if (irg_vrfy_bads(irg, BAD_DF | BAD_BLOCK | TUPLE)) { + dump_ir_block_graph(irg, "-vrfy-cf"); + dump_ir_graph(irg, "-vrfy-cf"); + fprintf(stderr, "VRFY_BAD in optimize_cf()\n"); + } } current_ir_graph = rem;