2 * Copyright (C) 1995-2007 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Control flow optimizations.
23 * @author Goetz Lindenmaier, Michael Beck, Sebastian Hack
35 #include "irgraph_t.h"
49 #include "irbackedge_t.h"
55 #include "iropt_dbg.h"
57 /*------------------------------------------------------------------*/
58 /* Control flow optimization. */
60 /* Removes Bad control flow predecessors and empty blocks. A block */
61 /* is empty if it contains only a Jmp node. */
62 /* Blocks can only be removed if they are not needed for the */
63 /* semantics of Phi nodes. */
64 /*------------------------------------------------------------------*/
67 * Block walker, replacing binary Conds that jumps twice into the same block
73 * ProjX True ProjX False ==> | /
78 * Such patterns are the result of if-conversion.
80 * Note that the simple case that Block has only these two
81 * predecessors are already handled in equivalent_node_Block().
83 static void remove_senseless_conds(ir_node *bl, void *env) {
85 int n = get_Block_n_cfgpreds(bl);
90 for (i = 0; i < n; ++i) {
91 ir_node *pred_i = get_Block_cfgpred(bl, i);
92 ir_node *cond_i = skip_Proj(pred_i);
95 if (is_Cond(cond_i) && get_irn_mode(get_Cond_selector(cond_i)) == mode_b) {
97 for (j = i + 1; j < n; ++j) {
98 ir_node *pred_j = get_Block_cfgpred(bl, j);
99 ir_node *cond_j = skip_Proj(pred_j);
101 if (cond_j == cond_i) {
102 ir_node *jmp = new_r_Jmp(current_ir_graph, get_nodes_block(cond_i));
103 set_irn_n(bl, i, jmp);
104 set_irn_n(bl, j, new_Bad());
106 DBG_OPT_IFSIM2(cond_i, jmp);
115 /** An environment for merge_blocks and collect nodes. */
116 typedef struct _merge_env {
122 * Removes Tuples from Block control flow predecessors.
123 * Optimizes blocks with equivalent_node(). This is tricky,
124 * as we want to avoid nodes that have as block predecessor Bads.
125 * Therefore we also optimize at control flow operations, depending
126 * how we first reach the Block.
128 static void merge_blocks(ir_node *node, void *ctx) {
131 merge_env *env = ctx;
133 /* clear the link field for ALL nodes first */
134 set_irn_link(node, NULL);
136 if (is_Block(node)) {
138 for (i = get_Block_n_cfgpreds(node) - 1; i >= 0; --i) {
139 ir_node *pred = get_Block_cfgpred(node, i);
140 ir_node *skipped = skip_Tuple(pred);
141 if (pred != skipped) {
142 set_Block_cfgpred(node, i, skipped);
148 new_block = equivalent_node(node);
149 if (new_block != node && ! is_Block_dead(new_block)) {
150 exchange(node, new_block);
154 } else if (get_opt_optimize() && (get_irn_mode(node) == mode_X)) {
155 /* We will soon visit a block. Optimize it before visiting! */
156 ir_node *b = get_nodes_block(skip_Proj(node));
158 if (!is_Block_dead(b)) {
159 new_block = equivalent_node(b);
161 while (irn_not_visited(b) && (!is_Block_dead(new_block)) && (new_block != b)) {
162 /* We would have to run gigo() if new is bad, so we
163 promote it directly below. Nevertheless, we sometimes reach a block
164 the first time through a dataflow node. In this case we optimized the
165 block as such and have to promote the Bad here. */
166 assert((get_opt_control_flow_straightening() ||
167 get_opt_control_flow_weak_simplification()) &&
168 ("strange flag setting"));
169 exchange(b, new_block);
172 new_block = equivalent_node(b);
175 /* normally, we would create a Bad block here, but this must be
176 * prevented, so just set it's cf to Bad.
178 if (is_Block_dead(new_block)) {
179 exchange(node, new_Bad());
188 * Block walker removing control flow from dead block by
189 * inspecting dominance info.
190 * Do not replace blocks by Bad. This optimization shall
191 * ensure, that all Bad control flow predecessors are
192 * removed, and no new other Bads are introduced.
194 * Must be run in the post walker.
196 static void remove_dead_block_cf(ir_node *block, void *env) {
200 /* check block predecessors and turn control flow into bad */
201 for (i = get_Block_n_cfgpreds(block) - 1; i >= 0; --i) {
202 ir_node *pred_X = get_Block_cfgpred(block, i);
204 if (! is_Bad(pred_X)) {
205 ir_node *pred_bl = get_nodes_block(skip_Proj(pred_X));
207 if (is_Block_dead(pred_bl) || (get_Block_dom_depth(pred_bl) < 0)) {
208 set_Block_dead(pred_bl);
209 exchange(pred_X, new_Bad());
217 * Collects all Phi nodes in link list of Block.
218 * Marks all blocks "block_visited" if they contain a node other
219 * than Jmp (and Proj).
220 * Links all Proj nodes to their predecessors.
221 * Collects all switch-Conds in a list.
223 static void collect_nodes(ir_node *n, void *ctx) {
224 ir_op *op = get_irn_op(n);
225 merge_env *env = ctx;
227 if (op != op_Block) {
228 ir_node *b = get_nodes_block(n);
231 /* Collect Phi nodes to compact ins along with block's ins. */
232 set_irn_link(n, get_irn_link(b));
234 } else if (op != op_Jmp && !is_Bad(b)) { /* Check for non empty block. */
235 mark_Block_block_visited(b);
237 if (op == op_Proj) { /* link Proj nodes */
238 ir_node *pred = get_Proj_pred(n);
240 set_irn_link(n, get_irn_link(pred));
241 set_irn_link(pred, n);
242 } else if (op == op_Cond) {
243 ir_node *sel = get_Cond_selector(n);
244 if (mode_is_int(get_irn_mode(sel))) {
245 /* found a switch-Cond, collect */
246 plist_insert_back(env->list, n);
253 /** Returns true if pred is predecessor of block. */
254 static int is_pred_of(ir_node *pred, ir_node *b) {
257 for (i = 0, n = get_Block_n_cfgpreds(b); i < n; ++i) {
258 ir_node *b_pred = get_Block_cfgpred_block(b, i);
259 if (b_pred == pred) return 1;
264 /** Test whether we can optimize away pred block pos of b.
266 * @param b A block node.
267 * @param pos The position of the predecessor block to judge about.
269 * @returns The number of predecessors
271 * The test is rather tricky.
273 * The situation is something like the following:
282 * b merges the control flow of an if-then-else. We may not remove
283 * the 'then' _and_ the 'else' block of an 'if' if there is a Phi
284 * node in b, even if both are empty. The destruction of this Phi
285 * requires that a copy is added before the merge. We have to
286 * keep one of the case blocks to place the copies in.
288 * To perform the test for pos, we must regard predecessors before pos
289 * as already removed.
291 static int test_whether_dispensable(ir_node *b, int pos) {
292 int i, j, n_preds = 1;
293 ir_node *pred = get_Block_cfgpred_block(b, pos);
295 /* Bad blocks will be optimized away, so we don't need space for them */
296 if (is_Block_dead(pred))
299 if (get_Block_block_visited(pred) + 1
300 < get_irg_block_visited(current_ir_graph)) {
302 if (!get_opt_optimize() || !get_opt_control_flow_strong_simplification()) {
303 /* Mark block so that is will not be removed: optimization is turned off. */
304 set_Block_block_visited(pred, get_irg_block_visited(current_ir_graph)-1);
308 /* Seems to be empty. At least we detected this in collect_nodes. */
309 if (!get_irn_link(b)) {
310 /* There are no Phi nodes ==> all predecessors are dispensable. */
311 n_preds = get_Block_n_cfgpreds(pred);
313 /* b's pred blocks and pred's pred blocks must be pairwise disjunct.
314 Handle all pred blocks with preds < pos as if they were already removed. */
315 for (i = 0; i < pos; i++) {
316 ir_node *b_pred = get_Block_cfgpred_block(b, i);
317 if (! is_Block_dead(b_pred) &&
318 get_Block_block_visited(b_pred) + 1
319 < get_irg_block_visited(current_ir_graph)) {
320 for (j = 0; j < get_Block_n_cfgpreds(b_pred); j++) {
321 ir_node *b_pred_pred = get_Block_cfgpred_block(b_pred, j);
322 if (is_pred_of(b_pred_pred, pred))
323 goto non_dispensable;
326 if (is_pred_of(b_pred, pred))
327 goto non_dispensable;
330 for (i = pos +1; i < get_Block_n_cfgpreds(b); i++) {
331 ir_node *b_pred = get_Block_cfgpred_block(b, i);
332 if (is_pred_of(b_pred, pred))
333 goto non_dispensable;
335 /* if we get here, the block is dispensable */
336 n_preds = get_Block_n_cfgpreds(pred);
343 set_Block_block_visited(pred, get_irg_block_visited(current_ir_graph)-1);
348 * This method removed Bad cf predecessors from Blocks and Phis, and removes
349 * empty blocks. A block is empty if it only contains Phi and Jmp nodes.
351 * We first adapt Phi nodes, then Block nodes, as we need the old ins
352 * of the Block to adapt the Phi nodes. We do this by computing new
353 * in arrays, and then replacing the old ones. So far we compute new in arrays
354 * for all nodes, not regarding whether there is a possibility for optimization.
356 * For each predecessor p of a Block b there are three cases:
357 * -1. The predecessor p is a Bad node: just skip it. The in array of b shrinks by one.
358 * -2. The predecessor p is empty. Remove p. All predecessors of p are now
360 * -3. The predecessor p is a block containing useful code. Just keep p as is.
362 * For Phi nodes f we have to check the conditions at the Block of f.
363 * For cases 1 and 3 we proceed as for Blocks. For case 2 we can have two
365 * -2a: The old predecessor of the Phi f is a Phi pred_f IN THE BLOCK REMOVED. In this
366 * case we proceed as for blocks. We remove pred_f. All
367 * predecessors of pred_f now are predecessors of f.
368 * -2b: The old predecessor of f is NOT in the block removed. It might be a Phi, too.
369 * We have to replicate f for each predecessor of the removed block. Or, with
370 * other words, the removed predecessor block has exactly one predecessor.
372 * Further there is a special case for self referencing blocks:
375 * then_b else_b then_b else_b
381 * | | | === optimized to ===> \ | | |
388 * If there is a Phi in pred_b, but we remove pred_b, we have to generate a
389 * Phi in loop_b, that has the ins of the Phi in pred_b and a self referencing
391 * @@@ It is negotiable whether we should do this ... there might end up a copy
392 * from the Phi in the loop when removing the Phis.
394 static void optimize_blocks(ir_node *b, void *env) {
395 int i, j, k, n, max_preds, n_preds, p_preds = -1;
400 /* Count the number of predecessor if this block is merged with pred blocks
403 for (i = 0, k = get_Block_n_cfgpreds(b); i < k; ++i) {
404 max_preds += test_whether_dispensable(b, i);
406 in = xmalloc(max_preds * sizeof(*in));
408 /*- Fix the Phi nodes of the current block -*/
409 for (phi = get_irn_link(b); phi; ) {
410 assert(get_irn_op(phi) == op_Phi);
412 /* Find the new predecessors for the Phi */
414 for (i = 0, n = get_Block_n_cfgpreds(b); i < n; ++i) {
415 pred = get_Block_cfgpred_block(b, i);
417 if (is_Bad(get_Block_cfgpred(b, i))) {
418 /* case Phi 1: Do nothing */
420 else if (get_Block_block_visited(pred) + 1
421 < get_irg_block_visited(current_ir_graph)) {
422 /* case Phi 2: It's an empty block and not yet visited. */
423 ir_node *phi_pred = get_Phi_pred(phi, i);
425 for (j = 0, k = get_Block_n_cfgpreds(pred); j < k; j++) {
426 /* because of breaking loops, not all predecessors are Bad-clean,
427 * so we must check this here again */
428 if (! is_Bad(get_Block_cfgpred(pred, j))) {
429 if (get_nodes_block(phi_pred) == pred) {
431 assert(get_irn_op(phi_pred) == op_Phi); /* Block is empty!! */
433 in[p_preds++] = get_Phi_pred(phi_pred, j);
436 in[p_preds++] = phi_pred;
442 in[p_preds++] = get_Phi_pred(phi, i);
445 assert(p_preds <= max_preds);
449 /* By removal of Bad ins the Phi might be degenerated. */
450 exchange(phi, in[0]);
452 set_irn_in(phi, p_preds, in);
455 phi = get_irn_link(phi);
458 /*- This happens only if merge between loop backedge and single loop entry.
459 Moreover, it is only needed if predb is the direct dominator of b, else there can be no uses
460 of the Phi's in predb ... -*/
461 for (k = 0, n = get_Block_n_cfgpreds(b); k < n; ++k) {
462 ir_node *predb = get_nodes_block(get_Block_cfgpred(b, k));
464 if (get_Block_block_visited(predb) + 1 < get_irg_block_visited(current_ir_graph)) {
467 /* we found a predecessor block at position k that will be removed */
468 for (phi = get_irn_link(predb); phi; phi = next_phi) {
470 next_phi = get_irn_link(phi);
474 if (get_Block_idom(b) != predb) {
475 /* predb is not the dominator. There can't be uses of pred's Phi nodes, kill them .*/
476 exchange(phi, new_Bad());
478 /* predb is the direct dominator of b. There might be uses of the Phi nodes from
479 predb in further block, so move this phi from the predecessor into the block b */
480 set_nodes_block(phi, b);
481 set_irn_link(phi, get_irn_link(b));
482 set_irn_link(b, phi);
484 /* first, copy all 0..k-1 predecessors */
485 for (i = 0; i < k; i++) {
486 pred = get_Block_cfgpred_block(b, i);
490 } else if (get_Block_block_visited(pred) + 1
491 < get_irg_block_visited(current_ir_graph)) {
492 /* It's an empty block and not yet visited. */
493 for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
494 if (! is_Bad(get_Block_cfgpred(pred, j)))
502 /* now we are at k, copy the phi predecessors */
503 pred = get_nodes_block(get_Block_cfgpred(b, k));
504 for (i = 0; i < get_Phi_n_preds(phi); i++) {
505 if (! is_Bad(get_Block_cfgpred(pred, i)))
506 in[q_preds++] = get_Phi_pred(phi, i);
509 /* and now all the rest */
510 for (i = k+1; i < get_Block_n_cfgpreds(b); i++) {
511 pred = get_nodes_block(get_Block_cfgpred(b, i));
513 if (is_Bad(get_Block_cfgpred(b, i))) {
515 } else if (get_Block_block_visited(pred) +1
516 < get_irg_block_visited(current_ir_graph)) {
517 /* It's an empty block and not yet visited. */
518 for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
519 if (! is_Bad(get_Block_cfgpred(pred, j)))
529 exchange(phi, in[0]);
531 set_irn_in(phi, q_preds, in);
534 assert(q_preds <= max_preds);
535 // assert(p_preds == q_preds && "Wrong Phi Fix");
541 /*- Fix the block -*/
543 for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
544 pred = get_Block_cfgpred_block(b, i);
547 /* case 1: Do nothing */
548 } else if (get_Block_block_visited(pred) +1
549 < get_irg_block_visited(current_ir_graph)) {
550 /* case 2: It's an empty block and not yet visited. */
551 assert(get_Block_n_cfgpreds(b) > 1);
552 /* Else it should be optimized by equivalent_node. */
553 for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
554 ir_node *pred_block = get_Block_cfgpred(pred, j);
556 /* because of breaking loops, not all predecessors are Bad-clean,
557 * so we must check this here again */
558 if (! is_Bad(pred_block))
559 in[n_preds++] = pred_block;
561 /* Remove block as it might be kept alive. */
562 exchange(pred, b/*new_Bad()*/);
565 in[n_preds++] = get_Block_cfgpred(b, i);
568 assert(n_preds <= max_preds);
570 set_irn_in(b, n_preds, in);
573 assert(get_irn_link(b) == NULL || p_preds == -1 || (n_preds == p_preds && "Wrong Phi Fix"));
578 * Block walker: optimize all blocks using the default optimizations.
579 * This removes Blocks that with only a Jmp predecessor.
581 static void remove_simple_blocks(ir_node *block, void *env) {
582 ir_node *new_blk = equivalent_node(block);
585 if (new_blk != block) {
586 exchange(block, new_blk);
592 * Handle pre-optimized table switch Cond's.
593 * During iropt, all Projs from a switch-Cond are already removed except
594 * the defProj and maybe the taken one.
595 * The defProj cannot be removed WITHOUT looking backwards, so we do this here.
597 * @param cond the switch-Cond
599 * @return non-zero if a switch-Cond was optimized
601 * Expects all Proj's linked to the cond node
603 static int handle_switch_cond(ir_node *cond) {
604 ir_node *sel = get_Cond_selector(cond);
606 ir_node *proj1 = get_irn_link(cond);
607 ir_node *proj2 = get_irn_link(proj1);
610 blk = get_nodes_block(cond);
613 /* this Cond has only one Proj: must be the defProj */
614 assert(get_Cond_defaultProj(cond) == get_Proj_proj(proj1));
615 /* convert it into a Jmp */
616 jmp = new_r_Jmp(current_ir_graph, blk);
617 exchange(proj1, jmp);
619 } else if (get_irn_link(proj2) == NULL) {
620 /* We have two Proj's here. Check if the Cond has
621 a constant argument */
622 tarval *tv = value_of(sel);
624 if (tv != tarval_bad) {
625 /* we have a constant switch */
626 long num = get_tarval_long(tv);
627 long def_num = get_Cond_defaultProj(cond);
629 if (def_num == get_Proj_proj(proj1)) {
630 /* first one is the defProj */
631 if (num == get_Proj_proj(proj2)) {
632 jmp = new_r_Jmp(current_ir_graph, blk);
633 exchange(proj2, jmp);
634 exchange(proj1, new_Bad());
637 } else if (def_num == get_Proj_proj(proj2)) {
638 /* second one is the defProj */
639 if (num == get_Proj_proj(proj1)) {
640 jmp = new_r_Jmp(current_ir_graph, blk);
641 exchange(proj1, jmp);
642 exchange(proj2, new_Bad());
646 /* neither: strange, Cond was not optimized so far */
647 if (num == get_Proj_proj(proj1)) {
648 jmp = new_r_Jmp(current_ir_graph, blk);
649 exchange(proj1, jmp);
650 exchange(proj2, new_Bad());
652 } else if (num == get_Proj_proj(proj2)) {
653 jmp = new_r_Jmp(current_ir_graph, blk);
654 exchange(proj2, jmp);
655 exchange(proj1, new_Bad());
664 /* Optimizations of the control flow that also require changes of Phi nodes.
666 * This optimization performs two passes over the graph.
668 * The first pass collects all Phi nodes in a link list in the block
669 * nodes. Further it performs simple control flow optimizations.
670 * Finally it marks all blocks that do not contain useful
671 * computations, i.e., these blocks might be removed.
673 * The second pass performs the optimizations intended by this algorithm.
674 * It walks only over block nodes and adapts these and the Phi nodes in these blocks,
675 * which it finds in a linked list computed by the first pass.
677 * We use the block_visited flag to mark empty blocks in the first
679 * @@@ It would be better to add a struct in the link field
680 * that keeps the Phi list and the mark. Place it on an obstack, as
681 * we will lose blocks and thereby generate memory leaks.
683 void optimize_cf(ir_graph *irg) {
686 ir_node *cond, *end = get_irg_end(irg);
687 ir_graph *rem = current_ir_graph;
691 assert(get_irg_phase_state(irg) != phase_building);
693 /* if the graph is not pinned, we cannot determine empty blocks */
694 assert(get_irg_pinned(irg) != op_pin_state_floats &&
695 "Control flow optimization need a pinned graph");
697 current_ir_graph = irg;
699 /* FIXME: is this still needed? */
700 edges_deactivate(irg);
703 if (get_opt_optimize() && get_opt_unreachable_code()) {
706 /* kill dead blocks using dom info */
708 irg_block_walk_graph(irg, NULL, remove_dead_block_cf, &env.changed);
710 /* fix the keep-alives */
711 end = get_irg_end(irg);
712 for (i = 0, n = get_End_n_keepalives(end); i < n; ++i) {
713 ir_node *ka = get_End_keepalive(end, i);
716 /* do NOT keep dead blocks */
717 if (get_Block_dom_depth(ka) < 0) {
718 set_End_keepalive(end, i, new_Bad());
721 } else if (is_Block_dead(get_nodes_block(ka)) ||
722 get_Block_dom_depth(get_nodes_block(ka)) < 0) {
723 /* do NOT keep nodes in dead blocks */
724 set_End_keepalive(end, i, new_Bad());
729 irg_block_walk_graph(irg, NULL, remove_senseless_conds, &env.changed);
731 /* Use block visited flag to mark non-empty blocks. */
732 inc_irg_block_visited(irg);
733 set_using_block_visited(irg);
734 set_using_irn_link(irg);
736 env.list = plist_new();
737 irg_walk(end, merge_blocks, collect_nodes, &env);
739 clear_using_block_visited(irg);
740 clear_using_irn_link(irg);
742 /* handle all collected switch-Conds */
743 foreach_plist(env.list, el) {
744 cond = plist_element_get_value(el);
745 env.changed |= handle_switch_cond(cond);
747 plist_free(env.list);
750 /* Handle graph state if was changed. */
751 set_irg_outs_inconsistent(irg);
752 set_irg_doms_inconsistent(irg);
753 set_irg_extblk_inconsistent(irg);
754 set_irg_loopinfo_inconsistent(irg);
757 /* Optimize the standard code. */
760 irg_block_walk(get_irg_end_block(irg), optimize_blocks, remove_simple_blocks, &env.changed);
762 /* Walk all keep alives, optimize them if block, add to new in-array
763 for end if useful. */
764 n = get_End_n_keepalives(end);
766 NEW_ARR_A(ir_node *, in, n);
768 /* in rare cases a node may be kept alive more than once, use the visited flag to detect this */
769 inc_irg_visited(irg);
770 set_using_visited(irg);
772 /* fix the keep alive */
773 for (i = j = 0; i < n; i++) {
774 ir_node *ka = get_End_keepalive(end, i);
776 if (irn_not_visited(ka)) {
777 ir_op *op = get_irn_op(ka);
779 if ((op == op_Block) && Block_not_block_visited(ka)) {
780 /* irg_block_walk() will increase the block visited flag, but we must visit only
781 these blocks that are not visited yet, so decrease it first. */
782 set_irg_block_visited(irg, get_irg_block_visited(irg) - 1);
783 irg_block_walk(ka, optimize_blocks, remove_simple_blocks, &env.changed);
784 mark_irn_visited(ka);
786 } else if (op == op_Phi) {
787 mark_irn_visited(ka);
788 /* don't keep alive dead blocks */
789 if (! is_Block_dead(get_nodes_block(ka)))
791 } else if (is_op_keep(op)) {
792 mark_irn_visited(ka);
793 if (! is_Block_dead(get_nodes_block(ka)))
799 set_End_keepalives(end, j, in);
803 clear_using_visited(irg);
806 /* Handle graph state if was changed. */
807 set_irg_outs_inconsistent(irg);
808 set_irg_doms_inconsistent(irg);
809 set_irg_extblk_inconsistent(irg);
810 set_irg_loopinfo_inconsistent(irg);
813 /* the verifier doesn't work yet with floating nodes */
814 if (get_irg_pinned(irg) == op_pin_state_pinned) {
815 /* after optimize_cf(), only Bad data flow may remain. */
816 if (irg_vrfy_bads(irg, BAD_DF | BAD_BLOCK | TUPLE)) {
817 dump_ir_block_graph(irg, "-vrfy-cf");
818 dump_ir_graph(irg, "-vrfy-cf");
819 fprintf(stderr, "VRFY_BAD in optimize_cf()\n");
823 current_ir_graph = rem;