/*
 * Copyright (C) 1995-2007 University of Karlsruhe.  All rights reserved.
 *
 * This file is part of libFirm.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License
 * Agreement provided with the Software.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

/**
 * @file
 * @brief   Control flow optimizations.
 * @author  Goetz Lindenmaier, Michael Beck, Sebastian Hack
 */

#include "irgraph_t.h"
#include "irbackedge_t.h"
#include "iropt_dbg.h"

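/* Additional headers this excerpt relies on (node accessors, node
 * construction, graph walking and modification, dominance info, the plist
 * used for collected switch-Conds, the array macros and xmalloc).  These are
 * assumed from the APIs used below; the include set of the complete file may
 * differ. */
#include "irnode_t.h"
#include "ircons.h"
#include "irgmod.h"
#include "irgwalk.h"
#include "irdom.h"
#include "iredges.h"
#include "irvrfy.h"
#include "irdump.h"
#include "irflag_t.h"
#include "plist.h"
#include "array.h"
#include "xmalloc.h"
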
/*------------------------------------------------------------------*/
/* Control flow optimization.                                        */
/*                                                                   */
/* Removes Bad control flow predecessors and empty blocks.  A block  */
/* is empty if it contains only a Jmp node.                          */
/* Blocks can only be removed if they are not needed for the         */
/* semantics of Phi nodes.                                           */
/*------------------------------------------------------------------*/

/**
 * Block walker, replacing binary Conds that jump twice into the same
 * block by a simple Jmp: if both the ProjX True and the ProjX False of
 * one Cond are predecessors of the same block, the Cond decides nothing;
 * the first of the two control flow edges is replaced by a Jmp, the
 * second one by Bad.
 *
 * Such patterns are the result of if-conversion.
 *
 * Note that the simple case, in which the block has only these two
 * predecessors, is already handled in equivalent_node_Block().
 */
static void remove_senseless_conds(ir_node *bl, void *env) {
	int i, j;
	int n = get_Block_n_cfgpreds(bl);

	for (i = 0; i < n; ++i) {
		ir_node *pred_i = get_Block_cfgpred(bl, i);
		ir_node *cond_i = skip_Proj(pred_i);

		if (is_Cond(cond_i) && get_irn_mode(get_Cond_selector(cond_i)) == mode_b) {
			for (j = i + 1; j < n; ++j) {
				ir_node *pred_j = get_Block_cfgpred(bl, j);
				ir_node *cond_j = skip_Proj(pred_j);

				if (cond_j == cond_i) {
					ir_node *jmp = new_r_Jmp(current_ir_graph, get_nodes_block(cond_i));
					set_irn_n(bl, i, jmp);
					set_irn_n(bl, j, new_Bad());

					DBG_OPT_IFSIM2(cond_i, jmp);
				}
			}
		}
	}
}

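/* After this walker has run, a block that was entered through both Projs of
 * one boolean Cond has a Jmp and a Bad as the corresponding predecessors; the
 * Bad predecessor is removed later by the regular control flow optimization
 * below. */
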
/** An environment for merge_blocks and collect_nodes. */
typedef struct _merge_env {
	int     changed;   /**< Set if the graph was changed. */
	plist_t *list;     /**< The list of collected switch-Conds. */
} merge_env;

/**
 * Removes Tuples from Block control flow predecessors.
 * Optimizes blocks with equivalent_node().  This is tricky,
 * as we do not want to create nodes that have a Bad block as predecessor.
 * Therefore we also optimize at the control flow operations, depending on
 * how we first reach the Block.
 */
static void merge_blocks(ir_node *node, void *ctx) {
	int i;
	ir_node *new_block;
	merge_env *env = ctx;

	/* clear the link field for ALL nodes first */
	set_irn_link(node, NULL);

	if (is_Block(node)) {
		/* Remove Tuples from the control flow predecessors. */
		for (i = get_Block_n_cfgpreds(node) - 1; i >= 0; --i) {
			ir_node *pred    = get_Block_cfgpred(node, i);
			ir_node *skipped = skip_Tuple(pred);

			if (pred != skipped) {
				set_Block_cfgpred(node, i, skipped);
				env->changed = 1;
			}
		}

		new_block = equivalent_node(node);
		if (new_block != node && ! is_Block_dead(new_block)) {
			exchange(node, new_block);
			env->changed = 1;
		}
	} else if (get_opt_optimize() && (get_irn_mode(node) == mode_X)) {
		/* We will soon visit a block.  Optimize it before visiting! */
		ir_node *b = get_nodes_block(skip_Proj(node));

		if (!is_Block_dead(b)) {
			new_block = equivalent_node(b);

			while (irn_not_visited(b) && (!is_Block_dead(new_block)) && (new_block != b)) {
				/* We would have to run gigo() if new_block is Bad, so we
				   promote it directly below.  Nevertheless, we sometimes reach a
				   block the first time through a dataflow node.  In this case we
				   optimized the block as such and have to promote the Bad here. */
				assert((get_opt_control_flow_straightening() ||
				        get_opt_control_flow_weak_simplification()) &&
				       ("strange flag setting"));
				exchange(b, new_block);
				env->changed = 1;

				b         = new_block;
				new_block = equivalent_node(b);
			}

			/* normally, we would create a Bad block here, but this must be
			 * prevented, so just set its control flow to Bad. */
			if (is_Block_dead(new_block)) {
				exchange(node, new_Bad());
				env->changed = 1;
			}
		}
	}
}

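/* Note: merge_blocks() is used as the pre-walker of irg_walk() in
 * optimize_cf() below, so a block reached through one of its control flow
 * predecessors is already simplified before collect_nodes() inspects its
 * content. */
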
/**
 * Block walker removing control flow from dead blocks by
 * inspecting dominance info.
 * It does not replace blocks by Bad.  This optimization shall
 * ensure that all Bad control flow predecessors are
 * removed, and no new other Bads are introduced.
 *
 * Must be run in the post walker.
 */
static void remove_dead_block_cf(ir_node *block, void *env) {
	int i;
	int *changed = env;

	/* check block predecessors and turn control flow into bad */
	for (i = get_Block_n_cfgpreds(block) - 1; i >= 0; --i) {
		ir_node *pred_X = get_Block_cfgpred(block, i);
		if (! is_Bad(pred_X)) {
			ir_node *pred_bl = get_nodes_block(skip_Proj(pred_X));

			if (is_Block_dead(pred_bl) || (get_Block_dom_depth(pred_bl) < 0)) {
				set_Block_dead(pred_bl);
				exchange(pred_X, new_Bad());
				*changed = 1;
			}
		}
	}
}

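/* get_Block_dom_depth() is negative for blocks that the dominance analysis
 * did not reach, i.e. blocks unreachable from Start; their outgoing control
 * flow (and that of already dead blocks) is turned into Bad here. */
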
/**
 * Collects all Phi nodes in the link list of their Block.
 * Marks all blocks "block_visited" if they contain a node other
 * than Jmp (and Proj).
 * Links all Proj nodes to their predecessors.
 * Collects all switch-Conds in a list.
 */
static void collect_nodes(ir_node *n, void *ctx) {
	ir_op *op = get_irn_op(n);
	merge_env *env = ctx;

	if (op != op_Block) {
		ir_node *b = get_nodes_block(n);

		if (op == op_Phi) {
			/* Collect Phi nodes to compact ins along with block's ins. */
			set_irn_link(n, get_irn_link(b));
			set_irn_link(b, n);
		} else if (op != op_Jmp && !is_Bad(b)) {  /* Check for non-empty block. */
			mark_Block_block_visited(b);

			if (op == op_Proj) {  /* link Proj nodes */
				ir_node *pred = get_Proj_pred(n);

				set_irn_link(n, get_irn_link(pred));
				set_irn_link(pred, n);
			} else if (op == op_Cond) {
				ir_node *sel = get_Cond_selector(n);

				if (mode_is_int(get_irn_mode(sel))) {
					/* found a switch-Cond, collect */
					plist_insert_back(env->list, n);
				}
			}
		}
	}
}

/** Returns true if pred is a predecessor of the block b. */
static int is_pred_of(ir_node *pred, ir_node *b) {
	int i, n;

	for (i = 0, n = get_Block_n_cfgpreds(b); i < n; ++i) {
		ir_node *b_pred = get_Block_cfgpred_block(b, i);
		if (b_pred == pred) return 1;
	}
	return 0;
}

/** Test whether we can optimize away the pred block at position pos of b.
 *
 *  @param b    A block node.
 *  @param pos  The position of the predecessor block to judge about.
 *
 *  @returns    The number of predecessors the block at position pos
 *              contributes to b after the optimization: 0 if it is Bad,
 *              its own number of predecessors if it can be merged into b,
 *              and 1 if it must be kept.
 *
 *  The test is rather tricky.  The situation is something like the
 *  following: b merges the control flow of an if-then-else.  We may not
 *  remove the 'then' _and_ the 'else' block of an 'if' if there is a Phi
 *  node in b, even if both are empty.  The destruction of this Phi
 *  requires that a copy is added before the merge.  We have to
 *  keep one of the case blocks to place the copies in.
 *
 *  To perform the test for pos, we must regard predecessors before pos
 *  as already removed.
 **/
static int test_whether_dispensable(ir_node *b, int pos) {
	int i, j, n_preds = 1;
	ir_node *pred = get_Block_cfgpred_block(b, pos);

	/* Bad blocks will be optimized away, so we don't need space for them */
	if (is_Block_dead(pred))
		return 0;

	if (get_Block_block_visited(pred) + 1
	    < get_irg_block_visited(current_ir_graph)) {

		if (!get_opt_optimize() || !get_opt_control_flow_strong_simplification()) {
			/* Mark block so that it will not be removed: optimization is turned off. */
			set_Block_block_visited(pred, get_irg_block_visited(current_ir_graph) - 1);
			return 1;
		}

		/* Seems to be empty.  At least we detected this in collect_nodes. */
		if (!get_irn_link(b)) {
			/* There are no Phi nodes ==> all predecessors are dispensable. */
			n_preds = get_Block_n_cfgpreds(pred);
		} else {
			/* b's pred blocks and pred's pred blocks must be pairwise disjunct.
			   Handle all pred blocks with preds < pos as if they were already removed. */
			for (i = 0; i < pos; i++) {
				ir_node *b_pred = get_Block_cfgpred_block(b, i);

				if (! is_Block_dead(b_pred) &&
				    get_Block_block_visited(b_pred) + 1
				    < get_irg_block_visited(current_ir_graph)) {
					for (j = 0; j < get_Block_n_cfgpreds(b_pred); j++) {
						ir_node *b_pred_pred = get_Block_cfgpred_block(b_pred, j);
						if (is_pred_of(b_pred_pred, pred))
							goto non_dispensable;
					}
				} else {
					if (is_pred_of(b_pred, pred))
						goto non_dispensable;
				}
			}
			for (i = pos + 1; i < get_Block_n_cfgpreds(b); i++) {
				ir_node *b_pred = get_Block_cfgpred_block(b, i);
				if (is_pred_of(b_pred, pred))
					goto non_dispensable;
			}
			/* if we get here, the block is dispensable */
			n_preds = get_Block_n_cfgpreds(pred);
		}
	}

	return n_preds;

non_dispensable:
	set_Block_block_visited(pred, get_irg_block_visited(current_ir_graph) - 1);
	return 1;
}

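/* A concrete instance of the if-then-else situation described above: for
 *     x = cond ? a : b;
 * both case blocks are empty and meet in a block containing Phi(a, b).  The
 * test lets the first of the two empty predecessors be merged, but reports
 * the second one as non-dispensable, so one block remains in which copies can
 * be placed when the Phi is destroyed later. */
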
/**
 * This method removes Bad cf predecessors from Blocks and Phis, and removes
 * empty blocks.  A block is empty if it only contains Phi and Jmp nodes.
 *
 * We first adapt Phi nodes, then Block nodes, as we need the old ins
 * of the Block to adapt the Phi nodes.  We do this by computing new
 * in arrays, and then replacing the old ones.  So far we compute new in arrays
 * for all nodes, not regarding whether there is a possibility for optimization.
 *
 * For each predecessor p of a Block b there are three cases:
 *  -1. The predecessor p is a Bad node: just skip it.  The in array of b shrinks by one.
 *  -2. The predecessor p is empty.  Remove p.  All predecessors of p are now
 *      predecessors of b.
 *  -3. The predecessor p is a block containing useful code.  Just keep p as is.
 *
 * For Phi nodes f we have to check the conditions at the Block of f.
 * For cases 1 and 3 we proceed as for Blocks.  For case 2 we can have two
 * situations:
 *  -2a: The old predecessor of the Phi f is a Phi pred_f IN THE BLOCK REMOVED.  In this
 *       case we proceed as for blocks.  We remove pred_f.  All
 *       predecessors of pred_f now are predecessors of f.
 *  -2b: The old predecessor of f is NOT in the block removed.  It might be a Phi, too.
 *       We have to replicate f for each predecessor of the removed block.  In
 *       other words, the removed predecessor block has exactly one predecessor.
 *
 * Further there is a special case for self referencing blocks: a block pred_b
 * that merges the branches of an if (then_b and else_b) and jumps into a loop
 * block loop_b is removed, so then_b and else_b become direct predecessors of
 * loop_b while the backedge of loop_b stays as it is.
 *
 * If there is a Phi in pred_b, but we remove pred_b, we have to generate a
 * Phi in loop_b that has the ins of the Phi in pred_b and a self referencing
 * input for the loop backedge.
 *
 * @@@ It is negotiable whether we should do this ... there might end up a copy
 * from the Phi in the loop when removing the Phis.
 */
static void optimize_blocks(ir_node *b, void *env) {
	int i, j, k, n, max_preds, n_preds, p_preds = -1;
	ir_node *pred, *phi;
	ir_node **in;

	/* Count the number of predecessors if this block is merged with pred blocks
	   that are empty. */
	max_preds = 0;
	for (i = 0, k = get_Block_n_cfgpreds(b); i < k; ++i) {
		max_preds += test_whether_dispensable(b, i);
	}
	in = xmalloc(max_preds * sizeof(*in));

	/*- Fix the Phi nodes of the current block -*/
	for (phi = get_irn_link(b); phi; ) {
		assert(get_irn_op(phi) == op_Phi);

		/* Find the new predecessors for the Phi */
		p_preds = 0;
		for (i = 0, n = get_Block_n_cfgpreds(b); i < n; ++i) {
			pred = get_Block_cfgpred_block(b, i);

			if (is_Bad(get_Block_cfgpred(b, i))) {
				/* case Phi 1: Do nothing */
			} else if (get_Block_block_visited(pred) + 1
			           < get_irg_block_visited(current_ir_graph)) {
				/* case Phi 2: It's an empty block and not yet visited. */
				ir_node *phi_pred = get_Phi_pred(phi, i);

				for (j = 0, k = get_Block_n_cfgpreds(pred); j < k; j++) {
					/* because of breaking loops, not all predecessors are Bad-clean,
					 * so we must check this here again */
					if (! is_Bad(get_Block_cfgpred(pred, j))) {
						if (get_nodes_block(phi_pred) == pred) {
							/* case Phi 2a: the input is a Phi in the removed block */
							assert(get_irn_op(phi_pred) == op_Phi);  /* Block is empty!! */

							in[p_preds++] = get_Phi_pred(phi_pred, j);
						} else {
							/* case Phi 2b: the input is defined outside the removed block */
							in[p_preds++] = phi_pred;
						}
					}
				}
			} else {
				/* case Phi 3: keep the input */
				in[p_preds++] = get_Phi_pred(phi, i);
			}
		}
		assert(p_preds <= max_preds);

		/* Fix the node. */
		if (p_preds == 1)
			/* By removal of Bad ins the Phi might be degenerated. */
			exchange(phi, in[0]);
		else
			set_irn_in(phi, p_preds, in);

		phi = get_irn_link(phi);
	}

	/*- This happens only if merge between loop backedge and single loop entry.
	    Moreover, it is only needed if pred is the direct dominator of b, else
	    there can be no uses of the Phi's in pred ... -*/
	for (k = 0, n = get_Block_n_cfgpreds(b); k < n; ++k) {
		pred = get_nodes_block(get_Block_cfgpred(b, k));

		if (get_Block_block_visited(pred) + 1 < get_irg_block_visited(current_ir_graph)) {
			ir_node *next_phi;

			/* we found a predecessor block at position k that will be removed */
			for (phi = get_irn_link(pred); phi; phi = next_phi) {
				int q_preds = 0;
				next_phi = get_irn_link(phi);

				/* move this phi from the predecessor into the block b */
				set_nodes_block(phi, b);
				set_irn_link(phi, get_irn_link(b));
				set_irn_link(b, phi);

				/* first, copy all 0..k-1 predecessors */
				for (i = 0; i < k; i++) {
					pred = get_Block_cfgpred_block(b, i);

					if (is_Bad(pred)) {
						/* Do nothing */
					} else if (get_Block_block_visited(pred) + 1
					           < get_irg_block_visited(current_ir_graph)) {
						/* It's an empty block and not yet visited. */
						for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
							if (! is_Bad(get_Block_cfgpred(pred, j)))
								in[q_preds++] = phi;
						}
					} else {
						in[q_preds++] = phi;
					}
				}

				/* now we are at k, copy the phi predecessors */
				pred = get_nodes_block(get_Block_cfgpred(b, k));
				for (i = 0; i < get_Phi_n_preds(phi); i++) {
					if (! is_Bad(get_Block_cfgpred(pred, i)))
						in[q_preds++] = get_Phi_pred(phi, i);
				}

				/* and now all the rest */
				for (i = k + 1; i < get_Block_n_cfgpreds(b); i++) {
					pred = get_nodes_block(get_Block_cfgpred(b, i));

					if (is_Bad(get_Block_cfgpred(b, i))) {
						/* Do nothing */
					} else if (get_Block_block_visited(pred) + 1
					           < get_irg_block_visited(current_ir_graph)) {
						/* It's an empty block and not yet visited. */
						for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
							if (! is_Bad(get_Block_cfgpred(pred, j)))
								in[q_preds++] = phi;
						}
					} else {
						in[q_preds++] = phi;
					}
				}

				/* Fix the node. */
				if (q_preds == 1)
					exchange(phi, in[0]);
				else
					set_irn_in(phi, q_preds, in);

				assert(q_preds <= max_preds);
				/* assert(p_preds == q_preds && "Wrong Phi Fix"); */
			}
		}
	}

	/*- Fix the block -*/
	n_preds = 0;
	for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
		pred = get_Block_cfgpred_block(b, i);

		if (is_Bad(pred)) {
			/* case 1: Do nothing */
		} else if (get_Block_block_visited(pred) + 1
		           < get_irg_block_visited(current_ir_graph)) {
			/* case 2: It's an empty block and not yet visited. */
			assert(get_Block_n_cfgpreds(b) > 1);
			/* Else it should be optimized by equivalent_node. */
			for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
				ir_node *pred_block = get_Block_cfgpred(pred, j);

				/* because of breaking loops, not all predecessors are Bad-clean,
				 * so we must check this here again */
				if (! is_Bad(pred_block))
					in[n_preds++] = pred_block;
			}
			/* Remove block as it might be kept alive. */
			exchange(pred, b/*new_Bad()*/);
		} else {
			/* case 3: keep the predecessor as it is */
			in[n_preds++] = get_Block_cfgpred(b, i);
		}
	}
	assert(n_preds <= max_preds);

	set_irn_in(b, n_preds, in);

	assert(get_irn_link(b) == NULL || (n_preds == p_preds && "Wrong Phi Fix"));
	xfree(in);
}

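/* max_preds computed via test_whether_dispensable() is an upper bound for all
 * three in-array constructions above: every predecessor of b contributes
 * either nothing (Bad), itself (kept) or all of its own predecessors
 * (merged). */
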
/**
 * Block walker: optimize all blocks using the default optimizations.
 * This removes Blocks with only a Jmp predecessor.
 */
static void remove_simple_blocks(ir_node *block, void *env) {
	ir_node *new_blk = equivalent_node(block);
	int *changed = env;
	if (new_blk != block) {
		exchange(block, new_blk);
		*changed = 1;
	}
}

/**
 * Handle pre-optimized table switch Conds.
 * During iropt, all Projs from a switch-Cond are already removed except
 * the defProj and maybe the taken one.
 * The defProj cannot be removed WITHOUT looking backwards, so we do this here.
 *
 * @param cond the switch-Cond
 *
 * @return non-zero if a switch-Cond was optimized
 *
 * Expects all Projs linked to the cond node.
 */
static int handle_switch_cond(ir_node *cond) {
	ir_node *sel = get_Cond_selector(cond);

	ir_node *proj1 = get_irn_link(cond);
	ir_node *proj2 = get_irn_link(proj1);
	ir_node *jmp, *blk;

	blk = get_nodes_block(cond);

	if (proj2 == NULL) {
		/* this Cond has only one Proj: must be the defProj */
		assert(get_Cond_defaultProj(cond) == get_Proj_proj(proj1));
		/* convert it into a Jmp */
		jmp = new_r_Jmp(current_ir_graph, blk);
		exchange(proj1, jmp);
		return 1;
	} else if (get_irn_link(proj2) == NULL) {
		/* We have exactly two Projs here.  Check if the Cond has
		   a constant argument */
		tarval *tv = value_of(sel);

		if (tv != tarval_bad) {
			/* we have a constant switch */
			long num     = get_tarval_long(tv);
			long def_num = get_Cond_defaultProj(cond);

			if (def_num == get_Proj_proj(proj1)) {
				/* first one is the defProj */
				if (num == get_Proj_proj(proj2)) {
					jmp = new_r_Jmp(current_ir_graph, blk);
					exchange(proj2, jmp);
					exchange(proj1, new_Bad());
					return 1;
				}
			} else if (def_num == get_Proj_proj(proj2)) {
				/* second one is the defProj */
				if (num == get_Proj_proj(proj1)) {
					jmp = new_r_Jmp(current_ir_graph, blk);
					exchange(proj1, jmp);
					exchange(proj2, new_Bad());
					return 1;
				}
			} else {
				/* neither: strange, Cond was not optimized so far */
				if (num == get_Proj_proj(proj1)) {
					jmp = new_r_Jmp(current_ir_graph, blk);
					exchange(proj1, jmp);
					exchange(proj2, new_Bad());
					return 1;
				} else if (num == get_Proj_proj(proj2)) {
					jmp = new_r_Jmp(current_ir_graph, blk);
					exchange(proj2, jmp);
					exchange(proj1, new_Bad());
					return 1;
				}
			}
		}
	}
	return 0;
}

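/* Illustration (not part of the algorithm): a switch whose selector iropt has
 * already proven constant, e.g.
 *     switch (2) { case 2: f(); break; default: g(); }
 * reaches this point with only the defProj and the Proj for case 2 still
 * linked to the Cond; the code above turns the taken Proj into a Jmp and the
 * other one into Bad. */
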
/* Optimizations of the control flow that also require changes of Phi nodes.
 *
 * This optimization performs two passes over the graph.
 *
 * The first pass collects all Phi nodes in a link list in the block
 * nodes.  Further it performs simple control flow optimizations.
 * Finally it marks all blocks that do not contain useful
 * computations, i.e., these blocks might be removed.
 *
 * The second pass performs the optimizations intended by this algorithm.
 * It walks only over block nodes and adapts these and the Phi nodes in these
 * blocks, which it finds in a linked list computed by the first pass.
 *
 * We use the block_visited flag to mark empty blocks in the first
 * phase.
 *
 * @@@ It would be better to add a struct in the link field
 * that keeps the Phi list and the mark.  Place it on an obstack, as
 * we will lose blocks and thereby generate memory leaks.
 */
void optimize_cf(ir_graph *irg) {
	int i, j, n;
	ir_node **in = NULL;
	ir_node *cond, *end = get_irg_end(irg);
	ir_graph *rem = current_ir_graph;
	plist_element_t *el;
	merge_env env;

	assert(get_irg_phase_state(irg) != phase_building);

	/* if the graph is not pinned, we cannot determine empty blocks */
	assert(get_irg_pinned(irg) != op_pin_state_floats &&
	       "Control flow optimization needs a pinned graph");

	current_ir_graph = irg;

	/* FIXME: is this still needed? */
	edges_deactivate(irg);

	env.changed = 0;

	if (get_opt_optimize() && get_opt_unreachable_code()) {
		/* kill dead blocks using dom info */
		assure_doms(irg);
		irg_block_walk_graph(irg, NULL, remove_dead_block_cf, &env.changed);

		/* fix the keep-alives */
		end = get_irg_end(irg);
		for (i = 0, n = get_End_n_keepalives(end); i < n; ++i) {
			ir_node *ka = get_End_keepalive(end, i);

			if (is_Block(ka)) {
				/* do NOT keep dead blocks */
				if (get_Block_dom_depth(ka) < 0) {
					set_End_keepalive(end, i, new_Bad());
					env.changed = 1;
				}
			} else if (is_Block_dead(get_nodes_block(ka)) ||
			           get_Block_dom_depth(get_nodes_block(ka)) < 0) {
				/* do NOT keep nodes in dead blocks */
				set_End_keepalive(end, i, new_Bad());
				env.changed = 1;
			}
		}
	}
	irg_block_walk_graph(irg, NULL, remove_senseless_conds, &env.changed);

	/* Use block visited flag to mark non-empty blocks. */
	inc_irg_block_visited(irg);
	set_using_block_visited(irg);
	set_using_irn_link(irg);

	env.list = plist_new();
	irg_walk(end, merge_blocks, collect_nodes, &env);

	clear_using_block_visited(irg);
	clear_using_irn_link(irg);

	/* handle all collected switch-Conds */
	foreach_plist(env.list, el) {
		cond = plist_element_get_value(el);
		env.changed |= handle_switch_cond(cond);
	}
	plist_free(env.list);

	if (env.changed) {
		/* Handle graph state if it was changed. */
		set_irg_outs_inconsistent(irg);
		set_irg_doms_inconsistent(irg);
		set_irg_extblk_inconsistent(irg);
		set_irg_loopinfo_inconsistent(irg);
	}

	/* Optimize the standard code. */
	env.changed = 0;
	assure_irg_outs(irg);
	irg_block_walk(get_irg_end_block(irg), optimize_blocks, remove_simple_blocks, &env.changed);

	/* Walk all keep-alives, optimize them if block, add to new in-array
	   for end if useful. */
	n = get_End_n_keepalives(end);
	if (n > 0)
		NEW_ARR_A(ir_node *, in, n);

	/* in rare cases a node may be kept alive more than once, use the visited flag to detect this */
	inc_irg_visited(irg);
	set_using_visited(irg);

	/* fix the keep-alives */
	for (i = j = 0; i < n; i++) {
		ir_node *ka = get_End_keepalive(end, i);

		if (irn_not_visited(ka)) {
			ir_op *op = get_irn_op(ka);

			if ((op == op_Block) && Block_not_block_visited(ka)) {
				/* irg_block_walk() will increase the block visited flag, but we must visit only
				   those blocks that are not visited yet, so decrease it first. */
				set_irg_block_visited(irg, get_irg_block_visited(irg) - 1);
				irg_block_walk(ka, optimize_blocks, remove_simple_blocks, &env.changed);
				mark_irn_visited(ka);
				in[j++] = ka;
			}
		}
	}

	for (i = 0; i < n; i++) {
		ir_node *ka = get_End_keepalive(end, i);

		if (irn_not_visited(ka)) {
			ir_op *op = get_irn_op(ka);

			if (op == op_Phi) {
				mark_irn_visited(ka);
				/* don't keep alive dead or already visited blocks */
				if (! is_Block_dead(get_nodes_block(ka)) &&
				    Block_not_block_visited(get_nodes_block(ka)))
					in[j++] = ka;
			} else if (is_op_keep(op)) {
				mark_irn_visited(ka);
				if (! is_Block_dead(get_nodes_block(ka)))
					in[j++] = ka;
			}
		}
	}
	set_End_keepalives(end, j, in);

	clear_using_visited(irg);

	if (env.changed) {
		/* Handle graph state if it was changed. */
		set_irg_outs_inconsistent(irg);
		set_irg_doms_inconsistent(irg);
		set_irg_extblk_inconsistent(irg);
		set_irg_loopinfo_inconsistent(irg);
	}

	/* the verifier doesn't work yet with floating nodes */
	if (get_irg_pinned(irg) == op_pin_state_pinned) {
		/* after optimize_cf(), only Bad data flow may remain. */
		if (irg_vrfy_bads(irg, BAD_DF | BAD_BLOCK | TUPLE)) {
			dump_ir_block_graph(irg, "-vrfy-cf");
			dump_ir_graph(irg, "-vrfy-cf");
			fprintf(stderr, "VRFY_BAD in optimize_cf()\n");
		}
	}

	current_ir_graph = rem;
}

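/* A minimal usage sketch (not part of this file's API, hence disabled): run
 * the control flow optimization on every graph of the program.  It only uses
 * the public irprog accessors get_irp_n_irgs()/get_irp_irg() and the
 * optimize_cf() entry point defined above. */
#if 0
static void run_cfopt_on_all_graphs(void) {
	int i;

	for (i = get_irp_n_irgs() - 1; i >= 0; --i) {
		ir_graph *irg = get_irp_irg(i);

		optimize_cf(irg);
	}
}
#endif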