3 * File name: ir/opt/cfopt.c
4 * Purpose: control flow optimizations
8 * Copyright: (c) 1998-2004 Universität Karlsruhe
9 * Licence: This file is protected by GPL - GNU GENERAL PUBLIC LICENSE.
27 #include "irgraph_t.h"
40 #include "irbackedge_t.h"
46 #include "iropt_dbg.h"
48 /*------------------------------------------------------------------*/
49 /* Control flow optimization. */
51 /* Removes Bad control flow predecessors and empty blocks. A block */
52 /* is empty if it contains only a Jmp node. */
53 /* Blocks can only be removed if they are not needed for the */
54 /* semantics of Phi nodes. */
55 /*------------------------------------------------------------------*/
58 * Replace binary Conds that jump twice into the same block
64 * ProjX True ProjX False ==> | /
69 * Such patterns are the result of if-conversion.
71 * Note that the simple case in which the Block has only these two
72 * predecessors is already handled in equivalent_node_Block().
74 static void remove_senseless_conds(ir_node *bl, void *data)
/* Block walker: scans the control-flow predecessors of bl pairwise.  If two
 * Proj's of the SAME boolean Cond both enter bl, the Cond is senseless:
 * replace one edge by an unconditional Jmp and the other by Bad.
 * NOTE(review): this excerpt is sampled; declarations of i/j and the closing
 * braces are not visible here. */
77 int n = get_Block_n_cfgpreds(bl);
81 for (i = 0; i < n; ++i) {
82 ir_node *pred_i = get_Block_cfgpred(bl, i);
83 ir_node *cond_i = skip_Proj(pred_i);
/* only binary Conds (mode_b selector) are candidates */
85 if (get_irn_op(cond_i) != op_Cond ||
86 get_irn_mode(get_Cond_selector(cond_i)) != mode_b)
/* look for a second predecessor produced by the same Cond */
89 for (j = i + 1; j < n; ++j) {
90 ir_node *pred_j = get_Block_cfgpred(bl, j);
91 ir_node *cond_j = skip_Proj(pred_j);
93 if (cond_j == cond_i) {
/* both Proj's of cond_i end in bl: one Jmp suffices, the duplicate edge dies */
94 ir_node *jmp = new_r_Jmp(current_ir_graph, get_nodes_block(cond_i));
95 set_irn_n(bl, i, jmp);
96 set_irn_n(bl, j, new_Bad());
98 DBG_OPT_IFSIM2(cond_i, jmp);
107 * Removes Tuples from Block control flow predecessors.
108 * Optimizes blocks with equivalent_node(). This is tricky,
109 * as we want to avoid nodes that have as block predecessor Bads.
110 * Therefore we also optimize at control flow operations, depending
111 * on how we first reach the Block.
113 static void merge_blocks(ir_node *node, void *env) {
/* Pre-walker: removes Tuple cf predecessors of Blocks and folds blocks that
 * equivalent_node() proves redundant, before the node itself is visited.
 * Also clears every node's link field for the later collect_nodes phase. */
117 /* clear the link field for ALL nodes first */
118 set_irn_link(node, NULL);
120 if (is_Block(node)) {
123 /* GL @@@ : is this possible? if (get_opt_normalize()) -- added, all tests go through.
124 A different order of optimizations might cause problems. */
125 if (get_opt_normalize()) {
/* skip Tuple nodes in the cf predecessor chain of this block */
126 for (i = 0, n = get_Block_n_cfgpreds(node); i < n; ++i)
127 set_Block_cfgpred(node, i, skip_Tuple(get_Block_cfgpred(node, i)));
/* fold the block if it is equivalent to (and not replaced by) a dead one */
131 new_block = equivalent_node(node);
132 if (new_block != node && ! is_Block_dead(new_block))
133 exchange(node, new_block);
135 } else if (get_opt_optimize() && (get_irn_mode(node) == mode_X)) {
136 /* We will soon visit a block. Optimize it before visiting! */
137 ir_node *b = get_nodes_block(skip_Proj(node));
139 if (!is_Block_dead(b)) {
140 new_block = equivalent_node(b);
/* iterate: folding may expose further equivalences until fixpoint */
142 while (irn_not_visited(b) && (!is_Block_dead(new_block)) && (new_block != b)) {
143 /* We would have to run gigo() if new is bad, so we
144 promote it directly below. Nevertheless, we sometimes reach a block
145 the first time through a dataflow node. In this case we optimized the
146 block as such and have to promote the Bad here. */
147 assert((get_opt_control_flow_straightening() ||
148 get_opt_control_flow_weak_simplification()) &&
149 ("strange flag setting"));
150 exchange (b, new_block);
152 new_block = equivalent_node(b);
155 /* normally, we would create a Bad block here, but this must be
156 * prevented, so just set its cf to Bad.
158 if (is_Block_dead(new_block))
159 exchange(node, new_Bad());
165 * Remove cf from dead block by inspecting dominance info
166 * Do not replace blocks by Bad. This optimization shall
167 * ensure, that all Bad control flow predecessors are
168 * removed, and no new other Bads are introduced.
170 * Must be run in the post walker.
172 static void remove_dead_block_cf(ir_node *block, void *env)
/* Post block-walker: replaces control-flow edges coming from dead blocks
 * (or from blocks with no dominance depth, i.e. unreachable ones) by Bad.
 * Requires consistent dominance info; does not replace blocks themselves. */
176 /* check block predecessors and turn control flow into bad */
177 for (i = 0, n = get_Block_n_cfgpreds(block); i < n; ++i) {
178 ir_node *pred_X = get_Block_cfgpred(block, i);
180 if (! is_Bad(pred_X)) {
181 ir_node *pred_bl = get_nodes_block(skip_Proj(pred_X));
/* dom_depth < 0 means the block was never reached by the dom walker */
183 if (is_Block_dead(pred_bl) || (get_Block_dom_depth(pred_bl) < 0))
184 exchange (pred_X, new_Bad());
190 * Collects all Phi nodes in link list of Block.
191 * Marks all blocks "block_visited" if they contain a node other
193 * Replaces n by Bad if n is unreachable control flow. We do that
194 * in the post walker, so we catch all blocks.
196 * Links all Proj nodes to their predecessors.
198 static void collect_nodes(ir_node *n, void *env) {
/* Walker (post): for non-Block nodes, links Phis into their block's Phi
 * list (via the link field), marks blocks containing anything other than
 * Jmp nodes as non-empty (block_visited flag), and chains Proj nodes to
 * their predecessor for later lookup in handle_switch_cond().
 * NOTE(review): sampled excerpt — the Phi branch's condition line is not
 * visible here. */
199 if (is_no_Block(n)) {
200 ir_node *b = get_nodes_block(n);
201 ir_op *op = get_irn_op(n);
204 /* Collect Phi nodes to compact ins along with block's ins. */
205 set_irn_link(n, get_irn_link(b));
208 else if (op != op_Jmp && !is_Bad(b)) { /* Check for non empty block. */
209 mark_Block_block_visited(b);
211 if (op == op_Proj) { /* link Proj nodes */
212 ir_node *pred = get_Proj_pred(n);
214 set_irn_link(n, get_irn_link(pred));
215 set_irn_link(pred, n);
221 /** Returns true if pred is predecessor of block.
 *
 *  @param pred  candidate predecessor block
 *  @param b     the block whose cf predecessors are scanned
 *  @return 1 if pred is the block of one of b's cf predecessors, else 0
 */
222 static int is_pred_of(ir_node *pred, ir_node *b) {
/* linear scan over all control-flow predecessors of b */
225 for (i = 0, n = get_Block_n_cfgpreds(b); i < n; ++i) {
226 ir_node *b_pred = get_nodes_block(get_Block_cfgpred(b, i));
227 if (b_pred == pred) return 1;
233 /** Test whether we can optimize away pred block pos of b.
235 * @param b A block node.
236 * @param pos The position of the predecessor block to judge about.
238 * @returns The number of predecessors
240 * The test is rather tricky.
242 * The situation is something like the following:
251 * b merges the control flow of an if-then-else. We may not remove
252 * the 'then' _and_ the 'else' block of an 'if' if there is a Phi
253 * node in b, even if both are empty. The destruction of this Phi
254 * requires that a copy is added before the merge. We have to
255 * keep one of the case blocks to place the copies in.
257 * To perform the test for pos, we must regard predecessors before pos
258 * as already removed.
260 static int test_whether_dispensable(ir_node *b, int pos) {
/* Decides whether pred block at position pos of b may be merged into b.
 * Returns the number of cf predecessors b gains for this position:
 * the pred's own pred count if it is dispensable, otherwise marks the
 * pred as "keep" via the block_visited flag. Predecessors before pos
 * are treated as already removed (see header comment above). */
261 int i, j, n_preds = 1;
262 ir_node *pred = get_Block_cfgpred_block(b, pos);
264 /* Bad blocks will be optimized away, so we don't need space for them */
265 if (is_Block_dead(pred))
268 if (get_Block_block_visited(pred) + 1
269 < get_irg_block_visited(current_ir_graph)) {
271 if (!get_opt_optimize() || !get_opt_control_flow_strong_simplification()) {
272 /* Mark block so that it will not be removed: optimization is turned off. */
273 set_Block_block_visited(pred, get_irg_block_visited(current_ir_graph)-1);
277 /* Seems to be empty. At least we detected this in collect_nodes. */
278 if (!get_irn_link(b)) {
279 /* There are no Phi nodes ==> all predecessors are dispensable. */
280 n_preds = get_Block_n_cfgpreds(pred);
282 /* b's pred blocks and pred's pred blocks must be pairwise disjunct.
283 Handle all pred blocks with preds < pos as if they were already removed. */
284 for (i = 0; i < pos; i++) {
285 ir_node *b_pred = get_Block_cfgpred_block(b, i);
286 if (! is_Block_dead(b_pred) &&
287 get_Block_block_visited(b_pred) + 1
288 < get_irg_block_visited(current_ir_graph)) {
/* b_pred is itself dispensable: its preds become b's preds, so test those */
289 for (j = 0; j < get_Block_n_cfgpreds(b_pred); j++) {
290 ir_node *b_pred_pred = get_Block_cfgpred_block(b_pred, j);
291 if (is_pred_of(b_pred_pred, pred))
292 goto non_dispensable;
295 if (is_pred_of(b_pred, pred))
296 goto non_dispensable;
/* predecessors after pos are kept as-is, so compare them directly */
299 for (i = pos +1; i < get_Block_n_cfgpreds(b); i++) {
300 ir_node *b_pred = get_Block_cfgpred_block(b, i);
301 if (is_pred_of(b_pred, pred))
302 goto non_dispensable;
304 /* if we get here, the block is dispensable */
305 n_preds = get_Block_n_cfgpreds(pred);
/* non_dispensable: mark pred so optimize_blocks() keeps it */
312 set_Block_block_visited(pred, get_irg_block_visited(current_ir_graph)-1);
317 * Store to defer the exchange of Phi nodes.
319 typedef struct _defer_ex_phi {
320 ir_node *phi_pred; /**< the previous Phi node that will be replaced */
321 ir_node *phi; /**< the new Phi node that replaces phi_pred */
325 * handle pre-optimized table switch Cond's
327 static void handle_switch_cond(ir_node *proj) {
/* Simplifies integer (table-switch) Conds that became trivial:
 * - a Cond with a single remaining Proj (the default) becomes a Jmp;
 * - a Cond with two Projs and a constant selector keeps only the taken
 *   Proj (as a Jmp) and kills the other with Bad.
 * Relies on the Proj chain built by collect_nodes() via the link field. */
328 ir_node *cond = skip_Proj(proj);
334 sel = get_Cond_selector(cond);
335 if (mode_is_int(get_irn_mode(sel))) {
336 /* check for table switch that could be optimized */
337 ir_node *proj1 = get_irn_link(cond);
338 ir_node *proj2 = get_irn_link(proj1);
341 blk = is_Bad(cond) ? cond : get_nodes_block(cond);
344 /* this Cond has only one Proj: must be the defProj */
345 assert(get_Cond_defaultProj(cond) == get_Proj_proj(proj1));
346 /* convert it into a Jmp */
347 jmp = new_r_Jmp(current_ir_graph, blk);
348 exchange(proj1, jmp);
350 else if (get_irn_link(proj2) == NULL) {
351 /* We have two Proj's here. Check if the Cond has
352 a constant argument */
353 tarval *tv = value_of(sel);
355 if (tv != tarval_bad) {
356 /* we have a constant switch */
357 long num = get_tarval_long(tv);
358 long def_num = get_Cond_defaultProj(cond);
360 if (def_num == get_Proj_proj(proj1)) {
361 /* first one is the defProj */
362 if (num == get_Proj_proj(proj2)) {
363 jmp = new_r_Jmp(current_ir_graph, blk);
364 exchange(proj2, jmp);
365 exchange(proj1, new_Bad());
368 else if (def_num == get_Proj_proj(proj2)) {
369 /* second one is the defProj */
370 if (num == get_Proj_proj(proj1)) {
371 jmp = new_r_Jmp(current_ir_graph, blk);
372 exchange(proj1, jmp);
373 exchange(proj2, new_Bad());
377 /* neither: strange, Cond was not optimized so far */
378 if (num == get_Proj_proj(proj1)) {
379 jmp = new_r_Jmp(current_ir_graph, blk);
380 exchange(proj1, jmp);
381 exchange(proj2, new_Bad());
383 else if (num == get_Proj_proj(proj2)) {
384 jmp = new_r_Jmp(current_ir_graph, blk);
385 exchange(proj2, jmp);
386 exchange(proj1, new_Bad());
395 * This method removes Bad cf predecessors from Blocks and Phis, and removes
396 * empty blocks. A block is empty if it only contains Phi and Jmp nodes.
398 * We first adapt Phi nodes, then Block nodes, as we need the old ins
399 * of the Block to adapt the Phi nodes. We do this by computing new
400 * in arrays, and then replacing the old ones. So far we compute new in arrays
401 * for all nodes, not regarding whether there is a possibility for optimization.
403 * For each predecessor p of a Block b there are three cases:
404 * -1. The predecessor p is a Bad node: just skip it. The in array of b shrinks by one.
405 * -2. The predecessor p is empty. Remove p. All predecessors of p are now
407 * -3. The predecessor p is a block containing useful code. Just keep p as is.
409 * For Phi nodes f we have to check the conditions at the Block of f.
410 * For cases 1 and 3 we proceed as for Blocks. For case 2 we can have two
412 * -2a: The old predecessor of the Phi f is a Phi pred_f IN THE BLOCK REMOVED. In this
413 * case we proceed as for blocks. We remove pred_f. All
414 * predecessors of pred_f now are predecessors of f.
415 * -2b: The old predecessor of f is NOT in the block removed. It might be a Phi, too.
416 * We have to replicate f for each predecessor of the removed block. Or, with
417 * other words, the removed predecessor block has exactly one predecessor.
419 * Further there is a special case for self referencing blocks:
422 * then_b else_b then_b else_b
428 * | | | === optimized to ===> \ | | |
435 * If there is a Phi in pred_b, but we remove pred_b, we have to generate a
436 * Phi in loop_b, that has the ins of the Phi in pred_b and a self referencing
438 * @@@ It is negotiable whether we should do this ... there might end up a copy
439 * from the Phi in the loop when removing the Phis.
441 static void optimize_blocks(ir_node *b, void *env) {
/* Block walker: the core of the optimization.  Recomputes the in-arrays of
 * b and of all its Phi nodes, skipping Bad predecessors and inlining the
 * predecessors of dispensable (empty) blocks as decided by
 * test_whether_dispensable().  See the long comment above for the cases.
 * NOTE(review): sampled excerpt — several declarations and braces are not
 * visible here. */
442 int i, j, k, n, max_preds, n_preds, p_preds;
445 defer_ex_phi *defers;
447 /* Count the number of predecessor if this block is merged with pred blocks
450 for (i = 0, k = get_Block_n_cfgpreds(b); i < k; ++i) {
451 handle_switch_cond(get_Block_cfgpred(b, i));
452 max_preds += test_whether_dispensable(b, i);
/* in-array big enough for the worst case after merging */
454 in = xmalloc(max_preds * sizeof(*in));
456 defers = NEW_ARR_F(defer_ex_phi, 0);
459 printf(" working on "); DDMN(b);
460 for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
461 pred = get_nodes_block(get_Block_cfgpred(b, i));
462 if (is_Bad(get_Block_cfgpred(b, i))) {
463 printf(" removing Bad %i\n ", i);
464 } else if (get_Block_block_visited(pred) +1
465 < get_irg_block_visited(current_ir_graph)) {
466 printf(" removing pred %i ", i); DDMN(pred);
467 } else { printf(" Nothing to do for "); DDMN(pred); }
469 * end Debug output -*/
471 /*- Fix the Phi nodes of the current block -*/
472 for (phi = get_irn_link(b); phi; ) {
473 assert(get_irn_op(phi) == op_Phi);
475 /* Find the new predecessors for the Phi */
477 for (i = 0, n = get_Block_n_cfgpreds(b); i < n; ++i) {
478 pred = get_Block_cfgpred_block(b, i);
480 if (is_Bad(get_Block_cfgpred(b, i))) {
481 /* case Phi 1: Do nothing */
483 else if (get_Block_block_visited(pred) + 1
484 < get_irg_block_visited(current_ir_graph)) {
485 /* case Phi 2: It's an empty block and not yet visited. */
486 ir_node *phi_pred = get_Phi_pred(phi, i);
488 for (j = 0, k = get_Block_n_cfgpreds(pred); j < k; j++) {
489 /* because of breaking loops, not all predecessors are Bad-clean,
490 * so we must check this here again */
491 if (! is_Bad(get_Block_cfgpred(pred, j))) {
492 if (get_nodes_block(phi_pred) == pred) {
/* case 2a: the Phi's operand lives in the removed block -> pull its preds up */
494 assert(get_irn_op(phi_pred) == op_Phi); /* Block is empty!! */
496 in[p_preds++] = get_Phi_pred(phi_pred, j);
/* case 2b: operand lives elsewhere -> replicate it per removed-block pred */
499 in[p_preds++] = phi_pred;
504 /* The Phi_pred node is replaced now if it is a Phi.
506 Somehow the removed Phi node can be used legally in loops.
507 Therefore we replace the old phi by the new one.
508 This must be done _AFTER_ all Phis are optimized, or
509 it will fail if two Phis use the same pred_Phi.
511 FIXME: Is the following true? We ALWAYS replace it by the new one.
513 Further we have to remove the old Phi node by replacing it
514 by Bad. Else it will remain in the keep alive array of End
515 and cause illegal situations. So if there is no loop, we should
518 if (get_nodes_block(phi_pred) == pred) {
520 /* remove the Phi as it might be kept alive. Further there
521 might be other users. */
522 for (i = ARR_LEN(defers) - 1; i >= 0; --i)
523 if (defers[i].phi_pred == phi_pred)
527 /* we have a new replacement */
530 elem.phi_pred = phi_pred;
/* record replacement; carried out after the whole Phi list is processed */
532 ARR_APP1(defer_ex_phi, defers, elem);
/* case Phi 3: predecessor block is kept, Phi operand unchanged */
537 in[p_preds++] = get_Phi_pred(phi, i);
540 assert(p_preds <= max_preds);
544 /* By removal of Bad ins the Phi might be degenerated. */
545 exchange(phi, in[0]);
547 set_irn_in(phi, p_preds, in);
549 phi = get_irn_link(phi);
552 /* now, exchange all Phis */
553 for (i = ARR_LEN(defers) - 1; i >= 0; --i) {
554 exchange(defers[i].phi_pred, defers[i].phi);
558 /*- This happens only if merge between loop backedge and single loop entry.
559 See special case above. -*/
560 for (k = 0, n = get_Block_n_cfgpreds(b); k < n; ++k) {
561 pred = get_nodes_block(get_Block_cfgpred(b, k));
563 if (get_Block_block_visited(pred) + 1 < get_irg_block_visited(current_ir_graph)) {
564 /* we found a predecessor block at position k that will be removed */
565 for (phi = get_irn_link(pred); phi;) {
567 * the previous phase may already changed the phi, and even
568 * removed it at all, so check here if this node is still a phi
570 if (get_irn_op(phi) == op_Phi) {
573 /* move this phi from the predecessor into the block b */
574 set_nodes_block(phi, b);
576 /* first, copy all 0..k-1 predecessors */
577 for (i = 0; i < k; i++) {
578 pred = get_nodes_block(get_Block_cfgpred(b, i));
580 if (is_Bad(get_Block_cfgpred(b, i))) {
582 } else if (get_Block_block_visited(pred) + 1
583 < get_irg_block_visited(current_ir_graph)) {
584 /* It's an empty block and not yet visited. */
585 for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
586 /* @@@ Here we need loop information!  The control flow edge
587 must be a backedge! (At all four in[q_preds] = phi
588 assignments.)  Still, it has worked so far!! */
589 if (! is_Bad(get_Block_cfgpred(pred, j)))
597 /* now we are at k, copy the phi predecessors */
598 pred = get_nodes_block(get_Block_cfgpred(b, k));
599 for (i = 0; i < get_Phi_n_preds(phi); i++) {
600 if (! is_Bad(get_Block_cfgpred(pred, i)))
601 in[q_preds++] = get_Phi_pred(phi, i);
604 /* and now all the rest */
605 for (i = k+1; i < get_Block_n_cfgpreds(b); i++) {
606 pred = get_nodes_block(get_Block_cfgpred(b, i));
608 if (is_Bad(get_Block_cfgpred(b, i))) {
610 } else if (get_Block_block_visited(pred) +1
611 < get_irg_block_visited(current_ir_graph)) {
612 /* It's an empty block and not yet visited. */
613 for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
614 if (! is_Bad(get_Block_cfgpred(pred, j)))
624 exchange(phi, in[0]);
626 set_irn_in(phi, q_preds, in);
628 assert(q_preds <= max_preds);
629 // assert(p_preds == q_preds && "Wrong Phi Fix");
631 phi = get_irn_link(phi);
636 /*- Fix the block -*/
638 for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
639 pred = get_nodes_block(get_Block_cfgpred(b, i));
641 if (is_Bad(get_Block_cfgpred(b, i))) {
642 /* case 1: Do nothing */
643 } else if (get_Block_block_visited(pred) +1
644 < get_irg_block_visited(current_ir_graph)) {
645 /* case 2: It's an empty block and not yet visited. */
646 assert(get_Block_n_cfgpreds(b) > 1);
647 /* Else it should be optimized by equivalent_node. */
648 for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
649 ir_node *pred_block = get_Block_cfgpred(pred, j);
651 /* because of breaking loops, not all predecessors are Bad-clean,
652 * so we must check this here again */
653 if (! is_Bad(pred_block))
654 in[n_preds++] = pred_block;
656 /* Remove block as it might be kept alive. */
657 exchange(pred, b/*new_Bad()*/);
/* case 3: useful block, keep the edge unchanged */
660 in[n_preds++] = get_Block_cfgpred(b, i);
663 assert(n_preds <= max_preds);
665 set_irn_in(b, n_preds, in);
/* Phis and block must agree on the number of surviving predecessors */
667 assert(get_irn_link(b) == NULL || (n_preds == p_preds && "Wrong Phi Fix"));
673 /* Optimizations of the control flow that also require changes of Phi nodes.
675 * This optimization performs two passes over the graph.
677 * The first pass collects all Phi nodes in a link list in the block
678 * nodes. Further it performs simple control flow optimizations.
679 * Finally it marks all blocks that do not contain useful
680 * computations, i.e., these blocks might be removed.
682 * The second pass performs the optimizations intended by this algorithm.
683 * It walks only over block nodes and adapts these and the Phi nodes in these blocks,
684 * which it finds in a linked list computed by the first pass.
686 * We use the block_visited flag to mark empty blocks in the first
688 * @@@ It would be better to add a struct in the link field
689 * that keeps the Phi list and the mark. Place it on an obstack, as
690 * we will lose blocks and thereby generate memory leaks.
692 void optimize_cf(ir_graph *irg) {
/* Public entry point: runs the whole control-flow optimization on irg.
 * Phases: (1) with dominance info, kill dead-block cf and clean keep-alives;
 * (2) remove senseless Conds; (3) merge_blocks/collect_nodes walk;
 * (4) optimize_blocks over all reachable blocks; (5) rebuild the End
 * keep-alive list; (6) optional Bad-verification when the graph is pinned.
 * NOTE(review): sampled excerpt — local declarations and some braces are
 * not visible here. */
695 ir_node *end = get_irg_end(irg);
696 ir_graph *rem = current_ir_graph;
697 irg_dom_state dom_state = get_irg_dom_state(current_ir_graph);
698 current_ir_graph = irg;
/* this pass does not maintain the edges information */
700 edges_deactivate(irg);
702 /* if the graph is not pinned, we cannot determine empty blocks */
703 assert(get_irg_pinned(irg) != op_pin_state_floats &&
704 "Control flow optimization need a pinned graph");
706 /* Handle graph state */
707 assert(get_irg_phase_state(irg) != phase_building);
708 set_irg_outs_inconsistent(current_ir_graph);
709 set_irg_extblk_inconsistent(current_ir_graph);
710 set_irg_loopinfo_inconsistent(current_ir_graph);
711 set_irg_doms_inconsistent(current_ir_graph);
713 if (dom_state == dom_consistent && get_opt_optimize() && get_opt_unreachable_code()) {
716 /* we have dominance info, we can kill dead block */
717 irg_block_walk_graph(irg, NULL, remove_dead_block_cf, NULL);
719 /* fix the keep-alives */
720 end = get_irg_end(irg);
721 for (i = 0, n = get_End_n_keepalives(end); i < n; ++i) {
722 ir_node *ka = get_End_keepalive(end, i);
725 /* do NOT keep dead blocks */
726 if (get_Block_dom_depth(ka) == -1)
727 set_End_keepalive(end, i, new_Bad());
729 else if (is_Block_dead(get_nodes_block(ka)) ||
730 get_Block_dom_depth(get_nodes_block(ka)) == -1)
731 /* do NOT keep nodes in dead blocks */
732 set_End_keepalive(end, i, new_Bad());
735 irg_block_walk_graph(irg, NULL, remove_senseless_conds, NULL);
737 /* Use block visited flag to mark non-empty blocks. */
738 inc_irg_block_visited(irg);
739 irg_walk(end, merge_blocks, collect_nodes, NULL);
741 /* Optimize the standard code. */
742 irg_block_walk(get_irg_end_block(irg), optimize_blocks, NULL, NULL);
744 /* Walk all keep alives, optimize them if block, add to new in-array
745 for end if useful. */
746 n = get_End_n_keepalives(end);
748 NEW_ARR_A (ir_node *, in, n);
749 inc_irg_visited(irg);
751 /* fix the keep alive */
752 for (i = j = 0; i < n; i++) {
753 ir_node *ka = get_End_keepalive(end, i);
755 if (irn_not_visited(ka)) {
756 ir_op *op = get_irn_op(ka);
/* kept-alive blocks not reached from End are optimized separately */
758 if ((op == op_Block) && Block_not_block_visited(ka)) {
759 set_irg_block_visited(irg, /* Don't walk all the way to Start. */
760 get_irg_block_visited(irg)-1);
761 irg_block_walk(ka, optimize_blocks, NULL, NULL);
762 mark_irn_visited(ka);
764 } else if (op == op_Phi) {
765 mark_irn_visited(ka);
/* only keep Phis whose block survived */
766 if (! is_Block_dead(get_nodes_block(ka)))
768 } else if (is_op_keep(op)) {
769 mark_irn_visited(ka);
770 if (! is_Block_dead(get_nodes_block(ka)))
/* j counts the keep-alives that survived the filtering above */
776 set_End_keepalives(end, j, in);
778 /* the verifier doesn't work yet with floating nodes */
779 if (get_irg_pinned(irg) == op_pin_state_pinned) {
780 /* after optimize_cf(), only Bad data flow may remain. */
781 if (irg_vrfy_bads(irg, BAD_DF | BAD_BLOCK | TUPLE)) {
782 dump_ir_block_graph(irg, "-vrfy-cf");
783 dump_ir_graph(irg, "-vrfy-cf");
784 fprintf(stderr, "VRFY_BAD in optimize_cf()\n");
/* restore the caller's current_ir_graph */
788 current_ir_graph = rem;