3 * File name: ir/opt/cfopt.c
4 * Purpose: control flow optimizations
8 * Copyright: (c) 1998-2004 Universität Karlsruhe
9 * Licence: This file protected by GPL - GNU GENERAL PUBLIC LICENSE.
27 #include "irgraph_t.h"
41 #include "irbackedge_t.h"
47 #include "iropt_dbg.h"
49 /*------------------------------------------------------------------*/
50 /* Control flow optimization. */
52 /* Removes Bad control flow predecessors and empty blocks. A block */
53 /* is empty if it contains only a Jmp node. */
54 /* Blocks can only be removed if they are not needed for the */
55 /* semantics of Phi nodes. */
56 /*------------------------------------------------------------------*/
59  * Replace binary Conds that jump twice into the same block
65 * ProjX True ProjX False ==> | /
70  * Such patterns are the result of if-conversion.
72 * Note that the simple case that Block has only these two
73 * predecessors are already handled in equivalent_node_Block().
75 static void remove_senseless_conds(ir_node *bl, void *data)
/* NOTE(review): the embedded original line numbers jump (75, 78, 82, ...),
 * so statements are elided from this excerpt; the comments below describe
 * only the code that is visible. */
78   int n = get_Block_n_cfgpreds(bl);
/* Compare every pair (i, j) of control-flow predecessors of bl. */
82   for (i = 0; i < n; ++i) {
83     ir_node *pred_i = get_Block_cfgpred(bl, i);
84     ir_node *cond_i = skip_Proj(pred_i);
/* Only two-way (mode_b selector) Conds are candidates here. */
86     if (get_irn_op(cond_i) != op_Cond ||
87         get_irn_mode(get_Cond_selector(cond_i)) != mode_b)
90     for (j = i + 1; j < n; ++j) {
91       ir_node *pred_j = get_Block_cfgpred(bl, j);
92       ir_node *cond_j = skip_Proj(pred_j);
/* Both predecessors stem from the same Cond: both of its branches end in
 * bl, so the Cond is senseless. Replace one edge by an unconditional Jmp
 * placed in the Cond's block and kill the duplicate edge with Bad. */
94       if (cond_j == cond_i) {
95         ir_node *jmp = new_r_Jmp(current_ir_graph, get_nodes_block(cond_i));
96         set_irn_n(bl, i, jmp);
97         set_irn_n(bl, j, new_Bad());
99         DBG_OPT_IFSIM2(cond_i, jmp);
108 * Removes Tuples from Block control flow predecessors.
109 * Optimizes blocks with equivalent_node(). This is tricky,
110 * as we want to avoid nodes that have as block predecessor Bads.
111 * Therefore we also optimize at control flow operations, depending
112 * how we first reach the Block.
114 static void merge_blocks(ir_node *node, void *env) {
/* Pre-walker: removes Tuple cf predecessors from Blocks and replaces
 * Blocks / cf nodes by their equivalent_node() results.
 * NOTE(review): line numbers in this excerpt jump, so some statements
 * (declarations, closing braces) are elided; comments describe only the
 * visible code. */
118   /* clear the link field for ALL nodes first */
119   set_irn_link(node, NULL);
121   if (is_Block(node)) {
124     /* GL @@@ : is this possible? if (get_opt_normalize()) -- added, all tests go through.
125        A different order of optimizations might cause problems. */
126     if (get_opt_normalize()) {
/* Skip Tuples in the cf predecessors so later phases see plain cf edges. */
127       for (i = 0, n = get_Block_n_cfgpreds(node); i < n; ++i)
128         set_Block_cfgpred(node, i, skip_Tuple(get_Block_cfgpred(node, i)));
/* Replace the Block by its simplified equivalent, but never by a dead one. */
132     new_block = equivalent_node(node);
133     if (new_block != node && ! is_Block_dead(new_block))
134       exchange(node, new_block);
136   } else if (get_opt_optimize() && (get_irn_mode(node) == mode_X)) {
137     /* We will soon visit a block. Optimize it before visiting! */
138     ir_node *b = get_nodes_block(skip_Proj(node));
140     if (!is_Block_dead(b)) {
141       new_block = equivalent_node(b);
/* Iterate: equivalent_node may enable further simplification each time
 * the block is exchanged, until a fixpoint (or a dead block) is reached. */
143       while (irn_not_visited(b) && (!is_Block_dead(new_block)) && (new_block != b)) {
144         /* We would have to run gigo() if new is bad, so we
145            promote it directly below. Nevertheless, we sometimes reach a block
146            the first time through a dataflow node. In this case we optimized the
147            block as such and have to promote the Bad here. */
148         assert((get_opt_control_flow_straightening() ||
149                 get_opt_control_flow_weak_simplification()) &&
150                ("strange flag setting"));
151         exchange (b, new_block);
153         new_block = equivalent_node(b);
156       /* normally, we would create a Bad block here, but this must be
157        * prevented, so just set its cf to Bad.
159       if (is_Block_dead(new_block))
160         exchange(node, new_Bad());
166 * Remove cf from dead block by inspecting dominance info
167 * Do not replace blocks by Bad. This optimization shall
168 * ensure, that all Bad control flow predecessors are
169 * removed, and no new other Bads are introduced.
171 * Must be run in the post walker.
173 static void remove_dead_block_cf(ir_node *block, void *env)
/* Post-walker: uses dominance info to detect unreachable predecessors.
 * A predecessor block that is dead or has dom depth < 0 (never reached
 * by the dominance computation) gets its cf edge replaced by Bad.
 * NOTE(review): this excerpt elides some lines (line numbers jump). */
177   /* check block predecessors and turn control flow into bad */
178   for (i = 0, n = get_Block_n_cfgpreds(block); i < n; ++i) {
179     ir_node *pred_X = get_Block_cfgpred(block, i);
181     if (! is_Bad(pred_X)) {
182       ir_node *pred_bl = get_nodes_block(skip_Proj(pred_X));
184       if (is_Block_dead(pred_bl) || (get_Block_dom_depth(pred_bl) < 0))
185         exchange (pred_X, new_Bad());
191 * Collects all Phi nodes in link list of Block.
192 * Marks all blocks "block_visited" if they contain a node other
194 * Replaces n by Bad if n is unreachable control flow. We do that
195 * in the post walker, so we catch all blocks.
197 * Links all Proj nodes to their predecessors.
199 static void collect_nodes(ir_node *n, void *env) {
/* Walker: chains every Phi into its block's link list, marks blocks that
 * contain anything other than a Jmp as non-empty (block_visited flag),
 * and links Proj nodes onto their predecessor.
 * NOTE(review): line numbers jump in this excerpt; the branch that handles
 * op_Phi (between lines 202 and 209 of the original) is elided. */
200   if (is_no_Block(n)) {
201     ir_node *b = get_nodes_block(n);
202     ir_op *op = get_irn_op(n);
205       /* Collect Phi nodes to compact ins along with block's ins. */
206       set_irn_link(n, get_irn_link(b));
/* Any non-Jmp node makes its block "non-empty": mark via block_visited. */
209     else if (op != op_Jmp && !is_Bad(b)) {  /* Check for non empty block. */
210       mark_Block_block_visited(b);
/* Chain Projs onto their predecessor so handle_switch_cond() can walk the
 * Projs of a Cond via get_irn_link(). */
212       if (op == op_Proj) {               /* link Proj nodes */
213         ir_node *pred = get_Proj_pred(n);
215         set_irn_link(n, get_irn_link(pred));
216         set_irn_link(pred, n);
222 /** Returns true if pred is predecessor of block. */
223 static int is_pred_of(ir_node *pred, ir_node *b) {
/* Linear scan over b's cf predecessor blocks; returns 1 on a match.
 * NOTE(review): the "return 0;" fall-through is elided from this excerpt. */
226   for (i = 0, n = get_Block_n_cfgpreds(b); i < n; ++i) {
227     ir_node *b_pred = get_nodes_block(get_Block_cfgpred(b, i));
228     if (b_pred == pred) return 1;
234 /** Test whether we can optimize away pred block pos of b.
236 * @param b A block node.
237 * @param pos The position of the predecessor block to judge about.
239 * @returns The number of predecessors
241 * The test is rather tricky.
243 * The situation is something like the following:
252 * b merges the control flow of an if-then-else. We may not remove
253 * the 'then' _and_ the 'else' block of an 'if' if there is a Phi
254 * node in b, even if both are empty. The destruction of this Phi
255 * requires that a copy is added before the merge. We have to
256 * keep one of the case blocks to place the copies in.
258 * To perform the test for pos, we must regard predecessors before pos
259 * as already removed.
261 static int test_whether_dispensable(ir_node *b, int pos) {
/* Decides whether the pred block at position pos of b may be merged away,
 * and returns how many cf predecessors b gains at that position (0 for a
 * dead pred, pred's own pred count if dispensable, 1 otherwise).
 * Marks pred as "keep" by bumping its block_visited when not dispensable.
 * NOTE(review): line numbers jump in this excerpt; the non_dispensable
 * label and final returns are elided. */
262   int i, j, n_preds = 1;
263   ir_node *pred = get_Block_cfgpred_block(b, pos);
265   /* Bad blocks will be optimized away, so we don't need space for them */
266   if (is_Block_dead(pred))
/* block_visited + 1 < irg_block_visited  <=>  pred was NOT marked as
 * non-empty by collect_nodes, i.e. it looks like an empty block. */
269   if (get_Block_block_visited(pred) + 1
270       < get_irg_block_visited(current_ir_graph)) {
272     if (!get_opt_optimize() || !get_opt_control_flow_strong_simplification()) {
273       /* Mark block so that is will not be removed: optimization is turned off. */
274       set_Block_block_visited(pred, get_irg_block_visited(current_ir_graph)-1);
278     /* Seems to be empty. At least we detected this in collect_nodes. */
279     if (!get_irn_link(b)) {
280       /* There are no Phi nodes ==> all predecessors are dispensable. */
281       n_preds = get_Block_n_cfgpreds(pred);
283       /* b's pred blocks and pred's pred blocks must be pairwise disjunct.
284          Handle all pred blocks with preds < pos as if they were already removed. */
285       for (i = 0; i < pos; i++) {
286         ir_node *b_pred = get_Block_cfgpred_block(b, i);
287         if (! is_Block_dead(b_pred) &&
288             get_Block_block_visited(b_pred) + 1
289             < get_irg_block_visited(current_ir_graph)) {
/* b_pred is itself empty and already merged: its preds stand in for it. */
290           for (j = 0; j < get_Block_n_cfgpreds(b_pred); j++) {
291             ir_node *b_pred_pred = get_Block_cfgpred_block(b_pred, j);
292             if (is_pred_of(b_pred_pred, pred))
293               goto non_dispensable;
296           if (is_pred_of(b_pred, pred))
297             goto non_dispensable;
/* Predecessors after pos are regarded as still present. */
300       for (i = pos +1; i < get_Block_n_cfgpreds(b); i++) {
301         ir_node *b_pred = get_Block_cfgpred_block(b, i);
302         if (is_pred_of(b_pred, pred))
303           goto non_dispensable;
305       /* if we get here, the block is dispensable */
306       n_preds = get_Block_n_cfgpreds(pred);
/* Not dispensable: mark pred as non-empty so later phases keep it. */
313   set_Block_block_visited(pred, get_irg_block_visited(current_ir_graph)-1);
318  * Store to defer the exchange of Phi nodes.
/* Deferred Phi exchange record: exchanges must happen after all Phis in a
 * block are fixed, or two Phis using the same pred_Phi would conflict. */
320 typedef struct _defer_ex_phi {
321   ir_node *phi_pred;    /**< the previous Phi node that will be replaced */
322   ir_node *phi;         /**< the new Phi node that replaces phi_pred */
326 * handle pre-optimized table switch Cond's
328 static void handle_switch_cond(ir_node *proj) {
/* Simplifies integer (table-switch) Conds whose outcome is statically
 * known: a Cond with only the default Proj, or with exactly two Projs and
 * a constant selector, is turned into a Jmp (and the dead Proj into Bad).
 * Relies on the Proj chain built by collect_nodes() via get_irn_link().
 * NOTE(review): line numbers jump in this excerpt; some declarations and
 * closing braces are elided. */
329   ir_node *cond = skip_Proj(proj);
335   sel = get_Cond_selector(cond);
336   if (mode_is_int(get_irn_mode(sel))) {
337     /* check for table switch that could be optimized */
338     ir_node *proj1 = get_irn_link(cond);
339     ir_node *proj2 = get_irn_link(proj1);
342     blk = is_Bad(cond) ? cond : get_nodes_block(cond);
345       /* this Cond has only one Proj: must be the defProj */
346       assert(get_Cond_defaultProj(cond) == get_Proj_proj(proj1));
347       /* convert it into a Jmp */
348       jmp = new_r_Jmp(current_ir_graph, blk);
349       exchange(proj1, jmp);
/* proj2 exists and no third Proj follows: exactly two Projs. */
351     else if (get_irn_link(proj2) == NULL) {
352       /* We have two Proj's here. Check if the Cond has
353          a constant argument */
354       tarval *tv = value_of(sel);
356       if (tv != tarval_bad) {
357         /* we have a constant switch */
358         long num     = get_tarval_long(tv);
359         long def_num = get_Cond_defaultProj(cond);
361         if (def_num == get_Proj_proj(proj1)) {
362           /* first one is the defProj */
363           if (num == get_Proj_proj(proj2)) {
364             jmp = new_r_Jmp(current_ir_graph, blk);
365             exchange(proj2, jmp);
366             exchange(proj1, new_Bad());
369         else if (def_num == get_Proj_proj(proj2)) {
370           /* second one is the defProj */
371           if (num == get_Proj_proj(proj1)) {
372             jmp = new_r_Jmp(current_ir_graph, blk);
373             exchange(proj1, jmp);
374             exchange(proj2, new_Bad());
378           /* neither: strange, Cond was not optimized so far */
379           if (num == get_Proj_proj(proj1)) {
380             jmp = new_r_Jmp(current_ir_graph, blk);
381             exchange(proj1, jmp);
382             exchange(proj2, new_Bad());
384           else if (num == get_Proj_proj(proj2)) {
385             jmp = new_r_Jmp(current_ir_graph, blk);
386             exchange(proj2, jmp);
387             exchange(proj1, new_Bad());
396  * This method removes Bad cf predecessors from Blocks and Phis, and removes
397 * empty blocks. A block is empty if it only contains Phi and Jmp nodes.
399 * We first adapt Phi nodes, then Block nodes, as we need the old ins
400 * of the Block to adapt the Phi nodes. We do this by computing new
401 * in arrays, and then replacing the old ones. So far we compute new in arrays
402 * for all nodes, not regarding whether there is a possibility for optimization.
404 * For each predecessor p of a Block b there are three cases:
405 * -1. The predecessor p is a Bad node: just skip it. The in array of b shrinks by one.
406 * -2. The predecessor p is empty. Remove p. All predecessors of p are now
408 * -3. The predecessor p is a block containing useful code. Just keep p as is.
410 * For Phi nodes f we have to check the conditions at the Block of f.
411 * For cases 1 and 3 we proceed as for Blocks. For case 2 we can have two
413 * -2a: The old predecessor of the Phi f is a Phi pred_f IN THE BLOCK REMOVED. In this
414 * case we proceed as for blocks. We remove pred_f. All
415 * predecessors of pred_f now are predecessors of f.
416 * -2b: The old predecessor of f is NOT in the block removed. It might be a Phi, too.
417 * We have to replicate f for each predecessor of the removed block. Or, with
418 * other words, the removed predecessor block has exactly one predecessor.
420 * Further there is a special case for self referencing blocks:
423 * then_b else_b then_b else_b
429 * | | | === optimized to ===> \ | | |
436 * If there is a Phi in pred_b, but we remove pred_b, we have to generate a
437 * Phi in loop_b, that has the ins of the Phi in pred_b and a self referencing
439 * @@@ It is negotiable whether we should do this ... there might end up a copy
440 * from the Phi in the loop when removing the Phis.
442 static void optimize_blocks(ir_node *b, void *env) {
/* Block-walker that performs the actual merge: computes new in-arrays for
 * b's Phis and for b itself, skipping Bad preds (case 1), inlining the
 * preds of dispensable empty blocks (case 2), and keeping useful preds
 * (case 3) -- see the large comment above this function.
 * NOTE(review): line numbers in this excerpt jump, so declarations,
 * closing braces and several statements are elided; comments below
 * describe only the visible code. */
443   int i, j, k, n, max_preds, n_preds, p_preds = -1;
446   defer_ex_phi *defers;
448   /* Count the number of predecessor if this block is merged with pred blocks
451   for (i = 0, k = get_Block_n_cfgpreds(b); i < k; ++i) {
452     handle_switch_cond(get_Block_cfgpred(b, i));
453     max_preds += test_whether_dispensable(b, i);
/* in[] is sized for the worst case computed above and reused for every
 * Phi and for the block itself. */
455   in = xmalloc(max_preds * sizeof(*in));
457   defers = NEW_ARR_F(defer_ex_phi, 0);
460   printf(" working on "); DDMN(b);
461   for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
462     pred = get_nodes_block(get_Block_cfgpred(b, i));
463     if (is_Bad(get_Block_cfgpred(b, i))) {
464       printf("  removing Bad %i\n ", i);
465     } else if (get_Block_block_visited(pred) +1
466                < get_irg_block_visited(current_ir_graph)) {
467       printf("  removing pred %i ", i); DDMN(pred);
468     } else { printf("  Nothing to do for "); DDMN(pred); }
470    * end Debug output -*/
472   /*- Fix the Phi nodes of the current block -*/
473   for (phi = get_irn_link(b); phi; ) {
474     assert(get_irn_op(phi) == op_Phi);
476     /* Find the new predecessors for the Phi */
478     for (i = 0, n = get_Block_n_cfgpreds(b); i < n; ++i) {
479       pred = get_Block_cfgpred_block(b, i);
481       if (is_Bad(get_Block_cfgpred(b, i))) {
482         /* case Phi 1: Do nothing */
484       else if (get_Block_block_visited(pred) + 1
485                < get_irg_block_visited(current_ir_graph)) {
486         /* case Phi 2: It's an empty block and not yet visited. */
487         ir_node *phi_pred = get_Phi_pred(phi, i);
489         for (j = 0, k = get_Block_n_cfgpreds(pred); j < k; j++) {
490           /* because of breaking loops, not all predecessors are Bad-clean,
491            * so we must check this here again */
492           if (! is_Bad(get_Block_cfgpred(pred, j))) {
/* case 2a: the Phi's operand is itself a Phi in the removed block --
 * pull in that Phi's j-th operand directly. */
493             if (get_nodes_block(phi_pred) == pred) {
495               assert(get_irn_op(phi_pred) == op_Phi);  /* Block is empty!! */
497               in[p_preds++] = get_Phi_pred(phi_pred, j);
/* case 2b: operand lives elsewhere -- replicate it once per kept pred. */
500               in[p_preds++] = phi_pred;
505         /* The Phi_pred node is replaced now if it is a Phi.
507            Somehow the removed Phi node can be used legally in loops.
508            Therefore we replace the old phi by the new one.
509            This must be done _AFTER_ all Phis are optimized, or
510            it will fail if two Phis use the same pred_Phi.
512            FIXME: Is the following true? We ALWAYS replace it by the new one.
514            Further we have to remove the old Phi node by replacing it
515            by Bad.  Else it will remain in the keep alive array of End
516            and cause illegal situations.  So if there is no loop, we should
519         if (get_nodes_block(phi_pred) == pred) {
521           /* remove the Phi as it might be kept alive. Further there
522              might be other users. */
/* Drop any stale deferred entry for the same phi_pred before appending. */
523           for (i = ARR_LEN(defers) - 1; i >= 0; --i)
524             if (defers[i].phi_pred == phi_pred)
528           /* we have a new replacement */
531             elem.phi_pred = phi_pred;
533             ARR_APP1(defer_ex_phi, defers, elem);
/* case Phi 3: useful predecessor -- keep the operand as is. */
538         in[p_preds++] = get_Phi_pred(phi, i);
541     assert(p_preds <= max_preds);
545       /* By removal of Bad ins the Phi might be degenerated. */
546       exchange(phi, in[0]);
548       set_irn_in(phi, p_preds, in);
/* Advance along the Phi chain built by collect_nodes(). */
550     phi = get_irn_link(phi);
553   /* now, exchange all Phis */
554   for (i = ARR_LEN(defers) - 1; i >= 0; --i) {
555     exchange(defers[i].phi_pred, defers[i].phi);
559   /*- This happens only if merge between loop backedge and single loop entry.
560       See special case above. -*/
561   for (k = 0, n = get_Block_n_cfgpreds(b); k < n; ++k) {
562     pred = get_nodes_block(get_Block_cfgpred(b, k));
564     if (get_Block_block_visited(pred) + 1 < get_irg_block_visited(current_ir_graph)) {
565       /* we found a predecessor block at position k that will be removed */
566       for (phi = get_irn_link(pred); phi;) {
568          * the previous phase may already changed the phi, and even
569          * removed it at all, so check here if this node is still a phi
571         if (get_irn_op(phi) == op_Phi) {
574           /* move this phi from the predecessor into the block b */
575           set_nodes_block(phi, b);
577           /* first, copy all 0..k-1 predecessors */
578           for (i = 0; i < k; i++) {
579             pred = get_nodes_block(get_Block_cfgpred(b, i));
581             if (is_Bad(get_Block_cfgpred(b, i))) {
583             } else if (get_Block_block_visited(pred) + 1
584                        < get_irg_block_visited(current_ir_graph)) {
585               /* It's an empty block and not yet visited. */
586               for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
587                 /* @@@ Hier brauche ich Schleifeninformation!!! Kontrollflusskante
588                    muss Rueckwaertskante sein! (An allen vier in[q_preds] = phi
589                    Anweisungen.) Trotzdem tuts bisher!!
                   [review translation: "Here I need loop information!! The control
                    flow edge must be a back edge! (At all four in[q_preds] = phi
                    assignments.) Still works so far!!"] */
590                 if (! is_Bad(get_Block_cfgpred(pred, j)))
598           /* now we are at k, copy the phi predecessors */
599           pred = get_nodes_block(get_Block_cfgpred(b, k));
600           for (i = 0; i < get_Phi_n_preds(phi); i++) {
601             if (! is_Bad(get_Block_cfgpred(pred, i)))
602               in[q_preds++] = get_Phi_pred(phi, i);
605           /* and now all the rest */
606           for (i = k+1; i < get_Block_n_cfgpreds(b); i++) {
607             pred = get_nodes_block(get_Block_cfgpred(b, i));
609             if (is_Bad(get_Block_cfgpred(b, i))) {
611             } else if (get_Block_block_visited(pred) +1
612                        < get_irg_block_visited(current_ir_graph)) {
613               /* It's an empty block and not yet visited. */
614               for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
615                 if (! is_Bad(get_Block_cfgpred(pred, j)))
625             exchange(phi, in[0]);
627             set_irn_in(phi, q_preds, in);
629           assert(q_preds <= max_preds);
630 //        assert(p_preds == q_preds && "Wrong Phi Fix");
632         phi = get_irn_link(phi);
637   /*- Fix the block -*/
639   for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
640     pred = get_nodes_block(get_Block_cfgpred(b, i));
642     if (is_Bad(get_Block_cfgpred(b, i))) {
643       /* case 1: Do nothing */
644     } else if (get_Block_block_visited(pred) +1
645                < get_irg_block_visited(current_ir_graph)) {
646       /* case 2: It's an empty block and not yet visited. */
647       assert(get_Block_n_cfgpreds(b) > 1);
648       /* Else it should be optimized by equivalent_node. */
649       for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
650         ir_node *pred_block = get_Block_cfgpred(pred, j);
652         /* because of breaking loops, not all predecessors are Bad-clean,
653          * so we must check this here again */
654         if (! is_Bad(pred_block))
655           in[n_preds++] = pred_block;
657       /* Remove block as it might be kept alive. */
658       exchange(pred, b/*new_Bad()*/);
/* case 3: keep the original cf edge. */
661       in[n_preds++] = get_Block_cfgpred(b, i);
664   assert(n_preds <= max_preds);
666   set_irn_in(b, n_preds, in);
/* If b had Phis, their new in-count must match the block's. */
668   assert(get_irn_link(b) == NULL || (n_preds == p_preds && "Wrong Phi Fix"));
674 /* Optimizations of the control flow that also require changes of Phi nodes.
676 * This optimization performs two passes over the graph.
678 * The first pass collects all Phi nodes in a link list in the block
679 * nodes. Further it performs simple control flow optimizations.
680 * Finally it marks all blocks that do not contain useful
681 * computations, i.e., these blocks might be removed.
683 * The second pass performs the optimizations intended by this algorithm.
684 * It walks only over block nodes and adapts these and the Phi nodes in these blocks,
685 * which it finds in a linked list computed by the first pass.
687 * We use the block_visited flag to mark empty blocks in the first
689 * @@@ It would be better to add a struct in the link field
690 * that keeps the Phi list and the mark. Place it on an obstack, as
691 * we will lose blocks and thereby generate memory leaks.
693 void optimize_cf(ir_graph *irg) {
/* Entry point: runs the control flow optimization on irg.
 * Phases (see comment above): dead-block removal (if dominance info is
 * consistent), senseless-Cond removal, merge_blocks/collect_nodes walk,
 * optimize_blocks walk, then keep-alive cleanup and optional verification.
 * NOTE(review): line numbers jump in this excerpt; declarations and some
 * closing braces are elided. */
696   ir_node *end = get_irg_end(irg);
697   ir_graph *rem = current_ir_graph;
698   irg_dom_state dom_state = get_irg_dom_state(current_ir_graph);
699   current_ir_graph = irg;
/* Edges are not maintained by this pass; switch them off up front. */
701   edges_deactivate(irg);
703   /* if the graph is not pinned, we cannot determine empty blocks */
704   assert(get_irg_pinned(irg) != op_pin_state_floats &&
705          "Control flow optimization need a pinned graph");
707   /* Handle graph state */
708   assert(get_irg_phase_state(irg) != phase_building);
/* This pass invalidates outs, extended blocks, loop and dominance info. */
709   set_irg_outs_inconsistent(current_ir_graph);
710   set_irg_extblk_inconsistent(current_ir_graph);
711   set_irg_loopinfo_inconsistent(current_ir_graph);
712   set_irg_doms_inconsistent(current_ir_graph);
714   if (dom_state == dom_consistent && get_opt_optimize() && get_opt_unreachable_code()) {
717     /* we have dominance info, we can kill dead block */
718     irg_block_walk_graph(irg, NULL, remove_dead_block_cf, NULL);
720     /* fix the keep-alives */
721     end = get_irg_end(irg);
722     for (i = 0, n = get_End_n_keepalives(end); i < n; ++i) {
723       ir_node *ka = get_End_keepalive(end, i);
726         /* do NOT keep dead blocks */
727         if (get_Block_dom_depth(ka) == -1)
728           set_End_keepalive(end, i, new_Bad());
730       else if (is_Block_dead(get_nodes_block(ka)) ||
731                get_Block_dom_depth(get_nodes_block(ka)) == -1)
732         /* do NOT keep nodes in dead blocks */
733         set_End_keepalive(end, i, new_Bad());
736   irg_block_walk_graph(irg, NULL, remove_senseless_conds, NULL);
738   /* Use block visited flag to mark non-empty blocks. */
739   inc_irg_block_visited(irg);
/* merge_blocks runs pre-order, collect_nodes post-order on the same walk. */
740   irg_walk(end, merge_blocks, collect_nodes, NULL);
742   /* Optimize the standard code. */
743   irg_block_walk(get_irg_end_block(irg), optimize_blocks, NULL, NULL);
745   /* Walk all keep alives, optimize them if block, add to new in-array
746      for end if useful. */
747   n  = get_End_n_keepalives(end);
749   NEW_ARR_A (ir_node *, in, n);
750   inc_irg_visited(irg);
752   /* fix the keep alive */
753   for (i = j = 0; i < n; i++) {
754     ir_node *ka = get_End_keepalive(end, i);
756     if (irn_not_visited(ka)) {
757       ir_op *op = get_irn_op(ka);
759       if ((op == op_Block) && Block_not_block_visited(ka)) {
/* Re-run optimize_blocks from this kept block, which is only reachable
 * via the keep-alive edge, not from the end block. */
760         set_irg_block_visited(irg,  /* Don't walk all the way to Start. */
761                               get_irg_block_visited(irg)-1);
762         irg_block_walk(ka, optimize_blocks, NULL, NULL);
763         mark_irn_visited(ka);
765       } else if (op == op_Phi) {
766         mark_irn_visited(ka);
/* Keep the Phi only if its block survived. */
767         if (! is_Block_dead(get_nodes_block(ka)))
769       } else if (is_op_keep(op)) {
770         mark_irn_visited(ka);
771         if (! is_Block_dead(get_nodes_block(ka)))
/* j <= i holds, so compaction into in[] is in-place safe. */
777   set_End_keepalives(end, j, in);
779   /* the verifier doesn't work yet with floating nodes */
780   if (get_irg_pinned(irg) == op_pin_state_pinned) {
781     /* after optimize_cf(), only Bad data flow may remain. */
782     if (irg_vrfy_bads(irg, BAD_DF | BAD_BLOCK | TUPLE)) {
783       dump_ir_block_graph(irg, "-vrfy-cf");
784       dump_ir_graph(irg, "-vrfy-cf");
785       fprintf(stderr, "VRFY_BAD in optimize_cf()\n");
/* Restore the caller's current_ir_graph. */
789   current_ir_graph = rem;