2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Control flow optimizations.
23 * @author Goetz Lindenmaier, Michael Beck, Sebastian Hack
26 * Removes Bad control flow predecessors and empty blocks. A block is empty
27 * if it contains only a Jmp node. Blocks can only be removed if they are not
28 * needed for the semantics of Phi nodes. Further, we NEVER remove labeled
29 * blocks (even if we could move the label).
33 #include "iroptimize.h"
40 #include "irgraph_t.h"
54 #include "irbackedge_t.h"
59 #include "irphase_t.h"
61 #include "iropt_dbg.h"
/** An environment for merge_blocks and collect nodes. */
typedef struct merge_env {
	bool changed;    /**< Set if the graph was changed. */
	bool phis_moved; /**< Set if Phi nodes were moved. */
} merge_env;
69 static void set_Block_removable(ir_node *block, bool removable)
71 set_Block_mark(block, removable);
74 static bool is_Block_removable(ir_node *block)
76 return get_Block_mark(block);
79 static bool is_switch_Cond(ir_node *cond) {
80 ir_node *sel = get_Cond_selector(cond);
81 return get_irn_mode(sel) != mode_b;
84 static void clear_link(ir_node *node, void *ctx)
87 set_irn_link(node, NULL);
89 set_Block_removable(node, true);
93 * Collects all Phi nodes in link list of Block.
94 * Marks all blocks "non_removable" if they contain a node other
95 * than Jmp (and Proj).
96 * Links all Proj nodes to their predecessors.
97 * Collects all switch-Conds in a list.
99 static void collect_nodes(ir_node *n, void *ctx)
101 ir_node ***switch_conds = (ir_node***)ctx;
104 /* Collect Phi nodes to compact ins along with block's ins. */
105 ir_node *block = get_nodes_block(n);
106 set_irn_link(n, get_irn_link(block));
107 set_irn_link(block, n);
108 } else if (is_Block(n)) {
109 if (has_Block_entity(n))
110 set_Block_removable(n, false);
112 } else if (!is_Jmp(n)) { /* Check for non-empty block. */
113 ir_node *block = get_nodes_block(n);
114 set_Block_removable(block, false);
117 /* link Proj nodes */
118 ir_node *pred = get_Proj_pred(n);
119 set_irn_link(n, get_irn_link(pred));
120 set_irn_link(pred, n);
121 } else if (is_Cond(n) && is_switch_Cond(n)) {
122 /* found a switch-Cond, collect */
123 ARR_APP1(ir_node*, *switch_conds, n);
128 /** Returns true if pred is predecessor of block. */
129 static bool is_pred_of(ir_node *pred, ir_node *b)
133 for (i = get_Block_n_cfgpreds(b) - 1; i >= 0; --i) {
134 ir_node *b_pred = get_Block_cfgpred_block(b, i);
141 /** Test whether we can optimize away pred block pos of b.
143 * @param b A block node.
144 * @param pos The position of the predecessor block to judge about.
146 * @returns The number of predecessors
148 * The test is rather tricky.
150 * The situation is something like the following:
159 * b merges the control flow of an if-then-else. We may not remove
160 * the 'then' _and_ the 'else' block of an 'if' if there is a Phi
161 * node in b, even if both are empty. The destruction of this Phi
162 * requires that a copy is added before the merge. We have to
163 * keep one of the case blocks to place the copies in.
165 * To perform the test for pos, we must regard predecessors before pos
166 * as already removed.
168 static unsigned test_whether_dispensable(ir_node *b, int pos)
170 ir_node *pred = get_Block_cfgpred(b, pos);
171 ir_node *predb = get_nodes_block(pred);
173 if (is_Bad(pred) || !is_Block_removable(predb))
176 /* can't remove self-loops */
178 goto non_dispensable;
179 if (is_unknown_jump(pred))
180 goto non_dispensable;
182 /* Seems to be empty. At least we detected this in collect_nodes. */
183 if (get_irn_link(b) != NULL) {
184 int n_cfgpreds = get_Block_n_cfgpreds(b);
186 /* there are Phi nodes */
188 /* b's pred blocks and pred's pred blocks must be pairwise disjunct.
189 * Handle all pred blocks with preds < pos as if they were already
191 for (i = 0; i < pos; i++) {
192 ir_node *other_pred = get_Block_cfgpred(b, i);
193 ir_node *other_predb = get_nodes_block(other_pred);
194 if (is_Bad(other_pred))
196 if (is_Block_removable(other_predb)
197 && !Block_block_visited(other_predb)) {
199 for (j = get_Block_n_cfgpreds(other_predb) - 1; j >= 0; --j) {
200 ir_node *other_predpred
201 = get_Block_cfgpred_block(other_predb, j);
202 if (is_pred_of(other_predpred, predb))
203 goto non_dispensable;
205 } else if (is_pred_of(other_predb, predb)) {
206 goto non_dispensable;
209 for (i = pos+1; i < n_cfgpreds; i++) {
210 ir_node *other_predb = get_Block_cfgpred_block(b, i);
211 if (is_pred_of(other_predb, predb))
212 goto non_dispensable;
215 /* we will not dispense already visited blocks */
216 if (Block_block_visited(predb))
218 /* if we get here, the block is dispensable, count useful preds */
219 return get_irn_arity(predb);
222 set_Block_removable(predb, false);
227 * This method removes empty blocks. A block is empty if it only contains Phi
230 * We first adapt Phi nodes, then Block nodes, as we need the old ins
231 * of the Block to adapt the Phi nodes. We do this by computing new
232 * in arrays, and then replacing the old ones. So far we compute new in arrays
233 * for all nodes, not regarding whether there is a possibility for optimization.
235 * For each predecessor p of a Block b there are three cases:
236 * - The predecessor p is a Bad node: just skip it. The in array of b shrinks
238 * - The predecessor p is empty. Remove p. All predecessors of p are now
240 * - The predecessor p is a block containing useful code. Just keep p as is.
242 * For Phi nodes f we have to check the conditions at the Block of f.
243 * For cases 1 and 3 we proceed as for Blocks. For case 2 we can have two
245 * -2a: The old predecessor of the Phi f is a Phi pred_f IN THE BLOCK REMOVED.
246 * In this case we proceed as for blocks. We remove pred_f. All
247 * predecessors of pred_f now are predecessors of f.
248 * -2b: The old predecessor of f is NOT in the block removed. It might be a Phi
249 * too. We have to replicate f for each predecessor of the removed block.
250 * Or, with other words, the removed predecessor block has exactly one
253 * Further there is a special case for self referencing blocks:
256 * then_b else_b then_b else_b
262 * | | | === optimized to ===> \ | | |
269 * If there is a Phi in pred_b, but we remove pred_b, we have to generate a
270 * Phi in loop_b, that has the ins of the Phi in pred_b and a self referencing
273 static void optimize_blocks(ir_node *b, void *ctx)
275 int i, j, k, n, max_preds, n_preds, p_preds = -1;
276 ir_node *pred, *phi, *next;
278 merge_env *env = (merge_env*)ctx;
280 /* Count the number of predecessor if this block is merged with pred blocks
283 for (i = 0, k = get_Block_n_cfgpreds(b); i < k; ++i) {
284 max_preds += test_whether_dispensable(b, i);
286 in = XMALLOCN(ir_node*, max_preds);
288 /*- Fix the Phi nodes of the current block -*/
289 for (phi = (ir_node*)get_irn_link(b); phi != NULL; phi = (ir_node*)next) {
291 next = (ir_node*)get_irn_link(phi);
293 /* Find the new predecessors for the Phi */
295 for (i = 0, n = get_Block_n_cfgpreds(b); i < n; ++i) {
296 ir_graph *irg = get_irn_irg(b);
297 pred = get_Block_cfgpred_block(b, i);
300 /* case Phi 1: maintain Bads, as somebody else is responsible to remove them */
301 in[p_preds++] = new_r_Bad(irg, get_irn_mode(phi));
302 } else if (is_Block_removable(pred) && !Block_block_visited(pred)) {
303 /* case Phi 2: It's an empty block and not yet visited. */
304 ir_node *phi_pred = get_Phi_pred(phi, i);
306 for (j = 0, k = get_Block_n_cfgpreds(pred); j < k; j++) {
307 ir_node *pred_pred = get_Block_cfgpred(pred, j);
309 if (is_Bad(pred_pred)) {
310 in[p_preds++] = new_r_Bad(irg, get_irn_mode(phi));
314 if (get_nodes_block(phi_pred) == pred) {
316 assert(is_Phi(phi_pred)); /* Block is empty!! */
318 in[p_preds++] = get_Phi_pred(phi_pred, j);
321 in[p_preds++] = phi_pred;
326 in[p_preds++] = get_Phi_pred(phi, i);
329 assert(p_preds == max_preds);
333 exchange(phi, in[0]);
335 set_irn_in(phi, p_preds, in);
339 /*- This happens only if merge between loop backedge and single loop entry.
340 Moreover, it is only needed if predb is the direct dominator of b,
341 else there can be no uses of the Phi's in predb ... -*/
342 for (k = 0, n = get_Block_n_cfgpreds(b); k < n; ++k) {
343 ir_node *pred = get_Block_cfgpred(b, k);
344 ir_node *predb = get_nodes_block(pred);
348 if (is_Block_removable(predb) && !Block_block_visited(predb)) {
351 /* we found a predecessor block at position k that will be removed */
352 for (phi = (ir_node*)get_irn_link(predb); phi; phi = next_phi) {
354 next_phi = (ir_node*)get_irn_link(phi);
358 if (get_Block_idom(b) != predb) {
359 /* predb is not the dominator. There can't be uses of pred's Phi nodes, kill them .*/
360 ir_graph *irg = get_irn_irg(b);
361 ir_mode *mode = get_irn_mode(phi);
362 exchange(phi, new_r_Bad(irg, mode));
364 /* predb is the direct dominator of b. There might be uses of the Phi nodes from
365 predb in further block, so move this phi from the predecessor into the block b */
366 set_nodes_block(phi, b);
367 set_irn_link(phi, get_irn_link(b));
368 set_irn_link(b, phi);
369 env->phis_moved = true;
371 /* first, copy all 0..k-1 predecessors */
372 for (i = 0; i < k; i++) {
373 pred = get_Block_cfgpred_block(b, i);
376 ir_graph *irg = get_irn_irg(b);
377 ir_mode *mode = get_irn_mode(phi);
378 in[q_preds++] = new_r_Bad(irg, mode);
379 } else if (is_Block_removable(pred) && !Block_block_visited(pred)) {
380 /* It's an empty block and not yet visited. */
381 for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
382 if (! is_Bad(get_Block_cfgpred(pred, j))) {
385 ir_graph *irg = get_irn_irg(b);
386 ir_mode *mode = get_irn_mode(phi);
387 in[q_preds++] = new_r_Bad(irg, mode);
395 /* now we are at k, copy the phi predecessors */
396 pred = get_nodes_block(get_Block_cfgpred(b, k));
397 for (i = 0; i < get_Phi_n_preds(phi); i++) {
398 in[q_preds++] = get_Phi_pred(phi, i);
401 /* and now all the rest */
402 for (i = k+1; i < get_Block_n_cfgpreds(b); i++) {
403 pred = get_Block_cfgpred_block(b, i);
406 ir_graph *irg = get_irn_irg(b);
407 ir_mode *mode = get_irn_mode(phi);
408 in[q_preds++] = new_r_Bad(irg, mode);
409 } else if (is_Block_removable(pred) && !Block_block_visited(pred)) {
410 /* It's an empty block and not yet visited. */
411 for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
412 if (! is_Bad(get_Block_cfgpred(pred, j))) {
415 ir_graph *irg = get_irn_irg(b);
416 ir_mode *mode = get_irn_mode(phi);
417 in[q_preds++] = new_r_Bad(irg, mode);
427 exchange(phi, in[0]);
429 set_irn_in(phi, q_preds, in);
432 assert(q_preds <= max_preds);
433 // assert(p_preds == q_preds && "Wrong Phi Fix");
439 /*- Fix the block -*/
441 for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
442 ir_node *pred = get_Block_cfgpred(b, i);
443 ir_node *predb = get_nodes_block(pred);
444 ir_graph *irg = get_irn_irg(pred);
446 /* case 1: Bad predecessor */
448 in[n_preds++] = new_r_Bad(irg, mode_X);
451 if (is_Block_removable(predb) && !Block_block_visited(predb)) {
452 /* case 2: It's an empty block and not yet visited. */
453 for (j = 0; j < get_Block_n_cfgpreds(predb); j++) {
454 ir_node *predpred = get_Block_cfgpred(predb, j);
456 if (is_Bad(predpred)) {
457 in[n_preds++] = new_r_Bad(irg, mode_X);
461 in[n_preds++] = predpred;
463 /* Remove block+jump as it might be kept alive. */
464 exchange(pred, new_r_Bad(get_irn_irg(b), mode_X));
465 exchange(predb, new_r_Bad(get_irn_irg(b), mode_BB));
468 in[n_preds++] = pred;
471 assert(n_preds == max_preds);
473 set_irn_in(b, n_preds, in);
476 /* see if phi-fix was correct */
477 assert(get_irn_link(b) == NULL || p_preds == -1 || (n_preds == p_preds));
482 * Optimize table-switch Conds.
484 * @param cond the switch-Cond
485 * @return true if the switch-Cond was optimized
487 static bool handle_switch_cond(ir_node *cond)
489 ir_node *sel = get_Cond_selector(cond);
490 ir_node *proj1 = (ir_node*)get_irn_link(cond);
491 ir_node *proj2 = (ir_node*)get_irn_link(proj1);
492 ir_node *blk = get_nodes_block(cond);
494 /* exactly 1 Proj on the Cond node: must be the defaultProj */
496 ir_node *jmp = new_r_Jmp(blk);
497 assert(get_Cond_default_proj(cond) == get_Proj_proj(proj1));
498 /* convert it into a Jmp */
499 exchange(proj1, jmp);
503 /* handle Cond nodes with constant argument. In this case the localopt rules
504 * should have killed all obviously impossible cases.
505 * So the only case left to handle here is 1 defaultProj + 1case
506 * (this one case should be the one taken) */
507 if (get_irn_link(proj2) == NULL) {
508 ir_tarval *tv = value_of(sel);
510 if (tv != tarval_bad) {
511 /* we have a constant switch */
512 long num = get_tarval_long(tv);
513 long def_num = get_Cond_default_proj(cond);
514 ir_graph *irg = get_irn_irg(cond);
515 ir_node *bad = new_r_Bad(irg, mode_X);
517 if (def_num == get_Proj_proj(proj1)) {
518 /* first one is the defProj */
519 if (num == get_Proj_proj(proj2)) {
520 ir_node *jmp = new_r_Jmp(blk);
521 exchange(proj2, jmp);
522 exchange(proj1, bad);
525 } else if (def_num == get_Proj_proj(proj2)) {
526 /* second one is the defProj */
527 if (num == get_Proj_proj(proj1)) {
528 ir_node *jmp = new_r_Jmp(blk);
529 exchange(proj1, jmp);
530 exchange(proj2, bad);
534 /* neither: strange, Cond was not optimized so far */
535 if (num == get_Proj_proj(proj1)) {
536 ir_node *jmp = new_r_Jmp(blk);
537 exchange(proj1, jmp);
538 exchange(proj2, bad);
540 } else if (num == get_Proj_proj(proj2)) {
541 ir_node *jmp = new_r_Jmp(blk);
542 exchange(proj2, jmp);
543 exchange(proj1, bad);
552 static bool get_phase_flag(ir_phase *block_info, ir_node *block, int offset) {
553 return ((int)phase_get_irn_data(block_info, block)) & (1<<offset);
555 static void set_phase_flag(ir_phase *block_info, ir_node *block, int offset) {
556 int data = (int)phase_get_irn_data(block_info, block);
558 phase_set_irn_data(block_info, block, (void*)data);
561 static bool has_operations(ir_phase *block_info, ir_node *block) {
562 return get_phase_flag(block_info, block, 1);
564 static void set_has_operations(ir_phase *block_info, ir_node *block) {
565 set_phase_flag(block_info, block, 1);
568 static bool has_phis(ir_phase *block_info, ir_node *block) {
569 return get_phase_flag(block_info, block, 2);
571 static void set_has_phis(ir_phase *block_info, ir_node *block) {
572 set_phase_flag(block_info, block, 2);
575 static bool is_unknown_jump_target(ir_phase *block_info, ir_node *block) {
576 return get_phase_flag(block_info, block, 3);
578 static void set_is_unknown_jump_target(ir_phase *block_info, ir_node *block) {
579 set_phase_flag(block_info, block, 3);
583 * Optimize Conds, where true and false jump to the same block into a Jmp
587 * projA projB => Jmp Bad
591 static bool optimize_pred_cond(ir_node *block, int i, int j)
593 ir_node *projA, *projB, *cond, *pred_block, *jmp, *bad;
596 projA = get_Block_cfgpred(block, i);
597 if (!is_Proj(projA)) return false;
598 projB = get_Block_cfgpred(block, j);
599 if (!is_Proj(projB)) return false;
600 cond = get_Proj_pred(projA);
601 if (!is_Cond(cond)) return false;
603 if (cond != get_Proj_pred(projB)) return false;
604 if (is_switch_Cond(cond)) return false;
606 /* cond should actually be a Jmp */
607 pred_block = get_nodes_block(cond);
608 jmp = new_r_Jmp(pred_block);
609 bad = new_r_Bad(get_irn_irg(block), mode_X);
611 assert(projA != projB);
612 exchange(projA, jmp);
613 exchange(projB, bad);
617 static void compute_block_info(ir_node *n, void *x)
619 ir_phase *block_info = (ir_phase *)x;
622 int i, max = get_Block_n_cfgpreds(n);
623 for (i=0; i<max; i++) {
624 ir_node *pred = get_Block_cfgpred(n,i);
625 if (is_unknown_jump(pred)) {
626 set_is_unknown_jump_target(block_info, n);
629 } else if (is_Phi(n)) {
630 ir_node *block = get_nodes_block(n);
631 set_has_phis(block_info, block);
632 } else if (is_Jmp(n) || is_Cond(n) || is_Cmp(n) || is_Proj(n)) {
635 ir_node *block = get_nodes_block(n);
636 set_has_operations(block_info, block);
640 typedef struct skip_env {
645 static void optimize_conds(ir_node *b, void *x)
647 skip_env *env = (skip_env*)x;
649 int n_preds = get_Block_n_cfgpreds(b);
651 if (has_phis(env->phase,b)) return;
653 /* optimize Cond predecessors (might produce Bad predecessors) */
654 for (i = 0; i < n_preds; i++) {
655 for (j = i+1; j < n_preds; j++) {
656 optimize_pred_cond(b, i, j);
661 static void remove_empty_blocks(ir_node *b, void *x)
663 skip_env *env = (skip_env*)x;
665 int n_preds = get_Block_n_cfgpreds(b);
667 for (i = 0; i < n_preds; ++i) {
668 ir_node *jmp, *jmp_block, *pred, *pred_block;
670 jmp = get_Block_cfgpred(b, i);
671 if (!is_Jmp(jmp)) continue;
672 if (is_unknown_jump(jmp)) continue;
673 jmp_block = get_nodes_block(jmp);
674 if (is_unknown_jump_target(env->phase, jmp_block)) continue;
675 if (has_operations(env->phase,jmp_block)) continue;
676 /* jmp_block is an empty block! */
678 if (get_Block_n_cfgpreds(jmp_block) != 1) continue;
679 pred = get_Block_cfgpred(jmp_block, 0);
683 /* cleanup: jmp_block might have a Keep edge! */
684 pred_block = get_nodes_block(pred);
685 exchange(jmp_block, pred_block);
690 * Some cfg optimizations, which do not touch Phi nodes */
691 static void cfgopt_ignoring_phis(ir_graph *irg) {
692 ir_phase *block_info = new_phase(irg, NULL);
693 skip_env env = { false, block_info };
695 irg_walk_graph(irg, compute_block_info, NULL, block_info);
700 /* Conds => Jmp optimization; might produce empty blocks */
701 irg_block_walk_graph(irg, optimize_conds, NULL, &env);
703 /* Remove empty blocks */
704 irg_block_walk_graph(irg, remove_empty_blocks, NULL, &env);
706 set_irg_doms_inconsistent(irg);
707 /* Removing blocks might enable more Cond optimizations */
714 phase_free(block_info);
717 /* Optimizations of the control flow that also require changes of Phi nodes. */
718 void optimize_cf(ir_graph *irg)
722 ir_node *end = get_irg_end(irg);
726 assert(get_irg_phase_state(irg) != phase_building);
728 /* if the graph is not pinned, we cannot determine empty blocks */
729 assert(get_irg_pinned(irg) != op_pin_state_floats &&
730 "Control flow optimization need a pinned graph");
732 /* FIXME: control flow opt destroys block edges. So edges are deactivated
733 * here. Fix the edges! */
734 edges_deactivate(irg);
736 cfgopt_ignoring_phis(irg);
738 /* we use the mark flag to mark removable blocks */
739 ir_reserve_resources(irg, IR_RESOURCE_BLOCK_MARK | IR_RESOURCE_IRN_LINK);
741 /* The switch Cond optimization might expose unreachable code, so we loop */
744 ir_node **switch_conds = NULL;
746 env.phis_moved = false;
751 * This pass collects all Phi nodes in a link list in the block
752 * nodes. Further it performs simple control flow optimizations.
753 * Finally it marks all blocks that do not contain useful
754 * computations, i.e., these blocks might be removed.
756 switch_conds = NEW_ARR_F(ir_node*, 0);
757 irg_walk(end, clear_link, collect_nodes, &switch_conds);
759 /* handle all collected switch-Conds */
760 length = ARR_LEN(switch_conds);
761 for (i = 0; i < length; ++i) {
762 ir_node *cond = switch_conds[i];
763 env.changed |= handle_switch_cond(cond);
765 DEL_ARR_F(switch_conds);
767 if (!env.changed) break;
769 set_irg_doms_inconsistent(irg);
770 set_irg_extblk_inconsistent(irg);
771 set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);
774 /* assert due to collect_nodes:
775 * 1. removable blocks are now marked as such
776 * 2. phi lists are up to date
779 /* Optimize the standard code.
780 * It walks only over block nodes and adapts these and the Phi nodes in these
781 * blocks, which it finds in a linked list computed before.
784 irg_block_walk_graph(irg, optimize_blocks, NULL, &env);
786 new_end = optimize_in_place(end);
787 if (new_end != end) {
788 set_irg_end(irg, new_end);
791 remove_End_Bads_and_doublets(end);
793 ir_free_resources(irg, IR_RESOURCE_BLOCK_MARK | IR_RESOURCE_IRN_LINK);
795 if (env.phis_moved) {
796 /* Bad: when we moved Phi's, we might produce dead Phi nodes
798 Some other phases cannot copy with this, so will them.
800 n = get_End_n_keepalives(end);
802 NEW_ARR_A(ir_node *, in, n);
803 assure_irg_outs(irg);
805 for (i = j = 0; i < n; ++i) {
806 ir_node *ka = get_End_keepalive(end, i);
811 for (k = get_irn_n_outs(ka) - 1; k >= 0; --k) {
812 ir_node *user = get_irn_out(ka, k);
814 if (user != ka && user != end) {
815 /* Is it a real user or just a self loop ? */
825 set_End_keepalives(end, j, in);
832 /* Handle graph state if was changed. */
833 set_irg_doms_inconsistent(irg);
834 set_irg_extblk_inconsistent(irg);
835 set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);
839 /* Creates an ir_graph pass for optimize_cf. */
840 ir_graph_pass_t *optimize_cf_pass(const char *name)
842 return def_graph_pass(name ? name : "optimize_cf", optimize_cf);