 * Copyright (C) 1995-2008 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Control flow optimizations.
23 * @author Goetz Lindenmaier, Michael Beck, Sebastian Hack
26 * Removes Bad control flow predecessors and empty blocks. A block is empty
27 * if it contains only a Jmp node. Blocks can only be removed if they are not
28 * needed for the semantics of Phi nodes. Further, we NEVER remove labeled
29 * blocks (even if we could move the label).
#include <stdint.h>

#include "iroptimize.h"
#include "irgraph_t.h"
#include "irbackedge_t.h"
#include "irphase_t.h"
#include "iropt_dbg.h"
/** An environment for merge_blocks and collect_nodes. */
typedef struct merge_env {
	bool changed;    /**< Set if the graph was changed at all (drives the fixpoint loop). */
	bool phis_moved; /**< Set if Phi nodes were moved from a removed block into its successor. */
	ir_node **switch_conds; /**< Helper list collecting all switch-Conds found during the walk. */
/** Record whether @p block may be removed; stored in the block's mark flag
 * (reserved via IR_RESOURCE_BLOCK_MARK in optimize_cf). */
static void set_Block_removable(ir_node *block, bool removable)
	set_Block_mark(block, removable);
/** Return true if @p block is currently marked as removable. */
static bool is_Block_removable(ir_node *block)
	return get_Block_mark(block);
/** Return true if @p cond is a switch-Cond: its selector is not a boolean
 * (mode_b), i.e. it selects among multiple case Projs instead of true/false. */
static bool is_switch_Cond(ir_node *cond) {
	ir_node *sel = get_Cond_selector(cond);
	return get_irn_mode(sel) != mode_b;
/** Pre-walker: clear every node's link field and initially mark blocks as
 * removable; collect_nodes() later clears the flag for non-empty blocks. */
static void clear_link(ir_node *node, void *ctx)
	set_irn_link(node, NULL);
	/* NOTE(review): presumably guarded by an is_Block(node) check elided
	 * from this view -- confirm against the full file. */
	set_Block_removable(node, true);
/**
 * Walker (post-order): prepares the graph for block merging.
 * Collects all Phi nodes in the link list of their Block.
 * Marks all blocks "non_removable" if they contain a node other
 * than Jmp (and Proj).
 * Links all Proj nodes to their predecessors.
 * Collects all switch-Conds in a list (env->switch_conds).
 */
static void collect_nodes(ir_node *n, void *ctx)
	merge_env *env = (merge_env*)ctx;

		/* Collect Phi nodes to compact ins along with block's ins.
		 * The block's link heads a singly linked list of its Phis. */
		ir_node *block = get_nodes_block(n);
		set_irn_link(n, get_irn_link(block));
		set_irn_link(block, n);
	} else if (is_Block(n)) {
		/* NEVER remove labeled blocks (see file header comment). */
		if (has_Block_entity(n))
			set_Block_removable(n, false);
	} else if (!is_Jmp(n)) { /* Check for non-empty block. */
		ir_node *block = get_nodes_block(n);
		set_Block_removable(block, false);

			/* link Proj nodes onto their predecessor's list; used by
			 * handle_switch_cond() to enumerate a Cond's Projs */
			ir_node *pred = get_Proj_pred(n);
			set_irn_link(n, get_irn_link(pred));
			set_irn_link(pred, n);
		} else if (is_Cond(n) && is_switch_Cond(n)) {
			/* found a switch-Cond, collect */
			ARR_APP1(ir_node*, env->switch_conds, n);
/** Returns true if pred is a (direct control-flow) predecessor block of b. */
static bool is_pred_of(ir_node *pred, ir_node *b)

	/* scan all cfg predecessors of b for a match */
	for (i = get_Block_n_cfgpreds(b) - 1; i >= 0; --i) {
		ir_node *b_pred = get_Block_cfgpred_block(b, i);
/** Test whether we can optimize away pred block pos of b.
 *
 * @param b    A block node.
 * @param pos  The position of the predecessor block to judge about.
 *
 * @returns The number of predecessors b will gain from this position
 *          (the pred block's arity if it is dispensable, see below).
 *
 * The test is rather tricky.
 *
 * The situation is something like the following:
 *
 * b merges the control flow of an if-then-else. We may not remove
 * the 'then' _and_ the 'else' block of an 'if' if there is a Phi
 * node in b, even if both are empty. The destruction of this Phi
 * requires that a copy is added before the merge. We have to
 * keep one of the case blocks to place the copies in.
 *
 * To perform the test for pos, we must regard predecessors before pos
 * as already removed.
 */
static unsigned test_whether_dispensable(ir_node *b, int pos)
	ir_node *pred  = get_Block_cfgpred(b, pos);
	ir_node *predb = get_nodes_block(pred);

	if (is_Bad(pred) || !is_Block_removable(predb))

	/* can't remove self-loops */
		goto non_dispensable;
	if (is_unknown_jump(pred))
		goto non_dispensable;

	/* Seems to be empty. At least we detected this in collect_nodes. */
	if (get_irn_link(b) != NULL) {
		int n_cfgpreds = get_Block_n_cfgpreds(b);
		/* there are Phi nodes */

		/* b's pred blocks and pred's pred blocks must be pairwise disjunct.
		 * Handle all pred blocks with preds < pos as if they were already
		 * removed. */
		for (i = 0; i < pos; i++) {
			ir_node *other_pred  = get_Block_cfgpred(b, i);
			ir_node *other_predb = get_nodes_block(other_pred);
			if (is_Bad(other_pred))
			if (is_Block_removable(other_predb)
			    && !Block_block_visited(other_predb)) {
				/* other_predb will also be merged: check its preds instead */
				for (j = get_Block_n_cfgpreds(other_predb) - 1; j >= 0; --j) {
					ir_node *other_predpred
						= get_Block_cfgpred_block(other_predb, j);
					if (is_pred_of(other_predpred, predb))
						goto non_dispensable;
			} else if (is_pred_of(other_predb, predb)) {
				goto non_dispensable;
		/* preds after pos are checked as-is (not yet removed) */
		for (i = pos+1; i < n_cfgpreds; i++) {
			ir_node *other_predb = get_Block_cfgpred_block(b, i);
			if (is_pred_of(other_predb, predb))
				goto non_dispensable;
	/* we will not dispense already visited blocks */
	if (Block_block_visited(predb))
	/* if we get here, the block is dispensable, count useful preds */
	return get_irn_arity(predb);

	/* fall-through target of the gotos above: keep this block */
	set_Block_removable(predb, false);
/**
 * This method removes empty blocks. A block is empty if it only contains Phi
 * and Jmp nodes.
 *
 * We first adapt Phi nodes, then Block nodes, as we need the old ins
 * of the Block to adapt the Phi nodes. We do this by computing new
 * in arrays, and then replacing the old ones. So far we compute new in arrays
 * for all nodes, not regarding whether there is a possibility for optimization.
 *
 * For each predecessor p of a Block b there are three cases:
 * - The predecessor p is a Bad node: just skip it. The in array of b shrinks
 * - The predecessor p is empty. Remove p. All predecessors of p are now
 *   predecessors of b.
 * - The predecessor p is a block containing useful code. Just keep p as is.
 *
 * For Phi nodes f we have to check the conditions at the Block of f.
 * For cases 1 and 3 we proceed as for Blocks. For case 2 we can have two
 * sub-cases:
 * -2a: The old predecessor of the Phi f is a Phi pred_f IN THE BLOCK REMOVED.
 *      In this case we proceed as for blocks. We remove pred_f. All
 *      predecessors of pred_f now are predecessors of f.
 * -2b: The old predecessor of f is NOT in the block removed. It might be a Phi
 *      too. We have to replicate f for each predecessor of the removed block.
 *      Or, with other words, the removed predecessor block has exactly one
 *      predecessor.
 *
 * Further there is a special case for self referencing blocks:
 *
 *    then_b     else_b                    then_b  else_b
 *       |          |     === optimized to ===>  \  |  |  |
 *
 * If there is a Phi in pred_b, but we remove pred_b, we have to generate a
 * Phi in loop_b, that has the ins of the Phi in pred_b and a self referencing
 * input.
 */
static void optimize_blocks(ir_node *b, void *ctx)
	int i, j, k, n, max_preds, n_preds, p_preds = -1;
	ir_node *pred, *phi, *next;
	merge_env *env = (merge_env*)ctx;

	/* Count the number of predecessor if this block is merged with pred blocks
	 * that are empty. */
	for (i = 0, k = get_Block_n_cfgpreds(b); i < k; ++i) {
		max_preds += test_whether_dispensable(b, i);
	/* scratch array for the new in-sets, sized for the worst case */
	in = XMALLOCN(ir_node*, max_preds);

	/*- Fix the Phi nodes of the current block -*/
	for (phi = (ir_node*)get_irn_link(b); phi != NULL; phi = (ir_node*)next) {
		next = (ir_node*)get_irn_link(phi);

		/* Find the new predecessors for the Phi */
		for (i = 0, n = get_Block_n_cfgpreds(b); i < n; ++i) {
			ir_graph *irg = get_irn_irg(b);
			pred = get_Block_cfgpred_block(b, i);

				/* case Phi 1: maintain Bads, as somebody else is responsible to remove them */
				in[p_preds++] = new_r_Bad(irg, get_irn_mode(phi));
			} else if (is_Block_removable(pred) && !Block_block_visited(pred)) {
				/* case Phi 2: It's an empty block and not yet visited. */
				ir_node *phi_pred = get_Phi_pred(phi, i);

				for (j = 0, k = get_Block_n_cfgpreds(pred); j < k; j++) {
					ir_node *pred_pred = get_Block_cfgpred(pred, j);

					if (is_Bad(pred_pred)) {
						in[p_preds++] = new_r_Bad(irg, get_irn_mode(phi));

					/* case 2a: the Phi's operand lives in the removed block */
					if (get_nodes_block(phi_pred) == pred) {
						assert(is_Phi(phi_pred));  /* Block is empty!! */
						in[p_preds++] = get_Phi_pred(phi_pred, j);
						/* case 2b: operand defined elsewhere, replicate it */
						in[p_preds++] = phi_pred;
				/* case Phi 3: keep the operand unchanged */
				in[p_preds++] = get_Phi_pred(phi, i);
		assert(p_preds == max_preds);
			/* degenerate Phi with one operand: replace by that operand */
			exchange(phi, in[0]);
			set_irn_in(phi, p_preds, in);

	/*- This happens only if merge between loop backedge and single loop entry.
	    Moreover, it is only needed if predb is the direct dominator of b,
	    else there can be no uses of the Phi's in predb ... -*/
	for (k = 0, n = get_Block_n_cfgpreds(b); k < n; ++k) {
		ir_node *pred  = get_Block_cfgpred(b, k);
		ir_node *predb = get_nodes_block(pred);

		if (is_Block_removable(predb) && !Block_block_visited(predb)) {

			/* we found a predecessor block at position k that will be removed */
			for (phi = (ir_node*)get_irn_link(predb); phi; phi = next_phi) {
				next_phi = (ir_node*)get_irn_link(phi);

				if (get_Block_idom(b) != predb) {
					/* predb is not the dominator. There can't be uses of pred's Phi nodes, kill them. */
					ir_graph *irg  = get_irn_irg(b);
					ir_mode  *mode = get_irn_mode(phi);
					exchange(phi, new_r_Bad(irg, mode));
					/* predb is the direct dominator of b. There might be uses of the Phi nodes from
					   predb in further block, so move this phi from the predecessor into the block b */
					set_nodes_block(phi, b);
					set_irn_link(phi, get_irn_link(b));
					set_irn_link(b, phi);
					env->phis_moved = true;

					/* first, copy all 0..k-1 predecessors */
					for (i = 0; i < k; i++) {
						pred = get_Block_cfgpred_block(b, i);
							in[q_preds++] = pred;
						} else if (is_Block_removable(pred) && !Block_block_visited(pred)) {
							/* It's an empty block and not yet visited. */
							for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
								if (! is_Bad(get_Block_cfgpred(pred, j)))

					/* now we are at k, copy the phi predecessors */
					pred = get_nodes_block(get_Block_cfgpred(b, k));
					for (i = 0; i < get_Phi_n_preds(phi); i++) {
						in[q_preds++] = get_Phi_pred(phi, i);

					/* and now all the rest */
					for (i = k+1; i < get_Block_n_cfgpreds(b); i++) {
						pred = get_Block_cfgpred_block(b, i);
							ir_graph *irg  = get_irn_irg(b);
							ir_mode  *mode = get_irn_mode(phi);
							in[q_preds++] = new_r_Bad(irg, mode);
						} else if (is_Block_removable(pred) && !Block_block_visited(pred)) {
							/* It's an empty block and not yet visited. */
							for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
								if (! is_Bad(get_Block_cfgpred(pred, j)))
						exchange(phi, in[0]);
						set_irn_in(phi, q_preds, in);
					assert(q_preds <= max_preds);
					// assert(p_preds == q_preds && "Wrong Phi Fix");

	/*- Fix the block: build its new in-array -*/
	for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
		ir_node  *pred  = get_Block_cfgpred(b, i);
		ir_node  *predb = get_nodes_block(pred);
		ir_graph *irg   = get_irn_irg(pred);

		/* case 1: Bad predecessor */
			in[n_preds++] = new_r_Bad(irg, mode_X);
		if (is_Block_removable(predb) && !Block_block_visited(predb)) {
			/* case 2: It's an empty block and not yet visited. */
			for (j = 0; j < get_Block_n_cfgpreds(predb); j++) {
				ir_node *predpred = get_Block_cfgpred(predb, j);

				if (is_Bad(predpred)) {
					in[n_preds++] = new_r_Bad(irg, mode_X);

				in[n_preds++] = predpred;
			/* Remove block+jump as it might be kept alive. */
			exchange(pred, new_r_Bad(get_irn_irg(b), mode_X));
			exchange(predb, new_r_Bad(get_irn_irg(b), mode_BB));
			/* case 3: keep the predecessor */
			in[n_preds++] = pred;
	assert(n_preds == max_preds);
	set_irn_in(b, n_preds, in);

	/* see if phi-fix was correct */
	assert(get_irn_link(b) == NULL || p_preds == -1 || (n_preds == p_preds));
/**
 * Optimize table-switch Conds.
 *
 * @param cond the switch-Cond
 * @return true if the switch-Cond was optimized
 *
 * Relies on collect_nodes() having linked the Cond's Proj nodes into
 * the Cond's link list (proj1 = first Proj, proj2 = second, ...).
 */
static bool handle_switch_cond(ir_node *cond)
	ir_node *sel   = get_Cond_selector(cond);
	ir_node *proj1 = (ir_node*)get_irn_link(cond);
	ir_node *proj2 = (ir_node*)get_irn_link(proj1);
	ir_node *blk   = get_nodes_block(cond);

	/* exactly 1 Proj on the Cond node: must be the defaultProj */
		ir_node *jmp = new_r_Jmp(blk);
		assert(get_Cond_default_proj(cond) == get_Proj_proj(proj1));
		/* convert it into a Jmp */
		exchange(proj1, jmp);

	/* handle Cond nodes with constant argument. In this case the localopt rules
	 * should have killed all obviously impossible cases.
	 * So the only case left to handle here is 1 defaultProj + 1 case
	 * (this one case should be the one taken) */
	if (get_irn_link(proj2) == NULL) {
		ir_tarval *tv = value_of(sel);

		if (tv != tarval_bad) {
			/* we have a constant switch */
			long      num     = get_tarval_long(tv);
			long      def_num = get_Cond_default_proj(cond);
			ir_graph *irg     = get_irn_irg(cond);
			ir_node  *bad     = new_r_Bad(irg, mode_X);

			if (def_num == get_Proj_proj(proj1)) {
				/* first one is the defProj */
				if (num == get_Proj_proj(proj2)) {
					/* the case Proj is taken, default becomes dead */
					ir_node *jmp = new_r_Jmp(blk);
					exchange(proj2, jmp);
					exchange(proj1, bad);
			} else if (def_num == get_Proj_proj(proj2)) {
				/* second one is the defProj */
				if (num == get_Proj_proj(proj1)) {
					ir_node *jmp = new_r_Jmp(blk);
					exchange(proj1, jmp);
					exchange(proj2, bad);
				/* neither: strange, Cond was not optimized so far */
				if (num == get_Proj_proj(proj1)) {
					ir_node *jmp = new_r_Jmp(blk);
					exchange(proj1, jmp);
					exchange(proj2, bad);
				} else if (num == get_Proj_proj(proj2)) {
					ir_node *jmp = new_r_Jmp(blk);
					exchange(proj2, jmp);
					exchange(proj1, bad);
541 static bool get_phase_flag(ir_phase *block_info, ir_node *block, int offset) {
542 return ((int)phase_get_irn_data(block_info, block)) & (1<<offset);
544 static void set_phase_flag(ir_phase *block_info, ir_node *block, int offset) {
545 int data = (int)phase_get_irn_data(block_info, block);
547 phase_set_irn_data(block_info, block, (void*)data);
/** Return true if @p block was flagged (bit 1) as containing real operations. */
static bool has_operations(ir_phase *block_info, ir_node *block) {
	return get_phase_flag(block_info, block, 1);
/** Flag @p block (bit 1) as containing real operations. */
static void set_has_operations(ir_phase *block_info, ir_node *block) {
	set_phase_flag(block_info, block, 1);
/** Return true if @p block was flagged (bit 2) as containing Phi nodes. */
static bool has_phis(ir_phase *block_info, ir_node *block) {
	return get_phase_flag(block_info, block, 2);
/** Flag @p block (bit 2) as containing Phi nodes. */
static void set_has_phis(ir_phase *block_info, ir_node *block) {
	set_phase_flag(block_info, block, 2);
/** Return true if @p block was flagged (bit 3) as target of an unknown jump. */
static bool is_unknown_jump_target(ir_phase *block_info, ir_node *block) {
	return get_phase_flag(block_info, block, 3);
/** Flag @p block (bit 3) as target of an unknown jump. */
static void set_is_unknown_jump_target(ir_phase *block_info, ir_node *block) {
	set_phase_flag(block_info, block, 3);
/**
 * Optimize Conds, where true and false jump to the same block, into a Jmp.
 *
 *        Cond                  Cond
 *       /    \                /    \
 *   projA    projB   =>    Jmp     Bad
 *
 * @param block  the block whose predecessors i and j are inspected
 * @param i,j    two distinct cfg predecessor positions of @p block
 * @return true if the optimization was applied (not visible in the
 *         sampled fragment; return statements are elided from this view)
 */
static bool optimize_pred_cond(ir_node *block, int i, int j)
	ir_node *projA, *projB, *cond, *pred_block, *jmp, *bad;

	/* bail out unless both preds are Projs of the same boolean Cond */
	projA = get_Block_cfgpred(block, i);
	if (!is_Proj(projA)) return false;
	projB = get_Block_cfgpred(block, j);
	if (!is_Proj(projB)) return false;
	cond  = get_Proj_pred(projA);
	if (!is_Cond(cond)) return false;
	if (cond != get_Proj_pred(projB)) return false;
	if (is_switch_Cond(cond)) return false;

	/* cond should actually be a Jmp */
	pred_block = get_nodes_block(cond);
	jmp = new_r_Jmp(pred_block);
	bad = new_r_Bad(get_irn_irg(block), mode_X);

	assert(projA != projB);
	exchange(projA, jmp);
	exchange(projB, bad);
/** Walker: classify each node into per-block flags (phase data) used by
 * optimize_conds() and remove_empty_blocks(). */
static void compute_block_info(ir_node *n, void *x)
	ir_phase *block_info = (ir_phase *)x;

		/* mark blocks reached by an unknown jump */
		int i, max = get_Block_n_cfgpreds(n);
		for (i=0; i<max; i++) {
			ir_node *pred = get_Block_cfgpred(n,i);
			if (is_unknown_jump(pred)) {
				set_is_unknown_jump_target(block_info, n);
	} else if (is_Phi(n)) {
		ir_node *block = get_nodes_block(n);
		set_has_phis(block_info, block);
	} else if (is_Jmp(n) || is_Cond(n) || is_Cmp(n) || is_Proj(n)) {
		/* NOTE(review): an intermediate branch appears to be elided from
		 * this view between the case above and the lines below -- these
		 * node kinds presumably do NOT count as operations; confirm. */
		ir_node *block = get_nodes_block(n);
		set_has_operations(block_info, block);
/** Environment for the Phi-free cfg optimizations (optimize_conds,
 * remove_empty_blocks); carries the block-info phase. */
typedef struct skip_env {
/** Block walker: apply optimize_pred_cond() to every pair of predecessors.
 * Skipped entirely for blocks containing Phis, as removing a predecessor
 * pair would invalidate them. */
static void optimize_conds(ir_node *b, void *x)
	skip_env *env = (skip_env*)x;
	int n_preds   = get_Block_n_cfgpreds(b);

	if (has_phis(env->phase,b)) return;

	/* optimize Cond predecessors (might produce Bad predecessors) */
	for (i = 0; i < n_preds; i++) {
		for (j = i+1; j < n_preds; j++) {
			optimize_pred_cond(b, i, j);
/** Block walker: skip single-predecessor empty blocks (no operations, not an
 * unknown-jump target) that jump into @p b, merging them into their own
 * predecessor block. */
static void remove_empty_blocks(ir_node *b, void *x)
	skip_env *env = (skip_env*)x;
	int n_preds = get_Block_n_cfgpreds(b);

	for (i = 0; i < n_preds; ++i) {
		ir_node *jmp, *jmp_block, *pred, *pred_block;

		jmp = get_Block_cfgpred(b, i);
		if (!is_Jmp(jmp)) continue;
		if (is_unknown_jump(jmp)) continue;
		jmp_block = get_nodes_block(jmp);
		if (is_unknown_jump_target(env->phase, jmp_block)) continue;
		if (has_operations(env->phase,jmp_block)) continue;
		/* jmp_block is an empty block! */

		if (get_Block_n_cfgpreds(jmp_block) != 1) continue;
		pred = get_Block_cfgpred(jmp_block, 0);

		/* cleanup: jmp_block might have a Keep edge! */
		pred_block = get_nodes_block(pred);
		exchange(jmp_block, pred_block);
/**
 * Some cfg optimizations, which do not touch Phi nodes. */
static void cfgopt_ignoring_phis(ir_graph *irg) {
	ir_phase *block_info = new_phase(irg, NULL);
	skip_env env = { false, block_info };

	/* classify blocks (operations / phis / unknown-jump targets) */
	irg_walk_graph(irg, compute_block_info, NULL, block_info);

	/* Conds => Jmp optimization; might produce empty blocks */
	irg_block_walk_graph(irg, optimize_conds, NULL, &env);

	/* Remove empty blocks */
	irg_block_walk_graph(irg, remove_empty_blocks, NULL, &env);

	set_irg_doms_inconsistent(irg);
	/* Removing blocks might enable more Cond optimizations */

	phase_free(block_info);
/* Optimizations of the control flow that also require changes of Phi nodes. */
void optimize_cf(ir_graph *irg)
	ir_node *end = get_irg_end(irg);

	assert(get_irg_phase_state(irg) != phase_building);

	/* if the graph is not pinned, we cannot determine empty blocks */
	assert(get_irg_pinned(irg) != op_pin_state_floats &&
	       "Control flow optimization need a pinned graph");

	/* FIXME: control flow opt destroys block edges. So edges are deactivated
	 * here. Fix the edges! */
	edges_deactivate(irg);

	/* simple, Phi-free optimizations first */
	cfgopt_ignoring_phis(irg);

	/* we use the mark flag to mark removable blocks */
	ir_reserve_resources(irg, IR_RESOURCE_BLOCK_MARK | IR_RESOURCE_IRN_LINK);

	/* The switch Cond optimization might expose unreachable code, so we loop */
		env.phis_moved = false;

		/* This pass collects all Phi nodes in a link list in the block
		 * nodes. Further it performs simple control flow optimizations.
		 * Finally it marks all blocks that do not contain useful
		 * computations, i.e., these blocks might be removed. */
		env.switch_conds = NEW_ARR_F(ir_node*, 0);
		irg_walk(end, clear_link, collect_nodes, &env);

		/* handle all collected switch-Conds */
		length = ARR_LEN(env.switch_conds);
		for (i = 0; i < length; ++i) {
			ir_node *cond = env.switch_conds[i];
			env.changed |= handle_switch_cond(cond);
		DEL_ARR_F(env.switch_conds);

		/* fixpoint reached: no change in this iteration */
		if (!env.changed) break;

		set_irg_doms_inconsistent(irg);
		set_irg_extblk_inconsistent(irg);
		set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);

	/* assert due to collect_nodes:
	 * 1. removable blocks are now marked as such
	 * 2. phi lists are up to date
	 */

	/* Optimize the standard code.
	 * It walks only over block nodes and adapts these and the Phi nodes in these
	 * blocks, which it finds in a linked list computed before.
	 */
	irg_block_walk_graph(irg, optimize_blocks, NULL, &env);

	new_end = optimize_in_place(end);
	if (new_end != end) {
		set_irg_end(irg, new_end);

	remove_End_Bads_and_doublets(end);

	ir_free_resources(irg, IR_RESOURCE_BLOCK_MARK | IR_RESOURCE_IRN_LINK);

	if (env.phis_moved) {
		/* Bad: when we moved Phi's, we might produce dead Phi nodes
		   that are kept alive only via keep-alive edges.
		   Some other phases cannot cope with this, so we kill them. */
		n = get_End_n_keepalives(end);
			NEW_ARR_A(ir_node *, in, n);
			assure_irg_outs(irg);

			/* keep only keep-alives that still have a real user */
			for (i = j = 0; i < n; ++i) {
				ir_node *ka = get_End_keepalive(end, i);
					for (k = get_irn_n_outs(ka) - 1; k >= 0; --k) {
						ir_node *user = get_irn_out(ka, k);

						/* Is it a real user or just a self loop ? */
						if (user != ka && user != end) {
			set_End_keepalives(end, j, in);

	/* Handle graph state if was changed. */
	set_irg_doms_inconsistent(irg);
	set_irg_extblk_inconsistent(irg);
	set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);
/* Creates an ir_graph pass for optimize_cf.
 * @param name  pass name, or NULL for the default "optimize_cf" */
ir_graph_pass_t *optimize_cf_pass(const char *name)
	return def_graph_pass(name ? name : "optimize_cf", optimize_cf);