 * Copyright (C) 1995-2008 University of Karlsruhe. All rights reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
22 * @brief Control flow optimizations.
23 * @author Goetz Lindenmaier, Michael Beck, Sebastian Hack
26 * Removes Bad control flow predecessors and empty blocks. A block is empty
27 * if it contains only a Jmp node. Blocks can only be removed if they are not
28 * needed for the semantics of Phi nodes. Further, we NEVER remove labeled
29 * blocks (even if we could move the label).
33 #include "iroptimize.h"
40 #include "irgraph_t.h"
54 #include "irbackedge_t.h"
60 #include "iropt_dbg.h"
62 /** An environment for merge_blocks and collect nodes. */
63 typedef struct merge_env {
64 bool changed; /**< Set if the graph was changed. */
65 bool phis_moved; /**< Set if Phi nodes were moved. */
66 ir_node **switch_conds; /**< Helper list for all found Switch Conds. */
69 static void set_Block_removable(ir_node *block, bool removable)
71 set_Block_mark(block, removable);
74 static bool is_Block_removable(ir_node *block)
76 return get_Block_mark(block);
79 static void clear_link(ir_node *node, void *ctx)
82 set_irn_link(node, NULL);
84 set_Block_removable(node, true);
88 * Collects all Phi nodes in link list of Block.
89 * Marks all blocks "non_removable" if they contain a node other
90 * than Jmp (and Proj).
91 * Links all Proj nodes to their predecessors.
92 * Collects all switch-Conds in a list.
94 static void collect_nodes(ir_node *n, void *ctx)
96 merge_env *env = (merge_env*)ctx;
99 /* Collect Phi nodes to compact ins along with block's ins. */
100 ir_node *block = get_nodes_block(n);
101 set_irn_link(n, get_irn_link(block));
102 set_irn_link(block, n);
103 } else if (is_Block(n)) {
104 if (has_Block_entity(n))
105 set_Block_removable(n, false);
107 } else if (!is_Jmp(n)) { /* Check for non-empty block. */
108 ir_node *block = get_nodes_block(n);
109 set_Block_removable(block, false);
112 /* link Proj nodes */
113 ir_node *pred = get_Proj_pred(n);
114 set_irn_link(n, get_irn_link(pred));
115 set_irn_link(pred, n);
116 } else if (is_Cond(n)) {
117 ir_node *sel = get_Cond_selector(n);
118 if (get_irn_mode(sel) != mode_b) {
119 /* found a switch-Cond, collect */
120 ARR_APP1(ir_node*, env->switch_conds, n);
126 /** Returns true if pred is predecessor of block. */
127 static bool is_pred_of(ir_node *pred, ir_node *b)
131 for (i = get_Block_n_cfgpreds(b) - 1; i >= 0; --i) {
132 ir_node *b_pred = get_Block_cfgpred_block(b, i);
139 static unsigned count_non_bad_inputs(const ir_node *node)
141 int arity = get_irn_arity(node);
145 for (i = 0; i < arity; ++i) {
146 ir_node *in = get_irn_n(node, i);
153 /** Test whether we can optimize away pred block pos of b.
155 * @param b A block node.
156 * @param pos The position of the predecessor block to judge about.
158 * @returns The number of predecessors
160 * The test is rather tricky.
162 * The situation is something like the following:
171 * b merges the control flow of an if-then-else. We may not remove
172 * the 'then' _and_ the 'else' block of an 'if' if there is a Phi
173 * node in b, even if both are empty. The destruction of this Phi
174 * requires that a copy is added before the merge. We have to
175 * keep one of the case blocks to place the copies in.
177 * To perform the test for pos, we must regard predecessors before pos
178 * as already removed.
180 static unsigned test_whether_dispensable(ir_node *b, int pos)
182 ir_node *pred = get_Block_cfgpred(b, pos);
183 ir_node *predb = get_nodes_block(pred);
185 /* Bad blocks will be optimized away, so we don't need space for them */
188 if (!is_Block_removable(predb))
190 /* can't remove self-loops */
192 goto non_dispensable;
193 if (is_unknown_jump(pred))
194 goto non_dispensable;
196 /* Seems to be empty. At least we detected this in collect_nodes. */
197 if (get_irn_link(b) != NULL) {
198 int n_cfgpreds = get_Block_n_cfgpreds(b);
200 /* there are Phi nodes */
202 /* b's pred blocks and pred's pred blocks must be pairwise disjunct.
203 * Handle all pred blocks with preds < pos as if they were already
205 for (i = 0; i < pos; i++) {
206 ir_node *other_pred = get_Block_cfgpred(b, i);
207 ir_node *other_predb = get_nodes_block(other_pred);
208 if (is_Bad(other_pred))
210 if (is_Block_removable(other_predb)
211 && !Block_block_visited(other_predb)) {
213 for (j = get_Block_n_cfgpreds(other_predb) - 1; j >= 0; --j) {
214 ir_node *other_predpred
215 = get_Block_cfgpred_block(other_predb, j);
216 if (is_pred_of(other_predpred, predb))
217 goto non_dispensable;
219 } else if (is_pred_of(other_predb, predb)) {
220 goto non_dispensable;
223 for (i = pos+1; i < n_cfgpreds; i++) {
224 ir_node *other_predb = get_Block_cfgpred_block(b, i);
225 if (is_pred_of(other_predb, predb))
226 goto non_dispensable;
229 /* we will not dispense already visited blocks */
230 if (Block_block_visited(predb))
232 /* if we get here, the block is dispensable, count useful preds */
233 return count_non_bad_inputs(predb);
236 set_Block_removable(predb, false);
 * This method removes Bad cf predecessors from Blocks and Phis, and removes
242 * empty blocks. A block is empty if it only contains Phi and Jmp nodes.
244 * We first adapt Phi nodes, then Block nodes, as we need the old ins
245 * of the Block to adapt the Phi nodes. We do this by computing new
246 * in arrays, and then replacing the old ones. So far we compute new in arrays
247 * for all nodes, not regarding whether there is a possibility for optimization.
249 * For each predecessor p of a Block b there are three cases:
250 * - The predecessor p is a Bad node: just skip it. The in array of b shrinks
252 * - The predecessor p is empty. Remove p. All predecessors of p are now
254 * - The predecessor p is a block containing useful code. Just keep p as is.
256 * For Phi nodes f we have to check the conditions at the Block of f.
257 * For cases 1 and 3 we proceed as for Blocks. For case 2 we can have two
259 * -2a: The old predecessor of the Phi f is a Phi pred_f IN THE BLOCK REMOVED.
260 * In this case we proceed as for blocks. We remove pred_f. All
261 * predecessors of pred_f now are predecessors of f.
262 * -2b: The old predecessor of f is NOT in the block removed. It might be a Phi
263 * too. We have to replicate f for each predecessor of the removed block.
264 * Or, with other words, the removed predecessor block has exactly one
267 * Further there is a special case for self referencing blocks:
270 * then_b else_b then_b else_b
276 * | | | === optimized to ===> \ | | |
283 * If there is a Phi in pred_b, but we remove pred_b, we have to generate a
284 * Phi in loop_b, that has the ins of the Phi in pred_b and a self referencing
/* Walker: rebuilds the in-array of block b and of all its Phi nodes,
 * skipping Bad predecessors and inlining the predecessors of empty
 * (removable, not yet visited) predecessor blocks.  See the big case
 * analysis in the comment above.
 * NOTE(review): several structural lines (braces, the declarations of
 * 'in', 'q_preds' and 'next_phi', and some if/else headers) are not
 * visible in this excerpt; comments document the visible logic only. */
static void optimize_blocks(ir_node *b, void *ctx)
	/* p_preds stays -1 when b has no Phi nodes; checked in the final assert */
	int i, j, k, n, max_preds, n_preds, p_preds = -1;
	ir_node *pred, *phi, *next;
	merge_env *env = (merge_env*)ctx;

	/* Count the number of predecessor if this block is merged with pred blocks
	for (i = 0, k = get_Block_n_cfgpreds(b); i < k; ++i) {
		max_preds += test_whether_dispensable(b, i);
	/* worst-case sized scratch array reused for all new in-arrays */
	in = XMALLOCN(ir_node*, max_preds);

	/*- Fix the Phi nodes of the current block -*/
	/* the Phis of b were linked into the block by collect_nodes() */
	for (phi = (ir_node*)get_irn_link(b); phi != NULL; phi = (ir_node*)next) {
		next = (ir_node*)get_irn_link(phi);

		/* Find the new predecessors for the Phi */
		for (i = 0, n = get_Block_n_cfgpreds(b); i < n; ++i) {
			pred = get_Block_cfgpred_block(b, i);
			/* case Phi 1: Do nothing */
			} else if (is_Block_removable(pred) && !Block_block_visited(pred)) {
				/* case Phi 2: It's an empty block and not yet visited. */
				ir_node *phi_pred = get_Phi_pred(phi, i);

				for (j = 0, k = get_Block_n_cfgpreds(pred); j < k; j++) {
					ir_node *pred_pred = get_Block_cfgpred(pred, j);
					/* because of breaking loops, not all predecessors are
					 * Bad-clean, so we must check this here again */
					if (is_Bad(pred_pred))
					/* case Phi 2a: the Phi input lives in the removed block */
					if (get_nodes_block(phi_pred) == pred) {
						assert(is_Phi(phi_pred)); /* Block is empty!! */
						in[p_preds++] = get_Phi_pred(phi_pred, j);
						/* case Phi 2b: input lives elsewhere, replicate it */
						in[p_preds++] = phi_pred;
			/* case Phi 3: useful predecessor, keep the old Phi input */
			in[p_preds++] = get_Phi_pred(phi, i);
		assert(p_preds == max_preds);

		/* By removal of Bad ins the Phi might be degenerated. */
		/* a degenerated (single-input) Phi is replaced by that input */
		exchange(phi, in[0]);
			set_irn_in(phi, p_preds, in);

	/*- This happens only if merge between loop backedge and single loop entry.
	    Moreover, it is only needed if predb is the direct dominator of b,
	    else there can be no uses of the Phi's in predb ... -*/
	for (k = 0, n = get_Block_n_cfgpreds(b); k < n; ++k) {
		ir_node *pred  = get_Block_cfgpred(b, k);
		ir_node *predb = get_nodes_block(pred);
		if (is_Block_removable(predb) && !Block_block_visited(predb)) {
			/* we found a predecessor block at position k that will be removed */
			for (phi = (ir_node*)get_irn_link(predb); phi; phi = next_phi) {
				next_phi = (ir_node*)get_irn_link(phi);
				if (get_Block_idom(b) != predb) {
					/* predb is not the dominator. There can't be uses of pred's Phi nodes, kill them .*/
					ir_graph *irg  = get_irn_irg(b);
					ir_mode  *mode = get_irn_mode(phi);
					exchange(phi, new_r_Bad(irg, mode));
					/* predb is the direct dominator of b. There might be uses of the Phi nodes from
					   predb in further block, so move this phi from the predecessor into the block b */
					set_nodes_block(phi, b);
					set_irn_link(phi, get_irn_link(b));
					set_irn_link(b, phi);
					env->phis_moved = true;

					/* first, copy all 0..k-1 predecessors */
					for (i = 0; i < k; i++) {
						pred = get_Block_cfgpred_block(b, i);
						} else if (is_Block_removable(pred) && !Block_block_visited(pred)) {
							/* It's an empty block and not yet visited. */
							for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
								if (! is_Bad(get_Block_cfgpred(pred, j)))

					/* now we are at k, copy the phi predecessors */
					pred = get_nodes_block(get_Block_cfgpred(b, k));
					for (i = 0; i < get_Phi_n_preds(phi); i++) {
						if (! is_Bad(get_Block_cfgpred(pred, i)))
							in[q_preds++] = get_Phi_pred(phi, i);

					/* and now all the rest */
					for (i = k+1; i < get_Block_n_cfgpreds(b); i++) {
						pred = get_Block_cfgpred_block(b, i);
						} else if (is_Block_removable(pred) && !Block_block_visited(pred)) {
							/* It's an empty block and not yet visited. */
							for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
								if (! is_Bad(get_Block_cfgpred(pred, j)))

					/* degenerated Phi: replace by its only remaining input */
					exchange(phi, in[0]);
						set_irn_in(phi, q_preds, in);

					assert(q_preds <= max_preds);
					// assert(p_preds == q_preds && "Wrong Phi Fix");

	/*- Fix the block -*/
	for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
		ir_node *pred  = get_Block_cfgpred(b, i);
		ir_node *predb = get_nodes_block(pred);
		/* case 1: Do nothing */
		if (is_Block_removable(predb) && !Block_block_visited(predb)) {
			/* case 2: It's an empty block and not yet visited. */
			for (j = 0; j < get_Block_n_cfgpreds(predb); j++) {
				ir_node *predpred = get_Block_cfgpred(predb, j);
				/* because of breaking loops, not all predecessors are
				 * Bad-clean, so we must check this here again */
				if (is_Bad(predpred))
				in[n_preds++] = predpred;
			/* Remove block+jump as it might be kept alive. */
			exchange(pred, new_r_Bad(get_irn_irg(b), mode_X));
			exchange(predb, new_r_Bad(get_irn_irg(b), mode_BB));
			/* case 3: useful predecessor, keep it */
			in[n_preds++] = pred;
	assert(n_preds == max_preds);

	set_irn_in(b, n_preds, in);

	/* see if phi-fix was correct */
	assert(get_irn_link(b) == NULL || p_preds == -1 || (n_preds == p_preds));
477 * Block walker: optimize all blocks using the default optimizations.
478 * This removes Blocks with only a Jmp predecessor.
480 static void remove_simple_blocks(ir_node *block, void *ctx)
482 merge_env *env = (merge_env*)ctx;
483 ir_node *new_blk = equivalent_node(block);
485 if (new_blk != block) {
486 exchange(block, new_blk);
492 * Optimize table-switch Conds.
494 * @param cond the switch-Cond
495 * @return true if the switch-Cond was optimized
497 static bool handle_switch_cond(ir_node *cond)
499 ir_node *sel = get_Cond_selector(cond);
500 ir_node *proj1 = (ir_node*)get_irn_link(cond);
501 ir_node *proj2 = (ir_node*)get_irn_link(proj1);
502 ir_node *blk = get_nodes_block(cond);
504 /* exactly 1 Proj on the Cond node: must be the defaultProj */
506 ir_node *jmp = new_r_Jmp(blk);
507 assert(get_Cond_default_proj(cond) == get_Proj_proj(proj1));
508 /* convert it into a Jmp */
509 exchange(proj1, jmp);
513 /* handle Cond nodes with constant argument. In this case the localopt rules
514 * should have killed all obviously impossible cases.
515 * So the only case left to handle here is 1 defaultProj + 1case
516 * (this one case should be the one taken) */
517 if (get_irn_link(proj2) == NULL) {
518 ir_tarval *tv = value_of(sel);
520 if (tv != tarval_bad) {
521 /* we have a constant switch */
522 long num = get_tarval_long(tv);
523 long def_num = get_Cond_default_proj(cond);
524 ir_graph *irg = get_irn_irg(cond);
525 ir_node *bad = new_r_Bad(irg, mode_X);
527 if (def_num == get_Proj_proj(proj1)) {
528 /* first one is the defProj */
529 if (num == get_Proj_proj(proj2)) {
530 ir_node *jmp = new_r_Jmp(blk);
531 exchange(proj2, jmp);
532 exchange(proj1, bad);
535 } else if (def_num == get_Proj_proj(proj2)) {
536 /* second one is the defProj */
537 if (num == get_Proj_proj(proj1)) {
538 ir_node *jmp = new_r_Jmp(blk);
539 exchange(proj1, jmp);
540 exchange(proj2, bad);
544 /* neither: strange, Cond was not optimized so far */
545 if (num == get_Proj_proj(proj1)) {
546 ir_node *jmp = new_r_Jmp(blk);
547 exchange(proj1, jmp);
548 exchange(proj2, bad);
550 } else if (num == get_Proj_proj(proj2)) {
551 ir_node *jmp = new_r_Jmp(blk);
552 exchange(proj2, jmp);
553 exchange(proj1, bad);
562 /* Optimizations of the control flow that also require changes of Phi nodes.
564 * This optimization performs two passes over the graph.
566 * The first pass collects all Phi nodes in a link list in the block
567 * nodes. Further it performs simple control flow optimizations.
568 * Finally it marks all blocks that do not contain useful
569 * computations, i.e., these blocks might be removed.
571 * The second pass performs the optimizations intended by this algorithm.
572 * It walks only over block nodes and adapts these and the Phi nodes in these
573 * blocks, which it finds in a linked list computed by the first pass.
575 * We use the mark flag to mark removable blocks in the first phase.
/* Driver: two-pass control flow optimization, see the big comment above.
 * NOTE(review): the declarations of the locals used below (i, j, k, n, env,
 * in, new_end) and the restart loop around the main phase are on lines not
 * visible in this excerpt. */
void optimize_cf(ir_graph *irg)
	ir_node *end = get_irg_end(irg);

	assert(get_irg_phase_state(irg) != phase_building);

	/* if the graph is not pinned, we cannot determine empty blocks */
	assert(get_irg_pinned(irg) != op_pin_state_floats &&
	       "Control flow optimization need a pinned graph");

	/* FIXME: control flow opt destroys block edges. So edges are deactivated
	 * here. Fix the edges! */
	edges_deactivate(irg);

	/* we use the mark flag to mark removable blocks */
	ir_reserve_resources(irg, IR_RESOURCE_BLOCK_MARK | IR_RESOURCE_IRN_LINK);

	env.phis_moved = false;

	/* first pass: clear links, collect Phis/Projs/switch-Conds, mark blocks */
	env.switch_conds = NEW_ARR_F(ir_node*, 0);
	irg_walk(end, clear_link, collect_nodes, &env);

	/* handle all collected switch-Conds */
	n = ARR_LEN(env.switch_conds);
	for (i = 0; i < n; ++i) {
		ir_node *cond = env.switch_conds[i];
		env.changed |= handle_switch_cond(cond);
	DEL_ARR_F(env.switch_conds);

	/* Handle graph state if was changed. */
	set_irg_outs_inconsistent(irg);
	set_irg_doms_inconsistent(irg);
	set_irg_extblk_inconsistent(irg);
	set_irg_loopinfo_inconsistent(irg);
	set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);

	/* The Cond optimization might generate unreachable code, so restart if
	/* Optimize the standard code.
	 * second pass: rebuild block/Phi in-arrays and fold trivial blocks */
	irg_block_walk_graph(irg, optimize_blocks, remove_simple_blocks, &env);

	new_end = optimize_in_place(end);
	if (new_end != end) {
		set_irg_end(irg, new_end);
	remove_End_Bads_and_doublets(end);

	ir_free_resources(irg, IR_RESOURCE_BLOCK_MARK | IR_RESOURCE_IRN_LINK);

	if (env.phis_moved) {
		/* Bad: when we moved Phi's, we might produce dead Phi nodes
		   Some other phases cannot cope with this, so we kill them here.
		n = get_End_n_keepalives(end);
		NEW_ARR_A(ir_node *, in, n);

		/* Handle graph state if was changed. */
		set_irg_outs_inconsistent(irg);
		/* recompute out edges so we can detect unused (dead) Phis */
		assure_irg_outs(irg);

		for (i = j = 0; i < n; ++i) {
			ir_node *ka = get_End_keepalive(end, i);
			for (k = get_irn_n_outs(ka) - 1; k >= 0; --k) {
				ir_node *user = get_irn_out(ka, k);

				if (user != ka && user != end) {
					/* Is it a real user or just a self loop ? */
			/* keep only the still-used keepalives */
			set_End_keepalives(end, j, in);

	/* Handle graph state if was changed. */
	set_irg_outs_inconsistent(irg);
	set_irg_doms_inconsistent(irg);
	set_irg_extblk_inconsistent(irg);
	set_irg_loopinfo_inconsistent(irg);
	set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);

	/* the verifier doesn't work yet with floating nodes */
	if (get_irg_pinned(irg) == op_pin_state_pinned) {
		/* after optimize_cf(), only Bad data flow may remain. */
		if (irg_verify_bads(irg, BAD_DF | BAD_BLOCK | TUPLE)) {
			dump_ir_graph(irg, "-verify-cf");
			fprintf(stderr, "VERIFY_BAD in optimize_cf()\n");
699 /* Creates an ir_graph pass for optimize_cf. */
700 ir_graph_pass_t *optimize_cf_pass(const char *name)
702 return def_graph_pass(name ? name : "optimize_cf", optimize_cf);