2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Control flow optimizations.
23 * @author Goetz Lindenmaier, Michael Beck, Sebastian Hack
26 * Removes Bad control flow predecessors and empty blocks. A block is empty
27 * if it contains only a Jmp node. Blocks can only be removed if they are not
28 * needed for the semantics of Phi nodes. Further, we NEVER remove labeled
29 * blocks (even if we could move the label).
33 #include "iroptimize.h"
40 #include "irgraph_t.h"
54 #include "irbackedge_t.h"
60 #include "iropt_dbg.h"
62 /** An environment for merge_blocks and collect nodes. */
63 typedef struct merge_env {
64 bool changed; /**< Set if the graph was changed. */
65 bool phis_moved; /**< Set if Phi nodes were moved. */
66 ir_node **switch_conds; /**< Helper list for all found Switch Conds. */
/* NOTE(review): the closing "} merge_env;" line is elided in this excerpt. */
69 static void set_Block_removable(ir_node *block, bool removable)
71 set_Block_mark(block, removable);
74 static bool is_Block_removable(ir_node *block)
76 return get_Block_mark(block);
/*
 * Pre-walker: reset every node's link field and optimistically mark
 * blocks as removable; collect_nodes() later clears the mark on any
 * block found to contain useful (non-Jmp/Proj) code.
 */
79 static void clear_link(ir_node *node, void *ctx)
82 set_irn_link(node, NULL);
/* NOTE(review): a guard line (presumably "if (is_Block(node))") appears
 * to be elided before this call -- only blocks carry the removable mark.
 * Confirm against the full source. */
84 set_Block_removable(node, true);
88 * Collects all Phi nodes in link list of Block.
89 * Marks all blocks "non_removable" if they contain a node other
90 * than Jmp (and Proj).
91 * Links all Proj nodes to their predecessors.
92 * Collects all switch-Conds in a list.
/* NOTE(review): several branch-opening lines (e.g. the leading
 * "if (is_Phi(n)) {" and the "} else if (is_Proj(n)) {" opener) are
 * elided in this excerpt; the surviving branches below imply them. */
94 static void collect_nodes(ir_node *n, void *ctx)
96 merge_env *env = (merge_env*)ctx;
99 /* Collect Phi nodes to compact ins along with block's ins. */
/* Phis are threaded into a singly linked list hanging off their block's
 * link field, newest first; optimize_blocks() walks this list. */
100 ir_node *block = get_nodes_block(n);
101 set_irn_link(n, get_irn_link(block));
102 set_irn_link(block, n);
103 } else if (is_Block(n)) {
/* Labeled blocks (those with an entity) must never be removed. */
104 if (has_Block_entity(n))
105 set_Block_removable(n, false);
107 } else if (!is_Jmp(n)) { /* Check for non-empty block. */
108 ir_node *block = get_nodes_block(n);
109 set_Block_removable(block, false);
112 /* link Proj nodes */
/* Projs are threaded off their predecessor's link field so that
 * handle_switch_cond() can enumerate a Cond's Projs. */
113 ir_node *pred = get_Proj_pred(n);
114 set_irn_link(n, get_irn_link(pred));
115 set_irn_link(pred, n);
116 } else if (is_Cond(n)) {
117 ir_node *sel = get_Cond_selector(n);
118 if (get_irn_mode(sel) != mode_b) {
119 /* found a switch-Cond, collect */
120 ARR_APP1(ir_node*, env->switch_conds, n);
126 /** Returns true if pred is predecessor of block. */
/* Linear scan over b's control-flow predecessor blocks. */
127 static bool is_pred_of(ir_node *pred, ir_node *b)
131 for (i = get_Block_n_cfgpreds(b) - 1; i >= 0; --i) {
132 ir_node *b_pred = get_Block_cfgpred_block(b, i);
/* NOTE(review): the comparison of b_pred against pred and both return
 * statements are elided in this excerpt. */
139 /** Test whether we can optimize away pred block pos of b.
141 * @param b A block node.
142 * @param pos The position of the predecessor block to judge about.
144 * @returns The number of predecessors
146 * The test is rather tricky.
148 * The situation is something like the following:
157 * b merges the control flow of an if-then-else. We may not remove
158 * the 'then' _and_ the 'else' block of an 'if' if there is a Phi
159 * node in b, even if both are empty. The destruction of this Phi
160 * requires that a copy is added before the merge. We have to
161 * keep one of the case blocks to place the copies in.
163 * To perform the test for pos, we must regard predecessors before pos
164 * as already removed.
/* NOTE(review): this excerpt elides several lines of the function
 * (the opening brace, the self-loop test, the "1" returns for
 * non-dispensable cases, and the non_dispensable label). The surviving
 * lines are annotated below. */
166 static unsigned test_whether_dispensable(ir_node *b, int pos)
168 ir_node *pred = get_Block_cfgpred(b, pos);
169 ir_node *predb = get_nodes_block(pred);
/* Bad preds and blocks already marked non-removable can never be
 * merged away. */
171 if (is_Bad(pred) || !is_Block_removable(predb))
174 /* can't remove self-loops */
176 goto non_dispensable;
177 if (is_unknown_jump(pred))
178 goto non_dispensable;
180 /* Seems to be empty. At least we detected this in collect_nodes. */
/* A non-NULL link means collect_nodes() found Phi nodes in b, which
 * triggers the pairwise-disjointness test described above. */
181 if (get_irn_link(b) != NULL) {
182 int n_cfgpreds = get_Block_n_cfgpreds(b);
184 /* there are Phi nodes */
186 /* b's pred blocks and pred's pred blocks must be pairwise disjunct.
187 * Handle all pred blocks with preds < pos as if they were already
189 for (i = 0; i < pos; i++) {
190 ir_node *other_pred = get_Block_cfgpred(b, i);
191 ir_node *other_predb = get_nodes_block(other_pred);
192 if (is_Bad(other_pred))
/* A pred before pos that will itself be merged contributes its own
 * preds, so those must be checked against predb instead. */
194 if (is_Block_removable(other_predb)
195 && !Block_block_visited(other_predb)) {
197 for (j = get_Block_n_cfgpreds(other_predb) - 1; j >= 0; --j) {
198 ir_node *other_predpred
199 = get_Block_cfgpred_block(other_predb, j);
200 if (is_pred_of(other_predpred, predb))
201 goto non_dispensable;
203 } else if (is_pred_of(other_predb, predb)) {
204 goto non_dispensable;
/* Predecessors after pos are still present unchanged; compare their
 * blocks against predb directly. */
207 for (i = pos+1; i < n_cfgpreds; i++) {
208 ir_node *other_predb = get_Block_cfgpred_block(b, i);
209 if (is_pred_of(other_predb, predb))
210 goto non_dispensable;
213 /* we will not dispense already visited blocks */
214 if (Block_block_visited(predb))
216 /* if we get here, the block is dispensable, count useful preds */
217 return get_irn_arity(predb);
/* Fall-through target of the gotos above: pin predb as non-removable
 * so later tests short-circuit. */
220 set_Block_removable(predb, false);
225 * This method removed Bad cf predecessors from Blocks and Phis, and removes
226 * empty blocks. A block is empty if it only contains Phi and Jmp nodes.
228 * We first adapt Phi nodes, then Block nodes, as we need the old ins
229 * of the Block to adapt the Phi nodes. We do this by computing new
230 * in arrays, and then replacing the old ones. So far we compute new in arrays
231 * for all nodes, not regarding whether there is a possibility for optimization.
233 * For each predecessor p of a Block b there are three cases:
234 * - The predecessor p is a Bad node: just skip it. The in array of b shrinks
236 * - The predecessor p is empty. Remove p. All predecessors of p are now
238 * - The predecessor p is a block containing useful code. Just keep p as is.
240 * For Phi nodes f we have to check the conditions at the Block of f.
241 * For cases 1 and 3 we proceed as for Blocks. For case 2 we can have two
243 * -2a: The old predecessor of the Phi f is a Phi pred_f IN THE BLOCK REMOVED.
244 * In this case we proceed as for blocks. We remove pred_f. All
245 * predecessors of pred_f now are predecessors of f.
246 * -2b: The old predecessor of f is NOT in the block removed. It might be a Phi
247 * too. We have to replicate f for each predecessor of the removed block.
248 * Or, with other words, the removed predecessor block has exactly one
251 * Further there is a special case for self referencing blocks:
254 * then_b else_b then_b else_b
260 * | | | === optimized to ===> \ | | |
267 * If there is a Phi in pred_b, but we remove pred_b, we have to generate a
268 * Phi in loop_b, that has the ins of the Phi in pred_b and a self referencing
/* NOTE(review): this excerpt elides many lines of the function body
 * (opening brace, several if/else openers and closers, the declarations
 * of in/q_preds/next_phi, and the Block_block_visited marking). Comments
 * below annotate only the surviving lines. */
271 static void optimize_blocks(ir_node *b, void *ctx)
273 int i, j, k, n, max_preds, n_preds, p_preds = -1;
274 ir_node *pred, *phi, *next;
276 merge_env *env = (merge_env*)ctx;
278 /* Count the number of predecessor if this block is merged with pred blocks
/* max_preds is an upper bound: each dispensable pred contributes its own
 * pred count (see test_whether_dispensable). */
281 for (i = 0, k = get_Block_n_cfgpreds(b); i < k; ++i) {
282 max_preds += test_whether_dispensable(b, i);
284 in = XMALLOCN(ir_node*, max_preds);
286 /*- Fix the Phi nodes of the current block -*/
/* Walk the Phi list that collect_nodes() threaded off b's link field;
 * fetch next before the Phi is possibly exchanged. */
287 for (phi = (ir_node*)get_irn_link(b); phi != NULL; phi = (ir_node*)next) {
289 next = (ir_node*)get_irn_link(phi);
291 /* Find the new predecessors for the Phi */
293 for (i = 0, n = get_Block_n_cfgpreds(b); i < n; ++i) {
294 ir_graph *irg = get_irn_irg(b);
295 pred = get_Block_cfgpred_block(b, i);
298 /* case Phi 1: maintain Bads, as somebody else is responsible to remove them */
299 in[p_preds++] = new_r_Bad(irg, get_irn_mode(phi));
300 } else if (is_Block_removable(pred) && !Block_block_visited(pred)) {
301 /* case Phi 2: It's an empty block and not yet visited. */
302 ir_node *phi_pred = get_Phi_pred(phi, i);
/* The removed block's own preds are spliced into this Phi's ins. */
304 for (j = 0, k = get_Block_n_cfgpreds(pred); j < k; j++) {
305 ir_node *pred_pred = get_Block_cfgpred(pred, j);
307 if (is_Bad(pred_pred)) {
308 in[p_preds++] = new_r_Bad(irg, get_irn_mode(phi));
/* case 2a: the Phi's operand lives in the removed block, so it must
 * itself be a Phi there -- pull through its j-th operand. */
312 if (get_nodes_block(phi_pred) == pred) {
314 assert(is_Phi(phi_pred)); /* Block is empty!! */
316 in[p_preds++] = get_Phi_pred(phi_pred, j);
/* case 2b: operand is outside the removed block -- replicate it once
 * per pred of the removed block. */
319 in[p_preds++] = phi_pred;
/* case Phi 3: ordinary pred, keep the original operand. */
324 in[p_preds++] = get_Phi_pred(phi, i);
327 assert(p_preds == max_preds);
/* Degenerate to its single operand, or install the rebuilt in array. */
331 exchange(phi, in[0]);
333 set_irn_in(phi, p_preds, in);
337 /*- This happens only if merge between loop backedge and single loop entry.
338 Moreover, it is only needed if predb is the direct dominator of b,
339 else there can be no uses of the Phi's in predb ... -*/
340 for (k = 0, n = get_Block_n_cfgpreds(b); k < n; ++k) {
341 ir_node *pred = get_Block_cfgpred(b, k);
342 ir_node *predb = get_nodes_block(pred);
346 if (is_Block_removable(predb) && !Block_block_visited(predb)) {
349 /* we found a predecessor block at position k that will be removed */
350 for (phi = (ir_node*)get_irn_link(predb); phi; phi = next_phi) {
352 next_phi = (ir_node*)get_irn_link(phi);
356 if (get_Block_idom(b) != predb) {
357 /* predb is not the dominator. There can't be uses of pred's Phi nodes, kill them .*/
358 ir_graph *irg = get_irn_irg(b);
359 ir_mode *mode = get_irn_mode(phi);
360 exchange(phi, new_r_Bad(irg, mode));
362 /* predb is the direct dominator of b. There might be uses of the Phi nodes from
363 predb in further block, so move this phi from the predecessor into the block b */
364 set_nodes_block(phi, b);
365 set_irn_link(phi, get_irn_link(b));
366 set_irn_link(b, phi);
367 env->phis_moved = true;
369 /* first, copy all 0..k-1 predecessors */
370 for (i = 0; i < k; i++) {
371 pred = get_Block_cfgpred_block(b, i);
374 in[q_preds++] = pred;
375 } else if (is_Block_removable(pred) && !Block_block_visited(pred)) {
376 /* It's an empty block and not yet visited. */
377 for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
378 if (! is_Bad(get_Block_cfgpred(pred, j)))
386 /* now we are at k, copy the phi predecessors */
387 pred = get_nodes_block(get_Block_cfgpred(b, k));
388 for (i = 0; i < get_Phi_n_preds(phi); i++) {
389 in[q_preds++] = get_Phi_pred(phi, i);
392 /* and now all the rest */
393 for (i = k+1; i < get_Block_n_cfgpreds(b); i++) {
394 pred = get_Block_cfgpred_block(b, i);
397 in[q_preds++] = pred;
398 } else if (is_Block_removable(pred) && !Block_block_visited(pred)) {
399 /* It's an empty block and not yet visited. */
400 for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
401 if (! is_Bad(get_Block_cfgpred(pred, j)))
411 exchange(phi, in[0]);
413 set_irn_in(phi, q_preds, in);
416 assert(q_preds <= max_preds);
417 // assert(p_preds == q_preds && "Wrong Phi Fix");
423 /*- Fix the block -*/
/* Rebuild b's own in array using the same three cases as for Phis. */
425 for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
426 ir_node *pred = get_Block_cfgpred(b, i);
427 ir_node *predb = get_nodes_block(pred);
428 ir_graph *irg = get_irn_irg(pred);
430 /* case 1: Bad predecessor */
432 in[n_preds++] = new_r_Bad(irg, mode_X);
435 if (is_Block_removable(predb) && !Block_block_visited(predb)) {
436 /* case 2: It's an empty block and not yet visited. */
437 for (j = 0; j < get_Block_n_cfgpreds(predb); j++) {
438 ir_node *predpred = get_Block_cfgpred(predb, j);
440 if (is_Bad(predpred)) {
441 in[n_preds++] = new_r_Bad(irg, mode_X);
445 in[n_preds++] = predpred;
447 /* Remove block+jump as it might be kept alive. */
448 exchange(pred, new_r_Bad(get_irn_irg(b), mode_X));
449 exchange(predb, new_r_Bad(get_irn_irg(b), mode_BB));
452 in[n_preds++] = pred;
455 assert(n_preds == max_preds);
457 set_irn_in(b, n_preds, in);
460 /* see if phi-fix was correct */
461 assert(get_irn_link(b) == NULL || p_preds == -1 || (n_preds == p_preds));
466 * Block walker: optimize all blocks using the default optimizations.
467 * This removes Blocks with only a Jmp predecessor.
469 static void remove_simple_blocks(ir_node *block, void *ctx)
471 merge_env *env = (merge_env*)ctx;
/* equivalent_node() returns a different node when the block folds to a
 * trivially equivalent one; replace it in that case. */
472 ir_node *new_blk = equivalent_node(block);
474 if (new_blk != block) {
475 exchange(block, new_blk);
/* NOTE(review): the tail of this function (presumably setting
 * env->changed = true and the closing braces) is elided in this
 * excerpt. */
481 * Optimize table-switch Conds.
483 * @param cond the switch-Cond
484 * @return true if the switch-Cond was optimized
/* NOTE(review): several lines (opening brace, the 1-Proj test, the
 * "return true/false" statements and closing braces) are elided in
 * this excerpt. */
486 static bool handle_switch_cond(ir_node *cond)
488 ir_node *sel = get_Cond_selector(cond);
/* collect_nodes() threaded the Cond's Projs off its link field; proj1 is
 * the first, proj2 (if any) the second. */
489 ir_node *proj1 = (ir_node*)get_irn_link(cond);
490 ir_node *proj2 = (ir_node*)get_irn_link(proj1);
491 ir_node *blk = get_nodes_block(cond);
493 /* exactly 1 Proj on the Cond node: must be the defaultProj */
495 ir_node *jmp = new_r_Jmp(blk);
496 assert(get_Cond_default_proj(cond) == get_Proj_proj(proj1));
497 /* convert it into a Jmp */
498 exchange(proj1, jmp);
502 /* handle Cond nodes with constant argument. In this case the localopt rules
503 * should have killed all obviously impossible cases.
504 * So the only case left to handle here is 1 defaultProj + 1case
505 * (this one case should be the one taken) */
/* proj2's link being NULL means there is no third Proj. */
506 if (get_irn_link(proj2) == NULL) {
507 ir_tarval *tv = value_of(sel);
509 if (tv != tarval_bad) {
510 /* we have a constant switch */
511 long num = get_tarval_long(tv);
512 long def_num = get_Cond_default_proj(cond);
513 ir_graph *irg = get_irn_irg(cond);
514 ir_node *bad = new_r_Bad(irg, mode_X);
/* In each case: the Proj that is actually taken becomes a Jmp, the
 * other one becomes Bad. */
516 if (def_num == get_Proj_proj(proj1)) {
517 /* first one is the defProj */
518 if (num == get_Proj_proj(proj2)) {
519 ir_node *jmp = new_r_Jmp(blk);
520 exchange(proj2, jmp);
521 exchange(proj1, bad);
524 } else if (def_num == get_Proj_proj(proj2)) {
525 /* second one is the defProj */
526 if (num == get_Proj_proj(proj1)) {
527 ir_node *jmp = new_r_Jmp(blk);
528 exchange(proj1, jmp);
529 exchange(proj2, bad);
533 /* neither: strange, Cond was not optimized so far */
534 if (num == get_Proj_proj(proj1)) {
535 ir_node *jmp = new_r_Jmp(blk);
536 exchange(proj1, jmp);
537 exchange(proj2, bad);
539 } else if (num == get_Proj_proj(proj2)) {
540 ir_node *jmp = new_r_Jmp(blk);
541 exchange(proj2, jmp);
542 exchange(proj1, bad);
551 /* Optimizations of the control flow that also require changes of Phi nodes.
553 * This optimization performs two passes over the graph.
555 * The first pass collects all Phi nodes in a link list in the block
556 * nodes. Further it performs simple control flow optimizations.
557 * Finally it marks all blocks that do not contain useful
558 * computations, i.e., these blocks might be removed.
560 * The second pass performs the optimizations intended by this algorithm.
561 * It walks only over block nodes and adapts these and the Phi nodes in these
562 * blocks, which it finds in a linked list computed by the first pass.
564 * We use the mark flag to mark removable blocks in the first phase.
/* NOTE(review): several lines (opening brace, local declarations of
 * i/j/k/n/env/new_end/in, env.changed initialization, the restart loop
 * around the Cond handling, the keep-alive filtering details and
 * closing braces) are elided in this excerpt. */
566 void optimize_cf(ir_graph *irg)
570 ir_node *end = get_irg_end(irg);
574 assert(get_irg_phase_state(irg) != phase_building);
576 /* if the graph is not pinned, we cannot determine empty blocks */
577 assert(get_irg_pinned(irg) != op_pin_state_floats &&
578 "Control flow optimization need a pinned graph");
580 /* FIXME: control flow opt destroys block edges. So edges are deactivated
581 * here. Fix the edges! */
582 edges_deactivate(irg);
584 /* we use the mark flag to mark removable blocks */
585 ir_reserve_resources(irg, IR_RESOURCE_BLOCK_MARK | IR_RESOURCE_IRN_LINK);
588 env.phis_moved = false;
592 env.switch_conds = NEW_ARR_F(ir_node*, 0);
/* First pass: clear links, mark removable blocks, collect Phis/Conds. */
593 irg_walk(end, clear_link, collect_nodes, &env);
595 /* handle all collected switch-Conds */
596 n = ARR_LEN(env.switch_conds);
597 for (i = 0; i < n; ++i) {
598 ir_node *cond = env.switch_conds[i];
599 env.changed |= handle_switch_cond(cond);
601 DEL_ARR_F(env.switch_conds);
604 /* Handle graph state if was changed. */
605 set_irg_doms_inconsistent(irg);
606 set_irg_extblk_inconsistent(irg);
607 set_irg_loopinfo_inconsistent(irg);
608 set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);
610 /* The Cond optimization might generate unreachable code, so restart if
615 /* Optimize the standard code. */
/* Second pass: rebuild in-arrays (optimize_blocks), then fold trivially
 * equivalent blocks (remove_simple_blocks). */
617 irg_block_walk_graph(irg, optimize_blocks, remove_simple_blocks, &env);
619 new_end = optimize_in_place(end);
620 if (new_end != end) {
621 set_irg_end(irg, new_end);
624 remove_End_Bads_and_doublets(end);
626 ir_free_resources(irg, IR_RESOURCE_BLOCK_MARK | IR_RESOURCE_IRN_LINK);
628 if (env.phis_moved) {
629 /* Bad: when we moved Phi's, we might produce dead Phi nodes
631 Some other phases cannot cope with this, so kill them.
633 n = get_End_n_keepalives(end);
635 NEW_ARR_A(ir_node *, in, n);
/* Out edges are needed to decide which kept-alive Phis still have real
 * users. */
636 assure_irg_outs(irg);
638 for (i = j = 0; i < n; ++i) {
639 ir_node *ka = get_End_keepalive(end, i);
644 for (k = get_irn_n_outs(ka) - 1; k >= 0; --k) {
645 ir_node *user = get_irn_out(ka, k);
647 if (user != ka && user != end) {
648 /* Is it a real user or just a self loop ? */
/* Rebuild the keep-alive list with only the survivors. */
658 set_End_keepalives(end, j, in);
665 /* Handle graph state if was changed. */
666 set_irg_doms_inconsistent(irg);
667 set_irg_extblk_inconsistent(irg);
668 set_irg_loopinfo_inconsistent(irg);
669 set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);
673 /* Creates an ir_graph pass for optimize_cf. */
674 ir_graph_pass_t *optimize_cf_pass(const char *name)
676 return def_graph_pass(name ? name : "optimize_cf", optimize_cf);