2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Control flow optimizations.
23 * @author Goetz Lindenmaier, Michael Beck, Sebastian Hack
26 * Removes Bad control flow predecessors and empty blocks. A block is empty
27 * if it contains only a Jmp node. Blocks can only be removed if they are not
28 * needed for the semantics of Phi nodes. Further, we NEVER remove labeled
29 * blocks (even if we could move the label).
33 #include "iroptimize.h"
40 #include "irgraph_t.h"
54 #include "irbackedge_t.h"
60 #include "iropt_dbg.h"
62 /** An environment for merge_blocks and collect nodes. */
63 typedef struct merge_env {
64 bool changed; /**< Set if the graph was changed. */
65 bool phis_moved; /**< Set if Phi nodes were moved. */
66 ir_node **switch_conds; /**< Helper list for all found Switch Conds. */
69 static void set_Block_removable(ir_node *block, bool removable)
71 set_Block_mark(block, removable);
74 static bool is_Block_removable(ir_node *block)
76 return get_Block_mark(block);
79 static void clear_link(ir_node *node, void *ctx)
82 set_irn_link(node, NULL);
84 set_Block_removable(node, true);
88 * Collects all Phi nodes in link list of Block.
89 * Marks all blocks "non_removable" if they contain a node other
90 * than Jmp (and Proj).
91 * Links all Proj nodes to their predecessors.
92 * Collects all switch-Conds in a list.
94 static void collect_nodes(ir_node *n, void *ctx)
96 merge_env *env = (merge_env*)ctx;
99 /* Collect Phi nodes to compact ins along with block's ins. */
100 ir_node *block = get_nodes_block(n);
101 set_irn_link(n, get_irn_link(block));
102 set_irn_link(block, n);
103 } else if (is_Block(n)) {
105 } else if (!is_Jmp(n)) { /* Check for non-empty block. */
106 ir_node *block = get_nodes_block(n);
107 set_Block_removable(block, false);
110 /* link Proj nodes */
111 ir_node *pred = get_Proj_pred(n);
112 set_irn_link(n, get_irn_link(pred));
113 set_irn_link(pred, n);
114 } else if (is_Cond(n)) {
115 ir_node *sel = get_Cond_selector(n);
116 if (get_irn_mode(sel) != mode_b) {
117 /* found a switch-Cond, collect */
118 ARR_APP1(ir_node*, env->switch_conds, n);
124 /** Returns true if pred is predecessor of block. */
125 static bool is_pred_of(ir_node *pred, ir_node *b)
129 for (i = get_Block_n_cfgpreds(b) - 1; i >= 0; --i) {
130 ir_node *b_pred = get_Block_cfgpred_block(b, i);
137 /** Test whether we can optimize away pred block pos of b.
139 * @param b A block node.
140 * @param pos The position of the predecessor block to judge about.
142 * @returns The number of predecessors
144 * The test is rather tricky.
146 * The situation is something like the following:
155 * b merges the control flow of an if-then-else. We may not remove
156 * the 'then' _and_ the 'else' block of an 'if' if there is a Phi
157 * node in b, even if both are empty. The destruction of this Phi
158 * requires that a copy is added before the merge. We have to
159 * keep one of the case blocks to place the copies in.
161 * To perform the test for pos, we must regard predecessors before pos
162 * as already removed.
164 static unsigned test_whether_dispensable(ir_node *b, int pos)
166 int i, j, n_preds = 1;
167 ir_node *pred = get_Block_cfgpred_block(b, pos);
169 /* Bad blocks will be optimized away, so we don't need space for them */
173 if (is_Block_removable(pred)) {
174 /* Seems to be empty. At least we detected this in collect_nodes. */
175 if (get_irn_link(b) == NULL) {
176 /* There are no Phi nodes ==> all predecessors are dispensable. */
177 n_preds = get_Block_n_cfgpreds(pred);
179 /* b's pred blocks and pred's pred blocks must be pairwise disjunct.
180 Handle all pred blocks with preds < pos as if they were already removed. */
181 for (i = 0; i < pos; i++) {
182 ir_node *b_pred = get_Block_cfgpred_block(b, i);
183 if (! is_Bad(b_pred) && is_Block_removable(b_pred)) {
184 for (j = get_Block_n_cfgpreds(b_pred) - 1; j >= 0; --j) {
185 ir_node *b_pred_pred = get_Block_cfgpred_block(b_pred, j);
186 if (is_pred_of(b_pred_pred, pred))
187 goto non_dispensable;
190 if (is_pred_of(b_pred, pred))
191 goto non_dispensable;
194 for (i = pos +1; i < get_Block_n_cfgpreds(b); i++) {
195 ir_node *b_pred = get_Block_cfgpred_block(b, i);
196 if (is_pred_of(b_pred, pred))
197 goto non_dispensable;
199 /* if we get here, the block is dispensable */
200 n_preds = get_Block_n_cfgpreds(pred);
207 set_Block_removable(pred, false);
212 * This method removed Bad cf predecessors from Blocks and Phis, and removes
213 * empty blocks. A block is empty if it only contains Phi and Jmp nodes.
215 * We first adapt Phi nodes, then Block nodes, as we need the old ins
216 * of the Block to adapt the Phi nodes. We do this by computing new
217 * in arrays, and then replacing the old ones. So far we compute new in arrays
218 * for all nodes, not regarding whether there is a possibility for optimization.
220 * For each predecessor p of a Block b there are three cases:
221 * - The predecessor p is a Bad node: just skip it. The in array of b shrinks
223 * - The predecessor p is empty. Remove p. All predecessors of p are now
225 * - The predecessor p is a block containing useful code. Just keep p as is.
227 * For Phi nodes f we have to check the conditions at the Block of f.
228 * For cases 1 and 3 we proceed as for Blocks. For case 2 we can have two
230 * -2a: The old predecessor of the Phi f is a Phi pred_f IN THE BLOCK REMOVED.
231 * In this case we proceed as for blocks. We remove pred_f. All
232 * predecessors of pred_f now are predecessors of f.
233 * -2b: The old predecessor of f is NOT in the block removed. It might be a Phi
234 * too. We have to replicate f for each predecessor of the removed block.
235 * Or, with other words, the removed predecessor block has exactly one
238 * Further there is a special case for self referencing blocks:
241 * then_b else_b then_b else_b
247 * | | | === optimized to ===> \ | | |
254 * If there is a Phi in pred_b, but we remove pred_b, we have to generate a
255 * Phi in loop_b, that has the ins of the Phi in pred_b and a self referencing
258 static void optimize_blocks(ir_node *b, void *ctx)
260 int i, j, k, n, max_preds, n_preds, p_preds = -1;
261 ir_node *pred, *phi, *next;
263 merge_env *env = (merge_env*)ctx;
265 /* Count the number of predecessor if this block is merged with pred blocks
268 for (i = 0, k = get_Block_n_cfgpreds(b); i < k; ++i) {
269 max_preds += test_whether_dispensable(b, i);
271 in = XMALLOCN(ir_node*, max_preds);
273 /*- Fix the Phi nodes of the current block -*/
274 for (phi = (ir_node*)get_irn_link(b); phi != NULL; phi = (ir_node*)next) {
276 next = (ir_node*)get_irn_link(phi);
278 /* Find the new predecessors for the Phi */
280 for (i = 0, n = get_Block_n_cfgpreds(b); i < n; ++i) {
281 pred = get_Block_cfgpred_block(b, i);
284 /* case Phi 1: Do nothing */
285 } else if (is_Block_removable(pred) && !Block_block_visited(pred)) {
286 /* case Phi 2: It's an empty block and not yet visited. */
287 ir_node *phi_pred = get_Phi_pred(phi, i);
289 for (j = 0, k = get_Block_n_cfgpreds(pred); j < k; j++) {
290 /* because of breaking loops, not all predecessors are
291 * Bad-clean, so we must check this here again */
292 if (! is_Bad(get_Block_cfgpred(pred, j))) {
293 if (get_nodes_block(phi_pred) == pred) {
295 assert(is_Phi(phi_pred)); /* Block is empty!! */
297 in[p_preds++] = get_Phi_pred(phi_pred, j);
300 in[p_preds++] = phi_pred;
306 in[p_preds++] = get_Phi_pred(phi, i);
309 assert(p_preds <= max_preds);
313 /* By removal of Bad ins the Phi might be degenerated. */
314 exchange(phi, in[0]);
316 set_irn_in(phi, p_preds, in);
320 /*- This happens only if merge between loop backedge and single loop entry.
321 Moreover, it is only needed if predb is the direct dominator of b,
322 else there can be no uses of the Phi's in predb ... -*/
323 for (k = 0, n = get_Block_n_cfgpreds(b); k < n; ++k) {
324 ir_node *predb = get_nodes_block(get_Block_cfgpred(b, k));
329 if (is_Block_removable(predb) && !Block_block_visited(predb)) {
332 /* we found a predecessor block at position k that will be removed */
333 for (phi = (ir_node*)get_irn_link(predb); phi; phi = next_phi) {
335 next_phi = (ir_node*)get_irn_link(phi);
339 if (get_Block_idom(b) != predb) {
340 /* predb is not the dominator. There can't be uses of pred's Phi nodes, kill them .*/
341 ir_graph *irg = get_irn_irg(b);
342 exchange(phi, get_irg_bad(irg));
344 /* predb is the direct dominator of b. There might be uses of the Phi nodes from
345 predb in further block, so move this phi from the predecessor into the block b */
346 set_nodes_block(phi, b);
347 set_irn_link(phi, get_irn_link(b));
348 set_irn_link(b, phi);
349 env->phis_moved = true;
351 /* first, copy all 0..k-1 predecessors */
352 for (i = 0; i < k; i++) {
353 pred = get_Block_cfgpred_block(b, i);
357 } else if (is_Block_removable(pred) && !Block_block_visited(pred)) {
358 /* It's an empty block and not yet visited. */
359 for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
360 if (! is_Bad(get_Block_cfgpred(pred, j)))
368 /* now we are at k, copy the phi predecessors */
369 pred = get_nodes_block(get_Block_cfgpred(b, k));
370 for (i = 0; i < get_Phi_n_preds(phi); i++) {
371 if (! is_Bad(get_Block_cfgpred(pred, i)))
372 in[q_preds++] = get_Phi_pred(phi, i);
375 /* and now all the rest */
376 for (i = k+1; i < get_Block_n_cfgpreds(b); i++) {
377 pred = get_Block_cfgpred_block(b, i);
381 } else if (is_Block_removable(pred) && !Block_block_visited(pred)) {
382 /* It's an empty block and not yet visited. */
383 for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
384 if (! is_Bad(get_Block_cfgpred(pred, j)))
394 exchange(phi, in[0]);
396 set_irn_in(phi, q_preds, in);
399 assert(q_preds <= max_preds);
400 // assert(p_preds == q_preds && "Wrong Phi Fix");
406 /*- Fix the block -*/
408 for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
409 pred = get_Block_cfgpred_block(b, i);
412 /* case 1: Do nothing */
413 } else if (is_Block_removable(pred) && !Block_block_visited(pred)) {
414 /* case 2: It's an empty block and not yet visited. */
415 assert(get_Block_n_cfgpreds(b) > 1 || has_Block_entity(b));
416 /* Else it should be optimized by equivalent_node. */
417 for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
418 ir_node *pred_X = get_Block_cfgpred(pred, j);
420 /* because of breaking loops, not all predecessors are Bad-clean,
421 * so we must check this here again */
422 if (! is_Bad(pred_X))
423 in[n_preds++] = pred_X;
425 /* Remove block as it might be kept alive. */
426 exchange(pred, b/*get_irg_bad(irg)*/);
429 in[n_preds++] = get_Block_cfgpred(b, i);
432 assert(n_preds <= max_preds);
434 set_irn_in(b, n_preds, in);
437 assert(get_irn_link(b) == NULL || p_preds == -1 || (n_preds == p_preds && "Wrong Phi Fix"));
442 * Block walker: optimize all blocks using the default optimizations.
443 * This removes Blocks with only a Jmp predecessor.
445 static void remove_simple_blocks(ir_node *block, void *ctx)
447 merge_env *env = (merge_env*)ctx;
448 ir_node *new_blk = equivalent_node(block);
450 if (new_blk != block) {
451 exchange(block, new_blk);
457 * Optimize table-switch Conds.
459 * @param cond the switch-Cond
460 * @return true if the switch-Cond was optimized
462 static bool handle_switch_cond(ir_node *cond)
464 ir_node *sel = get_Cond_selector(cond);
465 ir_node *proj1 = (ir_node*)get_irn_link(cond);
466 ir_node *proj2 = (ir_node*)get_irn_link(proj1);
467 ir_node *blk = get_nodes_block(cond);
469 /* exactly 1 Proj on the Cond node: must be the defaultProj */
471 ir_node *jmp = new_r_Jmp(blk);
472 assert(get_Cond_default_proj(cond) == get_Proj_proj(proj1));
473 /* convert it into a Jmp */
474 exchange(proj1, jmp);
478 /* handle Cond nodes with constant argument. In this case the localopt rules
479 * should have killed all obviously impossible cases.
480 * So the only case left to handle here is 1 defaultProj + 1case
481 * (this one case should be the one taken) */
482 if (get_irn_link(proj2) == NULL) {
483 ir_tarval *tv = value_of(sel);
485 if (tv != tarval_bad) {
486 /* we have a constant switch */
487 long num = get_tarval_long(tv);
488 long def_num = get_Cond_default_proj(cond);
489 ir_graph *irg = get_irn_irg(cond);
490 ir_node *bad = get_irg_bad(irg);
492 if (def_num == get_Proj_proj(proj1)) {
493 /* first one is the defProj */
494 if (num == get_Proj_proj(proj2)) {
495 ir_node *jmp = new_r_Jmp(blk);
496 exchange(proj2, jmp);
497 exchange(proj1, bad);
500 } else if (def_num == get_Proj_proj(proj2)) {
501 /* second one is the defProj */
502 if (num == get_Proj_proj(proj1)) {
503 ir_node *jmp = new_r_Jmp(blk);
504 exchange(proj1, jmp);
505 exchange(proj2, bad);
509 /* neither: strange, Cond was not optimized so far */
510 if (num == get_Proj_proj(proj1)) {
511 ir_node *jmp = new_r_Jmp(blk);
512 exchange(proj1, jmp);
513 exchange(proj2, bad);
515 } else if (num == get_Proj_proj(proj2)) {
516 ir_node *jmp = new_r_Jmp(blk);
517 exchange(proj2, jmp);
518 exchange(proj1, bad);
527 /* Optimizations of the control flow that also require changes of Phi nodes.
529 * This optimization performs two passes over the graph.
531 * The first pass collects all Phi nodes in a link list in the block
532 * nodes. Further it performs simple control flow optimizations.
533 * Finally it marks all blocks that do not contain useful
534 * computations, i.e., these blocks might be removed.
536 * The second pass performs the optimizations intended by this algorithm.
537 * It walks only over block nodes and adapts these and the Phi nodes in these
538 * blocks, which it finds in a linked list computed by the first pass.
540 * We use the mark flag to mark removable blocks in the first phase.
542 void optimize_cf(ir_graph *irg)
546 ir_node *end = get_irg_end(irg);
550 assert(get_irg_phase_state(irg) != phase_building);
552 /* if the graph is not pinned, we cannot determine empty blocks */
553 assert(get_irg_pinned(irg) != op_pin_state_floats &&
554 "Control flow optimization need a pinned graph");
556 /* FIXME: control flow opt destroys block edges. So edges are deactivated
557 * here. Fix the edges! */
558 edges_deactivate(irg);
560 /* we use the mark flag to mark removable blocks */
561 ir_reserve_resources(irg, IR_RESOURCE_BLOCK_MARK | IR_RESOURCE_IRN_LINK);
564 env.phis_moved = false;
568 env.switch_conds = NEW_ARR_F(ir_node*, 0);
569 irg_walk(end, clear_link, collect_nodes, &env);
571 /* handle all collected switch-Conds */
572 n = ARR_LEN(env.switch_conds);
573 for (i = 0; i < n; ++i) {
574 ir_node *cond = env.switch_conds[i];
575 env.changed |= handle_switch_cond(cond);
577 DEL_ARR_F(env.switch_conds);
580 /* Handle graph state if was changed. */
581 set_irg_outs_inconsistent(irg);
582 set_irg_doms_inconsistent(irg);
583 set_irg_extblk_inconsistent(irg);
584 set_irg_loopinfo_inconsistent(irg);
585 set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);
587 /* The Cond optimization might generate unreachable code, so restart if
592 /* Optimize the standard code. */
594 irg_block_walk_graph(irg, optimize_blocks, remove_simple_blocks, &env);
596 new_end = optimize_in_place(end);
597 if (new_end != end) {
598 set_irg_end(irg, new_end);
601 remove_End_Bads_and_doublets(end);
603 ir_free_resources(irg, IR_RESOURCE_BLOCK_MARK | IR_RESOURCE_IRN_LINK);
605 if (env.phis_moved) {
606 /* Bad: when we moved Phi's, we might produce dead Phi nodes
608 Some other phases cannot copy with this, so will them.
610 n = get_End_n_keepalives(end);
612 NEW_ARR_A(ir_node *, in, n);
614 /* Handle graph state if was changed. */
615 set_irg_outs_inconsistent(irg);
617 assure_irg_outs(irg);
619 for (i = j = 0; i < n; ++i) {
620 ir_node *ka = get_End_keepalive(end, i);
625 for (k = get_irn_n_outs(ka) - 1; k >= 0; --k) {
626 ir_node *user = get_irn_out(ka, k);
628 if (user != ka && user != end) {
629 /* Is it a real user or just a self loop ? */
639 set_End_keepalives(end, j, in);
646 /* Handle graph state if was changed. */
647 set_irg_outs_inconsistent(irg);
648 set_irg_doms_inconsistent(irg);
649 set_irg_extblk_inconsistent(irg);
650 set_irg_loopinfo_inconsistent(irg);
651 set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);
654 /* the verifier doesn't work yet with floating nodes */
655 if (get_irg_pinned(irg) == op_pin_state_pinned) {
656 /* after optimize_cf(), only Bad data flow may remain. */
657 if (irg_verify_bads(irg, BAD_DF | BAD_BLOCK | TUPLE)) {
658 dump_ir_graph(irg, "-verify-cf");
659 fprintf(stderr, "VERIFY_BAD in optimize_cf()\n");
664 /* Creates an ir_graph pass for optimize_cf. */
665 ir_graph_pass_t *optimize_cf_pass(const char *name)
667 return def_graph_pass(name ? name : "optimize_cf", optimize_cf);