2 * Copyright (C) 1995-2008 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Control flow optimizations.
23 * @author Goetz Lindenmaier, Michael Beck, Sebastian Hack
26 * Removes Bad control flow predecessors and empty blocks. A block is empty
27 * if it contains only a Jmp node. Blocks can only be removed if they are not
28 * needed for the semantics of Phi nodes. Further, we NEVER remove labeled
29 * blocks (even if we could move the label).
33 #include "iroptimize.h"
40 #include "irgraph_t.h"
54 #include "irbackedge_t.h"
60 #include "iropt_dbg.h"
62 /** An environment for merge_blocks and collect nodes. */
63 typedef struct merge_env {
64 bool changed; /**< Set if the graph was changed. */
65 bool phis_moved; /**< Set if Phi nodes were moved. */
66 ir_node **switch_conds; /**< Helper list for all found Switch Conds. */
69 static void set_Block_removable(ir_node *block, bool removable)
71 set_Block_mark(block, removable);
74 static bool is_Block_removable(ir_node *block)
76 return get_Block_mark(block);
79 static void clear_link(ir_node *node, void *ctx)
82 set_irn_link(node, NULL);
86 * Collects all Phi nodes in link list of Block.
87 * Marks all blocks "non_removable" if they contain a node other
88 * than Jmp (and Proj).
89 * Links all Proj nodes to their predecessors.
90 * Collects all switch-Conds in a list.
92 static void collect_nodes(ir_node *n, void *ctx)
94 merge_env *env = (merge_env*)ctx;
97 /* Collect Phi nodes to compact ins along with block's ins. */
98 ir_node *block = get_nodes_block(n);
99 set_irn_link(n, get_irn_link(block));
100 set_irn_link(block, n);
101 } else if (is_Block(n)) {
103 } else if (!is_Jmp(n)) { /* Check for non-empty block. */
104 ir_node *block = get_nodes_block(n);
105 set_Block_removable(block, false);
108 /* link Proj nodes */
109 ir_node *pred = get_Proj_pred(n);
110 set_irn_link(n, get_irn_link(pred));
111 set_irn_link(pred, n);
112 } else if (is_Cond(n)) {
113 ir_node *sel = get_Cond_selector(n);
114 if (get_irn_mode(sel) != mode_b) {
115 /* found a switch-Cond, collect */
116 ARR_APP1(ir_node*, env->switch_conds, n);
122 /** Returns true if pred is predecessor of block. */
123 static bool is_pred_of(ir_node *pred, ir_node *b)
127 for (i = get_Block_n_cfgpreds(b) - 1; i >= 0; --i) {
128 ir_node *b_pred = get_Block_cfgpred_block(b, i);
135 /** Test whether we can optimize away pred block pos of b.
137 * @param b A block node.
138 * @param pos The position of the predecessor block to judge about.
140 * @returns The number of predecessors
142 * The test is rather tricky.
144 * The situation is something like the following:
153 * b merges the control flow of an if-then-else. We may not remove
154 * the 'then' _and_ the 'else' block of an 'if' if there is a Phi
155 * node in b, even if both are empty. The destruction of this Phi
156 * requires that a copy is added before the merge. We have to
157 * keep one of the case blocks to place the copies in.
159 * To perform the test for pos, we must regard predecessors before pos
160 * as already removed.
162 static unsigned test_whether_dispensable(ir_node *b, int pos)
164 int i, j, n_preds = 1;
165 ir_node *pred = get_Block_cfgpred_block(b, pos);
167 /* Bad blocks will be optimized away, so we don't need space for them */
171 if (is_Block_removable(pred)) {
172 /* Seems to be empty. At least we detected this in collect_nodes. */
173 if (get_irn_link(b) == NULL) {
174 /* There are no Phi nodes ==> all predecessors are dispensable. */
175 n_preds = get_Block_n_cfgpreds(pred);
177 /* b's pred blocks and pred's pred blocks must be pairwise disjunct.
178 Handle all pred blocks with preds < pos as if they were already removed. */
179 for (i = 0; i < pos; i++) {
180 ir_node *b_pred = get_Block_cfgpred_block(b, i);
181 if (! is_Bad(b_pred) && is_Block_removable(b_pred)) {
182 for (j = get_Block_n_cfgpreds(b_pred) - 1; j >= 0; --j) {
183 ir_node *b_pred_pred = get_Block_cfgpred_block(b_pred, j);
184 if (is_pred_of(b_pred_pred, pred))
185 goto non_dispensable;
188 if (is_pred_of(b_pred, pred))
189 goto non_dispensable;
192 for (i = pos +1; i < get_Block_n_cfgpreds(b); i++) {
193 ir_node *b_pred = get_Block_cfgpred_block(b, i);
194 if (is_pred_of(b_pred, pred))
195 goto non_dispensable;
197 /* if we get here, the block is dispensable */
198 n_preds = get_Block_n_cfgpreds(pred);
205 set_Block_removable(pred, false);
210 * This method removed Bad cf predecessors from Blocks and Phis, and removes
211 * empty blocks. A block is empty if it only contains Phi and Jmp nodes.
213 * We first adapt Phi nodes, then Block nodes, as we need the old ins
214 * of the Block to adapt the Phi nodes. We do this by computing new
215 * in arrays, and then replacing the old ones. So far we compute new in arrays
216 * for all nodes, not regarding whether there is a possibility for optimization.
218 * For each predecessor p of a Block b there are three cases:
219 * - The predecessor p is a Bad node: just skip it. The in array of b shrinks
221 * - The predecessor p is empty. Remove p. All predecessors of p are now
223 * - The predecessor p is a block containing useful code. Just keep p as is.
225 * For Phi nodes f we have to check the conditions at the Block of f.
226 * For cases 1 and 3 we proceed as for Blocks. For case 2 we can have two
228 * -2a: The old predecessor of the Phi f is a Phi pred_f IN THE BLOCK REMOVED.
229 * In this case we proceed as for blocks. We remove pred_f. All
230 * predecessors of pred_f now are predecessors of f.
231 * -2b: The old predecessor of f is NOT in the block removed. It might be a Phi
232 * too. We have to replicate f for each predecessor of the removed block.
233 * Or, with other words, the removed predecessor block has exactly one
236 * Further there is a special case for self referencing blocks:
239 * then_b else_b then_b else_b
245 * | | | === optimized to ===> \ | | |
252 * If there is a Phi in pred_b, but we remove pred_b, we have to generate a
253 * Phi in loop_b, that has the ins of the Phi in pred_b and a self referencing
256 static void optimize_blocks(ir_node *b, void *ctx)
258 int i, j, k, n, max_preds, n_preds, p_preds = -1;
259 ir_node *pred, *phi, *next;
261 merge_env *env = (merge_env*)ctx;
263 /* Count the number of predecessor if this block is merged with pred blocks
266 for (i = 0, k = get_Block_n_cfgpreds(b); i < k; ++i) {
267 max_preds += test_whether_dispensable(b, i);
269 in = XMALLOCN(ir_node*, max_preds);
271 /*- Fix the Phi nodes of the current block -*/
272 for (phi = (ir_node*)get_irn_link(b); phi != NULL; phi = (ir_node*)next) {
274 next = (ir_node*)get_irn_link(phi);
276 /* Find the new predecessors for the Phi */
278 for (i = 0, n = get_Block_n_cfgpreds(b); i < n; ++i) {
279 pred = get_Block_cfgpred_block(b, i);
282 /* case Phi 1: Do nothing */
283 } else if (is_Block_removable(pred) && !Block_block_visited(pred)) {
284 /* case Phi 2: It's an empty block and not yet visited. */
285 ir_node *phi_pred = get_Phi_pred(phi, i);
287 for (j = 0, k = get_Block_n_cfgpreds(pred); j < k; j++) {
288 /* because of breaking loops, not all predecessors are
289 * Bad-clean, so we must check this here again */
290 if (! is_Bad(get_Block_cfgpred(pred, j))) {
291 if (get_nodes_block(phi_pred) == pred) {
293 assert(is_Phi(phi_pred)); /* Block is empty!! */
295 in[p_preds++] = get_Phi_pred(phi_pred, j);
298 in[p_preds++] = phi_pred;
304 in[p_preds++] = get_Phi_pred(phi, i);
307 assert(p_preds <= max_preds);
311 /* By removal of Bad ins the Phi might be degenerated. */
312 exchange(phi, in[0]);
314 set_irn_in(phi, p_preds, in);
318 /*- This happens only if merge between loop backedge and single loop entry.
319 Moreover, it is only needed if predb is the direct dominator of b,
320 else there can be no uses of the Phi's in predb ... -*/
321 for (k = 0, n = get_Block_n_cfgpreds(b); k < n; ++k) {
322 ir_node *predb = get_nodes_block(get_Block_cfgpred(b, k));
327 if (is_Block_removable(predb) && !Block_block_visited(predb)) {
330 /* we found a predecessor block at position k that will be removed */
331 for (phi = (ir_node*)get_irn_link(predb); phi; phi = next_phi) {
333 next_phi = (ir_node*)get_irn_link(phi);
337 if (get_Block_idom(b) != predb) {
338 /* predb is not the dominator. There can't be uses of pred's Phi nodes, kill them .*/
339 ir_graph *irg = get_irn_irg(b);
340 exchange(phi, get_irg_bad(irg));
342 /* predb is the direct dominator of b. There might be uses of the Phi nodes from
343 predb in further block, so move this phi from the predecessor into the block b */
344 set_nodes_block(phi, b);
345 set_irn_link(phi, get_irn_link(b));
346 set_irn_link(b, phi);
347 env->phis_moved = true;
349 /* first, copy all 0..k-1 predecessors */
350 for (i = 0; i < k; i++) {
351 pred = get_Block_cfgpred_block(b, i);
355 } else if (is_Block_removable(pred) && !Block_block_visited(pred)) {
356 /* It's an empty block and not yet visited. */
357 for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
358 if (! is_Bad(get_Block_cfgpred(pred, j)))
366 /* now we are at k, copy the phi predecessors */
367 pred = get_nodes_block(get_Block_cfgpred(b, k));
368 for (i = 0; i < get_Phi_n_preds(phi); i++) {
369 if (! is_Bad(get_Block_cfgpred(pred, i)))
370 in[q_preds++] = get_Phi_pred(phi, i);
373 /* and now all the rest */
374 for (i = k+1; i < get_Block_n_cfgpreds(b); i++) {
375 pred = get_Block_cfgpred_block(b, i);
379 } else if (is_Block_removable(pred) && !Block_block_visited(pred)) {
380 /* It's an empty block and not yet visited. */
381 for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
382 if (! is_Bad(get_Block_cfgpred(pred, j)))
392 exchange(phi, in[0]);
394 set_irn_in(phi, q_preds, in);
397 assert(q_preds <= max_preds);
398 // assert(p_preds == q_preds && "Wrong Phi Fix");
404 /*- Fix the block -*/
406 for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
407 pred = get_Block_cfgpred_block(b, i);
410 /* case 1: Do nothing */
411 } else if (is_Block_removable(pred) && !Block_block_visited(pred)) {
412 /* case 2: It's an empty block and not yet visited. */
413 assert(get_Block_n_cfgpreds(b) > 1 || has_Block_entity(b));
414 /* Else it should be optimized by equivalent_node. */
415 for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
416 ir_node *pred_X = get_Block_cfgpred(pred, j);
418 /* because of breaking loops, not all predecessors are Bad-clean,
419 * so we must check this here again */
420 if (! is_Bad(pred_X))
421 in[n_preds++] = pred_X;
423 /* Remove block as it might be kept alive. */
424 exchange(pred, b/*get_irg_bad(irg)*/);
427 in[n_preds++] = get_Block_cfgpred(b, i);
430 assert(n_preds <= max_preds);
432 set_irn_in(b, n_preds, in);
435 assert(get_irn_link(b) == NULL || p_preds == -1 || (n_preds == p_preds && "Wrong Phi Fix"));
440 * Block walker: optimize all blocks using the default optimizations.
441 * This removes Blocks with only a Jmp predecessor.
443 static void remove_simple_blocks(ir_node *block, void *ctx)
445 merge_env *env = (merge_env*)ctx;
446 ir_node *new_blk = equivalent_node(block);
448 if (new_blk != block) {
449 exchange(block, new_blk);
455 * Optimize table-switch Conds.
457 * @param cond the switch-Cond
458 * @return true if the switch-Cond was optimized
460 static bool handle_switch_cond(ir_node *cond)
462 ir_node *sel = get_Cond_selector(cond);
463 ir_node *proj1 = (ir_node*)get_irn_link(cond);
464 ir_node *proj2 = (ir_node*)get_irn_link(proj1);
465 ir_node *blk = get_nodes_block(cond);
467 /* exactly 1 Proj on the Cond node: must be the defaultProj */
469 ir_node *jmp = new_r_Jmp(blk);
470 assert(get_Cond_default_proj(cond) == get_Proj_proj(proj1));
471 /* convert it into a Jmp */
472 exchange(proj1, jmp);
476 /* handle Cond nodes with constant argument. In this case the localopt rules
477 * should have killed all obviously impossible cases.
478 * So the only case left to handle here is 1 defaultProj + 1case
479 * (this one case should be the one taken) */
480 if (get_irn_link(proj2) == NULL) {
481 ir_tarval *tv = value_of(sel);
483 if (tv != tarval_bad) {
484 /* we have a constant switch */
485 long num = get_tarval_long(tv);
486 long def_num = get_Cond_default_proj(cond);
487 ir_graph *irg = get_irn_irg(cond);
488 ir_node *bad = get_irg_bad(irg);
490 if (def_num == get_Proj_proj(proj1)) {
491 /* first one is the defProj */
492 if (num == get_Proj_proj(proj2)) {
493 ir_node *jmp = new_r_Jmp(blk);
494 exchange(proj2, jmp);
495 exchange(proj1, bad);
498 } else if (def_num == get_Proj_proj(proj2)) {
499 /* second one is the defProj */
500 if (num == get_Proj_proj(proj1)) {
501 ir_node *jmp = new_r_Jmp(blk);
502 exchange(proj1, jmp);
503 exchange(proj2, bad);
507 /* neither: strange, Cond was not optimized so far */
508 if (num == get_Proj_proj(proj1)) {
509 ir_node *jmp = new_r_Jmp(blk);
510 exchange(proj1, jmp);
511 exchange(proj2, bad);
513 } else if (num == get_Proj_proj(proj2)) {
514 ir_node *jmp = new_r_Jmp(blk);
515 exchange(proj2, jmp);
516 exchange(proj1, bad);
525 /* Optimizations of the control flow that also require changes of Phi nodes.
527 * This optimization performs two passes over the graph.
529 * The first pass collects all Phi nodes in a link list in the block
530 * nodes. Further it performs simple control flow optimizations.
531 * Finally it marks all blocks that do not contain useful
532 * computations, i.e., these blocks might be removed.
534 * The second pass performs the optimizations intended by this algorithm.
535 * It walks only over block nodes and adapts these and the Phi nodes in these
536 * blocks, which it finds in a linked list computed by the first pass.
538 * We use the mark flag to mark removable blocks in the first phase.
540 void optimize_cf(ir_graph *irg)
544 ir_node *end = get_irg_end(irg);
548 assert(get_irg_phase_state(irg) != phase_building);
550 /* if the graph is not pinned, we cannot determine empty blocks */
551 assert(get_irg_pinned(irg) != op_pin_state_floats &&
552 "Control flow optimization need a pinned graph");
554 /* FIXME: control flow opt destroys block edges. So edges are deactivated
555 * here. Fix the edges! */
556 edges_deactivate(irg);
558 /* we use the mark flag to mark removable blocks */
559 ir_reserve_resources(irg, IR_RESOURCE_BLOCK_MARK | IR_RESOURCE_IRN_LINK);
562 env.phis_moved = false;
566 env.switch_conds = NEW_ARR_F(ir_node*, 0);
567 irg_walk(end, clear_link, collect_nodes, &env);
569 /* handle all collected switch-Conds */
570 n = ARR_LEN(env.switch_conds);
571 for (i = 0; i < n; ++i) {
572 ir_node *cond = env.switch_conds[i];
573 env.changed |= handle_switch_cond(cond);
575 DEL_ARR_F(env.switch_conds);
578 /* Handle graph state if was changed. */
579 set_irg_outs_inconsistent(irg);
580 set_irg_doms_inconsistent(irg);
581 set_irg_extblk_inconsistent(irg);
582 set_irg_loopinfo_inconsistent(irg);
583 set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);
585 /* The Cond optimization might generate unreachable code, so restart if
590 /* Optimize the standard code. */
592 irg_block_walk_graph(irg, optimize_blocks, remove_simple_blocks, &env);
594 new_end = optimize_in_place(end);
595 if (new_end != end) {
596 set_irg_end(irg, new_end);
599 remove_End_Bads_and_doublets(end);
601 ir_free_resources(irg, IR_RESOURCE_BLOCK_MARK | IR_RESOURCE_IRN_LINK);
603 if (env.phis_moved) {
604 /* Bad: when we moved Phi's, we might produce dead Phi nodes
606 Some other phases cannot copy with this, so will them.
608 n = get_End_n_keepalives(end);
610 NEW_ARR_A(ir_node *, in, n);
612 /* Handle graph state if was changed. */
613 set_irg_outs_inconsistent(irg);
615 assure_irg_outs(irg);
617 for (i = j = 0; i < n; ++i) {
618 ir_node *ka = get_End_keepalive(end, i);
623 for (k = get_irn_n_outs(ka) - 1; k >= 0; --k) {
624 ir_node *user = get_irn_out(ka, k);
626 if (user != ka && user != end) {
627 /* Is it a real user or just a self loop ? */
637 set_End_keepalives(end, j, in);
644 /* Handle graph state if was changed. */
645 set_irg_outs_inconsistent(irg);
646 set_irg_doms_inconsistent(irg);
647 set_irg_extblk_inconsistent(irg);
648 set_irg_loopinfo_inconsistent(irg);
649 set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);
652 /* the verifier doesn't work yet with floating nodes */
653 if (get_irg_pinned(irg) == op_pin_state_pinned) {
654 /* after optimize_cf(), only Bad data flow may remain. */
655 if (irg_verify_bads(irg, BAD_DF | BAD_BLOCK | TUPLE)) {
656 dump_ir_graph(irg, "-verify-cf");
657 fprintf(stderr, "VERIFY_BAD in optimize_cf()\n");
662 /* Creates an ir_graph pass for optimize_cf. */
663 ir_graph_pass_t *optimize_cf_pass(const char *name)
665 return def_graph_pass(name ? name : "optimize_cf", optimize_cf);