3 * File name: ir/opt/cfopt.c
4 * Purpose: control flow optimizations
8 * Copyright: (c) 1998-2004 Universität Karlsruhe
9 * Licence: This file protected by GPL - GNU GENERAL PUBLIC LICENSE.
20 #include "irgraph_t.h"
33 #include "irbackedge_t.h"
40 /*------------------------------------------------------------------*/
41 /* Control flow optimization. */
43 /* Removes Bad control flow predecessors and empty blocks. A block */
44 /* is empty if it contains only a Jmp node. */
45 /* Blocks can only be removed if they are not needed for the */
46 /* semantics of Phi nodes. */
47 /*------------------------------------------------------------------*/
50 * Replace binary Conds that jumps twice into the same block
56 * ProjX True ProjX False ==> | /
61 * Such pattern are the result of if-conversion.
63 * Note that the simple case that Block has only these two
64 * predecessors are already handled in equivalent_node_Block().
66 static void remove_senseless_conds(ir_node *bl, void *data)
69 int n = get_Block_n_cfgpreds(bl);
73 for (i = 0; i < n; ++i) {
74 ir_node *pred_i = get_Block_cfgpred(bl, i);
75 ir_node *cond_i = skip_Proj(pred_i);
77 for (j = i + 1; j < n; ++j) {
78 ir_node *pred_j = get_Block_cfgpred(bl, j);
79 ir_node *cond_j = skip_Proj(pred_j);
82 && get_irn_op(cond_i) == op_Cond
83 && get_irn_mode(get_Cond_selector(cond_i)) == mode_b) {
85 ir_node *jmp = new_r_Jmp(current_ir_graph, get_nodes_block(cond_i));
86 set_irn_n(bl, i, jmp);
87 set_irn_n(bl, j, new_Bad());
89 DBG_OPT_IFSIM2(cond_i, jmp);
98 * Removes Tuples from Block control flow predecessors.
99 * Optimizes blocks with equivalent_node(). This is tricky,
100 * as we want to avoid nodes that have as block predecessor Bads.
101 * Therefore we also optimize at control flow operations, depending
102 * how we first reach the Block.
104 static void merge_blocks(ir_node *n, void *env) {
108 /* clear the link field for ALL nodes first */
109 set_irn_link(n, NULL);
111 if (get_irn_op(n) == op_Block) {
113 for (i = 0; i < get_Block_n_cfgpreds(n); i++) {
114 /* GL @@@ : is this possible? if (get_opt_normalize()) -- added, all tests go through.
115 A different order of optimizations might cause problems. */
116 if (get_opt_normalize())
117 set_Block_cfgpred(n, i, skip_Tuple(get_Block_cfgpred(n, i)));
121 new_block = equivalent_node(n);
122 if (new_block != n && ! is_Bad(new_block))
123 exchange (n, new_block);
125 } else if (get_opt_optimize() && (get_irn_mode(n) == mode_X)) {
126 /* We will soon visit a block. Optimize it before visiting! */
127 ir_node *b = get_nodes_block(skip_Proj(n));
130 new_block = equivalent_node(b);
132 while (irn_not_visited(b) && (!is_Bad(new_block)) && (new_block != b)) {
133 /* We would have to run gigo() if new is bad, so we
134 promote it directly below. Nevertheless, we sometimes reach a block
135 the first time through a dataflow node. In this case we optimized the
136 block as such and have to promote the Bad here. */
137 assert((get_opt_control_flow_straightening() ||
138 get_opt_control_flow_weak_simplification()) &&
139 ("strange flag setting"));
140 exchange (b, new_block);
142 new_block = equivalent_node(b);
145 /* normally, we would create a Bad block here, but this must be
146 * prevented, so just set it's cf to Bad.
148 if (is_Bad(new_block))
149 exchange(n, new_Bad());
155 * Remove cf from dead block by inspecting dominance info
156 * Do not replace blocks by Bad. This optimization shall
157 * ensure, that all Bad control flow predecessors are
158 * removed, and no new other Bads are introduced.
160 * Must be run in the post walker.
162 static void remove_dead_block_cf(ir_node *block, void *env)
166 /* check block predecessors and turn control flow into bad */
167 for (i = 0, n = get_Block_n_cfgpreds(block); i < n; ++i) {
168 ir_node *pred_X = get_Block_cfgpred(block, i);
170 if (! is_Bad(pred_X)) {
171 ir_node *pred_bl = get_nodes_block(skip_Proj(pred_X));
173 if (is_Bad(pred_bl) || (get_Block_dom_depth(pred_bl) == -1))
174 exchange (pred_X, new_Bad());
180 * Collects all Phi nodes in link list of Block.
181 * Marks all blocks "block_visited" if they contain a node other
183 * Replaces n by Bad if n is unreachable control flow. We do that
184 * in the post walker, so we catch all blocks.
186 static void collect_nodes(ir_node *n, void *env) {
187 if (is_no_Block(n)) {
188 ir_node *b = get_nodes_block(n);
190 if ((get_irn_op(n) == op_Phi)) {
191 /* Collect Phi nodes to compact ins along with block's ins. */
192 set_irn_link(n, get_irn_link(b));
195 else if ((get_irn_op(n) != op_Jmp) && !is_Bad(b)) { /* Check for non empty block. */
196 mark_Block_block_visited(b);
201 /** Returns true if pred is predecessor of block. */
202 static int is_pred_of(ir_node *pred, ir_node *b) {
205 for (i = 0, n = get_Block_n_cfgpreds(b); i < n; ++i) {
206 ir_node *b_pred = get_nodes_block(get_Block_cfgpred(b, i));
207 if (b_pred == pred) return 1;
213 /** Test whether we can optimize away pred block pos of b.
215 * @param b A block node.
216 * @param pos The position of the predecessor block to judge about.
218 * @returns The number of predecessors
220 * The test is rather tricky.
222 * The situation is something like the following:
230 * b merges the control flow of an if-then-else. We may not remove
231 * the 'then' _and_ the 'else' block of an 'if' if there is a Phi
232 * node in b, even if both are empty. The destruction of this Phi
233 * requires that a copy is added before the merge. We have to
234 * keep one of the case blocks to place the copies in.
236 * To perform the test for pos, we must regard predecessors before pos
237 * as already removed.
239 static int test_whether_dispensable(ir_node *b, int pos) {
240 int i, j, n_preds = 1;
242 ir_node *cfop = get_Block_cfgpred(b, pos);
243 ir_node *pred = get_nodes_block(cfop);
245 /* Bad blocks will be optimized away, so we don't need space for them */
249 if (get_Block_block_visited(pred) + 1
250 < get_irg_block_visited(current_ir_graph)) {
252 if (!get_opt_optimize() || !get_opt_control_flow_strong_simplification()) {
253 /* Mark block so that is will not be removed: optimization is turned off. */
254 set_Block_block_visited(pred, get_irg_block_visited(current_ir_graph)-1);
258 /* Seems to be empty. At least we detected this in collect_nodes. */
259 if (!get_irn_link(b)) {
260 /* There are no Phi nodes ==> all predecessors are dispensable. */
261 n_preds = get_Block_n_cfgpreds(pred);
263 /* b's pred blocks and pred's pred blocks must be pairwise disjunct.
264 Work preds < pos as if they were already removed. */
265 for (i = 0; i < pos; i++) {
266 ir_node *b_pred = get_nodes_block(get_Block_cfgpred(b, i));
267 if (get_Block_block_visited(b_pred) + 1
268 < get_irg_block_visited(current_ir_graph)) {
269 for (j = 0; j < get_Block_n_cfgpreds(b_pred); j++) {
270 ir_node *b_pred_pred = get_nodes_block(get_Block_cfgpred(b_pred, j));
271 if (is_pred_of(b_pred_pred, pred)) dispensable = 0;
274 if (is_pred_of(b_pred, pred)) dispensable = 0;
277 for (i = pos +1; i < get_Block_n_cfgpreds(b); i++) {
278 ir_node *b_pred = get_nodes_block(get_Block_cfgpred(b, i));
279 if (is_pred_of(b_pred, pred)) dispensable = 0;
282 set_Block_block_visited(pred, get_irg_block_visited(current_ir_graph)-1);
285 n_preds = get_Block_n_cfgpreds(pred);
295 * This method removed Bad cf predecessors from Blocks and Phis, and removes
296 * empty blocks. A block is empty if it only contains Phi and Jmp nodes.
298 * We first adapt Phi nodes, then Block nodes, as we need the old ins
299 * of the Block to adapt the Phi nodes. We do this by computing new
300 * in arrays, and then replacing the old ones. So far we compute new in arrays
301 * for all nodes, not regarding whether there is a possibility for optimization.
303 * For each predecessor p of a Block b there are three cases:
304 * 1. The predecessor p is a Bad node: just skip it. The in array of b shrinks by one.
305 * 2. The predecessor p is empty. Remove p. All predecessors of p are now
307 * 3. The predecessor p is a block containing useful code. Just keep p as is.
309 * For Phi nodes f we have to check the conditions at the Block of f.
310 * For cases 1 and 3 we proceed as for Blocks. For case 2 we can have two
312 * 2a: The old predecessor of the Phi f is a Phi pred_f IN THE BLOCK REMOVED. In this
313 * case we proceed as for blocks. We remove pred_f. All
314 * predecessors of pred_f now are predecessors of f.
315 * 2b: The old predecessor of f is NOT in the block removed. It might be a Phi, too.
316 * We have to replicate f for each predecessor of the removed block. Or, with
317 * other words, the removed predecessor block has exactly one predecessor.
319 * Further there is a special case for self referencing blocks:
321 * then_b else_b then_b else_b
327 * | | | === optimized to ===> \ | | |
333 * If there is a Phi in pred_b, but we remove pred_b, we have to generate a
334 * Phi in loop_b, that has the ins of the Phi in pred_b and a self referencing
336 * @@@ It is negotiable whether we should do this ... there might end up a copy
337 * from the Phi in the loop when removing the Phis.
339 static void optimize_blocks(ir_node *b, void *env) {
340 int i, j, k, n, max_preds, n_preds, p_preds;
344 /* Count the number of predecessor if this block is merged with pred blocks
347 for (i = 0, k = get_Block_n_cfgpreds(b); i < k; ++i) {
348 max_preds += test_whether_dispensable(b, i);
350 in = xmalloc(max_preds * sizeof(*in));
353 printf(" working on "); DDMN(b);
354 for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
355 pred = get_nodes_block(get_Block_cfgpred(b, i));
356 if (is_Bad(get_Block_cfgpred(b, i))) {
357 printf(" removing Bad %i\n ", i);
358 } else if (get_Block_block_visited(pred) +1
359 < get_irg_block_visited(current_ir_graph)) {
360 printf(" removing pred %i ", i); DDMN(pred);
361 } else { printf(" Nothing to do for "); DDMN(pred); }
363 * end Debug output -*/
365 /*- Fix the Phi nodes of the current block -*/
366 for (phi = get_irn_link(b); phi; ) {
367 assert(get_irn_op(phi) == op_Phi);
369 /* Find the new predecessors for the Phi */
371 for (i = 0, n = get_Block_n_cfgpreds(b); i < n; ++i) {
372 pred = get_nodes_block(get_Block_cfgpred(b, i));
374 if (is_Bad(get_Block_cfgpred(b, i))) {
375 /* case Phi 1: Do nothing */
377 else if (get_Block_block_visited(pred) + 1
378 < get_irg_block_visited(current_ir_graph)) {
379 /* case Phi 2: It's an empty block and not yet visited. */
380 ir_node *phi_pred = get_Phi_pred(phi, i);
382 for (j = 0, k = get_Block_n_cfgpreds(pred); j < k; j++) {
383 /* because of breaking loops, not all predecessors are Bad-clean,
384 * so we must check this here again */
385 if (! is_Bad(get_Block_cfgpred(pred, j))) {
386 if (get_nodes_block(phi_pred) == pred) {
388 assert(get_irn_op(phi_pred) == op_Phi); /* Block is empty!! */
390 in[p_preds++] = get_Phi_pred(phi_pred, j);
393 in[p_preds++] = phi_pred;
398 /* The Phi_pred node is replaced now if it is a Phi.
400 Somehow the removed Phi node can be used legally in loops.
401 Therefore we replace the old phi by the new one.
403 Further we have to remove the old Phi node by replacing it
404 by Bad. Else it will remain in the keep alive array of End
405 and cause illegal situations. So if there is no loop, we should
408 if (get_nodes_block(phi_pred) == pred) {
409 /* remove the Phi as it might be kept alive. Further there
410 might be other users. */
411 exchange(phi_pred, phi); /* geht, ist aber doch semantisch falsch! Warum?? */
415 in[p_preds++] = get_Phi_pred(phi, i);
418 assert(p_preds <= max_preds);
422 /* By removal of Bad ins the Phi might be degenerated. */
423 exchange(phi, in[0]);
425 set_irn_in(phi, p_preds, in);
427 phi = get_irn_link(phi);
430 /*- This happens only if merge between loop backedge and single loop entry.
431 See special case above. -*/
432 for (k = 0, n = get_Block_n_cfgpreds(b); k < n; ++k) {
433 pred = get_nodes_block(get_Block_cfgpred(b, k));
435 if (get_Block_block_visited(pred) + 1 < get_irg_block_visited(current_ir_graph)) {
436 /* we found a predecessor block at position k that will be removed */
437 for (phi = get_irn_link(pred); phi;) {
439 * the previous phase may already changed the phi, and even
440 * removed it at all, so check here if this node is still a phi
442 if (get_irn_op(phi) == op_Phi) {
445 /* move this phi from the predecessor into the block b */
446 set_nodes_block(phi, b);
448 /* first, copy all 0..k-1 predecessors */
449 for (i = 0; i < k; i++) {
450 pred = get_nodes_block(get_Block_cfgpred(b, i));
452 if (is_Bad(get_Block_cfgpred(b, i))) {
454 } else if (get_Block_block_visited(pred) + 1
455 < get_irg_block_visited(current_ir_graph)) {
456 /* It's an empty block and not yet visited. */
457 for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
458 /* @@@ Hier brauche ich Schleifeninformation!!! Kontrollflusskante
459 muss Rueckwaertskante sein! (An allen vier in[q_preds] = phi
460 Anweisungen.) Trotzdem tuts bisher!! */
461 if (! is_Bad(get_Block_cfgpred(pred, j)))
469 /* now we are at k, copy the phi predecessors */
470 pred = get_nodes_block(get_Block_cfgpred(b, k));
471 for (i = 0; i < get_Phi_n_preds(phi); i++) {
472 if (! is_Bad(get_Block_cfgpred(pred, i)))
473 in[q_preds++] = get_Phi_pred(phi, i);
476 /* and now all the rest */
477 for (i = k+1; i < get_Block_n_cfgpreds(b); i++) {
478 pred = get_nodes_block(get_Block_cfgpred(b, i));
480 if (is_Bad(get_Block_cfgpred(b, i))) {
482 } else if (get_Block_block_visited(pred) +1
483 < get_irg_block_visited(current_ir_graph)) {
484 /* It's an empty block and not yet visited. */
485 for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
486 if (! is_Bad(get_Block_cfgpred(pred, j)))
496 exchange(phi, in[0]);
498 set_irn_in(phi, q_preds, in);
500 assert(q_preds <= max_preds);
501 // assert(p_preds == q_preds && "Wrong Phi Fix");
503 phi = get_irn_link(phi);
508 /*- Fix the block -*/
510 for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
511 pred = get_nodes_block(get_Block_cfgpred(b, i));
513 if (is_Bad(get_Block_cfgpred(b, i))) {
514 /* case 1: Do nothing */
515 } else if (get_Block_block_visited(pred) +1
516 < get_irg_block_visited(current_ir_graph)) {
517 /* case 2: It's an empty block and not yet visited. */
518 assert(get_Block_n_cfgpreds(b) > 1);
519 /* Else it should be optimized by equivalent_node. */
520 for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
521 ir_node *pred_block = get_Block_cfgpred(pred, j);
523 /* because of breaking loops, not all predecessors are Bad-clean,
524 * so we must check this here again */
525 if (! is_Bad(pred_block))
526 in[n_preds++] = pred_block;
528 /* Remove block as it might be kept alive. */
529 exchange(pred, b/*new_Bad()*/);
532 in[n_preds++] = get_Block_cfgpred(b, i);
535 assert(n_preds <= max_preds);
537 set_irn_in(b, n_preds, in);
539 assert(get_irn_link(b) == NULL || (n_preds == p_preds && "Wrong Phi Fix"));
545 /* Optimizations of the control flow that also require changes of Phi nodes.
547 * This optimization performs two passes over the graph.
549 * The first pass collects all Phi nodes in a link list in the block
550 * nodes. Further it performs simple control flow optimizations.
551 * Finally it marks all blocks that do not contain useful
552 * computations, i.e., these blocks might be removed.
554 * The second pass performs the optimizations intended by this algorithm.
555 * It walks only over block nodes and adapts these and the Phi nodes in these blocks,
556 * which it finds in a linked list computed by the first pass.
558 * We use the block_visited flag to mark empty blocks in the first
560 * @@@ It would be better to add a struct in the link field
561 * that keeps the Phi list and the mark. Place it on an obstack, as
562 * we will lose blocks and thereby generate memory leaks.
564 void optimize_cf(ir_graph *irg) {
567 ir_node *end = get_irg_end(irg);
568 ir_graph *rem = current_ir_graph;
569 irg_dom_state dom_state = get_irg_dom_state(current_ir_graph);
570 current_ir_graph = irg;
572 /* Handle graph state */
573 assert(get_irg_phase_state(irg) != phase_building);
574 if (get_irg_outs_state(current_ir_graph) == outs_consistent)
575 set_irg_outs_inconsistent(current_ir_graph);
576 if (get_irg_dom_state(current_ir_graph) == dom_consistent)
577 set_irg_dom_inconsistent(current_ir_graph);
579 if (dom_state == dom_consistent && get_opt_optimize() && get_opt_unreachable_code()) {
580 ir_node *end = get_irg_end(irg);
582 /* we have dominance info, we can kill dead block */
583 irg_block_walk_graph(irg, NULL, remove_dead_block_cf, NULL);
585 /* fix the keep-alives */
586 for (i = 0, n = get_End_n_keepalives(end); i < n; ++i) {
587 ir_node *ka = get_End_keepalive(end, i);
589 if (is_Block(ka) && (get_Block_dom_depth(ka) == -1))
590 set_End_keepalive(end, i, new_Bad());
591 if (is_Phi(ka) && (get_Block_dom_depth(get_nodes_block(ka)) == -1))
592 set_End_keepalive(end, i, new_Bad());
596 irg_block_walk_graph(current_ir_graph, NULL, remove_senseless_conds, NULL);
598 /* Use block visited flag to mark non-empty blocks. */
599 inc_irg_block_visited(irg);
600 irg_walk(end, merge_blocks, collect_nodes, NULL);
602 /* Optimize the standard code. */
603 irg_block_walk(get_irg_end_block(irg), optimize_blocks, NULL, NULL);
605 /* Walk all keep alives, optimize them if block, add to new in-array
606 for end if useful. */
607 in = NEW_ARR_F (ir_node *, 1);
608 in[0] = get_nodes_block(end);
609 inc_irg_visited(current_ir_graph);
611 for (i = 0; i < get_End_n_keepalives(end); i++) {
612 ir_node *ka = get_End_keepalive(end, i);
614 if (irn_not_visited(ka)) {
615 if ((get_irn_op(ka) == op_Block) && Block_not_block_visited(ka)) {
616 set_irg_block_visited(current_ir_graph, /* Don't walk all the way to Start. */
617 get_irg_block_visited(current_ir_graph)-1);
618 irg_block_walk(ka, optimize_blocks, NULL, NULL);
619 mark_irn_visited(ka);
620 ARR_APP1 (ir_node *, in, ka);
621 } else if (get_irn_op(ka) == op_Phi) {
622 mark_irn_visited(ka);
623 ARR_APP1 (ir_node *, in, ka);
627 /* DEL_ARR_F(end->in); GL @@@ tut nicht ! */
631 /* the verifier doesn't work yet with floating nodes */
632 if (get_irg_pinned(irg) == op_pin_state_pinned) {
633 /* after optimize_cf(), only Bad data flow may remain. */
634 if (irg_vrfy_bads(irg, BAD_DF | BAD_BLOCK | TUPLE)) {
635 dump_ir_block_graph(irg, "-vrfy-cf");
636 dump_ir_graph(irg, "-vrfy-cf");
637 fprintf(stderr, "VRFY_BAD in optimize_cf()\n");
641 current_ir_graph = rem;