3 * File name: ir/opt/cfopt.c
4 * Purpose: control flow optimizations
8 * Copyright: (c) 1998-2004 Universität Karlsruhe
9 * Licence: This file protected by GPL - GNU GENERAL PUBLIC LICENSE.
19 #include "irgraph_t.h"
32 #include "irbackedge_t.h"
39 /*------------------------------------------------------------------*/
40 /* Control flow optimization. */
42 /* Removes Bad control flow predecessors and empty blocks. A block */
43 /* is empty if it contains only a Jmp node. */
44 /* Blocks can only be removed if they are not needed for the */
45 /* semantics of Phi nodes. */
46 /*------------------------------------------------------------------*/
49 * Removes Tuples from Block control flow predecessors.
50 * Optimizes blocks with equivalent_node(). This is tricky,
51 * as we want to avoid nodes that have as block predecessor Bads.
52 * Therefore we also optimize at control flow operations, depending
53 * how we first reach the Block.
55 static void merge_blocks(ir_node *n, void *env) {
/* Pre-walker: clears links, skips Tuples in Block cf-preds, and locally
 * simplifies Blocks (and the Blocks of mode_X cf nodes) via equivalent_node().
 * NOTE(review): several interior lines of this function are not visible here. */
59 /* clear the link field for ALL nodes first */
60 set_irn_link(n, NULL);
62 if (get_irn_op(n) == op_Block) {
/* Remove Tuple nodes from the Block's control flow predecessors. */
64 for (i = 0; i < get_Block_n_cfgpreds(n); i++) {
65 /* GL @@@ : is this possible? if (get_opt_normalize()) -- added, all tests go through.
66 A different order of optimizations might cause problems. */
67 if (get_opt_normalize())
68 set_Block_cfgpred(n, i, skip_Tuple(get_Block_cfgpred(n, i)));
/* Replace the Block by its simplified equivalent unless that is Bad. */
72 new_block = equivalent_node(n);
73 if (new_block != n && ! is_Bad(new_block))
74 exchange (n, new_block);
76 } else if (get_opt_optimize() && (get_irn_mode(n) == mode_X)) {
77 /* We will soon visit a block. Optimize it before visiting! */
78 ir_node *b = get_nodes_block(skip_Proj(n));
81 new_block = equivalent_node(b);
/* Iterate: each exchange may enable further simplification of the block. */
83 while (irn_not_visited(b) && (!is_Bad(new_block)) && (new_block != b)) {
84 /* We would have to run gigo if new is bad, so we
85 promote it directly below. Nevertheless, we sometimes reach a block
86 the first time through a dataflow node. In this case we optimized the
87 block as such and have to promote the Bad here. */
88 assert((get_opt_control_flow_straightening() ||
89 get_opt_control_flow_weak_simplification()) &&
90 ("strange flag setting"));
91 exchange (b, new_block);
93 new_block = equivalent_node(b);
96 /* normally, we would create a Bad block here, but this must be
97 * prevented, so just set its cf to Bad.
99 if (is_Bad(new_block))
100 exchange(n, new_Bad());
106 * Remove cf from dead block by inspecting dominance info
107 * Do not replace blocks by Bad. This optimization shall
108 * ensure, that all Bad cfg preds are removed, and no new
109 * other Bads are introduced.
111 * Must be run in the post walker.
113 static void remove_dead_block_cf(ir_node *block, void *env)
/* Post-walker: turns control flow edges coming from dead blocks into Bad.
 * A predecessor block is dead if it is Bad or has dominator depth -1
 * (unreachable per the dominance info). Blocks themselves are never
 * replaced here, only their incoming cf edges. */
117 /* check block predecessors and turn control flow into bad */
118 for (i = 0, n = get_Block_n_cfgpreds(block); i < n; ++i) {
119 ir_node *pred_X = get_Block_cfgpred(block, i);
121 if (! is_Bad(pred_X)) {
122 ir_node *pred_bl = get_nodes_block(skip_Proj(pred_X));
/* dom depth -1 means the block was never reached by the dominance walk */
124 if (is_Bad(pred_bl) || (get_Block_dom_depth(pred_bl) == -1))
125 exchange (pred_X, new_Bad());
131 * Collects all Phi nodes in link list of Block.
132 * Marks all blocks "block_visited" if they contain a node other
134 * Replaces n by Bad if n is unreachable control flow. We do that
135 * in the post walker, so we catch all blocks.
137 static void collect_nodes(ir_node *n, void *env) {
/* Post-walker: threads all Phi nodes of a block into the block's link
 * field, and marks blocks "block_visited" when they contain any node
 * other than Phi/Jmp (i.e. the block is non-empty and must be kept). */
138 if (is_no_Block(n)) {
139 ir_node *b = get_nodes_block(n);
141 if ((get_irn_op(n) == op_Phi)) {
142 /* Collect Phi nodes to compact ins along with block's ins. */
143 set_irn_link(n, get_irn_link(b));
146 else if ((get_irn_op(n) != op_Jmp) && !is_Bad(b)) { /* Check for non empty block. */
147 mark_Block_block_visited(b);
152 /** Returns true if pred is predecessor of block. */
153 static int is_pred_of(ir_node *pred, ir_node *b) {
/* Returns 1 iff block 'pred' is the block of one of b's cf predecessors. */
156 for (i = 0, n = get_Block_n_cfgpreds(b); i < n; ++i) {
157 ir_node *b_pred = get_nodes_block(get_Block_cfgpred(b, i));
158 if (b_pred == pred) return 1;
164 /** Test whether we can optimize away pred block pos of b.
166 * @param b A block node.
167 * @param pos The position of the predecessor block to judge about.
169 * @returns The number of predecessors
171 * The test is rather tricky.
173 * The situation is something like the following:
181 * b merges the control flow of an if-then-else. We may not remove
182 * the 'then' _and_ the 'else' block of an 'if' if there is a Phi
183 * node in b, even if both are empty. The destruction of this Phi
184 * requires that a copy is added before the merge. We have to
185 * keep one of the case blocks to place the copies in.
187 * To perform the test for pos, we must regard preds before pos
188 * as already removed.
190 static int test_whether_dispensable(ir_node *b, int pos) {
/* Decides whether the predecessor block at position pos of b may be merged
 * into b, and returns the number of cf-in slots it will occupy afterwards:
 * the pred's own cfgpred count if it is dispensable, 1 otherwise.
 * A block already marked block_visited (counter not behind the graph's) is
 * non-empty and thus kept. Marking a block with visited-1 here pins it as
 * NOT dispensable for later passes. */
191 int i, j, n_preds = 1;
193 ir_node *cfop = get_Block_cfgpred(b, pos);
194 ir_node *pred = get_nodes_block(cfop);
196 /* Bad blocks will be optimized away, so we don't need space for them */
200 if (get_Block_block_visited(pred) + 1
201 < get_irg_block_visited(current_ir_graph)) {
203 if (!get_opt_optimize() || !get_opt_control_flow_strong_simplification()) {
204 /* Mark block so that is will not be removed: optimization is turned off. */
205 set_Block_block_visited(pred, get_irg_block_visited(current_ir_graph)-1);
209 /* Seems to be empty. At least we detected this in collect_nodes. */
210 if (!get_irn_link(b)) {
211 /* There are no Phi nodes ==> all predecessors are dispensable. */
212 n_preds = get_Block_n_cfgpreds(pred);
214 /* b's pred blocks and pred's pred blocks must be pairwise disjunct.
215 Work preds < pos as if they were already removed. */
216 for (i = 0; i < pos; i++) {
217 ir_node *b_pred = get_nodes_block(get_Block_cfgpred(b, i));
218 if (get_Block_block_visited(b_pred) + 1
219 < get_irg_block_visited(current_ir_graph)) {
/* b_pred is itself dispensable: compare against its own preds instead */
220 for (j = 0; j < get_Block_n_cfgpreds(b_pred); j++) {
221 ir_node *b_pred_pred = get_nodes_block(get_Block_cfgpred(b_pred, j));
222 if (is_pred_of(b_pred_pred, pred)) dispensable = 0;
225 if (is_pred_of(b_pred, pred)) dispensable = 0;
/* preds after pos are regarded as staying, so compare them directly */
228 for (i = pos +1; i < get_Block_n_cfgpreds(b); i++) {
229 ir_node *b_pred = get_nodes_block(get_Block_cfgpred(b, i));
230 if (is_pred_of(b_pred, pred)) dispensable = 0;
/* Pin pred as non-dispensable so later positions see a consistent state. */
233 set_Block_block_visited(pred, get_irg_block_visited(current_ir_graph)-1);
236 n_preds = get_Block_n_cfgpreds(pred);
246 * This method removes Bad cf preds from Blocks and Phis, and removes
247 * empty blocks. A block is empty if it only contains Phi and Jmp nodes.
249 * We first adapt Phi nodes, then Block nodes, as we need the old ins
250 * of the Block to adapt the Phi nodes. We do this by computing new
251 * in arrays, and then replacing the old ones. So far we compute new in arrays
252 * for all nodes, not regarding whether there is a possibility for optimization.
254 * For each predecessor p of a Block b there are three cases:
255 * 1. The predecessor p is a Bad node: just skip it. The in array of b shrinks by one.
256 * 2. The predecessor p is empty. Remove p. All predecessors of p are now
258 * 3. The predecessor p is a block containing useful code. Just keep p as is.
260 * For Phi nodes f we have to check the conditions at the Block of f.
261 * For cases 1 and 3 we proceed as for Blocks. For case 2 we can have two
263 * 2a: The old predecessor of the Phi f is a Phi pred_f IN THE BLOCK REMOVED. In this
264 * case we proceed as for blocks. We remove pred_f. All
265 * predecessors of pred_f now are predecessors of f.
266 * 2b: The old predecessor of f is NOT in the block removed. It might be a Phi, too.
267 * We have to replicate f for each predecessor of the removed block. Or, with
268 * other words, the removed predecessor block has exactly one predecessor.
270 * Further there is a special case for self referencing blocks:
272 * then_b else_b then_b else_b
278 * | | | === optimized to ===> \ | | |
284 * If there is a Phi in pred_b, but we remove pred_b, we have to generate a
285 * Phi in loop_b, that has the ins of the Phi in pred_b and a self referencing
287 * @@@ It is negotiable whether we should do this ... there might end up a copy
288 * from the Phi in the loop when removing the Phis.
290 static void optimize_blocks(ir_node *b, void *env) {
/* Block walker: removes Bad cf-preds and merges dispensable (empty)
 * predecessor blocks into b. Works in three phases:
 *   1. fix the Phi nodes of b itself,
 *   2. move/fix Phis of removed predecessor blocks (loop-entry special case),
 *   3. rebuild b's own in-array.
 * Phases must run in this order: the Phi fixup needs the OLD Block ins.
 * NOTE(review): several interior lines are not visible in this view. */
291 int i, j, k, n, max_preds, n_preds, p_preds;
295 /* Count the number of predecessor if this block is merged with pred blocks
298 for (i = 0, k = get_Block_n_cfgpreds(b); i < k; ++i) {
299 max_preds += test_whether_dispensable(b, i);
301 in = (ir_node **) malloc(max_preds * sizeof(ir_node *));
/* Debug output: report what will happen to each predecessor. */
304 printf(" working on "); DDMN(b);
305 for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
306 pred = get_nodes_block(get_Block_cfgpred(b, i));
307 if (is_Bad(get_Block_cfgpred(b, i))) {
308 printf(" removing Bad %i\n ", i);
309 } else if (get_Block_block_visited(pred) +1
310 < get_irg_block_visited(current_ir_graph)) {
311 printf(" removing pred %i ", i); DDMN(pred);
312 } else { printf(" Nothing to do for "); DDMN(pred); }
314 * end Debug output -*/
316 /*- Fix the Phi nodes of the current block -*/
317 for (phi = get_irn_link(b); phi; ) {
318 assert(get_irn_op(phi) == op_Phi);
320 /* Find the new predecessors for the Phi */
322 for (i = 0, n = get_Block_n_cfgpreds(b); i < n; ++i) {
323 pred = get_nodes_block(get_Block_cfgpred(b, i));
325 if (is_Bad(get_Block_cfgpred(b, i))) {
326 /* case Phi 1: Do nothing */
328 else if (get_Block_block_visited(pred) + 1
329 < get_irg_block_visited(current_ir_graph)) {
330 /* case Phi 2: It's an empty block and not yet visited. */
331 ir_node *phi_pred = get_Phi_pred(phi, i);
333 for (j = 0, k = get_Block_n_cfgpreds(pred); j < k; j++) {
334 /* because of breaking loops, not all predecessors are Bad-clean,
335 * so we must check this here again */
336 if (! is_Bad(get_Block_cfgpred(pred, j))) {
337 if (get_nodes_block(phi_pred) == pred) {
/* case 2a: pull the preds of the Phi in the removed block up into phi */
339 assert(get_irn_op(phi_pred) == op_Phi); /* Block is empty!! */
341 in[p_preds++] = get_Phi_pred(phi_pred, j);
/* case 2b: replicate the single pred value for each pred of the block */
344 in[p_preds++] = phi_pred;
349 /* The Phi_pred node is replaced now if it is a Phi.
351 Somehow the removed Phi node can be used legally in loops.
352 Therefore we replace the old phi by the new one.
354 Further we have to remove the old Phi node by replacing it
355 by Bad. Else it will remain in the keepalive array of End
356 and cause illegal situations. So if there is no loop, we should
359 if (get_nodes_block(phi_pred) == pred) {
360 /* remove the Phi as it might be kept alive. Further there
361 might be other users. */
362 exchange(phi_pred, phi); /* works, but is semantically wrong! Why?? */
/* case Phi 3: pred is a useful block, keep the edge as is */
366 in[p_preds++] = get_Phi_pred(phi, i);
369 assert(p_preds <= max_preds);
373 /* By removal of Bad ins the Phi might be degenerated. */
374 exchange(phi, in[0]);
376 set_irn_in(phi, p_preds, in);
378 phi = get_irn_link(phi);
381 /*- This happens only if merge between loop backedge and single loop entry.
382 See special case above. -*/
383 for (k = 0, n = get_Block_n_cfgpreds(b); k < n; ++k) {
384 pred = get_nodes_block(get_Block_cfgpred(b, k));
386 if (get_Block_block_visited(pred) + 1 < get_irg_block_visited(current_ir_graph)) {
387 /* we found a predecessor block at position k that will be removed */
388 for (phi = get_irn_link(pred); phi;) {
390 * the previous phase may already changed the phi, and even
391 * removed it at all, so check here if this node is still a phi
393 if (get_irn_op(phi) == op_Phi) {
396 /* move this phi from the predecessor into the block b */
397 set_nodes_block(phi, b);
399 /* first, copy all 0..k-1 predecessors */
400 for (i = 0; i < k; i++) {
401 pred = get_nodes_block(get_Block_cfgpred(b, i));
403 if (is_Bad(get_Block_cfgpred(b, i))) {
405 } else if (get_Block_block_visited(pred) + 1
406 < get_irg_block_visited(current_ir_graph)) {
407 /* It's an empty block and not yet visited. */
408 for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
409 /* @@@ Loop information is needed here!!! The control flow edge
410 must be a backedge! (At all four in[q_preds] = phi
411 statements.) It has worked so far nonetheless!! */
412 if (! is_Bad(get_Block_cfgpred(pred, j)))
420 /* now we are at k, copy the phi predecessors */
421 pred = get_nodes_block(get_Block_cfgpred(b, k));
422 for (i = 0; i < get_Phi_n_preds(phi); i++) {
423 if (! is_Bad(get_Block_cfgpred(pred, i)))
424 in[q_preds++] = get_Phi_pred(phi, i);
427 /* and now all the rest */
428 for (i = k+1; i < get_Block_n_cfgpreds(b); i++) {
429 pred = get_nodes_block(get_Block_cfgpred(b, i));
431 if (is_Bad(get_Block_cfgpred(b, i))) {
433 } else if (get_Block_block_visited(pred) +1
434 < get_irg_block_visited(current_ir_graph)) {
435 /* It's an empty block and not yet visited. */
436 for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
437 if (! is_Bad(get_Block_cfgpred(pred, j)))
447 exchange(phi, in[0]);
449 set_irn_in(phi, q_preds, in);
451 assert(q_preds <= max_preds);
452 // assert(p_preds == q_preds && "Wrong Phi Fix");
454 phi = get_irn_link(phi);
459 /*- Fix the block -*/
461 for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
462 pred = get_nodes_block(get_Block_cfgpred(b, i));
464 if (is_Bad(get_Block_cfgpred(b, i))) {
465 /* case 1: Do nothing */
466 } else if (get_Block_block_visited(pred) +1
467 < get_irg_block_visited(current_ir_graph)) {
468 /* case 2: It's an empty block and not yet visited. */
469 assert(get_Block_n_cfgpreds(b) > 1);
470 /* Else it should be optimized by equivalent_node. */
471 for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
472 ir_node *pred_block = get_Block_cfgpred(pred, j);
474 /* because of breaking loops, not all predecessors are Bad-clean,
475 * so we must check this here again */
476 if (! is_Bad(pred_block))
477 in[n_preds++] = pred_block;
479 /* Remove block as it might be kept alive. */
480 exchange(pred, b/*new_Bad()*/);
/* case 3: keep the predecessor edge unchanged */
483 in[n_preds++] = get_Block_cfgpred(b, i);
486 assert(n_preds <= max_preds);
488 set_irn_in(b, n_preds, in);
490 assert(get_irn_link(b) == NULL || (n_preds == p_preds && "Wrong Phi Fix"));
496 /* Optimizations of the control flow that also require changes of Phi nodes.
498 * This optimization performs two passes over the graph.
500 * The first pass collects all Phi nodes in a link list in the block
501 * nodes. Further it performs simple control flow optimizations.
502 * Finally it marks all blocks that do not contain useful
503 * computations, i.e., these blocks might be removed.
505 * The second pass performs the optimizations intended by this algorithm.
506 * It walks only over block nodes and adapts these and the Phi nodes in these blocks,
507 * which it finds in a linked list computed by the first pass.
509 * We use the block_visited flag to mark empty blocks in the first
511 * @@@ It would be better to add a struct in the link field
512 * that keeps the Phi list and the mark. Place it on an obstack, as
513 * we will lose blocks and thereby generate mem leaks.
515 void optimize_cf(ir_graph *irg) {
/* Driver for the control flow optimization:
 *   - invalidates outs/dominance state,
 *   - if dominance info is available, removes cf edges from dead blocks
 *     and patches the End keep-alives accordingly,
 *   - runs merge_blocks/collect_nodes, then optimize_blocks,
 *   - finally re-optimizes blocks that are only reachable via keep-alives
 *     and (in pinned graphs) verifies that no illegal Bads remain.
 * NOTE(review): several interior lines are not visible in this view. */
518 ir_node *end = get_irg_end(irg);
519 ir_graph *rem = current_ir_graph;
520 irg_dom_state dom_state = get_irg_dom_state(current_ir_graph);
521 current_ir_graph = irg;
523 /* Handle graph state */
524 assert(get_irg_phase_state(irg) != phase_building);
525 if (get_irg_outs_state(current_ir_graph) == outs_consistent)
526 set_irg_outs_inconsistent(current_ir_graph);
527 if (get_irg_dom_state(current_ir_graph) == dom_consistent)
528 set_irg_dom_inconsistent(current_ir_graph);
530 if (dom_state == dom_consistent && get_opt_optimize() && get_opt_unreachable_code()) {
531 ir_node *end = get_irg_end(irg);
533 /* we have dominance info, we can kill dead blocks */
534 irg_block_walk_graph(irg, NULL, remove_dead_block_cf, NULL);
536 /* fix the keep-alives */
537 for (i = 0, n = get_End_n_keepalives(end); i < n; ++i) {
538 ir_node *ka = get_End_keepalive(end, i);
/* replace keep-alives of unreachable blocks/Phis (dom depth -1) by Bad */
540 if (is_Block(ka) && (get_Block_dom_depth(ka) == -1))
541 set_End_keepalive(end, i, new_Bad());
542 if (is_Phi(ka) && (get_Block_dom_depth(get_nodes_block(ka)) == -1))
543 set_End_keepalive(end, i, new_Bad());
547 /* Use block visited flag to mark non-empty blocks. */
548 inc_irg_block_visited(irg);
549 irg_walk(end, merge_blocks, collect_nodes, NULL);
551 /* Optimize the standard code. */
552 irg_block_walk(get_irg_end_block(irg), optimize_blocks, NULL, NULL);
554 /* Walk all keep alives, optimize them if block, add to new in-array
555 for end if useful. */
556 in = NEW_ARR_F (ir_node *, 1);
557 in[0] = get_nodes_block(end);
558 inc_irg_visited(current_ir_graph);
560 for (i = 0; i < get_End_n_keepalives(end); i++) {
561 ir_node *ka = get_End_keepalive(end, i);
563 if (irn_not_visited(ka)) {
564 if ((get_irn_op(ka) == op_Block) && Block_not_block_visited(ka)) {
565 set_irg_block_visited(current_ir_graph, /* Don't walk all the way to Start. */
566 get_irg_block_visited(current_ir_graph)-1);
567 irg_block_walk(ka, optimize_blocks, NULL, NULL);
568 mark_irn_visited(ka);
569 ARR_APP1 (ir_node *, in, ka);
570 } else if (get_irn_op(ka) == op_Phi) {
571 mark_irn_visited(ka);
572 ARR_APP1 (ir_node *, in, ka);
576 /* DEL_ARR_F(end->in); GL @@@ does not work! */
579 /* the verifier doesn't work yet with floating nodes */
580 if (get_irg_pinned(irg) == op_pin_state_pinned) {
581 /* after optimize_cf(), only Bad data flow may remain. */
582 if (irg_vrfy_bads(irg, BAD_DF | BAD_BLOCK | TUPLE)) {
583 dump_ir_block_graph(irg, "-vrfy-cf");
584 dump_ir_graph(irg, "-vrfy-cf");
585 fprintf(stderr, "VRFY_BAD in optimize_cf()\n");
589 current_ir_graph = rem;