3 * File name: ir/opt/cfopt.c
4 * Purpose: control flow optimizations
8 * Copyright: (c) 1998-2004 Universität Karlsruhe
9 * Licence: This file protected by GPL - GNU GENERAL PUBLIC LICENSE.
19 #include "irgraph_t.h"
32 #include "irbackedge_t.h"
39 /*------------------------------------------------------------------*/
40 /* Control flow optimization. */
42 /* Removes Bad control flow predecessors and empty blocks. A block */
43 /* is empty if it contains only a Jmp node. */
44 /* Blocks can only be removed if they are not needed for the */
45 /* semantics of Phi nodes. */
46 /*------------------------------------------------------------------*/
49 * Removes Tuples from Block control flow predecessors.
50 * Optimizes blocks with equivalent_node(). This is tricky,
51 * as we want to avoid nodes that have as block predecessor Bads.
52 * Therefore we also optimize at control flow operations, depending
53 * how we first reach the Block.
55 static void merge_blocks(ir_node *n, void *env) {
59 /* clear the link field for ALL nodes first */
60 set_irn_link(n, NULL);
62 if (get_irn_op(n) == op_Block) {
64 for (i = 0; i < get_Block_n_cfgpreds(n); i++) {
65 /* GL @@@ : is this possible? if (get_opt_normalize()) -- added, all tests go through.
66 A different order of optimizations might cause problems. */
67 if (get_opt_normalize())
68 set_Block_cfgpred(n, i, skip_Tuple(get_Block_cfgpred(n, i)));
70 new_block = equivalent_node(n);
72 exchange (n, new_block);
74 } else if (get_opt_optimize() && (get_irn_mode(n) == mode_X)) {
75 /* We will soon visit a block. Optimize it before visiting! */
76 ir_node *b = get_nodes_block(n);
79 new_block = equivalent_node(b);
81 while (irn_not_visited(b) && (!is_Bad(new_block)) && (new_block != b)) {
82 /* We would have to run gigo if new is bad, so we
83 promote it directly below. Nevertheless, we sometimes reach a block
84 the first time through a dataflow node. In this case we optimized the
85 block as such and have to promote the Bad here. */
86 assert(((b == new_block) ||
87 get_opt_control_flow_straightening() ||
88 get_opt_control_flow_weak_simplification()) &&
89 ("strange flag setting"));
90 exchange (b, new_block);
92 new_block = equivalent_node(b);
98 * BEWARE: do not kill floating notes here as they might be needed in
99 * valid blocks because of global CSE.
101 if (is_Bad(b) && get_opt_normalize() &&
102 get_op_pinned(get_irn_op(n)) == op_pin_state_pinned)
103 exchange(n, new_Bad());
108 * Remove dead block by inspecting dominance info
110 static void remove_dead_blocks(ir_node *block, void *env) {
111 /* delete dead blocks: if we have dominator information, this can easily be detected.
112 * Here, new Bad blocks my be introduced.
114 * BEWARE: don't kill the end block */
115 if (block != get_irg_end_block(current_ir_graph) &&
116 get_Block_dom_depth(block) == -1 &&
117 get_opt_unreachable_code()) {
118 exchange (block, new_Bad());
123 * Collects all Phi nodes in link list of Block.
124 * Marks all blocks "block_visited" if they contain a node other
126 * Replaces n by Bad if n is unreachable control flow. We do that
127 * in the post walker, so we catch all blocks.
129 static void collect_nodes(ir_node *n, void *env) {
130 if (is_no_Block(n)) {
131 ir_node *b = get_nodes_block(n);
134 * BEWARE: do not kill floating notes here as they might be needed in
135 * valid blocks because of global CSE.
138 get_op_pinned(get_irn_op(n)) == op_pin_state_pinned) {
139 /* previous merge_blocks() may have killed dead blocks */
140 exchange(n, new_Bad());
142 else if ((get_irn_op(n) == op_Phi)) {
143 /* Collect Phi nodes to compact ins along with block's ins. */
144 set_irn_link(n, get_irn_link(b));
147 else if ((get_irn_op(n) != op_Jmp) && !is_Bad(b)) { /* Check for non empty block. */
148 mark_Block_block_visited(b);
153 /** Returns true if pred is predecessor of block. */
154 static int is_pred_of(ir_node *pred, ir_node *b) {
157 for (i = 0, n = get_Block_n_cfgpreds(b); i < n; ++i) {
158 ir_node *b_pred = get_nodes_block(get_Block_cfgpred(b, i));
159 if (b_pred == pred) return 1;
165 /** Test wether we can optimize away pred block pos of b.
167 * @param b A block node.
168 * @param pos The position of the predecessor block to judge about.
170 * @returns The number of predecessors
172 * The test is rather tricky.
174 * The situation is something like the following:
182 * b merges the control flow of an if-then-else. We may not remove
183 * the 'then' _and_ the 'else' block of an 'if' if there is a Phi
184 * node in b, even if both are empty. The destruction of this Phi
185 * requires that a copy is added before the merge. We have to
186 * keep one of the case blocks to place the copies in.
188 * To perform the test for pos, we must regard preds before pos
189 * as already removed.
191 static int test_whether_dispensable(ir_node *b, int pos) {
192 int i, j, n_preds = 1;
194 ir_node *cfop = get_Block_cfgpred(b, pos);
195 ir_node *pred = get_nodes_block(cfop);
197 if (get_Block_block_visited(pred) + 1
198 < get_irg_block_visited(current_ir_graph)) {
200 if (!get_opt_optimize() || !get_opt_control_flow_strong_simplification()) {
201 /* Mark block so that is will not be removed: optimization is turned off. */
202 set_Block_block_visited(pred, get_irg_block_visited(current_ir_graph)-1);
206 /* Seems to be empty. At least we detected this in collect_nodes. */
207 if (!get_irn_link(b)) {
208 /* There are no Phi nodes ==> all predecessors are dispensable. */
209 n_preds = get_Block_n_cfgpreds(pred);
211 /* b's pred blocks and pred's pred blocks must be pairwise disjunct.
212 Work preds < pos as if they were already removed. */
213 for (i = 0; i < pos; i++) {
214 ir_node *b_pred = get_nodes_block(get_Block_cfgpred(b, i));
215 if (get_Block_block_visited(b_pred) + 1
216 < get_irg_block_visited(current_ir_graph)) {
217 for (j = 0; j < get_Block_n_cfgpreds(b_pred); j++) {
218 ir_node *b_pred_pred = get_nodes_block(get_Block_cfgpred(b_pred, j));
219 if (is_pred_of(b_pred_pred, pred)) dispensable = 0;
222 if (is_pred_of(b_pred, pred)) dispensable = 0;
225 for (i = pos +1; i < get_Block_n_cfgpreds(b); i++) {
226 ir_node *b_pred = get_nodes_block(get_Block_cfgpred(b, i));
227 if (is_pred_of(b_pred, pred)) dispensable = 0;
230 set_Block_block_visited(pred, get_irg_block_visited(current_ir_graph)-1);
233 n_preds = get_Block_n_cfgpreds(pred);
243 * This method removed Bad cf preds from Blocks and Phis, and removes
244 * empty blocks. A block is empty if it only contains Phi and Jmp nodes.
246 * We first adapt Phi nodes, then Block nodes, as we need the old ins
247 * of the Block to adapt the Phi nodes. We do this by computing new
248 * in arrays, and then replacing the old ones. So far we compute new in arrays
249 * for all nodes, not regarding whether there is a possibility for optimization.
251 * For each predecessor p of a Block b there are three cases:
252 * 1. The predecessor p is a Bad node: just skip it. The in array of b shrinks by one.
253 * 2. The predecessor p is empty. Remove p. All predecessors of p are now
255 * 3. The predecessor p is a block containing useful code. Just keep p as is.
257 * For Phi nodes f we have to check the conditions at the Block of f.
258 * For cases 1 and 3 we proceed as for Blocks. For case 2 we can have two
260 * 2a: The old precessor of the Phi f is a Phi pred_f IN THE BLOCK REMOVED. In this
261 * case we proceed as for blocks. We remove pred_f. All
262 * predecessors of pred_f now are predecessors of f.
263 * 2b: The old predecessor of f is NOT in the block removed. It might be a Phi, too.
264 * We have to replicate f for each predecessor of the removed block. Or, with
265 * other words, the removed predecessor block has exactly one predecessor.
267 * Further there is a special case for self referencing blocks:
269 * then_b else_b then_b else_b
275 * | | | === optimized to ===> \ | | |
281 * If there is a Phi in pred_b, but we remove pred_b, we have to generate a
282 * Phi in loop_b, that has the ins of the Phi in pred_b and a self referencing
284 * @@@ It is negotiable whether we should do this ... there might end up a copy
285 * from the Phi in the loop when removing the Phis.
287 static void optimize_blocks(ir_node *b, void *env) {
288 int i, j, k, n, max_preds, n_preds, p_preds;
292 /* Count the number of predecessor if this block is merged with pred blocks
295 for (i = 0, k = get_Block_n_cfgpreds(b); i < k; ++i) {
296 max_preds += test_whether_dispensable(b, i);
298 in = (ir_node **) malloc(max_preds * sizeof(ir_node *));
301 printf(" working on "); DDMN(b);
302 for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
303 pred = get_nodes_block(get_Block_cfgpred(b, i));
304 if (is_Bad(get_Block_cfgpred(b, i))) {
305 printf(" removing Bad %i\n ", i);
306 } else if (get_Block_block_visited(pred) +1
307 < get_irg_block_visited(current_ir_graph)) {
308 printf(" removing pred %i ", i); DDMN(pred);
309 } else { printf(" Nothing to do for "); DDMN(pred); }
311 * end Debug output -*/
313 /*- Fix the Phi nodes of the current block -*/
314 for (phi = get_irn_link(b); phi; ) {
315 assert(get_irn_op(phi) == op_Phi);
317 /* Find the new predecessors for the Phi */
319 for (i = 0, n = get_Block_n_cfgpreds(b); i < n; ++i) {
320 pred = get_nodes_block(get_Block_cfgpred(b, i));
322 if (is_Bad(get_Block_cfgpred(b, i))) {
323 /* case Phi 1: Do nothing */
325 else if (get_Block_block_visited(pred) + 1
326 < get_irg_block_visited(current_ir_graph)) {
327 /* case Phi 2: It's an empty block and not yet visited. */
328 ir_node *phi_pred = get_Phi_pred(phi, i);
330 for (j = 0, k = get_Block_n_cfgpreds(pred); j < k; j++) {
331 /* because of breaking loops, not all predecessors are Bad-clean,
332 * so we must check this here again */
333 if (! is_Bad(get_Block_cfgpred(pred, j))) {
334 if (get_nodes_block(phi_pred) == pred) {
336 assert(get_irn_op(phi_pred) == op_Phi); /* Block is empty!! */
338 in[p_preds++] = get_Phi_pred(phi_pred, j);
341 in[p_preds++] = phi_pred;
346 /* The Phi_pred node is replaced now if it is a Phi.
348 Somehow the removed Phi node can be used legally in loops.
349 Therefore we replace the old phi by the new one.
351 Further we have to remove the old Phi node by replacing it
352 by Bad. Else it will remain in the keepalive array of End
353 and cause illegal situations. So if there is no loop, we should
356 if (get_nodes_block(phi_pred) == pred) {
357 /* remove the Phi as it might be kept alive. Further there
358 might be other users. */
359 exchange(phi_pred, phi); /* geht, ist aber doch semantisch falsch! Warum?? */
363 in[p_preds++] = get_Phi_pred(phi, i);
369 /* By removal of Bad ins the Phi might be degenerated. */
370 exchange(phi, in[0]);
372 set_irn_in(phi, p_preds, in);
374 phi = get_irn_link(phi);
377 /*- This happens only if merge between loop backedge and single loop entry.
378 See special case above. -*/
379 for (k = 0, n = get_Block_n_cfgpreds(b); k < n; ++k) {
380 pred = get_nodes_block(get_Block_cfgpred(b, k));
382 if (get_Block_block_visited(pred) + 1 < get_irg_block_visited(current_ir_graph)) {
383 /* we found a predecessor block at position k that will be removed */
384 for (phi = get_irn_link(pred); phi;) {
386 * the previous phase may already changed the phi, and even
387 * removed it at all, so check here if this node is still a phi
389 if (get_irn_op(phi) == op_Phi) {
392 /* move this phi from the predecessor into the block b */
393 set_nodes_block(phi, b);
395 /* first, copy all 0..k-1 predecessors */
396 for (i = 0; i < k; i++) {
397 pred = get_nodes_block(get_Block_cfgpred(b, i));
399 if (is_Bad(get_Block_cfgpred(b, i))) {
401 } else if (get_Block_block_visited(pred) + 1
402 < get_irg_block_visited(current_ir_graph)) {
403 /* It's an empty block and not yet visited. */
404 for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
405 /* @@@ Hier brauche ich Schleifeninformation!!! Kontrollflusskante
406 muss Rueckwaertskante sein! (An allen vier in[q_preds] = phi
407 Anweisungen.) Trotzdem tuts bisher!! */
408 if (! is_Bad(get_Block_cfgpred(pred, j)))
416 /* now we are at k, copy the phi predecessors */
417 pred = get_nodes_block(get_Block_cfgpred(b, k));
418 for (i = 0; i < get_Phi_n_preds(phi); i++) {
419 if (! is_Bad(get_Block_cfgpred(pred, i)))
420 in[q_preds++] = get_Phi_pred(phi, i);
423 /* and now all the rest */
424 for (i = k+1; i < get_Block_n_cfgpreds(b); i++) {
425 pred = get_nodes_block(get_Block_cfgpred(b, i));
427 if (is_Bad(get_Block_cfgpred(b, i))) {
429 } else if (get_Block_block_visited(pred) +1
430 < get_irg_block_visited(current_ir_graph)) {
431 /* It's an empty block and not yet visited. */
432 for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
433 if (! is_Bad(get_Block_cfgpred(pred, j)))
443 exchange(phi, in[0]);
445 set_irn_in(phi, q_preds, in);
447 // assert(p_preds == q_preds && "Wrong Phi Fix");
449 phi = get_irn_link(phi);
454 /*- Fix the block -*/
456 for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
457 pred = get_nodes_block(get_Block_cfgpred(b, i));
459 if (is_Bad(get_Block_cfgpred(b, i))) {
460 /* case 1: Do nothing */
461 } else if (get_Block_block_visited(pred) +1
462 < get_irg_block_visited(current_ir_graph)) {
463 /* case 2: It's an empty block and not yet visited. */
464 assert(get_Block_n_cfgpreds(b) > 1);
465 /* Else it should be optimized by equivalent_node. */
466 for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
467 ir_node *pred_block = get_Block_cfgpred(pred, j);
469 /* because of breaking loops, not all predecessors are Bad-clean,
470 * so we must check this here again */
471 if (! is_Bad(pred_block))
472 in[n_preds++] = pred_block;
474 /* Remove block as it might be kept alive. */
475 exchange(pred, b/*new_Bad()*/);
478 in[n_preds++] = get_Block_cfgpred(b, i);
481 set_irn_in(b, n_preds, in);
483 assert(get_irn_link(b) == NULL || (n_preds == p_preds && "Wrong Phi Fix"));
489 /* Optimizations of the control flow that also require changes of Phi nodes.
491 * This optimization performs two passes over the graph.
493 * The first pass collects all Phi nodes in a link list in the block
494 * nodes. Further it performs simple control flow optimizations.
495 * Finally it marks all blocks that do not contain useful
496 * computations, i.e., these blocks might be removed.
498 * The second pass performs the optimizations intended by this algorithm.
499 * It walks only over block nodes and adapts these and the Phi nodes in these blocks,
500 * which it finds in a linked list computed by the first pass.
502 * We use the block_visited flag to mark empty blocks in the first
504 * @@@ It would be better to add a struct in the link field
505 * that keeps the Phi list and the mark. Place it on an obstack, as
506 * we will lose blocks and thereby generate mem leaks.
508 void optimize_cf(ir_graph *irg) {
511 ir_node *end = get_irg_end(irg);
512 ir_graph *rem = current_ir_graph;
513 irg_dom_state dom_state = get_irg_dom_state(current_ir_graph);
514 current_ir_graph = irg;
516 /* Handle graph state */
517 assert(get_irg_phase_state(irg) != phase_building);
518 if (get_irg_outs_state(current_ir_graph) == outs_consistent)
519 set_irg_outs_inconsistent(current_ir_graph);
520 if (get_irg_dom_state(current_ir_graph) == dom_consistent)
521 set_irg_dom_inconsistent(current_ir_graph);
523 if (dom_state == dom_consistent) {
524 /* we have dominace info, we can kill dead block */
525 irg_block_walk(get_irg_end_block(irg), NULL, remove_dead_blocks, NULL);
528 /* Use block visited flag to mark non-empty blocks. */
529 inc_irg_block_visited(irg);
530 irg_walk(end, merge_blocks, collect_nodes, NULL);
532 /* Optimize the standard code. */
533 irg_block_walk(get_irg_end_block(irg), optimize_blocks, NULL, NULL);
535 /* Walk all keep alives, optimize them if block, add to new in-array
536 for end if useful. */
537 in = NEW_ARR_F (ir_node *, 1);
538 in[0] = get_nodes_block(end);
539 inc_irg_visited(current_ir_graph);
541 for(i = 0; i < get_End_n_keepalives(end); i++) {
542 ir_node *ka = get_End_keepalive(end, i);
544 if (irn_not_visited(ka)) {
545 if ((get_irn_op(ka) == op_Block) && Block_not_block_visited(ka)) {
546 set_irg_block_visited(current_ir_graph, /* Don't walk all the way to Start. */
547 get_irg_block_visited(current_ir_graph)-1);
548 irg_block_walk(ka, optimize_blocks, NULL, NULL);
549 mark_irn_visited(ka);
550 ARR_APP1 (ir_node *, in, ka);
551 } else if (get_irn_op(ka) == op_Phi) {
552 mark_irn_visited(ka);
553 ARR_APP1 (ir_node *, in, ka);
557 /* DEL_ARR_F(end->in); GL @@@ tut nicht ! */
560 /* after optimize_cf(), only Bad data flow may remain. */
561 if (irg_vrfy_bads(irg, BAD_DF | BAD_BLOCK | TUPLE)) {
562 dump_ir_block_graph(irg, "-vrfy-cf");
563 dump_ir_graph(irg, "-vrfy-cf");
564 fprintf(stderr, "VRFY_BAD in optimize_cf()\n");
567 current_ir_graph = rem;