3 * File name: ir/opt/cfopt.c
4 * Purpose: control flow optimizations
8 * Copyright: (c) 1998-2004 Universität Karlsruhe
9 * Licence: This file protected by GPL - GNU GENERAL PUBLIC LICENSE.
19 #include "irgraph_t.h"
32 #include "irbackedge_t.h"
39 /*------------------------------------------------------------------*/
40 /* Control flow optimization. */
42 /* Removes Bad control flow predecessors and empty blocks. A block */
43 /* is empty if it contains only a Jmp node. */
44 /* Blocks can only be removed if they are not needed for the */
45 /* semantics of Phi nodes. */
46 /*------------------------------------------------------------------*/
49 * Removes Tuples from Block control flow predecessors.
50 * Optimizes blocks with equivalent_node(). This is tricky,
51 * as we want to avoid nodes that have as block predecessor Bads.
52 * Therefore we also optimize at control flow operations, depending
53 * how we first reach the Block.
55 static void merge_blocks(ir_node *n, void *env) {
59 set_irn_link(n, NULL);
61 if (get_irn_op(n) == op_Block) {
63 for (i = 0; i < get_Block_n_cfgpreds(n); i++) {
64 /* GL @@@ : is this possible? if (get_opt_normalize()) -- added, all tests go through.
65 A different order of optimizations might cause problems. */
66 if (get_opt_normalize())
67 set_Block_cfgpred(n, i, skip_Tuple(get_Block_cfgpred(n, i)));
69 new_block = equivalent_node(n);
71 exchange (n, new_block);
73 } else if (get_opt_optimize() && (get_irn_mode(n) == mode_X)) {
74 /* We will soon visit a block. Optimize it before visiting! */
75 ir_node *b = get_nodes_block(n);
78 new_block = equivalent_node(b);
80 while (irn_not_visited(b) && (!is_Bad(new_block)) && (new_block != b)) {
81 /* We would have to run gigo if new is bad, so we
82 promote it directly below. Nevertheless, we somtimes reach a block
83 the first time through a dataflow node. In this case we optimized the
84 block as such and have to promote the Bad here. */
85 assert(((b == new_block) ||
86 get_opt_control_flow_straightening() ||
87 get_opt_control_flow_weak_simplification()) &&
88 ("strange flag setting"));
89 exchange (b, new_block);
91 new_block = equivalent_node(b);
97 * BEWARE: do not kill floating notes here as they might be needed in
98 * valid blocks because of global CSE.
100 if (is_Bad(b) && get_opt_normalize() &&
101 get_op_pinned(get_irn_op(n)) == op_pin_state_pinned)
102 exchange(n, new_Bad());
107 * Collects all Phi nodes in link list of Block.
108 * Marks all blocks "block_visited" if they contain a node other
110 * Replaces n by Bad if n is unreachable control flow. We do that
111 * in the post walker, so we catch all blocks.
113 static void collect_nodes(ir_node *n, void *env) {
114 irg_dom_state *dom_state = env;
116 if (is_no_Block(n)) {
117 ir_node *b = get_nodes_block(n);
120 * BEWARE: do not kill floating notes here as they might be needed in
121 * valid blocks because of global CSE.
124 get_op_pinned(get_irn_op(n)) == op_pin_state_pinned) {
125 /* previous merge_blocks() may have killed dead blocks */
126 exchange(n, new_Bad());
128 else if ((get_irn_op(n) == op_Phi)) {
129 /* Collect Phi nodes to compact ins along with block's ins. */
130 set_irn_link(n, get_irn_link(b));
132 } else if ((get_irn_op(n) != op_Jmp) && !is_Bad(b)) { /* Check for non empty block. */
133 mark_Block_block_visited(b);
137 /* delete dead blocks: if we have dominator information, this can easily be detected
138 * BEWARE: don't kill the end block */
139 if (*dom_state == dom_consistent &&
140 n != get_irg_end_block(current_ir_graph) &&
141 get_Block_dom_depth(n) == -1 &&
142 get_opt_unreachable_code()) {
143 exchange (n, new_Bad());
148 /** Returns true if pred is predecessor of block. */
149 static int is_pred_of(ir_node *pred, ir_node *b) {
151 for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
152 ir_node *b_pred = get_nodes_block(get_Block_cfgpred(b, i));
153 if (b_pred == pred) return 1;
159 /** Test wether we can optimize away pred block pos of b.
161 * @param b A block node.
162 * @param pos The position of the predecessor block to judge about.
164 * @returns The number of predecessors
166 * The test is rather tricky.
168 * The situation is something like the following:
176 * b merges the control flow of an if-then-else. We may not remove
177 * the 'then' _and_ the 'else' block of an 'if' if there is a Phi
178 * node in b, even if both are empty. The destruction of this Phi
179 * requires that a copy is added before the merge. We have to
180 * keep one of the case blocks to place the copies in.
182 * To perform the test for pos, we must regard preds before pos
183 * as already removed.
185 static int test_whether_dispensable(ir_node *b, int pos) {
186 int i, j, n_preds = 1;
188 ir_node *cfop = get_Block_cfgpred(b, pos);
189 ir_node *pred = get_nodes_block(cfop);
191 if (get_Block_block_visited(pred) + 1
192 < get_irg_block_visited(current_ir_graph)) {
194 if (!get_opt_optimize() || !get_opt_control_flow_strong_simplification()) {
195 /* Mark block so that is will not be removed: optimization is turned off. */
196 set_Block_block_visited(pred, get_irg_block_visited(current_ir_graph)-1);
200 /* Seems to be empty. At least we detected this in collect_nodes. */
201 if (!get_irn_link(b)) {
202 /* There are no Phi nodes ==> all predecessors are dispensable. */
203 n_preds = get_Block_n_cfgpreds(pred);
205 /* b's pred blocks and pred's pred blocks must be pairwise disjunct.
206 Work preds < pos as if they were already removed. */
207 for (i = 0; i < pos; i++) {
208 ir_node *b_pred = get_nodes_block(get_Block_cfgpred(b, i));
209 if (get_Block_block_visited(b_pred) + 1
210 < get_irg_block_visited(current_ir_graph)) {
211 for (j = 0; j < get_Block_n_cfgpreds(b_pred); j++) {
212 ir_node *b_pred_pred = get_nodes_block(get_Block_cfgpred(b_pred, j));
213 if (is_pred_of(b_pred_pred, pred)) dispensable = 0;
216 if (is_pred_of(b_pred, pred)) dispensable = 0;
219 for (i = pos +1; i < get_Block_n_cfgpreds(b); i++) {
220 ir_node *b_pred = get_nodes_block(get_Block_cfgpred(b, i));
221 if (is_pred_of(b_pred, pred)) dispensable = 0;
224 set_Block_block_visited(pred, get_irg_block_visited(current_ir_graph)-1);
227 n_preds = get_Block_n_cfgpreds(pred);
236 /* This method removed Bad cf preds from Blocks and Phis, and removes
237 * empty blocks. A block is empty if it only contains Phi and Jmp nodes.
239 * We first adapt Phi nodes, then Block nodes, as we need the old ins
240 * of the Block to adapt the Phi nodes. We do this by computing new
241 * in arrays, and then replacing the old ones. So far we compute new in arrays
242 * for all nodes, not regarding whether there is a possibility for optimization.
244 * For each predecessor p of a Block b there are three cases:
245 * 1. The predecessor p is a Bad node: just skip it. The in array of b shrinks by one.
246 * 2. The predecessor p is empty. Remove p. All predecessors of p are now
248 * 3. The predecessor p is a block containing useful code. Just keep p as is.
250 * For Phi nodes f we have to check the conditions at the Block of f.
251 * For cases 1 and 3 we proceed as for Blocks. For case 2 we can have two
253 * 2a: The old precessor of the Phi f is a Phi pred_f IN THE BLOCK REMOVED. In this
254 * case we proceed as for blocks. We remove pred_f. All
255 * predecessors of pred_f now are predecessors of f.
256 * 2b: The old predecessor of f is NOT in the block removed. It might be a Phi, too.
257 * We have to replicate f for each predecessor of the removed block. Or, with
258 * other words, the removed predecessor block has exactly one predecessor.
260 * Further there is a special case for self referencing blocks:
262 * then_b else_b then_b else_b
268 * | | | === optimized to ===> \ | | |
274 * If there is a Phi in pred_b, but we remove pred_b, we have to generate a
275 * Phi in loop_b, that has the ins of the Phi in pred_b and a self referencing
277 * @@@ It is negotiable whether we should do this ... there might end up a copy
278 * from the Phi in the loop when removing the Phis.
280 static void optimize_blocks(ir_node *b, void *env) {
281 int i, j, k, max_preds, n_preds;
285 /* Count the number of predecessor if this block is merged with pred blocks
288 for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
289 max_preds += test_whether_dispensable(b, i);
291 in = (ir_node **) malloc(max_preds * sizeof(ir_node *));
294 printf(" working on "); DDMN(b);
295 for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
296 pred = get_nodes_block(get_Block_cfgpred(b, i));
297 if (is_Bad(get_Block_cfgpred(b, i))) {
298 printf(" removing Bad %i\n ", i);
299 } else if (get_Block_block_visited(pred) +1
300 < get_irg_block_visited(current_ir_graph)) {
301 printf(" removing pred %i ", i); DDMN(pred);
302 } else { printf(" Nothing to do for "); DDMN(pred); }
304 * end Debug output -*/
306 /*- Fix the Phi nodes -*/
307 phi = get_irn_link(b);
309 assert(get_irn_op(phi) == op_Phi);
310 /* Find the new predecessors for the Phi */
312 for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
313 pred = get_nodes_block(get_Block_cfgpred(b, i));
314 if (is_Bad(get_Block_cfgpred(b, i))) {
315 /* case Phi 1: Do nothing */
316 } else if (get_Block_block_visited(pred) + 1
317 < get_irg_block_visited(current_ir_graph)) {
318 /* case Phi 2: It's an empty block and not yet visited. */
319 ir_node *phi_pred = get_Phi_pred(phi, i);
321 for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
322 if (get_nodes_block(phi_pred) == pred) {
324 assert(get_irn_op(phi_pred) == op_Phi); /* Block is empty!! */
325 in[n_preds] = get_Phi_pred(phi_pred, j);
328 in[n_preds] = phi_pred;
333 /* The Phi_pred node is replaced now if it is a Phi.
335 Somehow the removed Phi node can be used legally in loops.
336 Therefore we replace the old phi by the new one.
338 Further we have to remove the old Phi node by replacing it
339 by Bad. Else it will remain in the keepalive array of End
340 and cause illegal situations. So if there is no loop, we should
343 if (get_nodes_block(phi_pred) == pred) {
344 /* remove the Phi as it might be kept alive. Further there
345 might be other users. */
346 exchange(phi_pred, phi); /* geht, ist aber doch semantisch falsch! Warum?? */
351 in[n_preds] = get_Phi_pred(phi, i);
358 /* By removal of Bad ins the Phi might be degenerated. */
359 exchange(phi, in[0]);
361 set_irn_in(phi, n_preds, in);
363 phi = get_irn_link(phi);
366 /*- This happens only if merge between loop backedge and single loop entry.
367 See special case above. -*/
368 for (k = 0; k < get_Block_n_cfgpreds(b); k++) {
369 pred = get_nodes_block(get_Block_cfgpred(b, k));
370 if (get_Block_block_visited(pred)+1 < get_irg_block_visited(current_ir_graph)) {
371 phi = get_irn_link(pred);
373 if (get_irn_op(phi) == op_Phi) {
374 set_nodes_block(phi, b);
377 for (i = 0; i < k; i++) {
378 pred = get_nodes_block(get_Block_cfgpred(b, i));
379 if (is_Bad(get_Block_cfgpred(b, i))) {
381 } else if (get_Block_block_visited(pred) +1
382 < get_irg_block_visited(current_ir_graph)) {
383 /* It's an empty block and not yet visited. */
384 for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
385 /* @@@ Hier brauche ich Schleifeninformation!!! Kontrollflusskante
386 muss Rueckwaertskante sein! (An allen vier in[n_preds] = phi
387 Anweisungen.) Trotzdem tuts bisher!! */
396 for (i = 0; i < get_Phi_n_preds(phi); i++) {
397 in[n_preds] = get_Phi_pred(phi, i);
400 for (i = k+1; i < get_Block_n_cfgpreds(b); i++) {
401 pred = get_nodes_block(get_Block_cfgpred(b, i));
402 if (is_Bad(get_Block_cfgpred(b, i))) {
404 } else if (get_Block_block_visited(pred) +1
405 < get_irg_block_visited(current_ir_graph)) {
406 /* It's an empty block and not yet visited. */
407 for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
417 exchange(phi, in[0]);
419 set_irn_in(phi, n_preds, in);
421 phi = get_irn_link(phi);
426 /*- Fix the block -*/
428 for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
429 pred = get_nodes_block(get_Block_cfgpred(b, i));
430 if (is_Bad(get_Block_cfgpred(b, i))) {
431 /* case 1: Do nothing */
432 } else if (get_Block_block_visited(pred) +1
433 < get_irg_block_visited(current_ir_graph)) {
434 /* case 2: It's an empty block and not yet visited. */
435 assert(get_Block_n_cfgpreds(b) > 1);
436 /* Else it should be optimized by equivalent_node. */
437 for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
438 in[n_preds] = get_Block_cfgpred(pred, j);
441 /* Remove block as it might be kept alive. */
442 exchange(pred, b/*new_Bad()*/);
445 in[n_preds] = get_Block_cfgpred(b, i);
449 set_irn_in(b, n_preds, in);
455 /* Optimizations of the control flow that also reqire changes of Phi nodes.
457 * This optimization performs two passes over the graph.
459 * The first pass collects all Phi nodes in a link list in the block
460 * nodes. Further it performs simple control flow optimizations.
461 * Finally it marks all blocks that do not contain useful
462 * computations, i.e., these blocks might be removed.
464 * The second pass performs the optimizations intended by this algorithm.
465 * It walks only over block nodes and adapts these and the Phi nodes in these blocks,
466 * which it finds in a linked list computed by the first pass.
468 * We use the block_visited flag to mark empty blocks in the first
470 * @@@ It would be better to add a struct in the link field
471 * that keeps the Phi list and the mark. Place it on an obstack, as
472 * we will lose blocks and thereby generate mem leaks.
474 void optimize_cf(ir_graph *irg) {
477 ir_node *end = get_irg_end(irg);
478 ir_graph *rem = current_ir_graph;
479 irg_dom_state dom_state = get_irg_dom_state(current_ir_graph);
480 current_ir_graph = irg;
482 /* Handle graph state */
483 assert(get_irg_phase_state(irg) != phase_building);
484 if (get_irg_outs_state(current_ir_graph) == outs_consistent)
485 set_irg_outs_inconsistent(current_ir_graph);
486 if (get_irg_dom_state(current_ir_graph) == dom_consistent)
487 set_irg_dom_inconsistent(current_ir_graph);
489 /* Use block visited flag to mark non-empty blocks. */
490 inc_irg_block_visited(irg);
491 irg_walk(end, merge_blocks, collect_nodes, &dom_state);
493 /* Optimize the standard code. */
494 irg_block_walk(get_irg_end_block(irg), optimize_blocks, NULL, NULL);
496 /* Walk all keep alives, optimize them if block, add to new in-array
497 for end if useful. */
498 in = NEW_ARR_F (ir_node *, 1);
499 in[0] = get_nodes_block(end);
500 inc_irg_visited(current_ir_graph);
501 for(i = 0; i < get_End_n_keepalives(end); i++) {
502 ir_node *ka = get_End_keepalive(end, i);
503 if (irn_not_visited(ka)) {
504 if ((get_irn_op(ka) == op_Block) && Block_not_block_visited(ka)) {
505 set_irg_block_visited(current_ir_graph, /* Don't walk all the way to Start. */
506 get_irg_block_visited(current_ir_graph)-1);
507 irg_block_walk(ka, optimize_blocks, NULL, NULL);
508 mark_irn_visited(ka);
509 ARR_APP1 (ir_node *, in, ka);
510 } else if (get_irn_op(ka) == op_Phi) {
511 mark_irn_visited(ka);
512 ARR_APP1 (ir_node *, in, ka);
516 /* DEL_ARR_F(end->in); GL @@@ tut nicht ! */
519 /* after optimize_cf(), only Bad data flow may remain. */
520 if (irg_vrfy_bads(irg, BAD_DF | BAD_BLOCK | TUPLE)) {
521 dump_ir_block_graph(irg, "-vrfy-cf");
522 dump_ir_graph(irg, "-vrfy-cf");
523 fprintf(stderr, "VRFY_BAD in optimize_cf()\n");
526 current_ir_graph = rem;