Control flow optimization: Merge consecutive blocks.
[libfirm] / ir / opt / cfopt.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief   Control flow optimizations.
23  * @author  Goetz Lindenmaier, Michael Beck, Sebastian Hack
24  *
25  * Removes Bad control flow predecessors and empty blocks.  A block is empty
26  * if it contains only a Jmp node. Blocks can only be removed if they are not
27  * needed for the semantics of Phi nodes. Further, we NEVER remove labeled
28  * blocks (even if we could move the label).
29  */
30 #include "config.h"
31
32 #include "iroptimize.h"
33
34 #include <assert.h>
35 #include <stdbool.h>
36
37 #include "xmalloc.h"
38 #include "irnode_t.h"
39 #include "irgraph_t.h"
40 #include "irprog_t.h"
41
42 #include "ircons.h"
43 #include "iropt_t.h"
44 #include "irgwalk.h"
45 #include "irgmod.h"
46 #include "irdump.h"
47 #include "irverify.h"
48 #include "iredges.h"
49 #include "opt_manage.h"
50
51 #include "array_t.h"
52
53 #include "irouts.h"
54 #include "irbackedge_t.h"
55
56 #include "irflag_t.h"
57 #include "firmstat.h"
58 #include "irpass.h"
59 #include "irnodehashmap.h"
60 #include "irtools.h"
61
62 #include "iropt_dbg.h"
63
/** An environment passed through merge_blocks and the collect/optimize
 *  walkers; accumulates what happened so the driver can invalidate
 *  analysis info accordingly. */
typedef struct merge_env {
	bool      changed;      /**< Set if the graph was changed at all. */
	bool      phis_moved;   /**< Set if Phi nodes were moved to another block. */
} merge_env;
69
70 /** set or reset the removable property of a block. */
71 static void set_Block_removable(ir_node *block, bool removable)
72 {
73         set_Block_mark(block, removable);
74 }
75
76 /** check if a block has the removable property set. */
77 static bool is_Block_removable(const ir_node *block)
78 {
79         return get_Block_mark(block);
80 }
81
82 /** checks if a given Cond node is a switch Cond. */
83 static bool is_switch_Cond(const ir_node *cond)
84 {
85         ir_node *sel = get_Cond_selector(cond);
86         return get_irn_mode(sel) != mode_b;
87 }
88
89 /** Walker: clear link fields and mark all blocks as removable. */
90 static void clear_link_and_mark_blocks_removable(ir_node *node, void *ctx)
91 {
92         (void) ctx;
93         set_irn_link(node, NULL);
94         if (is_Block(node)) {
95                 set_Block_removable(node, true);
96                 set_Block_phis(node, NULL);
97         } else if (is_Phi(node)) {
98                 set_Phi_next(node, NULL);
99         }
100 }
101
102 /**
103  * Collects all Phi nodes in link list of Block.
104  * Marks all blocks "non_removable" if they contain a node other
105  * than Jmp (and Proj).
106  * Links all Proj nodes to their predecessors.
107  * Collects all switch-Conds in a list.
108  */
109 static void collect_nodes(ir_node *n, void *ctx)
110 {
111         (void) ctx;
112         if (is_Phi(n)) {
113                 /* Collect Phi nodes to compact ins along with block's ins. */
114                 ir_node *block = get_nodes_block(n);
115                 add_Block_phi(block, n);
116         } else if (is_Block(n)) {
117                 if (get_Block_entity(n) != NULL) {
118                         /* block with a jump label attached cannot be removed. */
119                         set_Block_removable(n, false);
120                 }
121         } else if (is_Bad(n) || is_Jmp(n)) {
122                 /* ignore these */
123                 return;
124         } else {
125                 /* Check for non-empty block. */
126                 ir_node *block = get_nodes_block(n);
127
128                 set_Block_removable(block, false);
129
130                 if (is_Proj(n)) {
131                         /* link Proj nodes */
132                         ir_node *pred = get_Proj_pred(n);
133                         set_irn_link(n, get_irn_link(pred));
134                         set_irn_link(pred, n);
135                 }
136         }
137 }
138
139 /** Returns true if pred is predecessor of block b. */
140 static bool is_pred_of(const ir_node *pred, const ir_node *b)
141 {
142         int i;
143
144         for (i = get_Block_n_cfgpreds(b) - 1; i >= 0; --i) {
145                 ir_node *b_pred = get_Block_cfgpred_block(b, i);
146                 if (b_pred == pred)
147                         return true;
148         }
149         return false;
150 }
151
/** Test whether we can optimize away pred block pos of b.
 *
 *  @param  b    A block node.
 *  @param  pos  The position of the predecessor block to judge about.
 *
 *  @returns     The number of predecessors
 *
 *  The test is rather tricky.
 *
 *  The situation is something like the following:
 *  @verbatim
 *                 if-block
 *                  /   \
 *              then-b  else-b
 *                  \   /
 *                    b
 *  @endverbatim
 *
 *  b merges the control flow of an if-then-else.  We may not remove
 *  the 'then' _and_ the 'else' block of an 'if' if there is a Phi
 *  node in b, even if both are empty.  The destruction of this Phi
 *  requires that a copy is added before the merge.  We have to
 *  keep one of the case blocks to place the copies in.
 *
 *  To perform the test for pos, we must regard predecessors before pos
 *  as already removed.
 **/
static unsigned test_whether_dispensable(const ir_node *b, int pos)
{
	ir_node *pred  = get_Block_cfgpred(b, pos);
	ir_node *predb = get_nodes_block(pred);

	/* Bad preds and non-empty blocks stay: they count as one predecessor. */
	if (is_Bad(pred) || !is_Block_removable(predb))
		return 1;

	/* can't remove self-loops */
	if (predb == b)
		goto non_dispensable;
	if (is_unknown_jump(pred))
		goto non_dispensable;

	/* Seems to be empty. At least we detected this in collect_nodes. */
	if (get_Block_phis(b) != NULL) {
		int n_cfgpreds = get_Block_n_cfgpreds(b);
		int i;
		/* there are Phi nodes */

		/* b's pred blocks and pred's pred blocks must be pairwise disjunct.
		 * Handle all pred blocks with preds < pos as if they were already
		 * removed. */
		for (i = 0; i < pos; i++) {
			ir_node *other_pred  = get_Block_cfgpred(b, i);
			ir_node *other_predb = get_nodes_block(other_pred);
			if (is_Bad(other_pred))
				continue;
			if (is_Block_removable(other_predb)
			    && !Block_block_visited(other_predb)) {
				/* other_predb will be removed too: compare against its
				 * predecessors instead of the block itself. */
				int j;
				for (j = get_Block_n_cfgpreds(other_predb) - 1; j >= 0; --j) {
					ir_node *other_predpred
						= get_Block_cfgpred_block(other_predb, j);
					if (is_pred_of(other_predpred, predb))
						goto non_dispensable;
				}
			} else if (is_pred_of(other_predb, predb)) {
				goto non_dispensable;
			}
		}
		/* preds after pos are not yet removed, compare them directly */
		for (i = pos+1; i < n_cfgpreds; i++) {
			ir_node *other_predb = get_Block_cfgpred_block(b, i);
			if (is_pred_of(other_predb, predb))
				goto non_dispensable;
		}
	}
	/* we will not dispense already visited blocks */
	if (Block_block_visited(predb))
		return 1;
	/* if we get here, the block is dispensable, count useful preds */
	return get_irn_arity(predb);

non_dispensable:
	/* revoke the removable mark so later queries agree with this decision */
	set_Block_removable(predb, false);
	return 1;
}
236
237 /**
238  * This method merges blocks. A block is applicable to be merged, if it
239  * has only one predecessor with an unconditional jump to this block;
240  * and if this block does not contain any phis.
241  */
242 static void merge_blocks(ir_node *b)
243 {
244         if (get_Block_n_cfgpreds(b) == 1) {
245                 ir_node* pred = get_Block_cfgpred(b, 0);
246                 if (is_Jmp(pred)) {
247                         ir_node* pred_block = get_nodes_block(pred);
248                         if (get_Block_phis(b) == NULL) {
249                                 exchange(b, pred_block);
250                         }
251                 }
252         }
253 }
254
/**
 * This method removes empty blocks.  A block is empty if it only contains Phi
 * and Jmp nodes.
 *
 * We first adapt Phi nodes, then Block nodes, as we need the old ins
 * of the Block to adapt the Phi nodes.  We do this by computing new
 * in arrays, and then replacing the old ones.  So far we compute new in arrays
 * for all nodes, not regarding whether there is a possibility for optimization.
 *
 * For each predecessor p of a Block b there are three cases:
 *  - The predecessor p is a Bad node: just skip it. The in array of b shrinks
 *    by one.
 *  - The predecessor p is empty. Remove p. All predecessors of p are now
 *    predecessors of b.
 *  - The predecessor p is a block containing useful code. Just keep p as is.
 *
 * For Phi nodes f we have to check the conditions at the Block of f.
 * For cases 1 and 3 we proceed as for Blocks.  For case 2 we can have two
 * cases:
 *  -2a: The old predecessor of the Phi f is a Phi pred_f IN THE BLOCK REMOVED.
 *       In this case we proceed as for blocks. We remove pred_f.  All
 *       predecessors of pred_f now are predecessors of f.
 *  -2b: The old predecessor of f is NOT in the block removed. It might be a Phi
 *       too. We have to replicate f for each predecessor of the removed block.
 *       Or, with other words, the removed predecessor block has exactly one
 *       predecessor.
 *
 * Further there is a special case for self referencing blocks:
 * @verbatim
 *
 *    then_b     else_b                              then_b  else_b
 *       \      /                                      \      /
 *        \    /                                        |    /
 *        pred_b                                        |   /
 *         |   ____                                     |  /  ____
 *         |  |    |                                    |  | |    |
 *         |  |    |       === optimized to ===>        \  | |    |
 *        loop_b   |                                     loop_b   |
 *         |  |    |                                      |  |    |
 *         |  |____|                                      |  |____|
 *         |                                              |
 * @endverbatim
 *
 * If there is a Phi in pred_b, but we remove pred_b, we have to generate a
 * Phi in loop_b, that has the ins of the Phi in pred_b and a self referencing
 * backedge.
 */
static void optimize_blocks(ir_node *b, void *ctx)
{
	int i, j, k, n, max_preds, n_preds, p_preds = -1;
	ir_node *phi;
	ir_node *next;
	ir_node **in;
	merge_env *env = (merge_env*)ctx;

	if (get_Block_dom_depth(b) < 0) {
		/* ignore unreachable blocks */
		return;
	}

	/* Count the number of predecessor if this block is merged with pred blocks
	   that are empty.  test_whether_dispensable() returns the arity of a
	   dispensable pred block (its preds replace it) or 1 otherwise, so the
	   sum is an upper bound for the new in-array length. */
	max_preds = 0;
	for (i = 0, k = get_Block_n_cfgpreds(b); i < k; ++i) {
		max_preds += test_whether_dispensable(b, i);
	}
	in = XMALLOCN(ir_node*, max_preds);

	/*- Fix the Phi nodes of the current block -*/
	for (phi = get_Block_phis(b); phi != NULL; phi = next) {
		next = get_Phi_next(phi);

		/* Find the new predecessors for the Phi */
		p_preds = 0;
		for (i = 0, n = get_Block_n_cfgpreds(b); i < n; ++i) {
			ir_graph *irg = get_irn_irg(b);
			ir_node *predx = get_Block_cfgpred(b, i);
			ir_node *pred;

			/* case Phi 1: maintain Bads, as somebody else is responsible to
			 * remove them */
			if (is_Bad(predx)) {
				in[p_preds++] = new_r_Bad(irg, get_irn_mode(phi));
				continue;
			}

			pred = get_nodes_block(predx);

			/* case Phi 2: It's an empty block and not yet visited. */
			if (is_Block_removable(pred) && !Block_block_visited(pred)) {
				ir_node *phi_pred = get_Phi_pred(phi, i);

				for (j = 0, k = get_Block_n_cfgpreds(pred); j < k; j++) {
					ir_node *pred_pred = get_Block_cfgpred(pred, j);

					if (is_Bad(pred_pred)) {
						in[p_preds++] = new_r_Bad(irg, get_irn_mode(phi));
						continue;
					}

					if (get_nodes_block(phi_pred) == pred) {
						/* case Phi 2a: the Phi operand lives in the removed
						 * block itself, so pull up its operands instead. */
						assert(is_Phi(phi_pred));  /* Block is empty!! */

						in[p_preds++] = get_Phi_pred(phi_pred, j);
					} else {
						/* case Phi 2b: replicate the operand once per
						 * predecessor of the removed block. */
						in[p_preds++] = phi_pred;
					}
				}
			} else {
				/* case Phi 3: predecessor stays, keep the operand as is. */
				in[p_preds++] = get_Phi_pred(phi, i);
			}
		}
		assert(p_preds == max_preds);

		/* Fix the node */
		if (p_preds == 1) {
			exchange(phi, in[0]);
		} else {
			set_irn_in(phi, p_preds, in);
		}
		env->changed = true;
	}

	/*- This happens only if merge between loop backedge and single loop entry.
	    Moreover, it is only needed if predb is the direct dominator of b,
	    else there can be no uses of the Phi's in predb ... -*/
	for (k = 0, n = get_Block_n_cfgpreds(b); k < n; ++k) {
		ir_node *pred  = get_Block_cfgpred(b, k);
		ir_node *predb = get_nodes_block(pred);
		if (is_Bad(pred))
			continue;

		if (is_Block_removable(predb) && !Block_block_visited(predb)) {
			ir_node *next_phi;

			/* we found a predecessor block at position k that will be removed */
			for (phi = get_Block_phis(predb); phi != NULL; phi = next_phi) {
				int q_preds = 0;
				next_phi = get_Phi_next(phi);

				if (get_Block_idom(b) != predb) {
					/* predb is not the dominator. There can't be uses of
					 * pred's Phi nodes, kill them .*/
					ir_graph *irg  = get_irn_irg(b);
					ir_mode  *mode = get_irn_mode(phi);
					exchange(phi, new_r_Bad(irg, mode));
				} else {
					/* predb is the direct dominator of b. There might be uses
					 * of the Phi nodes from predb in further block, so move
					 * this phi from the predecessor into the block b */
					set_nodes_block(phi, b);
					set_Phi_next(phi, get_Block_phis(b));
					set_Block_phis(b, phi);
					env->phis_moved = true;

					/* first, copy all 0..k-1 predecessors */
					for (i = 0; i < k; i++) {
						ir_node *predx = get_Block_cfgpred(b, i);
						ir_node *pred_block;

						if (is_Bad(predx)) {
							ir_graph *irg  = get_irn_irg(b);
							ir_mode  *mode = get_irn_mode(phi);
							in[q_preds++] = new_r_Bad(irg, mode);
							continue;
						}
						pred_block = get_nodes_block(predx);
						if (is_Block_removable(pred_block)
						           && !Block_block_visited(pred_block)) {
							int n_cfgpreds = get_Block_n_cfgpreds(pred_block);
							/* It's an empty block and not yet visited. */
							for (j = 0; j < n_cfgpreds; j++) {
								if (!is_Bad(get_Block_cfgpred(pred_block, j))) {
									in[q_preds++] = phi;
								} else {
									ir_graph *irg  = get_irn_irg(b);
									ir_mode  *mode = get_irn_mode(phi);
									in[q_preds++] = new_r_Bad(irg, mode);
								}
							}
						} else {
							in[q_preds++] = phi;
						}
					}

					/* now we are at k, copy the phi predecessors */
					pred = get_nodes_block(get_Block_cfgpred(b, k));
					for (i = 0; i < get_Phi_n_preds(phi); i++) {
						in[q_preds++] = get_Phi_pred(phi, i);
					}

					/* and now all the rest */
					for (i = k+1; i < get_Block_n_cfgpreds(b); i++) {
						pred = get_Block_cfgpred_block(b, i);

						if (is_Bad(pred)) {
							ir_graph *irg  = get_irn_irg(b);
							ir_mode  *mode = get_irn_mode(phi);
							in[q_preds++] = new_r_Bad(irg, mode);
						} else if (is_Block_removable(pred) && !Block_block_visited(pred)) {
							/* It's an empty block and not yet visited. */
							for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
								if (! is_Bad(get_Block_cfgpred(pred, j))) {
									in[q_preds++] = phi;
								} else {
									ir_graph *irg  = get_irn_irg(b);
									ir_mode  *mode = get_irn_mode(phi);
									in[q_preds++] = new_r_Bad(irg, mode);
								}
							}
						} else {
							in[q_preds++] = phi;
						}
					}

					/* Fix the node */
					if (q_preds == 1)
						exchange(phi, in[0]);
					else
						set_irn_in(phi, q_preds, in);
					env->changed = true;

					assert(q_preds <= max_preds);
					// assert(p_preds == q_preds && "Wrong Phi Fix");
				}
			}
		}
	}

	/*- Fix the block -*/
	n_preds = 0;
	for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
		ir_node *pred  = get_Block_cfgpred(b, i);
		ir_node *predb = get_nodes_block(pred);
		ir_graph *irg  = get_irn_irg(pred);

		/* case 1: Bad predecessor */
		if (is_Bad(pred)) {
			in[n_preds++] = new_r_Bad(irg, mode_X);
			continue;
		}
		if (is_Block_removable(predb) && !Block_block_visited(predb)) {
			/* case 2: It's an empty block and not yet visited. */
			for (j = 0; j < get_Block_n_cfgpreds(predb); j++) {
				ir_node *predpred = get_Block_cfgpred(predb, j);

				if (is_Bad(predpred)) {
					in[n_preds++] = new_r_Bad(irg, mode_X);
					continue;
				}

				in[n_preds++] = predpred;
			}
			/* Remove block+jump as it might be kept alive. */
			exchange(pred, new_r_Bad(get_irn_irg(b), mode_X));
			exchange(predb, new_r_Bad(get_irn_irg(b), mode_BB));
		} else {
			/* case 3: predecessor stays as is. */
			in[n_preds++] = pred;
		}
	}
	assert(n_preds == max_preds);

	set_irn_in(b, n_preds, in);
	env->changed = true;

	/* see if phi-fix was correct */
	assert(get_Block_phis(b) == NULL || p_preds == -1 || (n_preds == p_preds));
	xfree(in);
}
528
529 /**
530  * Optimize boolean Conds, where true and false jump to the same block into a Jmp
531  * Block must contain no Phi nodes.
532  *
533  *        Cond
534  *       /    \
535  *  projA      projB   =>   Jmp     Bad
536  *       \    /                \   /
537  *       block                 block
538  */
539 static bool optimize_pred_cond(ir_node *block, int i, int j)
540 {
541         ir_node *projA, *projB, *cond, *pred_block, *jmp, *bad;
542         assert(i != j);
543
544         projA = get_Block_cfgpred(block, i);
545         if (!is_Proj(projA)) return false;
546         projB = get_Block_cfgpred(block, j);
547         if (!is_Proj(projB)) return false;
548         cond  = get_Proj_pred(projA);
549         if (!is_Cond(cond))  return false;
550
551         if (cond != get_Proj_pred(projB)) return false;
552         if (is_switch_Cond(cond)) return false;
553
554         /* cond should actually be a Jmp */
555         pred_block = get_nodes_block(cond);
556         jmp = new_r_Jmp(pred_block);
557         bad = new_r_Bad(get_irn_irg(block), mode_X);
558
559         assert(projA != projB);
560         exchange(projA, jmp);
561         exchange(projB, bad);
562         return true;
563 }
564
/** Per-block flag bits kept in the block-info hashmap. */
typedef enum block_flags_t {
	BF_HAS_OPERATIONS         = 1 << 0, /**< block contains nodes besides Phi/Jmp/Cond/Proj */
	BF_HAS_PHIS               = 1 << 1, /**< block contains Phi nodes */
	BF_IS_UNKNOWN_JUMP_TARGET = 1 << 2, /**< block is the target of an unknown jump */
} block_flags_t;
570
571 static bool get_block_flag(const ir_nodehashmap_t *infos, const ir_node *block,
572                            int flag)
573 {
574         return PTR_TO_INT(ir_nodehashmap_get(infos, block)) & flag;
575 }
576
577 static void set_block_flag(ir_nodehashmap_t *infos, ir_node *block,
578                            block_flags_t flag)
579 {
580         int data = PTR_TO_INT(ir_nodehashmap_get(infos, block));
581         data |= flag;
582         ir_nodehashmap_insert(infos, block, INT_TO_PTR(data));
583 }
584
585 static void clear_block_flag(ir_nodehashmap_t *infos, const ir_node *block)
586 {
587         ir_nodehashmap_remove(infos, block);
588 }
589
/** Returns true if @p block contains real operations (see BF_HAS_OPERATIONS). */
static bool has_operations(ir_nodehashmap_t *infos, const ir_node *block)
{
	return get_block_flag(infos, block, BF_HAS_OPERATIONS);
}

/** Marks @p block as containing real operations. */
static void set_has_operations(ir_nodehashmap_t *infos, ir_node *block)
{
	set_block_flag(infos, block, BF_HAS_OPERATIONS);
}

/** Returns true if @p block contains Phi nodes (see BF_HAS_PHIS). */
static bool has_phis(ir_nodehashmap_t *infos, const ir_node *block)
{
	return get_block_flag(infos, block, BF_HAS_PHIS);
}

/** Marks @p block as containing Phi nodes. */
static void set_has_phis(ir_nodehashmap_t *infos, ir_node *block)
{
	set_block_flag(infos, block, BF_HAS_PHIS);
}

/** Returns true if @p block is the target of an unknown jump. */
static bool is_unknown_jump_target(ir_nodehashmap_t *infos, const ir_node *block)
{
	return get_block_flag(infos, block, BF_IS_UNKNOWN_JUMP_TARGET);
}

/** Marks @p block as the target of an unknown jump. */
static void set_is_unknown_jump_target(ir_nodehashmap_t *infos, ir_node *block)
{
	set_block_flag(infos, block, BF_IS_UNKNOWN_JUMP_TARGET);
}
619
620 /**
621  * Pre-Walker: fill block info information.
622  */
623 static void compute_block_info(ir_node *n, void *x)
624 {
625         ir_nodehashmap_t *infos = (ir_nodehashmap_t*)x;
626
627         if (is_Block(n)) {
628                 int i, max = get_Block_n_cfgpreds(n);
629                 for (i=0; i<max; i++) {
630                         ir_node *pred = get_Block_cfgpred(n,i);
631                         if (is_unknown_jump(pred)) {
632                                 set_is_unknown_jump_target(infos, n);
633                         }
634                 }
635         } else if (is_Phi(n)) {
636                 ir_node *block = get_nodes_block(n);
637                 set_has_phis(infos, block);
638         } else if (is_Jmp(n) || is_Cond(n) || is_Proj(n)) {
639                 /* ignore */
640         } else {
641                 ir_node *block = get_nodes_block(n);
642                 set_has_operations(infos, block);
643         }
644 }
645
646 static void clear_block_info(ir_node *block, void *x)
647 {
648         ir_nodehashmap_t *infos = (ir_nodehashmap_t*)x;
649         clear_block_flag(infos, block);
650 }
651
/** Environment for the block-skipping phase. */
typedef struct skip_env {
	bool             changed;     /**< Set if the graph was changed. */
	ir_nodehashmap_t block_infos; /**< Maps blocks to block_flags_t bit sets. */
} skip_env;
656
657 /**
658  * Post-Block-walker: Optimize useless if's (boolean Cond nodes
659  * with same true/false target) away.
660  */
661 static void optimize_ifs(ir_node *block, void *x)
662 {
663         skip_env *env = (skip_env*)x;
664         int i, j;
665         int n_preds = get_Block_n_cfgpreds(block);
666
667         if (has_phis(&env->block_infos, block))
668                 return;
669
670         /* optimize Cond predecessors (might produce Bad predecessors) */
671         for (i = 0; i < n_preds; ++i) {
672                 for (j = i+1; j < n_preds; ++j) {
673                         optimize_pred_cond(block, i, j);
674                 }
675         }
676 }
677
678 /**
679  * Pre-Block walker: remove empty blocks (only contain a Jmp)
680  * that are control flow predecessors of the current block.
681  */
static void remove_empty_blocks(ir_node *block, void *x)
{
	skip_env *env = (skip_env*)x;
	int i;
	int n_preds = get_Block_n_cfgpreds(block);

	/* Note: n_preds may grow during the loop (see the multi-predecessor case
	 * below, which appends the skipped block's predecessors), so the bound is
	 * intentionally re-read each iteration. */
	for (i = 0; i < n_preds; ++i) {
		ir_node *jmp, *jmp_block;
		int n_jpreds = 0;

		jmp = get_Block_cfgpred(block, i);
		if (!is_Jmp(jmp))
			continue;
		jmp_block = get_nodes_block(jmp);
		if (jmp_block == block)
			continue; /* this infinite loop cannot be optimized any further */
		if (is_unknown_jump_target(&env->block_infos, jmp_block))
			continue; /* unknown jump target must not be optimized */
		if (has_phis(&env->block_infos,jmp_block))
			continue; /* this block contains Phis and is not skipped */
		if (Block_block_visited(jmp_block)) {
			continue;
			/* otherwise we could break the walker,
			 * if block was reached via
			 *     KeepAlive edge -> jmp_block -> A ---> block,
			 * because the walker cannot handle Id nodes.
			 *
			 *   A      B
			 *    \    /
			 *   jmp_block
			 *    /    \
			 * block    End
			 */
		}

		/* jmp_block is an empty block and can be optimized! */

		n_jpreds = get_Block_n_cfgpreds(jmp_block);
		/*
		 * If the jmp block has only one predecessor this is straightforward.
		 * However, if there are more predecessors, we only handle this,
		 * if block has no Phis.
		 */
		if (n_jpreds == 1) {
			ir_node *pred        = get_Block_cfgpred(jmp_block, 0);
			ir_node *pred_block  = get_nodes_block(pred);
			if (has_operations(&env->block_infos,jmp_block)) {
				if (get_irg_start_block(get_irn_irg(pred_block)) == pred_block)
					continue; /* must not merge operations into start block */
				if (!is_Jmp(pred))
					continue; /* must not create partially dead code, especially when it is mode_M */
			}

			/* skip jmp block by rerouting its predecessor to block
			 *
			 *     A              A
			 *     |              |
			 *  jmp_block   =>    |
			 *     |              |
			 *   block          block
			 */
			exchange(jmp, pred);

			/* cleanup: jmp_block might have a Keep edge! */
			exchange(jmp_block, pred_block);
			env->changed = true;
		} else if ( !has_phis(&env->block_infos, block) &&
		            !has_operations(&env->block_infos,jmp_block))
		{
			/* all predecessors can skip the jmp block, so block gets some new
			 * predecessors
			 *
			 *  A     B                 A  B
			 *   \   /                  |  |
			 * jmp_block  C  =>  Bad  C |  |
			 *      \    /          \ | | /
			 *      block            block
			 */
			ir_node **ins = ALLOCAN(ir_node*, n_preds+n_jpreds);
			int j;
			/* first copy the old predecessors, because the outer loop (i)
			 * still walks over them */
			for (j = 0; j < n_preds; ++j) {
				ins[j] = get_Block_cfgpred(block, j);
			}
			/* now append the new predecessors */
			for (j = 0; j < n_jpreds; ++j) {
				ir_node *pred = get_Block_cfgpred(jmp_block, j);
				ins[n_preds+j] = pred;
			}
			set_irn_in(block, n_preds+n_jpreds, ins);
			/* convert the jmp_block to Bad */
			ir_graph *irg = get_irn_irg(block);
			exchange(jmp_block, new_r_Bad(irg, mode_BB));
			exchange(jmp, new_r_Bad(irg, mode_X));
			/* let the outer loop walk over the new predecessors as well */
			n_preds += n_jpreds;
			env->changed = true;
			// TODO What if jmp_block had a KeepAlive edge?
		} else {
			/* This would involve Phis ... */
		}
	}
}
786
/*
 * All CFG optimizations that do not touch Phi nodes.
 *
 * Note that these might create critical edges.
 */
792 static void cfgopt_ignoring_phis(ir_graph *irg)
793 {
794         skip_env env;
795
796         env.changed = true;
797         ir_nodehashmap_init(&env.block_infos);
798
799         while (env.changed) {
800                 irg_walk_graph(irg, compute_block_info, NULL, &env.block_infos);
801                 env.changed = false;
802
803                 /* Remove blocks, which only consist of a Jmp */
804                 irg_block_walk_graph(irg, remove_empty_blocks, NULL, &env);
805
806                 /* Optimize Cond->Jmp, where then- and else-block are the same. */
807                 irg_block_walk_graph(irg, NULL, optimize_ifs, &env);
808
809                 if (env.changed) {
810                         clear_irg_state(irg, IR_GRAPH_STATE_CONSISTENT_DOMINANCE);
811                         /* clear block info, because it must be recomputed */
812                         irg_block_walk_graph(irg, clear_block_info, NULL, &env.block_infos);
813                         /* Removing blocks and Conds might enable more optimizations */
814                         continue;
815                 } else {
816                         break;
817                 }
818         }
819
820         ir_nodehashmap_destroy(&env.block_infos);
821 }
822
823 /* Optimizations of the control flow that also require changes of Phi nodes.  */
static ir_graph_state_t do_cfopt(ir_graph *irg)
{
	int i, j, n;
	ir_node **in = NULL;
	ir_node *end = get_irg_end(irg);
	ir_node *new_end;
	merge_env env;

	env.changed    = false;
	env.phis_moved = false;

	/* graph construction must be finished before optimizing */
	assert(get_irg_phase_state(irg) != phase_building);

	/* if the graph is not pinned, we cannot determine empty blocks */
	assert(get_irg_pinned(irg) != op_pin_state_floats &&
	       "Control flow optimization need a pinned graph");

	edges_deactivate(irg);

	/* First the "simple" optimizations, which do not touch Phis */
	cfgopt_ignoring_phis(irg);

	/* we use the mark flag to mark removable blocks */
	ir_reserve_resources(irg, IR_RESOURCE_BLOCK_MARK | IR_RESOURCE_IRN_LINK
	                     | IR_RESOURCE_PHI_LIST);

	/* The switch Cond optimization might expose unreachable code, so we loop */
	for (;;) {
		bool changed = false;

		assure_doms(irg);

		/*
		 * This pass collects all Phi nodes in a link list in the block
		 * nodes.  Further it performs simple control flow optimizations.
		 * Finally it marks all blocks that do not contain useful
		 * computations, i.e., these blocks might be removed.
		 */
		irg_walk(end, clear_link_and_mark_blocks_removable, collect_nodes, NULL);

		/* NOTE(review): 'changed' is never written between its
		 * initialization and this test, so the loop always exits after the
		 * first iteration and the clear_irg_state() call below is
		 * unreachable.  Presumably some walker was meant to report changes
		 * (e.g. via its env argument, which is passed as NULL here) —
		 * confirm the intended fixpoint behavior. */
		if (!changed)
			break;

		clear_irg_state(irg, IR_GRAPH_STATE_CONSISTENT_DOMINANCE
		                   | IR_GRAPH_STATE_VALID_EXTENDED_BLOCKS
		                   | IR_GRAPH_STATE_CONSISTENT_ENTITY_USAGE);
	}

	/* after collect_nodes:
	 * 1. removable blocks are now marked as such
	 * 2. phi lists are up to date
	 */

	/* Optimize the standard code.
	 * It walks only over block nodes and adapts these and the Phi nodes in these
	 * blocks, which it finds in a linked list computed before.
	 * */
	assure_doms(irg);
	irg_block_walk_graph(irg, optimize_blocks, merge_blocks, &env);

	new_end = optimize_in_place(end);
	if (new_end != end) {
		set_irg_end(irg, new_end);
		end = new_end;
	}
	remove_End_Bads_and_doublets(end);

	ir_free_resources(irg, IR_RESOURCE_BLOCK_MARK | IR_RESOURCE_IRN_LINK
	                  | IR_RESOURCE_PHI_LIST);

	if (env.phis_moved) {
		/* Bad: when we moved Phi's, we might produce dead Phi nodes
		   that are kept-alive.
		   Some other phases cannot cope with this, so kill them.
		 */
		n = get_End_n_keepalives(end);
		if (n > 0) {
			NEW_ARR_A(ir_node *, in, n);
			assure_irg_outs(irg);

			/* keep only those keep-alives that are not dead Phis; a Phi
			 * counts as dead here when its only users are itself (self
			 * loop) and the End node */
			for (i = j = 0; i < n; ++i) {
				ir_node *ka = get_End_keepalive(end, i);

				if (is_Phi(ka)) {
					int k;

					for (k = get_irn_n_outs(ka) - 1; k >= 0; --k) {
						ir_node *user = get_irn_out(ka, k);

						if (user != ka && user != end) {
							/* Is it a real user or just a self loop ? */
							break;
						}
					}
					/* k >= 0 means a real user was found: keep the Phi */
					if (k >= 0)
						in[j++] = ka;
				} else
					in[j++] = ka;
			}
			if (j != n) {
				set_End_keepalives(end, j, in);
				env.changed = true;
			}
		}
	}

	/* 0: no ir_graph_state_t flags are claimed to hold afterwards */
	return 0;
}
932
/* Descriptor driving do_cfopt through perform_irg_optimization();
 * the state flag presumably encodes a precondition (unreachable code
 * already removed) — confirm against optdesc_t in opt_manage.h. */
static optdesc_t opt_cf = {
	"control-flow",
	IR_GRAPH_STATE_NO_UNREACHABLE_CODE,
	do_cfopt,
};
938
/* Public entry point: runs the control flow optimization on irg via the
 * opt_cf descriptor. */
void optimize_cf(ir_graph *irg)
{
	perform_irg_optimization(irg, &opt_cf);
}
943
944 /* Creates an ir_graph pass for optimize_cf. */
945 ir_graph_pass_t *optimize_cf_pass(const char *name)
946 {
947         return def_graph_pass(name ? name : "optimize_cf", optimize_cf);
948 }