include backend headers without "../"
[libfirm] / ir / opt / cfopt.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief   Control flow optimizations.
23  * @author  Goetz Lindenmaier, Michael Beck, Sebastian Hack
24  * @version $Id$
25  *
26  * Removes Bad control flow predecessors and empty blocks.  A block is empty
27  * if it contains only a Jmp node. Blocks can only be removed if they are not
28  * needed for the semantics of Phi nodes. Further, we NEVER remove labeled
29  * blocks (even if we could move the label).
30  */
31 #include "config.h"
32
33 #include "iroptimize.h"
34
35 #include <assert.h>
36 #include <stdbool.h>
37
38 #include "xmalloc.h"
39 #include "irnode_t.h"
40 #include "irgraph_t.h"
41 #include "irprog_t.h"
42
43 #include "ircons.h"
44 #include "iropt_t.h"
45 #include "irgwalk.h"
46 #include "irgmod.h"
47 #include "irdump.h"
48 #include "irverify.h"
49 #include "iredges.h"
50 #include "opt_manage.h"
51
52 #include "array_t.h"
53
54 #include "irouts.h"
55 #include "irbackedge_t.h"
56
57 #include "irflag_t.h"
58 #include "firmstat.h"
59 #include "irpass.h"
60 #include "irphase_t.h"
61
62 #include "iropt_dbg.h"
63
/** An environment for merge_blocks and collect nodes.
 *  Passed through the walkers via the void *ctx parameter. */
typedef struct merge_env {
	bool      changed;      /**< Set if the graph was changed. */
	bool      phis_moved;   /**< Set if Phi nodes were moved (out of removed blocks). */
} merge_env;
69
70 /** set or reset the removable property of a block. */
71 static void set_Block_removable(ir_node *block, bool removable)
72 {
73         set_Block_mark(block, removable);
74 }
75
76 /** check if a block has the removable property set. */
77 static bool is_Block_removable(const ir_node *block)
78 {
79         return get_Block_mark(block);
80 }
81
82 /** checks if a given Cond node is a switch Cond. */
83 static bool is_switch_Cond(const ir_node *cond)
84 {
85         ir_node *sel = get_Cond_selector(cond);
86         return get_irn_mode(sel) != mode_b;
87 }
88
89 /** Walker: clear link fields and mark all blocks as removable. */
90 static void clear_link_and_mark_blocks_removable(ir_node *node, void *ctx)
91 {
92         (void) ctx;
93         set_irn_link(node, NULL);
94         if (is_Block(node)) {
95                 set_Block_removable(node, true);
96                 set_Block_phis(node, NULL);
97         } else if (is_Phi(node)) {
98                 set_Phi_next(node, NULL);
99         }
100 }
101
102 /**
103  * Collects all Phi nodes in link list of Block.
104  * Marks all blocks "non_removable" if they contain a node other
105  * than Jmp (and Proj).
106  * Links all Proj nodes to their predecessors.
107  * Collects all switch-Conds in a list.
108  */
109 static void collect_nodes(ir_node *n, void *ctx)
110 {
111         ir_node ***switch_conds = (ir_node***)ctx;
112
113         if (is_Phi(n)) {
114                 /* Collect Phi nodes to compact ins along with block's ins. */
115                 ir_node *block = get_nodes_block(n);
116                 add_Block_phi(block, n);
117         } else if (is_Block(n)) {
118                 if (has_Block_entity(n)) {
119                         /* block with a jump label attached cannot be removed. */
120                         set_Block_removable(n, false);
121                 }
122         } else if (is_Bad(n) || is_Jmp(n)) {
123                 /* ignore these */
124                 return;
125         } else {
126                 /* Check for non-empty block. */
127                 ir_node *block = get_nodes_block(n);
128
129                 set_Block_removable(block, false);
130
131                 if (is_Proj(n)) {
132                         /* link Proj nodes */
133                         ir_node *pred = get_Proj_pred(n);
134                         set_irn_link(n, get_irn_link(pred));
135                         set_irn_link(pred, n);
136                 } else if (is_Cond(n) && is_switch_Cond(n)) {
137                         /* found a switch-Cond, collect */
138                         ARR_APP1(ir_node*, *switch_conds, n);
139                 }
140         }
141 }
142
143 /** Returns true if pred is predecessor of block b. */
144 static bool is_pred_of(const ir_node *pred, const ir_node *b)
145 {
146         int i;
147
148         for (i = get_Block_n_cfgpreds(b) - 1; i >= 0; --i) {
149                 ir_node *b_pred = get_Block_cfgpred_block(b, i);
150                 if (b_pred == pred)
151                         return true;
152         }
153         return false;
154 }
155
156 /** Test whether we can optimize away pred block pos of b.
157  *
158  *  @param  b    A block node.
159  *  @param  pos  The position of the predecessor block to judge about.
160  *
161  *  @returns     The number of predecessors
162  *
163  *  The test is rather tricky.
164  *
165  *  The situation is something like the following:
166  *  @verbatim
167  *                 if-block
168  *                  /   \
169  *              then-b  else-b
170  *                  \   /
171  *                    b
172  *  @endverbatim
173  *
174  *  b merges the control flow of an if-then-else.  We may not remove
175  *  the 'then' _and_ the 'else' block of an 'if' if there is a Phi
176  *  node in b, even if both are empty.  The destruction of this Phi
177  *  requires that a copy is added before the merge.  We have to
178  *  keep one of the case blocks to place the copies in.
179  *
180  *  To perform the test for pos, we must regard predecessors before pos
181  *  as already removed.
182  **/
static unsigned test_whether_dispensable(const ir_node *b, int pos)
{
	ir_node *pred  = get_Block_cfgpred(b, pos);
	ir_node *predb = get_nodes_block(pred);

	/* a Bad edge or a non-empty predecessor block contributes exactly one
	 * predecessor and stays as it is */
	if (is_Bad(pred) || !is_Block_removable(predb))
		return 1;

	/* can't remove self-loops */
	if (predb == b)
		goto non_dispensable;
	if (is_unknown_jump(pred))
		goto non_dispensable;

	/* Seems to be empty. At least we detected this in collect_nodes. */
	if (get_Block_phis(b) != NULL) {
		int n_cfgpreds = get_Block_n_cfgpreds(b);
		int i;
		/* there are Phi nodes */

		/* b's pred blocks and pred's pred blocks must be pairwise disjunct.
		 * Handle all pred blocks with preds < pos as if they were already
		 * removed. */
		for (i = 0; i < pos; i++) {
			ir_node *other_pred  = get_Block_cfgpred(b, i);
			ir_node *other_predb = get_nodes_block(other_pred);
			if (is_Bad(other_pred))
				continue;
			if (is_Block_removable(other_predb)
			    && !Block_block_visited(other_predb)) {
				int j;
				/* other_predb will be merged too: its predecessors become
				 * predecessors of b, so they must not collide with predb */
				for (j = get_Block_n_cfgpreds(other_predb) - 1; j >= 0; --j) {
					ir_node *other_predpred
						= get_Block_cfgpred_block(other_predb, j);
					if (is_pred_of(other_predpred, predb))
						goto non_dispensable;
				}
			} else if (is_pred_of(other_predb, predb)) {
				goto non_dispensable;
			}
		}
		/* predecessors after pos are kept as-is, so compare them directly */
		for (i = pos+1; i < n_cfgpreds; i++) {
			ir_node *other_predb = get_Block_cfgpred_block(b, i);
			if (is_pred_of(other_predb, predb))
				goto non_dispensable;
		}
	}
	/* we will not dispense already visited blocks */
	if (Block_block_visited(predb))
		return 1;
	/* if we get here, the block is dispensable, count useful preds:
	 * merging predb into b replaces this edge by all of predb's in-edges */
	return get_irn_arity(predb);

non_dispensable:
	/* record the decision so optimize_blocks sees a consistent view */
	set_Block_removable(predb, false);
	return 1;
}
240
241 /**
242  * This method removes empty blocks.  A block is empty if it only contains Phi
243  * and Jmp nodes.
244  *
245  * We first adapt Phi nodes, then Block nodes, as we need the old ins
246  * of the Block to adapt the Phi nodes.  We do this by computing new
247  * in arrays, and then replacing the old ones.  So far we compute new in arrays
248  * for all nodes, not regarding whether there is a possibility for optimization.
249  *
250  * For each predecessor p of a Block b there are three cases:
251  *  - The predecessor p is a Bad node: just skip it. The in array of b shrinks
252  *    by one.
253  *  - The predecessor p is empty. Remove p. All predecessors of p are now
254  *    predecessors of b.
255  *  - The predecessor p is a block containing useful code. Just keep p as is.
256  *
257  * For Phi nodes f we have to check the conditions at the Block of f.
258  * For cases 1 and 3 we proceed as for Blocks.  For case 2 we can have two
259  * cases:
260  *  -2a: The old predecessor of the Phi f is a Phi pred_f IN THE BLOCK REMOVED.
261  *       In this case we proceed as for blocks. We remove pred_f.  All
262  *       predecessors of pred_f now are predecessors of f.
263  *  -2b: The old predecessor of f is NOT in the block removed. It might be a Phi
264  *       too. We have to replicate f for each predecessor of the removed block.
265  *       Or, with other words, the removed predecessor block has exactly one
266  *       predecessor.
267  *
268  * Further there is a special case for self referencing blocks:
269  * @verbatim
270  *
271  *    then_b     else_b                              then_b  else_b
272  *       \      /                                      \      /
273  *        \    /                                        |    /
274  *        pred_b                                        |   /
275  *         |   ____                                     |  /  ____
276  *         |  |    |                                    |  | |    |
277  *         |  |    |       === optimized to ===>        \  | |    |
278  *        loop_b   |                                     loop_b   |
279  *         |  |    |                                      |  |    |
280  *         |  |____|                                      |  |____|
281  *         |                                              |
282  * @endverbatim
283  *
284  * If there is a Phi in pred_b, but we remove pred_b, we have to generate a
285  * Phi in loop_b, that has the ins of the Phi in pred_b and a self referencing
286  * backedge.
287  */
/* Block walker: merges dispensable (empty, removable) predecessor blocks
 * into @p b, adapting b's Phi nodes, any Phis found in removed predecessor
 * blocks, and finally b's in-array itself. See the long comment above for
 * the case analysis. env is the merge_env of this pass. */
static void optimize_blocks(ir_node *b, void *ctx)
{
	int i, j, k, n, max_preds, n_preds, p_preds = -1;
	ir_node *phi;
	ir_node *next;
	ir_node **in;
	merge_env *env = (merge_env*)ctx;

	if (get_Block_dom_depth(b) < 0) {
		/* ignore unreachable blocks */
		return;
	}

	/* Count the number of predecessor if this block is merged with pred blocks
	   that are empty. */
	max_preds = 0;
	for (i = 0, k = get_Block_n_cfgpreds(b); i < k; ++i) {
		/* test_whether_dispensable also finalizes the removable marks */
		max_preds += test_whether_dispensable(b, i);
	}
	/* scratch array, large enough for every in-array built below */
	in = XMALLOCN(ir_node*, max_preds);

	/*- Fix the Phi nodes of the current block -*/
	for (phi = get_Block_phis(b); phi != NULL; phi = next) {
		next = get_Phi_next(phi);

		/* Find the new predecessors for the Phi */
		p_preds = 0;
		for (i = 0, n = get_Block_n_cfgpreds(b); i < n; ++i) {
			ir_graph *irg = get_irn_irg(b);
			ir_node *predx = get_Block_cfgpred(b, i);
			ir_node *pred;

			/* case Phi 1: maintain Bads, as somebody else is responsible to
			 * remove them */
			if (is_Bad(predx)) {
				in[p_preds++] = new_r_Bad(irg, get_irn_mode(phi));
				continue;
			}

			pred = get_nodes_block(predx);

			/* case Phi 2: It's an empty block and not yet visited. */
			if (is_Block_removable(pred) && !Block_block_visited(pred)) {
				ir_node *phi_pred = get_Phi_pred(phi, i);

				/* pred will be merged: one entry per in-edge of pred */
				for (j = 0, k = get_Block_n_cfgpreds(pred); j < k; j++) {
					ir_node *pred_pred = get_Block_cfgpred(pred, j);

					if (is_Bad(pred_pred)) {
						in[p_preds++] = new_r_Bad(irg, get_irn_mode(phi));
						continue;
					}

					if (get_nodes_block(phi_pred) == pred) {
						/* case Phi 2a: */
						assert(is_Phi(phi_pred));  /* Block is empty!! */

						in[p_preds++] = get_Phi_pred(phi_pred, j);
					} else {
						/* case Phi 2b: */
						in[p_preds++] = phi_pred;
					}
				}
			} else {
				/* case Phi 3: */
				in[p_preds++] = get_Phi_pred(phi, i);
			}
		}
		assert(p_preds == max_preds);

		/* Fix the node */
		if (p_preds == 1) {
			/* single predecessor: the Phi degenerates to its only input */
			exchange(phi, in[0]);
		} else {
			set_irn_in(phi, p_preds, in);
		}
		env->changed = true;
	}

	/*- This happens only if merge between loop backedge and single loop entry.
	    Moreover, it is only needed if predb is the direct dominator of b,
	    else there can be no uses of the Phi's in predb ... -*/
	for (k = 0, n = get_Block_n_cfgpreds(b); k < n; ++k) {
		ir_node *pred  = get_Block_cfgpred(b, k);
		ir_node *predb = get_nodes_block(pred);
		if (is_Bad(pred))
			continue;

		if (is_Block_removable(predb) && !Block_block_visited(predb)) {
			ir_node *next_phi;

			/* we found a predecessor block at position k that will be removed */
			for (phi = get_Block_phis(predb); phi != NULL; phi = next_phi) {
				int q_preds = 0;
				next_phi = get_Phi_next(phi);

				if (get_Block_idom(b) != predb) {
					/* predb is not the dominator. There can't be uses of
					 * pred's Phi nodes, kill them .*/
					ir_graph *irg  = get_irn_irg(b);
					ir_mode  *mode = get_irn_mode(phi);
					exchange(phi, new_r_Bad(irg, mode));
				} else {
					/* predb is the direct dominator of b. There might be uses
					 * of the Phi nodes from predb in further block, so move
					 * this phi from the predecessor into the block b */
					set_nodes_block(phi, b);
					set_Phi_next(phi, get_Block_phis(b));
					set_Block_phis(b, phi);
					env->phis_moved = true;

					/* first, copy all 0..k-1 predecessors */
					for (i = 0; i < k; i++) {
						ir_node *predx = get_Block_cfgpred(b, i);
						ir_node *pred_block;

						if (is_Bad(predx)) {
							ir_graph *irg  = get_irn_irg(b);
							ir_mode  *mode = get_irn_mode(phi);
							in[q_preds++] = new_r_Bad(irg, mode);
							continue;
						}
						pred_block = get_nodes_block(predx);
						if (is_Block_removable(pred_block)
						           && !Block_block_visited(pred_block)) {
							int n_cfgpreds = get_Block_n_cfgpreds(pred_block);
							/* It's an empty block and not yet visited. */
							for (j = 0; j < n_cfgpreds; j++) {
								if (!is_Bad(get_Block_cfgpred(pred_block, j))) {
									/* the moved Phi references itself on
									 * edges not coming through predb */
									in[q_preds++] = phi;
								} else {
									ir_graph *irg  = get_irn_irg(b);
									ir_mode  *mode = get_irn_mode(phi);
									in[q_preds++] = new_r_Bad(irg, mode);
								}
							}
						} else {
							in[q_preds++] = phi;
						}
					}

					/* now we are at k, copy the phi predecessors */
					pred = get_nodes_block(get_Block_cfgpred(b, k));
					for (i = 0; i < get_Phi_n_preds(phi); i++) {
						in[q_preds++] = get_Phi_pred(phi, i);
					}

					/* and now all the rest */
					for (i = k+1; i < get_Block_n_cfgpreds(b); i++) {
						pred = get_Block_cfgpred_block(b, i);

						if (is_Bad(pred)) {
							ir_graph *irg  = get_irn_irg(b);
							ir_mode  *mode = get_irn_mode(phi);
							in[q_preds++] = new_r_Bad(irg, mode);
						} else if (is_Block_removable(pred) && !Block_block_visited(pred)) {
							/* It's an empty block and not yet visited. */
							for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
								if (! is_Bad(get_Block_cfgpred(pred, j))) {
									in[q_preds++] = phi;
								} else {
									ir_graph *irg  = get_irn_irg(b);
									ir_mode  *mode = get_irn_mode(phi);
									in[q_preds++] = new_r_Bad(irg, mode);
								}
							}
						} else {
							in[q_preds++] = phi;
						}
					}

					/* Fix the node */
					if (q_preds == 1)
						exchange(phi, in[0]);
					else
						set_irn_in(phi, q_preds, in);
					env->changed = true;

					assert(q_preds <= max_preds);
					// assert(p_preds == q_preds && "Wrong Phi Fix");
				}
			}
		}
	}

	/*- Fix the block -*/
	n_preds = 0;
	for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
		ir_node *pred  = get_Block_cfgpred(b, i);
		ir_node *predb = get_nodes_block(pred);
		ir_graph *irg  = get_irn_irg(pred);

		/* case 1: Bad predecessor */
		if (is_Bad(pred)) {
			in[n_preds++] = new_r_Bad(irg, mode_X);
			continue;
		}
		if (is_Block_removable(predb) && !Block_block_visited(predb)) {
			/* case 2: It's an empty block and not yet visited. */
			for (j = 0; j < get_Block_n_cfgpreds(predb); j++) {
				ir_node *predpred = get_Block_cfgpred(predb, j);

				if (is_Bad(predpred)) {
					in[n_preds++] = new_r_Bad(irg, mode_X);
					continue;
				}

				in[n_preds++] = predpred;
			}
			/* Remove block+jump as it might be kept alive. */
			exchange(pred, new_r_Bad(get_irn_irg(b), mode_X));
			exchange(predb, new_r_Bad(get_irn_irg(b), mode_BB));
		} else {
			/* case 3: */
			in[n_preds++] = pred;
		}
	}
	assert(n_preds == max_preds);

	set_irn_in(b, n_preds, in);
	env->changed = true;

	/* see if phi-fix was correct: Phi and block in-arrays must agree
	 * (p_preds == -1 means b had no Phi nodes at all) */
	assert(get_Block_phis(b) == NULL || p_preds == -1 || (n_preds == p_preds));
	xfree(in);
}
514
/**
 * Optimize table-switch Conds.
 *
 * @param cond the switch-Cond
 * @return true if the switch-Cond was optimized
 *
 * @note Relies on the Proj chain built by collect_nodes(): the link of the
 *       Cond points to its first Proj, whose link points to the next, etc.
 */
static bool handle_switch_cond(ir_node *cond)
{
	ir_node *sel   = get_Cond_selector(cond);
	ir_node *proj1 = (ir_node*)get_irn_link(cond);
	ir_node *proj2 = (ir_node*)get_irn_link(proj1);
	ir_node *blk   = get_nodes_block(cond);

	/* exactly 1 Proj on the Cond node: must be the defaultProj */
	if (proj2 == NULL) {
		ir_node *jmp = new_r_Jmp(blk);
		assert(get_Cond_default_proj(cond) == get_Proj_proj(proj1));
		/* convert it into a Jmp */
		exchange(proj1, jmp);
		return true;
	}

	/* handle Cond nodes with constant argument. In this case the localopt rules
	 * should have killed all obviously impossible cases.
	 * So the only case left to handle here is 1 defaultProj + 1 case
	 * (this one case should be the one taken) */
	if (get_irn_link(proj2) == NULL) {
		/* exactly two Projs: selector must be constant to fold further */
		ir_tarval *tv = value_of(sel);

		if (tv != tarval_bad) {
			/* we have a constant switch */
			long      num     = get_tarval_long(tv);
			long      def_num = get_Cond_default_proj(cond);
			ir_graph *irg     = get_irn_irg(cond);
			ir_node  *bad     = new_r_Bad(irg, mode_X);

			if (def_num == get_Proj_proj(proj1)) {
				/* first one is the defProj */
				if (num == get_Proj_proj(proj2)) {
					/* the case Proj is taken, the default is unreachable */
					ir_node *jmp = new_r_Jmp(blk);
					exchange(proj2, jmp);
					exchange(proj1, bad);
					return true;
				}
			} else if (def_num == get_Proj_proj(proj2)) {
				/* second one is the defProj */
				if (num == get_Proj_proj(proj1)) {
					ir_node *jmp = new_r_Jmp(blk);
					exchange(proj1, jmp);
					exchange(proj2, bad);
					return true;
				}
			} else {
				/* neither: strange, Cond was not optimized so far */
				if (num == get_Proj_proj(proj1)) {
					ir_node *jmp = new_r_Jmp(blk);
					exchange(proj1, jmp);
					exchange(proj2, bad);
					return true;
				} else if (num == get_Proj_proj(proj2)) {
					ir_node *jmp = new_r_Jmp(blk);
					exchange(proj2, jmp);
					exchange(proj1, bad);
					return true;
				}
			}
		}
	}
	return false;
}
585
586 /**
587  * Optimize boolean Conds, where true and false jump to the same block into a Jmp
588  * Block must contain no Phi nodes.
589  *
590  *        Cond
591  *       /    \
592  *  projA      projB   =>   Jmp     Bad
593  *       \    /                \   /
594  *       block                 block
595  */
596 static bool optimize_pred_cond(ir_node *block, int i, int j)
597 {
598         ir_node *projA, *projB, *cond, *pred_block, *jmp, *bad;
599         assert(i != j);
600
601         projA = get_Block_cfgpred(block, i);
602         if (!is_Proj(projA)) return false;
603         projB = get_Block_cfgpred(block, j);
604         if (!is_Proj(projB)) return false;
605         cond  = get_Proj_pred(projA);
606         if (!is_Cond(cond))  return false;
607
608         if (cond != get_Proj_pred(projB)) return false;
609         if (is_switch_Cond(cond)) return false;
610
611         /* cond should actually be a Jmp */
612         pred_block = get_nodes_block(cond);
613         jmp = new_r_Jmp(pred_block);
614         bad = new_r_Bad(get_irn_irg(block), mode_X);
615
616         assert(projA != projB);
617         exchange(projA, jmp);
618         exchange(projB, bad);
619         return true;
620 }
621
/** Per-block property bits stored in the block-info phase. */
typedef enum block_flags_t {
	BF_HAS_OPERATIONS         = 1 << 0, /**< block contains nodes besides Jmp/Cond/Proj/Phi */
	BF_HAS_PHIS               = 1 << 1, /**< block contains Phi nodes */
	BF_IS_UNKNOWN_JUMP_TARGET = 1 << 2, /**< block is entered by an unknown jump */
} block_flags_t;
627
628 static bool get_phase_flag(ir_phase *block_info, ir_node *block, int flag)
629 {
630         return PTR_TO_INT(phase_get_irn_data(block_info, block)) & flag;
631 }
632
633 static void set_phase_flag(ir_phase *block_info, ir_node *block,
634                            block_flags_t flag)
635 {
636         int data = PTR_TO_INT(phase_get_irn_data(block_info, block));
637         data |= flag;
638         phase_set_irn_data(block_info, block, INT_TO_PTR(data));
639 }
640
641 static void clear_phase_flag(ir_phase *block_info, ir_node *block)
642 {
643         phase_set_irn_data(block_info, block, NULL);
644 }
645
646 static bool has_operations(ir_phase *block_info, ir_node *block)
647 {
648         return get_phase_flag(block_info, block, BF_HAS_OPERATIONS);
649 }
650
651 static void set_has_operations(ir_phase *block_info, ir_node *block)
652 {
653         set_phase_flag(block_info, block, BF_HAS_OPERATIONS);
654 }
655
656 static bool has_phis(ir_phase *block_info, ir_node *block)
657 {
658         return get_phase_flag(block_info, block, BF_HAS_PHIS);
659 }
660
661 static void set_has_phis(ir_phase *block_info, ir_node *block)
662 {
663         set_phase_flag(block_info, block, BF_HAS_PHIS);
664 }
665
666 static bool is_unknown_jump_target(ir_phase *block_info, ir_node *block)
667 {
668         return get_phase_flag(block_info, block, BF_IS_UNKNOWN_JUMP_TARGET);
669 }
670
671 static void set_is_unknown_jump_target(ir_phase *block_info, ir_node *block)
672 {
673         set_phase_flag(block_info, block, BF_IS_UNKNOWN_JUMP_TARGET);
674 }
675
676 /**
677  * Pre-Walker: fill block info information.
678  */
679 static void compute_block_info(ir_node *n, void *x)
680 {
681         ir_phase *block_info = (ir_phase *)x;
682
683         if (is_Block(n)) {
684                 int i, max = get_Block_n_cfgpreds(n);
685                 for (i=0; i<max; i++) {
686                         ir_node *pred = get_Block_cfgpred(n,i);
687                         if (is_unknown_jump(pred)) {
688                                 set_is_unknown_jump_target(block_info, n);
689                         }
690                 }
691         } else if (is_Phi(n)) {
692                 ir_node *block = get_nodes_block(n);
693                 set_has_phis(block_info, block);
694         } else if (is_Jmp(n) || is_Cond(n) || is_Proj(n)) {
695                 /* ignore */
696         } else {
697                 ir_node *block = get_nodes_block(n);
698                 set_has_operations(block_info, block);
699         }
700 }
701
702 static void clear_block_info(ir_node *block, void *x)
703 {
704         ir_phase *block_info = (ir_phase *)x;
705         clear_phase_flag(block_info, block);
706 }
707
/** Environment for the Phi-free cfg optimizations
 * (remove_empty_blocks() and optimize_ifs()). */
typedef struct skip_env {
	bool changed;    /**< Set if the graph was changed by a walker pass. */
	ir_phase *phase; /**< Phase holding the per-block flags. */
} skip_env;
712
713 /**
714  * Post-Block-walker: Optimize useless if's (boolean Cond nodes
715  * with same true/false target) away.
716  */
717 static void optimize_ifs(ir_node *block, void *x)
718 {
719         skip_env *env = (skip_env*)x;
720         int i, j;
721         int n_preds = get_Block_n_cfgpreds(block);
722
723         if (has_phis(env->phase, block))
724                 return;
725
726         /* optimize Cond predecessors (might produce Bad predecessors) */
727         for (i = 0; i < n_preds; ++i) {
728                 for (j = i+1; j < n_preds; ++j) {
729                         optimize_pred_cond(block, i, j);
730                 }
731         }
732 }
733
/**
 * Pre-Block walker: remove empty blocks (blocks containing only a Jmp)
 * that are control flow predecessors of the current block.
 *
 * Note: appends newly gained predecessors to @p block and extends the
 * iteration over them, so one call may remove a whole chain of empty
 * blocks feeding into @p block.
 */
static void remove_empty_blocks(ir_node *block, void *x)
{
	skip_env *env = (skip_env*)x;
	int i;
	int n_preds = get_Block_n_cfgpreds(block);

	for (i = 0; i < n_preds; ++i) {
		ir_node *jmp, *jmp_block, *pred, *pred_block;
		int n_jpreds = 0;

		jmp = get_Block_cfgpred(block, i);
		if (!is_Jmp(jmp))
			continue;
		jmp_block = get_nodes_block(jmp);
		if (jmp_block == block)
			continue; /* this infinite loop cannot be optimized any further */
		if (is_unknown_jump_target(env->phase, jmp_block))
			continue; /* unknown jump target must not be optimized */
		if (has_operations(env->phase,jmp_block))
			continue; /* this block contains operations and cannot be skipped */
		if (has_phis(env->phase,jmp_block))
			continue; /* this block contains Phis and is not skipped */
		if (Block_block_visited(jmp_block)) {
			continue;
			/* otherwise we could break the walker,
			 * if block was reached via
			 *     KeepAlive edge -> jmp_block -> A ---> block,
			 * because the walker cannot handle Id nodes.
			 *
			 *   A      B
			 *    \    /
			 *   jmp_block
			 *    /    \
			 * block    End
			 */
		}

		/* jmp_block is an empty block and can be optimized! */

		n_jpreds = get_Block_n_cfgpreds(jmp_block);
		/*
		 * If the jmp block has only one predecessor this is straightforward.
		 * However, if there are more predecessors, we only handle this,
		 * if block has no Phis.
		 */
		if (n_jpreds == 1) {
			/* skip jmp block by rerouting its predecessor to block
			 *
			 *     A              A
			 *     |              |
			 *  jmp_block   =>    |
			 *     |              |
			 *   block          block
			 */
			pred = get_Block_cfgpred(jmp_block, 0);
			exchange(jmp, pred);

			/* cleanup: jmp_block might have a Keep edge! */
			pred_block = get_nodes_block(pred);
			exchange(jmp_block, pred_block);
			env->changed = true;
		} else if (! has_phis(env->phase, block)) {
			/* all predecessors can skip the jmp block, so block gets some new
			 * predecessors
			 *
			 *  A     B                 A  B
			 *   \   /                  |  |
			 * jmp_block  C  =>  Bad  C |  |
			 *      \    /          \ | | /
			 *      block            block
			 */
			ir_node **ins = ALLOCAN(ir_node*, n_preds+n_jpreds);
			int j;
			/* first copy the old predecessors, because the outer loop (i)
			 * still walks over them */
			for (j = 0; j < n_preds; ++j) {
				ins[j] = get_Block_cfgpred(block, j);
			}
			/* now append the new predecessors */
			for (j = 0; j < n_jpreds; ++j) {
				pred = get_Block_cfgpred(jmp_block, j);
				ins[n_preds+j] = pred;
			}
			set_irn_in(block, n_preds+n_jpreds, ins);
			/* convert the jmp_block to Bad */
			ir_graph *irg = get_irn_irg(block);
			exchange(jmp_block, new_r_Bad(irg, mode_BB));
			exchange(jmp, new_r_Bad(irg, mode_X));
			/* let the outer loop walk over the new predecessors as well */
			n_preds += n_jpreds;
			env->changed = true;
			// TODO What if jmp_block had a KeepAlive edge?
		} else {
			/* This would involve Phis ... */
		}
	}
}
835
836 /*
837  * All cfg optimizations, which do not touch Phi nodes.
838  *
839  * Note that this might create critical edges.
840  */
841 static void cfgopt_ignoring_phis(ir_graph *irg)
842 {
843         ir_phase *block_info = new_phase(irg, NULL);
844         skip_env env = { true, block_info };
845
846         while (env.changed) {
847                 irg_walk_graph(irg, compute_block_info, NULL, block_info);
848                 env.changed = false;
849
850                 /* Remove blocks, which only consist of a Jmp */
851                 irg_block_walk_graph(irg, remove_empty_blocks, NULL, &env);
852
853                 /* Optimize Cond->Jmp, where then- and else-block are the same. */
854                 irg_block_walk_graph(irg, NULL, optimize_ifs, &env);
855
856                 if (env.changed) {
857                         clear_irg_state(irg, IR_GRAPH_STATE_CONSISTENT_DOMINANCE);
858                         /* clear block info, because it must be recomputed */
859                         irg_block_walk_graph(irg, clear_block_info, NULL, block_info);
860                         /* Removing blocks and Conds might enable more optimizations */
861                         continue;
862                 } else {
863                         break;
864                 }
865         }
866
867         phase_free(block_info);
868 }
869
/* Optimizations of the control flow that also require changes of Phi nodes.
 *
 * Runs the Phi-free cfg optimizations first, then iterates switch-Cond
 * handling until stable, merges/removes blocks via optimize_blocks(),
 * and finally drops dead kept-alive Phis if any Phis were moved.
 * Returns 0, i.e. no graph state is guaranteed afterwards. */
static ir_graph_state_t do_cfopt(ir_graph *irg)
{
	int i, j, n;
	ir_node **in = NULL;
	ir_node *end = get_irg_end(irg);
	ir_node *new_end;
	merge_env env;

	env.changed    = false;
	env.phis_moved = false;

	assert(get_irg_phase_state(irg) != phase_building);

	/* if the graph is not pinned, we cannot determine empty blocks */
	assert(get_irg_pinned(irg) != op_pin_state_floats &&
	       "Control flow optimization need a pinned graph");

	edges_deactivate(irg);

	/* First the "simple" optimizations, which do not touch Phis */
	cfgopt_ignoring_phis(irg);

	/* we use the mark flag to mark removable blocks */
	ir_reserve_resources(irg, IR_RESOURCE_BLOCK_MARK | IR_RESOURCE_IRN_LINK
	                     | IR_RESOURCE_PHI_LIST);

	/* The switch Cond optimization might expose unreachable code, so we loop */
	for (;;) {
		int length;
		ir_node **switch_conds = NULL;
		bool changed = false;

		assure_doms(irg);

		/*
		 * This pass collects all Phi nodes in a link list in the block
		 * nodes.  Further it performs simple control flow optimizations.
		 * Finally it marks all blocks that do not contain useful
		 * computations, i.e., these blocks might be removed.
		 */
		switch_conds = NEW_ARR_F(ir_node*, 0);
		irg_walk(end, clear_link_and_mark_blocks_removable, collect_nodes, &switch_conds);

		/* handle all collected switch-Conds */
		length = ARR_LEN(switch_conds);
		for (i = 0; i < length; ++i) {
			ir_node *cond = switch_conds[i];
			changed |= handle_switch_cond(cond);
		}
		DEL_ARR_F(switch_conds);

		if (!changed)
			break;

		/* a switch-Cond was simplified: dominance, extended blocks and
		 * entity usage may be invalid now, so recompute before retrying */
		clear_irg_state(irg, IR_GRAPH_STATE_CONSISTENT_DOMINANCE
		                   | IR_GRAPH_STATE_VALID_EXTENDED_BLOCKS
		                   | IR_GRAPH_STATE_CONSISTENT_ENTITY_USAGE);
	}

	/* assert due to collect_nodes:
	 * 1. removable blocks are now marked as such
	 * 2. phi lists are up to date
	 */

	/* Optimize the standard code.
	 * It walks only over block nodes and adapts these and the Phi nodes in these
	 * blocks, which it finds in a linked list computed before.
	 * */
	assure_doms(irg);
	irg_block_walk_graph(irg, optimize_blocks, NULL, &env);

	new_end = optimize_in_place(end);
	if (new_end != end) {
		set_irg_end(irg, new_end);
		end = new_end;
	}
	remove_End_Bads_and_doublets(end);

	ir_free_resources(irg, IR_RESOURCE_BLOCK_MARK | IR_RESOURCE_IRN_LINK
	                  | IR_RESOURCE_PHI_LIST);

	if (env.phis_moved) {
		/* Bad: when we moved Phi's, we might produce dead Phi nodes
		   that are kept-alive.
		   Some other phases cannot cope with this, so kill them.
		 */
		n = get_End_n_keepalives(end);
		if (n > 0) {
			NEW_ARR_A(ir_node *, in, n);
			assure_irg_outs(irg);

			for (i = j = 0; i < n; ++i) {
				ir_node *ka = get_End_keepalive(end, i);

				if (is_Phi(ka)) {
					int k;

					/* keep the Phi only if it has a user besides
					 * itself (self loop) or the End node */
					for (k = get_irn_n_outs(ka) - 1; k >= 0; --k) {
						ir_node *user = get_irn_out(ka, k);

						if (user != ka && user != end) {
							/* Is it a real user or just a self loop ? */
							break;
						}
					}
					if (k >= 0)
						in[j++] = ka;
				} else
					in[j++] = ka;
			}
			if (j != n) {
				set_End_keepalives(end, j, in);
				env.changed = true;
			}
		}
	}

	return 0;
}
990
/** Descriptor for the opt_manage framework: do_cfopt requires a graph
 * without unreachable code and provides no guaranteed state afterwards. */
static optdesc_t opt_cf = {
	"control-flow",
	IR_GRAPH_STATE_NO_UNREACHABLE_CODE,
	do_cfopt,
};
996
/* Public entry point: run the control flow optimization on @p irg via the
 * optimization manager (which establishes the required graph state). */
void optimize_cf(ir_graph *irg)
{
	perform_irg_optimization(irg, &opt_cf);
}
1001
/* Creates an ir_graph pass for optimize_cf.
 * @param name  pass name, or NULL for the default "optimize_cf". */
ir_graph_pass_t *optimize_cf_pass(const char *name)
{
	return def_graph_pass(name ? name : "optimize_cf", optimize_cf);
}