nsz Git - libfirm/blob - ir/opt/cfopt.c

   1 /*
   2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
   3  *
   4  * This file is part of libFirm.
   5  *
   6  * This file may be distributed and/or modified under the terms of the
   7  * GNU General Public License version 2 as published by the Free Software
   8  * Foundation and appearing in the file LICENSE.GPL included in the
   9  * packaging of this file.
  10  *
  11  * Licensees holding valid libFirm Professional Edition licenses may use
  12  * this file in accordance with the libFirm Commercial License.
  13  * Agreement provided with the Software.
  14  *
  15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
  16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  17  * PURPOSE.
  18  */
  19
  20 /**
  21  * @file
  22  * @brief   Control flow optimizations.
  23  * @author  Goetz Lindenmaier, Michael Beck, Sebastian Hack
  24  * @version $Id$
  25  *
  26  * Removes Bad control flow predecessors and empty blocks.  A block is empty
  27  * if it contains only a Jmp node. Blocks can only be removed if they are not
  28  * needed for the semantics of Phi nodes. Further, we NEVER remove labeled
  29  * blocks (even if we could move the label).
  30  */
  31 #include "config.h"
  32
  33 #include "iroptimize.h"
  34
  35 #include <assert.h>
  36 #include <stdbool.h>
  37
  38 #include "xmalloc.h"
  39 #include "irnode_t.h"
  40 #include "irgraph_t.h"
  41 #include "irprog_t.h"
  42
  43 #include "ircons.h"
  44 #include "iropt_t.h"
  45 #include "irgwalk.h"
  46 #include "irgmod.h"
  47 #include "irdump.h"
  48 #include "irverify.h"
  49 #include "iredges.h"
  50 #include "opt_manage.h"
  51
  52 #include "array_t.h"
  53
  54 #include "irouts.h"
  55 #include "irbackedge_t.h"
  56
  57 #include "irflag_t.h"
  58 #include "firmstat.h"
  59 #include "irpass.h"
  60 #include "irphase_t.h"
  61
  62 #include "iropt_dbg.h"
  63
  64 /** An environment for merge_blocks and collect nodes. */
  65 typedef struct merge_env {
  66         bool      changed;      /**< Set if the graph was changed. */
  67         bool      phis_moved;   /**< Set if Phi nodes were moved. */
  68 } merge_env;
  69
  70 /** set or reset the removable property of a block. */
  71 static void set_Block_removable(ir_node *block, bool removable)
  72 {
  73         set_Block_mark(block, removable);
  74 }
  75
  76 /** check if a block has the removable property set. */
  77 static bool is_Block_removable(ir_node *block)
  78 {
  79         return get_Block_mark(block);
  80 }
  81
  82 /** checks if a given Cond node is a switch Cond. */
  83 static bool is_switch_Cond(ir_node *cond)
  84 {
  85         ir_node *sel = get_Cond_selector(cond);
  86         return get_irn_mode(sel) != mode_b;
  87 }
  88
  89 /** Walker: clear link fields and mark all blocks as removable. */
  90 static void clear_link_and_mark_blocks_removable(ir_node *node, void *ctx)
  91 {
  92         (void) ctx;
  93         set_irn_link(node, NULL);
  94         if (is_Block(node))
  95                 set_Block_removable(node, true);
  96 }
  97
  98 /**
  99  * Collects all Phi nodes in link list of Block.
 100  * Marks all blocks "non_removable" if they contain a node other
 101  * than Jmp (and Proj).
 102  * Links all Proj nodes to their predecessors.
 103  * Collects all switch-Conds in a list.
 104  */
 105 static void collect_nodes(ir_node *n, void *ctx)
 106 {
 107         ir_node ***switch_conds = (ir_node***)ctx;
 108
 109         if (is_Phi(n)) {
 110                 /* Collect Phi nodes to compact ins along with block's ins. */
 111                 ir_node *block = get_nodes_block(n);
 112                 set_irn_link(n, get_irn_link(block));
 113                 set_irn_link(block, n);
 114         } else if (is_Block(n)) {
 115                 if (has_Block_entity(n)) {
 116                         /* block with a jump label attached cannot be removed. */
 117                         set_Block_removable(n, false);
 118                 }
 119         } else if (is_Bad(n) || is_Jmp(n)) {
 120                 /* ignore these */
 121                 return;
 122         } else {
 123                 /* Check for non-empty block. */
 124                 ir_node *block = get_nodes_block(n);
 125                 if (is_Bad(block))
 126                         return;
 127
 128                 set_Block_removable(block, false);
 129
 130                 if (is_Proj(n)) {
 131                         /* link Proj nodes */
 132                         ir_node *pred = get_Proj_pred(n);
 133                         set_irn_link(n, get_irn_link(pred));
 134                         set_irn_link(pred, n);
 135                 } else if (is_Cond(n) && is_switch_Cond(n)) {
 136                         /* found a switch-Cond, collect */
 137                         ARR_APP1(ir_node*, *switch_conds, n);
 138                 }
 139         }
 140 }
 141
 142 /** Returns true if pred is predecessor of block b. */
 143 static bool is_pred_of(ir_node *pred, ir_node *b)
 144 {
 145         int i;
 146
 147         for (i = get_Block_n_cfgpreds(b) - 1; i >= 0; --i) {
 148                 ir_node *b_pred = get_Block_cfgpred_block(b, i);
 149                 if (b_pred == pred)
 150                         return true;
 151         }
 152         return false;
 153 }
 154
 155 /** Test whether we can optimize away pred block pos of b.
 156  *
 157  *  @param  b    A block node.
 158  *  @param  pos  The position of the predecessor block to judge about.
 159  *
 160  *  @returns     The number of predecessors
 161  *
 162  *  The test is rather tricky.
 163  *
 164  *  The situation is something like the following:
 165  *  @verbatim
 166  *                 if-block
 167  *                  /   \
 168  *              then-b  else-b
 169  *                  \   /
 170  *                    b
 171  *  @endverbatim
 172  *
 173  *  b merges the control flow of an if-then-else.  We may not remove
 174  *  the 'then' _and_ the 'else' block of an 'if' if there is a Phi
 175  *  node in b, even if both are empty.  The destruction of this Phi
 176  *  requires that a copy is added before the merge.  We have to
 177  *  keep one of the case blocks to place the copies in.
 178  *
 179  *  To perform the test for pos, we must regard predecessors before pos
 180  *  as already removed.
 181  **/
 182 static unsigned test_whether_dispensable(ir_node *b, int pos)
 183 {
 184         ir_node *pred  = get_Block_cfgpred(b, pos);
 185         ir_node *predb = get_nodes_block(pred);
 186
 187         if (is_Bad(pred) || !is_Block_removable(predb))
 188                 return 1;
 189
 190         /* can't remove self-loops */
 191         if (predb == b)
 192                 goto non_dispensable;
 193         if (is_unknown_jump(pred))
 194                 goto non_dispensable;
 195
 196         /* Seems to be empty. At least we detected this in collect_nodes. */
 197         if (get_irn_link(b) != NULL) {
 198                 int n_cfgpreds = get_Block_n_cfgpreds(b);
 199                 int i;
 200                 /* there are Phi nodes */
 201
 202                 /* b's pred blocks and pred's pred blocks must be pairwise disjunct.
 203                  * Handle all pred blocks with preds < pos as if they were already
 204                  * removed. */
 205                 for (i = 0; i < pos; i++) {
 206                         ir_node *other_pred  = get_Block_cfgpred(b, i);
 207                         ir_node *other_predb = get_nodes_block(other_pred);
 208                         if (is_Bad(other_pred))
 209                                 continue;
 210                         if (is_Block_removable(other_predb)
 211                             && !Block_block_visited(other_predb)) {
 212                                 int j;
 213                                 for (j = get_Block_n_cfgpreds(other_predb) - 1; j >= 0; --j) {
 214                                         ir_node *other_predpred
 215                                                 = get_Block_cfgpred_block(other_predb, j);
 216                                         if (is_pred_of(other_predpred, predb))
 217                                                 goto non_dispensable;
 218                                 }
 219                         } else if (is_pred_of(other_predb, predb)) {
 220                                 goto non_dispensable;
 221                         }
 222                 }
 223                 for (i = pos+1; i < n_cfgpreds; i++) {
 224                         ir_node *other_predb = get_Block_cfgpred_block(b, i);
 225                         if (is_pred_of(other_predb, predb))
 226                                 goto non_dispensable;
 227                 }
 228         }
 229         /* we will not dispense already visited blocks */
 230         if (Block_block_visited(predb))
 231                 return 1;
 232         /* if we get here, the block is dispensable, count useful preds */
 233         return get_irn_arity(predb);
 234
 235 non_dispensable:
 236         set_Block_removable(predb, false);
 237         return 1;
 238 }
 239
 240 /**
 241  * This method removes empty blocks.  A block is empty if it only contains Phi
 242  * and Jmp nodes.
 243  *
 244  * We first adapt Phi nodes, then Block nodes, as we need the old ins
 245  * of the Block to adapt the Phi nodes.  We do this by computing new
 246  * in arrays, and then replacing the old ones.  So far we compute new in arrays
 247  * for all nodes, not regarding whether there is a possibility for optimization.
 248  *
 249  * For each predecessor p of a Block b there are three cases:
 250  *  - The predecessor p is a Bad node: just skip it. The in array of b shrinks
 251  *    by one.
 252  *  - The predecessor p is empty. Remove p. All predecessors of p are now
 253  *    predecessors of b.
 254  *  - The predecessor p is a block containing useful code. Just keep p as is.
 255  *
 256  * For Phi nodes f we have to check the conditions at the Block of f.
 257  * For cases 1 and 3 we proceed as for Blocks.  For case 2 we can have two
 258  * cases:
 259  *  -2a: The old predecessor of the Phi f is a Phi pred_f IN THE BLOCK REMOVED.
 260  *       In this case we proceed as for blocks. We remove pred_f.  All
 261  *       predecessors of pred_f now are predecessors of f.
 262  *  -2b: The old predecessor of f is NOT in the block removed. It might be a Phi
 263  *       too. We have to replicate f for each predecessor of the removed block.
 264  *       Or, with other words, the removed predecessor block has exactly one
 265  *       predecessor.
 266  *
 267  * Further there is a special case for self referencing blocks:
 268  * @verbatim
 269  *
 270  *    then_b     else_b                              then_b  else_b
 271  *       \      /                                      \      /
 272  *        \    /                                        |    /
 273  *        pred_b                                        |   /
 274  *         |   ____                                     |  /  ____
 275  *         |  |    |                                    |  | |    |
 276  *         |  |    |       === optimized to ===>        \  | |    |
 277  *        loop_b   |                                     loop_b   |
 278  *         |  |    |                                      |  |    |
 279  *         |  |____|                                      |  |____|
 280  *         |                                              |
 281  * @endverbatim
 282  *
 283  * If there is a Phi in pred_b, but we remove pred_b, we have to generate a
 284  * Phi in loop_b, that has the ins of the Phi in pred_b and a self referencing
 285  * backedge.
 286  */
 287 static void optimize_blocks(ir_node *b, void *ctx)
 288 {
 289         int i, j, k, n, max_preds, n_preds, p_preds = -1;
 290         ir_node *pred, *phi, *next;
 291         ir_node **in;
 292         merge_env *env = (merge_env*)ctx;
 293
 294         if (get_Block_dom_depth(b) < 0) {
 295                 /* ignore unreachable blocks */
 296                 return;
 297         }
 298
 299         /* Count the number of predecessor if this block is merged with pred blocks
 300            that are empty. */
 301         max_preds = 0;
 302         for (i = 0, k = get_Block_n_cfgpreds(b); i < k; ++i) {
 303                 max_preds += test_whether_dispensable(b, i);
 304         }
 305         in = XMALLOCN(ir_node*, max_preds);
 306
 307         /*- Fix the Phi nodes of the current block -*/
 308         for (phi = (ir_node*)get_irn_link(b); phi != NULL; phi = (ir_node*)next) {
 309                 assert(is_Phi(phi));
 310                 next = (ir_node*)get_irn_link(phi);
 311
 312                 /* Find the new predecessors for the Phi */
 313                 p_preds = 0;
 314                 for (i = 0, n = get_Block_n_cfgpreds(b); i < n; ++i) {
 315                         ir_graph *irg = get_irn_irg(b);
 316                         pred = get_Block_cfgpred_block(b, i);
 317
 318                         if (is_Bad(pred)) {
 319                                 /* case Phi 1: maintain Bads, as somebody else is responsible to remove them */
 320                                 in[p_preds++] = new_r_Bad(irg, get_irn_mode(phi));
 321                         } else if (is_Block_removable(pred) && !Block_block_visited(pred)) {
 322                                 /* case Phi 2: It's an empty block and not yet visited. */
 323                                 ir_node *phi_pred = get_Phi_pred(phi, i);
 324
 325                                 for (j = 0, k = get_Block_n_cfgpreds(pred); j < k; j++) {
 326                                         ir_node *pred_pred = get_Block_cfgpred(pred, j);
 327
 328                                         if (is_Bad(pred_pred)) {
 329                                                 in[p_preds++] = new_r_Bad(irg, get_irn_mode(phi));
 330                                                 continue;
 331                                         }
 332
 333                                         if (get_nodes_block(phi_pred) == pred) {
 334                                                 /* case Phi 2a: */
 335                                                 assert(is_Phi(phi_pred));  /* Block is empty!! */
 336
 337                                                 in[p_preds++] = get_Phi_pred(phi_pred, j);
 338                                         } else {
 339                                                 /* case Phi 2b: */
 340                                                 in[p_preds++] = phi_pred;
 341                                         }
 342                                 }
 343                         } else {
 344                                 /* case Phi 3: */
 345                                 in[p_preds++] = get_Phi_pred(phi, i);
 346                         }
 347                 }
 348                 assert(p_preds == max_preds);
 349
 350                 /* Fix the node */
 351                 if (p_preds == 1)
 352                         exchange(phi, in[0]);
 353                 else
 354                         set_irn_in(phi, p_preds, in);
 355                 env->changed = true;
 356         }
 357
 358         /*- This happens only if merge between loop backedge and single loop entry.
 359             Moreover, it is only needed if predb is the direct dominator of b,
 360             else there can be no uses of the Phi's in predb ... -*/
 361         for (k = 0, n = get_Block_n_cfgpreds(b); k < n; ++k) {
 362                 ir_node *pred  = get_Block_cfgpred(b, k);
 363                 ir_node *predb = get_nodes_block(pred);
 364                 if (is_Bad(pred))
 365                         continue;
 366
 367                 if (is_Block_removable(predb) && !Block_block_visited(predb)) {
 368                         ir_node *next_phi;
 369
 370                         /* we found a predecessor block at position k that will be removed */
 371                         for (phi = (ir_node*)get_irn_link(predb); phi; phi = next_phi) {
 372                                 int q_preds = 0;
 373                                 next_phi = (ir_node*)get_irn_link(phi);
 374
 375                                 assert(is_Phi(phi));
 376
 377                                 if (get_Block_idom(b) != predb) {
 378                                         /* predb is not the dominator. There can't be uses of pred's Phi nodes, kill them .*/
 379                                         ir_graph *irg  = get_irn_irg(b);
 380                                         ir_mode  *mode = get_irn_mode(phi);
 381                                         exchange(phi, new_r_Bad(irg, mode));
 382                                 } else {
 383                                         /* predb is the direct dominator of b. There might be uses of the Phi nodes from
 384                                            predb in further block, so move this phi from the predecessor into the block b */
 385                                         set_nodes_block(phi, b);
 386                                         set_irn_link(phi, get_irn_link(b));
 387                                         set_irn_link(b, phi);
 388                                         env->phis_moved = true;
 389
 390                                         /* first, copy all 0..k-1 predecessors */
 391                                         for (i = 0; i < k; i++) {
 392                                                 pred = get_Block_cfgpred_block(b, i);
 393
 394                                                 if (is_Bad(pred)) {
 395                                                         ir_graph *irg  = get_irn_irg(b);
 396                                                         ir_mode  *mode = get_irn_mode(phi);
 397                                                         in[q_preds++] = new_r_Bad(irg, mode);
 398                                                 } else if (is_Block_removable(pred) && !Block_block_visited(pred)) {
 399                                                         /* It's an empty block and not yet visited. */
 400                                                         for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
 401                                                                 if (! is_Bad(get_Block_cfgpred(pred, j))) {
 402                                                                         in[q_preds++] = phi;
 403                                                                 } else {
 404                                                                         ir_graph *irg  = get_irn_irg(b);
 405                                                                         ir_mode  *mode = get_irn_mode(phi);
 406                                                                         in[q_preds++] = new_r_Bad(irg, mode);
 407                                                                 }
 408                                                         }
 409                                                 } else {
 410                                                         in[q_preds++] = phi;
 411                                                 }
 412                                         }
 413
 414                                         /* now we are at k, copy the phi predecessors */
 415                                         pred = get_nodes_block(get_Block_cfgpred(b, k));
 416                                         for (i = 0; i < get_Phi_n_preds(phi); i++) {
 417                                                 in[q_preds++] = get_Phi_pred(phi, i);
 418                                         }
 419
 420                                         /* and now all the rest */
 421                                         for (i = k+1; i < get_Block_n_cfgpreds(b); i++) {
 422                                                 pred = get_Block_cfgpred_block(b, i);
 423
 424                                                 if (is_Bad(pred)) {
 425                                                         ir_graph *irg  = get_irn_irg(b);
 426                                                         ir_mode  *mode = get_irn_mode(phi);
 427                                                         in[q_preds++] = new_r_Bad(irg, mode);
 428                                                 } else if (is_Block_removable(pred) && !Block_block_visited(pred)) {
 429                                                         /* It's an empty block and not yet visited. */
 430                                                         for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
 431                                                                 if (! is_Bad(get_Block_cfgpred(pred, j))) {
 432                                                                         in[q_preds++] = phi;
 433                                                                 } else {
 434                                                                         ir_graph *irg  = get_irn_irg(b);
 435                                                                         ir_mode  *mode = get_irn_mode(phi);
 436                                                                         in[q_preds++] = new_r_Bad(irg, mode);
 437                                                                 }
 438                                                         }
 439                                                 } else {
 440                                                         in[q_preds++] = phi;
 441                                                 }
 442                                         }
 443
 444                                         /* Fix the node */
 445                                         if (q_preds == 1)
 446                                                 exchange(phi, in[0]);
 447                                         else
 448                                                 set_irn_in(phi, q_preds, in);
 449                                         env->changed = true;
 450
 451                                         assert(q_preds <= max_preds);
 452                                         // assert(p_preds == q_preds && "Wrong Phi Fix");
 453                                 }
 454                         }
 455                 }
 456         }
 457
 458         /*- Fix the block -*/
 459         n_preds = 0;
 460         for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
 461                 ir_node *pred  = get_Block_cfgpred(b, i);
 462                 ir_node *predb = get_nodes_block(pred);
 463                 ir_graph *irg  = get_irn_irg(pred);
 464
 465                 /* case 1: Bad predecessor */
 466                 if (is_Bad(pred)) {
 467                         in[n_preds++] = new_r_Bad(irg, mode_X);
 468                         continue;
 469                 }
 470                 if (is_Block_removable(predb) && !Block_block_visited(predb)) {
 471                         /* case 2: It's an empty block and not yet visited. */
 472                         for (j = 0; j < get_Block_n_cfgpreds(predb); j++) {
 473                                 ir_node *predpred = get_Block_cfgpred(predb, j);
 474
 475                                 if (is_Bad(predpred)) {
 476                                         in[n_preds++] = new_r_Bad(irg, mode_X);
 477                                         continue;
 478                                 }
 479
 480                                 in[n_preds++] = predpred;
 481                         }
 482                         /* Remove block+jump as it might be kept alive. */
 483                         exchange(pred, new_r_Bad(get_irn_irg(b), mode_X));
 484                         exchange(predb, new_r_Bad(get_irn_irg(b), mode_BB));
 485                 } else {
 486                         /* case 3: */
 487                         in[n_preds++] = pred;
 488                 }
 489         }
 490         assert(n_preds == max_preds);
 491
 492         set_irn_in(b, n_preds, in);
 493         env->changed = true;
 494
 495         /* see if phi-fix was correct */
 496         assert(get_irn_link(b) == NULL || p_preds == -1 || (n_preds == p_preds));
 497         xfree(in);
 498 }
 499
 500 /**
 501  * Optimize table-switch Conds.
 502  *
 503  * @param cond the switch-Cond
 504  * @return true if the switch-Cond was optimized
 505  */
 506 static bool handle_switch_cond(ir_node *cond)
 507 {
 508         ir_node *sel   = get_Cond_selector(cond);
 509         ir_node *proj1 = (ir_node*)get_irn_link(cond);
 510         ir_node *proj2 = (ir_node*)get_irn_link(proj1);
 511         ir_node *blk   = get_nodes_block(cond);
 512
 513         /* exactly 1 Proj on the Cond node: must be the defaultProj */
 514         if (proj2 == NULL) {
 515                 ir_node *jmp = new_r_Jmp(blk);
 516                 assert(get_Cond_default_proj(cond) == get_Proj_proj(proj1));
 517                 /* convert it into a Jmp */
 518                 exchange(proj1, jmp);
 519                 return true;
 520         }
 521
 522         /* handle Cond nodes with constant argument. In this case the localopt rules
 523          * should have killed all obviously impossible cases.
 524          * So the only case left to handle here is 1 defaultProj + 1 case
 525          * (this one case should be the one taken) */
 526         if (get_irn_link(proj2) == NULL) {
 527                 ir_tarval *tv = value_of(sel);
 528
 529                 if (tv != tarval_bad) {
 530                         /* we have a constant switch */
 531                         long      num     = get_tarval_long(tv);
 532                         long      def_num = get_Cond_default_proj(cond);
 533                         ir_graph *irg     = get_irn_irg(cond);
 534                         ir_node  *bad     = new_r_Bad(irg, mode_X);
 535
 536                         if (def_num == get_Proj_proj(proj1)) {
 537                                 /* first one is the defProj */
 538                                 if (num == get_Proj_proj(proj2)) {
 539                                         ir_node *jmp = new_r_Jmp(blk);
 540                                         exchange(proj2, jmp);
 541                                         exchange(proj1, bad);
 542                                         return true;
 543                                 }
 544                         } else if (def_num == get_Proj_proj(proj2)) {
 545                                 /* second one is the defProj */
 546                                 if (num == get_Proj_proj(proj1)) {
 547                                         ir_node *jmp = new_r_Jmp(blk);
 548                                         exchange(proj1, jmp);
 549                                         exchange(proj2, bad);
 550                                         return true;
 551                                 }
 552                         } else {
 553                                 /* neither: strange, Cond was not optimized so far */
 554                                 if (num == get_Proj_proj(proj1)) {
 555                                         ir_node *jmp = new_r_Jmp(blk);
 556                                         exchange(proj1, jmp);
 557                                         exchange(proj2, bad);
 558                                         return true;
 559                                 } else if (num == get_Proj_proj(proj2)) {
 560                                         ir_node *jmp = new_r_Jmp(blk);
 561                                         exchange(proj2, jmp);
 562                                         exchange(proj1, bad);
 563                                         return true;
 564                                 }
 565                         }
 566                 }
 567         }
 568         return false;
 569 }
 570
 571 /**
 572  * Optimize boolean Conds, where true and false jump to the same block into a Jmp
 573  * Block must contain no Phi nodes.
 574  *
 575  *        Cond
 576  *       /    \
 577  *  projA      projB   =>   Jmp     Bad
 578  *       \    /                \   /
 579  *       block                 block
 580  */
 581 static bool optimize_pred_cond(ir_node *block, int i, int j)
 582 {
 583         ir_node *projA, *projB, *cond, *pred_block, *jmp, *bad;
 584         assert(i != j);
 585
 586         projA = get_Block_cfgpred(block, i);
 587         if (!is_Proj(projA)) return false;
 588         projB = get_Block_cfgpred(block, j);
 589         if (!is_Proj(projB)) return false;
 590         cond  = get_Proj_pred(projA);
 591         if (!is_Cond(cond))  return false;
 592
 593         if (cond != get_Proj_pred(projB)) return false;
 594         if (is_switch_Cond(cond)) return false;
 595
 596         /* cond should actually be a Jmp */
 597         pred_block = get_nodes_block(cond);
 598         jmp = new_r_Jmp(pred_block);
 599         bad = new_r_Bad(get_irn_irg(block), mode_X);
 600
 601         assert(projA != projB);
 602         exchange(projA, jmp);
 603         exchange(projB, bad);
 604         return true;
 605 }
 606
 607 typedef enum block_flags_t {
 608         BF_HAS_OPERATIONS         = 1 << 0,
 609         BF_HAS_PHIS               = 1 << 1,
 610         BF_IS_UNKNOWN_JUMP_TARGET = 1 << 2,
 611 } block_flags_t;
 612
 613 static bool get_phase_flag(ir_phase *block_info, ir_node *block, int flag)
 614 {
 615         return PTR_TO_INT(phase_get_irn_data(block_info, block)) & flag;
 616 }
 617
 618 static void set_phase_flag(ir_phase *block_info, ir_node *block,
 619                            block_flags_t flag)
 620 {
 621         int data = PTR_TO_INT(phase_get_irn_data(block_info, block));
 622         data |= flag;
 623         phase_set_irn_data(block_info, block, INT_TO_PTR(data));
 624 }
 625
 626 static void clear_phase_flag(ir_phase *block_info, ir_node *block)
 627 {
 628         phase_set_irn_data(block_info, block, NULL);
 629 }
 630
 631 static bool has_operations(ir_phase *block_info, ir_node *block)
 632 {
 633         return get_phase_flag(block_info, block, BF_HAS_OPERATIONS);
 634 }
 635
 636 static void set_has_operations(ir_phase *block_info, ir_node *block)
 637 {
 638         set_phase_flag(block_info, block, BF_HAS_OPERATIONS);
 639 }
 640
 641 static bool has_phis(ir_phase *block_info, ir_node *block)
 642 {
 643         return get_phase_flag(block_info, block, BF_HAS_PHIS);
 644 }
 645
 646 static void set_has_phis(ir_phase *block_info, ir_node *block)
 647 {
 648         set_phase_flag(block_info, block, BF_HAS_PHIS);
 649 }
 650
 651 static bool is_unknown_jump_target(ir_phase *block_info, ir_node *block)
 652 {
 653         return get_phase_flag(block_info, block, BF_IS_UNKNOWN_JUMP_TARGET);
 654 }
 655
 656 static void set_is_unknown_jump_target(ir_phase *block_info, ir_node *block)
 657 {
 658         set_phase_flag(block_info, block, BF_IS_UNKNOWN_JUMP_TARGET);
 659 }
 660
 661 /**
 662  * Pre-Walker: fill block info information.
 663  */
 664 static void compute_block_info(ir_node *n, void *x)
 665 {
 666         ir_phase *block_info = (ir_phase *)x;
 667
 668         if (is_Block(n)) {
 669                 int i, max = get_Block_n_cfgpreds(n);
 670                 for (i=0; i<max; i++) {
 671                         ir_node *pred = get_Block_cfgpred(n,i);
 672                         if (is_unknown_jump(pred)) {
 673                                 set_is_unknown_jump_target(block_info, n);
 674                         }
 675                 }
 676         } else if (is_Phi(n)) {
 677                 ir_node *block = get_nodes_block(n);
 678                 set_has_phis(block_info, block);
 679         } else if (is_Jmp(n) || is_Cond(n) || is_Proj(n)) {
 680                 /* ignore */
 681         } else {
 682                 ir_node *block = get_nodes_block(n);
 683                 set_has_operations(block_info, block);
 684         }
 685 }
 686
 687 static void clear_block_info(ir_node *block, void *x)
 688 {
 689         ir_phase *block_info = (ir_phase *)x;
 690         clear_phase_flag(block_info, block);
 691 }
 692
 693 typedef struct skip_env {
 694         bool changed;
 695         ir_phase *phase;
 696 } skip_env;
 697
 698 /**
 699  * Post-Block-walker: Optimize useless if's (boolean Cond nodes
 700  * with same true/false target)
 701  * away.
 702  */
 703 static void optimize_ifs(ir_node *block, void *x)
 704 {
 705         skip_env *env = (skip_env*)x;
 706         int i, j;
 707         int n_preds = get_Block_n_cfgpreds(block);
 708
 709         if (has_phis(env->phase, block))
 710                 return;
 711
 712         /* optimize Cond predecessors (might produce Bad predecessors) */
 713         for (i = 0; i < n_preds; ++i) {
 714                 for (j = i+1; j < n_preds; ++j) {
 715                         optimize_pred_cond(block, i, j);
 716                 }
 717         }
 718 }
 719
 720 /**
 721  * Pre-Block walker: remove empty blocks (only contain a Jmp)
 722  * that are control flow predecessors of the current block.
 723  */
 724 static void remove_empty_blocks(ir_node *block, void *x)
 725 {
 726         skip_env *env = (skip_env*)x;
 727         int i;
 728         int n_preds = get_Block_n_cfgpreds(block);
 729
 730         for (i = 0; i < n_preds; ++i) {
 731                 ir_node *jmp, *jmp_block, *pred, *pred_block;
 732                 int n_jpreds = 0;
 733
 734                 jmp = get_Block_cfgpred(block, i);
 735                 if (!is_Jmp(jmp))
 736                         continue;
 737                 jmp_block = get_nodes_block(jmp);
 738                 if (jmp_block == block)
 739                         continue; /* this infinite loop cannot be optimized any further */
 740                 if (is_unknown_jump_target(env->phase, jmp_block))
 741                         continue; /* unknown jump target must not be optimized */
 742                 if (has_operations(env->phase,jmp_block))
 743                         continue; /* this block contains operations and cannot be skipped */
 744                 if (has_phis(env->phase,jmp_block))
 745                         continue; /* this block contains Phis and is not skipped */
 746                 if (Block_block_visited(jmp_block)) {
 747                         continue;
 748                         /* otherwise we could break the walker,
 749                          * if block was reached via KeepAlive edge -> jmp_block -> A ---> block,
 750                          * because the walker cannot handle Id nodes.
 751                          *
 752                          *   A      B
 753                          *    \    /
 754                          *   jmp_block
 755                          *    /    \
 756                          * block    End
 757                          */
 758                 }
 759
 760                 /* jmp_block is an empty block and can be optimized! */
 761
 762                 n_jpreds = get_Block_n_cfgpreds(jmp_block);
 763                 /**
 764                  * If the jmp block has only one predecessor this is straightforward.
 765                  * However, if there are more predecessors, we only handle this,
 766                  * if block has no Phis.
 767                  */
 768                 if (n_jpreds == 1) {
 769                         /* skip jmp block by rerouting its predecessor to block
 770                          *
 771                          *     A              A
 772                          *     |              |
 773                          *  jmp_block   =>    |
 774                          *     |              |
 775                          *   block          block
 776                          */
 777                         pred = get_Block_cfgpred(jmp_block, 0);
 778                         exchange(jmp, pred);
 779
 780                         /* cleanup: jmp_block might have a Keep edge! */
 781                         pred_block = get_nodes_block(pred);
 782                         exchange(jmp_block, pred_block);
 783                         env->changed = true;
 784                 } else if (! has_phis(env->phase, block)) {
 785                         /* all predecessors can skip the jmp block, so block gets some new predecessors
 786                          *
 787                          *  A     B                 A  B
 788                          *   \   /                  |  |
 789                          * jmp_block  C  =>  Bad  C |  |
 790                          *      \    /          \ | | /
 791                          *      block            block
 792                          */
 793                         ir_node **ins = NULL;
 794                         int j;
 795                         NEW_ARR_A(ir_node *, ins, n_preds+n_jpreds);
 796                         /* first copy the old predecessors, because the outer loop (i) still walks over them */
 797                         for (j = 0; j < n_preds; ++j) {
 798                                 ins[j] = get_Block_cfgpred(block, j);
 799                         }
 800                         /* now append the new predecessors */
 801                         for (j = 0; j < n_jpreds; ++j) {
 802                                 pred = get_Block_cfgpred(jmp_block, j);
 803                                 ins[n_preds+j] = pred;
 804                         }
 805                         set_irn_in(block, n_preds+n_jpreds, ins);
 806                         /* convert the jmp_block to Bad */
 807                         ir_graph *irg = get_irn_irg(block);
 808                         exchange(jmp_block, new_r_Bad(irg, mode_BB));
 809                         exchange(jmp, new_r_Bad(irg, mode_X));
 810                         /* let the outer loop walk over the new predecessors as well */
 811                         n_preds += n_jpreds;
 812                         env->changed = true;
 813                         // TODO What if jmp_block had a KeepAlive edge?
 814                 } else {
 815                         /* This would involve Phis ... */
 816                 }
 817         }
 818 }
 819
 820 /*
 821  * All cfg optimizations, which do not touch Phi nodes.
 822  *
 823  * Note that this might create critical edges.
 824  */
 825 static void cfgopt_ignoring_phis(ir_graph *irg)
 826 {
 827         ir_phase *block_info = new_phase(irg, NULL);
 828         skip_env env = { true, block_info };
 829
 830         while (env.changed) {
 831                 irg_walk_graph(irg, compute_block_info, NULL, block_info);
 832                 env.changed = false;
 833
 834                 /* Remove blocks, which only consist of a Jmp */
 835                 irg_block_walk_graph(irg, remove_empty_blocks, NULL, &env);
 836
 837                 /* Optimize Cond->Jmp, where then- and else-block are the same. */
 838                 irg_block_walk_graph(irg, NULL, optimize_ifs, &env);
 839
 840                 if (env.changed) {
 841                         set_irg_doms_inconsistent(irg);
 842                         /* clear block info, because it must be recomputed */
 843                         irg_block_walk_graph(irg, clear_block_info, NULL, block_info);
 844                         /* Removing blocks and Conds might enable more optimizations */
 845                         continue;
 846                 } else {
 847                         break;
 848                 }
 849         }
 850
 851         phase_free(block_info);
 852 }
 853
 854 /* Optimizations of the control flow that also require changes of Phi nodes.  */
 855 static ir_graph_state_t do_cfopt(ir_graph *irg)
 856 {
 857         int i, j, n;
 858         ir_node **in = NULL;
 859         ir_node *end = get_irg_end(irg);
 860         ir_node *new_end;
 861         merge_env env;
 862
 863         env.changed    = false;
 864         env.phis_moved = false;
 865
 866         assert(get_irg_phase_state(irg) != phase_building);
 867
 868         /* if the graph is not pinned, we cannot determine empty blocks */
 869         assert(get_irg_pinned(irg) != op_pin_state_floats &&
 870                "Control flow optimization need a pinned graph");
 871
 872         edges_deactivate(irg);
 873
 874         /* First the "simple" optimizations, which do not touch Phis */
 875         cfgopt_ignoring_phis(irg);
 876
 877         /* we use the mark flag to mark removable blocks */
 878         ir_reserve_resources(irg, IR_RESOURCE_BLOCK_MARK | IR_RESOURCE_IRN_LINK);
 879
 880         /* The switch Cond optimization might expose unreachable code, so we loop */
 881         for (;;) {
 882                 int length;
 883                 ir_node **switch_conds = NULL;
 884                 bool changed = false;
 885
 886                 assure_doms(irg);
 887
 888                 /*
 889                  * This pass collects all Phi nodes in a link list in the block
 890                  * nodes.  Further it performs simple control flow optimizations.
 891                  * Finally it marks all blocks that do not contain useful
 892                  * computations, i.e., these blocks might be removed.
 893                  */
 894                 switch_conds = NEW_ARR_F(ir_node*, 0);
 895                 irg_walk(end, clear_link_and_mark_blocks_removable, collect_nodes, &switch_conds);
 896
 897                 /* handle all collected switch-Conds */
 898                 length = ARR_LEN(switch_conds);
 899                 for (i = 0; i < length; ++i) {
 900                         ir_node *cond = switch_conds[i];
 901                         changed |= handle_switch_cond(cond);
 902                 }
 903                 DEL_ARR_F(switch_conds);
 904
 905                 if (!changed)
 906                         break;
 907
 908                 set_irg_doms_inconsistent(irg);
 909                 set_irg_extblk_inconsistent(irg);
 910                 set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);
 911         }
 912
 913         /* assert due to collect_nodes:
 914          * 1. removable blocks are now marked as such
 915          * 2. phi lists are up to date
 916          */
 917
 918         /* Optimize the standard code.
 919          * It walks only over block nodes and adapts these and the Phi nodes in these
 920          * blocks, which it finds in a linked list computed before.
 921          * */
 922         assure_doms(irg);
 923         irg_block_walk_graph(irg, optimize_blocks, NULL, &env);
 924
 925         new_end = optimize_in_place(end);
 926         if (new_end != end) {
 927                 set_irg_end(irg, new_end);
 928                 end = new_end;
 929         }
 930         remove_End_Bads_and_doublets(end);
 931
 932         ir_free_resources(irg, IR_RESOURCE_BLOCK_MARK | IR_RESOURCE_IRN_LINK);
 933
 934         if (env.phis_moved) {
 935                 /* Bad: when we moved Phi's, we might produce dead Phi nodes
 936                    that are kept-alive.
 937                    Some other phases cannot copy with this, so kill them.
 938                  */
 939                 n = get_End_n_keepalives(end);
 940                 if (n > 0) {
 941                         NEW_ARR_A(ir_node *, in, n);
 942                         assure_irg_outs(irg);
 943
 944                         for (i = j = 0; i < n; ++i) {
 945                                 ir_node *ka = get_End_keepalive(end, i);
 946
 947                                 if (is_Phi(ka)) {
 948                                         int k;
 949
 950                                         for (k = get_irn_n_outs(ka) - 1; k >= 0; --k) {
 951                                                 ir_node *user = get_irn_out(ka, k);
 952
 953                                                 if (user != ka && user != end) {
 954                                                         /* Is it a real user or just a self loop ? */
 955                                                         break;
 956                                                 }
 957                                         }
 958                                         if (k >= 0)
 959                                                 in[j++] = ka;
 960                                 } else
 961                                         in[j++] = ka;
 962                         }
 963                         if (j != n) {
 964                                 set_End_keepalives(end, j, in);
 965                                 env.changed = true;
 966                         }
 967                 }
 968         }
 969
 970         return 0;
 971 }
 972
 973 optdesc_t opt_cf = {
 974         "control-flow",
 975         0,
 976         do_cfopt,
 977 };
 978
 979 void optimize_cf(ir_graph *irg) {
 980         perform_irg_optimization(irg, &opt_cf);
 981 }
 982
 983 /* Creates an ir_graph pass for optimize_cf. */
 984 ir_graph_pass_t *optimize_cf_pass(const char *name)
 985 {
 986         return def_graph_pass(name ? name : "optimize_cf", optimize_cf);
 987 }