Control flow optimization: Merge consecutive blocks.
[libfirm] / ir / opt / cfopt.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief   Control flow optimizations.
23  * @author  Goetz Lindenmaier, Michael Beck, Sebastian Hack
24  *
25  * Removes Bad control flow predecessors and empty blocks.  A block is empty
26  * if it contains only a Jmp node. Blocks can only be removed if they are not
27  * needed for the semantics of Phi nodes. Further, we NEVER remove labeled
28  * blocks (even if we could move the label).
29  */
30 #include "config.h"
31
32 #include "iroptimize.h"
33
34 #include <assert.h>
35 #include <stdbool.h>
36
37 #include "xmalloc.h"
38 #include "irnode_t.h"
39 #include "irgraph_t.h"
40 #include "irprog_t.h"
41
42 #include "ircons.h"
43 #include "iropt_t.h"
44 #include "irgwalk.h"
45 #include "irgmod.h"
46 #include "irdump.h"
47 #include "irverify.h"
48 #include "iredges.h"
49 #include "opt_manage.h"
50
51 #include "array_t.h"
52
53 #include "irouts.h"
54 #include "irbackedge_t.h"
55
56 #include "irflag_t.h"
57 #include "firmstat.h"
58 #include "irpass.h"
59 #include "irnodehashmap.h"
60 #include "irtools.h"
61
62 #include "iropt_dbg.h"
63
/** An environment passed through merge_blocks and the collect/optimize
 *  walkers; accumulates what happened so the driver can invalidate
 *  analysis info accordingly. */
typedef struct merge_env {
	bool      changed;      /**< Set if the graph was changed at all. */
	bool      phis_moved;   /**< Set if Phi nodes were moved to another block. */
} merge_env;
69
70 /** set or reset the removable property of a block. */
71 static void set_Block_removable(ir_node *block, bool removable)
72 {
73         set_Block_mark(block, removable);
74 }
75
76 /** check if a block has the removable property set. */
77 static bool is_Block_removable(const ir_node *block)
78 {
79         return get_Block_mark(block);
80 }
81
82 /** checks if a given Cond node is a switch Cond. */
83 static bool is_switch_Cond(const ir_node *cond)
84 {
85         ir_node *sel = get_Cond_selector(cond);
86         return get_irn_mode(sel) != mode_b;
87 }
88
89 /** Walker: clear link fields and mark all blocks as removable. */
90 static void clear_link_and_mark_blocks_removable(ir_node *node, void *ctx)
91 {
92         (void) ctx;
93         set_irn_link(node, NULL);
94         if (is_Block(node)) {
95                 set_Block_removable(node, true);
96                 set_Block_phis(node, NULL);
97         } else if (is_Phi(node)) {
98                 set_Phi_next(node, NULL);
99         }
100 }
101
102 /**
103  * Collects all Phi nodes in link list of Block.
104  * Marks all blocks "non_removable" if they contain a node other
105  * than Jmp (and Proj).
106  * Links all Proj nodes to their predecessors.
107  * Collects all switch-Conds in a list.
108  */
109 static void collect_nodes(ir_node *n, void *ctx)
110 {
111         (void) ctx;
112         if (is_Phi(n)) {
113                 /* Collect Phi nodes to compact ins along with block's ins. */
114                 ir_node *block = get_nodes_block(n);
115                 add_Block_phi(block, n);
116         } else if (is_Block(n)) {
117                 if (get_Block_entity(n) != NULL) {
118                         /* block with a jump label attached cannot be removed. */
119                         set_Block_removable(n, false);
120                 }
121         } else if (is_Bad(n) || is_Jmp(n)) {
122                 /* ignore these */
123                 return;
124         } else {
125                 /* Check for non-empty block. */
126                 ir_node *block = get_nodes_block(n);
127
128                 set_Block_removable(block, false);
129
130                 if (is_Proj(n)) {
131                         /* link Proj nodes */
132                         ir_node *pred = get_Proj_pred(n);
133                         set_irn_link(n, get_irn_link(pred));
134                         set_irn_link(pred, n);
135                 }
136         }
137 }
138
139 /** Returns true if pred is predecessor of block b. */
140 static bool is_pred_of(const ir_node *pred, const ir_node *b)
141 {
142         int i;
143
144         for (i = get_Block_n_cfgpreds(b) - 1; i >= 0; --i) {
145                 ir_node *b_pred = get_Block_cfgpred_block(b, i);
146                 if (b_pred == pred)
147                         return true;
148         }
149         return false;
150 }
151
/** Test whether we can optimize away pred block pos of b.
 *
 *  @param  b    A block node.
 *  @param  pos  The position of the predecessor block to judge about.
 *
 *  @returns     The number of predecessors
 *
 *  The test is rather tricky.
 *
 *  The situation is something like the following:
 *  @verbatim
 *                 if-block
 *                  /   \
 *              then-b  else-b
 *                  \   /
 *                    b
 *  @endverbatim
 *
 *  b merges the control flow of an if-then-else.  We may not remove
 *  the 'then' _and_ the 'else' block of an 'if' if there is a Phi
 *  node in b, even if both are empty.  The destruction of this Phi
 *  requires that a copy is added before the merge.  We have to
 *  keep one of the case blocks to place the copies in.
 *
 *  To perform the test for pos, we must regard predecessors before pos
 *  as already removed.
 **/
static unsigned test_whether_dispensable(const ir_node *b, int pos)
{
	ir_node *pred  = get_Block_cfgpred(b, pos);
	ir_node *predb = get_nodes_block(pred);

	/* Bad preds and non-empty blocks stay: they count as one predecessor. */
	if (is_Bad(pred) || !is_Block_removable(predb))
		return 1;

	/* can't remove self-loops */
	if (predb == b)
		goto non_dispensable;
	if (is_unknown_jump(pred))
		goto non_dispensable;

	/* Seems to be empty. At least we detected this in collect_nodes. */
	if (get_Block_phis(b) != NULL) {
		int n_cfgpreds = get_Block_n_cfgpreds(b);
		int i;
		/* there are Phi nodes */

		/* b's pred blocks and pred's pred blocks must be pairwise disjunct.
		 * Handle all pred blocks with preds < pos as if they were already
		 * removed. */
		for (i = 0; i < pos; i++) {
			ir_node *other_pred  = get_Block_cfgpred(b, i);
			ir_node *other_predb = get_nodes_block(other_pred);
			if (is_Bad(other_pred))
				continue;
			if (is_Block_removable(other_predb)
			    && !Block_block_visited(other_predb)) {
				/* other_predb will be removed too: compare against its
				 * predecessors instead of the block itself. */
				int j;
				for (j = get_Block_n_cfgpreds(other_predb) - 1; j >= 0; --j) {
					ir_node *other_predpred
						= get_Block_cfgpred_block(other_predb, j);
					if (is_pred_of(other_predpred, predb))
						goto non_dispensable;
				}
			} else if (is_pred_of(other_predb, predb)) {
				goto non_dispensable;
			}
		}
		/* preds after pos are not yet removed, compare them directly */
		for (i = pos+1; i < n_cfgpreds; i++) {
			ir_node *other_predb = get_Block_cfgpred_block(b, i);
			if (is_pred_of(other_predb, predb))
				goto non_dispensable;
		}
	}
	/* we will not dispense already visited blocks */
	if (Block_block_visited(predb))
		return 1;
	/* if we get here, the block is dispensable, count useful preds */
	return get_irn_arity(predb);

non_dispensable:
	/* revoke the removable mark so later queries agree with this decision */
	set_Block_removable(predb, false);
	return 1;
}
236
237 /**
238  * This method merges blocks. A block is applicable to be merged, if it
239  * has only one predecessor with an unconditional jump to this block;
240  * and if this block does not contain any phis.
241  */
242 static void merge_blocks(ir_node *b)
243 {
244         if (get_Block_n_cfgpreds(b) == 1) {
245                 ir_node* pred = get_Block_cfgpred(b, 0);
246                 if (is_Jmp(pred)) {
247                         ir_node* pred_block = get_nodes_block(pred);
248                         if (get_Block_phis(b) == NULL) {
249                                 exchange(b, pred_block);
250                         }
251                 }
252         }
253 }
254
/**
 * This method removes empty blocks.  A block is empty if it only contains Phi
 * and Jmp nodes.
 *
 * We first adapt Phi nodes, then Block nodes, as we need the old ins
 * of the Block to adapt the Phi nodes.  We do this by computing new
 * in arrays, and then replacing the old ones.  So far we compute new in arrays
 * for all nodes, not regarding whether there is a possibility for optimization.
 *
 * For each predecessor p of a Block b there are three cases:
 *  - The predecessor p is a Bad node: just skip it. The in array of b shrinks
 *    by one.
 *  - The predecessor p is empty. Remove p. All predecessors of p are now
 *    predecessors of b.
 *  - The predecessor p is a block containing useful code. Just keep p as is.
 *
 * For Phi nodes f we have to check the conditions at the Block of f.
 * For cases 1 and 3 we proceed as for Blocks.  For case 2 we can have two
 * cases:
 *  -2a: The old predecessor of the Phi f is a Phi pred_f IN THE BLOCK REMOVED.
 *       In this case we proceed as for blocks. We remove pred_f.  All
 *       predecessors of pred_f now are predecessors of f.
 *  -2b: The old predecessor of f is NOT in the block removed. It might be a Phi
 *       too. We have to replicate f for each predecessor of the removed block.
 *       Or, with other words, the removed predecessor block has exactly one
 *       predecessor.
 *
 * Further there is a special case for self referencing blocks:
 * @verbatim
 *
 *    then_b     else_b                              then_b  else_b
 *       \      /                                      \      /
 *        \    /                                        |    /
 *        pred_b                                        |   /
 *         |   ____                                     |  /  ____
 *         |  |    |                                    |  | |    |
 *         |  |    |       === optimized to ===>        \  | |    |
 *        loop_b   |                                     loop_b   |
 *         |  |    |                                      |  |    |
 *         |  |____|                                      |  |____|
 *         |                                              |
 * @endverbatim
 *
 * If there is a Phi in pred_b, but we remove pred_b, we have to generate a
 * Phi in loop_b, that has the ins of the Phi in pred_b and a self referencing
 * backedge.
 */
static void optimize_blocks(ir_node *b, void *ctx)
{
	int i, j, k, n, max_preds, n_preds, p_preds = -1;
	ir_node *phi;
	ir_node *next;
	ir_node **in;
	merge_env *env = (merge_env*)ctx;

	if (get_Block_dom_depth(b) < 0) {
		/* ignore unreachable blocks */
		return;
	}

	/* Count the number of predecessor if this block is merged with pred blocks
	   that are empty.  test_whether_dispensable() returns the arity of a
	   dispensable pred block (its preds replace it) or 1 otherwise, so the
	   sum is an upper bound for the new in-array length. */
	max_preds = 0;
	for (i = 0, k = get_Block_n_cfgpreds(b); i < k; ++i) {
		max_preds += test_whether_dispensable(b, i);
	}
	in = XMALLOCN(ir_node*, max_preds);

	/*- Fix the Phi nodes of the current block -*/
	for (phi = get_Block_phis(b); phi != NULL; phi = next) {
		next = get_Phi_next(phi);

		/* Find the new predecessors for the Phi */
		p_preds = 0;
		for (i = 0, n = get_Block_n_cfgpreds(b); i < n; ++i) {
			ir_graph *irg = get_irn_irg(b);
			ir_node *predx = get_Block_cfgpred(b, i);
			ir_node *pred;

			/* case Phi 1: maintain Bads, as somebody else is responsible to
			 * remove them */
			if (is_Bad(predx)) {
				in[p_preds++] = new_r_Bad(irg, get_irn_mode(phi));
				continue;
			}

			pred = get_nodes_block(predx);

			/* case Phi 2: It's an empty block and not yet visited. */
			if (is_Block_removable(pred) && !Block_block_visited(pred)) {
				ir_node *phi_pred = get_Phi_pred(phi, i);

				for (j = 0, k = get_Block_n_cfgpreds(pred); j < k; j++) {
					ir_node *pred_pred = get_Block_cfgpred(pred, j);

					if (is_Bad(pred_pred)) {
						in[p_preds++] = new_r_Bad(irg, get_irn_mode(phi));
						continue;
					}

					if (get_nodes_block(phi_pred) == pred) {
						/* case Phi 2a: the Phi operand lives in the removed
						 * block itself, so pull up its operands instead. */
						assert(is_Phi(phi_pred));  /* Block is empty!! */

						in[p_preds++] = get_Phi_pred(phi_pred, j);
					} else {
						/* case Phi 2b: replicate the operand once per
						 * predecessor of the removed block. */
						in[p_preds++] = phi_pred;
					}
				}
			} else {
				/* case Phi 3: predecessor stays, keep the operand as is. */
				in[p_preds++] = get_Phi_pred(phi, i);
			}
		}
		assert(p_preds == max_preds);

		/* Fix the node */
		if (p_preds == 1) {
			exchange(phi, in[0]);
		} else {
			set_irn_in(phi, p_preds, in);
		}
		env->changed = true;
	}

	/*- This happens only if merge between loop backedge and single loop entry.
	    Moreover, it is only needed if predb is the direct dominator of b,
	    else there can be no uses of the Phi's in predb ... -*/
	for (k = 0, n = get_Block_n_cfgpreds(b); k < n; ++k) {
		ir_node *pred  = get_Block_cfgpred(b, k);
		ir_node *predb = get_nodes_block(pred);
		if (is_Bad(pred))
			continue;

		if (is_Block_removable(predb) && !Block_block_visited(predb)) {
			ir_node *next_phi;

			/* we found a predecessor block at position k that will be removed */
			for (phi = get_Block_phis(predb); phi != NULL; phi = next_phi) {
				int q_preds = 0;
				next_phi = get_Phi_next(phi);

				if (get_Block_idom(b) != predb) {
					/* predb is not the dominator. There can't be uses of
					 * pred's Phi nodes, kill them .*/
					ir_graph *irg  = get_irn_irg(b);
					ir_mode  *mode = get_irn_mode(phi);
					exchange(phi, new_r_Bad(irg, mode));
				} else {
					/* predb is the direct dominator of b. There might be uses
					 * of the Phi nodes from predb in further block, so move
					 * this phi from the predecessor into the block b */
					set_nodes_block(phi, b);
					set_Phi_next(phi, get_Block_phis(b));
					set_Block_phis(b, phi);
					env->phis_moved = true;

					/* first, copy all 0..k-1 predecessors */
					for (i = 0; i < k; i++) {
						ir_node *predx = get_Block_cfgpred(b, i);
						ir_node *pred_block;

						if (is_Bad(predx)) {
							ir_graph *irg  = get_irn_irg(b);
							ir_mode  *mode = get_irn_mode(phi);
							in[q_preds++] = new_r_Bad(irg, mode);
							continue;
						}
						pred_block = get_nodes_block(predx);
						if (is_Block_removable(pred_block)
						           && !Block_block_visited(pred_block)) {
							int n_cfgpreds = get_Block_n_cfgpreds(pred_block);
							/* It's an empty block and not yet visited. */
							for (j = 0; j < n_cfgpreds; j++) {
								if (!is_Bad(get_Block_cfgpred(pred_block, j))) {
									in[q_preds++] = phi;
								} else {
									ir_graph *irg  = get_irn_irg(b);
									ir_mode  *mode = get_irn_mode(phi);
									in[q_preds++] = new_r_Bad(irg, mode);
								}
							}
						} else {
							in[q_preds++] = phi;
						}
					}

					/* now we are at k, copy the phi predecessors */
					pred = get_nodes_block(get_Block_cfgpred(b, k));
					for (i = 0; i < get_Phi_n_preds(phi); i++) {
						in[q_preds++] = get_Phi_pred(phi, i);
					}

					/* and now all the rest */
					for (i = k+1; i < get_Block_n_cfgpreds(b); i++) {
						pred = get_Block_cfgpred_block(b, i);

						if (is_Bad(pred)) {
							ir_graph *irg  = get_irn_irg(b);
							ir_mode  *mode = get_irn_mode(phi);
							in[q_preds++] = new_r_Bad(irg, mode);
						} else if (is_Block_removable(pred) && !Block_block_visited(pred)) {
							/* It's an empty block and not yet visited. */
							for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
								if (! is_Bad(get_Block_cfgpred(pred, j))) {
									in[q_preds++] = phi;
								} else {
									ir_graph *irg  = get_irn_irg(b);
									ir_mode  *mode = get_irn_mode(phi);
									in[q_preds++] = new_r_Bad(irg, mode);
								}
							}
						} else {
							in[q_preds++] = phi;
						}
					}

					/* Fix the node */
					if (q_preds == 1)
						exchange(phi, in[0]);
					else
						set_irn_in(phi, q_preds, in);
					env->changed = true;

					assert(q_preds <= max_preds);
					// assert(p_preds == q_preds && "Wrong Phi Fix");
				}
			}
		}
	}

	/*- Fix the block -*/
	n_preds = 0;
	for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
		ir_node *pred  = get_Block_cfgpred(b, i);
		ir_node *predb = get_nodes_block(pred);
		ir_graph *irg  = get_irn_irg(pred);

		/* case 1: Bad predecessor */
		if (is_Bad(pred)) {
			in[n_preds++] = new_r_Bad(irg, mode_X);
			continue;
		}
		if (is_Block_removable(predb) && !Block_block_visited(predb)) {
			/* case 2: It's an empty block and not yet visited. */
			for (j = 0; j < get_Block_n_cfgpreds(predb); j++) {
				ir_node *predpred = get_Block_cfgpred(predb, j);

				if (is_Bad(predpred)) {
					in[n_preds++] = new_r_Bad(irg, mode_X);
					continue;
				}

				in[n_preds++] = predpred;
			}
			/* Remove block+jump as it might be kept alive. */
			exchange(pred, new_r_Bad(get_irn_irg(b), mode_X));
			exchange(predb, new_r_Bad(get_irn_irg(b), mode_BB));
		} else {
			/* case 3: predecessor stays as is. */
			in[n_preds++] = pred;
		}
	}
	assert(n_preds == max_preds);

	set_irn_in(b, n_preds, in);
	env->changed = true;

	/* see if phi-fix was correct */
	assert(get_Block_phis(b) == NULL || p_preds == -1 || (n_preds == p_preds));
	xfree(in);
}
528
529 /**
530  * Optimize boolean Conds, where true and false jump to the same block into a Jmp
531  * Block must contain no Phi nodes.
532  *
533  *        Cond
534  *       /    \
535  *  projA      projB   =>   Jmp     Bad
536  *       \    /                \   /
537  *       block                 block
538  */
539 static bool optimize_pred_cond(ir_node *block, int i, int j)
540 {
541         ir_node *projA, *projB, *cond, *pred_block, *jmp, *bad;
542         assert(i != j);
543
544         projA = get_Block_cfgpred(block, i);
545         if (!is_Proj(projA)) return false;
546         projB = get_Block_cfgpred(block, j);
547         if (!is_Proj(projB)) return false;
548         cond  = get_Proj_pred(projA);
549         if (!is_Cond(cond))  return false;
550
551         if (cond != get_Proj_pred(projB)) return false;
552         if (is_switch_Cond(cond)) return false;
553
554         /* cond should actually be a Jmp */
555         pred_block = get_nodes_block(cond);
556         jmp = new_r_Jmp(pred_block);
557         bad = new_r_Bad(get_irn_irg(block), mode_X);
558
559         assert(projA != projB);
560         exchange(projA, jmp);
561         exchange(projB, bad);
562         return true;
563 }
564
/** Per-block flag bits kept in the block-info hashmap. */
typedef enum block_flags_t {
	BF_HAS_OPERATIONS         = 1 << 0, /**< block contains nodes besides Phi/Jmp/Cond/Proj */
	BF_HAS_PHIS               = 1 << 1, /**< block contains Phi nodes */
	BF_IS_UNKNOWN_JUMP_TARGET = 1 << 2, /**< block is the target of an unknown jump */
} block_flags_t;
570
571 static bool get_block_flag(const ir_nodehashmap_t *infos, const ir_node *block,
572                            int flag)
573 {
574         return PTR_TO_INT(ir_nodehashmap_get(infos, block)) & flag;
575 }
576
577 static void set_block_flag(ir_nodehashmap_t *infos, ir_node *block,
578                            block_flags_t flag)
579 {
580         int data = PTR_TO_INT(ir_nodehashmap_get(infos, block));
581         data |= flag;
582         ir_nodehashmap_insert(infos, block, INT_TO_PTR(data));
583 }
584
585 static void clear_block_flag(ir_nodehashmap_t *infos, const ir_node *block)
586 {
587         ir_nodehashmap_remove(infos, block);
588 }
589
/** Returns true if @p block contains real operations (see BF_HAS_OPERATIONS). */
static bool has_operations(ir_nodehashmap_t *infos, const ir_node *block)
{
	return get_block_flag(infos, block, BF_HAS_OPERATIONS);
}

/** Marks @p block as containing real operations. */
static void set_has_operations(ir_nodehashmap_t *infos, ir_node *block)
{
	set_block_flag(infos, block, BF_HAS_OPERATIONS);
}

/** Returns true if @p block contains Phi nodes (see BF_HAS_PHIS). */
static bool has_phis(ir_nodehashmap_t *infos, const ir_node *block)
{
	return get_block_flag(infos, block, BF_HAS_PHIS);
}

/** Marks @p block as containing Phi nodes. */
static void set_has_phis(ir_nodehashmap_t *infos, ir_node *block)
{
	set_block_flag(infos, block, BF_HAS_PHIS);
}

/** Returns true if @p block is the target of an unknown jump. */
static bool is_unknown_jump_target(ir_nodehashmap_t *infos, const ir_node *block)
{
	return get_block_flag(infos, block, BF_IS_UNKNOWN_JUMP_TARGET);
}

/** Marks @p block as the target of an unknown jump. */
static void set_is_unknown_jump_target(ir_nodehashmap_t *infos, ir_node *block)
{
	set_block_flag(infos, block, BF_IS_UNKNOWN_JUMP_TARGET);
}
619
620 /**
621  * Pre-Walker: fill block info information.
622  */
623 static void compute_block_info(ir_node *n, void *x)
624 {
625         ir_nodehashmap_t *infos = (ir_nodehashmap_t*)x;
626
627         if (is_Block(n)) {
628                 int i, max = get_Block_n_cfgpreds(n);
629                 for (i=0; i<max; i++) {
630                         ir_node *pred = get_Block_cfgpred(n,i);
631                         if (is_unknown_jump(pred)) {
632                                 set_is_unknown_jump_target(infos, n);
633                         }
634                 }
635         } else if (is_Phi(n)) {
636                 ir_node *block = get_nodes_block(n);
637                 set_has_phis(infos, block);
638         } else if (is_Jmp(n) || is_Cond(n) || is_Proj(n)) {
639                 /* ignore */
640         } else {
641                 ir_node *block = get_nodes_block(n);
642                 set_has_operations(infos, block);
643         }
644 }
645
646 static void clear_block_info(ir_node *block, void *x)
647 {
648         ir_nodehashmap_t *infos = (ir_nodehashmap_t*)x;
649         clear_block_flag(infos, block);
650 }
651
/** Environment for the block-skipping phase. */
typedef struct skip_env {
	bool             changed;     /**< Set if the graph was changed. */
	ir_nodehashmap_t block_infos; /**< Maps blocks to block_flags_t bit sets. */
} skip_env;
656
657 /**
658  * Post-Block-walker: Optimize useless if's (boolean Cond nodes
659  * with same true/false target) away.
660  */
661 static void optimize_ifs(ir_node *block, void *x)
662 {
663         skip_env *env = (skip_env*)x;
664         int i, j;
665         int n_preds = get_Block_n_cfgpreds(block);
666
667         if (has_phis(&env->block_infos, block))
668                 return;
669
670         /* optimize Cond predecessors (might produce Bad predecessors) */
671         for (i = 0; i < n_preds; ++i) {
672                 for (j = i+1; j < n_preds; ++j) {
673                         optimize_pred_cond(block, i, j);
674                 }
675         }
676 }
677
678 /**
679  * Pre-Block walker: remove empty blocks (only contain a Jmp)
680  * that are control flow predecessors of the current block.
681  */
static void remove_empty_blocks(ir_node *block, void *x)
{
	skip_env *env = (skip_env*)x;
	int i;
	int n_preds = get_Block_n_cfgpreds(block);

	/* Note: n_preds may grow during the loop (see the multi-predecessor case
	 * below, which appends the skipped block's predecessors), so the bound is
	 * intentionally re-read each iteration. */
	for (i = 0; i < n_preds; ++i) {
		ir_node *jmp, *jmp_block;
		int n_jpreds = 0;

		jmp = get_Block_cfgpred(block, i);
		if (!is_Jmp(jmp))
			continue;
		jmp_block = get_nodes_block(jmp);
		if (jmp_block == block)
			continue; /* this infinite loop cannot be optimized any further */
		if (is_unknown_jump_target(&env->block_infos, jmp_block))
			continue; /* unknown jump target must not be optimized */
		if (has_phis(&env->block_infos,jmp_block))
			continue; /* this block contains Phis and is not skipped */
		if (Block_block_visited(jmp_block)) {
			continue;
			/* otherwise we could break the walker,
			 * if block was reached via
			 *     KeepAlive edge -> jmp_block -> A ---> block,
			 * because the walker cannot handle Id nodes.
			 *
			 *   A      B
			 *    \    /
			 *   jmp_block
			 *    /    \
			 * block    End
			 */
		}

		/* jmp_block is an empty block and can be optimized! */

		n_jpreds = get_Block_n_cfgpreds(jmp_block);
		/*
		 * If the jmp block has only one predecessor this is straightforward.
		 * However, if there are more predecessors, we only handle this,
		 * if block has no Phis.
		 */
		if (n_jpreds == 1) {
			ir_node *pred        = get_Block_cfgpred(jmp_block, 0);
			ir_node *pred_block  = get_nodes_block(pred);
			if (has_operations(&env->block_infos,jmp_block)) {
				if (get_irg_start_block(get_irn_irg(pred_block)) == pred_block)
					continue; /* must not merge operations into start block */
				if (!is_Jmp(pred))
					continue; /* must not create partially dead code, especially when it is mode_M */
			}

			/* skip jmp block by rerouting its predecessor to block
			 *
			 *     A              A
			 *     |              |
			 *  jmp_block   =>    |
			 *     |              |
			 *   block          block
			 */
			exchange(jmp, pred);

			/* cleanup: jmp_block might have a Keep edge! */
			exchange(jmp_block, pred_block);
			env->changed = true;
		} else if ( !has_phis(&env->block_infos, block) &&
		            !has_operations(&env->block_infos,jmp_block))
		{
			/* all predecessors can skip the jmp block, so block gets some new
			 * predecessors
			 *
			 *  A     B                 A  B
			 *   \   /                  |  |
			 * jmp_block  C  =>  Bad  C |  |
			 *      \    /          \ | | /
			 *      block            block
			 */
			ir_node **ins = ALLOCAN(ir_node*, n_preds+n_jpreds);
			int j;
			/* first copy the old predecessors, because the outer loop (i)
			 * still walks over them */
			for (j = 0; j < n_preds; ++j) {
				ins[j] = get_Block_cfgpred(block, j);
			}
			/* now append the new predecessors */
			for (j = 0; j < n_jpreds; ++j) {
				ir_node *pred = get_Block_cfgpred(jmp_block, j);
				ins[n_preds+j] = pred;
			}
			set_irn_in(block, n_preds+n_jpreds, ins);
			/* convert the jmp_block to Bad */
			ir_graph *irg = get_irn_irg(block);
			exchange(jmp_block, new_r_Bad(irg, mode_BB));
			exchange(jmp, new_r_Bad(irg, mode_X));
			/* let the outer loop walk over the new predecessors as well */
			n_preds += n_jpreds;
			env->changed = true;
			// TODO What if jmp_block had a KeepAlive edge?
		} else {
			/* This would involve Phis ... */
		}
	}
}
786
/*
 * All CFG optimizations that do not touch Phi nodes.
 *
 * Note that these might create critical edges.
 */
792 static void cfgopt_ignoring_phis(ir_graph *irg)
793 {
794         skip_env env;
795
796         env.changed = true;
797         ir_nodehashmap_init(&env.block_infos);
798
799         while (env.changed) {
800                 irg_walk_graph(irg, compute_block_info, NULL, &env.block_infos);
801                 env.changed = false;
802
803                 /* Remove blocks, which only consist of a Jmp */
804                 irg_block_walk_graph(irg, remove_empty_blocks, NULL, &env);
805
806                 /* Optimize Cond->Jmp, where then- and else-block are the same. */
807                 irg_block_walk_graph(irg, NULL, optimize_ifs, &env);
808
809                 if (env.changed) {
810                         clear_irg_state(irg, IR_GRAPH_STATE_CONSISTENT_DOMINANCE);
811                         /* clear block info, because it must be recomputed */
812                         irg_block_walk_graph(irg, clear_block_info, NULL, &env.block_infos);
813                         /* Removing blocks and Conds might enable more optimizations */
814                         continue;
815                 } else {
816                         break;
817                 }
818         }
819
820         ir_nodehashmap_destroy(&env.block_infos);
821 }
822
823 /* Optimizations of the control flow that also require changes of Phi nodes.  */
static ir_graph_state_t do_cfopt(ir_graph *irg)
{
	int i, j, n;
	ir_node **in = NULL;
	ir_node *end = get_irg_end(irg);
	ir_node *new_end;
	merge_env env;

	env.changed    = false;
	env.phis_moved = false;

	/* graph construction must be finished before optimizing */
	assert(get_irg_phase_state(irg) != phase_building);

	/* if the graph is not pinned, we cannot determine empty blocks */
	assert(get_irg_pinned(irg) != op_pin_state_floats &&
	       "Control flow optimization need a pinned graph");

	edges_deactivate(irg);

	/* First the "simple" optimizations, which do not touch Phis */
	cfgopt_ignoring_phis(irg);

	/* we use the mark flag to mark removable blocks */
	ir_reserve_resources(irg, IR_RESOURCE_BLOCK_MARK | IR_RESOURCE_IRN_LINK
	                     | IR_RESOURCE_PHI_LIST);

	/* The switch Cond optimization might expose unreachable code, so we loop */
	for (;;) {
		bool changed = false;

		assure_doms(irg);

		/*
		 * This pass collects all Phi nodes in a link list in the block
		 * nodes.  Further it performs simple control flow optimizations.
		 * Finally it marks all blocks that do not contain useful
		 * computations, i.e., these blocks might be removed.
		 */
		irg_walk(end, clear_link_and_mark_blocks_removable, collect_nodes, NULL);

		/* NOTE(review): 'changed' is never written between its
		 * initialization and this test, so the loop always exits after the
		 * first iteration and the clear_irg_state() call below is
		 * unreachable.  Presumably some walker was meant to report changes
		 * (e.g. via its env argument, which is passed as NULL here) —
		 * confirm the intended fixpoint behavior. */
		if (!changed)
			break;

		clear_irg_state(irg, IR_GRAPH_STATE_CONSISTENT_DOMINANCE
		                   | IR_GRAPH_STATE_VALID_EXTENDED_BLOCKS
		                   | IR_GRAPH_STATE_CONSISTENT_ENTITY_USAGE);
	}

	/* after collect_nodes:
	 * 1. removable blocks are now marked as such
	 * 2. phi lists are up to date
	 */

	/* Optimize the standard code.
	 * It walks only over block nodes and adapts these and the Phi nodes in these
	 * blocks, which it finds in a linked list computed before.
	 * */
	assure_doms(irg);
	irg_block_walk_graph(irg, optimize_blocks, merge_blocks, &env);

	new_end = optimize_in_place(end);
	if (new_end != end) {
		set_irg_end(irg, new_end);
		end = new_end;
	}
	remove_End_Bads_and_doublets(end);

	ir_free_resources(irg, IR_RESOURCE_BLOCK_MARK | IR_RESOURCE_IRN_LINK
	                  | IR_RESOURCE_PHI_LIST);

	if (env.phis_moved) {
		/* Bad: when we moved Phi's, we might produce dead Phi nodes
		   that are kept-alive.
		   Some other phases cannot cope with this, so kill them.
		 */
		n = get_End_n_keepalives(end);
		if (n > 0) {
			NEW_ARR_A(ir_node *, in, n);
			assure_irg_outs(irg);

			/* keep only those keep-alives that are not dead Phis; a Phi
			 * counts as dead here when its only users are itself (self
			 * loop) and the End node */
			for (i = j = 0; i < n; ++i) {
				ir_node *ka = get_End_keepalive(end, i);

				if (is_Phi(ka)) {
					int k;

					for (k = get_irn_n_outs(ka) - 1; k >= 0; --k) {
						ir_node *user = get_irn_out(ka, k);

						if (user != ka && user != end) {
							/* Is it a real user or just a self loop ? */
							break;
						}
					}
					/* k >= 0 means a real user was found: keep the Phi */
					if (k >= 0)
						in[j++] = ka;
				} else
					in[j++] = ka;
			}
			if (j != n) {
				set_End_keepalives(end, j, in);
				env.changed = true;
			}
		}
	}

	/* 0: no ir_graph_state_t flags are claimed to hold afterwards */
	return 0;
}
932
/* Descriptor driving do_cfopt through perform_irg_optimization();
 * the state flag presumably encodes a precondition (unreachable code
 * already removed) — confirm against optdesc_t in opt_manage.h. */
static optdesc_t opt_cf = {
	"control-flow",
	IR_GRAPH_STATE_NO_UNREACHABLE_CODE,
	do_cfopt,
};
938
/* Public entry point: runs the control flow optimization on irg via the
 * opt_cf descriptor. */
void optimize_cf(ir_graph *irg)
{
	perform_irg_optimization(irg, &opt_cf);
}
943
944 /* Creates an ir_graph pass for optimize_cf. */
945 ir_graph_pass_t *optimize_cf_pass(const char *name)
946 {
947         return def_graph_pass(name ? name : "optimize_cf", optimize_cf);
948 }