nsz Git - libfirm/blob - ir/opt/cfopt.c

   1 /*
   2  * Project:     libFIRM
   3  * File name:   ir/opt/cfopt.c
   4  * Purpose:     control flow optimizations
   5  * Author:
   6  * Created:
   7  * CVS-ID:      $Id$
   8  * Copyright:   (c) 1998-2004 Universität Karlsruhe
   9  * Licence:     This file protected by GPL -  GNU GENERAL PUBLIC LICENSE.
  10  */
  11
  12 #ifdef HAVE_CONFIG_H
  13 # include <config.h>
  14 #endif
  15
  16 #include <assert.h>
  17
  18 #include "irnode_t.h"
  19 #include "irgraph_t.h"
  20 #include "irprog_t.h"
  21
  22 #include "ircons.h"
  23 #include "iropt_t.h"
  24 #include "irgwalk.h"
  25 #include "irgmod.h"
  26 #include "irdump.h"
  27 #include "irvrfy.h"
  28
  29 #include "array.h"
  30
  31 #include "irouts.h"
  32 #include "irbackedge_t.h"
  33
  34 #include "irflag_t.h"
  35 #include "firmstat.h"
  36
  37 #include "cfopt.h"
  38
  39 /*------------------------------------------------------------------*/
  40 /* Control flow optimization.                                       */
  41 /*                                                                  */
  42 /* Removes Bad control flow predecessors and empty blocks.  A block */
  43 /* is empty if it contains only a Jmp node.                         */
  44 /* Blocks can only be removed if they are not needed for the        */
  45 /* semantics of Phi nodes.                                          */
  46 /*------------------------------------------------------------------*/
  47
  48 /**
  49  * Removes Tuples from Block control flow predecessors.
  50  * Optimizes blocks with equivalent_node().  This is tricky,
  51  * as we want to avoid nodes that have as block predecessor Bads.
  52  * Therefore we also optimize at control flow operations, depending
  53  * how we first reach the Block.
  54  */
  55 static void merge_blocks(ir_node *n, void *env) {
  56   int i;
  57   ir_node *new_block;
  58
  59   set_irn_link(n, NULL);
  60
  61   if (get_irn_op(n) == op_Block) {
  62     /* Remove Tuples */
  63     for (i = 0; i < get_Block_n_cfgpreds(n); i++) {
  64       /* GL @@@ : is this possible? if (get_opt_normalize()) -- added, all tests go through.
  65          A different order of optimizations might cause problems. */
  66       if (get_opt_normalize())
  67         set_Block_cfgpred(n, i, skip_Tuple(get_Block_cfgpred(n, i)));
  68     }
  69     new_block = equivalent_node(n);
  70     if (new_block != n)
  71       exchange (n, new_block);
  72
  73   } else if (get_opt_optimize() && (get_irn_mode(n) == mode_X)) {
  74     /* We will soon visit a block.  Optimize it before visiting! */
  75     ir_node *b        = get_nodes_block(n);
  76
  77     if (!is_Bad(b)) {
  78       new_block = equivalent_node(b);
  79
  80       while (irn_not_visited(b) && (!is_Bad(new_block)) && (new_block != b)) {
  81         /* We would have to run gigo if new is bad, so we
  82            promote it directly below. Nevertheless, we somtimes reach a block
  83            the first time through a dataflow node.  In this case we optimized the
  84            block as such and have to promote the Bad here. */
  85         assert(((b == new_block) ||
  86             get_opt_control_flow_straightening() ||
  87             get_opt_control_flow_weak_simplification()) &&
  88            ("strange flag setting"));
  89         exchange (b, new_block);
  90         b = new_block;
  91         new_block = equivalent_node(b);
  92       }
  93       b = new_block;
  94     }
  95
  96     /*
  97      * BEWARE: do not kill floating notes here as they might be needed in
  98      * valid blocks because of global CSE.
  99      */
 100     if (is_Bad(b) && get_opt_normalize() &&
 101         get_op_pinned(get_irn_op(n)) == op_pin_state_pinned)
 102       exchange(n, new_Bad());
 103   }
 104 }
 105
 106 /**
 107  * Collects all Phi nodes in link list of Block.
 108  * Marks all blocks "block_visited" if they contain a node other
 109  * than Jmp.
 110  * Replaces n by Bad if n is unreachable control flow. We do that
 111  * in the post walker, so we catch all blocks.
 112  */
 113 static void collect_nodes(ir_node *n, void *env) {
 114   irg_dom_state *dom_state = env;
 115
 116   if (is_no_Block(n)) {
 117     ir_node *b = get_nodes_block(n);
 118
 119     /*
 120      * BEWARE: do not kill floating notes here as they might be needed in
 121      * valid blocks because of global CSE.
 122      */
 123     if (is_Bad(b) &&
 124         get_op_pinned(get_irn_op(n)) == op_pin_state_pinned) {
 125       /* previous merge_blocks() may have killed dead blocks */
 126       exchange(n, new_Bad());
 127     }
 128     else if ((get_irn_op(n) == op_Phi)) {
 129       /* Collect Phi nodes to compact ins along with block's ins. */
 130       set_irn_link(n, get_irn_link(b));
 131       set_irn_link(b, n);
 132     } else if ((get_irn_op(n) != op_Jmp) && !is_Bad(b)) {  /* Check for non empty block. */
 133       mark_Block_block_visited(b);
 134     }
 135   }
 136   else {
 137     /* delete dead blocks: if we have dominator information, this can easily be detected
 138      * BEWARE: don't kill the end block */
 139     if (*dom_state == dom_consistent &&
 140         n != get_irg_end_block(current_ir_graph) &&
 141         get_Block_dom_depth(n) == -1 &&
 142         get_opt_unreachable_code()) {
 143       exchange (n, new_Bad());
 144     }
 145   }
 146 }
 147
 148 /** Returns true if pred is predecessor of block. */
 149 static int is_pred_of(ir_node *pred, ir_node *b) {
 150   int i;
 151   for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
 152     ir_node *b_pred = get_nodes_block(get_Block_cfgpred(b, i));
 153     if (b_pred == pred) return 1;
 154   }
 155   return 0;
 156 }
 157
 158
 159 /** Test wether we can optimize away pred block pos of b.
 160  *
 161  *  @param  b    A block node.
 162  *  @param  pos  The position of the predecessor block to judge about.
 163  *
 164  *  @returns     The number of predecessors
 165  *
 166  *  The test is rather tricky.
 167  *
 168  *  The situation is something like the following:
 169  *
 170  *                 if-block
 171  *                  /   \
 172  *              then-b  else-b
 173  *                  \   /
 174  *                    b
 175  *
 176  *     b merges the control flow of an if-then-else.  We may not remove
 177  *     the 'then' _and_ the 'else' block of an 'if' if there is a Phi
 178  *     node in b, even if both are empty.  The destruction of this Phi
 179  *     requires that a copy is added before the merge.  We have to
 180  *     keep one of the case blocks to place the copies in.
 181  *
 182  *     To perform the test for pos, we must regard preds before pos
 183  *     as already removed.
 184  **/
 185 static int test_whether_dispensable(ir_node *b, int pos) {
 186   int i, j, n_preds = 1;
 187   int dispensable = 1;
 188   ir_node *cfop = get_Block_cfgpred(b, pos);
 189   ir_node *pred = get_nodes_block(cfop);
 190
 191   if (get_Block_block_visited(pred) + 1
 192       < get_irg_block_visited(current_ir_graph)) {
 193
 194     if (!get_opt_optimize() || !get_opt_control_flow_strong_simplification()) {
 195       /* Mark block so that is will not be removed: optimization is turned off. */
 196       set_Block_block_visited(pred, get_irg_block_visited(current_ir_graph)-1);
 197       return 1;
 198     }
 199
 200     /* Seems to be empty. At least we detected this in collect_nodes. */
 201     if (!get_irn_link(b)) {
 202       /* There are no Phi nodes ==> all predecessors are dispensable. */
 203       n_preds = get_Block_n_cfgpreds(pred);
 204     } else {
 205       /* b's pred blocks and pred's pred blocks must be pairwise disjunct.
 206          Work preds < pos as if they were already removed. */
 207       for (i = 0; i < pos; i++) {
 208         ir_node *b_pred = get_nodes_block(get_Block_cfgpred(b, i));
 209         if (get_Block_block_visited(b_pred) + 1
 210             < get_irg_block_visited(current_ir_graph)) {
 211           for (j = 0; j < get_Block_n_cfgpreds(b_pred); j++) {
 212             ir_node *b_pred_pred = get_nodes_block(get_Block_cfgpred(b_pred, j));
 213             if (is_pred_of(b_pred_pred, pred)) dispensable = 0;
 214           }
 215         } else {
 216           if (is_pred_of(b_pred, pred)) dispensable = 0;
 217         }
 218       }
 219       for (i = pos +1; i < get_Block_n_cfgpreds(b); i++) {
 220         ir_node *b_pred = get_nodes_block(get_Block_cfgpred(b, i));
 221         if (is_pred_of(b_pred, pred)) dispensable = 0;
 222       }
 223       if (!dispensable) {
 224         set_Block_block_visited(pred, get_irg_block_visited(current_ir_graph)-1);
 225         n_preds = 1;
 226       } else {
 227         n_preds = get_Block_n_cfgpreds(pred);
 228       }
 229     }
 230   }
 231
 232   return n_preds;
 233 }
 234
 235
 236 /* This method removed Bad cf preds from Blocks and Phis, and removes
 237  * empty blocks.  A block is empty if it only contains Phi and Jmp nodes.
 238  *
 239  * We first adapt Phi nodes, then Block nodes, as we need the old ins
 240  * of the Block to adapt the Phi nodes.  We do this by computing new
 241  * in arrays, and then replacing the old ones.  So far we compute new in arrays
 242  * for all nodes, not regarding whether there is a possibility for optimization.
 243  *
 244  * For each predecessor p of a Block b there are three cases:
 245  *  1. The predecessor p is a Bad node:  just skip it.  The in array of b shrinks by one.
 246  *  2. The predecessor p is empty.  Remove p.  All predecessors of p are now
 247  *     predecessors of b.
 248  *  3. The predecessor p is a block containing useful code.  Just keep p as is.
 249  *
 250  * For Phi nodes f we have to check the conditions at the Block of f.
 251  * For cases 1 and 3 we proceed as for Blocks.  For case 2 we can have two
 252  * cases:
 253  *  2a: The old precessor of the Phi f is a Phi pred_f IN THE BLOCK REMOVED.  In this
 254  *      case we proceed as for blocks. We remove pred_f.  All
 255  *      predecessors of pred_f now are predecessors of f.
 256  *  2b: The old predecessor of f is NOT in the block removed. It might be a Phi, too.
 257  *      We have to replicate f for each predecessor of the removed block. Or, with
 258  *      other words, the removed predecessor block has exactly one predecessor.
 259  *
 260  * Further there is a special case for self referencing blocks:
 261  *
 262  *    then_b     else_b                              then_b  else_b
 263  *       \      /                                      \      /
 264  *        \    /                                        |    /
 265  *        pred_b                                        |   /
 266  *         |   ____                                     |  /  ____
 267  *         |  |    |                                    |  | |    |
 268  *         |  |    |       === optimized to ===>        \  | |    |
 269  *        loop_b   |                                     loop_b   |
 270  *         |  |    |                                      |  |    |
 271  *         |  |____|                                      |  |____|
 272  *         |                                              |
 273  *
 274  * If there is a Phi in pred_b, but we remove pred_b, we have to generate a
 275  * Phi in loop_b, that has the ins of the Phi in pred_b and a self referencing
 276  * backedge.
 277  * @@@ It is negotiable whether we should do this ... there might end up a copy
 278  * from the Phi in the loop when removing the Phis.
 279  */
 280 static void optimize_blocks(ir_node *b, void *env) {
 281   int i, j, k, max_preds, n_preds;
 282   ir_node *pred, *phi;
 283   ir_node **in;
 284
 285   /* Count the number of predecessor if this block is merged with pred blocks
 286      that are empty. */
 287   max_preds = 0;
 288   for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
 289     max_preds += test_whether_dispensable(b, i);
 290   }
 291   in = (ir_node **) malloc(max_preds * sizeof(ir_node *));
 292
 293 /*-
 294   printf(" working on "); DDMN(b);
 295   for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
 296     pred = get_nodes_block(get_Block_cfgpred(b, i));
 297     if (is_Bad(get_Block_cfgpred(b, i))) {
 298       printf("  removing Bad %i\n ", i);
 299     } else if (get_Block_block_visited(pred) +1
 300            < get_irg_block_visited(current_ir_graph)) {
 301       printf("  removing pred %i ", i); DDMN(pred);
 302     } else { printf("  Nothing to do for "); DDMN(pred); }
 303   }
 304   * end Debug output -*/
 305
 306   /*- Fix the Phi nodes -*/
 307   phi = get_irn_link(b);
 308   while (phi) {
 309     assert(get_irn_op(phi) == op_Phi);
 310     /* Find the new predecessors for the Phi */
 311     n_preds = 0;
 312     for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
 313       pred = get_nodes_block(get_Block_cfgpred(b, i));
 314       if (is_Bad(get_Block_cfgpred(b, i))) {
 315         /* case Phi 1: Do nothing */
 316       } else if (get_Block_block_visited(pred) + 1
 317                  < get_irg_block_visited(current_ir_graph)) {
 318         /* case Phi 2: It's an empty block and not yet visited. */
 319         ir_node *phi_pred = get_Phi_pred(phi, i);
 320
 321         for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
 322           if (get_nodes_block(phi_pred) == pred) {
 323             /* case Phi 2a: */
 324             assert(get_irn_op(phi_pred) == op_Phi);  /* Block is empty!! */
 325             in[n_preds] = get_Phi_pred(phi_pred, j);
 326           } else {
 327             /* case Phi 2b: */
 328             in[n_preds] = phi_pred;
 329           }
 330           n_preds++;
 331         }
 332
 333         /* The Phi_pred node is replaced now if it is a Phi.
 334
 335            Somehow the removed Phi node can be used legally in loops.
 336            Therefore we replace the old phi by the new one.
 337
 338            Further we have to remove the old Phi node by replacing it
 339            by Bad.  Else it will remain in the keepalive array of End
 340            and cause illegal situations.  So if there is no loop, we should
 341            replace it by Bad.
 342         */
 343         if (get_nodes_block(phi_pred) == pred) {
 344           /* remove the Phi as it might be kept alive. Further there
 345              might be other users. */
 346           exchange(phi_pred, phi);  /* geht, ist aber doch semantisch falsch! Warum?? */
 347         }
 348
 349       } else {
 350         /* case Phi 3: */
 351         in[n_preds] = get_Phi_pred(phi, i);
 352         n_preds ++;
 353       }
 354     }
 355
 356     /* Fix the node */
 357     if (n_preds == 1)
 358       /* By removal of Bad ins the Phi might be degenerated. */
 359       exchange(phi, in[0]);
 360     else
 361       set_irn_in(phi, n_preds, in);
 362
 363     phi = get_irn_link(phi);
 364   }
 365
 366   /*- This happens only if merge between loop backedge and single loop entry.
 367       See special case above. -*/
 368   for (k = 0; k < get_Block_n_cfgpreds(b); k++) {
 369     pred = get_nodes_block(get_Block_cfgpred(b, k));
 370     if (get_Block_block_visited(pred)+1 < get_irg_block_visited(current_ir_graph)) {
 371       phi = get_irn_link(pred);
 372       while (phi) {
 373         if (get_irn_op(phi) == op_Phi) {
 374           set_nodes_block(phi, b);
 375
 376           n_preds = 0;
 377           for (i = 0; i < k; i++) {
 378             pred = get_nodes_block(get_Block_cfgpred(b, i));
 379             if (is_Bad(get_Block_cfgpred(b, i))) {
 380               /* Do nothing */
 381             } else if (get_Block_block_visited(pred) +1
 382                < get_irg_block_visited(current_ir_graph)) {
 383               /* It's an empty block and not yet visited. */
 384               for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
 385                 /* @@@ Hier brauche ich Schleifeninformation!!! Kontrollflusskante
 386                    muss Rueckwaertskante sein! (An allen vier in[n_preds] = phi
 387                    Anweisungen.) Trotzdem tuts bisher!! */
 388                 in[n_preds] = phi;
 389                 n_preds++;
 390               }
 391             } else {
 392               in[n_preds] = phi;
 393               n_preds++;
 394             }
 395           }
 396           for (i = 0; i < get_Phi_n_preds(phi); i++) {
 397             in[n_preds] = get_Phi_pred(phi, i);
 398             n_preds++;
 399           }
 400           for (i = k+1; i < get_Block_n_cfgpreds(b); i++) {
 401             pred = get_nodes_block(get_Block_cfgpred(b, i));
 402             if (is_Bad(get_Block_cfgpred(b, i))) {
 403               /* Do nothing */
 404             } else if (get_Block_block_visited(pred) +1
 405                < get_irg_block_visited(current_ir_graph)) {
 406               /* It's an empty block and not yet visited. */
 407               for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
 408                 in[n_preds] = phi;
 409                 n_preds++;
 410               }
 411             } else {
 412               in[n_preds] = phi;
 413               n_preds++;
 414             }
 415           }
 416           if (n_preds == 1)
 417             exchange(phi, in[0]);
 418           else
 419             set_irn_in(phi, n_preds, in);
 420         }
 421         phi = get_irn_link(phi);
 422       }
 423     }
 424   }
 425
 426   /*- Fix the block -*/
 427   n_preds = 0;
 428   for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
 429     pred = get_nodes_block(get_Block_cfgpred(b, i));
 430     if (is_Bad(get_Block_cfgpred(b, i))) {
 431       /* case 1: Do nothing */
 432     } else if (get_Block_block_visited(pred) +1
 433            < get_irg_block_visited(current_ir_graph)) {
 434       /* case 2: It's an empty block and not yet visited. */
 435       assert(get_Block_n_cfgpreds(b) > 1);
 436                         /* Else it should be optimized by equivalent_node. */
 437       for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
 438         in[n_preds] = get_Block_cfgpred(pred, j);
 439         n_preds++;
 440       }
 441       /* Remove block as it might be kept alive. */
 442       exchange(pred, b/*new_Bad()*/);
 443     } else {
 444       /* case 3: */
 445       in[n_preds] = get_Block_cfgpred(b, i);
 446       n_preds ++;
 447     }
 448   }
 449   set_irn_in(b, n_preds, in);
 450
 451   free(in);
 452 }
 453
 454
 455 /* Optimizations of the control flow that also reqire changes of Phi nodes.
 456  *
 457  * This optimization performs two passes over the graph.
 458  *
 459  * The first pass collects all Phi nodes in a link list in the block
 460  * nodes.  Further it performs simple control flow optimizations.
 461  * Finally it marks all blocks that do not contain useful
 462  * computations, i.e., these blocks might be removed.
 463  *
 464  * The second pass performs the optimizations intended by this algorithm.
 465  * It walks only over block nodes and adapts these and the Phi nodes in these blocks,
 466  * which it finds in a linked list computed by the first pass.
 467  *
 468  * We use the block_visited flag to mark empty blocks in the first
 469  * phase.
 470  * @@@ It would be better to add a struct in the link field
 471  * that keeps the Phi list and the mark.  Place it on an obstack, as
 472  * we will lose blocks and thereby generate mem leaks.
 473  */
 474 void optimize_cf(ir_graph *irg) {
 475   int i;
 476   ir_node **in;
 477   ir_node *end = get_irg_end(irg);
 478   ir_graph *rem = current_ir_graph;
 479   irg_dom_state dom_state = get_irg_dom_state(current_ir_graph);
 480   current_ir_graph = irg;
 481
 482   /* Handle graph state */
 483   assert(get_irg_phase_state(irg) != phase_building);
 484   if (get_irg_outs_state(current_ir_graph) == outs_consistent)
 485     set_irg_outs_inconsistent(current_ir_graph);
 486   if (get_irg_dom_state(current_ir_graph) == dom_consistent)
 487     set_irg_dom_inconsistent(current_ir_graph);
 488
 489   /* Use block visited flag to mark non-empty blocks. */
 490   inc_irg_block_visited(irg);
 491   irg_walk(end, merge_blocks, collect_nodes, &dom_state);
 492
 493   /* Optimize the standard code. */
 494   irg_block_walk(get_irg_end_block(irg), optimize_blocks, NULL, NULL);
 495
 496   /* Walk all keep alives, optimize them if block, add to new in-array
 497      for end if useful. */
 498   in = NEW_ARR_F (ir_node *, 1);
 499   in[0] = get_nodes_block(end);
 500   inc_irg_visited(current_ir_graph);
 501   for(i = 0; i < get_End_n_keepalives(end); i++) {
 502     ir_node *ka = get_End_keepalive(end, i);
 503     if (irn_not_visited(ka)) {
 504       if ((get_irn_op(ka) == op_Block) && Block_not_block_visited(ka)) {
 505         set_irg_block_visited(current_ir_graph,  /* Don't walk all the way to Start. */
 506               get_irg_block_visited(current_ir_graph)-1);
 507         irg_block_walk(ka, optimize_blocks, NULL, NULL);
 508         mark_irn_visited(ka);
 509         ARR_APP1 (ir_node *, in, ka);
 510       } else if (get_irn_op(ka) == op_Phi) {
 511         mark_irn_visited(ka);
 512         ARR_APP1 (ir_node *, in, ka);
 513       }
 514     }
 515   }
 516   /* DEL_ARR_F(end->in);   GL @@@ tut nicht ! */
 517   end->in = in;
 518
 519   /* after optimize_cf(), only Bad data flow may remain. */
 520   if (irg_vrfy_bads(irg, BAD_DF | BAD_BLOCK | TUPLE)) {
 521     dump_ir_block_graph(irg, "-vrfy-cf");
 522     dump_ir_graph(irg, "-vrfy-cf");
 523     fprintf(stderr, "VRFY_BAD in optimize_cf()\n");
 524   }
 525
 526   current_ir_graph = rem;
 527 }