# include "config.h"
#endif
+#ifdef HAVE_MALLOC_H
+# include <malloc.h>
+#endif
+#ifdef HAVE_ALLOCA_H
+# include <alloca.h>
+#endif
+
#include <assert.h>
#include "xmalloc.h"
#include "firmstat.h"
#include "cfopt.h"
+#include "iropt_dbg.h"
/*------------------------------------------------------------------*/
/* Control flow optimization. */
/* semantics of Phi nodes. */
/*------------------------------------------------------------------*/
+/**
+ * Replace binary Conds that jump twice into the same block
+ * by a simple Jmp.
+ * E.g.
+ * @verbatim
+ * Cond Jmp Bad
+ * / \ | /
+ * ProjX True ProjX False ==> | /
+ * \ / | /
+ * Block Block
+ * @endverbatim
+ *
+ * Such patterns are the result of if-conversion.
+ *
+ * Note that the simple case that Block has only these two
+ * predecessors is already handled in equivalent_node_Block().
+ */
+static void remove_senseless_conds(ir_node *bl, void *data)
+{
+ int i, j;
+ int n = get_Block_n_cfgpreds(bl);
+
+ assert(is_Block(bl));
+
+ /* check every pair of control flow predecessors of this block */
+ for (i = 0; i < n; ++i) {
+ ir_node *pred_i = get_Block_cfgpred(bl, i);
+ ir_node *cond_i = skip_Proj(pred_i);
+
+ for (j = i + 1; j < n; ++j) {
+ ir_node *pred_j = get_Block_cfgpred(bl, j);
+ ir_node *cond_j = skip_Proj(pred_j);
+
+ /* both preds are Projs of the same binary (mode_b selector)
+ Cond: both of its outputs end up here, so it is senseless */
+ if (cond_j == cond_i
+ && get_irn_op(cond_i) == op_Cond
+ && get_irn_mode(get_Cond_selector(cond_i)) == mode_b) {
+
+ /* replace the first pred by an unconditional Jmp from the
+ Cond's block and kill the second one with Bad */
+ ir_node *jmp = new_r_Jmp(current_ir_graph, get_nodes_block(cond_i));
+ set_irn_n(bl, i, jmp);
+ set_irn_n(bl, j, new_Bad());
+
+ DBG_OPT_IFSIM2(cond_i, jmp);
+ break;
+ }
+ }
+ }
+}
+
+
/**
* Removes Tuples from Block control flow predecessors.
* Optimizes blocks with equivalent_node(). This is tricky,
/* see below */
new_block = equivalent_node(n);
- if (new_block != n && ! is_Bad(new_block))
+ if (new_block != n && ! is_Block_dead(new_block))
exchange (n, new_block);
} else if (get_opt_optimize() && (get_irn_mode(n) == mode_X)) {
/* We will soon visit a block. Optimize it before visiting! */
ir_node *b = get_nodes_block(skip_Proj(n));
- if (!is_Bad(b)) {
+ if (!is_Block_dead(b)) {
new_block = equivalent_node(b);
- while (irn_not_visited(b) && (!is_Bad(new_block)) && (new_block != b)) {
- /* We would have to run gigo if new is bad, so we
+ while (irn_not_visited(b) && (!is_Block_dead(new_block)) && (new_block != b)) {
+ /* We would have to run gigo() if new is bad, so we
promote it directly below. Nevertheless, we sometimes reach a block
the first time through a dataflow node. In this case we optimized the
block as such and have to promote the Bad here. */
/* normally, we would create a Bad block here, but this must be
* prevented, so just set it's cf to Bad.
*/
- if (is_Bad(new_block))
- exchange(n, new_Bad());
+ if (is_Block_dead(new_block))
+ exchange(n, new_Bad());
}
}
}
/**
* Remove cf from dead block by inspecting dominance info
* Do not replace blocks by Bad. This optimization shall
- * ensure, that all Bad cfg preds are removed, and no new
- * other Bads are introduced.
+ * ensure that all Bad control flow predecessors are
+ * removed, and no new other Bads are introduced.
*
* Must be run in the post walker.
*/
if (! is_Bad(pred_X)) {
ir_node *pred_bl = get_nodes_block(skip_Proj(pred_X));
- if (is_Bad(pred_bl) || (get_Block_dom_depth(pred_bl) == -1))
+ if (is_Block_dead(pred_bl) || (get_Block_dom_depth(pred_bl) < 0))
exchange (pred_X, new_Bad());
}
}
}
-/** Test wether we can optimize away pred block pos of b.
+/** Test whether we can optimize away pred block pos of b.
*
* @param b A block node.
* @param pos The position of the predecessor block to judge about.
* The test is rather tricky.
*
* The situation is something like the following:
- *
+ * @verbatim
* if-block
* / \
* then-b else-b
* \ /
* b
+ * @endverbatim
*
- * b merges the control flow of an if-then-else. We may not remove
- * the 'then' _and_ the 'else' block of an 'if' if there is a Phi
- * node in b, even if both are empty. The destruction of this Phi
- * requires that a copy is added before the merge. We have to
- * keep one of the case blocks to place the copies in.
+ * b merges the control flow of an if-then-else. We may not remove
+ * the 'then' _and_ the 'else' block of an 'if' if there is a Phi
+ * node in b, even if both are empty. The destruction of this Phi
+ * requires that a copy is added before the merge. We have to
+ * keep one of the case blocks to place the copies in.
*
- * To perform the test for pos, we must regard preds before pos
- * as already removed.
+ * To perform the test for pos, we must regard predecessors before pos
+ * as already removed.
**/
static int test_whether_dispensable(ir_node *b, int pos) {
 int i, j, n_preds = 1;
- int dispensable = 1;
- ir_node *cfop = get_Block_cfgpred(b, pos);
- ir_node *pred = get_nodes_block(cfop);
+ ir_node *pred = get_Block_cfgpred_block(b, pos);
 /* Bad blocks will be optimized away, so we don't need space for them */
- if (is_Bad(pred))
+ if (is_Block_dead(pred))
 return 0;
+ /* NOTE(review): a still-set block-visited flag apparently marks pred as
+ non-empty (see the mark phase in optimize_cf()) -- confirm */
 if (get_Block_block_visited(pred) + 1
 n_preds = get_Block_n_cfgpreds(pred);
 } else {
 /* b's pred blocks and pred's pred blocks must be pairwise disjunct.
- Work preds < pos as if they were already removed. */
+ Handle all pred blocks with preds < pos as if they were already removed. */
 for (i = 0; i < pos; i++) {
- ir_node *b_pred = get_nodes_block(get_Block_cfgpred(b, i));
- if (get_Block_block_visited(b_pred) + 1
+ ir_node *b_pred = get_Block_cfgpred_block(b, i);
+ if (! is_Block_dead(b_pred) &&
+ get_Block_block_visited(b_pred) + 1
 < get_irg_block_visited(current_ir_graph)) {
 for (j = 0; j < get_Block_n_cfgpreds(b_pred); j++) {
- ir_node *b_pred_pred = get_nodes_block(get_Block_cfgpred(b_pred, j));
- if (is_pred_of(b_pred_pred, pred)) dispensable = 0;
+ ir_node *b_pred_pred = get_Block_cfgpred_block(b_pred, j);
+ if (is_pred_of(b_pred_pred, pred))
+ goto non_dispensable;
 }
 } else {
- if (is_pred_of(b_pred, pred)) dispensable = 0;
+ if (is_pred_of(b_pred, pred))
+ goto non_dispensable;
 }
 }
 for (i = pos +1; i < get_Block_n_cfgpreds(b); i++) {
- ir_node *b_pred = get_nodes_block(get_Block_cfgpred(b, i));
- if (is_pred_of(b_pred, pred)) dispensable = 0;
- }
- if (!dispensable) {
- set_Block_block_visited(pred, get_irg_block_visited(current_ir_graph)-1);
- n_preds = 1;
- } else {
- n_preds = get_Block_n_cfgpreds(pred);
+ ir_node *b_pred = get_Block_cfgpred_block(b, i);
+ if (is_pred_of(b_pred, pred))
+ goto non_dispensable;
 }
+ /* if we get here, the block is dispensable */
+ n_preds = get_Block_n_cfgpreds(pred);
 }
 }
 return n_preds;
+
+/* pred must be kept: mark it as visited (so it will not be merged away)
+ and report exactly one predecessor slot for it */
+non_dispensable:
+ set_Block_block_visited(pred, get_irg_block_visited(current_ir_graph)-1);
+ return 1;
}
+/**
+ * Record used to defer the exchange of Phi nodes: exchanging them
+ * immediately would break later lookups, so the pairs are collected
+ * first and exchanged after all Phis have been optimized.
+ */
+typedef struct _defer_ex_phi {
+ ir_node *phi_pred; /**< the previous Phi node that will be replaced */
+ ir_node *phi; /**< the new Phi node that replaces phi_pred */
+} defer_ex_phi;
/**
- * This method removed Bad cf preds from Blocks and Phis, and removes
+ * This method removes Bad cf predecessors from Blocks and Phis, and removes
* empty blocks. A block is empty if it only contains Phi and Jmp nodes.
*
* We first adapt Phi nodes, then Block nodes, as we need the old ins
* for all nodes, not regarding whether there is a possibility for optimization.
*
* For each predecessor p of a Block b there are three cases:
- * 1. The predecessor p is a Bad node: just skip it. The in array of b shrinks by one.
- * 2. The predecessor p is empty. Remove p. All predecessors of p are now
- * predecessors of b.
- * 3. The predecessor p is a block containing useful code. Just keep p as is.
+ * -1. The predecessor p is a Bad node: just skip it. The in array of b shrinks by one.
+ * -2. The predecessor p is empty. Remove p. All predecessors of p are now
+ * predecessors of b.
+ * -3. The predecessor p is a block containing useful code. Just keep p as is.
*
* For Phi nodes f we have to check the conditions at the Block of f.
* For cases 1 and 3 we proceed as for Blocks. For case 2 we can have two
* cases:
- * 2a: The old precessor of the Phi f is a Phi pred_f IN THE BLOCK REMOVED. In this
+ * -2a: The old predecessor of the Phi f is a Phi pred_f IN THE BLOCK REMOVED. In this
* case we proceed as for blocks. We remove pred_f. All
* predecessors of pred_f now are predecessors of f.
- * 2b: The old predecessor of f is NOT in the block removed. It might be a Phi, too.
+ * -2b: The old predecessor of f is NOT in the block removed. It might be a Phi, too.
* We have to replicate f for each predecessor of the removed block. Or, with
* other words, the removed predecessor block has exactly one predecessor.
*
* Further there is a special case for self referencing blocks:
+ * @verbatim
*
* then_b else_b then_b else_b
* \ / \ /
* \ / | /
* pred_b | /
- * | ____ | /
+ * | ____ | / ____
* | | | | | | |
* | | | === optimized to ===> \ | | |
* loop_b | loop_b |
* | | | | | |
* | |____| | |____|
* | |
+ * @endverbatim
*
* If there is a Phi in pred_b, but we remove pred_b, we have to generate a
* Phi in loop_b, that has the ins of the Phi in pred_b and a self referencing
int i, j, k, n, max_preds, n_preds, p_preds;
ir_node *pred, *phi;
ir_node **in;
+ defer_ex_phi *defers;
/* Count the number of predecessor if this block is merged with pred blocks
that are empty. */
}
in = xmalloc(max_preds * sizeof(*in));
+ defers = NEW_ARR_F(defer_ex_phi, 0);
+
/*-
printf(" working on "); DDMN(b);
for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
/* Find the new predecessors for the Phi */
p_preds = 0;
for (i = 0, n = get_Block_n_cfgpreds(b); i < n; ++i) {
- pred = get_nodes_block(get_Block_cfgpred(b, i));
+ pred = get_Block_cfgpred_block(b, i);
if (is_Bad(get_Block_cfgpred(b, i))) {
/* case Phi 1: Do nothing */
Somehow the removed Phi node can be used legally in loops.
Therefore we replace the old phi by the new one.
+ This must be done _AFTER_ all Phis are optimized, or
+ it will fail if two Phis use the same pred_Phi.
+
+ FIXME: Is the following true? We ALWAYS replace it by the new one.
Further we have to remove the old Phi node by replacing it
- by Bad. Else it will remain in the keepalive array of End
+ by Bad. Else it will remain in the keep alive array of End
and cause illegal situations. So if there is no loop, we should
replace it by Bad.
*/
if (get_nodes_block(phi_pred) == pred) {
+ int i;
/* remove the Phi as it might be kept alive. Further there
might be other users. */
- exchange(phi_pred, phi); /* geht, ist aber doch semantisch falsch! Warum?? */
+ for (i = ARR_LEN(defers) - 1; i >= 0; --i)
+ if (defers[i].phi_pred == phi_pred)
+ break;
+
+ if (i < 0) {
+ /* we have a new replacement */
+ defer_ex_phi elem;
+
+ elem.phi_pred = phi_pred;
+ elem.phi = phi;
+ ARR_APP1(defer_ex_phi, defers, elem);
+ }
}
} else {
/* case Phi 3: */
phi = get_irn_link(phi);
}
+ /* now, exchange all Phis */
+ for (i = ARR_LEN(defers) - 1; i >= 0; --i) {
+ exchange(defers[i].phi_pred, defers[i].phi);
+ }
+ DEL_ARR_F(defers);
+
/*- This happens only if merge between loop backedge and single loop entry.
See special case above. -*/
for (k = 0, n = get_Block_n_cfgpreds(b); k < n; ++k) {
< get_irg_block_visited(current_ir_graph)) {
/* case 2: It's an empty block and not yet visited. */
assert(get_Block_n_cfgpreds(b) > 1);
- /* Else it should be optimized by equivalent_node. */
+ /* Else it should be optimized by equivalent_node. */
for (j = 0; j < get_Block_n_cfgpreds(pred); j++) {
ir_node *pred_block = get_Block_cfgpred(pred, j);
* phase.
* @@@ It would be better to add a struct in the link field
* that keeps the Phi list and the mark. Place it on an obstack, as
- * we will lose blocks and thereby generate mem leaks.
+ * we will lose blocks and thereby generate memory leaks.
*/
void optimize_cf(ir_graph *irg) {
- int i, n;
+ int i, j, n;
ir_node **in;
ir_node *end = get_irg_end(irg);
ir_graph *rem = current_ir_graph;
irg_dom_state dom_state = get_irg_dom_state(current_ir_graph);
current_ir_graph = irg;
+ /* if the graph is not pinned, we cannot determine empty blocks */
+ assert(get_irg_pinned(irg) != op_pin_state_floats &&
+ "Control flow optimization need a pinned graph");
+
/* Handle graph state */
assert(get_irg_phase_state(irg) != phase_building);
- if (get_irg_outs_state(current_ir_graph) == outs_consistent)
- set_irg_outs_inconsistent(current_ir_graph);
- if (get_irg_dom_state(current_ir_graph) == dom_consistent)
- set_irg_dom_inconsistent(current_ir_graph);
+ set_irg_outs_inconsistent(current_ir_graph);
+ set_irg_extblk_inconsistent(current_ir_graph);
+ set_irg_loopinfo_inconsistent(current_ir_graph);
+ set_irg_doms_inconsistent(current_ir_graph);
if (dom_state == dom_consistent && get_opt_optimize() && get_opt_unreachable_code()) {
ir_node *end = get_irg_end(irg);
- /* we have dominace info, we can kill dead block */
+ /* we have dominance info, we can kill dead block */
irg_block_walk_graph(irg, NULL, remove_dead_block_cf, NULL);
/* fix the keep-alives */
for (i = 0, n = get_End_n_keepalives(end); i < n; ++i) {
ir_node *ka = get_End_keepalive(end, i);
- if (is_Block(ka) && (get_Block_dom_depth(ka) == -1))
- set_End_keepalive(end, i, new_Bad());
- if (is_Phi(ka) && (get_Block_dom_depth(get_nodes_block(ka)) == -1))
- set_End_keepalive(end, i, new_Bad());
+ if (is_Block(ka)) {
+ /* do NOT keep dead blocks */
+ if (get_Block_dom_depth(ka) == -1)
+ set_End_keepalive(end, i, new_Bad());
+ }
+ else if (is_Block_dead(get_nodes_block(ka)) ||
+ get_Block_dom_depth(get_nodes_block(ka)) == -1)
+ /* do NOT keep nodes in dead blocks */
+ set_End_keepalive(end, i, new_Bad());
}
}
+ irg_block_walk_graph(current_ir_graph, NULL, remove_senseless_conds, NULL);
/* Use block visited flag to mark non-empty blocks. */
inc_irg_block_visited(irg);
/* Walk all keep alives, optimize them if block, add to new in-array
for end if useful. */
- in = NEW_ARR_F (ir_node *, 1);
- in[0] = get_nodes_block(end);
+ n = get_End_n_keepalives(end);
+ if (n > 0)
+ NEW_ARR_A (ir_node *, in, n);
inc_irg_visited(current_ir_graph);
- for (i = 0; i < get_End_n_keepalives(end); i++) {
+ /* fix the keep alive */
+ for (i = j = 0; i < n; i++) {
ir_node *ka = get_End_keepalive(end, i);
if (irn_not_visited(ka)) {
- if ((get_irn_op(ka) == op_Block) && Block_not_block_visited(ka)) {
+ ir_op *op = get_irn_op(ka);
+
+ if ((op == op_Block) && Block_not_block_visited(ka)) {
set_irg_block_visited(current_ir_graph, /* Don't walk all the way to Start. */
get_irg_block_visited(current_ir_graph)-1);
irg_block_walk(ka, optimize_blocks, NULL, NULL);
mark_irn_visited(ka);
- ARR_APP1 (ir_node *, in, ka);
- } else if (get_irn_op(ka) == op_Phi) {
+ in[j++] = ka;
+ } else if (op == op_Phi) {
+ mark_irn_visited(ka);
+ if (! is_Block_dead(get_nodes_block(ka)))
+ in[j++] = ka;
+ } else if (is_op_keep(op)) {
mark_irn_visited(ka);
- ARR_APP1 (ir_node *, in, ka);
+ if (! is_Block_dead(get_nodes_block(ka)))
+ in[j++] = ka;
}
}
}
- /* DEL_ARR_F(end->in); GL @@@ tut nicht ! */
- end->in = in;
+ if (j != n)
+ set_End_keepalives(end, j, in);
- /* the verifyer doesn't work yet with floating nodes */
+ /* the verifier doesn't work yet with floating nodes */
if (get_irg_pinned(irg) == op_pin_state_pinned) {
/* after optimize_cf(), only Bad data flow may remain. */
if (irg_vrfy_bads(irg, BAD_DF | BAD_BLOCK | TUPLE)) {