new inlineing heuristic

[libfirm] / ir / ir / irgopt.c
diff --git a/ir/ir/irgopt.c b/ir/ir/irgopt.c

index f97e449..15cb664 100644 (file)
--- a/ir/ir/irgopt.c
+++ b/ir/ir/irgopt.c
@@ -1,18 +1,22 @@
-/* Coyright (C) 1998 - 2002 by Universitaet Karlsruhe
-** All rights reserved.
-**
-** Author: Christian Schaefer, Goetz Lindenmaier, Sebastian Felis
-**
-** Optimizations for a whole ir graph, i.e., a procedure.
-*/
+/*
+ * Project:     libFIRM
+ * File name:   ir/ir/irgopt.c
+ * Purpose:     Optimizations for a whole ir graph, i.e., a procedure.
+ * Author:      Christian Schaefer, Goetz Lindenmaier
+ * Modified by: Sebastian Felis
+ * Created:
+ * CVS-ID:      $Id$
+ * Copyright:   (c) 1998-2003 Universität Karlsruhe
+ * Licence:     This file protected by GPL -  GNU GENERAL PUBLIC LICENSE.
+ */
  
-/* $Id$ */
  
  #ifdef HAVE_CONFIG_H
  # include <config.h>
  #endif
  
  # include <assert.h>
+# include <stdbool.h>
  
  # include "irprog.h"
  # include "irgopt.h"
@@ -21,10 +25,10 @@
  # include "iropt_t.h"
  # include "irgwalk.h"
  # include "ircons.h"
-# include "misc.h"
  # include "irgmod.h"
  # include "array.h"
  # include "pset.h"
+# include "eset.h"
  # include "pdeq.h"       /* Fuer code placement */
  # include "irouts.h"
  # include "irloop.h"
@@ -43,13 +47,21 @@ static void init_link (ir_node *n, void *env) {
    set_irn_link(n, NULL);
  }
  
+#if 0   /* Old version. Avoids Ids.
+          This is not necessary:  we do a postwalk, and get_irn_n
+          removes ids anyways.  So it's much cheaper to call the
+          optimization less often and use the exchange() algorithm. */
  static void
  optimize_in_place_wrapper (ir_node *n, void *env) {
-  int i;
-  ir_node *optimized;
-
-  for (i = 0; i < get_irn_arity(n); i++) {
-    optimized = optimize_in_place_2(get_irn_n(n, i));
+  int i, irn_arity;
+  ir_node *optimized, *old;
+
+  irn_arity = get_irn_arity(n);
+  for (i = 0; i < irn_arity; i++) {
+    /* get_irn_n skips Id nodes, so comparison old != optimized does not
+       show all optimizations. Therefore always set new predecessor. */
+    old = get_irn_n(n, i);
+    optimized = optimize_in_place_2(old);
      set_irn_n(n, i, optimized);
    }
  
@@ -58,6 +70,15 @@ optimize_in_place_wrapper (ir_node *n, void *env) {
      if (optimized != n) exchange (n, optimized);
    }
  }
+#else
+static void
+optimize_in_place_wrapper (ir_node *n, void *env) {
+  ir_node *optimized = optimize_in_place_2(n);
+  if (optimized != n) exchange (n, optimized);
+}
+#endif
+
+
  
  void
  local_optimize_graph (ir_graph *irg) {
@@ -110,7 +131,7 @@ get_new_node (ir_node * n)
     in a Block. */
  static INLINE int
  compute_new_arity(ir_node *b) {
-  int i, res;
+  int i, res, irn_arity;
    int irg_v, block_v;
  
    irg_v = get_irg_block_visited(current_ir_graph);
@@ -121,8 +142,8 @@ compute_new_arity(ir_node *b) {
      return block_v - irg_v;
    } else {
      /* compute the number of good predecessors */
-    res = get_irn_arity(b);
-    for (i = 0; i < get_irn_arity(b); i++)
+    res = irn_arity = get_irn_arity(b);
+    for (i = 0; i < irn_arity; i++)
        if (get_irn_opcode(get_irn_n(b, i)) == iro_Bad) res--;
      /* save it in the flag. */
      set_Block_block_visited(b, irg_v + res);
@@ -157,6 +178,11 @@ copy_node (ir_node *n, void *env) {
    ir_node *nn, *block;
    int new_arity;
  
+  /* The end node looses it's flexible in array.  This doesn't matter,
+     as dead node elimination builds End by hand, inlineing doesn't use
+     the End node. */
+  //assert(n->op == op_End ||  ((_ARR_DESCR(n->in))->cookie != ARR_F_MAGIC));
+
    if (get_irn_opcode(n) == iro_Block) {
      block = NULL;
      new_arity = compute_new_arity(n);
@@ -193,7 +219,7 @@ copy_node (ir_node *n, void *env) {
  static void
  copy_preds (ir_node *n, void *env) {
    ir_node *nn, *block;
-  int i, j;
+  int i, j, irn_arity;
  
    nn = get_new_node(n);
  
@@ -204,10 +230,11 @@ copy_preds (ir_node *n, void *env) {
    if (get_irn_opcode(n) == iro_Block) {
      /* Don't copy Bad nodes. */
      j = 0;
-    for (i = 0; i < get_irn_arity(n); i++)
+    irn_arity = get_irn_arity(n);
+    for (i = 0; i < irn_arity; i++)
        if (get_irn_opcode(get_irn_n(n, i)) != iro_Bad) {
         set_irn_n (nn, j, get_new_node(get_irn_n(n, i)));
-       //if (is_backedge(n, i)) set_backedge(nn, j);
+       /*if (is_backedge(n, i)) set_backedge(nn, j);*/
         j++;
        }
      /* repair the block visited flag from above misuse. Repair it in both
@@ -228,10 +255,11 @@ copy_preds (ir_node *n, void *env) {
      block = get_nodes_Block(n);
      set_irn_n (nn, -1, get_new_node(block));
      j = 0;
-    for (i = 0; i < get_irn_arity(n); i++)
+    irn_arity = get_irn_arity(n);
+    for (i = 0; i < irn_arity; i++)
        if (get_irn_opcode(get_irn_n(block, i)) != iro_Bad) {
         set_irn_n (nn, j, get_new_node(get_irn_n(n, i)));
-       //if (is_backedge(n, i)) set_backedge(nn, j);
+       /*if (is_backedge(n, i)) set_backedge(nn, j);*/
         j++;
        }
      /* If the pre walker reached this Phi after the post walker visited the
@@ -242,7 +270,8 @@ copy_preds (ir_node *n, void *env) {
      if (get_irn_arity(n) == 1)
        exchange(n, get_irn_n(n, 0));
    } else {
-    for (i = -1; i < get_irn_arity(n); i++)
+    irn_arity = get_irn_arity(n);
+    for (i = -1; i < irn_arity; i++)
        set_irn_n (nn, i, get_new_node(get_irn_n(n, i)));
    }
    /* Now the new node is complete.  We can add it to the hash table for cse.
@@ -253,10 +282,10 @@ copy_preds (ir_node *n, void *env) {
  
  /* Copies the graph recursively, compacts the keepalive of the end node. */
  static void
-copy_graph () {
+copy_graph (void) {
    ir_node *oe, *ne; /* old end, new end */
    ir_node *ka;      /* keep alive */
-  int i;
+  int i, irn_arity;
  
    oe = get_irg_end(current_ir_graph);
    /* copy the end node by hand, allocate dynamic in array! */
@@ -279,7 +308,8 @@ copy_graph () {
    /** ... and now the keep alives. **/
    /* First pick the not marked block nodes and walk them.  We must pick these
       first as else we will oversee blocks reachable from Phis. */
-  for (i = 0; i < get_irn_arity(oe); i++) {
+  irn_arity = get_irn_arity(oe);
+  for (i = 0; i < irn_arity; i++) {
      ka = get_irn_n(oe, i);
      if ((get_irn_op(ka) == op_Block) &&
         (get_irn_visited(ka) < get_irg_visited(current_ir_graph))) {
@@ -291,7 +321,8 @@ copy_graph () {
    }
  
    /* Now pick the Phis.  Here we will keep all! */
-  for (i = 0; i < get_irn_arity(oe); i++) {
+  irn_arity = get_irn_arity(oe);
+  for (i = 0; i < irn_arity; i++) {
      ka = get_irn_n(oe, i);
      if ((get_irn_op(ka) == op_Phi)) {
        if (get_irn_visited(ka) < get_irg_visited(current_ir_graph)) {
@@ -309,7 +340,7 @@ copy_graph () {
     Then fixes the fields in current_ir_graph containing nodes of the
     graph.  */
  static void
-copy_graph_env () {
+copy_graph_env (void) {
    ir_node *old_end;
    /* Not all nodes remembered in current_ir_graph might be reachable
       from the end node.  Assure their link is set to NULL, so that
@@ -353,11 +384,13 @@ copy_graph_env () {
      copy_preds(get_irg_bad(current_ir_graph), NULL);
    }
    set_irg_bad(current_ir_graph, get_new_node(get_irg_bad(current_ir_graph)));
+  /* GL removed: we need unknown with mode for analyses.
    if (get_irn_link(get_irg_unknown(current_ir_graph)) == NULL) {
      copy_node(get_irg_unknown(current_ir_graph), NULL);
      copy_preds(get_irg_unknown(current_ir_graph), NULL);
    }
    set_irg_unknown(current_ir_graph, get_new_node(get_irg_unknown(current_ir_graph)));
+  */
  }
  
  /* Copies all reachable nodes to a new obstack.  Removes bad inputs
@@ -504,10 +537,13 @@ void remove_bad_predecessors(ir_graph *irg) {
  /*  Funcionality for inlining                                         */
  /**********************************************************************/
  
-/* Copy node for inlineing.  Copies the node by calling copy_node and
-   then updates the entity if it's a local one.  env must be a pointer
-   to the frame type of the procedure. The new entities must be in
-   the link field of the entities. */
+/* Copy node for inlineing.  Updates attributes that change when
+ * inlineing but not for dead node elimination.
+ *
+ * Copies the node by calling copy_node and then updates the entity if
+ * it's a local one.  env must be a pointer of the frame type of the
+ * inlined procedure. The new entities must be in the link field of
+ * the entities. */
  static INLINE void
  copy_node_inline (ir_node *n, void *env) {
    ir_node *new;
@@ -520,9 +556,13 @@ copy_node_inline (ir_node *n, void *env) {
      if (get_entity_owner(get_Sel_entity(n)) == frame_tp) {
        set_Sel_entity(new, get_entity_link(get_Sel_entity(n)));
      }
+  } else if (get_irn_op(n) == op_Block) {
+    new = get_new_node (n);
+    new->attr.block.irg = current_ir_graph;
    }
  }
  
+
  void inline_method(ir_node *call, ir_graph *called_graph) {
    ir_node *pre_call;
    ir_node *post_call, *post_bl;
@@ -532,11 +572,11 @@ void inline_method(ir_node *call, ir_graph *called_graph) {
    ir_node **cf_pred;
    ir_node *ret, *phi;
    ir_node *cf_op = NULL, *bl;
-  int arity, n_ret, n_exc, n_res, i, j, rem_opt;
+  int arity, n_ret, n_exc, n_res, i, j, rem_opt, irn_arity;
    type *called_frame;
  
    if (!get_optimize() || !get_opt_inline()) return;
-  /** Turn off optimizations, this can cause problems when allocating new nodes. **/
+  /* --  Turn off optimizations, this can cause problems when allocating new nodes. -- */
    rem_opt = get_optimize();
    set_optimize(0);
  
@@ -547,21 +587,23 @@ void inline_method(ir_node *call, ir_graph *called_graph) {
    if (get_irg_outs_state(current_ir_graph) == outs_consistent)
      set_irg_outs_inconsistent(current_ir_graph);
  
-  /** Check preconditions **/
+  /* -- Check preconditions -- */
    assert(get_irn_op(call) == op_Call);
-  /* @@@ TODO does not work for InterfaceIII.java after cgana
+  /* @@@ does not work for InterfaceIII.java after cgana
       assert(get_Call_type(call) == get_entity_type(get_irg_ent(called_graph)));
       assert(smaller_type(get_entity_type(get_irg_ent(called_graph)),
       get_Call_type(call)));
    */
    assert(get_type_tpop(get_Call_type(call)) == type_method);
-  if (called_graph == current_ir_graph) return;
-
+  if (called_graph == current_ir_graph) {
+    set_optimize(rem_opt);
+    return;
+  }
  
-  /** Part the Call node into two nodes.  Pre_call collects the parameters of
+  /* --
        the procedure and later replaces the Start node of the called graph.
        Post_call is the old Call node and collects the results of the called
-      graph. Both will end up being a tuple.  **/
+      graph. Both will end up being a tuple.  -- */
    post_bl = get_nodes_Block(call);
    set_irg_current_block(current_ir_graph, post_bl);
    /* XxMxPxP of Start + parameter of Call */
@@ -573,12 +615,12 @@ void inline_method(ir_node *call, ir_graph *called_graph) {
    pre_call = new_Tuple(5, in);
    post_call = call;
  
-  /** Part the block of the Call node into two blocks.
+  /* --
        The new block gets the ins of the old block, pre_call and all its
-      predecessors and all Phi nodes. **/
+      predecessors and all Phi nodes. -- */
    part_block(pre_call);
  
-  /** Prepare state for dead node elimination **/
+  /* -- Prepare state for dead node elimination -- */
    /* Visited flags in calling irg must be >= flag in called irg.
       Else walker and arity computation will not work. */
    if (get_irg_visited(current_ir_graph) <= get_irg_visited(called_graph))
@@ -601,7 +643,7 @@ void inline_method(ir_node *call, ir_graph *called_graph) {
    /* Initialize for compaction of in arrays */
    inc_irg_block_visited(current_ir_graph);
  
-  /*** Replicate local entities of the called_graph ***/
+  /* -- Replicate local entities of the called_graph -- */
    /* copy the entities. */
    called_frame = get_irg_frame_type(called_graph);
    for (i = 0; i < get_class_n_members(called_frame); i++) {
@@ -616,7 +658,7 @@ void inline_method(ir_node *call, ir_graph *called_graph) {
       to inline, calling this inline will not visit the inlined nodes. */
    set_irg_visited(current_ir_graph, get_irg_visited(current_ir_graph)-1);
  
-  /** Performing dead node elimination inlines the graph **/
+  /* -- Performing dead node elimination inlines the graph -- */
    /* Copies the nodes to the obstack of current_ir_graph. Updates links to new
       entities. */
    /* @@@ endless loops are not copied!! -- they should be, I think... */
@@ -628,7 +670,7 @@ void inline_method(ir_node *call, ir_graph *called_graph) {
    set_irg_block_visited(called_graph, get_irg_block_visited(current_ir_graph));
    set_Block_block_visited(get_irg_start_block(called_graph), 0);
  
-  /*** Merge the end of the inlined procedure with the call site ***/
+  /* -- Merge the end of the inlined procedure with the call site -- */
    /* We will turn the old Call node into a Tuple with the following
       predecessors:
       -1:  Block of Tuple.
@@ -639,7 +681,7 @@ void inline_method(ir_node *call, ir_graph *called_graph) {
       3: Phi of Exception memories.
    */
  
-  /** Precompute some values **/
+  /* -- Precompute some values -- */
    end_bl = get_new_node(get_irg_end_block(called_graph));
    end = get_new_node(get_irg_end(called_graph));
    arity = get_irn_arity(end_bl);    /* arity = n_exc + n_ret  */
@@ -650,14 +692,16 @@ void inline_method(ir_node *call, ir_graph *called_graph) {
  
    set_irg_current_block(current_ir_graph, post_bl); /* just to make sure */
  
-  /** archive keepalives **/
-  for (i = 0; i < get_irn_arity(end); i++)
+  /* -- archive keepalives -- */
+  irn_arity = get_irn_arity(end);
+  for (i = 0; i < irn_arity; i++)
      add_End_keepalive(get_irg_end(current_ir_graph), get_irn_n(end, i));
-  /* The new end node will die, but the in array is not on the obstack ... */
-  free_End(end);
  
-  /** Collect control flow from Return blocks to post_calls block. Replace
-      Return nodes by Jump nodes. **/
+  /* The new end node will die.  We need not free as the in array is on the obstack:
+     copy_node only generated 'D' arrays. */
+
+/* --
+      Return nodes by Jump nodes. -- */
    n_ret = 0;
    for (i = 0; i < arity; i++) {
      ir_node *ret;
@@ -669,8 +713,8 @@ void inline_method(ir_node *call, ir_graph *called_graph) {
    }
    set_irn_in(post_bl, n_ret, cf_pred);
  
-  /** Collect results from Return nodes to post_call. Post_call is
-      turned into a tuple. **/
+/* --
+      turned into a tuple.  -- */
    turn_into_tuple(post_call, 4);
    /* First the Memory-Phi */
    n_ret = 0;
@@ -753,41 +797,48 @@ void inline_method(ir_node *call, ir_graph *called_graph) {
    free(res_pred);
    free(cf_pred);
  
-  /*** Correct the control flow to the end node.
-       If the exception control flow from the Call directly branched to the
-       end block we now have the following control flow predecessor pattern:
-       ProjX -> Tuple -> Jmp.
-       We must remove the Jmp along with it's empty block and add Jmp's
-       predecessors as predecessors of this end block. ***/
-  /* find the problematic predecessor of the end block. */
-  end_bl = get_irg_end_block(current_ir_graph);
-  for (i = 0; i < get_Block_n_cfgpreds(end_bl); i++) {
-    cf_op = get_Block_cfgpred(end_bl, i);
-    if (get_irn_op(cf_op) == op_Proj) {
-      cf_op = get_Proj_pred(cf_op);
-      if (get_irn_op(cf_op) == op_Tuple) {
-       cf_op = get_Tuple_pred(cf_op, 1);
-       assert(get_irn_op(cf_op) == op_Jmp);
-       break;
+  if (n_exc > 0) {
+    /* -- If the exception control flow from the inlined Call directly
+       branched to the end block we now have the following control
+       flow predecessor pattern: ProjX -> Tuple -> Jmp.  We must
+       remove the Jmp along with it's empty block and add Jmp's
+       predecessors as predecessors of this end block.  No problem if
+       there is no exception, because then branches Bad to End which
+       is fine. -- */
+    /* find the problematic predecessor of the end block. */
+    end_bl = get_irg_end_block(current_ir_graph);
+    for (i = 0; i < get_Block_n_cfgpreds(end_bl); i++) {
+      cf_op = get_Block_cfgpred(end_bl, i);
+      if (get_irn_op(cf_op) == op_Proj) {
+       cf_op = get_Proj_pred(cf_op);
+       if ((get_irn_op(cf_op) == op_Tuple) && (cf_op == call)) {
+         // There are unoptimized tuples from inlineing before when no exc
+         assert(get_Proj_proj(get_Block_cfgpred(end_bl, i)) == pn_Call_X_except);
+         cf_op = get_Tuple_pred(cf_op, pn_Call_X_except);
+         assert(get_irn_op(cf_op) == op_Jmp);
+         break;
+       }
        }
      }
-  }
-  /* repair */
-  if (i < get_Block_n_cfgpreds(end_bl)) {
-    bl = get_nodes_Block(cf_op);
-    arity = get_Block_n_cfgpreds(end_bl) + get_Block_n_cfgpreds(bl) - 1;
-    cf_pred = (ir_node **) malloc (arity * sizeof (ir_node *));
-    for (j = 0; j < i; j++)
-      cf_pred[j] = get_Block_cfgpred(end_bl, j);
-    for (j = j; j < i + get_Block_n_cfgpreds(bl); j++)
-      cf_pred[j] = get_Block_cfgpred(bl, j-i);
-    for (j = j; j < arity; j++)
-      cf_pred[j] = get_Block_cfgpred(end_bl, j-get_Block_n_cfgpreds(bl) +1);
-    set_irn_in(end_bl, arity, cf_pred);
-    free(cf_pred);
+    /* repair */
+    if (i < get_Block_n_cfgpreds(end_bl)) {
+      bl = get_nodes_Block(cf_op);
+      arity = get_Block_n_cfgpreds(end_bl) + get_Block_n_cfgpreds(bl) - 1;
+      cf_pred = (ir_node **) malloc (arity * sizeof (ir_node *));
+      for (j = 0; j < i; j++)
+       cf_pred[j] = get_Block_cfgpred(end_bl, j);
+      for (j = j; j < i + get_Block_n_cfgpreds(bl); j++)
+       cf_pred[j] = get_Block_cfgpred(bl, j-i);
+      for (j = j; j < arity; j++)
+       cf_pred[j] = get_Block_cfgpred(end_bl, j-get_Block_n_cfgpreds(bl) +1);
+      set_irn_in(end_bl, arity, cf_pred);
+      free(cf_pred);
+      // Remove the exception pred from post-call Tuple.
+      set_Tuple_pred(call, pn_Call_X_except, new_Bad());
+    }
    }
  
-  /** Turn cse back on. **/
+  /* --  Turn cse back on. -- */
    set_optimize(rem_opt);
  }
  
@@ -801,20 +852,40 @@ static int pos;
     I didn't get a version with NEW_ARR_F to run. */
  #define MAX_INLINE 1024
  
+/* given an Call node, returns the irg called.  NULL if not
+ * known. */
+static ir_graph *get_call_called_irg(ir_node *call) {
+  ir_node *addr;
+  tarval *tv;
+  ir_graph *called_irg = NULL;
+
+  assert(get_irn_op(call) == op_Call);
+
+  addr = get_Call_ptr(call);
+  if (get_irn_op(addr) == op_Const) {
+    /* Check whether the constant is the pointer to a compiled entity. */
+    tv = get_Const_tarval(addr);
+    if (tarval_to_entity(tv))
+      called_irg = get_entity_irg(tarval_to_entity(tv));
+  }
+  return called_irg;
+}
+
  static void collect_calls(ir_node *call, void *env) {
+
+  if (get_irn_op(call) != op_Call) return;
+
    ir_node **calls = (ir_node **)env;
    ir_node *addr;
    tarval *tv;
    ir_graph *called_irg;
  
-  if (get_irn_op(call) != op_Call) return;
-
    addr = get_Call_ptr(call);
    if (get_irn_op(addr) == op_Const) {
      /* Check whether the constant is the pointer to a compiled entity. */
      tv = get_Const_tarval(addr);
-    if (tv->u.P.ent) {
-      called_irg = get_entity_irg(tv->u.P.ent);
+    if (tarval_to_entity(tv)) {
+      called_irg = get_entity_irg(tarval_to_entity(tv));
        if (called_irg && pos < MAX_INLINE) {
         /* The Call node calls a locally defined method.  Remember to inline. */
         calls[pos] = call;
@@ -853,11 +924,10 @@ void inline_small_irgs(ir_graph *irg, int size) {
      /* There are calls to inline */
      collect_phiprojs(irg);
      for (i = 0; i < pos; i++) {
-      char buf[1024];
        tarval *tv;
        ir_graph *callee;
        tv = get_Const_tarval(get_Call_ptr(calls[i]));
-      callee = get_entity_irg(tv->u.P.ent);
+      callee = get_entity_irg(tarval_to_entity(tv));
        if ((_obstack_memory_used(callee->obst) - obstack_room(callee->obst)) < size) {
         inline_method(calls[i], callee);
        }
@@ -867,6 +937,194 @@ void inline_small_irgs(ir_graph *irg, int size) {
    current_ir_graph = rem;
  }
  
+typedef struct {
+  int n_nodes;       /* Nodes in graph except Id, Tuple, Proj, Start, End */
+  int n_nodes_orig;  /* for statistics */
+  eset *call_nodes;  /* All call nodes in this graph */
+  int n_call_nodes;
+  int n_call_nodes_orig; /* for statistics */
+  int n_callers;   /* Number of known graphs that call this graphs. */
+  int n_callers_orig; /* for statistics */
+} inline_irg_env;
+
+static inline_irg_env *new_inline_irg_env(void) {
+  inline_irg_env *env = malloc(sizeof(inline_irg_env));
+  env->n_nodes = -2; /* uncount Start, End */
+  env->n_nodes_orig = -2; /* uncount Start, End */
+  env->call_nodes = eset_create();
+  env->n_call_nodes = 0;
+  env->n_call_nodes_orig = 0;
+  env->n_callers = 0;
+  env->n_callers_orig = 0;
+  return env;
+}
+
+static void free_inline_irg_env(inline_irg_env *env) {
+  eset_destroy(env->call_nodes);
+  free(env);
+}
+
+static void collect_calls2(ir_node *call, void *env) {
+  inline_irg_env *x = (inline_irg_env *)env;
+  ir_op *op = get_irn_op(call);
+
+  /* count nodes in irg */
+  if (op != op_Proj && op != op_Tuple && op != op_Sync) {
+    x->n_nodes++;
+    x->n_nodes_orig++;
+  }
+
+  if (op != op_Call) return;
+
+  /* collect all call nodes */
+  eset_insert(x->call_nodes, (void *)call);
+  x->n_call_nodes++;
+  x->n_call_nodes_orig++;
+
+  /* count all static callers */
+  ir_graph *callee = get_call_called_irg(call);
+  if (callee) {
+    ((inline_irg_env *)get_irg_link(callee))->n_callers++;
+    ((inline_irg_env *)get_irg_link(callee))->n_callers_orig++;
+  }
+}
+
+INLINE static int is_leave(ir_graph *irg) {
+  return (((inline_irg_env *)get_irg_link(irg))->n_call_nodes == 0);
+}
+
+INLINE static int is_smaller(ir_graph *callee, int size) {
+  return (((inline_irg_env *)get_irg_link(callee))->n_nodes < size);
+}
+
+
+/* Inlines small leave methods at call sites where the called address comes
+   from a Const node that references the entity representing the called
+   method.
+   The size argument is a rough measure for the code size of the method:
+   Methods where the obstack containing the firm graph is smaller than
+   size are inlined. */
+void inline_leave_functions(int maxsize, int leavesize, int size) {
+  inline_irg_env *env;
+  int i, n_irgs = get_irp_n_irgs();
+  ir_graph *rem = current_ir_graph;
+  int did_inline = 1;
+
+  if (!(get_optimize() && get_opt_inline())) return;
+
+  /* extend all irgs by a temporary datastructure for inlineing. */
+  for (i = 0; i < n_irgs; ++i)
+    set_irg_link(get_irp_irg(i), new_inline_irg_env());
+
+  /* Precompute information in temporary datastructure. */
+  for (i = 0; i < n_irgs; ++i) {
+    current_ir_graph = get_irp_irg(i);
+    assert(get_irg_phase_state(current_ir_graph) != phase_building);
+
+    irg_walk(get_irg_end(current_ir_graph), NULL, collect_calls2,
+            get_irg_link(current_ir_graph));
+    env = (inline_irg_env *)get_irg_link(current_ir_graph);
+  }
+
+  /* and now inline.
+     Inline leaves recursively -- we might construct new leaves. */
+  //int itercnt = 1;
+  while (did_inline) {
+    //printf("iteration %d\n", itercnt++);
+    did_inline = 0;
+    for (i = 0; i < n_irgs; ++i) {
+      ir_node *call;
+      eset *walkset;
+      int phiproj_computed = 0;
+
+      current_ir_graph = get_irp_irg(i);
+      env = (inline_irg_env *)get_irg_link(current_ir_graph);
+
+      /* we can not walk and change a set, nor remove from it.
+        So recompute.*/
+      walkset = env->call_nodes;
+      env->call_nodes = eset_create();
+      for (call = eset_first(walkset); call; call = eset_next(walkset)) {
+       ir_graph *callee = get_call_called_irg(call);
+       if (env->n_nodes > maxsize) break;
+       if (callee && is_leave(callee) && is_smaller(callee, leavesize)) {
+         if (!phiproj_computed) {
+           phiproj_computed = 1;
+           collect_phiprojs(current_ir_graph);
+         }
+         inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);
+         // printf(" %s: Inlineing %s.\n", get_entity_name(get_irg_entity(current_ir_graph)),
+         //     get_entity_name(get_irg_entity(callee)));
+         inline_method(call, callee);
+         did_inline = 1;
+         env->n_call_nodes--;
+         eset_insert_all(env->call_nodes, callee_env->call_nodes);
+         env->n_call_nodes += callee_env->n_call_nodes;
+         env->n_nodes += callee_env->n_nodes;
+         callee_env->n_callers--;
+       } else {
+         eset_insert(env->call_nodes, call);
+       }
+      }
+      eset_destroy(walkset);
+    }
+  }
+
+  //printf("Non leaves\n");
+  /* inline other small functions. */
+  for (i = 0; i < n_irgs; ++i) {
+    ir_node *call;
+    eset *walkset;
+    int phiproj_computed = 0;
+
+    current_ir_graph = get_irp_irg(i);
+    env = (inline_irg_env *)get_irg_link(current_ir_graph);
+
+    /* we can not walk and change a set, nor remove from it.
+       So recompute.*/
+    walkset = env->call_nodes;
+    env->call_nodes = eset_create();
+    for (call = eset_first(walkset); call; call = eset_next(walkset)) {
+      ir_graph *callee = get_call_called_irg(call);
+      if (env->n_nodes > maxsize) break;
+      if (callee && is_smaller(callee, size)) {
+       if (!phiproj_computed) {
+         phiproj_computed = 1;
+         collect_phiprojs(current_ir_graph);
+       }
+       inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);
+       //printf(" %s: Inlineing %s.\n", get_entity_name(get_irg_entity(current_ir_graph)),
+       //       get_entity_name(get_irg_entity(callee)));
+       inline_method(call, callee);
+       did_inline = 1;
+       env->n_call_nodes--;
+       eset_insert_all(env->call_nodes, callee_env->call_nodes);
+       env->n_call_nodes += callee_env->n_call_nodes;
+       env->n_nodes += callee_env->n_nodes;
+       callee_env->n_callers--;
+      } else {
+       eset_insert(env->call_nodes, call);
+      }
+    }
+    eset_destroy(walkset);
+  }
+
+  for (i = 0; i < n_irgs; ++i) {
+    current_ir_graph = get_irp_irg(i);
+#if 0
+    env = (inline_irg_env *)get_irg_link(current_ir_graph);
+    if ((env->n_call_nodes_orig != env->n_call_nodes) ||
+       (env->n_callers_orig != env->n_callers))
+      printf("Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
+            env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
+            env->n_callers_orig, env->n_callers,
+            get_entity_name(get_irg_entity(current_ir_graph)));
+#endif
+    free_inline_irg_env((inline_irg_env *)get_irg_link(current_ir_graph));
+  }
+
+  current_ir_graph = rem;
+}
  
  /********************************************************************/
  /*  Code Placement.  Pinns all floating nodes to a block where they */
@@ -880,7 +1138,7 @@ static pdeq *worklist;             /* worklist of ir_node*s */
  static void
  place_floats_early (ir_node *n)
  {
-  int i, start;
+  int i, start, irn_arity;
  
    /* we must not run into an infinite loop */
    assert (irn_not_visited(n));
@@ -895,14 +1153,16 @@ place_floats_early (ir_node *n)
  
      if ((get_irn_op(n) == op_Const) ||
         (get_irn_op(n) == op_SymConst) ||
-       (is_Bad(n))) {
+       (is_Bad(n)) ||
+       (get_irn_op(n) == op_Unknown)) {
        /* These nodes will not be placed by the loop below. */
        b = get_irg_start_block(current_ir_graph);
        depth = 1;
      }
  
      /* find the block for this node. */
-    for (i = 0; i < get_irn_arity(n); i++) {
+    irn_arity = get_irn_arity(n);
+    for (i = 0; i < irn_arity; i++) {
        ir_node *dep = get_irn_n(n, i);
        ir_node *dep_block;
        if ((irn_not_visited(dep)) &&
@@ -930,7 +1190,8 @@ place_floats_early (ir_node *n)
  
    /* Add predecessors of non floating nodes on worklist. */
    start = (get_irn_op(n) == op_Block) ? 0 : -1;
-  for (i = start; i < get_irn_arity(n); i++) {
+  irn_arity = get_irn_arity(n);
+  for (i = start; i < irn_arity; i++) {
      ir_node *pred = get_irn_n(n, i);
      if (irn_not_visited(pred)) {
        pdeq_putr (worklist, pred);
@@ -942,7 +1203,7 @@ place_floats_early (ir_node *n)
     Start, Call and end at pinned nodes as Store, Call.  Place_early
     places all floating nodes reachable from its argument through floating
     nodes and adds all beginnings at pinned nodes to the worklist. */
-static INLINE void place_early () {
+static INLINE void place_early (void) {
    assert(worklist);
    inc_irg_visited(current_ir_graph);
  
@@ -971,9 +1232,10 @@ consumer_dom_dca (ir_node *dca, ir_node *consumer, ir_node *producer)
    if (get_irn_op(consumer) == op_Phi) {
      /* our comsumer is a Phi-node, the effective use is in all those
         blocks through which the Phi-node reaches producer */
-    int i;
+    int i, irn_arity;
      ir_node *phi_block = get_nodes_Block(consumer);
-    for (i = 0;  i < get_irn_arity(consumer); i++) {
+    irn_arity = get_irn_arity(consumer);
+    for (i = 0;  i < irn_arity; i++) {
        if (get_irn_n(consumer, i) == producer) {
         block = get_nodes_Block(get_Block_cfgpred(phi_block, i));
        }
@@ -1096,7 +1358,7 @@ place_floats_late (ir_node *n)
    }
  }
  
-static INLINE void place_late() {
+static INLINE void place_late(void) {
    assert(worklist);
    inc_irg_visited(current_ir_graph);
  
@@ -1149,7 +1411,8 @@ void place_code(ir_graph *irg) {
  /********************************************************************/
  
  /* Removes Tuples from Block control flow predecessors.
-   Optimizes blocks with equivalent_node().                         */
+   Optimizes blocks with equivalent_node().
+   Replaces n by Bad if n is unreachable control flow. */
  static void merge_blocks(ir_node *n, void *env) {
    int i;
    set_irn_link(n, NULL);
@@ -1157,22 +1420,27 @@ static void merge_blocks(ir_node *n, void *env) {
    if (get_irn_op(n) == op_Block) {
      /* Remove Tuples */
      for (i = 0; i < get_Block_n_cfgpreds(n); i++)
-      set_Block_cfgpred(n, i, skip_Tuple(get_Block_cfgpred(n, i)));
-  } else if (get_irn_mode(n) == mode_X) {
+      /* GL @@@ : is this possible? if (get_opt_normalize()) -- added, all tests go throug.
+        A different order of optimizations might cause problems. */
+      if (get_opt_normalize())
+       set_Block_cfgpred(n, i, skip_Tuple(get_Block_cfgpred(n, i)));
+  } else if (get_optimize() && (get_irn_mode(n) == mode_X)) {
      /* We will soon visit a block.  Optimize it before visiting! */
      ir_node *b = get_nodes_Block(n);
      ir_node *new = equivalent_node(b);
      while (irn_not_visited(b) && (!is_Bad(new)) && (new != b)) {
-      /* We would have to run gigo if new is bad. */
-      if (!get_optimize() || (!get_opt_control_flow_straightening()
-                             && !get_opt_control_flow_weak_simplification()))
-       /* how could something be optimized of flags are not set? */
-       assert(0 && "strange ??!!");
+      /* We would have to run gigo if new is bad, so we
+        promote it directly below. */
+      assert(((b == new) ||
+             get_opt_control_flow_straightening() ||
+             get_opt_control_flow_weak_simplification()) &&
+            ("strange flag setting"));
        exchange (b, new);
        b = new;
        new = equivalent_node(b);
      }
-    if (is_Bad(new)) exchange (n, new_Bad());
+    /* GL @@@ get_opt_normalize hinzugefuegt, 5.5.2003 */
+    if (is_Bad(new) && get_opt_normalize()) exchange (n, new_Bad());
    }
  }
  
@@ -1264,7 +1532,7 @@ static void optimize_blocks(ir_node *b, void *env) {
    }
    in = (ir_node **) malloc(max_preds * sizeof(ir_node *));
  
-  /** Debug output **
+/**
    printf(" working on "); DDMN(b);
    for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
      pred = get_nodes_Block(get_Block_cfgpred(b, i));
@@ -1275,7 +1543,7 @@ static void optimize_blocks(ir_node *b, void *env) {
        printf("  removing pred %i ", i); DDMN(pred);
      } else { printf("  Nothing to do for "); DDMN(pred); }
    }
-  ** end Debug output **/
+  * end Debug output **/
  
    /** Fix the Phi nodes **/
    phi = get_irn_link(b);
@@ -1323,7 +1591,7 @@ static void optimize_blocks(ir_node *b, void *env) {
      phi = get_irn_link(phi);
    }
  
-  /** Move Phi nodes from removed blocks to this one.
+/**
        This happens only if merge between loop backedge and single loop entry. **/
    for (k = 0; k < get_Block_n_cfgpreds(b); k++) {
      pred = get_nodes_Block(get_Block_cfgpred(b, k));
@@ -1453,3 +1721,55 @@ void optimize_cf(ir_graph *irg) {
  
    current_ir_graph = rem;
  }
+
+
+/**
+ * Called by walker of remove_critical_cf_edges.
+ *
+ * Place an empty block to an edge between a blocks of multiple
+ * predecessors and a block of multiple sucessors.
+ *
+ * @param n IR node
+ * @param env Envirnment of walker. This field is unused and has
+ *            the value NULL.
+ */
+static void walk_critical_cf_edges(ir_node *n, void *env) {
+  int arity, i;
+  ir_node *pre, *block, **in, *jmp;
+
+  /* Block has multiple predecessors */
+  if ((op_Block == get_irn_op(n)) &&
+      (get_irn_arity(n) > 1)) {
+    arity = get_irn_arity(n);
+
+    if (n == get_irg_end_block(current_ir_graph))
+      return;  // No use to add a block here.
+
+    for (i=0; i<arity; i++) {
+      pre = get_irn_n(n, i);
+      /* Predecessor has multiple sucessors. Insert new flow edge */
+      if ((NULL != pre) &&
+         (op_Proj == get_irn_op(pre)) &&
+         op_Raise != get_irn_op(skip_Proj(pre))) {
+
+       /* set predecessor array for new block */
+       in = NEW_ARR_D (ir_node *, current_ir_graph->obst, 1);
+       /* set predecessor of new block */
+       in[0] = pre;
+       block = new_Block(1, in);
+       /* insert new jmp node to new block */
+       switch_block(block);
+       jmp = new_Jmp();
+       switch_block(n);
+       /* set sucessor of new block */
+       set_irn_n(n, i, jmp);
+
+      } /* predecessor has multiple sucessors */
+    } /* for all predecessors */
+  } /* n is a block */
+}
+
+void remove_critical_cf_edges(ir_graph *irg) {
+  if (get_opt_critical_edges())
+    irg_walk_graph(irg, NULL, walk_critical_cf_edges, NULL);
+}