Fixed 'inline' lossage --flo

[libfirm] / ir / ir / irgopt.c
diff --git a/ir/ir/irgopt.c b/ir/ir/irgopt.c

index 200dfdb..2112f6f 100644 (file)
--- a/ir/ir/irgopt.c
+++ b/ir/ir/irgopt.c
@@ -28,31 +28,38 @@
  # include "irgmod.h"
  # include "array.h"
  # include "pset.h"
+# include "eset.h"
  # include "pdeq.h"       /* Fuer code placement */
  # include "irouts.h"
  # include "irloop.h"
  # include "irbackedge_t.h"
+# include "irflag_t.h"
  
  /* Defined in iropt.c */
  pset *new_identities (void);
  void  del_identities (pset *value_table);
  void  add_identities   (pset *value_table, ir_node *node);
  
-/********************************************************************/
+/*------------------------------------------------------------------*/
  /* apply optimizations of iropt to all nodes.                       */
-/********************************************************************/
+/*------------------------------------------------------------------*/
  
  static void init_link (ir_node *n, void *env) {
    set_irn_link(n, NULL);
  }
  
+#if 0   /* Old version. Avoids Ids.
+          This is not necessary:  we do a postwalk, and get_irn_n
+          removes ids anyways.  So it's much cheaper to call the
+          optimization less often and use the exchange() algorithm. */
  static void
  optimize_in_place_wrapper (ir_node *n, void *env) {
-  int i;
+  int i, irn_arity;
    ir_node *optimized, *old;
  
-  for (i = 0; i < get_irn_arity(n); i++) {
-    /* get?irn_n skips Id nodes, so comparison old != optimized does not
+  irn_arity = get_irn_arity(n);
+  for (i = 0; i < irn_arity; i++) {
+    /* get_irn_n skips Id nodes, so comparison old != optimized does not
         show all optimizations. Therefore always set new predecessor. */
      old = get_irn_n(n, i);
      optimized = optimize_in_place_2(old);
@@ -64,6 +71,15 @@ optimize_in_place_wrapper (ir_node *n, void *env) {
      if (optimized != n) exchange (n, optimized);
    }
  }
+#else
+static void
+optimize_in_place_wrapper (ir_node *n, void *env) {
+  ir_node *optimized = optimize_in_place_2(n);
+  if (optimized != n) exchange (n, optimized);
+}
+#endif
+
+
  
  void
  local_optimize_graph (ir_graph *irg) {
@@ -89,34 +105,40 @@ local_optimize_graph (ir_graph *irg) {
    current_ir_graph = rem;
  }
  
-/********************************************************************/
+/*------------------------------------------------------------------*/
  /* Routines for dead node elimination / copying garbage collection  */
  /* of the obstack.                                                  */
-/********************************************************************/
+/*------------------------------------------------------------------*/
  
-/* Remeber the new node in the old node by using a field all nodes have. */
+/**
+ * Remember the new node in the old node by using a field all nodes have.
+ */
  static INLINE void
  set_new_node (ir_node *old, ir_node *new)
  {
    old->link = new;
  }
  
-/* Get this new node, before the old node is forgotton.*/
+/**
+ * Get this new node, before the old node is forgotten.
+ */
  static INLINE ir_node *
  get_new_node (ir_node * n)
  {
    return n->link;
  }
  
-/* We use the block_visited flag to mark that we have computed the
-   number of useful predecessors for this block.
-   Further we encode the new arity in this flag in the old blocks.
-   Remembering the arity is useful, as it saves a lot of pointer
-   accesses.  This function is called for all Phi and Block nodes
-   in a Block. */
+/**
+ * We use the block_visited flag to mark that we have computed the
+ * number of useful predecessors for this block.
+ * Further we encode the new arity in this flag in the old blocks.
+ * Remembering the arity is useful, as it saves a lot of pointer
+ * accesses.  This function is called for all Phi and Block nodes
+ * in a Block.
+ */
  static INLINE int
  compute_new_arity(ir_node *b) {
-  int i, res;
+  int i, res, irn_arity;
    int irg_v, block_v;
  
    irg_v = get_irg_block_visited(current_ir_graph);
@@ -127,8 +149,8 @@ compute_new_arity(ir_node *b) {
      return block_v - irg_v;
    } else {
      /* compute the number of good predecessors */
-    res = get_irn_arity(b);
-    for (i = 0; i < get_irn_arity(b); i++)
+    res = irn_arity = get_irn_arity(b);
+    for (i = 0; i < irn_arity; i++)
        if (get_irn_opcode(get_irn_n(b, i)) == iro_Bad) res--;
      /* save it in the flag. */
      set_Block_block_visited(b, irg_v + res);
@@ -136,6 +158,7 @@ compute_new_arity(ir_node *b) {
    }
  }
  
+/* TODO: add an ir_op operation */
  static INLINE void new_backedge_info(ir_node *n) {
    switch(get_irn_opcode(n)) {
    case iro_Block:
@@ -152,17 +175,24 @@ static INLINE void new_backedge_info(ir_node *n) {
    }
  }
  
-/* Copies the node to the new obstack. The Ins of the new node point to
-   the predecessors on the old obstack.  For block/phi nodes not all
-   predecessors might be copied.  n->link points to the new node.
-   For Phi and Block nodes the function allocates in-arrays with an arity
-   only for useful predecessors.  The arity is determined by counting
-   the non-bad predecessors of the block. */
+/**
+ * Copies the node to the new obstack. The Ins of the new node point to
+ * the predecessors on the old obstack.  For block/phi nodes not all
+ * predecessors might be copied.  n->link points to the new node.
+ * For Phi and Block nodes the function allocates in-arrays with an arity
+ * only for useful predecessors.  The arity is determined by counting
+ * the non-bad predecessors of the block.
+ */
  static void
  copy_node (ir_node *n, void *env) {
    ir_node *nn, *block;
    int new_arity;
  
+  /* The end node loses its flexible in array.  This doesn't matter,
+     as dead node elimination builds End by hand, and inlining doesn't use
+     the End node. */
+  //assert(n->op == op_End ||  ((_ARR_DESCR(n->in))->cookie != ARR_F_MAGIC));
+
    if (get_irn_opcode(n) == iro_Block) {
      block = NULL;
      new_arity = compute_new_arity(n);
@@ -194,12 +224,14 @@ copy_node (ir_node *n, void *env) {
  
  }
  
-/* Copies new predecessors of old node to new node remembered in link.
-   Spare the Bad predecessors of Phi and Block nodes. */
+/**
+ * Copies new predecessors of old node to new node remembered in link.
+ * Spare the Bad predecessors of Phi and Block nodes.
+ */
  static void
  copy_preds (ir_node *n, void *env) {
    ir_node *nn, *block;
-  int i, j;
+  int i, j, irn_arity;
  
    nn = get_new_node(n);
  
@@ -210,7 +242,8 @@ copy_preds (ir_node *n, void *env) {
    if (get_irn_opcode(n) == iro_Block) {
      /* Don't copy Bad nodes. */
      j = 0;
-    for (i = 0; i < get_irn_arity(n); i++)
+    irn_arity = get_irn_arity(n);
+    for (i = 0; i < irn_arity; i++)
        if (get_irn_opcode(get_irn_n(n, i)) != iro_Bad) {
         set_irn_n (nn, j, get_new_node(get_irn_n(n, i)));
         /*if (is_backedge(n, i)) set_backedge(nn, j);*/
@@ -234,7 +267,8 @@ copy_preds (ir_node *n, void *env) {
      block = get_nodes_Block(n);
      set_irn_n (nn, -1, get_new_node(block));
      j = 0;
-    for (i = 0; i < get_irn_arity(n); i++)
+    irn_arity = get_irn_arity(n);
+    for (i = 0; i < irn_arity; i++)
        if (get_irn_opcode(get_irn_n(block, i)) != iro_Bad) {
         set_irn_n (nn, j, get_new_node(get_irn_n(n, i)));
         /*if (is_backedge(n, i)) set_backedge(nn, j);*/
@@ -248,7 +282,8 @@ copy_preds (ir_node *n, void *env) {
      if (get_irn_arity(n) == 1)
        exchange(n, get_irn_n(n, 0));
    } else {
-    for (i = -1; i < get_irn_arity(n); i++)
+    irn_arity = get_irn_arity(n);
+    for (i = -1; i < irn_arity; i++)
        set_irn_n (nn, i, get_new_node(get_irn_n(n, i)));
    }
    /* Now the new node is complete.  We can add it to the hash table for cse.
@@ -257,12 +292,14 @@ copy_preds (ir_node *n, void *env) {
      add_identities (current_ir_graph->value_table, nn);
  }
  
-/* Copies the graph recursively, compacts the keepalive of the end node. */
+/**
+ * Copies the graph recursively, compacts the keepalive of the end node.
+ */
  static void
  copy_graph (void) {
    ir_node *oe, *ne; /* old end, new end */
    ir_node *ka;      /* keep alive */
-  int i;
+  int i, irn_arity;
  
    oe = get_irg_end(current_ir_graph);
    /* copy the end node by hand, allocate dynamic in array! */
@@ -282,10 +319,11 @@ copy_graph (void) {
    /* copy_preds for the end node ... */
    set_nodes_Block(ne, get_new_node(get_nodes_Block(oe)));
  
-  /** ... and now the keep alives. **/
+  /*- ... and now the keep alives. -*/
    /* First pick the not marked block nodes and walk them.  We must pick these
       first as else we will oversee blocks reachable from Phis. */
-  for (i = 0; i < get_irn_arity(oe); i++) {
+  irn_arity = get_irn_arity(oe);
+  for (i = 0; i < irn_arity; i++) {
      ka = get_irn_n(oe, i);
      if ((get_irn_op(ka) == op_Block) &&
         (get_irn_visited(ka) < get_irg_visited(current_ir_graph))) {
@@ -297,7 +335,8 @@ copy_graph (void) {
    }
  
    /* Now pick the Phis.  Here we will keep all! */
-  for (i = 0; i < get_irn_arity(oe); i++) {
+  irn_arity = get_irn_arity(oe);
+  for (i = 0; i < irn_arity; i++) {
      ka = get_irn_n(oe, i);
      if ((get_irn_op(ka) == op_Phi)) {
        if (get_irn_visited(ka) < get_irg_visited(current_ir_graph)) {
@@ -310,10 +349,12 @@ copy_graph (void) {
    }
  }
  
-/* Copies the graph reachable from current_ir_graph->end to the obstack
-   in current_ir_graph and fixes the environment.
-   Then fixes the fields in current_ir_graph containing nodes of the
-   graph.  */
+/**
+ * Copies the graph reachable from current_ir_graph->end to the obstack
+ * in current_ir_graph and fixes the environment.
+ * Then fixes the fields in current_ir_graph containing nodes of the
+ * graph.
+ */
  static void
  copy_graph_env (void) {
    ir_node *old_end;
@@ -359,19 +400,23 @@ copy_graph_env (void) {
      copy_preds(get_irg_bad(current_ir_graph), NULL);
    }
    set_irg_bad(current_ir_graph, get_new_node(get_irg_bad(current_ir_graph)));
+  /* GL removed: we need unknown with mode for analyses.
    if (get_irn_link(get_irg_unknown(current_ir_graph)) == NULL) {
      copy_node(get_irg_unknown(current_ir_graph), NULL);
      copy_preds(get_irg_unknown(current_ir_graph), NULL);
    }
    set_irg_unknown(current_ir_graph, get_new_node(get_irg_unknown(current_ir_graph)));
+  */
  }
  
-/* Copies all reachable nodes to a new obstack.  Removes bad inputs
-   from block nodes and the corresponding inputs from Phi nodes.
-   Merges single exit blocks with single entry blocks and removes
-   1-input Phis.
-   Adds all new nodes to a new hash table for cse.  Does not
-   perform cse, so the hash table might contain common subexpressions. */
+/**
+ * Copies all reachable nodes to a new obstack.  Removes bad inputs
+ * from block nodes and the corresponding inputs from Phi nodes.
+ * Merges single exit blocks with single entry blocks and removes
+ * 1-input Phis.
+ * Adds all new nodes to a new hash table for cse.  Does not
+ * perform cse, so the hash table might contain common subexpressions.
+ */
  /* Amroq call this emigrate() */
  void
  dead_node_elimination(ir_graph *irg) {
@@ -385,12 +430,13 @@ dead_node_elimination(ir_graph *irg) {
  
    /* Handle graph state */
    assert(get_irg_phase_state(current_ir_graph) != phase_building);
+  assert(get_irg_callee_info_state(current_ir_graph) == irg_callee_info_none);
    free_outs(current_ir_graph);
  
    /* @@@ so far we loose loops when copying */
-  set_irg_loop(current_ir_graph, NULL);
+  free_loop_information(current_ir_graph);
  
-  if (get_optimize() && get_opt_dead_node_elimination()) {
+  if (get_opt_optimize() && get_opt_dead_node_elimination()) {
  
      /* A quiet place, where the old obstack can rest in peace,
         until it will be cremated. */
@@ -416,11 +462,13 @@ dead_node_elimination(ir_graph *irg) {
    current_ir_graph = rem;
  }
  
-/* Relink bad predeseccors of a block and store the old in array to the
-   link field. This function is called by relink_bad_predecessors().
-   The array of link field starts with the block operand at position 0.
-   If block has bad predecessors, create a new in array without bad preds.
-   Otherwise let in array untouched. */
+/**
+ * Relink bad predecessors of a block and store the old in array in the
+ * link field. This function is called by relink_bad_predecessors().
+ * The array in the link field starts with the block operand at position 0.
+ * If the block has bad predecessors, create a new in array without bad preds.
+ * Otherwise leave the in array untouched.
+ */
  static void relink_bad_block_predecessors(ir_node *n, void *env) {
    ir_node **new_in, *irn;
    int i, new_irn_n, old_irn_arity, new_irn_arity = 0;
@@ -456,11 +504,13 @@ static void relink_bad_block_predecessors(ir_node *n, void *env) {
    } /* Block is not relinked */
  }
  
-/* Relinks Bad predecesors from Bocks and Phis called by walker
-   remove_bad_predecesors(). If n is a Block, call
-   relink_bad_block_redecessors(). If n is a Phinode, call also the relinking
-   function of Phi's Block. If this block has bad predecessors, relink preds
-   of the Phinode. */
+/**
+ * Relinks Bad predecessors from Blocks and Phis; called by the walker
+ * remove_bad_predecessors(). If n is a Block, call
+ * relink_bad_block_predecessors(). If n is a Phi node, also call the relinking
+ * function of the Phi's Block. If this block has bad predecessors, relink preds
+ * of the Phi node.
+ */
  static void relink_bad_predecessors(ir_node *n, void *env) {
    ir_node *block, **old_in;
    int i, old_irn_arity, new_irn_arity;
@@ -494,26 +544,33 @@ static void relink_bad_predecessors(ir_node *n, void *env) {
    } /* n is a Phi node */
  }
  
-/* Removes Bad Bad predecesors from Blocks and the corresponding
-   inputs to Phi nodes as in dead_node_elimination but without
-   copying the graph.
-   On walking up set the link field to NULL, on walking down call
-   relink_bad_predecessors() (This function stores the old in array
-   to the link field and sets a new in array if arity of predecessors
-   changes) */
+/**
+ * Removes Bad predecessors from Blocks and the corresponding
+ * inputs to Phi nodes as in dead_node_elimination but without
+ * copying the graph.
+ * On walking up set the link field to NULL, on walking down call
+ * relink_bad_predecessors() (This function stores the old in array
+ * to the link field and sets a new in array if arity of predecessors
+ * changes).
+ */
  void remove_bad_predecessors(ir_graph *irg) {
    irg_walk_graph(irg, init_link, relink_bad_predecessors, NULL);
  }
  
  
-/**********************************************************************/
+/*--------------------------------------------------------------------*/
  /*  Funcionality for inlining                                         */
-/**********************************************************************/
+/*--------------------------------------------------------------------*/
  
-/* Copy node for inlineing.  Copies the node by calling copy_node and
-   then updates the entity if it's a local one.  env must be a pointer
-   to the frame type of the procedure. The new entities must be in
-   the link field of the entities. */
+/**
+ * Copy node for inlining.  Updates attributes that change when
+ * inlining but not for dead node elimination.
+ *
+ * Copies the node by calling copy_node and then updates the entity if
+ * it's a local one.  env must be a pointer to the frame type of the
+ * inlined procedure. The new entities must be in the link field of
+ * the entities.
+ */
  static INLINE void
  copy_node_inline (ir_node *n, void *env) {
    ir_node *new;
@@ -526,9 +583,13 @@ copy_node_inline (ir_node *n, void *env) {
      if (get_entity_owner(get_Sel_entity(n)) == frame_tp) {
        set_Sel_entity(new, get_entity_link(get_Sel_entity(n)));
      }
+  } else if (get_irn_op(n) == op_Block) {
+    new = get_new_node (n);
+    new->attr.block.irg = current_ir_graph;
    }
  }
  
+
  void inline_method(ir_node *call, ir_graph *called_graph) {
    ir_node *pre_call;
    ir_node *post_call, *post_bl;
@@ -537,13 +598,15 @@ void inline_method(ir_node *call, ir_graph *called_graph) {
    ir_node **res_pred;
    ir_node **cf_pred;
    ir_node *ret, *phi;
-  ir_node *cf_op = NULL, *bl;
-  int arity, n_ret, n_exc, n_res, i, j, rem_opt;
+  int arity, n_ret, n_exc, n_res, i, j, rem_opt, irn_arity;
+  int exc_handling;
    type *called_frame;
  
-  if (!get_optimize() || !get_opt_inline()) return;
+  if ( !(get_irg_inline_property(called_graph) == irg_inline_forced) && (!get_opt_optimize() || !get_opt_inline() ||
+      (get_irg_inline_property(called_graph) == irg_inline_forbidden))) return;
+
    /* --  Turn off optimizations, this can cause problems when allocating new nodes. -- */
-  rem_opt = get_optimize();
+  rem_opt = get_opt_optimize();
    set_optimize(0);
  
    /* Handle graph state */
@@ -566,6 +629,24 @@ void inline_method(ir_node *call, ir_graph *called_graph) {
      return;
    }
  
+  /* -- Decide how to handle exception control flow: Is there a handler
+     for the Call node, or do we branch directly to End on an exception?
+     exc_handling: 0 There is a handler.
+                   1 Branches to End.
+                  2 Exception handling not represented in Firm. -- */
+  {
+    ir_node *proj, *Mproj = NULL, *Xproj = NULL;
+    for (proj = (ir_node *)get_irn_link(call); proj; proj = (ir_node *)get_irn_link(proj)) {
+      assert(get_irn_op(proj) == op_Proj);
+      if (get_Proj_proj(proj) == pn_Call_X_except) Xproj = proj;
+      if (get_Proj_proj(proj) == pn_Call_M_except) Mproj = proj;
+    }
+    if      (Mproj) { assert(Xproj); exc_handling = 0; } // Mproj
+    else if (Xproj) {                exc_handling = 1; } //!Mproj &&  Xproj
+    else            {                exc_handling = 2; } //!Mproj && !Xproj
+  }
+
+
    /* --
        the procedure and later replaces the Start node of the called graph.
        Post_call is the old Call node and collects the results of the called
@@ -639,12 +720,15 @@ void inline_method(ir_node *call, ir_graph *called_graph) {
    /* -- Merge the end of the inlined procedure with the call site -- */
    /* We will turn the old Call node into a Tuple with the following
       predecessors:
-     -1:  Block of Tuple.
-     0: Phi of all Memories of Return statements.
-     1: Jmp from new Block that merges the control flow from all exception
-        predecessors of the old end block.
-     2: Tuple of all arguments.
-     3: Phi of Exception memories.
+       -1:  Block of Tuple.
+       0: Phi of all Memories of Return statements.
+       1: Jmp from new Block that merges the control flow from all exception
+         predecessors of the old end block.
+       2: Tuple of all arguments.
+       3: Phi of Exception memories.
+     In case the old Call directly branches to End on an exception we don't
+     need the block merging all exceptions nor the Phi of the exception
+     memories.
    */
  
    /* -- Precompute some values -- */
@@ -654,18 +738,19 @@ void inline_method(ir_node *call, ir_graph *called_graph) {
    n_res = get_method_n_ress(get_Call_type(call));
  
    res_pred = (ir_node **) malloc (n_res * sizeof (ir_node *));
-  cf_pred = (ir_node **) malloc (arity * sizeof (ir_node *));
+  cf_pred =  (ir_node **) malloc (arity * sizeof (ir_node *));
  
    set_irg_current_block(current_ir_graph, post_bl); /* just to make sure */
  
    /* -- archive keepalives -- */
-  for (i = 0; i < get_irn_arity(end); i++)
+  irn_arity = get_irn_arity(end);
+  for (i = 0; i < irn_arity; i++)
      add_End_keepalive(get_irg_end(current_ir_graph), get_irn_n(end, i));
-  /* The new end node will die, but the in array is not on the obstack ... */
-  free_End(end);
  
-/* --
-      Return nodes by Jump nodes. -- */
+  /* The new end node will die.  We need not free as the in array is on the obstack:
+     copy_node only generated 'D' arrays. */
+
+  /* -- Replace Return nodes by Jump nodes. -- */
    n_ret = 0;
    for (i = 0; i < arity; i++) {
      ir_node *ret;
@@ -677,8 +762,8 @@ void inline_method(ir_node *call, ir_graph *called_graph) {
    }
    set_irn_in(post_bl, n_ret, cf_pred);
  
-/* --
-      turned into a tuple.  -- */
+  /* -- Build a Tuple for all results of the method.
+     Add Phi node if there was more than one Return.  -- */
    turn_into_tuple(post_call, 4);
    /* First the Memory-Phi */
    n_ret = 0;
@@ -719,48 +804,83 @@ void inline_method(ir_node *call, ir_graph *called_graph) {
    } else {
      set_Tuple_pred(call, 2, new_Bad());
    }
-  /* Finally the exception control flow.  We need to add a Phi node to
+  /* Finally the exception control flow.
+     We have two (three) possible situations:
+     First if the Call branches to an exception handler: We need to add a Phi node to
       collect the memory containing the exception objects.  Further we need
       to add another block to get a correct representation of this Phi.  To
       this block we add a Jmp that resolves into the X output of the Call
-     when the Call is turned into a tuple. */
-  n_exc = 0;
-  for (i = 0; i < arity; i++) {
-    ir_node *ret;
-    ret = get_irn_n(end_bl, i);
-    if (is_fragile_op(skip_Proj(ret)) || (get_irn_op(skip_Proj(ret)) == op_Raise)) {
-      cf_pred[n_exc] = ret;
-      n_exc++;
-    }
-  }
-  if (n_exc > 0) {
-    new_Block(n_exc, cf_pred);      /* watch it: current_block is changed! */
-    set_Tuple_pred(call, 1, new_Jmp());
-    /* The Phi for the memories with the exception objects */
+     when the Call is turned into a tuple.
+     Second the Call branches to End, the exception is not handled.  Just
+     add all inlined exception branches to the End node.
+     Third: there is no Exception edge at all. Handle as case two. */
+  if (exc_handling == 0) {
      n_exc = 0;
      for (i = 0; i < arity; i++) {
        ir_node *ret;
-      ret = skip_Proj(get_irn_n(end_bl, i));
-      if (get_irn_op(ret) == op_Call) {
-       cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_Block(ret), ret, mode_M, 3);
+      ret = get_irn_n(end_bl, i);
+      if (is_fragile_op(skip_Proj(ret)) || (get_irn_op(skip_Proj(ret)) == op_Raise)) {
+       cf_pred[n_exc] = ret;
         n_exc++;
-      } else if (is_fragile_op(ret)) {
+      }
+    }
+    if (n_exc > 0) {
+      new_Block(n_exc, cf_pred);      /* watch it: current_block is changed! */
+      set_Tuple_pred(call, 1, new_Jmp());
+      /* The Phi for the memories with the exception objects */
+      n_exc = 0;
+      for (i = 0; i < arity; i++) {
+       ir_node *ret;
+       ret = skip_Proj(get_irn_n(end_bl, i));
+       if (get_irn_op(ret) == op_Call) {
+         cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_Block(ret), ret, mode_M, 3);
+         n_exc++;
+       } else if (is_fragile_op(ret)) {
         /* We rely that all cfops have the memory output at the same position. */
-       cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_Block(ret), ret, mode_M, 0);
-       n_exc++;
-      } else if (get_irn_op(ret) == op_Raise) {
-       cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_Block(ret), ret, mode_M, 1);
-       n_exc++;
+         cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_Block(ret), ret, mode_M, 0);
+         n_exc++;
+       } else if (get_irn_op(ret) == op_Raise) {
+         cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_Block(ret), ret, mode_M, 1);
+         n_exc++;
+       }
        }
+      set_Tuple_pred(call, 3, new_Phi(n_exc, cf_pred, mode_M));
+    } else {
+      set_Tuple_pred(call, 1, new_Bad());
+      set_Tuple_pred(call, 3, new_Bad());
      }
-    set_Tuple_pred(call, 3, new_Phi(n_exc, cf_pred, mode_M));
    } else {
+    ir_node *main_end_bl;
+    int main_end_bl_arity;
+    ir_node **end_preds;
+
+    /* assert(exc_handling == 1 || no exceptions. ) */
+    n_exc = 0;
+    for (i = 0; i < arity; i++) {
+      ir_node *ret = get_irn_n(end_bl, i);
+
+      if (is_fragile_op(skip_Proj(ret)) || (get_irn_op(skip_Proj(ret)) == op_Raise)) {
+        cf_pred[n_exc] = ret;
+        n_exc++;
+      }
+    }
+    main_end_bl = get_irg_end_block(current_ir_graph);
+    main_end_bl_arity = get_irn_arity(main_end_bl);
+    end_preds =  (ir_node **) malloc ((n_exc + main_end_bl_arity) * sizeof (ir_node *));
+
+    for (i = 0; i < main_end_bl_arity; ++i)
+      end_preds[i] = get_irn_n(main_end_bl, i);
+    for (i = 0; i < n_exc; ++i)
+      end_preds[main_end_bl_arity + i] = cf_pred[i];
+    set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
      set_Tuple_pred(call, 1, new_Bad());
      set_Tuple_pred(call, 3, new_Bad());
+    free(end_preds);
    }
    free(res_pred);
    free(cf_pred);
  
+#if 0  /* old. now better, correcter, faster implementation. */
    if (n_exc > 0) {
      /* -- If the exception control flow from the inlined Call directly
         branched to the end block we now have the following control
@@ -768,15 +888,19 @@ void inline_method(ir_node *call, ir_graph *called_graph) {
         remove the Jmp along with it's empty block and add Jmp's
         predecessors as predecessors of this end block.  No problem if
         there is no exception, because then branches Bad to End which
-       is fine. -- */
+       is fine. --
+       @@@ can't we know this beforehand: by getting the Proj(1) from
+       the Call link list and checking whether it goes to Proj. */
      /* find the problematic predecessor of the end block. */
      end_bl = get_irg_end_block(current_ir_graph);
      for (i = 0; i < get_Block_n_cfgpreds(end_bl); i++) {
        cf_op = get_Block_cfgpred(end_bl, i);
        if (get_irn_op(cf_op) == op_Proj) {
         cf_op = get_Proj_pred(cf_op);
-       if (get_irn_op(cf_op) == op_Tuple) {
-         cf_op = get_Tuple_pred(cf_op, 1);
+       if ((get_irn_op(cf_op) == op_Tuple) && (cf_op == call)) {
+         // There are unoptimized tuples from inlineing before when no exc
+         assert(get_Proj_proj(get_Block_cfgpred(end_bl, i)) == pn_Call_X_except);
+         cf_op = get_Tuple_pred(cf_op, pn_Call_X_except);
           assert(get_irn_op(cf_op) == op_Jmp);
           break;
         }
@@ -795,8 +919,11 @@ void inline_method(ir_node *call, ir_graph *called_graph) {
         cf_pred[j] = get_Block_cfgpred(end_bl, j-get_Block_n_cfgpreds(bl) +1);
        set_irn_in(end_bl, arity, cf_pred);
        free(cf_pred);
+      // Remove the exception pred from post-call Tuple.
+      set_Tuple_pred(call, pn_Call_X_except, new_Bad());
      }
    }
+#endif
  
    /* --  Turn cse back on. -- */
    set_optimize(rem_opt);
@@ -812,7 +939,29 @@ static int pos;
     I didn't get a version with NEW_ARR_F to run. */
  #define MAX_INLINE 1024
  
+/**
+ * Returns the irg called from a Call node. If the irg is not
+ * known, NULL is returned.
+ */
+static ir_graph *get_call_called_irg(ir_node *call) {
+  ir_node *addr;
+  tarval *tv;
+  ir_graph *called_irg = NULL;
+
+  assert(get_irn_op(call) == op_Call);
+
+  addr = get_Call_ptr(call);
+  if (get_irn_op(addr) == op_Const) {
+    /* Check whether the constant is the pointer to a compiled entity. */
+    tv = get_Const_tarval(addr);
+    if (tarval_to_entity(tv))
+      called_irg = get_entity_irg(tarval_to_entity(tv));
+  }
+  return called_irg;
+}
+
  static void collect_calls(ir_node *call, void *env) {
+
    ir_node **calls = (ir_node **)env;
    ir_node *addr;
    tarval *tv;
@@ -827,30 +976,33 @@ static void collect_calls(ir_node *call, void *env) {
      if (tarval_to_entity(tv)) {
        called_irg = get_entity_irg(tarval_to_entity(tv));
        if (called_irg && pos < MAX_INLINE) {
-       /* The Call node calls a locally defined method.  Remember to inline. */
-       calls[pos] = call;
-       pos++;
+        /* The Call node calls a locally defined method.  Remember to inline. */
+        calls[pos] = call;
+        pos++;
        }
      }
    }
  }
  
-/* Inlines all small methods at call sites where the called address comes
-   from a Const node that references the entity representing the called
-   method.
-   The size argument is a rough measure for the code size of the method:
-   Methods where the obstack containing the firm graph is smaller than
-   size are inlined. */
+/**
+ * Inlines all small methods at call sites where the called address comes
+ * from a Const node that references the entity representing the called
+ * method.
+ * The size argument is a rough measure for the code size of the method:
+ * Methods where the obstack containing the firm graph is smaller than
+ * size are inlined.
+ */
  void inline_small_irgs(ir_graph *irg, int size) {
    int i;
    ir_node *calls[MAX_INLINE];
    ir_graph *rem = current_ir_graph;
  
-  if (!(get_optimize() && get_opt_inline())) return;
+  if (!(get_opt_optimize() && get_opt_inline())) return;
  
    current_ir_graph = irg;
    /* Handle graph state */
    assert(get_irg_phase_state(current_ir_graph) != phase_building);
+  assert(get_irg_callee_info_state(current_ir_graph) == irg_callee_info_none);
  
    /* Find Call nodes to inline.
       (We can not inline during a walk of the graph, as inlineing the same
@@ -868,8 +1020,9 @@ void inline_small_irgs(ir_graph *irg, int size) {
        ir_graph *callee;
        tv = get_Const_tarval(get_Call_ptr(calls[i]));
        callee = get_entity_irg(tarval_to_entity(tv));
-      if ((_obstack_memory_used(callee->obst) - obstack_room(callee->obst)) < size) {
-       inline_method(calls[i], callee);
+      if (((_obstack_memory_used(callee->obst) - obstack_room(callee->obst)) < size) ||
+         (get_irg_inline_property(callee) == irg_inline_forced)) {
+        inline_method(calls[i], callee);
        }
      }
    }
@@ -877,20 +1030,221 @@ void inline_small_irgs(ir_graph *irg, int size) {
    current_ir_graph = rem;
  }
  
+/**
+ * Environment for inlining irgs.
+ *
+ * One instance is attached to each graph through its link field while
+ * inline_leave_functions() runs.  The *_orig members are incremented
+ * together with their counterparts by collect_calls2() and are not touched
+ * by the inlining loops, so they keep the values from before inlining.
+ */
+typedef struct {
+  int n_nodes;       /**< Nodes in graph; collect_calls2() does not count Proj, Tuple and Sync, the initial value -2 uncounts Start and End */
+  int n_nodes_orig;  /**< for statistics */
+  eset *call_nodes;  /**< All call nodes in this graph */
+  int n_call_nodes;  /**< Number of call nodes; adjusted whenever a call is inlined */
+  int n_call_nodes_orig; /**< for statistics */
+  int n_callers;   /**< Number of Call nodes in known graphs whose callee is this graph. */
+  int n_callers_orig; /**< for statistics */
+} inline_irg_env;
+
+/**
+ * Allocates an inline_irg_env with malloc() and initializes its counters
+ * and an empty call node set.
+ *
+ * NOTE(review): the result of malloc() is written to without a NULL check.
+ *
+ * @return the new environment; release it with free_inline_irg_env().
+ */
+static inline_irg_env *new_inline_irg_env(void) {
+  inline_irg_env *env = malloc(sizeof(inline_irg_env));
+  env->n_nodes = -2; /* uncount Start, End */
+  env->n_nodes_orig = -2; /* uncount Start, End */
+  env->call_nodes = eset_create();
+  env->n_call_nodes = 0;
+  env->n_call_nodes_orig = 0;
+  env->n_callers = 0;
+  env->n_callers_orig = 0;
+  return env;
+}
  
-/********************************************************************/
-/*  Code Placement.  Pinns all floating nodes to a block where they */
-/*  will be executed only if needed.                                */
-/********************************************************************/
+/**
+ * Destroys the call node set held by env and then frees env itself.
+ *
+ * @param env  environment created by new_inline_irg_env()
+ */
+static void free_inline_irg_env(inline_irg_env *env) {
+  eset_destroy(env->call_nodes);
+  free(env);
+}
+
+/**
+ * Walker callback that gathers the per-graph data used for inlining.
+ *
+ * Counts the visited node unless it is a Proj, Tuple or Sync.  A Call node
+ * is additionally recorded in the call node set of the environment, and if
+ * get_call_called_irg() yields a graph for it, the caller counters of that
+ * graph's environment are incremented.
+ *
+ * @param call  the visited node; despite its name not necessarily a Call
+ * @param env   the inline_irg_env of the graph being walked
+ */
+static void collect_calls2(ir_node *call, void *env) {
+  inline_irg_env *x = (inline_irg_env *)env;
+  ir_op *op = get_irn_op(call);
+  ir_graph *callee;
+
+  /* count nodes in irg */
+  if (op != op_Proj && op != op_Tuple && op != op_Sync) {
+    x->n_nodes++;
+    x->n_nodes_orig++;
+  }
+
+  if (op != op_Call) return;
+
+  /* collect all call nodes */
+  eset_insert(x->call_nodes, (void *)call);
+  x->n_call_nodes++;
+  x->n_call_nodes_orig++;
+
+  /* count all static callers */
+  callee = get_call_called_irg(call);
+  if (callee) {
+    /* The link of the callee is dereferenced unchecked: it must already
+       hold an inline_irg_env when this walker runs. */
+    ((inline_irg_env *)get_irg_link(callee))->n_callers++;
+    ((inline_irg_env *)get_irg_link(callee))->n_callers_orig++;
+  }
+}
+
+/**
+ * Returns nonzero if the inline_irg_env linked to irg records no call
+ * nodes, i.e. the graph is a leaf ("leave" in the name stands for "leaf").
+ */
+INLINE static int is_leave(ir_graph *irg) {
+  return (((inline_irg_env *)get_irg_link(irg))->n_call_nodes == 0);
+}
+
+/**
+ * Returns nonzero if the node count stored in the inline_irg_env linked to
+ * callee is strictly below size.
+ */
+INLINE static int is_smaller(ir_graph *callee, int size) {
+  return (((inline_irg_env *)get_irg_link(callee))->n_nodes < size);
+}
+
+
+/**
+ * Inlines small leaf methods and afterwards other small methods at call
+ * sites for which get_call_called_irg() reports a callee graph.
+ *
+ * Works on all graphs of the program and returns immediately unless both
+ * get_opt_optimize() and get_opt_inline() are set.  All size limits are
+ * compared against the node counts gathered by collect_calls2(), not
+ * against obstack sizes.
+ *
+ * @param maxsize    once the node count of a graph exceeds this value no
+ *                   further calls are inlined into it
+ * @param leavesize  a callee without call nodes is inlined if its node
+ *                   count is below this value
+ * @param size       in the final pass every callee with a node count below
+ *                   this value is inlined
+ *
+ * In the leaf phase callees whose inline property is irg_inline_forced are
+ * inlined as well, regardless of leavesize.
+ */
+void inline_leave_functions(int maxsize, int leavesize, int size) {
+  inline_irg_env *env;
+  int i, n_irgs = get_irp_n_irgs();
+  ir_graph *rem = current_ir_graph;
+  int did_inline = 1;
+
+  if (!(get_opt_optimize() && get_opt_inline())) return;
  
-static pdeq *worklist;         /* worklist of ir_node*s */
+  /* extend all irgs by a temporary data structure for inlining. */
+  for (i = 0; i < n_irgs; ++i)
+    set_irg_link(get_irp_irg(i), new_inline_irg_env());
  
-/* Find the earliest correct block for N.  --- Place N into the
-   same Block as its dominance-deepest Input.  */
+  /* Precompute information in temporary data structure. */
+  for (i = 0; i < n_irgs; ++i) {
+    current_ir_graph = get_irp_irg(i);
+    assert(get_irg_phase_state(current_ir_graph) != phase_building);
+    assert(get_irg_callee_info_state(current_ir_graph) == irg_callee_info_none);
+
+    irg_walk(get_irg_end(current_ir_graph), NULL, collect_calls2,
+            get_irg_link(current_ir_graph));
+    /* NOTE(review): this value of env is overwritten before it is read. */
+    env = (inline_irg_env *)get_irg_link(current_ir_graph);
+  }
+
+  /* and now inline.
+     Inline leaves recursively -- we might construct new leaves. */
+  //int itercnt = 1;
+  while (did_inline) {
+    //printf("iteration %d\n", itercnt++);
+    did_inline = 0;
+    for (i = 0; i < n_irgs; ++i) {
+      ir_node *call;
+      eset *walkset;
+      int phiproj_computed = 0;
+
+      current_ir_graph = get_irp_irg(i);
+      env = (inline_irg_env *)get_irg_link(current_ir_graph);
+
+      /* we can not walk and change a set, nor remove from it.
+      So recompute.*/
+      walkset = env->call_nodes;
+      env->call_nodes = eset_create();
+      for (call = eset_first(walkset); call; call = eset_next(walkset)) {
+        inline_irg_env *callee_env;
+        ir_graph *callee = get_call_called_irg(call);
+
+        /* NOTE(review): leaving the loop here drops every call of walkset
+           that was not visited yet from the rebuilt env->call_nodes, while
+           n_call_nodes still counts them. */
+        if (env->n_nodes > maxsize) break;
+        if (callee &&
+           ((is_leave(callee) && is_smaller(callee, leavesize)) ||
+            (get_irg_inline_property(callee) == irg_inline_forced))) {
+          /* collect_phiprojs() is run at most once per graph and pass,
+             right before the first call is inlined. */
+          if (!phiproj_computed) {
+            phiproj_computed = 1;
+            collect_phiprojs(current_ir_graph);
+          }
+          callee_env = (inline_irg_env *)get_irg_link(callee);
+//        printf(" %s: Inlineing %s.\n", get_entity_name(get_irg_entity(current_ir_graph)),
+//           get_entity_name(get_irg_entity(callee)));
+          inline_method(call, callee);
+          did_inline = 1;
+          /* Bookkeeping: the inlined call disappears, the calls and nodes
+             recorded for the callee are added to the caller. */
+          env->n_call_nodes--;
+          eset_insert_all(env->call_nodes, callee_env->call_nodes);
+          env->n_call_nodes += callee_env->n_call_nodes;
+          env->n_nodes += callee_env->n_nodes;
+          callee_env->n_callers--;
+        } else {
+          /* not inlined: keep the call for later passes */
+          eset_insert(env->call_nodes, call);
+        }
+      }
+      eset_destroy(walkset);
+    }
+  }
+
+  //printf("Non leaves\n");
+  /* inline other small functions. */
+  for (i = 0; i < n_irgs; ++i) {
+    ir_node *call;
+    eset *walkset;
+    int phiproj_computed = 0;
+
+    current_ir_graph = get_irp_irg(i);
+    env = (inline_irg_env *)get_irg_link(current_ir_graph);
+
+    /* we can not walk and change a set, nor remove from it.
+       So recompute.*/
+    walkset = env->call_nodes;
+    env->call_nodes = eset_create();
+    for (call = eset_first(walkset); call; call = eset_next(walkset)) {
+      inline_irg_env *callee_env;
+      ir_graph *callee = get_call_called_irg(call);
+
+      /* NOTE(review): as in the leaf phase, calls not visited before this
+         break are not re-inserted into env->call_nodes. */
+      if (env->n_nodes > maxsize) break;
+      if (callee && is_smaller(callee, size)) {
+        if (!phiproj_computed) {
+               phiproj_computed = 1;
+               collect_phiprojs(current_ir_graph);
+        }
+        callee_env = (inline_irg_env *)get_irg_link(callee);
+//      printf(" %s: Inlineing %s.\n", get_entity_name(get_irg_entity(current_ir_graph)),
+//      get_entity_name(get_irg_entity(callee)));
+        inline_method(call, callee);
+        /* NOTE(review): did_inline is not read again after this point. */
+        did_inline = 1;
+        env->n_call_nodes--;
+        eset_insert_all(env->call_nodes, callee_env->call_nodes);
+        env->n_call_nodes += callee_env->n_call_nodes;
+        env->n_nodes += callee_env->n_nodes;
+        callee_env->n_callers--;
+      } else {
+        eset_insert(env->call_nodes, call);
+      }
+    }
+    eset_destroy(walkset);
+  }
+
+  /* Release the temporary environments again.
+     NOTE(review): the graph links are not reset here and keep pointing to
+     the freed environments. */
+  for (i = 0; i < n_irgs; ++i) {
+    current_ir_graph = get_irp_irg(i);
+#if 0
+    env = (inline_irg_env *)get_irg_link(current_ir_graph);
+    if ((env->n_call_nodes_orig != env->n_call_nodes) ||
+       (env->n_callers_orig != env->n_callers))
+      printf("Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
+            env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
+            env->n_callers_orig, env->n_callers,
+            get_entity_name(get_irg_entity(current_ir_graph)));
+#endif
+    free_inline_irg_env((inline_irg_env *)get_irg_link(current_ir_graph));
+  }
+
+  current_ir_graph = rem;
+}
+
+/*-----------------------------------------------------------------*/
+/*  Code Placement.  Pins all floating nodes to a block where they */
+/*  will be executed only if needed.                               */
+/*-----------------------------------------------------------------*/
+
+/**
+ * Find the earliest correct block for N.  --- Place N into the
+ * same Block as its dominance-deepest Input.
+ */
  static void
-place_floats_early (ir_node *n)
+place_floats_early(ir_node *n, pdeq *worklist)
  {
-  int i, start;
+  int i, start, irn_arity;
  
    /* we must not run into an infinite loop */
    assert (irn_not_visited(n));
@@ -905,19 +1259,21 @@ place_floats_early (ir_node *n)
  
      if ((get_irn_op(n) == op_Const) ||
         (get_irn_op(n) == op_SymConst) ||
-       (is_Bad(n))) {
+       (is_Bad(n)) ||
+       (get_irn_op(n) == op_Unknown)) {
        /* These nodes will not be placed by the loop below. */
        b = get_irg_start_block(current_ir_graph);
        depth = 1;
      }
  
      /* find the block for this node. */
-    for (i = 0; i < get_irn_arity(n); i++) {
+    irn_arity = get_irn_arity(n);
+    for (i = 0; i < irn_arity; i++) {
        ir_node *dep = get_irn_n(n, i);
        ir_node *dep_block;
        if ((irn_not_visited(dep)) &&
           (get_op_pinned(get_irn_op(dep)) == floats)) {
-       place_floats_early (dep);
+       place_floats_early(dep, worklist);
        }
        /* Because all loops contain at least one pinned node, now all
           our inputs are either pinned or place_early has already
@@ -940,7 +1296,8 @@ place_floats_early (ir_node *n)
  
    /* Add predecessors of non floating nodes on worklist. */
    start = (get_irn_op(n) == op_Block) ? 0 : -1;
-  for (i = start; i < get_irn_arity(n); i++) {
+  irn_arity = get_irn_arity(n);
+  for (i = start; i < irn_arity; i++) {
      ir_node *pred = get_irn_n(n, i);
      if (irn_not_visited(pred)) {
        pdeq_putr (worklist, pred);
@@ -948,21 +1305,23 @@ place_floats_early (ir_node *n)
    }
  }
  
-/* Floating nodes form subgraphs that begin at nodes as Const, Load,
-   Start, Call and end at pinned nodes as Store, Call.  Place_early
-   places all floating nodes reachable from its argument through floating
-   nodes and adds all beginnings at pinned nodes to the worklist. */
-static INLINE void place_early (void) {
+/**
+ * Floating nodes form subgraphs that begin at nodes as Const, Load,
+ * Start, Call and end at pinned nodes as Store, Call.  Place_early
+ * places all floating nodes reachable from its argument through floating
+ * nodes and adds all beginnings at pinned nodes to the worklist.
+ */
+static INLINE void place_early(pdeq* worklist) {
    assert(worklist);
    inc_irg_visited(current_ir_graph);
  
    /* this inits the worklist */
-  place_floats_early (get_irg_end(current_ir_graph));
+  place_floats_early(get_irg_end(current_ir_graph), worklist);
  
    /* Work the content of the worklist. */
    while (!pdeq_empty (worklist)) {
      ir_node *n = pdeq_getl (worklist);
-    if (irn_not_visited(n)) place_floats_early (n);
+    if (irn_not_visited(n)) place_floats_early(n, worklist);
    }
  
    set_irg_outs_inconsistent(current_ir_graph);
@@ -970,7 +1329,7 @@ static INLINE void place_early (void) {
  }
  
  
-/* deepest common dominance ancestor of DCA and CONSUMER of PRODUCER */
+/** deepest common dominance ancestor of DCA and CONSUMER of PRODUCER. */
  static ir_node *
  consumer_dom_dca (ir_node *dca, ir_node *consumer, ir_node *producer)
  {
@@ -979,11 +1338,12 @@ consumer_dom_dca (ir_node *dca, ir_node *consumer, ir_node *producer)
    /* Compute the latest block into which we can place a node so that it is
       before consumer. */
    if (get_irn_op(consumer) == op_Phi) {
-    /* our comsumer is a Phi-node, the effective use is in all those
+    /* our consumer is a Phi-node, the effective use is in all those
         blocks through which the Phi-node reaches producer */
-    int i;
+    int i, irn_arity;
      ir_node *phi_block = get_nodes_Block(consumer);
-    for (i = 0;  i < get_irn_arity(consumer); i++) {
+    irn_arity = get_irn_arity(consumer);
+    for (i = 0;  i < irn_arity; i++) {
        if (get_irn_n(consumer, i) == producer) {
         block = get_nodes_Block(get_Block_cfgpred(phi_block, i));
        }
@@ -1010,8 +1370,10 @@ static INLINE int get_irn_loop_depth(ir_node *n) {
    return get_loop_depth(get_irn_loop(n));
  }
  
-/* Move n to a block with less loop depth than it's current block. The
-   new block must be dominated by early. */
+/**
+ * Move n to a block with less loop depth than its current block. The
+ * new block must be dominated by early.
+ */
  static void
  move_out_of_loops (ir_node *n, ir_node *early)
  {
@@ -1042,14 +1404,16 @@ move_out_of_loops (ir_node *n, ir_node *early)
    }
  }
  
-/* Find the latest legal block for N and place N into the
-   `optimal' Block between the latest and earliest legal block.
-   The `optimal' block is the dominance-deepest block of those
-   with the least loop-nesting-depth.  This places N out of as many
-   loops as possible and then makes it as controldependant as
-   possible. */
+/**
+ * Find the latest legal block for N and place N into the
+ * `optimal' Block between the latest and earliest legal block.
+ * The `optimal' block is the dominance-deepest block of those
+ * with the least loop-nesting-depth.  This places N out of as many
+ * loops as possible and then makes it as control dependent as
+ * possible.
+ */
  static void
-place_floats_late (ir_node *n)
+place_floats_late(ir_node *n, pdeq *worklist)
  {
    int i;
    ir_node *early;
@@ -1060,13 +1424,13 @@ place_floats_late (ir_node *n)
    if ((get_irn_op(n) != op_Block) &&
        (!is_cfop(n)) &&
        (get_irn_mode(n) != mode_X)) {
-    /* Remember the early palacement of this block to move it
+    /* Remember the early placement of this block to move it
         out of loop no further than the early placement. */
      early = get_nodes_Block(n);
      /* Assure that our users are all placed, except the Phi-nodes.
-       --- Each dataflow cycle contains at least one Phi-node.  We
+       --- Each data flow cycle contains at least one Phi-node.  We
         have to break the `user has to be placed before the
-       producer' dependance cycle and the Phi-nodes are the
+       producer' dependence cycle and the Phi-nodes are the
         place to do so, because we need to base our placement on the
         final region of our users, which is OK with Phi-nodes, as they
         are pinned, and they never have to be placed after a
@@ -1074,7 +1438,7 @@ place_floats_late (ir_node *n)
      for (i = 0; i < get_irn_n_outs(n); i++) {
        ir_node *succ = get_irn_out(n, i);
        if (irn_not_visited(succ) && (get_irn_op(succ) != op_Phi))
-       place_floats_late (succ);
+       place_floats_late(succ, worklist);
      }
  
      /* We have to determine the final block of this node... except for
@@ -1106,45 +1470,51 @@ place_floats_late (ir_node *n)
    }
  }
  
+/**
+ * Runs place_floats_late() on the start block of the current graph and
+ * then on every node taken from the worklist that is not yet visited,
+ * until the worklist is empty.
+ *
+ * @param worklist  queue handed on to place_floats_late(); must not be NULL
+ */
-static INLINE void place_late(void) {
+static INLINE void place_late(pdeq* worklist) {
   assert(worklist);
   inc_irg_visited(current_ir_graph);
 
   /* This fills the worklist initially. */
-  place_floats_late(get_irg_start_block(current_ir_graph));
+  place_floats_late(get_irg_start_block(current_ir_graph), worklist);
   /* And now empty the worklist again... */
   while (!pdeq_empty (worklist)) {
     ir_node *n = pdeq_getl (worklist);
-    if (irn_not_visited(n)) place_floats_late(n);
+    if (irn_not_visited(n)) place_floats_late(n, worklist);
   }
 }
  
  void place_code(ir_graph *irg) {
+  pdeq* worklist;
    ir_graph *rem = current_ir_graph;
+
    current_ir_graph = irg;
  
-  if (!(get_optimize() && get_opt_global_cse())) return;
+  if (!(get_opt_optimize() && get_opt_global_cse())) return;
  
    /* Handle graph state */
    assert(get_irg_phase_state(irg) != phase_building);
    if (get_irg_dom_state(irg) != dom_consistent)
      compute_doms(irg);
  
-  construct_backedges(irg);
+  if (get_irg_loopinfo_state(irg) != loopinfo_consistent) {
+    free_loop_information(irg);
+    construct_backedges(irg);
+  }
  
    /* Place all floating nodes as early as possible. This guarantees
       a legal code placement. */
-  worklist = new_pdeq ();
-  place_early();
+  worklist = new_pdeq();
+  place_early(worklist);
  
    /* place_early invalidates the outs, place_late needs them. */
    compute_outs(irg);
    /* Now move the nodes down in the dominator tree. This reduces the
       unnecessary executions of the node. */
-  place_late();
+  place_late(worklist);
  
    set_irg_outs_inconsistent(current_ir_graph);
-  del_pdeq (worklist);
+  set_irg_loopinfo_inconsistent(current_ir_graph);
+  del_pdeq(worklist);
    current_ir_graph = rem;
  }
  
@@ -1158,9 +1528,11 @@ void place_code(ir_graph *irg) {
  /* semantics of Phi nodes.                                          */
  /********************************************************************/
  
-/* Removes Tuples from Block control flow predecessors.
-   Optimizes blocks with equivalent_node().
-   Replaces n by Bad if n is unreachable control flow. */
+/**
+ * Removes Tuples from Block control flow predecessors.
+ * Optimizes blocks with equivalent_node().
+ * Replaces n by Bad if n is unreachable control flow.
+ */
  static void merge_blocks(ir_node *n, void *env) {
    int i;
    set_irn_link(n, NULL);
@@ -1168,31 +1540,35 @@ static void merge_blocks(ir_node *n, void *env) {
    if (get_irn_op(n) == op_Block) {
      /* Remove Tuples */
      for (i = 0; i < get_Block_n_cfgpreds(n); i++)
-      /* GL @@@ : is this possible? if (get_opt_normalize()) -- added, all tests go throug.
+      /* GL @@@ : is this possible? if (get_opt_normalize()) -- added, all tests go through.
          A different order of optimizations might cause problems. */
        if (get_opt_normalize())
         set_Block_cfgpred(n, i, skip_Tuple(get_Block_cfgpred(n, i)));
-  } else if (get_optimize() && (get_irn_mode(n) == mode_X)) {
+  } else if (get_opt_optimize() && (get_irn_mode(n) == mode_X)) {
      /* We will soon visit a block.  Optimize it before visiting! */
      ir_node *b = get_nodes_Block(n);
-    ir_node *new = equivalent_node(b);
-    while (irn_not_visited(b) && (!is_Bad(new)) && (new != b)) {
+    ir_node *new_node = equivalent_node(b);
+    while (irn_not_visited(b) && (!is_Bad(new_node)) && (new_node != b)) {
        /* We would have to run gigo if new is bad, so we
          promote it directly below. */
-      assert(((b == new) || get_opt_control_flow_straightening() || get_opt_control_flow_weak_simplification()) &&
+      assert(((b == new_node) ||
+             get_opt_control_flow_straightening() ||
+             get_opt_control_flow_weak_simplification()) &&
              ("strange flag setting"));
-      exchange (b, new);
-      b = new;
-      new = equivalent_node(b);
+      exchange (b, new_node);
+      b = new_node;
+      new_node = equivalent_node(b);
      }
      /* GL @@@ get_opt_normalize hinzugefuegt, 5.5.2003 */
-    if (is_Bad(new) && get_opt_normalize()) exchange (n, new_Bad());
+    if (is_Bad(new_node) && get_opt_normalize()) exchange(n, new_Bad());
    }
  }
  
-/* Collects all Phi nodes in link list of Block.
-   Marks all blocks "block_visited" if they contain a node other
-   than Jmp. */
+/**
+ * Collects all Phi nodes in link list of Block.
+ * Marks all blocks "block_visited" if they contain a node other
+ * than Jmp.
+ */
  static void collect_nodes(ir_node *n, void *env) {
    if (is_no_Block(n)) {
      ir_node *b = get_nodes_Block(n);
@@ -1207,7 +1583,7 @@ static void collect_nodes(ir_node *n, void *env) {
    }
  }
  
-/* Returns true if pred is pred of block */
+/** Returns true if pred is predecessor of block. */
  static int is_pred_of(ir_node *pred, ir_node *b) {
    int i;
    for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
@@ -1225,7 +1601,7 @@ static int test_whether_dispensable(ir_node *b, int pos) {
  
    if (get_Block_block_visited(pred) + 1
        < get_irg_block_visited(current_ir_graph)) {
-    if (!get_optimize() || !get_opt_control_flow_strong_simplification()) {
+    if (!get_opt_optimize() || !get_opt_control_flow_strong_simplification()) {
        /* Mark block so that is will not be removed. */
        set_Block_block_visited(pred, get_irg_block_visited(current_ir_graph)-1);
        return 1;
@@ -1278,7 +1654,7 @@ static void optimize_blocks(ir_node *b, void *env) {
    }
    in = (ir_node **) malloc(max_preds * sizeof(ir_node *));
  
-/**
+/*-
    printf(" working on "); DDMN(b);
    for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
      pred = get_nodes_Block(get_Block_cfgpred(b, i));
@@ -1289,9 +1665,9 @@ static void optimize_blocks(ir_node *b, void *env) {
        printf("  removing pred %i ", i); DDMN(pred);
      } else { printf("  Nothing to do for "); DDMN(pred); }
    }
-  * end Debug output **/
+  * end Debug output -*/
  
-  /** Fix the Phi nodes **/
+  /*- Fix the Phi nodes -*/
    phi = get_irn_link(b);
    while (phi) {
      assert(get_irn_op(phi) == op_Phi);
@@ -1337,8 +1713,8 @@ static void optimize_blocks(ir_node *b, void *env) {
      phi = get_irn_link(phi);
    }
  
-/**
-      This happens only if merge between loop backedge and single loop entry. **/
+/*-
+      This happens only if merge between loop backedge and single loop entry. -*/
    for (k = 0; k < get_Block_n_cfgpreds(b); k++) {
      pred = get_nodes_Block(get_Block_cfgpred(b, k));
      if (get_Block_block_visited(pred) +1
@@ -1395,7 +1771,7 @@ static void optimize_blocks(ir_node *b, void *env) {
      }
    }
  
-  /** Fix the block **/
+  /*- Fix the block -*/
    n_preds = 0;
    for (i = 0; i < get_Block_n_cfgpreds(b); i++) {
      pred = get_nodes_Block(get_Block_cfgpred(b, i));
@@ -1473,10 +1849,10 @@ void optimize_cf(ir_graph *irg) {
   * Called by walker of remove_critical_cf_edges.
   *
   * Place an empty block to an edge between a block of multiple
- * predecessors and a block of multiple sucessors.
+ * predecessors and a block of multiple successors.
   *
   * @param n IR node
- * @param env Envirnment of walker. This field is unused and has
+ * @param env Environment of walker. This field is unused and has
   *            the value NULL.
   */
  static void walk_critical_cf_edges(ir_node *n, void *env) {
@@ -1488,10 +1864,15 @@ static void walk_critical_cf_edges(ir_node *n, void *env) {
        (get_irn_arity(n) > 1)) {
      arity = get_irn_arity(n);
  
+    if (n == get_irg_end_block(current_ir_graph))
+      return;  // No use to add a block here.
+
      for (i=0; i<arity; i++) {
        pre = get_irn_n(n, i);
-      /* Predecessor has multiple sucessors. Insert new flow edge */
-      if ((NULL != pre) && (op_Proj == get_irn_op(pre))) {
+      /* Predecessor has multiple successors. Insert new flow edge */
+      if ((NULL != pre) &&
+         (op_Proj == get_irn_op(pre)) &&
+         op_Raise != get_irn_op(skip_Proj(pre))) {
  
         /* set predecessor array for new block */
         in = NEW_ARR_D (ir_node *, current_ir_graph->obst, 1);
@@ -1502,10 +1883,10 @@ static void walk_critical_cf_edges(ir_node *n, void *env) {
         switch_block(block);
         jmp = new_Jmp();
         switch_block(n);
-       /* set sucessor of new block */
+       /* set successor of new block */
         set_irn_n(n, i, jmp);
  
-      } /* predecessor has multiple sucessors */
+      } /* predecessor has multiple successors */
      } /* for all predecessors */
    } /* n is a block */
  }