Fixed and simplified rot matcher

[libfirm] / ir / opt / opt_inline.c
diff --git a/ir/opt/opt_inline.c b/ir/opt/opt_inline.c

index c57eccf..ab7492f 100644 (file)
--- a/ir/opt/opt_inline.c
+++ b/ir/opt/opt_inline.c
@@ -50,6 +50,7 @@
  #include "irouts.h"
  #include "irloop_t.h"
  #include "irbackedge_t.h"
+#include "opt_inline_t.h"
  #include "cgana.h"
  #include "trouts.h"
  #include "error.h"
@@ -60,6 +61,7 @@
  #include "irhooks.h"
  #include "irtools.h"
  
+DEBUG_ONLY(static firm_dbg_module_t *dbg;)
  
  /*------------------------------------------------------------------*/
  /* Routines for dead node elimination / copying garbage collection  */
@@ -751,6 +753,22 @@ static void find_addr(ir_node *node, void *env) {
                         is_Start(get_Proj_pred(node)) &&
                         get_Proj_proj(node) == pn_Start_P_value_arg_base) {
                 *allow_inline = 0;
+       } else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
+               /* From GCC:
+                * Refuse to inline alloca call unless user explicitly forced so as this
+                * may change program's memory overhead drastically when the function
+                * using alloca is called in loop.  In GCC present in SPEC2000 inlining
+                * into schedule_block cause it to require 2GB of ram instead of 256MB.
+                *
+                * Sorryly this is true with our implementation also.
+                * Moreover, we cannot differentiate between alloca() and VLA yet, so this
+                * disables inlining of functions using VLA (with are completely save).
+                *
+                * 2 Solutions:
+                * - add a flag to the Alloc node for "real" alloca() calls
+                * - add a new Stack-Restore node at the end of a function using alloca()
+                */
+               *allow_inline = 0;
         }
  }
  
@@ -800,36 +818,66 @@ enum exc_mode {
  
  /* Inlines a method at the given call site. */
  int inline_method(ir_node *call, ir_graph *called_graph) {
-       ir_node *pre_call;
-       ir_node *post_call, *post_bl;
-       ir_node *in[pn_Start_max];
-       ir_node *end, *end_bl;
-       ir_node **res_pred;
-       ir_node **cf_pred;
-       ir_node *ret, *phi;
-       int arity, n_ret, n_exc, n_res, i, n, j, rem_opt, irn_arity;
-       enum exc_mode exc_handling;
-       ir_type *called_frame, *curr_frame;
+       ir_node             *pre_call;
+       ir_node             *post_call, *post_bl;
+       ir_node             *in[pn_Start_max];
+       ir_node             *end, *end_bl, *block;
+       ir_node             **res_pred;
+       ir_node             **cf_pred;
+       ir_node             **args_in;
+       ir_node             *ret, *phi;
+       int                 arity, n_ret, n_exc, n_res, i, n, j, rem_opt, irn_arity, n_params;
+       enum exc_mode       exc_handling;
+       ir_type             *called_frame, *curr_frame, *mtp, *ctp;
+       ir_entity           *ent;
+       ir_graph            *rem, *irg;
         irg_inline_property prop = get_irg_inline_property(called_graph);
-       ir_entity *ent;
  
         if (prop == irg_inline_forbidden)
                 return 0;
  
         ent = get_irg_entity(called_graph);
  
-       /* Do not inline variadic functions. */
-       if (get_method_variadicity(get_entity_type(ent)) == variadicity_variadic)
+       mtp = get_entity_type(ent);
+       ctp = get_Call_type(call);
+       if (get_method_n_params(mtp) > get_method_n_params(ctp)) {
+               /* this is a bad feature of C: without a prototype, we can can call a function with less
+               parameters than needed. Currently we don't support this, although it would be
+               to use Unknown than. */
                 return 0;
+       }
+
+       /* Argh, compiling C has some bad consequences:
+          the call type AND the method type might be different.
+          It is implementation defendant what happens in that case.
+          We support inlining, if the bitsize of the types matches AND
+          the same arithmetic is used. */
+       n_params = get_method_n_params(mtp);
+       for (i = n_params - 1; i >= 0; --i) {
+               ir_type *param_tp = get_method_param_type(mtp, i);
+               ir_type *arg_tp   = get_method_param_type(ctp, i);
+
+               if (param_tp != arg_tp) {
+                       ir_mode *pmode = get_type_mode(param_tp);
+                       ir_mode *amode = get_type_mode(arg_tp);
+
+                       if (pmode == NULL || amode == NULL)
+                               return 0;
+                       if (get_mode_size_bits(pmode) != get_mode_size_bits(amode))
+                               return 0;
+                       if (get_mode_arithmetic(pmode) != get_mode_arithmetic(amode))
+                               return 0;
+                       /* otherwise we can simply "reinterpret" the bits */
+               }
+       }
  
-       assert(get_method_n_params(get_entity_type(ent)) ==
-              get_method_n_params(get_Call_type(call)));
+       irg = get_irn_irg(call);
  
         /*
          * We cannot inline a recursive call. The graph must be copied before
          * the call the inline_method() using create_irg_copy().
          */
-       if (called_graph == current_ir_graph)
+       if (called_graph == irg)
                 return 0;
  
         /*
@@ -840,19 +888,24 @@ int inline_method(ir_node *call, ir_graph *called_graph) {
         if (! can_inline(call, called_graph))
                 return 0;
  
+       rem = current_ir_graph;
+       current_ir_graph = irg;
+
+       DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));
+
         /* --  Turn off optimizations, this can cause problems when allocating new nodes. -- */
         rem_opt = get_opt_optimize();
         set_optimize(0);
  
         /* Handle graph state */
-       assert(get_irg_phase_state(current_ir_graph) != phase_building);
-       assert(get_irg_pinned(current_ir_graph) == op_pin_state_pinned);
+       assert(get_irg_phase_state(irg) != phase_building);
+       assert(get_irg_pinned(irg) == op_pin_state_pinned);
         assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
-       set_irg_outs_inconsistent(current_ir_graph);
-       set_irg_extblk_inconsistent(current_ir_graph);
-       set_irg_doms_inconsistent(current_ir_graph);
-       set_irg_loopinfo_inconsistent(current_ir_graph);
-       set_irg_callee_info_state(current_ir_graph, irg_callee_info_inconsistent);
+       set_irg_outs_inconsistent(irg);
+       set_irg_extblk_inconsistent(irg);
+       set_irg_doms_inconsistent(irg);
+       set_irg_loopinfo_inconsistent(irg);
+       set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
  
         /* -- Check preconditions -- */
         assert(is_Call(call));
@@ -878,19 +931,33 @@ int inline_method(ir_node *call, ir_graph *called_graph) {
                 else            {                exc_handling = exc_no_handler; } /* !Mproj && !Xproj   */
         }
  
+       /* create the argument tuple */
+       NEW_ARR_A(ir_type *, args_in, n_params);
+
+       block = get_nodes_block(call);
+       for (i = n_params - 1; i >= 0; --i) {
+               ir_node *arg      = get_Call_param(call, i);
+               ir_type *param_tp = get_method_param_type(mtp, i);
+               ir_mode *mode     = get_type_mode(param_tp);
+
+               if (mode != get_irn_mode(arg)) {
+                       arg = new_r_Conv(irg, block, arg, mode);
+               }
+               args_in[i] = arg;
+       }
+
         /* --
            the procedure and later replaces the Start node of the called graph.
            Post_call is the old Call node and collects the results of the called
            graph. Both will end up being a tuple.  -- */
         post_bl = get_nodes_block(call);
-       set_irg_current_block(current_ir_graph, post_bl);
+       set_irg_current_block(irg, post_bl);
         /* XxMxPxPxPxT of Start + parameter of Call */
         in[pn_Start_X_initial_exec]   = new_Jmp();
         in[pn_Start_M]                = get_Call_mem(call);
-       in[pn_Start_P_frame_base]     = get_irg_frame(current_ir_graph);
-       in[pn_Start_P_globals]        = get_irg_globals(current_ir_graph);
-       in[pn_Start_P_tls]            = get_irg_tls(current_ir_graph);
-       in[pn_Start_T_args]           = new_Tuple(get_Call_n_params(call), get_Call_param_arr(call));
+       in[pn_Start_P_frame_base]     = get_irg_frame(irg);
+       in[pn_Start_P_tls]            = get_irg_tls(irg);
+       in[pn_Start_T_args]           = new_Tuple(n_params, args_in);
         /* in[pn_Start_P_value_arg_base] = ??? */
         assert(pn_Start_P_value_arg_base == pn_Start_max - 1 && "pn_Start_P_value_arg_base not supported, fix");
         pre_call = new_Tuple(pn_Start_max - 1, in);
@@ -904,29 +971,29 @@ int inline_method(ir_node *call, ir_graph *called_graph) {
         /* -- Prepare state for dead node elimination -- */
         /* Visited flags in calling irg must be >= flag in called irg.
            Else walker and arity computation will not work. */
-       if (get_irg_visited(current_ir_graph) <= get_irg_visited(called_graph))
-               set_irg_visited(current_ir_graph, get_irg_visited(called_graph)+1);
-       if (get_irg_block_visited(current_ir_graph)< get_irg_block_visited(called_graph))
-               set_irg_block_visited(current_ir_graph, get_irg_block_visited(called_graph));
+       if (get_irg_visited(irg) <= get_irg_visited(called_graph))
+               set_irg_visited(irg, get_irg_visited(called_graph) + 1);
+       if (get_irg_block_visited(irg) < get_irg_block_visited(called_graph))
+               set_irg_block_visited(irg, get_irg_block_visited(called_graph));
         /* Set pre_call as new Start node in link field of the start node of
            calling graph and pre_calls block as new block for the start block
            of calling graph.
            Further mark these nodes so that they are not visited by the
            copying. */
         set_irn_link(get_irg_start(called_graph), pre_call);
-       set_irn_visited(get_irg_start(called_graph), get_irg_visited(current_ir_graph));
+       set_irn_visited(get_irg_start(called_graph), get_irg_visited(irg));
         set_irn_link(get_irg_start_block(called_graph), get_nodes_block(pre_call));
-       set_irn_visited(get_irg_start_block(called_graph), get_irg_visited(current_ir_graph));
-       set_irn_link(get_irg_bad(called_graph), get_irg_bad(current_ir_graph));
-       set_irn_visited(get_irg_bad(called_graph), get_irg_visited(current_ir_graph));
+       set_irn_visited(get_irg_start_block(called_graph), get_irg_visited(irg));
+       set_irn_link(get_irg_bad(called_graph), get_irg_bad(irg));
+       set_irn_visited(get_irg_bad(called_graph), get_irg_visited(irg));
  
         /* Initialize for compaction of in arrays */
-       inc_irg_block_visited(current_ir_graph);
+       inc_irg_block_visited(irg);
  
         /* -- Replicate local entities of the called_graph -- */
         /* copy the entities. */
         called_frame = get_irg_frame_type(called_graph);
-       curr_frame   = get_irg_frame_type(current_ir_graph);
+       curr_frame   = get_irg_frame_type(irg);
         for (i = 0, n = get_class_n_members(called_frame); i < n; ++i) {
                 ir_entity *new_ent, *old_ent;
                 old_ent = get_class_member(called_frame, i);
@@ -937,7 +1004,7 @@ int inline_method(ir_node *call, ir_graph *called_graph) {
         /* visited is > than that of called graph.  With this trick visited will
            remain unchanged so that an outer walker, e.g., searching the call nodes
             to inline, calling this inline will not visit the inlined nodes. */
-       set_irg_visited(current_ir_graph, get_irg_visited(current_ir_graph)-1);
+       set_irg_visited(irg, get_irg_visited(irg)-1);
  
         /* -- Performing dead node elimination inlines the graph -- */
         /* Copies the nodes to the obstack of current_ir_graph. Updates links to new
@@ -946,8 +1013,8 @@ int inline_method(ir_node *call, ir_graph *called_graph) {
                  get_irg_frame_type(called_graph));
  
         /* Repair called_graph */
-       set_irg_visited(called_graph, get_irg_visited(current_ir_graph));
-       set_irg_block_visited(called_graph, get_irg_block_visited(current_ir_graph));
+       set_irg_visited(called_graph, get_irg_visited(irg));
+       set_irg_block_visited(called_graph, get_irg_block_visited(irg));
         set_Block_block_visited(get_irg_start_block(called_graph), 0);
  
         /* -- Merge the end of the inlined procedure with the call site -- */
@@ -973,14 +1040,14 @@ int inline_method(ir_node *call, ir_graph *called_graph) {
         res_pred = xmalloc(n_res * sizeof(*res_pred));
         cf_pred  = xmalloc(arity * sizeof(*res_pred));
  
-       set_irg_current_block(current_ir_graph, post_bl); /* just to make sure */
+       set_irg_current_block(irg, post_bl); /* just to make sure */
  
         /* -- archive keepalives -- */
         irn_arity = get_irn_arity(end);
         for (i = 0; i < irn_arity; i++) {
                 ir_node *ka = get_End_keepalive(end, i);
                 if (! is_Bad(ka))
-                       add_End_keepalive(get_irg_end(current_ir_graph), ka);
+                       add_End_keepalive(get_irg_end(irg), ka);
         }
  
         /* The new end node will die.  We need not free as the in array is on the obstack:
@@ -992,7 +1059,7 @@ int inline_method(ir_node *call, ir_graph *called_graph) {
                 ir_node *ret;
                 ret = get_irn_n(end_bl, i);
                 if (is_Return(ret)) {
-                       cf_pred[n_ret] = new_r_Jmp(current_ir_graph, get_nodes_block(ret));
+                       cf_pred[n_ret] = new_r_Jmp(irg, get_nodes_block(ret));
                         n_ret++;
                 }
         }
@@ -1079,14 +1146,14 @@ int inline_method(ir_node *call, ir_graph *called_graph) {
                                 ir_node *ret;
                                 ret = skip_Proj(get_irn_n(end_bl, i));
                                 if (is_Call(ret)) {
-                                       cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_block(ret), ret, mode_M, 3);
+                                       cf_pred[n_exc] = new_r_Proj(irg, get_nodes_block(ret), ret, mode_M, 3);
                                         n_exc++;
                                 } else if (is_fragile_op(ret)) {
                                         /* We rely that all cfops have the memory output at the same position. */
-                                       cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_block(ret), ret, mode_M, 0);
+                                       cf_pred[n_exc] = new_r_Proj(irg, get_nodes_block(ret), ret, mode_M, 0);
                                         n_exc++;
                                 } else if (is_Raise(ret)) {
-                                       cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_block(ret), ret, mode_M, 1);
+                                       cf_pred[n_exc] = new_r_Proj(irg, get_nodes_block(ret), ret, mode_M, 1);
                                         n_exc++;
                                 }
                         }
@@ -1111,7 +1178,7 @@ int inline_method(ir_node *call, ir_graph *called_graph) {
                                 n_exc++;
                         }
                 }
-               main_end_bl = get_irg_end_block(current_ir_graph);
+               main_end_bl = get_irg_end_block(irg);
                 main_end_bl_arity = get_irn_arity(main_end_bl);
                 end_preds =  xmalloc((n_exc + main_end_bl_arity) * sizeof(*end_preds));
  
@@ -1129,6 +1196,7 @@ int inline_method(ir_node *call, ir_graph *called_graph) {
  
         /* --  Turn CSE back on. -- */
         set_optimize(rem_opt);
+       current_ir_graph = rem;
  
         return 1;
  }
@@ -1142,10 +1210,10 @@ static struct obstack  temp_obst;
  /** Represents a possible inlinable call in a graph. */
  typedef struct _call_entry call_entry;
  struct _call_entry {
-       ir_node    *call;   /**< the Call */
-       ir_graph   *callee; /**< the callee called here */
-       call_entry *next;   /**< for linking the next one */
-       unsigned   weight;  /**< the weight of the call */
+       ir_node    *call;      /**< the Call node */
+       ir_graph   *callee;    /**< the callee IR-graph called here */
+       call_entry *next;      /**< for linking the next one */
+       int        loop_depth; /**< the loop depth of this call */
  };
  
  /**
@@ -1186,10 +1254,10 @@ static void collect_calls(ir_node *call, void *env) {
                         /* The Call node calls a locally defined method.  Remember to inline. */
                         inline_env_t *ienv  = env;
                         call_entry   *entry = obstack_alloc(&ienv->obst, sizeof(*entry));
-                       entry->call   = call;
-                       entry->callee = called_irg;
-                       entry->next   = NULL;
-                       entry->weight = 0;
+                       entry->call       = call;
+                       entry->callee     = called_irg;
+                       entry->next       = NULL;
+                       entry->loop_depth = 0;
  
                         if (ienv->tail == NULL)
                                 ienv->head = entry;
@@ -1209,12 +1277,9 @@ static void collect_calls(ir_node *call, void *env) {
   * size are inlined.
   */
  void inline_small_irgs(ir_graph *irg, int size) {
-  ir_graph *rem = current_ir_graph;
+       ir_graph *rem = current_ir_graph;
         inline_env_t env;
         call_entry *entry;
-       DEBUG_ONLY(firm_dbg_module_t *dbg;)
-
-       FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
  
         current_ir_graph = irg;
         /* Handle graph state */
@@ -1252,14 +1317,15 @@ typedef struct {
         int n_nodes;             /**< Number of nodes in graph except Id, Tuple, Proj, Start, End. */
         int n_blocks;            /**< Number of Blocks in graph without Start and End block. */
         int n_nodes_orig;        /**< for statistics */
-       call_entry *call_head;   /**< The head of the list of all call nodes in this graph. */
-       call_entry *call_tail;   /**< The tail of the list of all call nodes in this graph .*/
         int n_call_nodes;        /**< Number of Call nodes in the graph. */
         int n_call_nodes_orig;   /**< for statistics */
         int n_callers;           /**< Number of known graphs that call this graphs. */
         int n_callers_orig;      /**< for statistics */
         unsigned got_inline:1;   /**< Set, if at least one call inside this graph was inlined. */
         unsigned local_vars:1;   /**< Set, if a inlined function gets the address of an inlined variable. */
+       unsigned recursive:1;    /**< Set, if this function is self recursive. */
+       call_entry *call_head;   /**< The head of the list of all call nodes in this graph. */
+       call_entry *call_tail;   /**< The tail of the list of all call nodes in this graph .*/
         unsigned *local_weights; /**< Once allocated, the beneficial weight for transmitting local addresses. */
  } inline_irg_env;
  
@@ -1279,14 +1345,16 @@ static inline_irg_env *alloc_inline_irg_env(void) {
         env->n_callers_orig    = 0;
         env->got_inline        = 0;
         env->local_vars        = 0;
+       env->recursive         = 0;
         env->local_weights     = NULL;
         return env;
  }
  
  typedef struct walker_env {
-       inline_irg_env *x;    /**< the inline environment */
-       char ignore_runtime;  /**< the ignore runtime flag */
-       char ignore_callers;  /**< if set, do change callers data */
+       inline_irg_env *x;     /**< the inline environment */
+       call_entry *last_call; /**< points to the last inserted call */
+       char ignore_runtime;   /**< the ignore runtime flag */
+       char ignore_callers;   /**< if set, do change callers data */
  } wenv_t;
  
  /**
@@ -1336,17 +1404,44 @@ static void collect_calls2(ir_node *call, void *ctx) {
                         ++callee_env->n_callers;
                         ++callee_env->n_callers_orig;
                 }
+               if (callee == current_ir_graph)
+                       x->recursive = 1;
  
                 /* link it in the list of possible inlinable entries */
                 entry = obstack_alloc(&temp_obst, sizeof(*entry));
-               entry->call   = call;
-               entry->callee = callee;
-               entry->next   = NULL;
-               if (x->call_tail == NULL)
+               entry->call       = call;
+               entry->callee     = callee;
+               entry->next       = NULL;
+               entry->loop_depth = get_irn_loop(get_nodes_block(call))->depth;
+
+               /* note: we use call_tail here as a pointer to the last inserted */
+               if (x->call_head == NULL) {
                         x->call_head = entry;
-               else
-                       x->call_tail->next = entry;
-               x->call_tail = entry;
+               } else {
+                       if (entry->loop_depth == env->last_call->loop_depth) {
+                               /* same depth as the last one, enqueue after it */
+                               entry->next          = env->last_call->next;
+                               env->last_call->next = entry;
+                       } else if (entry->loop_depth > x->call_head->loop_depth) {
+                               /* put first */
+                               entry->next  = x->call_head;
+                               x->call_head = entry;
+                       } else {
+                               /* search the insertion point */
+                               call_entry *p;
+
+                               for (p = x->call_head; p->next != NULL; p = p->next)
+                                       if (entry->loop_depth > p->next->loop_depth)
+                                               break;
+                               entry->next = p->next;
+                               p->next     = entry;
+                       }
+               }
+               env->last_call = entry;
+               if (entry->next == NULL) {
+                       /* keep tail up to date */
+                       x->call_tail = entry;
+               }
         }
  }
  
@@ -1379,14 +1474,46 @@ static void append_call_list(inline_irg_env *dst, call_entry *src) {
            in the link field. */
         for (entry = src; entry != NULL; entry = entry->next) {
                 nentry = obstack_alloc(&temp_obst, sizeof(*nentry));
-               nentry->call   = get_irn_link(entry->call);
-               nentry->callee = entry->callee;
-               nentry->next   = NULL;
+               nentry->call         = get_irn_link(entry->call);
+               nentry->callee       = entry->callee;
+               nentry->next         = NULL;
+               nentry->loop_depth   = entry->loop_depth;
                 dst->call_tail->next = nentry;
                 dst->call_tail       = nentry;
         }
  }
  
+/**
+ * Add the nodes of the list src in front to the nodes of the list dst.
+ */
+static call_entry *replace_entry_by_call_list(call_entry *dst, call_entry *src) {
+       call_entry *entry, *nentry, *head, *tail;
+
+       /* Note that the src list points to Call nodes in the inlined graph, but
+          we need Call nodes in our graph. Luckily the inliner leaves this information
+          in the link field. */
+       head = tail = NULL;
+       for (entry = src; entry != NULL; entry = entry->next) {
+               nentry = obstack_alloc(&temp_obst, sizeof(*nentry));
+               nentry->call         = get_irn_link(entry->call);
+               nentry->callee       = entry->callee;
+               nentry->next         = NULL;
+               nentry->loop_depth   = entry->loop_depth + dst->loop_depth;
+               if (head == NULL)
+                       head = nentry;
+               else
+                       tail->next = nentry;
+               tail = nentry;
+       }
+       /* skip the head of dst */
+       if (head != NULL) {
+               tail->next = dst->next;
+       } else {
+               head = dst->next;
+       }
+       return head;
+}
+
  /*
   * Inlines small leave methods at call sites where the called address comes
   * from a Const node that references the entity representing the called
@@ -1406,9 +1533,7 @@ void inline_leave_functions(int maxsize, int leavesize, int size, int ignore_run
         const call_entry *centry;
         pmap             *copied_graphs;
         pmap_entry       *pm_entry;
-       DEBUG_ONLY(firm_dbg_module_t *dbg;)
  
-       FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
         rem = current_ir_graph;
         obstack_init(&temp_obst);
  
@@ -1429,6 +1554,7 @@ void inline_leave_functions(int maxsize, int leavesize, int size, int ignore_run
                 assert(get_irg_phase_state(irg) != phase_building);
                 free_callee_info(irg);
  
+               assure_cf_loop(irg);
                 wenv.x = get_irg_link(irg);
                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
         }
@@ -1541,6 +1667,7 @@ void inline_leave_functions(int maxsize, int leavesize, int size, int ignore_run
                                         callee_env = alloc_inline_irg_env();
                                         set_irg_link(copy, callee_env);
  
+                                       assure_cf_loop(copy);
                                         wenv.x              = callee_env;
                                         wenv.ignore_callers = 1;
                                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
@@ -1603,15 +1730,11 @@ void inline_leave_functions(int maxsize, int leavesize, int size, int ignore_run
                 env = (inline_irg_env *)get_irg_link(irg);
  
                 if (env->got_inline) {
-                       /* this irg got calls inlined */
-                       set_irg_outs_inconsistent(irg);
-                       set_irg_doms_inconsistent(irg);
-
                         optimize_graph_df(irg);
                         optimize_cf(irg);
                 }
                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
-                       DB((dbg, SET_LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
+                       DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
                         env->n_callers_orig, env->n_callers,
                         get_entity_name(get_irg_entity(irg))));
@@ -1687,6 +1810,7 @@ static unsigned calc_method_local_weight(ir_node *arg) {
                                         }
                                 }
                         }
+                       break;
                 default:
                         /* any other node: unsupported yet or bad. */
                         return 0;
@@ -1752,7 +1876,7 @@ static int calc_inline_benefice(ir_node *call, ir_graph *callee, unsigned *local
         ir_node   *frame_ptr;
         ir_type   *mtp;
         int       weight = 0;
-       int       i, n_params;
+       int       i, n_params, all_const;
         unsigned  cc, v;
  
         inline_irg_env *curr_env, *callee_env;
@@ -1781,21 +1905,27 @@ static int calc_inline_benefice(ir_node *call, ir_graph *callee, unsigned *local
  
         /* constant parameters improve the benefice */
         frame_ptr = get_irg_frame(current_ir_graph);
+       all_const = 1;
         for (i = 0; i < n_params; ++i) {
                 ir_node *param = get_Call_param(call, i);
  
-               if (is_Const(param) || is_SymConst(param))
+               if (is_Const(param))
                         weight += get_method_param_weight(ent, i);
-               else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
-                       /*
-                        * An address of a local variable is transmitted. After inlining,
-                        * scalar_replacement might be able to remove the local variable,
-                        * so honor this.
-                        */
-                       v = get_method_local_adress_weight(callee, i);
-                       weight += v;
-                       if (v > 0)
-                               *local_adr = 1;
+               else {
+                       all_const = 0;
+                       if (is_SymConst(param))
+                               weight += get_method_param_weight(ent, i);
+                       else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
+                               /*
+                                * An address of a local variable is transmitted. After inlining,
+                                * scalar_replacement might be able to remove the local variable,
+                                * so honor this.
+                                */
+                               v = get_method_local_adress_weight(callee, i);
+                               weight += v;
+                               if (v > 0)
+                                       *local_adr = 1;
+                       }
                 }
         }
  
@@ -1812,33 +1942,52 @@ static int calc_inline_benefice(ir_node *call, ir_graph *callee, unsigned *local
  
         /* reduce the benefice if the current function is already big */
         curr_env = get_irg_link(current_ir_graph);
-       weight -= curr_env->n_nodes / 100;
+       weight -= curr_env->n_nodes / 50;
  
         /* give a bonus for functions with one block */
         if (callee_env->n_blocks == 1)
                 weight = weight * 3 / 2;
  
+       /* and one for small non-recursive functions: we want them to be inlined in mostly every case */
+       else if (callee_env->n_nodes < 20 && !callee_env->recursive)
+               weight += 5000;
+
+       /* and finally for leaves: they do not increase the register pressure
+          because of callee safe registers */
+       else if (callee_env->n_call_nodes == 0)
+               weight += 25;
+
+       /*
+        * Reduce the weight for recursive function IFF not all arguments are const.
+        * inlining recursive functions is rarely good.
+        */
+       if (callee_env->recursive && !all_const)
+               weight -= 500;
+
+       /*
+        * All arguments constant is probably a good sign, give an extra bonus
+        */
+       if (all_const)
+               weight += 100;
+
         return weight;
  }
  
  /**
- * Heuristic inliner. Calculates a benifice value for every call and inlines
+ * Heuristic inliner. Calculates a benefice value for every call and inlines
   * those calls with a value higher than the threshold.
   */
-void inline_functions(int inline_threshold) {
+void inline_functions(int maxsize, int inline_threshold) {
         inline_irg_env   *env;
-       ir_graph         *irg;
         int              i, n_irgs;
         ir_graph         *rem;
         int              did_inline;
         wenv_t           wenv;
-       call_entry       *entry, *tail;
+       call_entry       *curr_call, **last_call;
         const call_entry *centry;
         pmap             *copied_graphs;
         pmap_entry       *pm_entry;
-       DEBUG_ONLY(firm_dbg_module_t *dbg;)
  
-       FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
         rem = current_ir_graph;
         obstack_init(&temp_obst);
  
@@ -1859,43 +2008,48 @@ void inline_functions(int inline_threshold) {
                 assert(get_irg_phase_state(irg) != phase_building);
                 free_callee_info(irg);
  
-               wenv.x = get_irg_link(irg);
+               wenv.x         = get_irg_link(irg);
+               wenv.last_call = NULL;
+               assure_cf_loop(irg);
                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
         }
  
         /* -- and now inline. -- */
         for (i = 0; i < n_irgs; ++i) {
-               ir_node *call;
-               int phiproj_computed = 0;
+               int      phiproj_computed = 0;
+               ir_node  *call;
+               ir_graph *irg = get_irp_irg(i);
  
-               current_ir_graph = get_irp_irg(i);
-               env = get_irg_link(current_ir_graph);
+               current_ir_graph = irg;
+               env = get_irg_link(irg);
  
                 /* note that the list of possible calls is updated during the process */
-               tail = NULL;
-               for (entry = env->call_head; entry != NULL; entry = entry->next) {
+               last_call = &env->call_head;
+               for (curr_call = env->call_head; curr_call != NULL;) {
                         ir_graph   *callee;
                         pmap_entry *e;
                         int        benefice;
                         unsigned   local_adr;
  
-                       call   = entry->call;
-                       callee = entry->callee;
+                       if (env->n_nodes > maxsize) break;
  
-                       /* calculate the benifice on the original call to prevent excessive inlining */
-                       local_adr = 0;
-                       benefice = calc_inline_benefice(call, callee, &local_adr);
-                       DB((dbg, SET_LEVEL_2, "In %+F Call %+F has benefice %d\n", current_ir_graph, callee, benefice));
+                       call   = curr_call->call;
+                       callee = curr_call->callee;
  
                         e = pmap_find(copied_graphs, callee);
                         if (e != NULL) {
                                 /*
-                                * Remap callee if we have a copy.
-                                * FIXME: Should we do this only for recursive Calls ?
-                                */
+                               * Remap callee if we have a copy.
+                               * FIXME: Should we do this only for recursive Calls ?
+                               */
                                 callee = e->value;
                         }
  
+                       /* calculate the benefice on the original call to prevent excessive inlining */
+                       local_adr = 0;
+                       benefice = calc_inline_benefice(call, callee, &local_adr);
+                       DB((dbg, LEVEL_2, "In %+F Call %+F has benefice %d\n", irg, callee, benefice));
+
                         if (benefice > -inline_threshold ||
                                 (get_irg_inline_property(callee) >= irg_inline_forced)) {
                                 if (current_ir_graph == callee) {
@@ -1922,6 +2076,7 @@ void inline_functions(int inline_threshold) {
                                         callee_env = alloc_inline_irg_env();
                                         set_irg_link(copy, callee_env);
  
+                                       assure_cf_loop(copy);
                                         wenv.x              = callee_env;
                                         wenv.ignore_callers = 1;
                                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
@@ -1951,52 +2106,47 @@ void inline_functions(int inline_threshold) {
                                         /* was inlined, must be recomputed */
                                         phiproj_computed = 0;
  
+                                       /* after we have inlined callee, all called methods inside callee
+                                       are now called once more */
+                                       for (centry = callee_env->call_head; centry != NULL; centry = centry->next) {
+                                               inline_irg_env *penv = get_irg_link(centry->callee);
+                                               ++penv->n_callers;
+                                       }
+
                                         /* callee was inline. Append it's call list. */
                                         env->got_inline = 1;
                                         if (local_adr)
                                                 env->local_vars = 1;
                                         --env->n_call_nodes;
-                                       append_call_list(env, callee_env->call_head);
+                                       curr_call = replace_entry_by_call_list(curr_call, callee_env->call_head);
                                         env->n_call_nodes += callee_env->n_call_nodes;
                                         env->n_nodes += callee_env->n_nodes;
                                         --callee_env->n_callers;
  
-                                       /* after we have inlined callee, all called methods inside callee
-                                          are now called once more */
-                                       for (centry = callee_env->call_head; centry != NULL; centry = centry->next) {
-                                               inline_irg_env *penv = get_irg_link(centry->callee);
-                                               ++penv->n_callers;
-                                       }
-
-                                       /* remove this call from the list */
-                                       if (tail != NULL)
-                                               tail->next = entry->next;
-                                       else
-                                               env->call_head = entry->next;
+                                       /* remove the current call entry from the list */
+                                       *last_call = curr_call;
                                         continue;
                                 }
                         }
-                       tail = entry;
+                       last_call = &curr_call->next;
+                       curr_call = curr_call->next;
                 }
-               env->call_tail = tail;
-       }
-
-       for (i = 0; i < n_irgs; ++i) {
-               irg = get_irp_irg(i);
-               env = (inline_irg_env *)get_irg_link(irg);
  
                 if (env->got_inline) {
-                       /* this irg got calls inlined */
-                       set_irg_outs_inconsistent(irg);
-                       set_irg_doms_inconsistent(irg);
+                       /* this irg got calls inlined: optimize it */
  
-                       if (env->local_vars)
-                               scalar_replacement_opt(irg);
+                       /* scalar replacement does not work well with Tuple nodes, so optimize them away */
                         optimize_graph_df(irg);
+
+                       if (env->local_vars) {
+                               if (scalar_replacement_opt(irg)) {
+                                       optimize_graph_df(irg);
+                               }
+                       }
                         optimize_cf(irg);
                 }
                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
-                       DB((dbg, SET_LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
+                       DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
                         env->n_callers_orig, env->n_callers,
                         get_entity_name(get_irg_entity(irg))));
@@ -2016,3 +2166,7 @@ void inline_functions(int inline_threshold) {
         obstack_free(&temp_obst, NULL);
         current_ir_graph = rem;
  }
+
+void firm_init_inline(void) {
+       FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
+}