- BugFix: also kill partitions with 0 blocks
[libfirm] / ir / opt / opt_inline.c
index 8df864e..bf36024 100644 (file)
  * @author   Michael Beck, Goetz Lindenmaier
  * @version  $Id$
  */
-#ifdef HAVE_CONFIG_H
-# include "config.h"
-#endif
+#include "config.h"
 
+#include <limits.h>
 #include <assert.h>
 
 #include "irnode_t.h"
 #include "irgmod.h"
 #include "irgwalk.h"
 
-#include "adt/array.h"
-#include "adt/pset.h"
-#include "adt/pmap.h"
-#include "adt/pdeq.h"
-#include "adt/xmalloc.h"
+#include "array_t.h"
+#include "list.h"
+#include "pset.h"
+#include "pmap.h"
+#include "pdeq.h"
+#include "xmalloc.h"
+#include "pqueue.h"
 
 #include "irouts.h"
 #include "irloop_t.h"
 #include "irbackedge_t.h"
+#include "opt_inline_t.h"
 #include "cgana.h"
 #include "trouts.h"
 #include "error.h"
 
+#include "analyze_irg_args.h"
 #include "iredges_t.h"
 #include "irflag_t.h"
 #include "irhooks.h"
 #include "irtools.h"
+#include "iropt_dbg.h"
 
+DEBUG_ONLY(static firm_dbg_module_t *dbg;)
 
 /*------------------------------------------------------------------*/
 /* Routines for dead node elimination / copying garbage collection  */
@@ -87,7 +92,7 @@
  * accesses.  This function is called for all Phi and Block nodes
  * in a Block.
  */
-static INLINE int
+static inline int
 compute_new_arity(ir_node *b) {
        int i, res, irn_arity;
        int irg_v, block_v;
@@ -435,7 +440,8 @@ void dead_node_elimination(ir_graph *irg) {
 #endif
        struct obstack *graveyard_obst = NULL;
        struct obstack *rebirth_obst   = NULL;
-       assert(! edges_activated(irg) && "dead node elimination requires disabled edges");
+
+       edges_deactivate(irg);
 
        /* inform statistics that we started a dead-node elimination run */
        hook_dead_node_elim(irg, 1);
@@ -464,7 +470,7 @@ void dead_node_elimination(ir_graph *irg) {
        graveyard_obst = irg->obst;
 
        /* A new obstack, where the reachable nodes will be copied to. */
-       rebirth_obst = xmalloc(sizeof(*rebirth_obst));
+       rebirth_obst = XMALLOC(struct obstack);
        irg->obst = rebirth_obst;
        obstack_init(irg->obst);
        irg->last_node_idx = 0;
@@ -655,7 +661,7 @@ static void dead_node_subst_hook(void *context, ir_graph *irg, ir_node *old, ir_
  * Make a new Survive DCE environment.
  */
 survive_dce_t *new_survive_dce(void) {
-       survive_dce_t *res = xmalloc(sizeof(res[0]));
+       survive_dce_t *res = XMALLOC(survive_dce_t);
        obstack_init(&res->obst);
        res->places     = pmap_create();
        res->new_places = NULL;
@@ -668,10 +674,6 @@ survive_dce_t *new_survive_dce(void) {
        res->dead_node_elim_subst.context = res;
        res->dead_node_elim_subst.next    = NULL;
 
-#ifndef FIRM_ENABLE_HOOKS
-       assert(0 && "need hooks enabled");
-#endif
-
        register_hook(hook_dead_node_elim, &res->dead_node_elim);
        register_hook(hook_dead_node_elim_subst, &res->dead_node_elim_subst);
        return res;
@@ -722,8 +724,7 @@ void survive_dce_register_irn(survive_dce_t *sd, ir_node **place) {
  * inlined procedure. The new entities must be in the link field of
  * the entities.
  */
-static INLINE void
-copy_node_inline(ir_node *n, void *env) {
+static void copy_node_inline(ir_node *n, void *env) {
        ir_node *nn;
        ir_type *frame_tp = (ir_type *)env;
 
@@ -740,6 +741,27 @@ copy_node_inline(ir_node *n, void *env) {
        }
 }
 
+/**
+ * Copies new predecessors of old node and move constants to
+ * the Start Block.
+ */
+static void copy_preds_inline(ir_node *n, void *env) {
+       ir_node *nn;
+
+       copy_preds(n, env);
+       nn = skip_Id(get_new_node(n));
+       if (is_irn_constlike(nn)) {
+               /* move Constants into the start block */
+               set_nodes_block(nn, get_irg_start_block(current_ir_graph));
+
+               n = identify_remember(current_ir_graph->value_table, nn);
+               if (nn != n) {
+                       DBG_OPT_CSE(nn, n);
+                       exchange(nn, n);
+               }
+       }
+}
+
 /**
  * Walker: checks if P_value_arg_base is used.
  */
@@ -749,6 +771,22 @@ static void find_addr(ir_node *node, void *env) {
                        is_Start(get_Proj_pred(node)) &&
                        get_Proj_proj(node) == pn_Start_P_value_arg_base) {
                *allow_inline = 0;
+       } else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
+               /* From GCC:
+                * Refuse to inline alloca call unless user explicitly forced so as this
+                * may change program's memory overhead drastically when the function
+                * using alloca is called in loop.  In GCC present in SPEC2000 inlining
+                * into schedule_block cause it to require 2GB of ram instead of 256MB.
+                *
+                * Sadly, this is true for our implementation as well.
+                * Moreover, we cannot differentiate between alloca() and VLA yet, so this
+                * disables inlining of functions using VLA (which are completely safe).
+                *
+                * 2 Solutions:
+                * - add a flag to the Alloc node for "real" alloca() calls
+                * - add a new Stack-Restore node at the end of a function using alloca()
+                */
+               *allow_inline = 0;
        }
 }
 
@@ -798,36 +836,67 @@ enum exc_mode {
 
 /* Inlines a method at the given call site. */
 int inline_method(ir_node *call, ir_graph *called_graph) {
-       ir_node *pre_call;
-       ir_node *post_call, *post_bl;
-       ir_node *in[pn_Start_max];
-       ir_node *end, *end_bl;
-       ir_node **res_pred;
-       ir_node **cf_pred;
-       ir_node *ret, *phi;
-       int arity, n_ret, n_exc, n_res, i, n, j, rem_opt, irn_arity;
-       enum exc_mode exc_handling;
-       ir_type *called_frame, *curr_frame;
+       ir_node             *pre_call;
+       ir_node             *post_call, *post_bl;
+       ir_node             *in[pn_Start_max];
+       ir_node             *end, *end_bl, *block;
+       ir_node             **res_pred;
+       ir_node             **cf_pred;
+       ir_node             **args_in;
+       ir_node             *ret, *phi;
+       int                 arity, n_ret, n_exc, n_res, i, n, j, rem_opt, irn_arity, n_params;
+       enum exc_mode       exc_handling;
+       ir_type             *called_frame, *curr_frame, *mtp, *ctp;
+       ir_entity           *ent;
+       ir_graph            *rem, *irg;
        irg_inline_property prop = get_irg_inline_property(called_graph);
-       ir_entity *ent;
+       unsigned long       visited;
 
        if (prop == irg_inline_forbidden)
                return 0;
 
        ent = get_irg_entity(called_graph);
 
-       /* Do not inline variadic functions. */
-       if (get_method_variadicity(get_entity_type(ent)) == variadicity_variadic)
+       mtp = get_entity_type(ent);
+       ctp = get_Call_type(call);
+       if (get_method_n_params(mtp) > get_method_n_params(ctp)) {
+               /* This is a bad feature of C: without a prototype, we can call a function with fewer
+               parameters than needed. Currently we do not support this, although it would be
+               easy to use Unknown then. */
                return 0;
+       }
 
-       assert(get_method_n_params(get_entity_type(ent)) ==
-              get_method_n_params(get_Call_type(call)));
+       /* Argh, compiling C has some bad consequences:
+          the call type AND the method type might be different.
+          It is implementation dependent what happens in that case.
+          We support inlining, if the bitsize of the types matches AND
+          the same arithmetic is used. */
+       n_params = get_method_n_params(mtp);
+       for (i = n_params - 1; i >= 0; --i) {
+               ir_type *param_tp = get_method_param_type(mtp, i);
+               ir_type *arg_tp   = get_method_param_type(ctp, i);
+
+               if (param_tp != arg_tp) {
+                       ir_mode *pmode = get_type_mode(param_tp);
+                       ir_mode *amode = get_type_mode(arg_tp);
+
+                       if (pmode == NULL || amode == NULL)
+                               return 0;
+                       if (get_mode_size_bits(pmode) != get_mode_size_bits(amode))
+                               return 0;
+                       if (get_mode_arithmetic(pmode) != get_mode_arithmetic(amode))
+                               return 0;
+                       /* otherwise we can simply "reinterpret" the bits */
+               }
+       }
+
+       irg = get_irn_irg(call);
 
        /*
         * We cannot inline a recursive call. The graph must be copied before
         * the call the inline_method() using create_irg_copy().
         */
-       if (called_graph == current_ir_graph)
+       if (called_graph == irg)
                return 0;
 
        /*
@@ -838,19 +907,24 @@ int inline_method(ir_node *call, ir_graph *called_graph) {
        if (! can_inline(call, called_graph))
                return 0;
 
+       rem = current_ir_graph;
+       current_ir_graph = irg;
+
+       DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));
+
        /* --  Turn off optimizations, this can cause problems when allocating new nodes. -- */
        rem_opt = get_opt_optimize();
        set_optimize(0);
 
        /* Handle graph state */
-       assert(get_irg_phase_state(current_ir_graph) != phase_building);
-       assert(get_irg_pinned(current_ir_graph) == op_pin_state_pinned);
+       assert(get_irg_phase_state(irg) != phase_building);
+       assert(get_irg_pinned(irg) == op_pin_state_pinned);
        assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
-       set_irg_outs_inconsistent(current_ir_graph);
-       set_irg_extblk_inconsistent(current_ir_graph);
-       set_irg_doms_inconsistent(current_ir_graph);
-       set_irg_loopinfo_inconsistent(current_ir_graph);
-       set_irg_callee_info_state(current_ir_graph, irg_callee_info_inconsistent);
+       set_irg_outs_inconsistent(irg);
+       set_irg_extblk_inconsistent(irg);
+       set_irg_doms_inconsistent(irg);
+       set_irg_loopinfo_inconsistent(irg);
+       set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
 
        /* -- Check preconditions -- */
        assert(is_Call(call));
@@ -876,19 +950,33 @@ int inline_method(ir_node *call, ir_graph *called_graph) {
                else            {                exc_handling = exc_no_handler; } /* !Mproj && !Xproj   */
        }
 
+       /* create the argument tuple */
+       NEW_ARR_A(ir_type *, args_in, n_params);
+
+       block = get_nodes_block(call);
+       for (i = n_params - 1; i >= 0; --i) {
+               ir_node *arg      = get_Call_param(call, i);
+               ir_type *param_tp = get_method_param_type(mtp, i);
+               ir_mode *mode     = get_type_mode(param_tp);
+
+               if (mode != get_irn_mode(arg)) {
+                       arg = new_r_Conv(irg, block, arg, mode);
+               }
+               args_in[i] = arg;
+       }
+
        /* --
           the procedure and later replaces the Start node of the called graph.
           Post_call is the old Call node and collects the results of the called
           graph. Both will end up being a tuple.  -- */
        post_bl = get_nodes_block(call);
-       set_irg_current_block(current_ir_graph, post_bl);
+       set_irg_current_block(irg, post_bl);
        /* XxMxPxPxPxT of Start + parameter of Call */
        in[pn_Start_X_initial_exec]   = new_Jmp();
        in[pn_Start_M]                = get_Call_mem(call);
-       in[pn_Start_P_frame_base]     = get_irg_frame(current_ir_graph);
-       in[pn_Start_P_globals]        = get_irg_globals(current_ir_graph);
-       in[pn_Start_P_tls]            = get_irg_tls(current_ir_graph);
-       in[pn_Start_T_args]           = new_Tuple(get_Call_n_params(call), get_Call_param_arr(call));
+       in[pn_Start_P_frame_base]     = get_irg_frame(irg);
+       in[pn_Start_P_tls]            = get_irg_tls(irg);
+       in[pn_Start_T_args]           = new_Tuple(n_params, args_in);
        /* in[pn_Start_P_value_arg_base] = ??? */
        assert(pn_Start_P_value_arg_base == pn_Start_max - 1 && "pn_Start_P_value_arg_base not supported, fix");
        pre_call = new_Tuple(pn_Start_max - 1, in);
@@ -902,29 +990,36 @@ int inline_method(ir_node *call, ir_graph *called_graph) {
        /* -- Prepare state for dead node elimination -- */
        /* Visited flags in calling irg must be >= flag in called irg.
           Else walker and arity computation will not work. */
-       if (get_irg_visited(current_ir_graph) <= get_irg_visited(called_graph))
-               set_irg_visited(current_ir_graph, get_irg_visited(called_graph)+1);
-       if (get_irg_block_visited(current_ir_graph)< get_irg_block_visited(called_graph))
-               set_irg_block_visited(current_ir_graph, get_irg_block_visited(called_graph));
+       if (get_irg_visited(irg) <= get_irg_visited(called_graph))
+               set_irg_visited(irg, get_irg_visited(called_graph) + 1);
+       if (get_irg_block_visited(irg) < get_irg_block_visited(called_graph))
+               set_irg_block_visited(irg, get_irg_block_visited(called_graph));
+       visited = get_irg_visited(irg);
+
        /* Set pre_call as new Start node in link field of the start node of
           calling graph and pre_calls block as new block for the start block
           of calling graph.
           Further mark these nodes so that they are not visited by the
           copying. */
        set_irn_link(get_irg_start(called_graph), pre_call);
-       set_irn_visited(get_irg_start(called_graph), get_irg_visited(current_ir_graph));
+       set_irn_visited(get_irg_start(called_graph), visited);
        set_irn_link(get_irg_start_block(called_graph), get_nodes_block(pre_call));
-       set_irn_visited(get_irg_start_block(called_graph), get_irg_visited(current_ir_graph));
+       set_irn_visited(get_irg_start_block(called_graph), visited);
+
        set_irn_link(get_irg_bad(called_graph), get_irg_bad(current_ir_graph));
-       set_irn_visited(get_irg_bad(called_graph), get_irg_visited(current_ir_graph));
+       set_irn_visited(get_irg_bad(called_graph), visited);
+
+       set_irn_link(get_irg_no_mem(called_graph), get_irg_no_mem(current_ir_graph));
+       set_irn_visited(get_irg_no_mem(called_graph), visited);
 
        /* Initialize for compaction of in arrays */
-       inc_irg_block_visited(current_ir_graph);
+       inc_irg_block_visited(irg);
 
        /* -- Replicate local entities of the called_graph -- */
        /* copy the entities. */
+       irp_reserve_resources(irp, IR_RESOURCE_ENTITY_LINK);
        called_frame = get_irg_frame_type(called_graph);
-       curr_frame   = get_irg_frame_type(current_ir_graph);
+       curr_frame   = get_irg_frame_type(irg);
        for (i = 0, n = get_class_n_members(called_frame); i < n; ++i) {
                ir_entity *new_ent, *old_ent;
                old_ent = get_class_member(called_frame, i);
@@ -935,17 +1030,19 @@ int inline_method(ir_node *call, ir_graph *called_graph) {
        /* visited is > than that of called graph.  With this trick visited will
           remain unchanged so that an outer walker, e.g., searching the call nodes
            to inline, calling this inline will not visit the inlined nodes. */
-       set_irg_visited(current_ir_graph, get_irg_visited(current_ir_graph)-1);
+       set_irg_visited(irg, get_irg_visited(irg)-1);
 
        /* -- Performing dead node elimination inlines the graph -- */
        /* Copies the nodes to the obstack of current_ir_graph. Updates links to new
           entities. */
-       irg_walk(get_irg_end(called_graph), copy_node_inline, copy_preds,
+       irg_walk(get_irg_end(called_graph), copy_node_inline, copy_preds_inline,
                 get_irg_frame_type(called_graph));
 
+       irp_free_resources(irp, IR_RESOURCE_ENTITY_LINK);
+
        /* Repair called_graph */
-       set_irg_visited(called_graph, get_irg_visited(current_ir_graph));
-       set_irg_block_visited(called_graph, get_irg_block_visited(current_ir_graph));
+       set_irg_visited(called_graph, get_irg_visited(irg));
+       set_irg_block_visited(called_graph, get_irg_block_visited(irg));
        set_Block_block_visited(get_irg_start_block(called_graph), 0);
 
        /* -- Merge the end of the inlined procedure with the call site -- */
@@ -965,20 +1062,20 @@ int inline_method(ir_node *call, ir_graph *called_graph) {
        /* -- Precompute some values -- */
        end_bl = get_new_node(get_irg_end_block(called_graph));
        end = get_new_node(get_irg_end(called_graph));
-       arity = get_irn_arity(end_bl);    /* arity = n_exc + n_ret  */
+       arity = get_Block_n_cfgpreds(end_bl);    /* arity = n_exc + n_ret  */
        n_res = get_method_n_ress(get_Call_type(call));
 
-       res_pred = xmalloc(n_res * sizeof(*res_pred));
-       cf_pred  = xmalloc(arity * sizeof(*res_pred));
+       res_pred = XMALLOCN(ir_node*, n_res);
+       cf_pred  = XMALLOCN(ir_node*, arity);
 
-       set_irg_current_block(current_ir_graph, post_bl); /* just to make sure */
+       set_irg_current_block(irg, post_bl); /* just to make sure */
 
        /* -- archive keepalives -- */
        irn_arity = get_irn_arity(end);
        for (i = 0; i < irn_arity; i++) {
                ir_node *ka = get_End_keepalive(end, i);
                if (! is_Bad(ka))
-                       add_End_keepalive(get_irg_end(current_ir_graph), ka);
+                       add_End_keepalive(get_irg_end(irg), ka);
        }
 
        /* The new end node will die.  We need not free as the in array is on the obstack:
@@ -988,9 +1085,9 @@ int inline_method(ir_node *call, ir_graph *called_graph) {
        n_ret = 0;
        for (i = 0; i < arity; i++) {
                ir_node *ret;
-               ret = get_irn_n(end_bl, i);
+               ret = get_Block_cfgpred(end_bl, i);
                if (is_Return(ret)) {
-                       cf_pred[n_ret] = new_r_Jmp(current_ir_graph, get_nodes_block(ret));
+                       cf_pred[n_ret] = new_r_Jmp(irg, get_nodes_block(ret));
                        n_ret++;
                }
        }
@@ -1002,7 +1099,7 @@ int inline_method(ir_node *call, ir_graph *called_graph) {
        /* First the Memory-Phi */
        n_ret = 0;
        for (i = 0; i < arity; i++) {
-               ret = get_irn_n(end_bl, i);
+               ret = get_Block_cfgpred(end_bl, i);
                if (is_Return(ret)) {
                        cf_pred[n_ret] = get_Return_mem(ret);
                        n_ret++;
@@ -1020,7 +1117,7 @@ int inline_method(ir_node *call, ir_graph *called_graph) {
                for (j = 0; j < n_res; j++) {
                        n_ret = 0;
                        for (i = 0; i < arity; i++) {
-                               ret = get_irn_n(end_bl, i);
+                               ret = get_Block_cfgpred(end_bl, i);
                                if (is_Return(ret)) {
                                        cf_pred[n_ret] = get_Return_res(ret, j);
                                        n_ret++;
@@ -1061,7 +1158,7 @@ int inline_method(ir_node *call, ir_graph *called_graph) {
                n_exc = 0;
                for (i = 0; i < arity; i++) {
                        ir_node *ret, *irn;
-                       ret = get_irn_n(end_bl, i);
+                       ret = get_Block_cfgpred(end_bl, i);
                        irn = skip_Proj(ret);
                        if (is_fragile_op(irn) || is_Raise(irn)) {
                                cf_pred[n_exc] = ret;
@@ -1075,16 +1172,16 @@ int inline_method(ir_node *call, ir_graph *called_graph) {
                        n_exc = 0;
                        for (i = 0; i < arity; i++) {
                                ir_node *ret;
-                               ret = skip_Proj(get_irn_n(end_bl, i));
+                               ret = skip_Proj(get_Block_cfgpred(end_bl, i));
                                if (is_Call(ret)) {
-                                       cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_block(ret), ret, mode_M, 3);
+                                       cf_pred[n_exc] = new_r_Proj(irg, get_nodes_block(ret), ret, mode_M, 3);
                                        n_exc++;
                                } else if (is_fragile_op(ret)) {
                                        /* We rely that all cfops have the memory output at the same position. */
-                                       cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_block(ret), ret, mode_M, 0);
+                                       cf_pred[n_exc] = new_r_Proj(irg, get_nodes_block(ret), ret, mode_M, 0);
                                        n_exc++;
                                } else if (is_Raise(ret)) {
-                                       cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_block(ret), ret, mode_M, 1);
+                                       cf_pred[n_exc] = new_r_Proj(irg, get_nodes_block(ret), ret, mode_M, 1);
                                        n_exc++;
                                }
                        }
@@ -1101,7 +1198,7 @@ int inline_method(ir_node *call, ir_graph *called_graph) {
                /* assert(exc_handling == 1 || no exceptions. ) */
                n_exc = 0;
                for (i = 0; i < arity; i++) {
-                       ir_node *ret = get_irn_n(end_bl, i);
+                       ir_node *ret = get_Block_cfgpred(end_bl, i);
                        ir_node *irn = skip_Proj(ret);
 
                        if (is_fragile_op(irn) || is_Raise(irn)) {
@@ -1109,9 +1206,9 @@ int inline_method(ir_node *call, ir_graph *called_graph) {
                                n_exc++;
                        }
                }
-               main_end_bl = get_irg_end_block(current_ir_graph);
+               main_end_bl       = get_irg_end_block(irg);
                main_end_bl_arity = get_irn_arity(main_end_bl);
-               end_preds =  xmalloc((n_exc + main_end_bl_arity) * sizeof(*end_preds));
+               end_preds         = XMALLOCN(ir_node*, n_exc + main_end_bl_arity);
 
                for (i = 0; i < main_end_bl_arity; ++i)
                        end_preds[i] = get_irn_n(main_end_bl, i);
@@ -1127,30 +1224,34 @@ int inline_method(ir_node *call, ir_graph *called_graph) {
 
        /* --  Turn CSE back on. -- */
        set_optimize(rem_opt);
+       current_ir_graph = rem;
 
        return 1;
 }
 
 /********************************************************************/
-/* Apply inlineing to small methods.                                */
+/* Apply inlining to small methods.                                 */
 /********************************************************************/
 
+static struct obstack  temp_obst;
+
 /** Represents a possible inlinable call in a graph. */
-typedef struct _call_entry call_entry;
-struct _call_entry {
-       ir_node    *call;   /**< the Call */
-       ir_graph   *callee; /**< the callee called here */
-       call_entry *next;   /**< for linking the next one */
-       unsigned   weight;  /**< the weight of the call */
-};
+typedef struct _call_entry {
+       ir_node    *call;       /**< The Call node. */
+       ir_graph   *callee;     /**< The callee IR-graph. */
+       list_head  list;        /**< List head for linking the next one. */
+       int        loop_depth;  /**< The loop depth of this call. */
+       int        benefice;    /**< The calculated benefice of this call. */
+       unsigned   local_adr:1; /**< Set if this call gets an address of a local variable. */
+       unsigned   all_const:1; /**< Set if this call has only constant parameters. */
+} call_entry;
 
 /**
  * environment for inlining small irgs
  */
 typedef struct _inline_env_t {
-       struct obstack obst;  /**< an obstack where call_entries are allocated on. */
-       call_entry *head;     /**< the head of the call entry list */
-       call_entry *tail;     /**< the tail of the call entry list */
+       struct obstack obst;  /**< An obstack where call_entries are allocated on. */
+       list_head      calls; /**< The call entry list. */
 } inline_env_t;
 
 /**
@@ -1175,6 +1276,7 @@ static ir_graph *get_call_called_irg(ir_node *call) {
  * Walker: Collect all calls to known graphs inside a graph.
  */
 static void collect_calls(ir_node *call, void *env) {
+       (void) env;
        if (is_Call(call)) {
                ir_graph *called_irg = get_call_called_irg(call);
 
@@ -1182,16 +1284,14 @@ static void collect_calls(ir_node *call, void *env) {
                        /* The Call node calls a locally defined method.  Remember to inline. */
                        inline_env_t *ienv  = env;
                        call_entry   *entry = obstack_alloc(&ienv->obst, sizeof(*entry));
-                       entry->call   = call;
-                       entry->callee = called_irg;
-                       entry->next   = NULL;
-                       entry->weight = 0;
-
-                       if (ienv->tail == NULL)
-                               ienv->head = entry;
-                       else
-                               ienv->tail->next = entry;
-                       ienv->tail = entry;
+                       entry->call       = call;
+                       entry->callee     = called_irg;
+                       entry->loop_depth = 0;
+                       entry->benefice   = 0;
+                       entry->local_adr  = 0;
+                       entry->all_const  = 0;
+
+                       list_add_tail(&entry->list, &ienv->calls);
                }
        }
 }
@@ -1205,12 +1305,9 @@ static void collect_calls(ir_node *call, void *env) {
  * size are inlined.
  */
 void inline_small_irgs(ir_graph *irg, int size) {
-  ir_graph *rem = current_ir_graph;
+       ir_graph *rem = current_ir_graph;
        inline_env_t env;
        call_entry *entry;
-       DEBUG_ONLY(firm_dbg_module_t *dbg;)
-
-       FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
 
        current_ir_graph = irg;
        /* Handle graph state */
@@ -1218,21 +1315,29 @@ void inline_small_irgs(ir_graph *irg, int size) {
        free_callee_info(irg);
 
        /* Find Call nodes to inline.
-          (We can not inline during a walk of the graph, as inlineing the same
+          (We can not inline during a walk of the graph, as inlining the same
           method several times changes the visited flag of the walked graph:
-          after the first inlineing visited of the callee equals visited of
-          the caller.  With the next inlineing both are increased.) */
+          after the first inlining visited of the callee equals visited of
+          the caller.  With the next inlining both are increased.) */
        obstack_init(&env.obst);
-       env.head = env.tail = NULL;
+       INIT_LIST_HEAD(&env.calls);
        irg_walk_graph(irg, NULL, collect_calls, &env);
 
-       if (env.head != NULL) {
+       if (! list_empty(&env.calls)) {
                /* There are calls to inline */
                collect_phiprojs(irg);
-               for (entry = env.head; entry != NULL; entry = entry->next) {
-                       ir_graph *callee = entry->callee;
-                       if (((_obstack_memory_used(callee->obst) - (int)obstack_room(callee->obst)) < size) ||
-                           (get_irg_inline_property(callee) >= irg_inline_forced)) {
+
+               list_for_each_entry(call_entry, entry, &env.calls, list) {
+                       ir_graph            *callee = entry->callee;
+                       irg_inline_property prop    = get_irg_inline_property(callee);
+
+                       if (prop == irg_inline_forbidden || get_irg_additional_properties(callee) & mtp_property_weak) {
+                               /* do not inline forbidden / weak graphs */
+                               continue;
+                       }
+
+                       if (prop >= irg_inline_forced ||
+                           _obstack_memory_used(callee->obst) - (int)obstack_room(callee->obst) < size) {
                                inline_method(entry->call, callee);
                        }
                }
@@ -1245,39 +1350,44 @@ void inline_small_irgs(ir_graph *irg, int size) {
  * Environment for inlining irgs.
  */
 typedef struct {
-  int n_nodes;             /**< Number of nodes in graph except Id, Tuple, Proj, Start, End. */
-       int n_nodes_orig;        /**< for statistics */
-       call_entry *call_head;   /**< The head of the list of all call nodes in this graph. */
-       call_entry *call_tail;   /**< The tail of the list of all call nodes in this graph .*/
-       int n_call_nodes;        /**< Number of Call nodes in the graph. */
-       int n_call_nodes_orig;   /**< for statistics */
-       int n_callers;           /**< Number of known graphs that call this graphs. */
-       int n_callers_orig;      /**< for statistics */
-       int got_inline;          /**< Set, if at leat one call inside this graph was inlined. */
+       list_head calls;             /**< List of of all call nodes in this graph. */
+       unsigned  *local_weights;    /**< Once allocated, the beneficial weight for transmitting local addresses. */
+       unsigned  n_nodes;           /**< Number of nodes in graph except Id, Tuple, Proj, Start, End. */
+       unsigned  n_blocks;          /**< Number of Blocks in graph without Start and End block. */
+       unsigned  n_nodes_orig;      /**< for statistics */
+       unsigned  n_call_nodes;      /**< Number of Call nodes in the graph. */
+       unsigned  n_call_nodes_orig; /**< for statistics */
+       unsigned  n_callers;         /**< Number of known graphs that call this graphs. */
+       unsigned  n_callers_orig;    /**< for statistics */
+       unsigned  got_inline:1;      /**< Set, if at least one call inside this graph was inlined. */
+       unsigned  local_vars:1;      /**< Set, if an inlined function got the address of a local variable. */
+       unsigned  recursive:1;       /**< Set, if this function is self recursive. */
 } inline_irg_env;
 
 /**
  * Allocate a new environment for inlining.
  */
-static inline_irg_env *alloc_inline_irg_env(struct obstack *obst) {
-       inline_irg_env *env    = obstack_alloc(obst, sizeof(*env));
+static inline_irg_env *alloc_inline_irg_env(void) {
+       inline_irg_env *env    = obstack_alloc(&temp_obst, sizeof(*env));
+       INIT_LIST_HEAD(&env->calls);
+       env->local_weights     = NULL;
        env->n_nodes           = -2; /* do not count count Start, End */
+       env->n_blocks          = -2; /* do not count Start, End Block */
        env->n_nodes_orig      = -2; /* do not count Start, End */
-       env->call_head         = NULL;
-       env->call_tail         = NULL;
        env->n_call_nodes      = 0;
        env->n_call_nodes_orig = 0;
        env->n_callers         = 0;
        env->n_callers_orig    = 0;
        env->got_inline        = 0;
+       env->local_vars        = 0;
+       env->recursive         = 0;
        return env;
 }
 
 typedef struct walker_env {
-       struct obstack *obst; /**< the obstack for allocations. */
-       inline_irg_env *x;    /**< the inline environment */
-       char ignore_runtime;  /**< the ignore runtime flag */
-       char ignore_callers;  /**< if set, do change callers data */
+       inline_irg_env *x;     /**< the inline environment */
+       char ignore_runtime;   /**< the ignore runtime flag */
+       char ignore_callers;   /**< if set, do change callers data */
 } wenv_t;
 
 /**
@@ -1293,8 +1403,12 @@ static void collect_calls2(ir_node *call, void *ctx) {
 
        /* count meaningful nodes in irg */
        if (code != iro_Proj && code != iro_Tuple && code != iro_Sync) {
-               ++x->n_nodes;
-               ++x->n_nodes_orig;
+               if (code != iro_Block) {
+                       ++x->n_nodes;
+                       ++x->n_nodes_orig;
+               } else {
+                       ++x->n_blocks;
+               }
        }
 
        if (code != iro_Call) return;
@@ -1323,17 +1437,19 @@ static void collect_calls2(ir_node *call, void *ctx) {
                        ++callee_env->n_callers;
                        ++callee_env->n_callers_orig;
                }
+               if (callee == current_ir_graph)
+                       x->recursive = 1;
 
                /* link it in the list of possible inlinable entries */
-               entry = obstack_alloc(env->obst, sizeof(*entry));
-               entry->call   = call;
-               entry->callee = callee;
-               entry->next   = NULL;
-               if (x->call_tail == NULL)
-                       x->call_head = entry;
-               else
-                       x->call_tail->next = entry;
-               x->call_tail = entry;
+               entry = obstack_alloc(&temp_obst, sizeof(*entry));
+               entry->call       = call;
+               entry->callee     = callee;
+               entry->loop_depth = get_irn_loop(get_nodes_block(call))->depth;
+               entry->benefice   = 0;
+               entry->local_adr  = 0;
+               entry->all_const  = 0;
+
+               list_add_tail(&entry->list, &x->calls);
        }
 }
 
@@ -1341,7 +1457,7 @@ static void collect_calls2(ir_node *call, void *ctx) {
  * Returns TRUE if the number of callers is 0 in the irg's environment,
  * hence this irg is a leave.
  */
-INLINE static int is_leave(ir_graph *irg) {
+inline static int is_leave(ir_graph *irg) {
        inline_irg_env *env = get_irg_link(irg);
        return env->n_call_nodes == 0;
 }
@@ -1350,28 +1466,52 @@ INLINE static int is_leave(ir_graph *irg) {
  * Returns TRUE if the number of nodes in the callee is
  * smaller then size in the irg's environment.
  */
-INLINE static int is_smaller(ir_graph *callee, int size) {
+inline static int is_smaller(ir_graph *callee, unsigned size) {
        inline_irg_env *env = get_irg_link(callee);
        return env->n_nodes < size;
 }
 
 /**
- * Append the nodes of the list src to the nodes of the list in environment dst.
+ * Duplicate a call entry.
+ *
+ * @param entry     the original entry to duplicate
+ * @param new_call  the new call node
+ * @param loop_depth_delta
+ *                  delta value for the loop depth
  */
-static void append_call_list(struct obstack *obst, inline_irg_env *dst, call_entry *src) {
+static call_entry *duplicate_call_entry(const call_entry *entry,
+                                        ir_node *new_call, int loop_depth_delta) {
+       call_entry *nentry = obstack_alloc(&temp_obst, sizeof(*nentry));
+       nentry->call       = new_call;
+       nentry->callee     = entry->callee;
+       nentry->benefice   = entry->benefice;
+       nentry->loop_depth = entry->loop_depth + loop_depth_delta;
+       nentry->local_adr  = entry->local_adr;
+       nentry->all_const  = entry->all_const;
+
+       return nentry;
+}
+
+/**
+ * Append all call nodes of the source environment to the nodes in the destination
+ * environment.
+ *
+ * @param dst         destination environment
+ * @param src         source environment
+ * @param loop_depth  the loop depth of the call that is replaced by the src list
+ */
+static void append_call_list(inline_irg_env *dst, inline_irg_env *src, int loop_depth) {
        call_entry *entry, *nentry;
 
        /* Note that the src list points to Call nodes in the inlined graph, but
           we need Call nodes in our graph. Luckily the inliner leaves this information
           in the link field. */
-       for (entry = src; entry != NULL; entry = entry->next) {
-               nentry = obstack_alloc(obst, sizeof(*nentry));
-               nentry->call   = get_irn_link(entry->call);
-               nentry->callee = entry->callee;
-               nentry->next   = NULL;
-               dst->call_tail->next = nentry;
-               dst->call_tail       = nentry;
+       list_for_each_entry(call_entry, entry, &src->calls, list) {
+               nentry = duplicate_call_entry(entry, get_irn_link(entry->call), loop_depth);
+               list_add_tail(&nentry->list, &dst->calls);
        }
+       dst->n_call_nodes += src->n_call_nodes;
+       dst->n_nodes      += src->n_nodes;
 }
 
 /*
@@ -1382,23 +1522,22 @@ static void append_call_list(struct obstack *obst, inline_irg_env *dst, call_ent
  * Methods where the obstack containing the firm graph is smaller than
  * size are inlined.
  */
-void inline_leave_functions(int maxsize, int leavesize, int size, int ignore_runtime) {
+void inline_leave_functions(unsigned maxsize, unsigned leavesize,
+                            unsigned size, int ignore_runtime)
+{
        inline_irg_env   *env;
        ir_graph         *irg;
        int              i, n_irgs;
        ir_graph         *rem;
        int              did_inline;
        wenv_t           wenv;
-       call_entry       *entry, *tail;
+       call_entry       *entry, *next;
        const call_entry *centry;
-       struct obstack   obst;
        pmap             *copied_graphs;
        pmap_entry       *pm_entry;
-       DEBUG_ONLY(firm_dbg_module_t *dbg;)
 
-       FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
        rem = current_ir_graph;
-       obstack_init(&obst);
+       obstack_init(&temp_obst);
 
        /* a map for the copied graphs, used to inline recursive calls */
        copied_graphs = pmap_create();
@@ -1406,10 +1545,9 @@ void inline_leave_functions(int maxsize, int leavesize, int size, int ignore_run
        /* extend all irgs by a temporary data structure for inlining. */
        n_irgs = get_irp_n_irgs();
        for (i = 0; i < n_irgs; ++i)
-               set_irg_link(get_irp_irg(i), alloc_inline_irg_env(&obst));
+               set_irg_link(get_irp_irg(i), alloc_inline_irg_env());
 
-       /* Precompute information in temporary data structure. */
-       wenv.obst           = &obst;
+       /* Pre-compute information in temporary data structure. */
        wenv.ignore_runtime = ignore_runtime;
        wenv.ignore_callers = 0;
        for (i = 0; i < n_irgs; ++i) {
@@ -1418,6 +1556,7 @@ void inline_leave_functions(int maxsize, int leavesize, int size, int ignore_run
                assert(get_irg_phase_state(irg) != phase_building);
                free_callee_info(irg);
 
+               assure_cf_loop(irg);
                wenv.x = get_irg_link(irg);
                irg_walk_graph(irg, NULL, collect_calls2, &wenv);
        }
@@ -1433,19 +1572,26 @@ void inline_leave_functions(int maxsize, int leavesize, int size, int ignore_run
                        int phiproj_computed = 0;
 
                        current_ir_graph = get_irp_irg(i);
-                       env = (inline_irg_env *)get_irg_link(current_ir_graph);
+                       env              = get_irg_link(current_ir_graph);
 
-                       tail = NULL;
-                       for (entry = env->call_head; entry != NULL; entry = entry->next) {
-                               ir_graph *callee;
+                       list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
+                               ir_graph            *callee;
+                               irg_inline_property  prop;
 
-                               if (env->n_nodes > maxsize) break;
+                               if (env->n_nodes > maxsize)
+                                       break;
 
                                call   = entry->call;
                                callee = entry->callee;
 
+                               prop = get_irg_inline_property(callee);
+                               if (prop == irg_inline_forbidden || get_irg_additional_properties(callee) & mtp_property_weak) {
+                                       /* do not inline forbidden / weak graphs */
+                                       continue;
+                               }
+
                                if (is_leave(callee) && (
-                                   is_smaller(callee, leavesize) || (get_irg_inline_property(callee) >= irg_inline_forced))) {
+                                   is_smaller(callee, leavesize) || prop >= irg_inline_forced)) {
                                        if (!phiproj_computed) {
                                                phiproj_computed = 1;
                                                collect_phiprojs(current_ir_graph);
@@ -1453,9 +1599,9 @@ void inline_leave_functions(int maxsize, int leavesize, int size, int ignore_run
                                        did_inline = inline_method(call, callee);
 
                                        if (did_inline) {
-                                               inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);
+                                               inline_irg_env *callee_env = get_irg_link(callee);
 
-                                               /* was inlined, must be recomputed */
+                                               /* call was inlined, Phi/Projs for current graph must be recomputed */
                                                phiproj_computed = 0;
 
                                                /* Do some statistics */
@@ -1465,16 +1611,11 @@ void inline_leave_functions(int maxsize, int leavesize, int size, int ignore_run
                                                --callee_env->n_callers;
 
                                                /* remove this call from the list */
-                                               if (tail != NULL)
-                                                       tail->next = entry->next;
-                                               else
-                                                       env->call_head = entry->next;
+                                               list_del(&entry->list);
                                                continue;
                                        }
                                }
-                               tail = entry;
                        }
-                       env->call_tail = tail;
                }
        } while (did_inline);
 
@@ -1484,17 +1625,23 @@ void inline_leave_functions(int maxsize, int leavesize, int size, int ignore_run
                int phiproj_computed = 0;
 
                current_ir_graph = get_irp_irg(i);
-               env = (inline_irg_env *)get_irg_link(current_ir_graph);
+               env              = get_irg_link(current_ir_graph);
 
                /* note that the list of possible calls is updated during the process */
-               tail = NULL;
-               for (entry = env->call_head; entry != NULL; entry = entry->next) {
-                       ir_graph   *callee;
-                       pmap_entry *e;
+               list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
+                       irg_inline_property prop;
+                       ir_graph            *callee;
+                       pmap_entry          *e;
 
                        call   = entry->call;
                        callee = entry->callee;
 
+                       prop = get_irg_inline_property(callee);
+                       if (prop == irg_inline_forbidden || get_irg_additional_properties(callee) & mtp_property_weak) {
+                               /* do not inline forbidden / weak graphs */
+                               continue;
+                       }
+
                        e = pmap_find(copied_graphs, callee);
                        if (e != NULL) {
                                /*
@@ -1504,8 +1651,8 @@ void inline_leave_functions(int maxsize, int leavesize, int size, int ignore_run
                                callee = e->value;
                        }
 
-                       if (((is_smaller(callee, size) && (env->n_nodes < maxsize)) ||    /* small function */
-                               (get_irg_inline_property(callee) >= irg_inline_forced))) {
+                       if (prop >= irg_inline_forced ||
+                           (is_smaller(callee, size) && env->n_nodes < maxsize) /* small function */) {
                                if (current_ir_graph == callee) {
                                        /*
                                         * Recursive call: we cannot directly inline because we cannot walk
@@ -1527,9 +1674,10 @@ void inline_leave_functions(int maxsize, int leavesize, int size, int ignore_run
                                        phiproj_computed = 0;
 
                                        /* allocate new environment */
-                                       callee_env = alloc_inline_irg_env(&obst);
+                                       callee_env = alloc_inline_irg_env();
                                        set_irg_link(copy, callee_env);
 
+                                       assure_cf_loop(copy);
                                        wenv.x              = callee_env;
                                        wenv.ignore_callers = 1;
                                        irg_walk_graph(copy, NULL, collect_calls2, &wenv);
@@ -1556,51 +1704,588 @@ void inline_leave_functions(int maxsize, int leavesize, int size, int ignore_run
                                if (did_inline) {
                                        inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);
 
-                                       /* was inlined, must be recomputed */
+                                       /* call was inlined, Phi/Projs for current graph must be recomputed */
                                        phiproj_computed = 0;
 
                                        /* callee was inline. Append it's call list. */
                                        env->got_inline = 1;
                                        --env->n_call_nodes;
-                                       append_call_list(&obst, env, callee_env->call_head);
-                                       env->n_call_nodes += callee_env->n_call_nodes;
-                                       env->n_nodes += callee_env->n_nodes;
+                                       append_call_list(env, callee_env, entry->loop_depth);
                                        --callee_env->n_callers;
 
                                        /* after we have inlined callee, all called methods inside callee
                                           are now called once more */
-                                       for (centry = callee_env->call_head; centry != NULL; centry = centry->next) {
+                                       list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
                                                inline_irg_env *penv = get_irg_link(centry->callee);
                                                ++penv->n_callers;
                                        }
 
                                        /* remove this call from the list */
-                                       if (tail != NULL)
-                                               tail->next = entry->next;
-                                       else
-                                               env->call_head = entry->next;
+                                       list_del(&entry->list);
                                        continue;
                                }
                        }
-                       tail = entry;
                }
-               env->call_tail = tail;
        }
 
        for (i = 0; i < n_irgs; ++i) {
                irg = get_irp_irg(i);
-               env = (inline_irg_env *)get_irg_link(irg);
+               env = get_irg_link(irg);
 
                if (env->got_inline) {
-                       /* this irg got calls inlined */
-                       set_irg_outs_inconsistent(irg);
-                       set_irg_doms_inconsistent(irg);
-
                        optimize_graph_df(irg);
                        optimize_cf(irg);
                }
                if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
-                       DB((dbg, SET_LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
+                       DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
+                       env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
+                       env->n_callers_orig, env->n_callers,
+                       get_entity_name(get_irg_entity(irg))));
+               }
+       }
+
+       /* kill the copied graphs: we don't need them anymore */
+       foreach_pmap(copied_graphs, pm_entry) {
+               ir_graph *copy = pm_entry->value;
+
+               /* reset the entity, otherwise it will be deleted in the next step ... */
+               set_irg_entity(copy, NULL);
+               free_ir_graph(copy);
+       }
+       pmap_destroy(copied_graphs);
+
+       obstack_free(&temp_obst, NULL);
+       current_ir_graph = rem;
+}
+
+/**
+ * Calculate the parameter weights for transmitting the address of a local variable.
+ */
+static unsigned calc_method_local_weight(ir_node *arg) {
+       int      i, j, k;
+       unsigned v, weight = 0;
+
+       for (i = get_irn_n_outs(arg) - 1; i >= 0; --i) {
+               ir_node *succ = get_irn_out(arg, i);
+
+               switch (get_irn_opcode(succ)) {
+               case iro_Load:
+               case iro_Store:
+                       /* Loads and Store can be removed */
+                       weight += 3;
+                       break;
+               case iro_Sel:
+                       /* check if all args are constant */
+                       for (j = get_Sel_n_indexs(succ) - 1; j >= 0; --j) {
+                               ir_node *idx = get_Sel_index(succ, j);
+                               if (! is_Const(idx))
+                                       return 0;
+                       }
+                       /* Check users on this Sel. Note: if a 0 is returned here, there was
+                          some unsupported node. */
+                       v = calc_method_local_weight(succ);
+                       if (v == 0)
+                               return 0;
+                       /* we can kill one Sel with constant indexes, this is cheap */
+                       weight += v + 1;
+                       break;
+               case iro_Id:
+                       /* when looking backward we might find Id nodes */
+                       weight += calc_method_local_weight(succ);
+                       break;
+               case iro_Tuple:
+                       /* unoptimized tuple */
+                       for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
+                               ir_node *pred = get_Tuple_pred(succ, j);
+                               if (pred == arg) {
+                                       /* look for Proj(j) */
+                                       for (k = get_irn_n_outs(succ) - 1; k >= 0; --k) {
+                                               ir_node *succ_succ = get_irn_out(succ, k);
+                                               if (is_Proj(succ_succ)) {
+                                                       if (get_Proj_proj(succ_succ) == j) {
+                                                               /* found */
+                                                               weight += calc_method_local_weight(succ_succ);
+                                                       }
+                                               } else {
+                                                       /* this should NOT happen */
+                                                       return 0;
+                                               }
+                                       }
+                               }
+                       }
+                       break;
+               default:
+                       /* any other node: unsupported yet or bad. */
+                       return 0;
+               }
+       }
+       return weight;
+}
+
+/**
+ * Calculate the parameter weights for transmitting the address of a local variable.
+ */
+static void analyze_irg_local_weights(inline_irg_env *env, ir_graph *irg) {
+       ir_entity *ent = get_irg_entity(irg);
+       ir_type  *mtp;
+       int      nparams, i, proj_nr;
+       ir_node  *irg_args, *arg;
+
+       mtp      = get_entity_type(ent);
+       nparams  = get_method_n_params(mtp);
+
+       /* allocate a new array. currently used as 'analysed' flag */
+       env->local_weights = NEW_ARR_D(unsigned, &temp_obst, nparams);
+
+       /* If the method haven't parameters we have nothing to do. */
+       if (nparams <= 0)
+               return;
+
+       assure_irg_outs(irg);
+       irg_args = get_irg_args(irg);
+       for (i = get_irn_n_outs(irg_args) - 1; i >= 0; --i) {
+               arg     = get_irn_out(irg_args, i);
+               proj_nr = get_Proj_proj(arg);
+               env->local_weights[proj_nr] = calc_method_local_weight(arg);
+       }
+}
+
+/**
+ * Calculate the benefice for transmitting an local variable address.
+ * After inlining, the local variable might be transformed into a
+ * SSA variable by scalar_replacement().
+ */
+static unsigned get_method_local_adress_weight(ir_graph *callee, int pos) {
+       inline_irg_env *env = get_irg_link(callee);
+
+       if (env->local_weights != NULL) {
+               if (pos < ARR_LEN(env->local_weights))
+                       return env->local_weights[pos];
+               return 0;
+       }
+
+       analyze_irg_local_weights(env, callee);
+
+       if (pos < ARR_LEN(env->local_weights))
+               return env->local_weights[pos];
+       return 0;
+}
+
+/**
+ * Calculate a benefice value for inlining the given call.
+ *
+ * @param call       the call node we have to inspect
+ * @param callee     the called graph
+ */
+static int calc_inline_benefice(call_entry *entry, ir_graph *callee)
+{
+       ir_node   *call = entry->call;
+       ir_entity *ent  = get_irg_entity(callee);
+       ir_node   *frame_ptr;
+       ir_type   *mtp;
+       int       weight = 0;
+       int       i, n_params, all_const;
+       unsigned  cc, v;
+       irg_inline_property prop;
+
+       inline_irg_env *callee_env;
+
+       prop = get_irg_inline_property(callee);
+       if (prop == irg_inline_forbidden) {
+               DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden\n",
+                   call, callee));
+               return entry->benefice = INT_MIN;
+       }
+
+       if (get_irg_additional_properties(callee) & (mtp_property_noreturn | mtp_property_weak)) {
+               DB((dbg, LEVEL_2, "In %+F Call to %+F: not inlining noreturn or weak\n",
+                   call, callee));
+               return entry->benefice = INT_MIN;
+       }
+
+       /* costs for every passed parameter */
+       n_params = get_Call_n_params(call);
+       mtp      = get_entity_type(ent);
+       cc       = get_method_calling_convention(mtp);
+       if (cc & cc_reg_param) {
+               /* register parameter, smaller costs for register parameters */
+               int max_regs = cc & ~cc_bits;
+
+               if (max_regs < n_params)
+                       weight += max_regs * 2 + (n_params - max_regs) * 5;
+               else
+                       weight += n_params * 2;
+       } else {
+               /* parameters are passed an stack */
+               weight += 5 * n_params;
+       }
+
+       /* constant parameters improve the benefice */
+       frame_ptr = get_irg_frame(current_ir_graph);
+       all_const = 1;
+       for (i = 0; i < n_params; ++i) {
+               ir_node *param = get_Call_param(call, i);
+
+               if (is_Const(param)) {
+                       weight += get_method_param_weight(ent, i);
+               } else {
+                       all_const = 0;
+                       if (is_SymConst(param))
+                               weight += get_method_param_weight(ent, i);
+                       else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
+                               /*
+                                * An address of a local variable is transmitted. After
+                                * inlining, scalar_replacement might be able to remove the
+                                * local variable, so honor this.
+                                */
+                               v = get_method_local_adress_weight(callee, i);
+                               weight += v;
+                               if (v > 0)
+                                       entry->local_adr = 1;
+                       }
+               }
+       }
+       entry->all_const = all_const;
+
+       callee_env = get_irg_link(callee);
+       if (callee_env->n_callers == 1 &&
+           callee != current_ir_graph &&
+               get_entity_visibility(ent) == visibility_local) {
+               weight += 700;
+       }
+
+       /* give a bonus for functions with one block */
+       if (callee_env->n_blocks == 1)
+               weight = weight * 3 / 2;
+
+       /* and one for small non-recursive functions: we want them to be inlined in mostly every case */
+       if (callee_env->n_nodes < 30 && !callee_env->recursive)
+               weight += 2000;
+
+       /* and finally for leaves: they do not increase the register pressure
+          because of callee safe registers */
+       if (callee_env->n_call_nodes == 0)
+               weight += 400;
+
+       /** it's important to inline inner loops first */
+       if (entry->loop_depth > 30)
+               weight += 30 * 1024;
+       else
+               weight += entry->loop_depth * 1024;
+
+       /*
+        * All arguments constant is probably a good sign, give an extra bonus
+        */
+       if (all_const)
+               weight += 1024;
+
+       return entry->benefice = weight;
+}
+
+static ir_graph **irgs;
+static int      last_irg;
+
+/**
+ * Callgraph walker, collect all visited graphs.
+ */
+static void callgraph_walker(ir_graph *irg, void *data) {
+       (void) data;
+       irgs[last_irg++] = irg;
+}
+
+/**
+ * Creates an inline order for all graphs.
+ *
+ * @return the list of graphs.
+ */
+static ir_graph **create_irg_list(void) {
+       ir_entity **free_methods;
+       int       arr_len;
+       int       n_irgs = get_irp_n_irgs();
+
+       cgana(&arr_len, &free_methods);
+       xfree(free_methods);
+
+       compute_callgraph();
+
+       last_irg = 0;
+       irgs     = XMALLOCNZ(ir_graph*, n_irgs);
+
+       callgraph_walk(NULL, callgraph_walker, NULL);
+       assert(n_irgs == last_irg);
+
+       return irgs;
+}
+
+/**
+ * Push a call onto the priority list if its benefice is big enough.
+ *
+ * @param pqueue   the priority queue of calls
+ * @param call     the call entry
+ * @param inlien_threshold
+ *                 the threshold value
+ */
+static void maybe_push_call(pqueue_t *pqueue, call_entry *call,
+                            int inline_threshold)
+{
+       ir_graph            *callee  = call->callee;
+       irg_inline_property prop     = get_irg_inline_property(callee);
+       int                 benefice = calc_inline_benefice(call, callee);
+
+       DB((dbg, LEVEL_2, "In %+F Call %+F to %+F has benefice %d\n",
+           get_irn_irg(call->call), call->call, callee, benefice));
+
+       if (prop < irg_inline_forced && benefice < inline_threshold) {
+               return;
+       }
+
+       pqueue_put(pqueue, call, benefice);
+}
+
+/**
+ * Try to inline calls into a graph.
+ *
+ * @param irg      the graph into which we inline
+ * @param maxsize  do NOT inline if the size of irg gets
+ *                 bigger than this amount
+ * @param inline_threshold
+ *                 threshold value for inline decision
+ * @param copied_graphs
+ *                 map containing copies of recursive graphs
+ */
+static void inline_into(ir_graph *irg, unsigned maxsize,
+                        int inline_threshold, pmap *copied_graphs)
+{
+       int            phiproj_computed = 0;
+       inline_irg_env *env = get_irg_link(irg);
+       call_entry     *curr_call;
+       wenv_t         wenv;
+       pqueue_t       *pqueue;
+
+       if (env->n_call_nodes == 0)
+               return;
+
+       if (env->n_nodes > maxsize) {
+               DB((dbg, LEVEL_2, "%+F: too big (%d)\n", irg, env->n_nodes));
+               return;
+       }
+
+       current_ir_graph = irg;
+
+       /* put irgs into the pqueue */
+       pqueue = new_pqueue();
+
+       list_for_each_entry(call_entry, curr_call, &env->calls, list) {
+               assert(is_Call(curr_call->call));
+               maybe_push_call(pqueue, curr_call, inline_threshold);
+       }
+
+       /* note that the list of possible calls is updated during the process */
+       while (!pqueue_empty(pqueue)) {
+               int                 did_inline;
+               call_entry          *curr_call  = pqueue_pop_front(pqueue);
+               ir_graph            *callee     = curr_call->callee;
+               ir_node             *call_node  = curr_call->call;
+               inline_irg_env      *callee_env = get_irg_link(callee);
+               irg_inline_property prop        = get_irg_inline_property(callee);
+               int                 loop_depth;
+               const call_entry    *centry;
+               pmap_entry          *e;
+
+               if ((prop < irg_inline_forced) && env->n_nodes + callee_env->n_nodes > maxsize) {
+                       DB((dbg, LEVEL_2, "%+F: too big (%d) + %+F (%d)\n", irg,
+                                               env->n_nodes, callee, callee_env->n_nodes));
+                       continue;
+               }
+
+               e = pmap_find(copied_graphs, callee);
+               if (e != NULL) {
+                       int benefice = curr_call->benefice;
+                       /*
+                        * Reduce the weight for recursive function IFF not all arguments are const.
+                        * inlining recursive functions is rarely good.
+                        */
+                       if (!curr_call->all_const)
+                               benefice -= 2000;
+                       if (benefice < inline_threshold)
+                               continue;
+
+                       /*
+                        * Remap callee if we have a copy.
+                        */
+                       callee     = e->value;
+                       callee_env = get_irg_link(callee);
+               }
+
+               if (current_ir_graph == callee) {
+                       /*
+                        * Recursive call: we cannot directly inline because we cannot
+                        * walk the graph and change it. So we have to make a copy of
+                        * the graph first.
+                        */
+                       int benefice = curr_call->benefice;
+                       ir_graph *copy;
+
+                       /*
+                        * Reduce the weight for recursive function IFF not all arguments are const.
+                        * inlining recursive functions is rarely good.
+                        */
+                       if (!curr_call->all_const)
+                               benefice -= 2000;
+                       if (benefice < inline_threshold)
+                               continue;
+
+                       /*
+                        * No copy yet, create one.
+                        * Note that recursive methods are never leaves, so it is
+                        * sufficient to test this condition here.
+                        */
+                       copy = create_irg_copy(callee);
+
+                       /* create_irg_copy() destroys the Proj links, recompute them */
+                       phiproj_computed = 0;
+
+                       /* allocate a new environment */
+                       callee_env = alloc_inline_irg_env();
+                       set_irg_link(copy, callee_env);
+
+                       assure_cf_loop(copy);
+                       wenv.x              = callee_env;
+                       wenv.ignore_callers = 1;
+                       irg_walk_graph(copy, NULL, collect_calls2, &wenv);
+
+                       /*
+                        * Enter the entity of the original graph. This is needed
+                        * for inline_method(). However, note that ent->irg still points
+                        * to callee, NOT to copy.
+                        */
+                       set_irg_entity(copy, get_irg_entity(callee));
+
+                       pmap_insert(copied_graphs, callee, copy);
+                       callee = copy;
+
+                       /* we have only one caller: the original graph */
+                       callee_env->n_callers      = 1;
+                       callee_env->n_callers_orig = 1;
+               }
+               if (! phiproj_computed) {
+                       phiproj_computed = 1;
+                       collect_phiprojs(current_ir_graph);
+               }
+               did_inline = inline_method(call_node, callee);
+               if (!did_inline)
+                       continue;
+
+               /* call was inlined, Phi/Projs for current graph must be recomputed */
+               phiproj_computed = 0;
+
+               /* remove it from the caller list */
+               list_del(&curr_call->list);
+
+               /* callee was inlined. Append its call list. */
+               env->got_inline = 1;
+               if (curr_call->local_adr)
+                       env->local_vars = 1;
+               --env->n_call_nodes;
+
+               /* we just generate a bunch of new calls */
+               loop_depth = curr_call->loop_depth;
+               list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
+                       inline_irg_env *penv = get_irg_link(centry->callee);
+                       ir_node        *new_call;
+                       call_entry     *new_entry;
+
+                       /* after we have inlined callee, all called methods inside
+                        * callee are now called once more */
+                       ++penv->n_callers;
+
+                       /* Note that the src list points to Call nodes in the inlined graph,
+                        * but we need Call nodes in our graph. Luckily the inliner leaves
+                        * this information in the link field. */
+                       new_call = get_irn_link(centry->call);
+                       assert(is_Call(new_call));
+
+                       new_entry = duplicate_call_entry(centry, new_call, loop_depth);
+                       list_add_tail(&new_entry->list, &env->calls);
+                       maybe_push_call(pqueue, new_entry, inline_threshold);
+               }
+
+               env->n_call_nodes += callee_env->n_call_nodes;
+               env->n_nodes += callee_env->n_nodes;
+               --callee_env->n_callers;
+       }
+
+       del_pqueue(pqueue);
+}
+
+/*
+ * Heuristic inliner. Calculates a benefice value for every call and inlines
+ * those calls with a value higher than the threshold.
+ */
+void inline_functions(unsigned maxsize, int inline_threshold) {
+       inline_irg_env   *env;
+       int              i, n_irgs;
+       ir_graph         *rem;
+       wenv_t           wenv;
+       pmap             *copied_graphs;
+       pmap_entry       *pm_entry;
+       ir_graph         **irgs;
+
+       rem = current_ir_graph;
+       obstack_init(&temp_obst);
+
+       irgs = create_irg_list();
+
+       /* a map for the copied graphs, used to inline recursive calls */
+       copied_graphs = pmap_create();
+
+       /* extend all irgs by a temporary data structure for inlining. */
+       n_irgs = get_irp_n_irgs();
+       for (i = 0; i < n_irgs; ++i)
+               set_irg_link(irgs[i], alloc_inline_irg_env());
+
+       /* Pre-compute information in temporary data structure. */
+       wenv.ignore_runtime = 0;
+       wenv.ignore_callers = 0;
+       for (i = 0; i < n_irgs; ++i) {
+               ir_graph *irg = irgs[i];
+
+               free_callee_info(irg);
+
+               wenv.x = get_irg_link(irg);
+               assure_cf_loop(irg);
+               irg_walk_graph(irg, NULL, collect_calls2, &wenv);
+       }
+
+       /* -- and now inline. -- */
+       for (i = 0; i < n_irgs; ++i) {
+               ir_graph *irg = irgs[i];
+
+               inline_into(irg, maxsize, inline_threshold, copied_graphs);
+       }
+
+       for (i = 0; i < n_irgs; ++i) {
+               ir_graph *irg = irgs[i];
+
+               env = get_irg_link(irg);
+               if (env->got_inline) {
+                       /* this irg got calls inlined: optimize it */
+                       if (get_opt_combo()) {
+                               if (env->local_vars) {
+                                       scalar_replacement_opt(irg);
+                               }
+                               combo(irg);
+                       } else {
+                               if (env->local_vars) {
+                                       if (scalar_replacement_opt(irg)) {
+                                               optimize_graph_df(irg);
+                                       }
+                               }
+                               optimize_cf(irg);
+                       }
+               }
+               if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
+                       DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
                        env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
                        env->n_callers_orig, env->n_callers,
                        get_entity_name(get_irg_entity(irg))));
@@ -1617,6 +2302,12 @@ void inline_leave_functions(int maxsize, int leavesize, int size, int ignore_run
        }
        pmap_destroy(copied_graphs);
 
-       obstack_free(&obst, NULL);
+       xfree(irgs);
+
+       obstack_free(&temp_obst, NULL);
        current_ir_graph = rem;
 }
+
+/** Initializes the inliner: registers the "firm.opt.inline" debug module
+ *  used by the DB() trace output in this file. Must run before any
+ *  inlining pass emits debug messages. */
+void firm_init_inline(void) {
+	FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
+}