switch scalar replacement to new style debug output
[libfirm] / ir / opt / opt_inline.c
index 8df864e..0fd8328 100644 (file)
@@ -27,6 +27,7 @@
 # include "config.h"
 #endif
 
+#include <limits.h>
 #include <assert.h>
 
 #include "irnode_t.h"
 #include "irouts.h"
 #include "irloop_t.h"
 #include "irbackedge_t.h"
+#include "opt_inline_t.h"
 #include "cgana.h"
 #include "trouts.h"
 #include "error.h"
 
+#include "analyze_irg_args.h"
 #include "iredges_t.h"
 #include "irflag_t.h"
 #include "irhooks.h"
 #include "irtools.h"
 
+DEBUG_ONLY(static firm_dbg_module_t *dbg;)
 
 /*------------------------------------------------------------------*/
 /* Routines for dead node elimination / copying garbage collection  */
@@ -1135,6 +1139,8 @@ int inline_method(ir_node *call, ir_graph *called_graph) {
 /* Apply inlineing to small methods.                                */
 /********************************************************************/
 
+static struct obstack  temp_obst;
+
 /** Represents a possible inlinable call in a graph. */
 typedef struct _call_entry call_entry;
 struct _call_entry {
@@ -1208,9 +1214,6 @@ void inline_small_irgs(ir_graph *irg, int size) {
   ir_graph *rem = current_ir_graph;
        inline_env_t env;
        call_entry *entry;
-       DEBUG_ONLY(firm_dbg_module_t *dbg;)
-
-       FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
 
        current_ir_graph = irg;
        /* Handle graph state */
@@ -1227,15 +1230,23 @@ void inline_small_irgs(ir_graph *irg, int size) {
        irg_walk_graph(irg, NULL, collect_calls, &env);
 
        if (env.head != NULL) {
+               int did_inline = 0;
+
                /* There are calls to inline */
                collect_phiprojs(irg);
                for (entry = env.head; entry != NULL; entry = entry->next) {
                        ir_graph *callee = entry->callee;
                        if (((_obstack_memory_used(callee->obst) - (int)obstack_room(callee->obst)) < size) ||
                            (get_irg_inline_property(callee) >= irg_inline_forced)) {
-                               inline_method(entry->call, callee);
+                               did_inline |= inline_method(entry->call, callee);
                        }
                }
+               if (did_inline != 0) {
+                       /* this irg got calls inlined */
+                       set_irg_outs_inconsistent(irg);
+                       set_irg_doms_inconsistent(irg);
+                       set_irg_loopinfo_inconsistent(irg);
+               }
        }
        obstack_free(&env.obst, NULL);
        current_ir_graph = rem;
@@ -1245,7 +1256,8 @@ void inline_small_irgs(ir_graph *irg, int size) {
  * Environment for inlining irgs.
  */
 typedef struct {
-  int n_nodes;             /**< Number of nodes in graph except Id, Tuple, Proj, Start, End. */
+       int n_nodes;             /**< Number of nodes in graph except Id, Tuple, Proj, Start, End. */
+       int n_blocks;            /**< Number of Blocks in graph without Start and End block. */
        int n_nodes_orig;        /**< for statistics */
        call_entry *call_head;   /**< The head of the list of all call nodes in this graph. */
        call_entry *call_tail;   /**< The tail of the list of all call nodes in this graph .*/
@@ -1253,15 +1265,18 @@ typedef struct {
        int n_call_nodes_orig;   /**< for statistics */
        int n_callers;           /**< Number of known graphs that call this graphs. */
        int n_callers_orig;      /**< for statistics */
-       int got_inline;          /**< Set, if at leat one call inside this graph was inlined. */
+       unsigned got_inline:1;   /**< Set, if at least one call inside this graph was inlined. */
+       unsigned local_vars:1;   /**< Set, if a inlined function gets the address of an inlined variable. */
+       unsigned *local_weights; /**< Once allocated, the beneficial weight for transmitting local addresses. */
 } inline_irg_env;
 
 /**
  * Allocate a new environment for inlining.
  */
-static inline_irg_env *alloc_inline_irg_env(struct obstack *obst) {
-       inline_irg_env *env    = obstack_alloc(obst, sizeof(*env));
+static inline_irg_env *alloc_inline_irg_env(void) {
+       inline_irg_env *env    = obstack_alloc(&temp_obst, sizeof(*env));
        env->n_nodes           = -2; /* do not count count Start, End */
+       env->n_blocks          = -2; /* do not count count Start, End Block */
        env->n_nodes_orig      = -2; /* do not count Start, End */
        env->call_head         = NULL;
        env->call_tail         = NULL;
@@ -1270,11 +1285,12 @@ static inline_irg_env *alloc_inline_irg_env(struct obstack *obst) {
        env->n_callers         = 0;
        env->n_callers_orig    = 0;
        env->got_inline        = 0;
+       env->local_vars        = 0;
+       env->local_weights     = NULL;
        return env;
 }
 
 typedef struct walker_env {
-       struct obstack *obst; /**< the obstack for allocations. */
        inline_irg_env *x;    /**< the inline environment */
        char ignore_runtime;  /**< the ignore runtime flag */
        char ignore_callers;  /**< if set, do change callers data */
@@ -1293,8 +1309,12 @@ static void collect_calls2(ir_node *call, void *ctx) {
 
        /* count meaningful nodes in irg */
        if (code != iro_Proj && code != iro_Tuple && code != iro_Sync) {
-               ++x->n_nodes;
-               ++x->n_nodes_orig;
+               if (code != iro_Block) {
+                       ++x->n_nodes;
+                       ++x->n_nodes_orig;
+               } else {
+                       ++x->n_blocks;
+               }
        }
 
        if (code != iro_Call) return;
@@ -1325,7 +1345,7 @@ static void collect_calls2(ir_node *call, void *ctx) {
                }
 
                /* link it in the list of possible inlinable entries */
-               entry = obstack_alloc(env->obst, sizeof(*entry));
+               entry = obstack_alloc(&temp_obst, sizeof(*entry));
                entry->call   = call;
                entry->callee = callee;
                entry->next   = NULL;
@@ -1358,14 +1378,14 @@ INLINE static int is_smaller(ir_graph *callee, int size) {
 /**
  * Append the nodes of the list src to the nodes of the list in environment dst.
  */
-static void append_call_list(struct obstack *obst, inline_irg_env *dst, call_entry *src) {
+static void append_call_list(inline_irg_env *dst, call_entry *src) {
        call_entry *entry, *nentry;
 
        /* Note that the src list points to Call nodes in the inlined graph, but
           we need Call nodes in our graph. Luckily the inliner leaves this information
           in the link field. */
        for (entry = src; entry != NULL; entry = entry->next) {
-               nentry = obstack_alloc(obst, sizeof(*nentry));
+               nentry = obstack_alloc(&temp_obst, sizeof(*nentry));
                nentry->call   = get_irn_link(entry->call);
                nentry->callee = entry->callee;
                nentry->next   = NULL;
@@ -1391,14 +1411,11 @@ void inline_leave_functions(int maxsize, int leavesize, int size, int ignore_run
        wenv_t           wenv;
        call_entry       *entry, *tail;
        const call_entry *centry;
-       struct obstack   obst;
        pmap             *copied_graphs;
        pmap_entry       *pm_entry;
-       DEBUG_ONLY(firm_dbg_module_t *dbg;)
 
-       FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
        rem = current_ir_graph;
-       obstack_init(&obst);
+       obstack_init(&temp_obst);
 
        /* a map for the copied graphs, used to inline recursive calls */
        copied_graphs = pmap_create();
@@ -1406,10 +1423,9 @@ void inline_leave_functions(int maxsize, int leavesize, int size, int ignore_run
        /* extend all irgs by a temporary data structure for inlining. */
        n_irgs = get_irp_n_irgs();
        for (i = 0; i < n_irgs; ++i)
-               set_irg_link(get_irp_irg(i), alloc_inline_irg_env(&obst));
+               set_irg_link(get_irp_irg(i), alloc_inline_irg_env());
 
        /* Precompute information in temporary data structure. */
-       wenv.obst           = &obst;
        wenv.ignore_runtime = ignore_runtime;
        wenv.ignore_callers = 0;
        for (i = 0; i < n_irgs; ++i) {
@@ -1527,7 +1543,387 @@ void inline_leave_functions(int maxsize, int leavesize, int size, int ignore_run
                                        phiproj_computed = 0;
 
                                        /* allocate new environment */
-                                       callee_env = alloc_inline_irg_env(&obst);
+                                       callee_env = alloc_inline_irg_env();
+                                       set_irg_link(copy, callee_env);
+
+                                       wenv.x              = callee_env;
+                                       wenv.ignore_callers = 1;
+                                       irg_walk_graph(copy, NULL, collect_calls2, &wenv);
+
+                                       /*
+                                        * Enter the entity of the original graph. This is needed
+                                        * for inline_method(). However, note that ent->irg still points
+                                        * to callee, NOT to copy.
+                                        */
+                                       set_irg_entity(copy, get_irg_entity(callee));
+
+                                       pmap_insert(copied_graphs, callee, copy);
+                                       callee = copy;
+
+                                       /* we have only one caller: the original graph */
+                                       callee_env->n_callers      = 1;
+                                       callee_env->n_callers_orig = 1;
+                               }
+                               if (! phiproj_computed) {
+                                       phiproj_computed = 1;
+                                       collect_phiprojs(current_ir_graph);
+                               }
+                               did_inline = inline_method(call, callee);
+                               if (did_inline) {
+                                       inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);
+
+                                       /* was inlined, must be recomputed */
+                                       phiproj_computed = 0;
+
+                                       /* callee was inline. Append it's call list. */
+                                       env->got_inline = 1;
+                                       --env->n_call_nodes;
+                                       append_call_list(env, callee_env->call_head);
+                                       env->n_call_nodes += callee_env->n_call_nodes;
+                                       env->n_nodes += callee_env->n_nodes;
+                                       --callee_env->n_callers;
+
+                                       /* after we have inlined callee, all called methods inside callee
+                                          are now called once more */
+                                       for (centry = callee_env->call_head; centry != NULL; centry = centry->next) {
+                                               inline_irg_env *penv = get_irg_link(centry->callee);
+                                               ++penv->n_callers;
+                                       }
+
+                                       /* remove this call from the list */
+                                       if (tail != NULL)
+                                               tail->next = entry->next;
+                                       else
+                                               env->call_head = entry->next;
+                                       continue;
+                               }
+                       }
+                       tail = entry;
+               }
+               env->call_tail = tail;
+       }
+
+       for (i = 0; i < n_irgs; ++i) {
+               irg = get_irp_irg(i);
+               env = (inline_irg_env *)get_irg_link(irg);
+
+               if (env->got_inline) {
+                       /* this irg got calls inlined */
+                       set_irg_outs_inconsistent(irg);
+                       set_irg_doms_inconsistent(irg);
+                       set_irg_loopinfo_inconsistent(irg);
+
+                       optimize_graph_df(irg);
+                       optimize_cf(irg);
+               }
+               if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
+                       DB((dbg, SET_LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
+                       env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
+                       env->n_callers_orig, env->n_callers,
+                       get_entity_name(get_irg_entity(irg))));
+               }
+       }
+
+       /* kill the copied graphs: we don't need them anymore */
+       foreach_pmap(copied_graphs, pm_entry) {
+               ir_graph *copy = pm_entry->value;
+
+               /* reset the entity, otherwise it will be deleted in the next step ... */
+               set_irg_entity(copy, NULL);
+               free_ir_graph(copy);
+       }
+       pmap_destroy(copied_graphs);
+
+       obstack_free(&temp_obst, NULL);
+       current_ir_graph = rem;
+}
+
+/**
+ * Walk all users of a method argument (the address of a local variable
+ * transmitted to an inlined callee) and estimate the weight that
+ * scalar_replacement() could gain after inlining.
+ *
+ * @param arg  the transmitted address (or a Sel/Proj derived from it)
+ * @return the accumulated weight, or 0 if any unsupported user was found
+ */
+static unsigned calc_method_local_weight(ir_node *arg) {
+	int      i, j, k;
+	unsigned v, weight = 0;
+
+	for (i = get_irn_n_outs(arg) - 1; i >= 0; --i) {
+		ir_node *succ = get_irn_out(arg, i);
+
+		switch (get_irn_opcode(succ)) {
+		case iro_Load:
+		case iro_Store:
+			/* Loads and Store can be removed */
+			weight += 3;
+			break;
+		case iro_Sel:
+			/* check if all args are constant */
+			for (j = get_Sel_n_indexs(succ) - 1; j >= 0; --j) {
+				ir_node *idx = get_Sel_index(succ, j);
+				if (! is_Const(idx))
+					return 0;
+			}
+			/* Check users on this Sel. Note: if a 0 is returned here, there was
+			   some unsupported node. */
+			v = calc_method_local_weight(succ);
+			if (v == 0)
+				return 0;
+			/* we can kill one Sel with constant indexes, this is cheap */
+			weight += v + 1;
+			break;
+		case iro_Id:
+			/* when looking backward we might find Id nodes */
+			weight += calc_method_local_weight(succ);
+			break;
+		case iro_Tuple:
+			/* unoptimized tuple */
+			for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
+				ir_node *pred = get_Tuple_pred(succ, j);
+				if (pred == arg) {
+					/* look for Proj(j) */
+					for (k = get_irn_n_outs(succ) - 1; k >= 0; --k) {
+						ir_node *succ_succ = get_irn_out(succ, k);
+						if (is_Proj(succ_succ)) {
+							if (get_Proj_proj(succ_succ) == j) {
+								/* found */
+								weight += calc_method_local_weight(succ_succ);
+							}
+						} else {
+							/* this should NOT happen */
+							return 0;
+						}
+					}
+				}
+			}
+			/* BUGFIX: the missing break here made the Tuple case fall
+			   through into default and always return 0. */
+			break;
+		default:
+			/* any other node: unsupported yet or bad. */
+			return 0;
+		}
+	}
+	return weight;
+}
+
+/**
+ * Compute, for every parameter of irg, the weight gained by transmitting
+ * the address of a local variable at that position.  The result is cached
+ * in env->local_weights, which also serves as the 'analysed' flag.
+ *
+ * @param env  the inline environment of irg (receives the weight array)
+ * @param irg  the graph whose parameters are analysed
+ */
+static void analyze_irg_local_weights(inline_irg_env *env, ir_graph *irg) {
+	ir_entity *ent = get_irg_entity(irg);
+	ir_type  *mtp;
+	int      nparams, i, proj_nr;
+	ir_node  *irg_args, *arg;
+
+	mtp      = get_entity_type(ent);
+	nparams  = get_method_n_params(mtp);
+
+	/* allocate a new array. currently used as 'analysed' flag */
+	env->local_weights = NEW_ARR_D(unsigned, &temp_obst, nparams);
+
+	/* If the method has no parameters we have nothing to do. */
+	if (nparams <= 0)
+		return;
+
+	/* BUGFIX: NEW_ARR_D does not zero the obstack memory; parameters
+	   without a Proj out-edge would otherwise be read uninitialized
+	   in get_method_local_adress_weight(). */
+	for (i = 0; i < nparams; ++i)
+		env->local_weights[i] = 0;
+
+	assure_irg_outs(irg);
+	irg_args = get_irg_args(irg);
+	for (i = get_irn_n_outs(irg_args) - 1; i >= 0; --i) {
+		arg     = get_irn_out(irg_args, i);
+		proj_nr = get_Proj_proj(arg);
+		env->local_weights[proj_nr] = calc_method_local_weight(arg);
+	}
+}
+
+/**
+ * Return the benefice weight for transmitting the address of a local
+ * variable as parameter pos of callee.  After inlining, the local
+ * variable might be transformed into an SSA variable by
+ * scalar_replacement().  The per-parameter weights are computed lazily
+ * on first request and cached in the callee's inline environment.
+ */
+static unsigned get_method_local_adress_weight(ir_graph *callee, int pos) {
+	inline_irg_env *env = get_irg_link(callee);
+
+	/* not yet analysed: compute and cache the weights now */
+	if (env->local_weights == NULL)
+		analyze_irg_local_weights(env, callee);
+
+	if (pos < ARR_LEN(env->local_weights))
+		return env->local_weights[pos];
+	return 0;
+}
+
+/**
+ * Calculate a benefice value for inlining the given call: higher values
+ * favour inlining, negative values discourage it.
+ *
+ * @param call       the Call node under consideration
+ * @param callee     the graph that would be inlined
+ * @param local_adr  out: set to 1 if the address of a local variable is
+ *                   transmitted with a positive weight
+ * @return the benefice, or INT_MIN for calls that must never be inlined
+ */
+static int calc_inline_benefice(ir_node *call, ir_graph *callee, unsigned *local_adr) {
+	ir_entity *ent = get_irg_entity(callee);
+	ir_node   *frame_ptr;
+	ir_type   *mtp;
+	int       weight = 0;
+	int       i, n_params;
+	unsigned  cc, v;
+
+	inline_irg_env *curr_env, *callee_env;
+
+	if (get_entity_additional_properties(ent) & mtp_property_noreturn) {
+		/* do NOT inline noreturn calls */
+		return INT_MIN;
+	}
+
+	/* costs for every passed parameter */
+	n_params = get_Call_n_params(call);
+	mtp      = get_entity_type(ent);
+	cc       = get_method_calling_convention(mtp);
+	if (cc & cc_reg_param) {
+		/* register parameters: smaller costs than stack parameters */
+		int max_regs = cc & ~cc_bits;
+
+		if (max_regs < n_params)
+			weight += max_regs * 2 + (n_params - max_regs) * 5;
+		else
+			weight += n_params * 2;
+	} else {
+		/* all parameters are passed on the stack */
+		weight += 5 * n_params;
+	}
+
+	/* constant parameters improve the benefice */
+	frame_ptr = get_irg_frame(current_ir_graph);
+	for (i = 0; i < n_params; ++i) {
+		ir_node *param = get_Call_param(call, i);
+
+		if (is_Const(param) || is_SymConst(param))
+			weight += get_method_param_weight(ent, i);
+		else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
+			/*
+			 * An address of a local variable is transmitted. After inlining,
+			 * scalar_replacement might be able to remove the local variable,
+			 * so honor this.
+			 */
+			v = get_method_local_adress_weight(callee, i);
+			weight += v;
+			if (v > 0)
+				*local_adr = 1;
+		}
+	}
+
+	callee_env = get_irg_link(callee);
+	if (get_entity_visibility(ent) == visibility_local &&
+	    callee_env->n_callers_orig == 1 &&
+	    callee != current_ir_graph) {
+		/* we are the only caller, give big bonus */
+		weight += 5000;
+	}
+
+	/* do not inline big functions */
+	weight -= callee_env->n_nodes;
+
+	/* reduce the benefice if the current function is already big */
+	curr_env = get_irg_link(current_ir_graph);
+	weight -= curr_env->n_nodes / 100;
+
+	/* give a bonus for functions with one block: likely cheap and
+	   will not disturb the caller's control flow much */
+	if (callee_env->n_blocks == 1)
+		weight = weight * 3 / 2;
+
+	return weight;
+}
+
+/**
+ * Heuristic inliner. Calculates a benifice value for every call and inlines
+ * those calls with a value higher than the threshold.
+ */
+void inline_functions(int inline_threshold) {
+       inline_irg_env   *env;
+       ir_graph         *irg;
+       int              i, n_irgs;
+       ir_graph         *rem;
+       int              did_inline;
+       wenv_t           wenv;
+       call_entry       *entry, *tail;
+       const call_entry *centry;
+       pmap             *copied_graphs;
+       pmap_entry       *pm_entry;
+
+       rem = current_ir_graph;
+       obstack_init(&temp_obst);
+
+       /* a map for the copied graphs, used to inline recursive calls */
+       copied_graphs = pmap_create();
+
+       /* extend all irgs by a temporary data structure for inlining. */
+       n_irgs = get_irp_n_irgs();
+       for (i = 0; i < n_irgs; ++i)
+               set_irg_link(get_irp_irg(i), alloc_inline_irg_env());
+
+       /* Precompute information in temporary data structure. */
+       wenv.ignore_runtime = 0;
+       wenv.ignore_callers = 0;
+       for (i = 0; i < n_irgs; ++i) {
+               ir_graph *irg = get_irp_irg(i);
+
+               assert(get_irg_phase_state(irg) != phase_building);
+               free_callee_info(irg);
+
+               wenv.x = get_irg_link(irg);
+               irg_walk_graph(irg, NULL, collect_calls2, &wenv);
+       }
+
+       /* -- and now inline. -- */
+       for (i = 0; i < n_irgs; ++i) {
+               ir_node *call;
+               int phiproj_computed = 0;
+
+               current_ir_graph = get_irp_irg(i);
+               env = get_irg_link(current_ir_graph);
+
+               /* note that the list of possible calls is updated during the process */
+               tail = NULL;
+               for (entry = env->call_head; entry != NULL; entry = entry->next) {
+                       ir_graph   *callee;
+                       pmap_entry *e;
+                       int        benefice;
+                       unsigned   local_adr;
+
+                       call   = entry->call;
+                       callee = entry->callee;
+
+                       /* calculate the benifice on the original call to prevent excessive inlining */
+                       local_adr = 0;
+                       benefice = calc_inline_benefice(call, callee, &local_adr);
+                       DB((dbg, SET_LEVEL_2, "In %+F Call %+F has benefice %d\n", current_ir_graph, callee, benefice));
+
+                       e = pmap_find(copied_graphs, callee);
+                       if (e != NULL) {
+                               /*
+                                * Remap callee if we have a copy.
+                                * FIXME: Should we do this only for recursive Calls ?
+                                */
+                               callee = e->value;
+                       }
+
+                       if (benefice > -inline_threshold ||
+                               (get_irg_inline_property(callee) >= irg_inline_forced)) {
+                               if (current_ir_graph == callee) {
+                                       /*
+                                        * Recursive call: we cannot directly inline because we cannot walk
+                                        * the graph and change it. So we have to make a copy of the graph
+                                        * first.
+                                        */
+
+                                       inline_irg_env *callee_env;
+                                       ir_graph       *copy;
+
+                                       /*
+                                        * No copy yet, create one.
+                                        * Note that recursive methods are never leaves, so it is sufficient
+                                        * to test this condition here.
+                                        */
+                                       copy = create_irg_copy(callee);
+
+                                       /* create_irg_copy() destroys the Proj links, recompute them */
+                                       phiproj_computed = 0;
+
+                                       /* allocate new environment */
+                                       callee_env = alloc_inline_irg_env();
                                        set_irg_link(copy, callee_env);
 
                                        wenv.x              = callee_env;
@@ -1561,8 +1957,10 @@ void inline_leave_functions(int maxsize, int leavesize, int size, int ignore_run
 
                                        /* callee was inline. Append it's call list. */
                                        env->got_inline = 1;
+                                       if (local_adr)
+                                               env->local_vars = 1;
                                        --env->n_call_nodes;
-                                       append_call_list(&obst, env, callee_env->call_head);
+                                       append_call_list(env, callee_env->call_head);
                                        env->n_call_nodes += callee_env->n_call_nodes;
                                        env->n_nodes += callee_env->n_nodes;
                                        --callee_env->n_callers;
@@ -1595,7 +1993,10 @@ void inline_leave_functions(int maxsize, int leavesize, int size, int ignore_run
                        /* this irg got calls inlined */
                        set_irg_outs_inconsistent(irg);
                        set_irg_doms_inconsistent(irg);
+                       set_irg_loopinfo_inconsistent(irg);
 
+                       if (env->local_vars)
+                               scalar_replacement_opt(irg);
                        optimize_graph_df(irg);
                        optimize_cf(irg);
                }
@@ -1617,6 +2018,10 @@ void inline_leave_functions(int maxsize, int leavesize, int size, int ignore_run
        }
        pmap_destroy(copied_graphs);
 
-       obstack_free(&obst, NULL);
+       obstack_free(&temp_obst, NULL);
        current_ir_graph = rem;
 }
+
+/** Initialize the inliner's debug module (new-style debug output). */
+void firm_init_inline(void) {
+	FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
+}