#include "irouts.h"
#include "irloop_t.h"
#include "irbackedge_t.h"
+#include "opt_inline_t.h"
#include "cgana.h"
#include "trouts.h"
#include "error.h"
#include "irhooks.h"
#include "irtools.h"
+DEBUG_ONLY(static firm_dbg_module_t *dbg;)
/*------------------------------------------------------------------*/
/* Routines for dead node elimination / copying garbage collection */
is_Start(get_Proj_pred(node)) &&
get_Proj_proj(node) == pn_Start_P_value_arg_base) {
*allow_inline = 0;
+ } else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
+ /* From GCC:
+ * Refuse to inline alloca call unless user explicitly forced so as this
+ * may change program's memory overhead drastically when the function
+ * using alloca is called in loop. In GCC present in SPEC2000 inlining
+ * into schedule_block cause it to require 2GB of ram instead of 256MB.
+ *
+ * Sadly, this is also true of our implementation.
+ * Moreover, we cannot differentiate between alloca() and VLA yet, so this
+ * disables inlining of functions using VLA (which are completely safe).
+ *
+ * 2 Solutions:
+ * - add a flag to the Alloc node for "real" alloca() calls
+ * - add a new Stack-Restore node at the end of a function using alloca()
+ */
+ *allow_inline = 0;
}
}
/* Inlines a method at the given call site. */
int inline_method(ir_node *call, ir_graph *called_graph) {
- ir_node *pre_call;
- ir_node *post_call, *post_bl;
- ir_node *in[pn_Start_max];
- ir_node *end, *end_bl;
- ir_node **res_pred;
- ir_node **cf_pred;
- ir_node *ret, *phi;
- int arity, n_ret, n_exc, n_res, i, n, j, rem_opt, irn_arity;
- enum exc_mode exc_handling;
- ir_type *called_frame, *curr_frame;
+ ir_node *pre_call;
+ ir_node *post_call, *post_bl;
+ ir_node *in[pn_Start_max];
+ ir_node *end, *end_bl;
+ ir_node **res_pred;
+ ir_node **cf_pred;
+ ir_node *ret, *phi;
+ int arity, n_ret, n_exc, n_res, i, n, j, rem_opt, irn_arity;
+ enum exc_mode exc_handling;
+ ir_type *called_frame, *curr_frame;
+ ir_entity *ent;
+ ir_graph *rem, *irg;
irg_inline_property prop = get_irg_inline_property(called_graph);
- ir_entity *ent;
if (prop == irg_inline_forbidden)
return 0;
ent = get_irg_entity(called_graph);
/* Do not inline variadic functions. */
- if (get_method_variadicity(get_entity_type(ent)) == variadicity_variadic)
- return 0;
+ if (get_method_variadicity(get_entity_type(ent)) == variadicity_variadic) {
+ /* Argh, K&R functions are marked as variadic ones, so check further */
+ ir_type *mtp = get_entity_type(ent);
+ ir_type *ctp = get_Call_type(call);
+ int n_params = get_method_n_params(mtp);
+ int i;
+
+ /* This is too strong, but probably ok. Function calls with a wrong number of
+ parameters should not be inlined. */
+ if (n_params != get_method_n_params(ctp))
+ return 0;
+
+ /* check types: for K&R calls, this was not done by the compiler. Again, this is
+ too strong, but ok for now. */
+ for (i = n_params - 1; i >= 0; --i) {
+ ir_type *param_tp = get_method_param_type(mtp, i);
+ ir_type *arg_tp = get_method_param_type(ctp, i);
+
+ if (param_tp != arg_tp)
+ return 0;
+ }
+ DB((dbg, LEVEL_1, "Inlining allowed for variadic function %+F\n", called_graph));
+ /* types match, fine: when the frame is accessed, the inliner stops at can_inline() */
+ }
assert(get_method_n_params(get_entity_type(ent)) ==
get_method_n_params(get_Call_type(call)));
+ irg = get_irn_irg(call);
+
/*
* We cannot inline a recursive call. The graph must be copied before
* the call the inline_method() using create_irg_copy().
*/
- if (called_graph == current_ir_graph)
+ if (called_graph == irg)
return 0;
/*
if (! can_inline(call, called_graph))
return 0;
+ rem = current_ir_graph;
+ current_ir_graph = irg;
+
+ DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));
+
/* -- Turn off optimizations, this can cause problems when allocating new nodes. -- */
rem_opt = get_opt_optimize();
set_optimize(0);
/* Handle graph state */
- assert(get_irg_phase_state(current_ir_graph) != phase_building);
- assert(get_irg_pinned(current_ir_graph) == op_pin_state_pinned);
+ assert(get_irg_phase_state(irg) != phase_building);
+ assert(get_irg_pinned(irg) == op_pin_state_pinned);
assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
- set_irg_outs_inconsistent(current_ir_graph);
- set_irg_extblk_inconsistent(current_ir_graph);
- set_irg_doms_inconsistent(current_ir_graph);
- set_irg_loopinfo_inconsistent(current_ir_graph);
- set_irg_callee_info_state(current_ir_graph, irg_callee_info_inconsistent);
+ set_irg_outs_inconsistent(irg);
+ set_irg_extblk_inconsistent(irg);
+ set_irg_doms_inconsistent(irg);
+ set_irg_loopinfo_inconsistent(irg);
+ set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
/* -- Check preconditions -- */
assert(is_Call(call));
Post_call is the old Call node and collects the results of the called
graph. Both will end up being a tuple. -- */
post_bl = get_nodes_block(call);
- set_irg_current_block(current_ir_graph, post_bl);
+ set_irg_current_block(irg, post_bl);
/* XxMxPxPxPxT of Start + parameter of Call */
in[pn_Start_X_initial_exec] = new_Jmp();
in[pn_Start_M] = get_Call_mem(call);
- in[pn_Start_P_frame_base] = get_irg_frame(current_ir_graph);
- in[pn_Start_P_globals] = get_irg_globals(current_ir_graph);
- in[pn_Start_P_tls] = get_irg_tls(current_ir_graph);
+ in[pn_Start_P_frame_base] = get_irg_frame(irg);
+ in[pn_Start_P_tls] = get_irg_tls(irg);
in[pn_Start_T_args] = new_Tuple(get_Call_n_params(call), get_Call_param_arr(call));
/* in[pn_Start_P_value_arg_base] = ??? */
assert(pn_Start_P_value_arg_base == pn_Start_max - 1 && "pn_Start_P_value_arg_base not supported, fix");
/* -- Prepare state for dead node elimination -- */
/* Visited flags in calling irg must be >= flag in called irg.
Else walker and arity computation will not work. */
- if (get_irg_visited(current_ir_graph) <= get_irg_visited(called_graph))
- set_irg_visited(current_ir_graph, get_irg_visited(called_graph)+1);
- if (get_irg_block_visited(current_ir_graph)< get_irg_block_visited(called_graph))
- set_irg_block_visited(current_ir_graph, get_irg_block_visited(called_graph));
+ if (get_irg_visited(irg) <= get_irg_visited(called_graph))
+ set_irg_visited(irg, get_irg_visited(called_graph)+1);
+ if (get_irg_block_visited(irg) < get_irg_block_visited(called_graph))
+ set_irg_block_visited(irg, get_irg_block_visited(called_graph));
/* Set pre_call as new Start node in link field of the start node of
calling graph and pre_calls block as new block for the start block
of calling graph.
Further mark these nodes so that they are not visited by the
copying. */
set_irn_link(get_irg_start(called_graph), pre_call);
- set_irn_visited(get_irg_start(called_graph), get_irg_visited(current_ir_graph));
+ set_irn_visited(get_irg_start(called_graph), get_irg_visited(irg));
set_irn_link(get_irg_start_block(called_graph), get_nodes_block(pre_call));
- set_irn_visited(get_irg_start_block(called_graph), get_irg_visited(current_ir_graph));
- set_irn_link(get_irg_bad(called_graph), get_irg_bad(current_ir_graph));
- set_irn_visited(get_irg_bad(called_graph), get_irg_visited(current_ir_graph));
+ set_irn_visited(get_irg_start_block(called_graph), get_irg_visited(irg));
+ set_irn_link(get_irg_bad(called_graph), get_irg_bad(irg));
+ set_irn_visited(get_irg_bad(called_graph), get_irg_visited(irg));
/* Initialize for compaction of in arrays */
- inc_irg_block_visited(current_ir_graph);
+ inc_irg_block_visited(irg);
/* -- Replicate local entities of the called_graph -- */
/* copy the entities. */
called_frame = get_irg_frame_type(called_graph);
- curr_frame = get_irg_frame_type(current_ir_graph);
+ curr_frame = get_irg_frame_type(irg);
for (i = 0, n = get_class_n_members(called_frame); i < n; ++i) {
ir_entity *new_ent, *old_ent;
old_ent = get_class_member(called_frame, i);
/* visited is > than that of called graph. With this trick visited will
remain unchanged so that an outer walker, e.g., searching the call nodes
to inline, calling this inline will not visit the inlined nodes. */
- set_irg_visited(current_ir_graph, get_irg_visited(current_ir_graph)-1);
+ set_irg_visited(irg, get_irg_visited(irg)-1);
/* -- Performing dead node elimination inlines the graph -- */
/* Copies the nodes to the obstack of current_ir_graph. Updates links to new
get_irg_frame_type(called_graph));
/* Repair called_graph */
- set_irg_visited(called_graph, get_irg_visited(current_ir_graph));
- set_irg_block_visited(called_graph, get_irg_block_visited(current_ir_graph));
+ set_irg_visited(called_graph, get_irg_visited(irg));
+ set_irg_block_visited(called_graph, get_irg_block_visited(irg));
set_Block_block_visited(get_irg_start_block(called_graph), 0);
/* -- Merge the end of the inlined procedure with the call site -- */
res_pred = xmalloc(n_res * sizeof(*res_pred));
cf_pred = xmalloc(arity * sizeof(*res_pred));
- set_irg_current_block(current_ir_graph, post_bl); /* just to make sure */
+ set_irg_current_block(irg, post_bl); /* just to make sure */
/* -- archive keepalives -- */
irn_arity = get_irn_arity(end);
for (i = 0; i < irn_arity; i++) {
ir_node *ka = get_End_keepalive(end, i);
if (! is_Bad(ka))
- add_End_keepalive(get_irg_end(current_ir_graph), ka);
+ add_End_keepalive(get_irg_end(irg), ka);
}
/* The new end node will die. We need not free as the in array is on the obstack:
ir_node *ret;
ret = get_irn_n(end_bl, i);
if (is_Return(ret)) {
- cf_pred[n_ret] = new_r_Jmp(current_ir_graph, get_nodes_block(ret));
+ cf_pred[n_ret] = new_r_Jmp(irg, get_nodes_block(ret));
n_ret++;
}
}
ir_node *ret;
ret = skip_Proj(get_irn_n(end_bl, i));
if (is_Call(ret)) {
- cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_block(ret), ret, mode_M, 3);
+ cf_pred[n_exc] = new_r_Proj(irg, get_nodes_block(ret), ret, mode_M, 3);
n_exc++;
} else if (is_fragile_op(ret)) {
/* We rely that all cfops have the memory output at the same position. */
- cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_block(ret), ret, mode_M, 0);
+ cf_pred[n_exc] = new_r_Proj(irg, get_nodes_block(ret), ret, mode_M, 0);
n_exc++;
} else if (is_Raise(ret)) {
- cf_pred[n_exc] = new_r_Proj(current_ir_graph, get_nodes_block(ret), ret, mode_M, 1);
+ cf_pred[n_exc] = new_r_Proj(irg, get_nodes_block(ret), ret, mode_M, 1);
n_exc++;
}
}
n_exc++;
}
}
- main_end_bl = get_irg_end_block(current_ir_graph);
+ main_end_bl = get_irg_end_block(irg);
main_end_bl_arity = get_irn_arity(main_end_bl);
end_preds = xmalloc((n_exc + main_end_bl_arity) * sizeof(*end_preds));
/* -- Turn CSE back on. -- */
set_optimize(rem_opt);
+ current_ir_graph = rem;
return 1;
}
* size are inlined.
*/
void inline_small_irgs(ir_graph *irg, int size) {
- ir_graph *rem = current_ir_graph;
+ ir_graph *rem = current_ir_graph;
inline_env_t env;
call_entry *entry;
- DEBUG_ONLY(firm_dbg_module_t *dbg;)
-
- FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
current_ir_graph = irg;
/* Handle graph state */
int n_call_nodes_orig; /**< for statistics */
int n_callers; /**< Number of known graphs that call this graphs. */
int n_callers_orig; /**< for statistics */
- int got_inline; /**< Set, if at least one call inside this graph was inlined. */
+ unsigned got_inline:1; /**< Set, if at least one call inside this graph was inlined. */
+ unsigned local_vars:1; /**< Set, if a inlined function gets the address of an inlined variable. */
unsigned *local_weights; /**< Once allocated, the beneficial weight for transmitting local addresses. */
} inline_irg_env;
env->n_callers = 0;
env->n_callers_orig = 0;
env->got_inline = 0;
+ env->local_vars = 0;
env->local_weights = NULL;
return env;
}
const call_entry *centry;
pmap *copied_graphs;
pmap_entry *pm_entry;
- DEBUG_ONLY(firm_dbg_module_t *dbg;)
- FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
rem = current_ir_graph;
obstack_init(&temp_obst);
env = (inline_irg_env *)get_irg_link(irg);
if (env->got_inline) {
- /* this irg got calls inlined */
- set_irg_outs_inconsistent(irg);
- set_irg_doms_inconsistent(irg);
-
optimize_graph_df(irg);
optimize_cf(irg);
}
if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
- DB((dbg, SET_LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
+ DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
env->n_callers_orig, env->n_callers,
get_entity_name(get_irg_entity(irg))));
* Calculate the parameter weights for transmitting the address of a local variable.
*/
static unsigned calc_method_local_weight(ir_node *arg) {
- int i, j;
+ int i, j, k;
unsigned v, weight = 0;
for (i = get_irn_n_outs(arg) - 1; i >= 0; --i) {
/* we can kill one Sel with constant indexes, this is cheap */
weight += v + 1;
break;
+ case iro_Id:
+ /* when looking backward we might find Id nodes */
+ weight += calc_method_local_weight(succ);
+ break;
+ case iro_Tuple:
+ /* unoptimized tuple */
+ for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
+ ir_node *pred = get_Tuple_pred(succ, j);
+ if (pred == arg) {
+ /* look for Proj(j) */
+ for (k = get_irn_n_outs(succ) - 1; k >= 0; --k) {
+ ir_node *succ_succ = get_irn_out(succ, k);
+ if (is_Proj(succ_succ)) {
+ if (get_Proj_proj(succ_succ) == j) {
+ /* found */
+ weight += calc_method_local_weight(succ_succ);
+ }
+ } else {
+ /* this should NOT happen */
+ return 0;
+ }
+ }
+ }
+ }
default:
/* any other node: unsupported yet or bad. */
return 0;
/**
* calculate a benefice value for inlining the given call.
*/
-static int calc_inline_benefice(ir_node *call, ir_graph *callee) {
+static int calc_inline_benefice(ir_node *call, ir_graph *callee, unsigned *local_adr) {
ir_entity *ent = get_irg_entity(callee);
ir_node *frame_ptr;
ir_type *mtp;
int weight = 0;
int i, n_params;
- unsigned cc;
+ unsigned cc, v;
inline_irg_env *curr_env, *callee_env;
* scalar_replacement might be able to remove the local variable,
* so honor this.
*/
- weight += get_method_local_adress_weight(callee, i);
+ v = get_method_local_adress_weight(callee, i);
+ weight += v;
+ if (v > 0)
+ *local_adr = 1;
}
}
*/
void inline_functions(int inline_threshold) {
inline_irg_env *env;
- ir_graph *irg;
int i, n_irgs;
ir_graph *rem;
int did_inline;
const call_entry *centry;
pmap *copied_graphs;
pmap_entry *pm_entry;
- DEBUG_ONLY(firm_dbg_module_t *dbg;)
- FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
rem = current_ir_graph;
obstack_init(&temp_obst);
/* -- and now inline. -- */
for (i = 0; i < n_irgs; ++i) {
- ir_node *call;
- int phiproj_computed = 0;
+ int phiproj_computed = 0;
+ ir_node *call;
+ ir_graph *irg = get_irp_irg(i);
- current_ir_graph = get_irp_irg(i);
- env = get_irg_link(current_ir_graph);
+ current_ir_graph = irg;
+ env = get_irg_link(irg);
/* note that the list of possible calls is updated during the process */
tail = NULL;
ir_graph *callee;
pmap_entry *e;
int benefice;
+ unsigned local_adr;
call = entry->call;
callee = entry->callee;
/* calculate the benifice on the original call to prevent excessive inlining */
- benefice = calc_inline_benefice(call, callee);
- DB((dbg, SET_LEVEL_2, "In %+F Call %+F has benefice %d\n", current_ir_graph, callee, benefice));
+ local_adr = 0;
+ benefice = calc_inline_benefice(call, callee, &local_adr);
+ DB((dbg, LEVEL_2, "In %+F Call %+F has benefice %d\n", irg, callee, benefice));
e = pmap_find(copied_graphs, callee);
if (e != NULL) {
/* callee was inline. Append it's call list. */
env->got_inline = 1;
+ if (local_adr)
+ env->local_vars = 1;
--env->n_call_nodes;
append_call_list(env, callee_env->call_head);
env->n_call_nodes += callee_env->n_call_nodes;
tail = entry;
}
env->call_tail = tail;
- }
-
- for (i = 0; i < n_irgs; ++i) {
- irg = get_irp_irg(i);
- env = (inline_irg_env *)get_irg_link(irg);
if (env->got_inline) {
- /* this irg got calls inlined */
- set_irg_outs_inconsistent(irg);
- set_irg_doms_inconsistent(irg);
+ /* this irg got calls inlined: optimize it */
+ /* scalar replacement does not work well with Tuple nodes, so optimize them away */
optimize_graph_df(irg);
+
+ if (env->local_vars) {
+ if (scalar_replacement_opt(irg)) {
+ optimize_graph_df(irg);
+ }
+ }
+
optimize_cf(irg);
}
if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
- DB((dbg, SET_LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
+ DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
env->n_callers_orig, env->n_callers,
get_entity_name(get_irg_entity(irg))));
obstack_free(&temp_obst, NULL);
current_ir_graph = rem;
}
+
+void firm_init_inline(void) { /* Initialization hook for the inliner's debug output; takes no arguments and returns nothing. */
+ FIRM_DBG_REGISTER(dbg, "firm.opt.inline"); /* presumably binds the file-scope `dbg` handle (declared under DEBUG_ONLY) to the "firm.opt.inline" debug module used by the DB() calls in this file -- confirm against the macro definition */
+}