 * This file is part of libFirm.
 * Copyright (C) 2012 University of Karlsruhe.
 */

/**
 * @file
 * @brief   Dead node elimination and Procedure Inlining.
 * @author  Michael Beck, Goetz Lindenmaier
 */
#include "irgraph_t.h"
#include "iroptimize.h"
#include "irbackedge_t.h"
#include "analyze_irg_args.h"
#include "iredges_t.h"
#include "iropt_dbg.h"
#include "irnodemap.h"

DEBUG_ONLY(static firm_dbg_module_t *dbg;)

/*------------------------------------------------------------------*/
/* Routines for dead node elimination / copying garbage collection  */
/*------------------------------------------------------------------*/

/**
 * Remember the new node in the old node by using a field all nodes have.
 */
static void set_new_node(ir_node *node, ir_node *new_node)
{
	set_irn_link(node, new_node);
}

/**
 * Get this new node, before the old node is forgotten.
 */
static inline ir_node *get_new_node(ir_node *old_node)
{
	assert(irn_visited(old_node));
	return (ir_node*) get_irn_link(old_node);
}

/*--------------------------------------------------------------------*/
/*  Functionality for inlining                                         */
/*--------------------------------------------------------------------*/

/**
 * Copy node for inlining. Updates attributes that change when
 * inlining but not for dead node elimination.
 *
 * Copies the node by calling irn_copy_into_irg() and then updates the
 * entity if it's a local one. env must be the new irg. The copied
 * entities must be in the link fields of the old entities.
 */
static void copy_node_inline(ir_node *node, void *env)
{
	ir_graph *new_irg  = (ir_graph*) env;
	ir_node  *new_node = irn_copy_into_irg(node, new_irg);

	set_new_node(node, new_node);
	if (is_Sel(node)) {
		ir_graph  *old_irg        = get_irn_irg(node);
		ir_type   *old_frame_type = get_irg_frame_type(old_irg);
		ir_entity *old_entity     = get_Sel_entity(node);
		assert(is_Sel(new_node));

		/* use copied entities from the new frame */
		if (get_entity_owner(old_entity) == old_frame_type) {
			ir_entity *new_entity = (ir_entity*)get_entity_link(old_entity);
			assert(new_entity != NULL);
			set_Sel_entity(new_node, new_entity);
		}
	} else if (is_Block(new_node)) {
		new_node->attr.block.irg.irg = new_irg;
	}
}
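
/**
 * Walker: rewires the inputs of a copied node to the copies of its
 * predecessors and moves nodes that must live in the start block
 * (e.g. constants) into the start block of the new graph.
 */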
static void set_preds_inline(ir_node *node, void *env)
{
	ir_node *new_node;

	irn_rewire_inputs(node);

	/* move constants into start block */
	new_node = get_new_node(node);
	if (is_irn_start_block_placed(new_node)) {
		ir_graph *new_irg     = (ir_graph *) env;
		ir_node  *start_block = get_irg_start_block(new_irg);
		set_nodes_block(new_node, start_block);
	}
}

/**
 * Walker: checks whether a graph uses constructs that prevent inlining
 * (taken block addresses, frame/parameter address usage, alloca).
 */
static void find_addr(ir_node *node, void *env)
{
	bool *allow_inline = (bool*)env;

	if (is_Block(node) && get_Block_entity(node)) {
		/* Currently we cannot correctly handle blocks whose address
		 * was taken. */
		*allow_inline = false;
	} else if (is_Sel(node)) {
		ir_graph *irg = current_ir_graph;
		if (get_Sel_ptr(node) == get_irg_frame(irg)) {
			/* access to frame */
			ir_entity *ent = get_Sel_entity(node);
			if (get_entity_owner(ent) != get_irg_frame_type(irg)) {
				/* access to value_type */
				*allow_inline = false;
			}
			if (is_parameter_entity(ent)) {
				*allow_inline = false;
			}
		}
	} else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
		/* Refuse to inline a call to a function that uses alloca unless the
		 * user explicitly forces it, as this may change the program's memory
		 * overhead drastically when the function using alloca is called in a
		 * loop. For the GCC contained in SPEC2000, inlining into
		 * schedule_block caused it to require 2GB of RAM instead of 256MB.
		 *
		 * Sadly this is true for our implementation as well.
		 * Moreover, we cannot differentiate between alloca() and VLA yet, so
		 * this disables inlining of functions using VLA (which are completely
		 * inlinable) as well.
		 *
		 * Possible improvements:
		 * - add a flag to the Alloc node for "real" alloca() calls
		 * - add a new Stack-Restore node at the end of a function using
		 *   alloca()
		 */
		*allow_inline = false;
	}
}
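
/* Illustration (not from the original source): inlining a callee that
 * calls alloca() into a loop merges per-call stack allocations into one
 * frame that only grows, e.g.
 *
 *     void callee(size_t n) { char *buf = alloca(n); use(buf); }
 *     for (int i = 0; i < 1000; ++i)
 *         callee(4096);   // after inlining: up to ~4MB live in one frame
 *
 * where use() is a placeholder for arbitrary work on the buffer. */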

/**
 * Check if we can inline a given call.
 * Currently, we cannot inline in two cases:
 * - calls with compound arguments
 * - graphs that take the address of a parameter
 *
 * We check these conditions here.
 */
static bool can_inline(ir_node *call, ir_graph *called_graph)
{
	ir_entity *called      = get_irg_entity(called_graph);
	ir_type   *called_type = get_entity_type(called);
	ir_type   *call_type   = get_Call_type(call);
	size_t     n_params    = get_method_n_params(called_type);
	size_t     n_arguments = get_method_n_params(call_type);
	size_t     n_res       = get_method_n_ress(called_type);
	size_t     i;
	mtp_additional_properties props = get_entity_additional_properties(called);

	if (props & mtp_property_noinline)
		return false;

	if (n_arguments != n_params) {
		/* this is a bad feature of C: without a prototype, we can
		 * call a function with fewer parameters than needed. Currently
		 * we don't support this, although we could use Unknown then. */
		return false;
	}
	if (n_res != get_method_n_ress(call_type)) {
		return false;
	}

	/* Argh, compiling C has some bad consequences:
	 * It is implementation dependent what happens in that case.
	 * We support inlining, if the bitsize of the types matches AND
	 * the same arithmetic is used. */
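	/* For instance (illustrative): a 32-bit `int` argument passed for a
	 * 32-bit `long` parameter has the same size and integer arithmetic,
	 * so the bits can simply be reused; a 32-bit `float` passed for an
	 * `int` has the same size but different arithmetic and is rejected. */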
	for (i = 0; i < n_params; ++i) {
		ir_type *param_tp = get_method_param_type(called_type, i);
		ir_type *arg_tp   = get_method_param_type(call_type, i);

		if (param_tp != arg_tp) {
			ir_mode *pmode = get_type_mode(param_tp);
			ir_mode *amode = get_type_mode(arg_tp);

			if (pmode == NULL || amode == NULL)
				return false;
			if (get_mode_size_bits(pmode) != get_mode_size_bits(amode))
				return false;
			if (get_mode_arithmetic(pmode) != get_mode_arithmetic(amode))
				return false;
			/* otherwise we can simply "reinterpret" the bits */
		}
	}
	for (i = 0; i < n_res; ++i) {
		ir_type *decl_res_tp = get_method_res_type(called_type, i);
		ir_type *used_res_tp = get_method_res_type(call_type, i);

		if (decl_res_tp != used_res_tp) {
			ir_mode *decl_mode = get_type_mode(decl_res_tp);
			ir_mode *used_mode = get_type_mode(used_res_tp);
			if (decl_mode == NULL || used_mode == NULL)
				return false;
			if (get_mode_size_bits(decl_mode) != get_mode_size_bits(used_mode))
				return false;
			if (get_mode_arithmetic(decl_mode) != get_mode_arithmetic(used_mode))
				return false;
			/* otherwise we can "reinterpret" the bits */
		}
	}

	/* check parameters for compound arguments */
	for (i = 0; i < n_params; ++i) {
		ir_type *p_type = get_method_param_type(call_type, i);

		if (is_compound_type(p_type) || is_Array_type(p_type))
			return false;
	}

	/* check results for compound arguments */
	for (i = 0; i < n_res; ++i) {
		ir_type *r_type = get_method_res_type(call_type, i);

		if (is_compound_type(r_type) || is_Array_type(r_type))
			return false;
	}

	bool res = true;
	irg_walk_graph(called_graph, find_addr, NULL, &res);
	return res;
}

enum exc_mode {
	exc_handler,    /**< There is a handler. */
	exc_no_handler  /**< Exception handling not represented. */
};

/**
 * Copy all entities of the stack frame of one irg to the stack frame of
 * another. Sets the entity links of the old entities to the copies.
 */
static void copy_frame_entities(ir_graph *from, ir_graph *to)
{
	ir_type *from_frame = get_irg_frame_type(from);
	ir_type *to_frame   = get_irg_frame_type(to);
	size_t   n_members  = get_class_n_members(from_frame);
	size_t   i;

	assert(from_frame != to_frame);

	for (i = 0; i < n_members; ++i) {
		ir_entity *old_ent = get_class_member(from_frame, i);
		ir_entity *new_ent = copy_entity_own(old_ent, to_frame);
		set_entity_link(old_ent, new_ent);
		assert(!is_parameter_entity(old_ent));
	}
}

/* Inlines a method at the given call site. */
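/* Note: the Proj list of the Call and the Phi lists of the blocks must be
 * up to date (collect_phiprojs()); both the exception-Proj walk below and
 * part_block() rely on this. See inline_into(). */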
int inline_method(ir_node *const call, ir_graph *called_graph)
{
	/* we cannot inline some types of calls */
	if (!can_inline(call, called_graph))
		return 0;

	/* We cannot inline a recursive call. The graph must be copied before
	 * the call to inline_method() using create_irg_copy(). */
	ir_graph *irg = get_irn_irg(call);
	if (called_graph == irg)
		return 0;

	ir_entity *ent      = get_irg_entity(called_graph);
	ir_type   *mtp      = get_entity_type(ent);
	ir_type   *ctp      = get_Call_type(call);
	int        n_params = get_method_n_params(mtp);

	ir_graph *rem = current_ir_graph;
	current_ir_graph = irg;

	DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));

	/* optimizations can cause problems when allocating new nodes */
	int rem_opt = get_opt_optimize();
	set_optimize(0);

	/* Handle graph state */
	assert(get_irg_pinned(irg) == op_pin_state_pinned);
	assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
	clear_irg_properties(irg, IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE
	                        | IR_GRAPH_PROPERTY_CONSISTENT_ENTITY_USAGE);
	set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
	clear_irg_properties(irg, IR_GRAPH_PROPERTY_CONSISTENT_ENTITY_USAGE);
	edges_deactivate(irg);

	/* here we know we WILL inline, so inform the statistics */
	hook_inline(call, called_graph);

	/* -- Decide how to handle exception control flow: Is there a handler
	   for the Call node, or do we branch directly to End on an exception?
	   exc_handler:    There is a handler.
	   exc_no_handler: Exception handling not represented in Firm. -- */
	ir_node *Xproj = NULL;
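	/* walk the list of Projs built by collect_phiprojs() to find an
	 * exception Proj of the Call */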
	for (ir_node *proj = (ir_node*)get_irn_link(call); proj != NULL;
	     proj = (ir_node*)get_irn_link(proj)) {
		long proj_nr = get_Proj_proj(proj);
		if (proj_nr == pn_Call_X_except) Xproj = proj;
	}
	enum exc_mode exc_handling = Xproj != NULL ? exc_handler : exc_no_handler;

	/* create the argument tuple */
	ir_node **args_in = ALLOCAN(ir_node*, n_params);

	ir_node *block = get_nodes_block(call);
	for (int i = n_params - 1; i >= 0; --i) {
		ir_node *arg      = get_Call_param(call, i);
		ir_type *param_tp = get_method_param_type(mtp, i);
		ir_mode *mode     = get_type_mode(param_tp);

		if (mode != get_irn_mode(arg)) {
			arg = new_r_Conv(block, arg, mode);
		}
		args_in[i] = arg;
	}

	/* pre_call is a Tuple that jointly represents all values flowing into
	 * the procedure and later replaces the Start node of the called graph.
	 * post_call is the old Call node and collects the results of the called
	 * graph. Both will end up being a tuple. */
	ir_node *post_bl = get_nodes_block(call);
	/* XxMxPxPxPxT of Start + parameter of Call */
	ir_node *in[pn_Start_max+1];
	in[pn_Start_M]              = get_Call_mem(call);
	in[pn_Start_X_initial_exec] = new_r_Jmp(post_bl);
	in[pn_Start_P_frame_base]   = get_irg_frame(irg);
	in[pn_Start_T_args]         = new_r_Tuple(post_bl, n_params, args_in);
	ir_node *pre_call = new_r_Tuple(post_bl, pn_Start_max+1, in);

	/* -- The new block gets the ins of the old block, pre_call and all its
	   predecessors and all Phi nodes. -- */
	part_block(pre_call);

	/* increment visited flag for later walk */
	inc_irg_visited(called_graph);

	/* link some nodes to nodes in the current graph so instead of copying
	 * the linked nodes will get used.
	 * So the copier will use the created Tuple instead of copying the start
	 * node, similar for singleton nodes like NoMem and Bad.
	 * Note: this will prevent predecessors from being copied; only do it
	 * for nodes without predecessors */
	ir_node *start_block = get_irg_start_block(called_graph);
	set_new_node(start_block, get_nodes_block(pre_call));
	mark_irn_visited(start_block);

	ir_node *start = get_irg_start(called_graph);
	set_new_node(start, pre_call);
	mark_irn_visited(start);

	ir_node *nomem = get_irg_no_mem(called_graph);
	set_new_node(nomem, get_irg_no_mem(irg));
	mark_irn_visited(nomem);

	/* entity link is used to link entities on the old stack frame to the
	 * copied entities on the new stack frame */
	irp_reserve_resources(irp, IRP_RESOURCE_ENTITY_LINK);

	/* copy entities and nodes */
	assert(!irn_visited(get_irg_end(called_graph)));
	copy_frame_entities(called_graph, irg);
	irg_walk_core(get_irg_end(called_graph), copy_node_inline, set_preds_inline,
	              irg);

	irp_free_resources(irp, IRP_RESOURCE_ENTITY_LINK);

	/* -- Merge the end of the inlined procedure with the call site -- */
	/* We will turn the old Call node into a Tuple with the following
	   predecessors:
	   pn_Call_M:         Phi of all memories reaching the end of the
	                      inlined graph.
	   pn_Call_T_result:  Tuple of all results.
	   pn_Call_X_regular: Jmp merging the regular control flow.
	   pn_Call_X_except:  Jmp from a new block that merges the control
	                      flow from all exception predecessors of the old
	                      end block.
	   In case the old Call directly branches to End on an exception we don't
	   need the block merging all exceptions nor the Phi of the exception
	   memories. */

	/* Precompute some values */
	ir_node *end_bl = get_new_node(get_irg_end_block(called_graph));
	ir_node *end    = get_new_node(get_irg_end(called_graph));
	int      arity  = get_Block_n_cfgpreds(end_bl); /* arity = n_exc + n_ret */
	int      n_res  = get_method_n_ress(get_Call_type(call));

	ir_node **res_pred = XMALLOCN(ir_node*, n_res);
	ir_node **cf_pred  = XMALLOCN(ir_node*, arity);

	/* archive keepalives */
	int irn_arity = get_irn_arity(end);
	for (int i = 0; i < irn_arity; i++) {
		ir_node *ka = get_End_keepalive(end, i);
		if (!is_Bad(ka))
			add_End_keepalive(get_irg_end(irg), ka);
	}

	/* replace Return nodes by Jump nodes */
	int n_ret = 0;
	for (int i = 0; i < arity; i++) {
		ir_node *ret = get_Block_cfgpred(end_bl, i);
		if (is_Return(ret)) {
			ir_node *block = get_nodes_block(ret);
			cf_pred[n_ret] = new_r_Jmp(block);
			++n_ret;
		}
	}
	set_irn_in(post_bl, n_ret, cf_pred);

	/* build a Tuple for all results of the method.
	 * add Phi node if there was more than one Return. */
	/* First the Memory-Phi */
	int n_mem_phi = 0;
	for (int i = 0; i < arity; i++) {
		ir_node *ret = get_Block_cfgpred(end_bl, i);
		if (is_Return(ret)) {
			cf_pred[n_mem_phi++] = get_Return_mem(ret);
		} else if (is_Call(ret)) {
			/* memory output for some exceptions is directly connected to End */
			cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 3);
		} else if (is_fragile_op(ret)) {
			/* We rely on the fact that all cfops have the memory output at
			 * the same position. */
			cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 0);
		} else if (is_Raise(ret)) {
			cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 1);
		}
	}
	ir_node *const call_mem = new_r_Phi(post_bl, n_mem_phi, cf_pred, mode_M);
	/* Conserve Phi-list for further inlinings -- but might be optimized */
	if (get_nodes_block(call_mem) == post_bl) {
		set_irn_link(call_mem, get_irn_link(post_bl));
		set_irn_link(post_bl, call_mem);
	}

	/* Now the real results */
	ir_node *call_res;
	if (n_res > 0) {
		for (int j = 0; j < n_res; j++) {
			ir_type *res_type = get_method_res_type(ctp, j);
			ir_mode *res_mode = get_type_mode(res_type);
			int n_ret = 0;
			for (int i = 0; i < arity; i++) {
				ir_node *ret = get_Block_cfgpred(end_bl, i);
				if (is_Return(ret)) {
					ir_node *res = get_Return_res(ret, j);
					if (get_irn_mode(res) != res_mode) {
						ir_node *block = get_nodes_block(res);
						res = new_r_Conv(block, res, res_mode);
					}
					cf_pred[n_ret] = res;
					++n_ret;
				}
			}
			ir_node *const phi = n_ret > 0
				? new_r_Phi(post_bl, n_ret, cf_pred, res_mode)
				: new_r_Bad(irg, res_mode);
			res_pred[j] = phi;
			/* Conserve Phi-list for further inlinings -- but might be optimized */
			if (get_nodes_block(phi) == post_bl) {
				set_Phi_next(phi, get_Block_phis(post_bl));
				set_Block_phis(post_bl, phi);
			}
		}
		call_res = new_r_Tuple(post_bl, n_res, res_pred);
	} else {
		call_res = new_r_Bad(irg, mode_T);
	}

	/* handle the regular call */
	ir_node *const call_x_reg = new_r_Jmp(post_bl);

	/* Finally the exception control flow.
	   We have two possible situations:
	   First, if the Call branches to an exception handler:
	   We need to add a Phi node to collect the memory containing the
	   exception objects. Further we need to add another block to get a
	   correct representation of this Phi. To this block we add a Jmp that
	   resolves into the X output of the Call when the Call is turned into
	   a tuple.
	   Second: There is no exception edge. Just add all inlined exception
	   branches to the End node. */
	ir_node *call_x_exc;
	if (exc_handling == exc_handler) {
		int n_exc = 0;
		for (int i = 0; i < arity; i++) {
			ir_node *ret = get_Block_cfgpred(end_bl, i);
			ir_node *irn = skip_Proj(ret);
			if (is_fragile_op(irn) || is_Raise(irn)) {
				cf_pred[n_exc] = ret;
				++n_exc;
			}
		}
		if (n_exc == 1) {
			/* simple fix */
			call_x_exc = cf_pred[0];
		} else if (n_exc > 0) {
			ir_node *block = new_r_Block(irg, n_exc, cf_pred);
			call_x_exc = new_r_Jmp(block);
		} else {
			call_x_exc = new_r_Bad(irg, mode_X);
		}
	} else {
		/* assert(exc_handling == exc_no_handler || there are no exceptions) */
		int n_exc = 0;
		for (int i = 0; i < arity; i++) {
			ir_node *ret = get_Block_cfgpred(end_bl, i);
			ir_node *irn = skip_Proj(ret);

			if (is_fragile_op(irn) || is_Raise(irn)) {
				cf_pred[n_exc] = ret;
				++n_exc;
			}
		}
		ir_node  *main_end_bl       = get_irg_end_block(irg);
		int       main_end_bl_arity = get_irn_arity(main_end_bl);
		ir_node **end_preds         = XMALLOCN(ir_node*, n_exc + main_end_bl_arity);

		for (int i = 0; i < main_end_bl_arity; ++i)
			end_preds[i] = get_irn_n(main_end_bl, i);
		for (int i = 0; i < n_exc; ++i)
			end_preds[main_end_bl_arity + i] = cf_pred[i];
		set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
		call_x_exc = new_r_Bad(irg, mode_X);
		free(end_preds);
	}
	free(res_pred);
	free(cf_pred);

	ir_node *const call_in[] = {
		[pn_Call_M]         = call_mem,
		[pn_Call_T_result]  = call_res,
		[pn_Call_X_regular] = call_x_reg,
		[pn_Call_X_except]  = call_x_exc,
	};
	turn_into_tuple(call, ARRAY_SIZE(call_in), call_in);

	/* -- Turn CSE back on. -- */
	set_optimize(rem_opt);
	current_ir_graph = rem;

	return 1;
}

/********************************************************************/
/*  Apply inlining to small methods.                                */
/********************************************************************/
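
/** A temporary obstack on which call entries and inline environments are
 *  allocated during inlining. */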
static struct obstack temp_obst;

/** Represents a possibly inlinable call in a graph. */
typedef struct call_entry {
	ir_node  *call;        /**< The Call node. */
	ir_graph *callee;      /**< The callee IR-graph. */
	list_head list;        /**< List head for linking the next one. */
	int       loop_depth;  /**< The loop depth of this call. */
	int       benefice;    /**< The calculated benefice of this call. */
	unsigned  local_adr:1; /**< Set if this call gets an address of a local variable. */
	unsigned  all_const:1; /**< Set if this call has only constant parameters. */
} call_entry;

/**
 * Returns the irg called from a Call node. If the irg is not
 * known, NULL is returned.
 *
 * @param call  the call node
 */
static ir_graph *get_call_called_irg(ir_node *call)
{
	ir_node *addr;

	addr = get_Call_ptr(call);
	if (is_SymConst_addr_ent(addr)) {
		ir_entity *ent = get_SymConst_entity(addr);
		/* we don't know which function gets finally bound to a weak symbol */
		if (get_entity_linkage(ent) & IR_LINKAGE_WEAK)
			return NULL;

		return get_entity_irg(ent);
	}

	return NULL;
}

/**
 * Environment for inlining irgs.
 */
typedef struct {
	list_head calls;             /**< List of all call nodes in this graph. */
	unsigned *local_weights;     /**< Once allocated, the beneficial weight for transmitting local addresses. */
	unsigned  n_nodes;           /**< Number of nodes in graph except Id, Tuple, Proj, Start, End. */
	unsigned  n_blocks;          /**< Number of Blocks in graph without Start and End block. */
	unsigned  n_nodes_orig;      /**< for statistics */
	unsigned  n_call_nodes;      /**< Number of Call nodes in the graph. */
	unsigned  n_call_nodes_orig; /**< for statistics */
	unsigned  n_callers;         /**< Number of known graphs that call this graph. */
	unsigned  n_callers_orig;    /**< for statistics */
	unsigned  got_inline:1;      /**< Set, if at least one call inside this graph was inlined. */
	unsigned  recursive:1;       /**< Set, if this function is self recursive. */
} inline_irg_env;

/**
 * Allocate a new environment for inlining.
 */
static inline_irg_env *alloc_inline_irg_env(void)
{
	inline_irg_env *env = OALLOC(&temp_obst, inline_irg_env);
	INIT_LIST_HEAD(&env->calls);
	env->local_weights     = NULL;
	env->n_nodes           = -2; /* do not count Start, End */
	env->n_blocks          = -2; /* do not count Start, End Block */
	env->n_nodes_orig      = -2; /* do not count Start, End */
	env->n_call_nodes      = 0;
	env->n_call_nodes_orig = 0;
	env->n_callers         = 0;
	env->n_callers_orig    = 0;
	env->got_inline        = 0;
	env->recursive         = 0;
	return env;
}

typedef struct walker_env {
	inline_irg_env *x;              /**< the inline environment */
	char            ignore_runtime; /**< the ignore runtime flag */
	char            ignore_callers; /**< if set, do not update caller statistics */
} wenv_t;

/**
 * post-walker: collect all calls in the inline-environment
 * of a graph and sum some statistics.
 */
static void collect_calls2(ir_node *call, void *ctx)
{
	wenv_t         *env  = (wenv_t*)ctx;
	inline_irg_env *x    = env->x;
	unsigned        code = get_irn_opcode(call);
	ir_graph       *callee;
	call_entry     *entry;

	/* count meaningful nodes in irg */
	if (code != iro_Proj && code != iro_Tuple && code != iro_Sync) {
		if (code != iro_Block) {
			++x->n_nodes;
			++x->n_nodes_orig;
		} else {
			++x->n_blocks;
		}
	}

	if (code != iro_Call) return;

	/* check if it's a runtime call */
	if (env->ignore_runtime) {
		ir_node *symc = get_Call_ptr(call);

		if (is_SymConst_addr_ent(symc)) {
			ir_entity *ent = get_SymConst_entity(symc);

			if (get_entity_additional_properties(ent) & mtp_property_runtime)
				return;
		}
	}

	/* collect all call nodes */
	++x->n_call_nodes;
	++x->n_call_nodes_orig;

	callee = get_call_called_irg(call);
	if (callee != NULL) {
		if (!env->ignore_callers) {
			inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);
			/* count all static callers */
			++callee_env->n_callers;
			++callee_env->n_callers_orig;
		}
		if (callee == current_ir_graph)
			x->recursive = 1;

		/* link it in the list of possible inlinable entries */
		entry = OALLOC(&temp_obst, call_entry);
		entry->call       = call;
		entry->callee     = callee;
		entry->loop_depth = get_irn_loop(get_nodes_block(call))->depth;
		entry->benefice   = 0;
		entry->local_adr  = 0;
		entry->all_const  = 0;

		list_add_tail(&entry->list, &x->calls);
	}
}

/**
 * Duplicate a call entry.
 *
 * @param entry             the original entry to duplicate
 * @param new_call          the new call node
 * @param loop_depth_delta  delta value for the loop depth
 */
static call_entry *duplicate_call_entry(const call_entry *entry,
                                        ir_node *new_call, int loop_depth_delta)
{
	call_entry *nentry = OALLOC(&temp_obst, call_entry);
	nentry->call       = new_call;
	nentry->callee     = entry->callee;
	nentry->benefice   = entry->benefice;
	nentry->loop_depth = entry->loop_depth + loop_depth_delta;
	nentry->local_adr  = entry->local_adr;
	nentry->all_const  = entry->all_const;

	return nentry;
}

/**
 * Calculate the parameter weights for transmitting the address of a local
 * variable. Returns 0 as soon as an unsupported user of the address is
 * found.
 */
static unsigned calc_method_local_weight(ir_node *arg)
{
	unsigned v, weight = 0;
	int      j;

	for (unsigned i = get_irn_n_outs(arg); i-- > 0; ) {
		ir_node *succ = get_irn_out(arg, i);

		switch (get_irn_opcode(succ)) {
		case iro_Load:
		case iro_Store:
			/* Loads and Stores can be removed */
			weight += 3;
			break;
		case iro_Sel:
			/* check if all args are constant */
			for (j = get_Sel_n_indexs(succ) - 1; j >= 0; --j) {
				ir_node *idx = get_Sel_index(succ, j);
				if (!is_Const(idx))
					return 0;
			}
			/* Check users on this Sel. Note: if a 0 is returned here, there was
			   some unsupported node. */
			v = calc_method_local_weight(succ);
			if (v == 0)
				return 0;
			/* we can kill one Sel with constant indexes, this is cheap */
			weight += v + 1;
			break;
		case iro_Id:
			/* when looking backward we might find Id nodes */
			weight += calc_method_local_weight(succ);
			break;
		case iro_Tuple:
			/* unoptimized tuple */
			for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
				ir_node *pred = get_Tuple_pred(succ, j);
				if (pred == arg) {
					/* look for Proj(j) */
					for (unsigned k = get_irn_n_outs(succ); k-- > 0; ) {
						ir_node *succ_succ = get_irn_out(succ, k);
						if (is_Proj(succ_succ)) {
							if (get_Proj_proj(succ_succ) == j)
								weight += calc_method_local_weight(succ_succ);
						} else {
							/* this should NOT happen */
							return 0;
						}
					}
				}
			}
			break;
		default:
			/* any other node: unsupported yet or bad. */
			return 0;
		}
	}
	return weight;
}

/**
 * Calculate the parameter weights for transmitting the address of a local variable.
 */
static void analyze_irg_local_weights(inline_irg_env *env, ir_graph *irg)
{
	ir_entity *ent = get_irg_entity(irg);
	ir_type   *mtp;
	size_t     nparams;
	long       proj_nr;
	ir_node   *irg_args, *arg;

	mtp     = get_entity_type(ent);
	nparams = get_method_n_params(mtp);

	/* allocate a new array. currently used as 'analysed' flag */
	env->local_weights = NEW_ARR_D(unsigned, &temp_obst, nparams);

	/* If the method has no parameters we have nothing to do. */
	if (nparams <= 0)
		return;

	assure_irg_outs(irg);
	irg_args = get_irg_args(irg);
	for (unsigned i = get_irn_n_outs(irg_args); i-- > 0; ) {
		arg     = get_irn_out(irg_args, i);
		proj_nr = get_Proj_proj(arg);
		env->local_weights[proj_nr] = calc_method_local_weight(arg);
	}
}

/**
 * Calculate the benefice for transmitting a local variable address.
 * After inlining, the local variable might be transformed into an
 * SSA variable by scalar_replacement().
 */
static unsigned get_method_local_adress_weight(ir_graph *callee, size_t pos)
{
	inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);

	if (env->local_weights == NULL)
		analyze_irg_local_weights(env, callee);

	if (pos < ARR_LEN(env->local_weights))
		return env->local_weights[pos];
	return 0;
}

/**
 * Calculate a benefice value for inlining the given call.
 *
 * @param entry   the call entry to inspect
 * @param callee  the called graph
 */
static int calc_inline_benefice(call_entry *entry, ir_graph *callee)
{
	ir_node   *call = entry->call;
	ir_entity *ent  = get_irg_entity(callee);
	ir_type   *callee_frame;
	size_t     i, n_members, n_params;
	ir_node   *frame_ptr;
	ir_type   *mtp;
	int        weight = 0;
	int        all_const;
	unsigned   cc, v;
	inline_irg_env *callee_env;

	mtp_additional_properties props = get_entity_additional_properties(ent);
	if (props & mtp_property_noinline) {
		DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden\n",
		    call, callee));
		return entry->benefice = INT_MIN;
	}

	callee_frame = get_irg_frame_type(callee);
	n_members    = get_class_n_members(callee_frame);
	for (i = 0; i < n_members; ++i) {
		ir_entity *frame_ent = get_class_member(callee_frame, i);
		if (is_parameter_entity(frame_ent)) {
			// TODO inliner should handle parameter entities by inserting Store operations
			DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden due to parameter entity\n", call, callee));
			add_entity_additional_properties(ent, mtp_property_noinline);
			return entry->benefice = INT_MIN;
		}
	}

	if (props & mtp_property_noreturn) {
		DB((dbg, LEVEL_2, "In %+F Call to %+F: not inlining noreturn or weak\n",
		    call, callee));
		return entry->benefice = INT_MIN;
	}

	/* costs for every passed parameter */
	n_params = get_Call_n_params(call);
	mtp      = get_entity_type(ent);
	cc       = get_method_calling_convention(mtp);
	if (cc & cc_reg_param) {
		/* register parameter, smaller costs for register parameters */
		size_t max_regs = cc & ~cc_bits;

		if (max_regs < n_params)
			weight += max_regs * 2 + (n_params - max_regs) * 5;
		else
			weight += n_params * 2;
	} else {
		/* parameters are passed on the stack */
		weight += 5 * n_params;
	}

	/* constant parameters improve the benefice */
	frame_ptr = get_irg_frame(current_ir_graph);
	all_const = 1;
	for (i = 0; i < n_params; ++i) {
		ir_node *param = get_Call_param(call, i);

		if (is_Const(param)) {
			weight += get_method_param_weight(ent, i);
		} else {
			all_const = 0;
			if (is_SymConst(param))
				weight += get_method_param_weight(ent, i);
			else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
				/*
				 * An address of a local variable is transmitted. After
				 * inlining, scalar_replacement might be able to remove the
				 * local variable, so honor this.
				 */
				v = get_method_local_adress_weight(callee, i);
				weight += v;
				if (v > 0)
					entry->local_adr = 1;
			}
		}
	}
	entry->all_const = all_const;

	callee_env = (inline_irg_env*)get_irg_link(callee);
	if (callee_env->n_callers == 1 &&
	    callee != current_ir_graph &&
	    !entity_is_externally_visible(ent)) {
		weight += 700;
	}

	/* give a bonus for functions with one block */
	if (callee_env->n_blocks == 1)
		weight = weight * 3 / 2;

	/* and one for small non-recursive functions: we want them to be inlined in almost every case */
	if (callee_env->n_nodes < 30 && !callee_env->recursive)
		weight += 2000;

	/* and finally for leafs: they do not increase the register pressure
	   because of callee-save registers */
	if (callee_env->n_call_nodes == 0)
		weight += 400;

	/* it's important to inline inner loops first */
	if (entry->loop_depth > 30)
		weight += 30 * 1024;
	else
		weight += entry->loop_depth * 1024;

	/*
	 * All arguments constant is probably a good sign, give an extra bonus
	 */
	if (all_const)
		weight += 1024;

	return entry->benefice = weight;
}

typedef struct walk_env_t {
	ir_graph **irgs;     /**< The collected irgs. */
	size_t     last_irg; /**< The index of the next free irgs slot. */
} walk_env_t;

/**
 * Callgraph walker, collect all visited graphs.
 */
static void callgraph_walker(ir_graph *irg, void *data)
{
	walk_env_t *env = (walk_env_t *)data;
	env->irgs[env->last_irg++] = irg;
}
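
/* callgraph_walker is used as a post-walker below, so a graph is appended
 * only after its callees have been visited: the resulting list is roughly
 * bottom-up (callees before callers, up to cycles in the callgraph). */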

/**
 * Creates an inline order for all graphs.
 *
 * @return the list of graphs.
 */
static ir_graph **create_irg_list(void)
{
	ir_entity **free_methods;
	size_t      n_irgs = get_irp_n_irgs();
	walk_env_t  env;

	cgana(&free_methods);
	xfree(free_methods);

	compute_callgraph();

	env.irgs     = XMALLOCNZ(ir_graph*, n_irgs);
	env.last_irg = 0;

	callgraph_walk(NULL, callgraph_walker, &env);
	assert(n_irgs == env.last_irg);

	free_callgraph();

	return env.irgs;
}

/**
 * Push a call onto the priority list if its benefice is big enough.
 *
 * @param pqueue            the priority queue of calls
 * @param call              the call entry
 * @param inline_threshold  the threshold value
 */
static void maybe_push_call(pqueue_t *pqueue, call_entry *call,
                            int inline_threshold)
{
	ir_graph *callee   = call->callee;
	int       benefice = calc_inline_benefice(call, callee);

	DB((dbg, LEVEL_2, "In %+F Call %+F to %+F has benefice %d\n",
	    get_irn_irg(call->call), call->call, callee, benefice));

	ir_entity *ent = get_irg_entity(callee);
	mtp_additional_properties props = get_entity_additional_properties(ent);
	if (!(props & mtp_property_always_inline) && benefice < inline_threshold) {
		return;
	}

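	/* the benefice is used as the priority: the most beneficial calls are
	 * popped (and thus inlined) first */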
	pqueue_put(pqueue, call, benefice);
}

/**
 * Try to inline calls into a graph.
 *
 * @param irg               the graph into which we inline
 * @param maxsize           do NOT inline if the size of irg gets
 *                          bigger than this amount
 * @param inline_threshold  threshold value for inline decision
 * @param copied_graphs     map containing copies of recursive graphs
 */
static void inline_into(ir_graph *irg, unsigned maxsize,
                        int inline_threshold, pmap *copied_graphs)
{
	int phiproj_computed = 0;
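	/* nonzero while the Phi/Proj lists computed by collect_phiprojs() are
	 * valid; every successful inlining invalidates them again */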
	inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
	wenv_t          wenv;
	pqueue_t       *pqueue;

	if (env->n_call_nodes == 0)
		return;

	if (env->n_nodes > maxsize) {
		DB((dbg, LEVEL_2, "%+F: too big (%d)\n", irg, env->n_nodes));
		return;
	}

	current_ir_graph = irg;
	ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

	/* put irgs into the pqueue */
	pqueue = new_pqueue();

	list_for_each_entry(call_entry, curr_call, &env->calls, list) {
		assert(is_Call(curr_call->call));
		maybe_push_call(pqueue, curr_call, inline_threshold);
	}

	/* note that the list of possible calls is updated during the process */
	while (!pqueue_empty(pqueue)) {
		int             did_inline;
		call_entry     *curr_call  = (call_entry*)pqueue_pop_front(pqueue);
		ir_graph       *callee     = curr_call->callee;
		ir_node        *call_node  = curr_call->call;
		inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);
		ir_entity      *ent        = get_irg_entity(callee);
		mtp_additional_properties props
			= get_entity_additional_properties(ent);
		ir_graph       *calleee;
		int             loop_depth;

		if (!(props & mtp_property_always_inline)
		    && env->n_nodes + callee_env->n_nodes > maxsize) {
			DB((dbg, LEVEL_2, "%+F: too big (%d) + %+F (%d)\n", irg,
			    env->n_nodes, callee, callee_env->n_nodes));
			continue;
		}

		calleee = pmap_get(ir_graph, copied_graphs, callee);
		if (calleee != NULL) {
			int benefice = curr_call->benefice;
			/*
			 * Reduce the weight for recursive functions iff not all
			 * arguments are const; inlining recursive functions is
			 * rarely good.
			 */
			if (!curr_call->all_const)
				benefice -= 2000;
			if (benefice < inline_threshold)
				continue;

			/*
			 * Remap callee if we have a copy.
			 */
			callee     = calleee;
			callee_env = (inline_irg_env*)get_irg_link(callee);
		}

		if (current_ir_graph == callee) {
			/*
			 * Recursive call: we cannot directly inline because we cannot
			 * walk the graph and change it. So we have to make a copy of
			 * the graph first.
			 */
			int       benefice = curr_call->benefice;
			ir_graph *copy;

			/*
			 * Reduce the weight for recursive functions iff not all
			 * arguments are const; inlining recursive functions is
			 * rarely good.
			 */
			if (!curr_call->all_const)
				benefice -= 2000;
			if (benefice < inline_threshold)
				continue;

			ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

			/*
			 * No copy yet, create one.
			 * Note that recursive methods are never leafs, so it is
			 * sufficient to test this condition here.
			 */
			copy = create_irg_copy(callee);

			/* create_irg_copy() destroys the Proj links, recompute them */
			phiproj_computed = 0;

			ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

			/* allocate a new environment */
			callee_env = alloc_inline_irg_env();
			set_irg_link(copy, callee_env);

			assure_irg_properties(copy, IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO);
			memset(&wenv, 0, sizeof(wenv));
			wenv.x              = callee_env;
			wenv.ignore_callers = 1;
			irg_walk_graph(copy, NULL, collect_calls2, &wenv);

			/*
			 * Enter the entity of the original graph. This is needed
			 * for inline_method(). However, note that ent->irg still points
			 * to callee, NOT to copy.
			 */
			set_irg_entity(copy, get_irg_entity(callee));

			pmap_insert(copied_graphs, callee, copy);
			callee = copy;

			/* we have only one caller: the original graph */
			callee_env->n_callers      = 1;
			callee_env->n_callers_orig = 1;
		}

		if (!phiproj_computed) {
			phiproj_computed = 1;
			collect_phiprojs(current_ir_graph);
		}
		did_inline = inline_method(call_node, callee);
		if (!did_inline)
			continue;

		/* call was inlined, Phi/Projs for current graph must be recomputed */
		phiproj_computed = 0;

		/* remove it from the caller list */
		list_del(&curr_call->list);

		/* callee was inlined. Append its call list. */
		env->got_inline = 1;
		--env->n_call_nodes;

		/* we just generate a bunch of new calls */
		loop_depth = curr_call->loop_depth;
		list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
			inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
			ir_node        *new_call;
			call_entry     *new_entry;

			/* after we have inlined callee, all called methods inside
			 * callee are now called once more */
			++penv->n_callers;

			/* Note that the src list points to Call nodes in the inlined graph,
			 * but we need Call nodes in our graph. Luckily the inliner leaves
			 * this information in the link field. */
			new_call = (ir_node*)get_irn_link(centry->call);
			if (get_irn_irg(new_call) != irg) {
				/* centry->call has not been copied, which means it is dead.
				 * This might happen during inlining, if a const function,
				 * which cannot be inlined, is only used as an unused argument
				 * of another function, which is inlined. */
				continue;
			}
			assert(is_Call(new_call));

			new_entry = duplicate_call_entry(centry, new_call, loop_depth);
			list_add_tail(&new_entry->list, &env->calls);
			maybe_push_call(pqueue, new_entry, inline_threshold);
		}

		env->n_call_nodes += callee_env->n_call_nodes;
		env->n_nodes      += callee_env->n_nodes;
		--callee_env->n_callers;
	}
	ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
	del_pqueue(pqueue);
}

/*
 * Heuristic inliner. Calculates a benefice value for every call and inlines
 * those calls with a value higher than the threshold.
 */
void inline_functions(unsigned maxsize, int inline_threshold,
                      opt_ptr after_inline_opt)
{
	inline_irg_env *env;
	size_t          i, n_irgs;
	ir_graph       *rem;
	wenv_t          wenv;
	pmap           *copied_graphs;
	pmap_entry     *pm_entry;
	ir_graph      **irgs;

	rem = current_ir_graph;
	obstack_init(&temp_obst);

	irgs = create_irg_list();

	/* a map for the copied graphs, used to inline recursive calls */
	copied_graphs = pmap_create();

	/* extend all irgs by a temporary data structure for inlining. */
	n_irgs = get_irp_n_irgs();
	for (i = 0; i < n_irgs; ++i)
		set_irg_link(irgs[i], alloc_inline_irg_env());

	/* Pre-compute information in temporary data structure. */
	wenv.ignore_runtime = 0;
	wenv.ignore_callers = 0;
	for (i = 0; i < n_irgs; ++i) {
		ir_graph *irg = irgs[i];

		free_callee_info(irg);

		wenv.x = (inline_irg_env*)get_irg_link(irg);
		assure_loopinfo(irg);
		irg_walk_graph(irg, NULL, collect_calls2, &wenv);
	}

	/* -- and now inline. -- */
	for (i = 0; i < n_irgs; ++i) {
		ir_graph *irg = irgs[i];

		inline_into(irg, maxsize, inline_threshold, copied_graphs);
	}

	for (i = 0; i < n_irgs; ++i) {
		ir_graph *irg = irgs[i];

		env = (inline_irg_env*)get_irg_link(irg);
		if (env->got_inline && after_inline_opt != NULL) {
			/* this irg got calls inlined: optimize it */
			after_inline_opt(irg);
		}
		if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
			DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
			    env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
			    env->n_callers_orig, env->n_callers,
			    get_entity_name(get_irg_entity(irg))));
		}
	}

	/* kill the copied graphs: we don't need them anymore */
	foreach_pmap(copied_graphs, pm_entry) {
		ir_graph *copy = (ir_graph*)pm_entry->value;

		/* reset the entity, otherwise it will be deleted in the next step ... */
		set_irg_entity(copy, NULL);
		free_ir_graph(copy);
	}
	pmap_destroy(copied_graphs);

	xfree(irgs);

	obstack_free(&temp_obst, NULL);
	current_ir_graph = rem;
}
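
/* A minimal usage sketch (illustrative only, not part of this file):
 * `cleanup` is a hypothetical callback; any opt_ptr, e.g. one calling
 * local_optimize_graph(), can be passed as after_inline_opt, and the size
 * and threshold values below are made-up examples.
 *
 *     static void cleanup(ir_graph *irg) { local_optimize_graph(irg); }
 *     ...
 *     inline_functions(750, 0, cleanup);
 */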

typedef struct inline_functions_pass_t {
	ir_prog_pass_t pass;
	unsigned       maxsize;
	int            inline_threshold;
	opt_ptr        after_inline_opt;
} inline_functions_pass_t;

/**
 * Wrapper to run inline_functions() as an ir_prog pass.
 */
static int inline_functions_wrapper(ir_prog *irp, void *context)
{
	inline_functions_pass_t *pass = (inline_functions_pass_t*)context;

	(void)irp;
	inline_functions(pass->maxsize, pass->inline_threshold,
	                 pass->after_inline_opt);
	return 0;
}

/* create an ir_prog pass for inline_functions */
ir_prog_pass_t *inline_functions_pass(
	const char *name, unsigned maxsize, int inline_threshold,
	opt_ptr after_inline_opt)
{
	inline_functions_pass_t *pass = XMALLOCZ(inline_functions_pass_t);

	pass->maxsize          = maxsize;
	pass->inline_threshold = inline_threshold;
	pass->after_inline_opt = after_inline_opt;

	return def_prog_pass_constructor(
		&pass->pass, name ? name : "inline_functions",
		inline_functions_wrapper);
}

void firm_init_inline(void)
{
	FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
}