2 * Copyright (C) 1995-2011 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Dead node elimination and Procedure Inlining.
23 * @author Michael Beck, Goetz Lindenmaier
32 #include "irgraph_t.h"
35 #include "iroptimize.h"
52 #include "irbackedge_t.h"
58 #include "analyze_irg_args.h"
59 #include "iredges_t.h"
63 #include "iropt_dbg.h"
65 #include "irnodemap.h"
67 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
69 /*------------------------------------------------------------------*/
70 /* Routines for dead node elimination / copying garbage collection */
72 /*------------------------------------------------------------------*/
75 * Remember the new node in the old node by using a field all nodes have.
77 static void set_new_node(ir_node *node, ir_node *new_node)
79 set_irn_link(node, new_node);
83 * Get this new node, before the old node is forgotten.
85 static inline ir_node *get_new_node(ir_node *old_node)
87 assert(irn_visited(old_node));
88 return (ir_node*) get_irn_link(old_node);
91 /*--------------------------------------------------------------------*/
92 /* Functionality for inlining */
93 /*--------------------------------------------------------------------*/
96 * Copy node for inlineing. Updates attributes that change when
97 * inlineing but not for dead node elimination.
99 * Copies the node by calling copy_node() and then updates the entity if
100 * it's a local one. env must be a pointer of the frame type of the
101 * inlined procedure. The new entities must be in the link field of
104 static void copy_node_inline(ir_node *node, void *env)
106 ir_graph *new_irg = (ir_graph*) env;
107 ir_node *new_node = irn_copy_into_irg(node, new_irg);
109 set_new_node(node, new_node);
111 ir_graph *old_irg = get_irn_irg(node);
112 ir_type *old_frame_type = get_irg_frame_type(old_irg);
113 ir_entity *old_entity = get_Sel_entity(node);
114 assert(is_Sel(new_node));
115 /* use copied entities from the new frame */
116 if (get_entity_owner(old_entity) == old_frame_type) {
117 ir_entity *new_entity = (ir_entity*)get_entity_link(old_entity);
118 assert(new_entity != NULL);
119 set_Sel_entity(new_node, new_entity);
121 } else if (is_Block(new_node)) {
122 new_node->attr.block.irg.irg = new_irg;
126 static void set_preds_inline(ir_node *node, void *env)
130 irn_rewire_inputs(node);
132 /* move constants into start block */
133 new_node = get_new_node(node);
134 if (is_irn_constlike(new_node)) {
135 ir_graph *new_irg = (ir_graph *) env;
136 ir_node *start_block = get_irg_start_block(new_irg);
137 set_nodes_block(new_node, start_block);
142 * Walker: checks if P_value_arg_base is used.
144 static void find_addr(ir_node *node, void *env)
146 bool *allow_inline = (bool*)env;
148 if (is_Block(node) && get_Block_entity(node)) {
150 * Currently we can't handle blocks whose address was taken correctly
153 *allow_inline = false;
154 } else if (is_Sel(node)) {
155 ir_graph *irg = current_ir_graph;
156 if (get_Sel_ptr(node) == get_irg_frame(irg)) {
157 /* access to frame */
158 ir_entity *ent = get_Sel_entity(node);
159 if (get_entity_owner(ent) != get_irg_frame_type(irg)) {
160 /* access to value_type */
161 *allow_inline = false;
163 if (is_parameter_entity(ent)) {
164 *allow_inline = false;
167 } else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
169 * Refuse to inline alloca call unless user explicitly forced so as this
170 * may change program's memory overhead drastically when the function
171 * using alloca is called in loop. In GCC present in SPEC2000 inlining
172 * into schedule_block cause it to require 2GB of ram instead of 256MB.
174 * Sorrily this is true with our implementation also.
175 * Moreover, we cannot differentiate between alloca() and VLA yet, so
176 * this disables inlining of functions using VLA (which are completely
180 * - add a flag to the Alloc node for "real" alloca() calls
181 * - add a new Stack-Restore node at the end of a function using
184 *allow_inline = false;
189 * Check if we can inline a given call.
190 * Currently, we cannot inline two cases:
191 * - call with compound arguments
192 * - graphs that take the address of a parameter
194 * check these conditions here
196 static bool can_inline(ir_node *call, ir_graph *called_graph)
198 ir_entity *called = get_irg_entity(called_graph);
199 ir_type *called_type = get_entity_type(called);
200 ir_type *call_type = get_Call_type(call);
201 size_t n_params = get_method_n_params(called_type);
202 size_t n_arguments = get_method_n_params(call_type);
203 size_t n_res = get_method_n_ress(called_type);
204 irg_inline_property prop = get_irg_inline_property(called_graph);
208 if (prop == irg_inline_forbidden)
211 if (n_arguments != n_params) {
212 /* this is a bad feature of C: without a prototype, we can
213 * call a function with less parameters than needed. Currently
214 * we don't support this, although we could use Unknown than. */
217 if (n_res != get_method_n_ress(call_type)) {
221 /* Argh, compiling C has some bad consequences:
222 * It is implementation dependent what happens in that case.
223 * We support inlining, if the bitsize of the types matches AND
224 * the same arithmetic is used. */
225 for (i = 0; i < n_params; ++i) {
226 ir_type *param_tp = get_method_param_type(called_type, i);
227 ir_type *arg_tp = get_method_param_type(call_type, i);
229 if (param_tp != arg_tp) {
230 ir_mode *pmode = get_type_mode(param_tp);
231 ir_mode *amode = get_type_mode(arg_tp);
233 if (pmode == NULL || amode == NULL)
235 if (get_mode_size_bits(pmode) != get_mode_size_bits(amode))
237 if (get_mode_arithmetic(pmode) != get_mode_arithmetic(amode))
239 /* otherwise we can simply "reinterpret" the bits */
242 for (i = 0; i < n_res; ++i) {
243 ir_type *decl_res_tp = get_method_res_type(called_type, i);
244 ir_type *used_res_tp = get_method_res_type(call_type, i);
246 if (decl_res_tp != used_res_tp) {
247 ir_mode *decl_mode = get_type_mode(decl_res_tp);
248 ir_mode *used_mode = get_type_mode(used_res_tp);
249 if (decl_mode == NULL || used_mode == NULL)
251 if (get_mode_size_bits(decl_mode) != get_mode_size_bits(used_mode))
253 if (get_mode_arithmetic(decl_mode) != get_mode_arithmetic(used_mode))
255 /* otherwise we can "reinterpret" the bits */
259 /* check parameters for compound arguments */
260 for (i = 0; i < n_params; ++i) {
261 ir_type *p_type = get_method_param_type(call_type, i);
263 if (is_compound_type(p_type))
267 /* check results for compound arguments */
268 for (i = 0; i < n_res; ++i) {
269 ir_type *r_type = get_method_res_type(call_type, i);
271 if (is_compound_type(r_type))
276 irg_walk_graph(called_graph, find_addr, NULL, &res);
282 exc_handler, /**< There is a handler. */
283 exc_no_handler /**< Exception handling not represented. */
287 * copy all entities on the stack frame on 1 irg to the stackframe of another.
288 * Sets entity links of the old entities to the copies
290 static void copy_frame_entities(ir_graph *from, ir_graph *to)
292 ir_type *from_frame = get_irg_frame_type(from);
293 ir_type *to_frame = get_irg_frame_type(to);
294 size_t n_members = get_class_n_members(from_frame);
296 assert(from_frame != to_frame);
298 for (i = 0; i < n_members; ++i) {
299 ir_entity *old_ent = get_class_member(from_frame, i);
300 ir_entity *new_ent = copy_entity_own(old_ent, to_frame);
301 set_entity_link(old_ent, new_ent);
302 assert (!is_parameter_entity(old_ent));
306 /* Inlines a method at the given call site. */
307 int inline_method(ir_node *call, ir_graph *called_graph)
310 ir_node *post_call, *post_bl;
311 ir_node *in[pn_Start_max+1];
312 ir_node *end, *end_bl, *block;
317 int arity, n_ret, n_exc, n_res, i, j, rem_opt;
318 int irn_arity, n_params;
320 enum exc_mode exc_handling;
325 ir_graph *irg = get_irn_irg(call);
327 /* we cannot inline some types of calls */
328 if (! can_inline(call, called_graph))
331 /* We cannot inline a recursive call. The graph must be copied before
332 * the call the inline_method() using create_irg_copy(). */
333 if (called_graph == irg)
336 ent = get_irg_entity(called_graph);
337 mtp = get_entity_type(ent);
338 ctp = get_Call_type(call);
339 n_params = get_method_n_params(mtp);
340 n_res = get_method_n_ress(mtp);
342 rem = current_ir_graph;
343 current_ir_graph = irg;
345 DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));
347 /* optimizations can cause problems when allocating new nodes */
348 rem_opt = get_opt_optimize();
351 /* Handle graph state */
352 assert(get_irg_phase_state(irg) != phase_building);
353 assert(get_irg_pinned(irg) == op_pin_state_pinned);
354 assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
355 clear_irg_properties(irg, IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE
356 | IR_GRAPH_PROPERTY_CONSISTENT_ENTITY_USAGE);
357 set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
358 clear_irg_properties(irg, IR_GRAPH_PROPERTY_CONSISTENT_ENTITY_USAGE);
359 edges_deactivate(irg);
361 /* here we know we WILL inline, so inform the statistics */
362 hook_inline(call, called_graph);
364 /* -- Decide how to handle exception control flow: Is there a handler
365 for the Call node, or do we branch directly to End on an exception?
367 0 There is a handler.
368 2 Exception handling not represented in Firm. -- */
370 ir_node *Xproj = NULL;
372 for (proj = (ir_node*)get_irn_link(call); proj != NULL;
373 proj = (ir_node*)get_irn_link(proj)) {
374 long proj_nr = get_Proj_proj(proj);
375 if (proj_nr == pn_Call_X_except) Xproj = proj;
377 exc_handling = Xproj != NULL ? exc_handler : exc_no_handler;
380 /* create the argument tuple */
381 args_in = ALLOCAN(ir_node*, n_params);
383 block = get_nodes_block(call);
384 for (i = n_params - 1; i >= 0; --i) {
385 ir_node *arg = get_Call_param(call, i);
386 ir_type *param_tp = get_method_param_type(mtp, i);
387 ir_mode *mode = get_type_mode(param_tp);
389 if (mode != get_irn_mode(arg)) {
390 arg = new_r_Conv(block, arg, mode);
395 /* the procedure and later replaces the Start node of the called graph.
396 * Post_call is the old Call node and collects the results of the called
397 * graph. Both will end up being a tuple. */
398 post_bl = get_nodes_block(call);
399 /* XxMxPxPxPxT of Start + parameter of Call */
400 in[pn_Start_M] = get_Call_mem(call);
401 in[pn_Start_X_initial_exec] = new_r_Jmp(post_bl);
402 in[pn_Start_P_frame_base] = get_irg_frame(irg);
403 in[pn_Start_T_args] = new_r_Tuple(post_bl, n_params, args_in);
404 pre_call = new_r_Tuple(post_bl, pn_Start_max+1, in);
408 The new block gets the ins of the old block, pre_call and all its
409 predecessors and all Phi nodes. -- */
410 part_block(pre_call);
412 /* increment visited flag for later walk */
413 inc_irg_visited(called_graph);
415 /* link some nodes to nodes in the current graph so instead of copying
416 * the linked nodes will get used.
417 * So the copier will use the created Tuple instead of copying the start
418 * node, similar for singleton nodes like NoMem and Bad.
419 * Note: this will prohibit predecessors to be copied - only do it for
420 * nodes without predecessors */
422 ir_node *start_block;
426 start_block = get_irg_start_block(called_graph);
427 set_new_node(start_block, get_nodes_block(pre_call));
428 mark_irn_visited(start_block);
430 start = get_irg_start(called_graph);
431 set_new_node(start, pre_call);
432 mark_irn_visited(start);
434 nomem = get_irg_no_mem(called_graph);
435 set_new_node(nomem, get_irg_no_mem(irg));
436 mark_irn_visited(nomem);
439 /* entitiy link is used to link entities on old stackframe to the
441 irp_reserve_resources(irp, IRP_RESOURCE_ENTITY_LINK);
443 /* copy entities and nodes */
444 assert(!irn_visited(get_irg_end(called_graph)));
445 copy_frame_entities(called_graph, irg);
446 irg_walk_core(get_irg_end(called_graph), copy_node_inline, set_preds_inline,
449 irp_free_resources(irp, IRP_RESOURCE_ENTITY_LINK);
451 /* -- Merge the end of the inlined procedure with the call site -- */
452 /* We will turn the old Call node into a Tuple with the following
455 0: Phi of all Memories of Return statements.
456 1: Jmp from new Block that merges the control flow from all exception
457 predecessors of the old end block.
458 2: Tuple of all arguments.
459 3: Phi of Exception memories.
460 In case the old Call directly branches to End on an exception we don't
461 need the block merging all exceptions nor the Phi of the exception
465 /* Precompute some values */
466 end_bl = get_new_node(get_irg_end_block(called_graph));
467 end = get_new_node(get_irg_end(called_graph));
468 arity = get_Block_n_cfgpreds(end_bl); /* arity = n_exc + n_ret */
469 n_res = get_method_n_ress(get_Call_type(call));
471 res_pred = XMALLOCN(ir_node*, n_res);
472 cf_pred = XMALLOCN(ir_node*, arity);
474 /* archive keepalives */
475 irn_arity = get_irn_arity(end);
476 for (i = 0; i < irn_arity; i++) {
477 ir_node *ka = get_End_keepalive(end, i);
479 add_End_keepalive(get_irg_end(irg), ka);
482 /* replace Return nodes by Jump nodes */
484 for (i = 0; i < arity; i++) {
486 ret = get_Block_cfgpred(end_bl, i);
487 if (is_Return(ret)) {
488 ir_node *block = get_nodes_block(ret);
489 cf_pred[n_ret] = new_r_Jmp(block);
493 set_irn_in(post_bl, n_ret, cf_pred);
495 /* build a Tuple for all results of the method.
496 * add Phi node if there was more than one Return. */
497 turn_into_tuple(post_call, pn_Call_max+1);
498 /* First the Memory-Phi */
500 for (i = 0; i < arity; i++) {
501 ret = get_Block_cfgpred(end_bl, i);
502 if (is_Return(ret)) {
503 cf_pred[n_mem_phi++] = get_Return_mem(ret);
505 /* memory output for some exceptions is directly connected to End */
507 cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 3);
508 } else if (is_fragile_op(ret)) {
509 /* We rely that all cfops have the memory output at the same position. */
510 cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 0);
511 } else if (is_Raise(ret)) {
512 cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 1);
515 phi = new_r_Phi(post_bl, n_mem_phi, cf_pred, mode_M);
516 set_Tuple_pred(call, pn_Call_M, phi);
517 /* Conserve Phi-list for further inlinings -- but might be optimized */
518 if (get_nodes_block(phi) == post_bl) {
519 set_irn_link(phi, get_irn_link(post_bl));
520 set_irn_link(post_bl, phi);
522 /* Now the real results */
524 ir_node *result_tuple;
525 for (j = 0; j < n_res; j++) {
526 ir_type *res_type = get_method_res_type(ctp, j);
527 ir_mode *res_mode = get_type_mode(res_type);
529 for (i = 0; i < arity; i++) {
530 ret = get_Block_cfgpred(end_bl, i);
531 if (is_Return(ret)) {
532 ir_node *res = get_Return_res(ret, j);
533 if (get_irn_mode(res) != res_mode) {
534 ir_node *block = get_nodes_block(res);
535 res = new_r_Conv(block, res, res_mode);
537 cf_pred[n_ret] = res;
542 phi = new_r_Phi(post_bl, n_ret, cf_pred, res_mode);
544 phi = new_r_Bad(irg, res_mode);
547 /* Conserve Phi-list for further inlinings -- but might be optimized */
548 if (get_nodes_block(phi) == post_bl) {
549 set_Phi_next(phi, get_Block_phis(post_bl));
550 set_Block_phis(post_bl, phi);
553 result_tuple = new_r_Tuple(post_bl, n_res, res_pred);
554 set_Tuple_pred(call, pn_Call_T_result, result_tuple);
556 set_Tuple_pred(call, pn_Call_T_result, new_r_Bad(irg, mode_T));
558 /* handle the regular call */
559 set_Tuple_pred(call, pn_Call_X_regular, new_r_Jmp(post_bl));
561 /* Finally the exception control flow.
562 We have two possible situations:
563 First if the Call branches to an exception handler:
564 We need to add a Phi node to
565 collect the memory containing the exception objects. Further we need
566 to add another block to get a correct representation of this Phi. To
567 this block we add a Jmp that resolves into the X output of the Call
568 when the Call is turned into a tuple.
569 Second: There is no exception edge. Just add all inlined exception
570 branches to the End node.
572 if (exc_handling == exc_handler) {
574 for (i = 0; i < arity; i++) {
576 ret = get_Block_cfgpred(end_bl, i);
577 irn = skip_Proj(ret);
578 if (is_fragile_op(irn) || is_Raise(irn)) {
579 cf_pred[n_exc] = ret;
586 set_Tuple_pred(call, pn_Call_X_except, cf_pred[0]);
588 ir_node *block = new_r_Block(irg, n_exc, cf_pred);
589 set_Tuple_pred(call, pn_Call_X_except, new_r_Jmp(block));
592 set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg, mode_X));
595 ir_node *main_end_bl;
596 int main_end_bl_arity;
599 /* assert(exc_handling == 1 || no exceptions. ) */
601 for (i = 0; i < arity; i++) {
602 ir_node *ret = get_Block_cfgpred(end_bl, i);
603 ir_node *irn = skip_Proj(ret);
605 if (is_fragile_op(irn) || is_Raise(irn)) {
606 cf_pred[n_exc] = ret;
610 main_end_bl = get_irg_end_block(irg);
611 main_end_bl_arity = get_irn_arity(main_end_bl);
612 end_preds = XMALLOCN(ir_node*, n_exc + main_end_bl_arity);
614 for (i = 0; i < main_end_bl_arity; ++i)
615 end_preds[i] = get_irn_n(main_end_bl, i);
616 for (i = 0; i < n_exc; ++i)
617 end_preds[main_end_bl_arity + i] = cf_pred[i];
618 set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
619 set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg, mode_X));
625 /* -- Turn CSE back on. -- */
626 set_optimize(rem_opt);
627 current_ir_graph = rem;
632 /********************************************************************/
633 /* Apply inlining to small methods. */
634 /********************************************************************/
636 static struct obstack temp_obst;
638 /** Represents a possible inlinable call in a graph. */
639 typedef struct call_entry {
640 ir_node *call; /**< The Call node. */
641 ir_graph *callee; /**< The callee IR-graph. */
642 list_head list; /**< List head for linking the next one. */
643 int loop_depth; /**< The loop depth of this call. */
644 int benefice; /**< The calculated benefice of this call. */
645 unsigned local_adr:1; /**< Set if this call gets an address of a local variable. */
646 unsigned all_const:1; /**< Set if this call has only constant parameters. */
650 * environment for inlining small irgs
652 typedef struct inline_env_t {
653 struct obstack obst; /**< An obstack where call_entries are allocated on. */
654 list_head calls; /**< The call entry list. */
658 * Returns the irg called from a Call node. If the irg is not
659 * known, NULL is returned.
661 * @param call the call node
663 static ir_graph *get_call_called_irg(ir_node *call)
667 addr = get_Call_ptr(call);
668 if (is_SymConst_addr_ent(addr)) {
669 ir_entity *ent = get_SymConst_entity(addr);
670 /* we don't know which function gets finally bound to a weak symbol */
671 if (get_entity_linkage(ent) & IR_LINKAGE_WEAK)
674 return get_entity_irg(ent);
681 * Walker: Collect all calls to known graphs inside a graph.
683 static void collect_calls(ir_node *call, void *env)
687 ir_graph *called_irg = get_call_called_irg(call);
689 if (called_irg != NULL) {
690 /* The Call node calls a locally defined method. Remember to inline. */
691 inline_env_t *ienv = (inline_env_t*)env;
692 call_entry *entry = OALLOC(&ienv->obst, call_entry);
694 entry->callee = called_irg;
695 entry->loop_depth = 0;
697 entry->local_adr = 0;
698 entry->all_const = 0;
700 list_add_tail(&entry->list, &ienv->calls);
706 * Inlines all small methods at call sites where the called address comes
707 * from a Const node that references the entity representing the called
709 * The size argument is a rough measure for the code size of the method:
710 * Methods where the obstack containing the firm graph is smaller than
713 void inline_small_irgs(ir_graph *irg, int size)
715 ir_graph *rem = current_ir_graph;
719 current_ir_graph = irg;
720 /* Handle graph state */
721 assert(get_irg_phase_state(irg) != phase_building);
722 free_callee_info(irg);
724 /* Find Call nodes to inline.
725 (We can not inline during a walk of the graph, as inlining the same
726 method several times changes the visited flag of the walked graph:
727 after the first inlining visited of the callee equals visited of
728 the caller. With the next inlining both are increased.) */
729 obstack_init(&env.obst);
730 INIT_LIST_HEAD(&env.calls);
731 irg_walk_graph(irg, NULL, collect_calls, &env);
733 if (! list_empty(&env.calls)) {
734 /* There are calls to inline */
735 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
736 collect_phiprojs(irg);
738 list_for_each_entry(call_entry, entry, &env.calls, list) {
739 ir_graph *callee = entry->callee;
740 irg_inline_property prop = get_irg_inline_property(callee);
742 if (prop == irg_inline_forbidden) {
746 if (prop >= irg_inline_forced ||
747 _obstack_memory_used(callee->obst) - (int)obstack_room(callee->obst) < size) {
748 inline_method(entry->call, callee);
751 ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
753 obstack_free(&env.obst, NULL);
754 current_ir_graph = rem;
757 typedef struct inline_small_irgs_pass_t {
758 ir_graph_pass_t pass;
760 } inline_small_irgs_pass_t;
763 * Wrapper to run inline_small_irgs() as a pass.
765 static int inline_small_irgs_wrapper(ir_graph *irg, void *context)
767 inline_small_irgs_pass_t *pass = (inline_small_irgs_pass_t*)context;
769 inline_small_irgs(irg, pass->size);
773 /* create a pass for inline_small_irgs() */
774 ir_graph_pass_t *inline_small_irgs_pass(const char *name, int size)
776 inline_small_irgs_pass_t *pass = XMALLOCZ(inline_small_irgs_pass_t);
779 return def_graph_pass_constructor(
780 &pass->pass, name ? name : "inline_small_irgs", inline_small_irgs_wrapper);
784 * Environment for inlining irgs.
787 list_head calls; /**< List of of all call nodes in this graph. */
788 unsigned *local_weights; /**< Once allocated, the beneficial weight for transmitting local addresses. */
789 unsigned n_nodes; /**< Number of nodes in graph except Id, Tuple, Proj, Start, End. */
790 unsigned n_blocks; /**< Number of Blocks in graph without Start and End block. */
791 unsigned n_nodes_orig; /**< for statistics */
792 unsigned n_call_nodes; /**< Number of Call nodes in the graph. */
793 unsigned n_call_nodes_orig; /**< for statistics */
794 unsigned n_callers; /**< Number of known graphs that call this graphs. */
795 unsigned n_callers_orig; /**< for statistics */
796 unsigned got_inline:1; /**< Set, if at least one call inside this graph was inlined. */
797 unsigned recursive:1; /**< Set, if this function is self recursive. */
801 * Allocate a new environment for inlining.
803 static inline_irg_env *alloc_inline_irg_env(void)
805 inline_irg_env *env = OALLOC(&temp_obst, inline_irg_env);
806 INIT_LIST_HEAD(&env->calls);
807 env->local_weights = NULL;
808 env->n_nodes = -2; /* do not count count Start, End */
809 env->n_blocks = -2; /* do not count count Start, End Block */
810 env->n_nodes_orig = -2; /* do not count Start, End */
811 env->n_call_nodes = 0;
812 env->n_call_nodes_orig = 0;
814 env->n_callers_orig = 0;
820 typedef struct walker_env {
821 inline_irg_env *x; /**< the inline environment */
822 char ignore_runtime; /**< the ignore runtime flag */
823 char ignore_callers; /**< if set, do change callers data */
827 * post-walker: collect all calls in the inline-environment
828 * of a graph and sum some statistics.
830 static void collect_calls2(ir_node *call, void *ctx)
832 wenv_t *env = (wenv_t*)ctx;
833 inline_irg_env *x = env->x;
834 unsigned code = get_irn_opcode(call);
838 /* count meaningful nodes in irg */
839 if (code != iro_Proj && code != iro_Tuple && code != iro_Sync) {
840 if (code != iro_Block) {
848 if (code != iro_Call) return;
850 /* check, if it's a runtime call */
851 if (env->ignore_runtime) {
852 ir_node *symc = get_Call_ptr(call);
854 if (is_SymConst_addr_ent(symc)) {
855 ir_entity *ent = get_SymConst_entity(symc);
857 if (get_entity_additional_properties(ent) & mtp_property_runtime)
862 /* collect all call nodes */
864 ++x->n_call_nodes_orig;
866 callee = get_call_called_irg(call);
867 if (callee != NULL) {
868 if (! env->ignore_callers) {
869 inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);
870 /* count all static callers */
871 ++callee_env->n_callers;
872 ++callee_env->n_callers_orig;
874 if (callee == current_ir_graph)
877 /* link it in the list of possible inlinable entries */
878 entry = OALLOC(&temp_obst, call_entry);
880 entry->callee = callee;
881 entry->loop_depth = get_irn_loop(get_nodes_block(call))->depth;
883 entry->local_adr = 0;
884 entry->all_const = 0;
886 list_add_tail(&entry->list, &x->calls);
891 * Returns TRUE if the number of callers is 0 in the irg's environment,
892 * hence this irg is a leaf.
894 inline static int is_leaf(ir_graph *irg)
896 inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
897 return env->n_call_nodes == 0;
901 * Returns TRUE if the number of nodes in the callee is
902 * smaller then size in the irg's environment.
904 inline static int is_smaller(ir_graph *callee, unsigned size)
906 inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);
907 return env->n_nodes < size;
911 * Duplicate a call entry.
913 * @param entry the original entry to duplicate
914 * @param new_call the new call node
915 * @param loop_depth_delta
916 * delta value for the loop depth
918 static call_entry *duplicate_call_entry(const call_entry *entry,
919 ir_node *new_call, int loop_depth_delta)
921 call_entry *nentry = OALLOC(&temp_obst, call_entry);
922 nentry->call = new_call;
923 nentry->callee = entry->callee;
924 nentry->benefice = entry->benefice;
925 nentry->loop_depth = entry->loop_depth + loop_depth_delta;
926 nentry->local_adr = entry->local_adr;
927 nentry->all_const = entry->all_const;
933 * Append all call nodes of the source environment to the nodes of in the destination
936 * @param dst destination environment
937 * @param src source environment
938 * @param loop_depth the loop depth of the call that is replaced by the src list
940 static void append_call_list(inline_irg_env *dst, inline_irg_env *src, int loop_depth)
942 call_entry *entry, *nentry;
944 /* Note that the src list points to Call nodes in the inlined graph, but
945 we need Call nodes in our graph. Luckily the inliner leaves this information
946 in the link field. */
947 list_for_each_entry(call_entry, entry, &src->calls, list) {
948 nentry = duplicate_call_entry(entry, (ir_node*)get_irn_link(entry->call), loop_depth);
949 list_add_tail(&nentry->list, &dst->calls);
951 dst->n_call_nodes += src->n_call_nodes;
952 dst->n_nodes += src->n_nodes;
956 * Inlines small leaf methods at call sites where the called address comes
957 * from a Const node that references the entity representing the called
959 * The size argument is a rough measure for the code size of the method:
960 * Methods where the obstack containing the firm graph is smaller than
963 void inline_leaf_functions(unsigned maxsize, unsigned leafsize,
964 unsigned size, int ignore_runtime)
972 call_entry *entry, *next;
973 const call_entry *centry;
975 pmap_entry *pm_entry;
977 rem = current_ir_graph;
978 obstack_init(&temp_obst);
980 /* a map for the copied graphs, used to inline recursive calls */
981 copied_graphs = pmap_create();
983 /* extend all irgs by a temporary data structure for inlining. */
984 n_irgs = get_irp_n_irgs();
985 for (i = 0; i < n_irgs; ++i)
986 set_irg_link(get_irp_irg(i), alloc_inline_irg_env());
988 /* Pre-compute information in temporary data structure. */
989 wenv.ignore_runtime = ignore_runtime;
990 wenv.ignore_callers = 0;
991 for (i = 0; i < n_irgs; ++i) {
992 ir_graph *irg = get_irp_irg(i);
994 assert(get_irg_phase_state(irg) != phase_building);
995 free_callee_info(irg);
997 assure_irg_properties(irg,
998 IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO);
999 wenv.x = (inline_irg_env*)get_irg_link(irg);
1000 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
1001 confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_ALL);
1004 /* -- and now inline. -- */
1006 /* Inline leafs recursively -- we might construct new leafs. */
1010 for (i = 0; i < n_irgs; ++i) {
1012 int phiproj_computed = 0;
1014 current_ir_graph = get_irp_irg(i);
1015 env = (inline_irg_env*)get_irg_link(current_ir_graph);
1017 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1018 list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
1020 irg_inline_property prop;
1022 if (env->n_nodes > maxsize)
1026 callee = entry->callee;
1028 prop = get_irg_inline_property(callee);
1029 if (prop == irg_inline_forbidden) {
1033 if (is_leaf(callee) && (
1034 is_smaller(callee, leafsize) || prop >= irg_inline_forced)) {
1035 if (!phiproj_computed) {
1036 phiproj_computed = 1;
1037 collect_phiprojs(current_ir_graph);
1039 did_inline = inline_method(call, callee);
1042 inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);
1044 /* call was inlined, Phi/Projs for current graph must be recomputed */
1045 phiproj_computed = 0;
1047 /* Do some statistics */
1048 env->got_inline = 1;
1049 --env->n_call_nodes;
1050 env->n_nodes += callee_env->n_nodes;
1051 --callee_env->n_callers;
1053 /* remove this call from the list */
1054 list_del(&entry->list);
1059 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1061 } while (did_inline);
1063 /* inline other small functions. */
1064 for (i = 0; i < n_irgs; ++i) {
1066 int phiproj_computed = 0;
1068 current_ir_graph = get_irp_irg(i);
1069 env = (inline_irg_env*)get_irg_link(current_ir_graph);
1071 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1073 /* note that the list of possible calls is updated during the process */
1074 list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
1075 irg_inline_property prop;
1080 callee = entry->callee;
1082 prop = get_irg_inline_property(callee);
1083 if (prop == irg_inline_forbidden) {
1087 calleee = (ir_graph*)pmap_get(copied_graphs, callee);
1088 if (calleee != NULL) {
1090 * Remap callee if we have a copy.
1091 * FIXME: Should we do this only for recursive Calls ?
1096 if (prop >= irg_inline_forced ||
1097 (is_smaller(callee, size) && env->n_nodes < maxsize) /* small function */) {
1098 if (current_ir_graph == callee) {
1100 * Recursive call: we cannot directly inline because we cannot walk
1101 * the graph and change it. So we have to make a copy of the graph
1105 inline_irg_env *callee_env;
1108 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1111 * No copy yet, create one.
1112 * Note that recursive methods are never leafs, so it is sufficient
1113 * to test this condition here.
1115 copy = create_irg_copy(callee);
1117 /* create_irg_copy() destroys the Proj links, recompute them */
1118 phiproj_computed = 0;
1120 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1122 /* allocate new environment */
1123 callee_env = alloc_inline_irg_env();
1124 set_irg_link(copy, callee_env);
1126 assure_irg_properties(copy,
1127 IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO);
1128 wenv.x = callee_env;
1129 wenv.ignore_callers = 1;
1130 irg_walk_graph(copy, NULL, collect_calls2, &wenv);
1133 * Enter the entity of the original graph. This is needed
1134 * for inline_method(). However, note that ent->irg still points
1135 * to callee, NOT to copy.
1137 set_irg_entity(copy, get_irg_entity(callee));
1139 pmap_insert(copied_graphs, callee, copy);
1142 /* we have only one caller: the original graph */
1143 callee_env->n_callers = 1;
1144 callee_env->n_callers_orig = 1;
1146 if (! phiproj_computed) {
1147 phiproj_computed = 1;
1148 collect_phiprojs(current_ir_graph);
1150 did_inline = inline_method(call, callee);
1152 inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);
1154 /* call was inlined, Phi/Projs for current graph must be recomputed */
1155 phiproj_computed = 0;
1157 /* callee was inline. Append its call list. */
1158 env->got_inline = 1;
1159 --env->n_call_nodes;
1160 append_call_list(env, callee_env, entry->loop_depth);
1161 --callee_env->n_callers;
1163 /* after we have inlined callee, all called methods inside callee
1164 are now called once more */
1165 list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
1166 inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
1170 /* remove this call from the list */
1171 list_del(&entry->list);
1176 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1179 for (i = 0; i < n_irgs; ++i) {
1180 irg = get_irp_irg(i);
1181 env = (inline_irg_env*)get_irg_link(irg);
1183 if (env->got_inline) {
1184 optimize_graph_df(irg);
1187 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
1188 DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
1189 env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
1190 env->n_callers_orig, env->n_callers,
1191 get_entity_name(get_irg_entity(irg))));
1195 /* kill the copied graphs: we don't need them anymore */
1196 foreach_pmap(copied_graphs, pm_entry) {
1197 ir_graph *copy = (ir_graph*)pm_entry->value;
1199 /* reset the entity, otherwise it will be deleted in the next step ... */
1200 set_irg_entity(copy, NULL);
1201 free_ir_graph(copy);
1203 pmap_destroy(copied_graphs);
1205 obstack_free(&temp_obst, NULL);
1206 current_ir_graph = rem;
1209 typedef struct inline_leaf_functions_pass_t {
1210 ir_prog_pass_t pass;
1215 } inline_leaf_functions_pass_t;
1218 * Wrapper to run inline_leaf_functions() as a ir_prog pass.
1220 static int inline_leaf_functions_wrapper(ir_prog *irp, void *context)
1222 inline_leaf_functions_pass_t *pass = (inline_leaf_functions_pass_t*)context;
1225 inline_leaf_functions(
1226 pass->maxsize, pass->leafsize,
1227 pass->size, pass->ignore_runtime);
1231 /* create a pass for inline_leaf_functions() */
1232 ir_prog_pass_t *inline_leaf_functions_pass(
1233 const char *name, unsigned maxsize, unsigned leafsize,
1234 unsigned size, int ignore_runtime)
1236 inline_leaf_functions_pass_t *pass = XMALLOCZ(inline_leaf_functions_pass_t);
1238 pass->maxsize = maxsize;
1239 pass->leafsize = leafsize;
1241 pass->ignore_runtime = ignore_runtime;
1243 return def_prog_pass_constructor(
1245 name ? name : "inline_leaf_functions",
1246 inline_leaf_functions_wrapper);
1250 * Calculate the parameter weights for transmitting the address of a local variable.
1252 static unsigned calc_method_local_weight(ir_node *arg)
1255 unsigned v, weight = 0;
1257 for (i = get_irn_n_outs(arg) - 1; i >= 0; --i) {
1258 ir_node *succ = get_irn_out(arg, i);
1260 switch (get_irn_opcode(succ)) {
1263 /* Loads and Store can be removed */
1267 /* check if all args are constant */
1268 for (j = get_Sel_n_indexs(succ) - 1; j >= 0; --j) {
1269 ir_node *idx = get_Sel_index(succ, j);
1270 if (! is_Const(idx))
1273 /* Check users on this Sel. Note: if a 0 is returned here, there was
1274 some unsupported node. */
1275 v = calc_method_local_weight(succ);
1278 /* we can kill one Sel with constant indexes, this is cheap */
1282 /* when looking backward we might find Id nodes */
1283 weight += calc_method_local_weight(succ);
1286 /* unoptimized tuple */
1287 for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
1288 ir_node *pred = get_Tuple_pred(succ, j);
1290 /* look for Proj(j) */
1291 for (k = get_irn_n_outs(succ) - 1; k >= 0; --k) {
1292 ir_node *succ_succ = get_irn_out(succ, k);
1293 if (is_Proj(succ_succ)) {
1294 if (get_Proj_proj(succ_succ) == j) {
1296 weight += calc_method_local_weight(succ_succ);
1299 /* this should NOT happen */
1307 /* any other node: unsupported yet or bad. */
1315 * Calculate the parameter weights for transmitting the address of a local variable.
1317 static void analyze_irg_local_weights(inline_irg_env *env, ir_graph *irg)
1319 ir_entity *ent = get_irg_entity(irg);
1324 ir_node *irg_args, *arg;
1326 mtp = get_entity_type(ent);
1327 nparams = get_method_n_params(mtp);
1329 /* allocate a new array. currently used as 'analysed' flag */
1330 env->local_weights = NEW_ARR_D(unsigned, &temp_obst, nparams);
1332 /* If the method haven't parameters we have nothing to do. */
1336 assure_irg_outs(irg);
1337 irg_args = get_irg_args(irg);
1338 for (i = get_irn_n_outs(irg_args) - 1; i >= 0; --i) {
1339 arg = get_irn_out(irg_args, i);
1340 proj_nr = get_Proj_proj(arg);
1341 env->local_weights[proj_nr] = calc_method_local_weight(arg);
1346 * Calculate the benefice for transmitting an local variable address.
1347 * After inlining, the local variable might be transformed into a
1348 * SSA variable by scalar_replacement().
1350 static unsigned get_method_local_adress_weight(ir_graph *callee, size_t pos)
1352 inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);
1354 if (env->local_weights == NULL)
1355 analyze_irg_local_weights(env, callee);
1357 if (pos < ARR_LEN(env->local_weights))
1358 return env->local_weights[pos];
1363 * Calculate a benefice value for inlining the given call.
1365 * @param call the call node we have to inspect
1366 * @param callee the called graph
1368 static int calc_inline_benefice(call_entry *entry, ir_graph *callee)
1370 ir_node *call = entry->call;
1371 ir_entity *ent = get_irg_entity(callee);
1372 ir_type *callee_frame;
1373 size_t i, n_members, n_params;
1379 irg_inline_property prop;
1381 inline_irg_env *callee_env;
1383 prop = get_irg_inline_property(callee);
1384 if (prop == irg_inline_forbidden) {
1385 DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden\n",
1387 return entry->benefice = INT_MIN;
1390 callee_frame = get_irg_frame_type(callee);
1391 n_members = get_class_n_members(callee_frame);
1392 for (i = 0; i < n_members; ++i) {
1393 ir_entity *frame_ent = get_class_member(callee_frame, i);
1394 if (is_parameter_entity(frame_ent)) {
1395 // TODO inliner should handle parameter entities by inserting Store operations
1396 DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden due to parameter entity\n", call, callee));
1397 set_irg_inline_property(callee, irg_inline_forbidden);
1398 return entry->benefice = INT_MIN;
1402 if (get_irg_additional_properties(callee) & mtp_property_noreturn) {
1403 DB((dbg, LEVEL_2, "In %+F Call to %+F: not inlining noreturn or weak\n",
1405 return entry->benefice = INT_MIN;
1408 /* costs for every passed parameter */
1409 n_params = get_Call_n_params(call);
1410 mtp = get_entity_type(ent);
1411 cc = get_method_calling_convention(mtp);
1412 if (cc & cc_reg_param) {
1413 /* register parameter, smaller costs for register parameters */
1414 size_t max_regs = cc & ~cc_bits;
1416 if (max_regs < n_params)
1417 weight += max_regs * 2 + (n_params - max_regs) * 5;
1419 weight += n_params * 2;
1421 /* parameters are passed an stack */
1422 weight += 5 * n_params;
1425 /* constant parameters improve the benefice */
1426 frame_ptr = get_irg_frame(current_ir_graph);
1428 for (i = 0; i < n_params; ++i) {
1429 ir_node *param = get_Call_param(call, i);
1431 if (is_Const(param)) {
1432 weight += get_method_param_weight(ent, i);
1435 if (is_SymConst(param))
1436 weight += get_method_param_weight(ent, i);
1437 else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
1439 * An address of a local variable is transmitted. After
1440 * inlining, scalar_replacement might be able to remove the
1441 * local variable, so honor this.
1443 v = get_method_local_adress_weight(callee, i);
1446 entry->local_adr = 1;
1450 entry->all_const = all_const;
1452 callee_env = (inline_irg_env*)get_irg_link(callee);
1453 if (callee_env->n_callers == 1 &&
1454 callee != current_ir_graph &&
1455 !entity_is_externally_visible(ent)) {
1459 /* give a bonus for functions with one block */
1460 if (callee_env->n_blocks == 1)
1461 weight = weight * 3 / 2;
1463 /* and one for small non-recursive functions: we want them to be inlined in mostly every case */
1464 if (callee_env->n_nodes < 30 && !callee_env->recursive)
1467 /* and finally for leafs: they do not increase the register pressure
1468 because of callee safe registers */
1469 if (callee_env->n_call_nodes == 0)
1472 /** it's important to inline inner loops first */
1473 if (entry->loop_depth > 30)
1474 weight += 30 * 1024;
1476 weight += entry->loop_depth * 1024;
1479 * All arguments constant is probably a good sign, give an extra bonus
1484 return entry->benefice = weight;
1487 typedef struct walk_env_t {
1493 * Callgraph walker, collect all visited graphs.
1495 static void callgraph_walker(ir_graph *irg, void *data)
1497 walk_env_t *env = (walk_env_t *)data;
1498 env->irgs[env->last_irg++] = irg;
1502 * Creates an inline order for all graphs.
1504 * @return the list of graphs.
1506 static ir_graph **create_irg_list(void)
1508 ir_entity **free_methods;
1509 size_t n_irgs = get_irp_n_irgs();
1512 cgana(&free_methods);
1513 xfree(free_methods);
1515 compute_callgraph();
1517 env.irgs = XMALLOCNZ(ir_graph*, n_irgs);
1520 callgraph_walk(NULL, callgraph_walker, &env);
1521 assert(n_irgs == env.last_irg);
1529 * Push a call onto the priority list if its benefice is big enough.
1531 * @param pqueue the priority queue of calls
1532 * @param call the call entry
1533 * @param inlien_threshold
1534 * the threshold value
1536 static void maybe_push_call(pqueue_t *pqueue, call_entry *call,
1537 int inline_threshold)
1539 ir_graph *callee = call->callee;
1540 irg_inline_property prop = get_irg_inline_property(callee);
1541 int benefice = calc_inline_benefice(call, callee);
1543 DB((dbg, LEVEL_2, "In %+F Call %+F to %+F has benefice %d\n",
1544 get_irn_irg(call->call), call->call, callee, benefice));
1546 if (prop < irg_inline_forced && benefice < inline_threshold) {
1550 pqueue_put(pqueue, call, benefice);
1554 * Try to inline calls into a graph.
1556 * @param irg the graph into which we inline
1557 * @param maxsize do NOT inline if the size of irg gets
1558 * bigger than this amount
1559 * @param inline_threshold
1560 * threshold value for inline decision
1561 * @param copied_graphs
1562 * map containing copied of recursive graphs
1564 static void inline_into(ir_graph *irg, unsigned maxsize,
1565 int inline_threshold, pmap *copied_graphs)
1567 int phiproj_computed = 0;
1568 inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
1569 call_entry *curr_call;
1573 if (env->n_call_nodes == 0)
1576 if (env->n_nodes > maxsize) {
1577 DB((dbg, LEVEL_2, "%+F: too big (%d)\n", irg, env->n_nodes));
1581 current_ir_graph = irg;
1582 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1584 /* put irgs into the pqueue */
1585 pqueue = new_pqueue();
1587 list_for_each_entry(call_entry, curr_call, &env->calls, list) {
1588 assert(is_Call(curr_call->call));
1589 maybe_push_call(pqueue, curr_call, inline_threshold);
1592 /* note that the list of possible calls is updated during the process */
1593 while (!pqueue_empty(pqueue)) {
1595 call_entry *curr_call = (call_entry*)pqueue_pop_front(pqueue);
1596 ir_graph *callee = curr_call->callee;
1597 ir_node *call_node = curr_call->call;
1598 inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);
1599 irg_inline_property prop = get_irg_inline_property(callee);
1602 const call_entry *centry;
1604 if ((prop < irg_inline_forced) && env->n_nodes + callee_env->n_nodes > maxsize) {
1605 DB((dbg, LEVEL_2, "%+F: too big (%d) + %+F (%d)\n", irg,
1606 env->n_nodes, callee, callee_env->n_nodes));
1610 calleee = (ir_graph*)pmap_get(copied_graphs, callee);
1611 if (calleee != NULL) {
1612 int benefice = curr_call->benefice;
1614 * Reduce the weight for recursive function IFF not all arguments are const.
1615 * inlining recursive functions is rarely good.
1617 if (!curr_call->all_const)
1619 if (benefice < inline_threshold)
1623 * Remap callee if we have a copy.
1626 callee_env = (inline_irg_env*)get_irg_link(callee);
1629 if (current_ir_graph == callee) {
1631 * Recursive call: we cannot directly inline because we cannot
1632 * walk the graph and change it. So we have to make a copy of
1635 int benefice = curr_call->benefice;
1639 * Reduce the weight for recursive function IFF not all arguments are const.
1640 * inlining recursive functions is rarely good.
1642 if (!curr_call->all_const)
1644 if (benefice < inline_threshold)
1647 ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1650 * No copy yet, create one.
1651 * Note that recursive methods are never leafs, so it is
1652 * sufficient to test this condition here.
1654 copy = create_irg_copy(callee);
1656 /* create_irg_copy() destroys the Proj links, recompute them */
1657 phiproj_computed = 0;
1659 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1661 /* allocate a new environment */
1662 callee_env = alloc_inline_irg_env();
1663 set_irg_link(copy, callee_env);
1665 assure_irg_properties(copy, IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO);
1666 memset(&wenv, 0, sizeof(wenv));
1667 wenv.x = callee_env;
1668 wenv.ignore_callers = 1;
1669 irg_walk_graph(copy, NULL, collect_calls2, &wenv);
1672 * Enter the entity of the original graph. This is needed
1673 * for inline_method(). However, note that ent->irg still points
1674 * to callee, NOT to copy.
1676 set_irg_entity(copy, get_irg_entity(callee));
1678 pmap_insert(copied_graphs, callee, copy);
1681 /* we have only one caller: the original graph */
1682 callee_env->n_callers = 1;
1683 callee_env->n_callers_orig = 1;
1685 if (! phiproj_computed) {
1686 phiproj_computed = 1;
1687 collect_phiprojs(current_ir_graph);
1689 did_inline = inline_method(call_node, callee);
1693 /* call was inlined, Phi/Projs for current graph must be recomputed */
1694 phiproj_computed = 0;
1696 /* remove it from the caller list */
1697 list_del(&curr_call->list);
1699 /* callee was inline. Append its call list. */
1700 env->got_inline = 1;
1701 --env->n_call_nodes;
1703 /* we just generate a bunch of new calls */
1704 loop_depth = curr_call->loop_depth;
1705 list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
1706 inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
1708 call_entry *new_entry;
1710 /* after we have inlined callee, all called methods inside
1711 * callee are now called once more */
1714 /* Note that the src list points to Call nodes in the inlined graph,
1715 * but we need Call nodes in our graph. Luckily the inliner leaves
1716 * this information in the link field. */
1717 new_call = (ir_node*)get_irn_link(centry->call);
1718 if (get_irn_irg(new_call) != irg) {
1719 /* centry->call has not been copied, which means it is dead.
1720 * This might happen during inlining, if a const function,
1721 * which cannot be inlined is only used as an unused argument
1722 * of another function, which is inlined. */
1725 assert(is_Call(new_call));
1727 new_entry = duplicate_call_entry(centry, new_call, loop_depth);
1728 list_add_tail(&new_entry->list, &env->calls);
1729 maybe_push_call(pqueue, new_entry, inline_threshold);
1732 env->n_call_nodes += callee_env->n_call_nodes;
1733 env->n_nodes += callee_env->n_nodes;
1734 --callee_env->n_callers;
1736 ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1741 * Heuristic inliner. Calculates a benefice value for every call and inlines
1742 * those calls with a value higher than the threshold.
1744 void inline_functions(unsigned maxsize, int inline_threshold,
1745 opt_ptr after_inline_opt)
1747 inline_irg_env *env;
1751 pmap *copied_graphs;
1752 pmap_entry *pm_entry;
1755 rem = current_ir_graph;
1756 obstack_init(&temp_obst);
1758 irgs = create_irg_list();
1760 /* a map for the copied graphs, used to inline recursive calls */
1761 copied_graphs = pmap_create();
1763 /* extend all irgs by a temporary data structure for inlining. */
1764 n_irgs = get_irp_n_irgs();
1765 for (i = 0; i < n_irgs; ++i)
1766 set_irg_link(irgs[i], alloc_inline_irg_env());
1768 /* Pre-compute information in temporary data structure. */
1769 wenv.ignore_runtime = 0;
1770 wenv.ignore_callers = 0;
1771 for (i = 0; i < n_irgs; ++i) {
1772 ir_graph *irg = irgs[i];
1774 free_callee_info(irg);
1776 wenv.x = (inline_irg_env*)get_irg_link(irg);
1777 assure_loopinfo(irg);
1778 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
1781 /* -- and now inline. -- */
1782 for (i = 0; i < n_irgs; ++i) {
1783 ir_graph *irg = irgs[i];
1785 inline_into(irg, maxsize, inline_threshold, copied_graphs);
1788 for (i = 0; i < n_irgs; ++i) {
1789 ir_graph *irg = irgs[i];
1791 env = (inline_irg_env*)get_irg_link(irg);
1792 if (env->got_inline && after_inline_opt != NULL) {
1793 /* this irg got calls inlined: optimize it */
1794 after_inline_opt(irg);
1796 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
1797 DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
1798 env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
1799 env->n_callers_orig, env->n_callers,
1800 get_entity_name(get_irg_entity(irg))));
1804 /* kill the copied graphs: we don't need them anymore */
1805 foreach_pmap(copied_graphs, pm_entry) {
1806 ir_graph *copy = (ir_graph*)pm_entry->value;
1808 /* reset the entity, otherwise it will be deleted in the next step ... */
1809 set_irg_entity(copy, NULL);
1810 free_ir_graph(copy);
1812 pmap_destroy(copied_graphs);
1816 obstack_free(&temp_obst, NULL);
1817 current_ir_graph = rem;
1820 typedef struct inline_functions_pass_t {
1821 ir_prog_pass_t pass;
1823 int inline_threshold;
1824 opt_ptr after_inline_opt;
1825 } inline_functions_pass_t;
1828 * Wrapper to run inline_functions() as a ir_prog pass.
1830 static int inline_functions_wrapper(ir_prog *irp, void *context)
1832 inline_functions_pass_t *pass = (inline_functions_pass_t*)context;
1835 inline_functions(pass->maxsize, pass->inline_threshold,
1836 pass->after_inline_opt);
1840 /* create a ir_prog pass for inline_functions */
1841 ir_prog_pass_t *inline_functions_pass(
1842 const char *name, unsigned maxsize, int inline_threshold,
1843 opt_ptr after_inline_opt)
1845 inline_functions_pass_t *pass = XMALLOCZ(inline_functions_pass_t);
1847 pass->maxsize = maxsize;
1848 pass->inline_threshold = inline_threshold;
1849 pass->after_inline_opt = after_inline_opt;
1851 return def_prog_pass_constructor(
1852 &pass->pass, name ? name : "inline_functions",
1853 inline_functions_wrapper);
1856 void firm_init_inline(void)
1858 FIRM_DBG_REGISTER(dbg, "firm.opt.inline");