2 * Copyright (C) 1995-2011 University of Karlsruhe. All right reserved.
4 * This file is part of libFirm.
6 * This file may be distributed and/or modified under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation and appearing in the file LICENSE.GPL included in the
9 * packaging of this file.
11 * Licensees holding valid libFirm Professional Edition licenses may use
12 * this file in accordance with the libFirm Commercial License.
13 * Agreement provided with the Software.
15 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * @brief Dead node elimination and Procedure Inlining.
23 * @author Michael Beck, Goetz Lindenmaier
32 #include "irgraph_t.h"
35 #include "iroptimize.h"
52 #include "irbackedge_t.h"
58 #include "analyze_irg_args.h"
59 #include "iredges_t.h"
63 #include "iropt_dbg.h"
65 #include "irnodemap.h"
67 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
69 /*------------------------------------------------------------------*/
70 /* Routines for dead node elimination / copying garbage collection */
72 /*------------------------------------------------------------------*/
75 * Remember the new node in the old node by using a field all nodes have.
/* NOTE(review): this file is a garbled extraction -- many original lines
 * (braces, returns) were dropped and original line numbers are fused into
 * the text. Code is kept byte-identical here; only comments were added. */
77 static void set_new_node(ir_node *node, ir_node *new_node)
/* Stash the copy in the old node's generic link field; read back later via
 * get_new_node(). */
79 set_irn_link(node, new_node);
83 * Get this new node, before the old node is forgotten.
85 static inline ir_node *get_new_node(ir_node *old_node)
/* Only valid after set_new_node() ran for old_node -- the visited flag is
 * used as the "has been copied" marker (see the assert). */
87 assert(irn_visited(old_node));
88 return (ir_node*) get_irn_link(old_node);
91 /*--------------------------------------------------------------------*/
92 /* Functionality for inlining */
93 /*--------------------------------------------------------------------*/
96 * Copy node for inlineing. Updates attributes that change when
97 * inlineing but not for dead node elimination.
99 * Copies the node by calling copy_node() and then updates the entity if
100 * it's a local one. env must be a pointer of the frame type of the
101 * inlined procedure. The new entities must be in the link field of
104 static void copy_node_inline(ir_node *node, void *env)
106 ir_graph *new_irg = (ir_graph*) env;
107 ir_node *new_node = irn_copy_into_irg(node, new_irg);
/* Record the old-node -> copied-node mapping for the later rewiring pass. */
109 set_new_node(node, new_node);
/* NOTE(review): an "if (is_Sel(node))"-style guard line appears to have been
 * dropped by the extraction -- the entity fixup below only makes sense for
 * Sel nodes (cf. the assert on the copied node). */
111 ir_graph *old_irg = get_irn_irg(node);
112 ir_type *old_frame_type = get_irg_frame_type(old_irg);
113 ir_entity *old_entity = get_Sel_entity(node);
114 assert(is_Sel(new_node));
115 /* use copied entities from the new frame */
116 if (get_entity_owner(old_entity) == old_frame_type) {
/* copy_frame_entities() put the cloned entity into the old entity's link. */
117 ir_entity *new_entity = (ir_entity*)get_entity_link(old_entity);
118 assert(new_entity != NULL);
119 set_Sel_entity(new_node, new_entity);
/* Copied Blocks must point at the graph they now belong to. */
121 } else if (is_Block(new_node)) {
122 new_node->attr.block.irg.irg = new_irg;
/* Second walker pass after copy_node_inline(): rewire each copied node's
 * inputs to the copies of its predecessors, and pin start-block-placed
 * nodes (constants etc.) into the new graph's start block. */
126 static void set_preds_inline(ir_node *node, void *env)
130 irn_rewire_inputs(node);
132 /* move constants into start block */
133 new_node = get_new_node(node);
134 if (is_irn_start_block_placed(new_node)) {
135 ir_graph *new_irg = (ir_graph *) env;
136 ir_node *start_block = get_irg_start_block(new_irg);
137 set_nodes_block(new_node, start_block);
142 * Walker: checks if P_value_arg_base is used.
/* Sets *env (a bool) to false whenever a construct is found that prevents
 * inlining: address-taken blocks, frame accesses outside the frame type,
 * parameter entities, or stack Allocs (alloca/VLA). */
144 static void find_addr(ir_node *node, void *env)
146 bool *allow_inline = (bool*)env;
/* A Block with an entity had its address taken -- not inlinable. */
148 if (is_Block(node) && get_Block_entity(node)) {
150 * Currently we can't handle blocks whose address was taken correctly
153 *allow_inline = false;
154 } else if (is_Sel(node)) {
155 ir_graph *irg = current_ir_graph;
156 if (get_Sel_ptr(node) == get_irg_frame(irg)) {
157 /* access to frame */
158 ir_entity *ent = get_Sel_entity(node);
159 if (get_entity_owner(ent) != get_irg_frame_type(irg)) {
160 /* access to value_type */
161 *allow_inline = false;
/* Taking the address of a parameter also forbids inlining. */
163 if (is_parameter_entity(ent)) {
164 *allow_inline = false;
167 } else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
169 * Refuse to inline alloca call unless user explicitly forced so as this
170 * may change program's memory overhead drastically when the function
171 * using alloca is called in loop. In GCC present in SPEC2000 inlining
172 * into schedule_block cause it to require 2GB of ram instead of 256MB.
174 * Sorrily this is true with our implementation also.
175 * Moreover, we cannot differentiate between alloca() and VLA yet, so
176 * this disables inlining of functions using VLA (which are completely
180 * - add a flag to the Alloc node for "real" alloca() calls
181 * - add a new Stack-Restore node at the end of a function using
184 *allow_inline = false;
189 * Check if we can inline a given call.
190 * Currently, we cannot inline two cases:
191 * - call with compound arguments
192 * - graphs that take the address of a parameter
194 * check these conditions here
/* Returns true iff the call/callee pair passes all inlinability checks
 * below plus the find_addr() graph walk at the end. NOTE(review): several
 * "return false;" lines were dropped by the extraction. */
196 static bool can_inline(ir_node *call, ir_graph *called_graph)
198 ir_entity *called = get_irg_entity(called_graph);
199 ir_type *called_type = get_entity_type(called);
200 ir_type *call_type = get_Call_type(call);
201 size_t n_params = get_method_n_params(called_type);
202 size_t n_arguments = get_method_n_params(call_type);
203 size_t n_res = get_method_n_ress(called_type);
204 mtp_additional_properties props = get_entity_additional_properties(called);
/* Respect an explicit "do not inline" annotation on the callee. */
208 if (props & mtp_property_noinline)
211 if (n_arguments != n_params) {
212 /* this is a bad feature of C: without a prototype, we can
213 * call a function with less parameters than needed. Currently
214 * we don't support this, although we could use Unknown than. */
/* Result counts of call type and declared type must agree as well. */
217 if (n_res != get_method_n_ress(call_type)) {
221 /* Argh, compiling C has some bad consequences:
222 * It is implementation dependent what happens in that case.
223 * We support inlining, if the bitsize of the types matches AND
224 * the same arithmetic is used. */
225 for (i = 0; i < n_params; ++i) {
226 ir_type *param_tp = get_method_param_type(called_type, i);
227 ir_type *arg_tp = get_method_param_type(call_type, i);
229 if (param_tp != arg_tp) {
230 ir_mode *pmode = get_type_mode(param_tp);
231 ir_mode *amode = get_type_mode(arg_tp);
233 if (pmode == NULL || amode == NULL)
235 if (get_mode_size_bits(pmode) != get_mode_size_bits(amode))
237 if (get_mode_arithmetic(pmode) != get_mode_arithmetic(amode))
239 /* otherwise we can simply "reinterpret" the bits */
/* Same bitsize/arithmetic compatibility check for the result types. */
242 for (i = 0; i < n_res; ++i) {
243 ir_type *decl_res_tp = get_method_res_type(called_type, i);
244 ir_type *used_res_tp = get_method_res_type(call_type, i);
246 if (decl_res_tp != used_res_tp) {
247 ir_mode *decl_mode = get_type_mode(decl_res_tp);
248 ir_mode *used_mode = get_type_mode(used_res_tp);
249 if (decl_mode == NULL || used_mode == NULL)
251 if (get_mode_size_bits(decl_mode) != get_mode_size_bits(used_mode))
253 if (get_mode_arithmetic(decl_mode) != get_mode_arithmetic(used_mode))
255 /* otherwise we can "reinterpret" the bits */
259 /* check parameters for compound arguments */
260 for (i = 0; i < n_params; ++i) {
261 ir_type *p_type = get_method_param_type(call_type, i);
263 if (is_compound_type(p_type) || is_Array_type(p_type))
267 /* check results for compound arguments */
268 for (i = 0; i < n_res; ++i) {
269 ir_type *r_type = get_method_res_type(call_type, i);
271 if (is_compound_type(r_type) || is_Array_type(r_type))
/* Final check: walk the callee and veto if find_addr() flags anything. */
276 irg_walk_graph(called_graph, find_addr, NULL, &res);
/* exc_mode: how exception control flow of an inlined call is represented.
 * NOTE(review): the "enum exc_mode {" header line was dropped by the
 * extraction (the type is referenced as "enum exc_mode" in inline_method). */
282 exc_handler, /**< There is a handler. */
283 exc_no_handler /**< Exception handling not represented. */
287 * copy all entities on the stack frame on 1 irg to the stackframe of another.
288 * Sets entity links of the old entities to the copies
290 static void copy_frame_entities(ir_graph *from, ir_graph *to)
292 ir_type *from_frame = get_irg_frame_type(from);
293 ir_type *to_frame = get_irg_frame_type(to);
294 size_t n_members = get_class_n_members(from_frame);
/* Copying a frame onto itself would clobber the entity links. */
296 assert(from_frame != to_frame);
298 for (i = 0; i < n_members; ++i) {
299 ir_entity *old_ent = get_class_member(from_frame, i);
300 ir_entity *new_ent = copy_entity_own(old_ent, to_frame);
/* The link lets copy_node_inline() find the clone for each old entity. */
301 set_entity_link(old_ent, new_ent);
/* Parameter entities are rejected earlier (calc_inline_benefice). */
302 assert (!is_parameter_entity(old_ent));
306 /* Inlines a method at the given call site. */
/* Copies called_graph into the graph containing `call`, wires arguments
 * into the copy and turns the Call into a Tuple carrying the copy's
 * results. Returns nonzero on success (the dropped return lines are not
 * visible in this extraction). NOTE(review): many lines (braces, returns,
 * some declarations) are missing from this sampled view. */
307 int inline_method(ir_node *const call, ir_graph *called_graph)
309 /* we cannot inline some types of calls */
310 if (! can_inline(call, called_graph))
313 /* We cannot inline a recursive call. The graph must be copied before
314 * the call the inline_method() using create_irg_copy(). */
315 ir_graph *irg = get_irn_irg(call);
316 if (called_graph == irg)
319 ir_entity *ent = get_irg_entity(called_graph);
320 ir_type *mtp = get_entity_type(ent);
321 ir_type *ctp = get_Call_type(call);
322 int n_params = get_method_n_params(mtp);
324 ir_graph *rem = current_ir_graph;
325 current_ir_graph = irg;
327 DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));
329 /* optimizations can cause problems when allocating new nodes */
330 int rem_opt = get_opt_optimize();
333 /* Handle graph state */
334 assert(get_irg_pinned(irg) == op_pin_state_pinned);
335 assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
336 clear_irg_properties(irg, IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE
337 | IR_GRAPH_PROPERTY_CONSISTENT_ENTITY_USAGE);
338 set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
339 clear_irg_properties(irg, IR_GRAPH_PROPERTY_CONSISTENT_ENTITY_USAGE);
340 edges_deactivate(irg);
342 /* here we know we WILL inline, so inform the statistics */
343 hook_inline(call, called_graph);
345 /* -- Decide how to handle exception control flow: Is there a handler
346 for the Call node, or do we branch directly to End on an exception?
348 0 There is a handler.
349 2 Exception handling not represented in Firm. -- */
/* Scan the Proj list hanging off the Call's link for an X_except Proj. */
350 ir_node *Xproj = NULL;
351 for (ir_node *proj = (ir_node*)get_irn_link(call); proj != NULL;
352 proj = (ir_node*)get_irn_link(proj)) {
353 long proj_nr = get_Proj_proj(proj);
354 if (proj_nr == pn_Call_X_except) Xproj = proj;
356 enum exc_mode exc_handling = Xproj != NULL ? exc_handler : exc_no_handler;
358 /* create the argument tuple */
359 ir_node **args_in = ALLOCAN(ir_node*, n_params);
361 ir_node *block = get_nodes_block(call);
362 for (int i = n_params - 1; i >= 0; --i) {
363 ir_node *arg = get_Call_param(call, i);
364 ir_type *param_tp = get_method_param_type(mtp, i);
365 ir_mode *mode = get_type_mode(param_tp);
/* Insert a Conv if argument mode and declared parameter mode differ
 * (allowed by can_inline() when bitsize/arithmetic match). */
367 if (mode != get_irn_mode(arg)) {
368 arg = new_r_Conv(block, arg, mode);
373 /* the procedure and later replaces the Start node of the called graph.
374 * Post_call is the old Call node and collects the results of the called
375 * graph. Both will end up being a tuple. */
376 ir_node *post_bl = get_nodes_block(call);
377 /* XxMxPxPxPxT of Start + parameter of Call */
378 ir_node *in[pn_Start_max+1];
379 in[pn_Start_M] = get_Call_mem(call);
380 in[pn_Start_X_initial_exec] = new_r_Jmp(post_bl);
381 in[pn_Start_P_frame_base] = get_irg_frame(irg);
382 in[pn_Start_T_args] = new_r_Tuple(post_bl, n_params, args_in);
383 ir_node *pre_call = new_r_Tuple(post_bl, pn_Start_max+1, in);
386 The new block gets the ins of the old block, pre_call and all its
387 predecessors and all Phi nodes. -- */
388 part_block(pre_call);
390 /* increment visited flag for later walk */
391 inc_irg_visited(called_graph);
393 /* link some nodes to nodes in the current graph so instead of copying
394 * the linked nodes will get used.
395 * So the copier will use the created Tuple instead of copying the start
396 * node, similar for singleton nodes like NoMem and Bad.
397 * Note: this will prohibit predecessors to be copied - only do it for
398 * nodes without predecessors */
399 ir_node *start_block = get_irg_start_block(called_graph);
400 set_new_node(start_block, get_nodes_block(pre_call));
401 mark_irn_visited(start_block);
403 ir_node *start = get_irg_start(called_graph);
404 set_new_node(start, pre_call);
405 mark_irn_visited(start);
407 ir_node *nomem = get_irg_no_mem(called_graph);
408 set_new_node(nomem, get_irg_no_mem(irg));
409 mark_irn_visited(nomem);
411 /* entitiy link is used to link entities on old stackframe to the
413 irp_reserve_resources(irp, IRP_RESOURCE_ENTITY_LINK);
415 /* copy entities and nodes */
416 assert(!irn_visited(get_irg_end(called_graph)));
417 copy_frame_entities(called_graph, irg);
418 irg_walk_core(get_irg_end(called_graph), copy_node_inline, set_preds_inline,
421 irp_free_resources(irp, IRP_RESOURCE_ENTITY_LINK);
423 /* -- Merge the end of the inlined procedure with the call site -- */
424 /* We will turn the old Call node into a Tuple with the following
427 0: Phi of all Memories of Return statements.
428 1: Jmp from new Block that merges the control flow from all exception
429 predecessors of the old end block.
430 2: Tuple of all arguments.
431 3: Phi of Exception memories.
432 In case the old Call directly branches to End on an exception we don't
433 need the block merging all exceptions nor the Phi of the exception
437 /* Precompute some values */
438 ir_node *end_bl = get_new_node(get_irg_end_block(called_graph));
439 ir_node *end = get_new_node(get_irg_end(called_graph));
440 int arity = get_Block_n_cfgpreds(end_bl); /* arity = n_exc + n_ret */
441 int n_res = get_method_n_ress(get_Call_type(call));
443 ir_node **res_pred = XMALLOCN(ir_node*, n_res);
444 ir_node **cf_pred = XMALLOCN(ir_node*, arity);
446 /* archive keepalives */
447 int irn_arity = get_irn_arity(end);
448 for (int i = 0; i < irn_arity; i++) {
449 ir_node *ka = get_End_keepalive(end, i);
451 add_End_keepalive(get_irg_end(irg), ka);
454 /* replace Return nodes by Jump nodes */
456 for (int i = 0; i < arity; i++) {
457 ir_node *ret = get_Block_cfgpred(end_bl, i);
458 if (is_Return(ret)) {
459 ir_node *block = get_nodes_block(ret);
460 cf_pred[n_ret] = new_r_Jmp(block);
/* post_bl (the split-off lower half of the call block) now has one
 * control predecessor per Return of the inlined body. */
464 set_irn_in(post_bl, n_ret, cf_pred);
466 /* build a Tuple for all results of the method.
467 * add Phi node if there was more than one Return. */
468 /* First the Memory-Phi */
470 for (int i = 0; i < arity; i++) {
471 ir_node *ret = get_Block_cfgpred(end_bl, i);
472 if (is_Return(ret)) {
473 cf_pred[n_mem_phi++] = get_Return_mem(ret);
475 /* memory output for some exceptions is directly connected to End */
477 cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 3);
478 } else if (is_fragile_op(ret)) {
479 /* We rely that all cfops have the memory output at the same position. */
480 cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 0);
481 } else if (is_Raise(ret)) {
482 cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 1);
485 ir_node *const call_mem = new_r_Phi(post_bl, n_mem_phi, cf_pred, mode_M);
486 /* Conserve Phi-list for further inlinings -- but might be optimized */
487 if (get_nodes_block(call_mem) == post_bl) {
488 set_irn_link(call_mem, get_irn_link(post_bl));
489 set_irn_link(post_bl, call_mem);
491 /* Now the real results */
494 for (int j = 0; j < n_res; j++) {
495 ir_type *res_type = get_method_res_type(ctp, j);
496 ir_mode *res_mode = get_type_mode(res_type);
498 for (int i = 0; i < arity; i++) {
499 ir_node *ret = get_Block_cfgpred(end_bl, i);
500 if (is_Return(ret)) {
501 ir_node *res = get_Return_res(ret, j);
/* Convert the result back to the mode the caller expects. */
502 if (get_irn_mode(res) != res_mode) {
503 ir_node *block = get_nodes_block(res);
504 res = new_r_Conv(block, res, res_mode);
506 cf_pred[n_ret] = res;
510 ir_node *const phi = n_ret > 0
511 ? new_r_Phi(post_bl, n_ret, cf_pred, res_mode)
512 : new_r_Bad(irg, res_mode);
514 /* Conserve Phi-list for further inlinings -- but might be optimized */
515 if (get_nodes_block(phi) == post_bl) {
516 set_Phi_next(phi, get_Block_phis(post_bl));
517 set_Block_phis(post_bl, phi);
520 call_res = new_r_Tuple(post_bl, n_res, res_pred);
522 call_res = new_r_Bad(irg, mode_T);
524 /* handle the regular call */
525 ir_node *const call_x_reg = new_r_Jmp(post_bl);
527 /* Finally the exception control flow.
528 We have two possible situations:
529 First if the Call branches to an exception handler:
530 We need to add a Phi node to
531 collect the memory containing the exception objects. Further we need
532 to add another block to get a correct representation of this Phi. To
533 this block we add a Jmp that resolves into the X output of the Call
534 when the Call is turned into a tuple.
535 Second: There is no exception edge. Just add all inlined exception
536 branches to the End node.
539 if (exc_handling == exc_handler) {
541 for (int i = 0; i < arity; i++) {
542 ir_node *ret = get_Block_cfgpred(end_bl, i);
543 ir_node *irn = skip_Proj(ret);
544 if (is_fragile_op(irn) || is_Raise(irn)) {
545 cf_pred[n_exc] = ret;
/* A single exception predecessor needs no merge block. */
552 call_x_exc = cf_pred[0];
554 ir_node *block = new_r_Block(irg, n_exc, cf_pred);
555 call_x_exc = new_r_Jmp(block);
558 call_x_exc = new_r_Bad(irg, mode_X);
561 /* assert(exc_handling == 1 || no exceptions. ) */
563 for (int i = 0; i < arity; i++) {
564 ir_node *ret = get_Block_cfgpred(end_bl, i);
565 ir_node *irn = skip_Proj(ret);
567 if (is_fragile_op(irn) || is_Raise(irn)) {
568 cf_pred[n_exc] = ret;
/* No handler: append the inlined exception edges to the end block. */
572 ir_node *main_end_bl = get_irg_end_block(irg);
573 int main_end_bl_arity = get_irn_arity(main_end_bl);
574 ir_node **end_preds = XMALLOCN(ir_node*, n_exc+main_end_bl_arity);
576 for (int i = 0; i < main_end_bl_arity; ++i)
577 end_preds[i] = get_irn_n(main_end_bl, i);
578 for (int i = 0; i < n_exc; ++i)
579 end_preds[main_end_bl_arity + i] = cf_pred[i];
580 set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
581 call_x_exc = new_r_Bad(irg, mode_X);
/* Replace the Call by a Tuple so existing Projs pick up the new values. */
587 ir_node *const call_in[] = {
588 [pn_Call_M] = call_mem,
589 [pn_Call_T_result] = call_res,
590 [pn_Call_X_regular] = call_x_reg,
591 [pn_Call_X_except] = call_x_exc,
593 turn_into_tuple(call, ARRAY_SIZE(call_in), call_in);
595 /* -- Turn CSE back on. -- */
596 set_optimize(rem_opt);
597 current_ir_graph = rem;
602 /********************************************************************/
603 /* Apply inlining to small methods. */
604 /********************************************************************/
606 static struct obstack temp_obst;
608 /** Represents a possible inlinable call in a graph. */
/* NOTE(review): the closing "} call_entry;" line of this typedef was dropped
 * by the extraction. */
609 typedef struct call_entry {
610 ir_node *call; /**< The Call node. */
611 ir_graph *callee; /**< The callee IR-graph. */
612 list_head list; /**< List head for linking the next one. */
613 int loop_depth; /**< The loop depth of this call. */
614 int benefice; /**< The calculated benefice of this call. */
615 unsigned local_adr:1; /**< Set if this call gets an address of a local variable. */
616 unsigned all_const:1; /**< Set if this call has only constant parameters. */
620 * Returns the irg called from a Call node. If the irg is not
621 * known, NULL is returned.
623 * @param call the call node
625 static ir_graph *get_call_called_irg(ir_node *call)
629 addr = get_Call_ptr(call);
/* Only direct calls through a SymConst entity address can be resolved. */
630 if (is_SymConst_addr_ent(addr)) {
631 ir_entity *ent = get_SymConst_entity(addr);
632 /* we don't know which function gets finally bound to a weak symbol */
633 if (get_entity_linkage(ent) & IR_LINKAGE_WEAK)
636 return get_entity_irg(ent);
643 * Environment for inlining irgs.
/* NOTE(review): the "typedef struct inline_irg_env {" line and the closing
 * "} inline_irg_env;" were dropped by the extraction. */
646 list_head calls; /**< List of of all call nodes in this graph. */
647 unsigned *local_weights; /**< Once allocated, the beneficial weight for transmitting local addresses. */
648 unsigned n_nodes; /**< Number of nodes in graph except Id, Tuple, Proj, Start, End. */
649 unsigned n_blocks; /**< Number of Blocks in graph without Start and End block. */
650 unsigned n_nodes_orig; /**< for statistics */
651 unsigned n_call_nodes; /**< Number of Call nodes in the graph. */
652 unsigned n_call_nodes_orig; /**< for statistics */
653 unsigned n_callers; /**< Number of known graphs that call this graphs. */
654 unsigned n_callers_orig; /**< for statistics */
655 unsigned got_inline:1; /**< Set, if at least one call inside this graph was inlined. */
656 unsigned recursive:1; /**< Set, if this function is self recursive. */
660 * Allocate a new environment for inlining.
662 static inline_irg_env *alloc_inline_irg_env(void)
/* Allocated on the module-wide temp_obst obstack, freed en bloc later. */
664 inline_irg_env *env = OALLOC(&temp_obst, inline_irg_env);
665 INIT_LIST_HEAD(&env->calls);
666 env->local_weights = NULL;
/* Counters start at -2 so that Start/End (Blocks) are not counted when the
 * walker increments them unconditionally. */
667 env->n_nodes = -2; /* do not count count Start, End */
668 env->n_blocks = -2; /* do not count count Start, End Block */
669 env->n_nodes_orig = -2; /* do not count Start, End */
670 env->n_call_nodes = 0;
671 env->n_call_nodes_orig = 0;
673 env->n_callers_orig = 0;
/* Per-walk environment for collect_calls2().
 * NOTE(review): the closing "} wenv_t;" line was dropped by the extraction
 * (the struct is used as wenv_t below). */
679 typedef struct walker_env {
680 inline_irg_env *x; /**< the inline environment */
681 char ignore_runtime; /**< the ignore runtime flag */
682 char ignore_callers; /**< if set, do change callers data */
686 * post-walker: collect all calls in the inline-environment
687 * of a graph and sum some statistics.
689 static void collect_calls2(ir_node *call, void *ctx)
691 wenv_t *env = (wenv_t*)ctx;
692 inline_irg_env *x = env->x;
693 unsigned code = get_irn_opcode(call);
697 /* count meaningful nodes in irg */
698 if (code != iro_Proj && code != iro_Tuple && code != iro_Sync) {
699 if (code != iro_Block) {
/* Everything past here only applies to Call nodes. */
707 if (code != iro_Call) return;
709 /* check, if it's a runtime call */
710 if (env->ignore_runtime) {
711 ir_node *symc = get_Call_ptr(call);
713 if (is_SymConst_addr_ent(symc)) {
714 ir_entity *ent = get_SymConst_entity(symc);
/* Runtime-library calls are never inlining candidates in this mode. */
716 if (get_entity_additional_properties(ent) & mtp_property_runtime)
721 /* collect all call nodes */
723 ++x->n_call_nodes_orig;
725 callee = get_call_called_irg(call);
726 if (callee != NULL) {
727 if (! env->ignore_callers) {
728 inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);
729 /* count all static callers */
730 ++callee_env->n_callers;
731 ++callee_env->n_callers_orig;
/* A call to the graph currently walked marks it self-recursive. */
733 if (callee == current_ir_graph)
736 /* link it in the list of possible inlinable entries */
737 entry = OALLOC(&temp_obst, call_entry);
739 entry->callee = callee;
740 entry->loop_depth = get_irn_loop(get_nodes_block(call))->depth;
742 entry->local_adr = 0;
743 entry->all_const = 0;
745 list_add_tail(&entry->list, &x->calls);
750 * Duplicate a call entry.
752 * @param entry the original entry to duplicate
753 * @param new_call the new call node
754 * @param loop_depth_delta
755 * delta value for the loop depth
757 static call_entry *duplicate_call_entry(const call_entry *entry,
758 ir_node *new_call, int loop_depth_delta)
/* Obstack-allocated copy; all fields except call and loop_depth are taken
 * over verbatim from the original entry. */
760 call_entry *nentry = OALLOC(&temp_obst, call_entry);
761 nentry->call = new_call;
762 nentry->callee = entry->callee;
763 nentry->benefice = entry->benefice;
764 nentry->loop_depth = entry->loop_depth + loop_depth_delta;
765 nentry->local_adr = entry->local_adr;
766 nentry->all_const = entry->all_const;
772 * Calculate the parameter weights for transmitting the address of a local variable.
/* Recursively inspects all users (outs) of arg and accumulates a weight
 * describing how profitable scalar replacement would be after inlining.
 * NOTE(review): the per-opcode weight-increment lines and the "return 0"
 * paths for unsupported users were dropped by the extraction. */
774 static unsigned calc_method_local_weight(ir_node *arg)
777 unsigned v, weight = 0;
779 for (unsigned i = get_irn_n_outs(arg); i-- > 0; ) {
780 ir_node *succ = get_irn_out(arg, i);
782 switch (get_irn_opcode(succ)) {
785 /* Loads and Store can be removed */
789 /* check if all args are constant */
790 for (j = get_Sel_n_indexs(succ) - 1; j >= 0; --j) {
791 ir_node *idx = get_Sel_index(succ, j);
795 /* Check users on this Sel. Note: if a 0 is returned here, there was
796 some unsupported node. */
797 v = calc_method_local_weight(succ);
800 /* we can kill one Sel with constant indexes, this is cheap */
804 /* when looking backward we might find Id nodes */
805 weight += calc_method_local_weight(succ);
808 /* unoptimized tuple */
809 for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
810 ir_node *pred = get_Tuple_pred(succ, j);
812 /* look for Proj(j) */
813 for (unsigned k = get_irn_n_outs(succ); k-- > 0; ) {
814 ir_node *succ_succ = get_irn_out(succ, k);
815 if (is_Proj(succ_succ)) {
816 if (get_Proj_proj(succ_succ) == j) {
/* Follow the Proj that selects this Tuple component. */
818 weight += calc_method_local_weight(succ_succ);
821 /* this should NOT happen */
829 /* any other node: unsupported yet or bad. */
837 * Calculate the parameter weights for transmitting the address of a local variable.
/* Fills env->local_weights with one weight per formal parameter of irg;
 * the array also doubles as the "already analysed" flag. */
839 static void analyze_irg_local_weights(inline_irg_env *env, ir_graph *irg)
841 ir_entity *ent = get_irg_entity(irg);
845 ir_node *irg_args, *arg;
847 mtp = get_entity_type(ent);
848 nparams = get_method_n_params(mtp);
850 /* allocate a new array. currently used as 'analysed' flag */
851 env->local_weights = NEW_ARR_D(unsigned, &temp_obst, nparams);
853 /* If the method haven't parameters we have nothing to do. */
/* Needs out edges to walk from the Args Proj to each parameter Proj. */
857 assure_irg_outs(irg);
858 irg_args = get_irg_args(irg);
859 for (unsigned i = get_irn_n_outs(irg_args); i-- > 0; ) {
860 arg = get_irn_out(irg_args, i);
861 proj_nr = get_Proj_proj(arg);
862 env->local_weights[proj_nr] = calc_method_local_weight(arg);
867 * Calculate the benefice for transmitting an local variable address.
868 * After inlining, the local variable might be transformed into a
869 * SSA variable by scalar_replacement().
871 static unsigned get_method_local_adress_weight(ir_graph *callee, size_t pos)
873 inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);
/* Lazily compute the per-parameter weights on first query. */
875 if (env->local_weights == NULL)
876 analyze_irg_local_weights(env, callee);
/* pos can exceed the array for variadic/mismatched calls; the fallback
 * return for that case is not visible in this extraction. */
878 if (pos < ARR_LEN(env->local_weights))
879 return env->local_weights[pos];
884 * Calculate a benefice value for inlining the given call.
886 * @param call the call node we have to inspect
887 * @param callee the called graph
/* Returns the benefice (higher = more profitable) and caches it in
 * entry->benefice; INT_MIN marks "never inline". NOTE(review): several
 * weight-adjustment lines were dropped by the extraction. */
889 static int calc_inline_benefice(call_entry *entry, ir_graph *callee)
891 ir_node *call = entry->call;
892 ir_entity *ent = get_irg_entity(callee);
893 ir_type *callee_frame;
894 size_t i, n_members, n_params;
901 inline_irg_env *callee_env;
903 mtp_additional_properties props = get_entity_additional_properties(ent);
904 if (props & mtp_property_noinline) {
905 DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden\n",
907 return entry->benefice = INT_MIN;
/* Frames containing parameter entities cannot be inlined yet; mark the
 * callee noinline so the check is not repeated. */
910 callee_frame = get_irg_frame_type(callee);
911 n_members = get_class_n_members(callee_frame);
912 for (i = 0; i < n_members; ++i) {
913 ir_entity *frame_ent = get_class_member(callee_frame, i);
914 if (is_parameter_entity(frame_ent)) {
915 // TODO inliner should handle parameter entities by inserting Store operations
916 DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden due to parameter entity\n", call, callee));
917 add_entity_additional_properties(ent, mtp_property_noinline);
918 return entry->benefice = INT_MIN;
922 if (props & mtp_property_noreturn) {
923 DB((dbg, LEVEL_2, "In %+F Call to %+F: not inlining noreturn or weak\n",
925 return entry->benefice = INT_MIN;
928 /* costs for every passed parameter */
929 n_params = get_Call_n_params(call);
930 mtp = get_entity_type(ent);
931 cc = get_method_calling_convention(mtp);
932 if (cc & cc_reg_param) {
933 /* register parameter, smaller costs for register parameters */
934 size_t max_regs = cc & ~cc_bits;
936 if (max_regs < n_params)
937 weight += max_regs * 2 + (n_params - max_regs) * 5;
939 weight += n_params * 2;
941 /* parameters are passed an stack */
942 weight += 5 * n_params;
945 /* constant parameters improve the benefice */
946 frame_ptr = get_irg_frame(current_ir_graph);
948 for (i = 0; i < n_params; ++i) {
949 ir_node *param = get_Call_param(call, i);
951 if (is_Const(param)) {
952 weight += get_method_param_weight(ent, i);
955 if (is_SymConst(param))
956 weight += get_method_param_weight(ent, i);
957 else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
959 * An address of a local variable is transmitted. After
960 * inlining, scalar_replacement might be able to remove the
961 * local variable, so honor this.
963 v = get_method_local_adress_weight(callee, i);
966 entry->local_adr = 1;
970 entry->all_const = all_const;
/* Bonus when this is the only caller of a local function: inlining can
 * make the callee dead afterwards. */
972 callee_env = (inline_irg_env*)get_irg_link(callee);
973 if (callee_env->n_callers == 1 &&
974 callee != current_ir_graph &&
975 !entity_is_externally_visible(ent)) {
979 /* give a bonus for functions with one block */
980 if (callee_env->n_blocks == 1)
981 weight = weight * 3 / 2;
983 /* and one for small non-recursive functions: we want them to be inlined in mostly every case */
984 if (callee_env->n_nodes < 30 && !callee_env->recursive)
987 /* and finally for leafs: they do not increase the register pressure
988 because of callee safe registers */
989 if (callee_env->n_call_nodes == 0)
992 /** it's important to inline inner loops first */
993 if (entry->loop_depth > 30)
996 weight += entry->loop_depth * 1024;
999 * All arguments constant is probably a good sign, give an extra bonus
1004 return entry->benefice = weight;
1007 typedef struct walk_env_t {
1013 * Callgraph walker, collect all visited graphs.
1015 static void callgraph_walker(ir_graph *irg, void *data)
1017 walk_env_t *env = (walk_env_t *)data;
/* Append irg at the next free slot; last_irg doubles as the count. */
1018 env->irgs[env->last_irg++] = irg;
1022 * Creates an inline order for all graphs.
1024 * @return the list of graphs.
1026 static ir_graph **create_irg_list(void)
1028 ir_entity **free_methods;
1029 size_t n_irgs = get_irp_n_irgs();
/* cgana() computes callee information; the free_methods result is not
 * needed here and released immediately. */
1032 cgana(&free_methods);
1033 xfree(free_methods);
1035 compute_callgraph();
1037 env.irgs = XMALLOCNZ(ir_graph*, n_irgs);
/* Bottom-up callgraph order: callees are visited before their callers. */
1040 callgraph_walk(NULL, callgraph_walker, &env);
1041 assert(n_irgs == env.last_irg);
1049 * Push a call onto the priority list if its benefice is big enough.
1051 * @param pqueue the priority queue of calls
1052 * @param call the call entry
1053 * @param inlien_threshold
1054 * the threshold value
1056 static void maybe_push_call(pqueue_t *pqueue, call_entry *call,
1057 int inline_threshold)
1059 ir_graph *callee = call->callee;
1060 int benefice = calc_inline_benefice(call, callee);
1062 DB((dbg, LEVEL_2, "In %+F Call %+F to %+F has benefice %d\n",
1063 get_irn_irg(call->call), call->call, callee, benefice));
/* always_inline overrides the threshold; otherwise drop cheap calls. */
1065 ir_entity *ent = get_irg_entity(callee);
1066 mtp_additional_properties props = get_entity_additional_properties(ent);
1067 if (!(props & mtp_property_always_inline) && benefice < inline_threshold) {
/* The benefice is the priority: most profitable calls are popped first. */
1071 pqueue_put(pqueue, call, benefice);
1075 * Try to inline calls into a graph.
1077 * @param irg the graph into which we inline
1078 * @param maxsize do NOT inline if the size of irg gets
1079 * bigger than this amount
1080 * @param inline_threshold
1081 * threshold value for inline decision
1082 * @param copied_graphs
1083 * map containing copied of recursive graphs
1085 static void inline_into(ir_graph *irg, unsigned maxsize,
1086 int inline_threshold, pmap *copied_graphs)
1088 int phiproj_computed = 0;
1089 inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
1093 if (env->n_call_nodes == 0)
1096 if (env->n_nodes > maxsize) {
1097 DB((dbg, LEVEL_2, "%+F: too big (%d)\n", irg, env->n_nodes));
1101 current_ir_graph = irg;
1102 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1104 /* put irgs into the pqueue */
1105 pqueue = new_pqueue();
1107 list_for_each_entry(call_entry, curr_call, &env->calls, list) {
1108 assert(is_Call(curr_call->call));
1109 maybe_push_call(pqueue, curr_call, inline_threshold);
1112 /* note that the list of possible calls is updated during the process */
1113 while (!pqueue_empty(pqueue)) {
1115 call_entry *curr_call = (call_entry*)pqueue_pop_front(pqueue);
1116 ir_graph *callee = curr_call->callee;
1117 ir_node *call_node = curr_call->call;
1118 inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);
1119 ir_entity *ent = get_irg_entity(callee);
1120 mtp_additional_properties props
1121 = get_entity_additional_properties(ent);
1125 if (!(props & mtp_property_always_inline)
1126 && env->n_nodes + callee_env->n_nodes > maxsize) {
1127 DB((dbg, LEVEL_2, "%+F: too big (%d) + %+F (%d)\n", irg,
1128 env->n_nodes, callee, callee_env->n_nodes));
1132 calleee = pmap_get(ir_graph, copied_graphs, callee);
1133 if (calleee != NULL) {
1134 int benefice = curr_call->benefice;
1136 * Reduce the weight for recursive function IFF not all arguments are const.
1137 * inlining recursive functions is rarely good.
1139 if (!curr_call->all_const)
1141 if (benefice < inline_threshold)
1145 * Remap callee if we have a copy.
1148 callee_env = (inline_irg_env*)get_irg_link(callee);
1151 if (current_ir_graph == callee) {
1153 * Recursive call: we cannot directly inline because we cannot
1154 * walk the graph and change it. So we have to make a copy of
1157 int benefice = curr_call->benefice;
1161 * Reduce the weight for recursive function IFF not all arguments are const.
1162 * inlining recursive functions is rarely good.
1164 if (!curr_call->all_const)
1166 if (benefice < inline_threshold)
1169 ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1172 * No copy yet, create one.
1173 * Note that recursive methods are never leafs, so it is
1174 * sufficient to test this condition here.
1176 copy = create_irg_copy(callee);
1178 /* create_irg_copy() destroys the Proj links, recompute them */
1179 phiproj_computed = 0;
1181 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1183 /* allocate a new environment */
1184 callee_env = alloc_inline_irg_env();
1185 set_irg_link(copy, callee_env);
1187 assure_irg_properties(copy, IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO);
1188 memset(&wenv, 0, sizeof(wenv));
1189 wenv.x = callee_env;
1190 wenv.ignore_callers = 1;
1191 irg_walk_graph(copy, NULL, collect_calls2, &wenv);
1194 * Enter the entity of the original graph. This is needed
1195 * for inline_method(). However, note that ent->irg still points
1196 * to callee, NOT to copy.
1198 set_irg_entity(copy, get_irg_entity(callee));
1200 pmap_insert(copied_graphs, callee, copy);
1203 /* we have only one caller: the original graph */
1204 callee_env->n_callers = 1;
1205 callee_env->n_callers_orig = 1;
1207 if (! phiproj_computed) {
1208 phiproj_computed = 1;
1209 collect_phiprojs(current_ir_graph);
1211 did_inline = inline_method(call_node, callee);
1215 /* call was inlined, Phi/Projs for current graph must be recomputed */
1216 phiproj_computed = 0;
1218 /* remove it from the caller list */
1219 list_del(&curr_call->list);
1221 /* callee was inline. Append its call list. */
1222 env->got_inline = 1;
1223 --env->n_call_nodes;
1225 /* we just generate a bunch of new calls */
1226 loop_depth = curr_call->loop_depth;
1227 list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
1228 inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
1230 call_entry *new_entry;
1232 /* after we have inlined callee, all called methods inside
1233 * callee are now called once more */
1236 /* Note that the src list points to Call nodes in the inlined graph,
1237 * but we need Call nodes in our graph. Luckily the inliner leaves
1238 * this information in the link field. */
1239 new_call = (ir_node*)get_irn_link(centry->call);
1240 if (get_irn_irg(new_call) != irg) {
1241 /* centry->call has not been copied, which means it is dead.
1242 * This might happen during inlining, if a const function,
1243 * which cannot be inlined is only used as an unused argument
1244 * of another function, which is inlined. */
1247 assert(is_Call(new_call));
1249 new_entry = duplicate_call_entry(centry, new_call, loop_depth);
1250 list_add_tail(&new_entry->list, &env->calls);
1251 maybe_push_call(pqueue, new_entry, inline_threshold);
1254 env->n_call_nodes += callee_env->n_call_nodes;
1255 env->n_nodes += callee_env->n_nodes;
1256 --callee_env->n_callers;
1258 ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1263  * Heuristic inliner. Calculates a benefice value for every call and inlines
1264  * those calls with a value higher than the threshold.
 *
 * @param maxsize           maximum allowed node count for a graph after inlining
 *                          (compared against the per-graph node counters)
 * @param inline_threshold  minimum benefice value a call must reach to be inlined
 * @param after_inline_opt  optimization callback run on every graph that actually
 *                          had calls inlined; may be NULL to skip this step
1266 void inline_functions(unsigned maxsize, int inline_threshold,
1267                       opt_ptr after_inline_opt)
1269 inline_irg_env *env;
1273 pmap *copied_graphs;
1274 pmap_entry *pm_entry;
/* Remember the graph that is current on entry; it is restored before returning. */
1277 rem = current_ir_graph;
1278 obstack_init(&temp_obst);
/* NOTE(review): presumably returns every graph of the program in an order
 * suitable for bottom-up inlining -- confirm against create_irg_list(). */
1280 irgs = create_irg_list();
1282 /* a map for the copied graphs, used to inline recursive calls */
1283 copied_graphs = pmap_create();
1285 /* extend all irgs by a temporary data structure for inlining. */
1286 n_irgs = get_irp_n_irgs();
1287 for (i = 0; i < n_irgs; ++i)
1288 set_irg_link(irgs[i], alloc_inline_irg_env());
1290 /* Pre-compute information in temporary data structure. */
1291 wenv.ignore_runtime = 0;
1292 wenv.ignore_callers = 0;
1293 for (i = 0; i < n_irgs; ++i) {
1294 ir_graph *irg = irgs[i];
/* Drop stale callee information; collect_calls2 recomputes what is needed. */
1296 free_callee_info(irg);
1298 wenv.x = (inline_irg_env*)get_irg_link(irg);
1299 assure_loopinfo(irg);
1300 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
1303 /* -- and now inline. -- */
1304 for (i = 0; i < n_irgs; ++i) {
1305 ir_graph *irg = irgs[i];
1307 inline_into(irg, maxsize, inline_threshold, copied_graphs);
/* Post-processing: run the optional cleanup optimization on graphs that got
 * calls inlined and emit per-graph statistics on the debug channel. */
1310 for (i = 0; i < n_irgs; ++i) {
1311 ir_graph *irg = irgs[i];
1313 env = (inline_irg_env*)get_irg_link(irg);
1314 if (env->got_inline && after_inline_opt != NULL) {
1315 /* this irg got calls inlined: optimize it */
1316 after_inline_opt(irg);
1318 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
1319 DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
1320 env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
1321 env->n_callers_orig, env->n_callers,
1322 get_entity_name(get_irg_entity(irg))));
1326 /* kill the copied graphs: we don't need them anymore */
1327 foreach_pmap(copied_graphs, pm_entry) {
1328 ir_graph *copy = (ir_graph*)pm_entry->value;
1330 /* reset the entity, otherwise it will be deleted in the next step ... */
1331 set_irg_entity(copy, NULL);
1332 free_ir_graph(copy);
1334 pmap_destroy(copied_graphs);
/* Release all temporary inline_irg_env data allocated on temp_obst and
 * restore the previously current graph. */
1338 obstack_free(&temp_obst, NULL);
1339 current_ir_graph = rem;
/* Parameter environment for running inline_functions() as an ir_prog pass
 * (allocated in inline_functions_pass(), consumed by inline_functions_wrapper()). */
1342 typedef struct inline_functions_pass_t {
1343 ir_prog_pass_t pass;
/* NOTE(review): an 'unsigned maxsize' member is assigned in
 * inline_functions_pass() and read in the wrapper, but not visible here --
 * presumably declared on an elided line; confirm in the full file. */
1345 int inline_threshold;
1346 opt_ptr after_inline_opt;
1347 } inline_functions_pass_t;
1350 * Wrapper to run inline_functions() as a ir_prog pass.
1352 static int inline_functions_wrapper(ir_prog *irp, void *context)
/* context carries the parameters stored by inline_functions_pass(). */
1354 inline_functions_pass_t *pass = (inline_functions_pass_t*)context;
1357 inline_functions(pass->maxsize, pass->inline_threshold,
1358 pass->after_inline_opt);
1362 /* create a ir_prog pass for inline_functions */
1363 ir_prog_pass_t *inline_functions_pass(
1364 const char *name, unsigned maxsize, int inline_threshold,
1365 opt_ptr after_inline_opt)
/* Allocate a zero-initialized pass environment and record the parameters
 * that inline_functions_wrapper() later forwards to inline_functions().
 * NOTE(review): no matching free is visible here -- presumably the pass
 * infrastructure owns and releases it; confirm. */
1367 inline_functions_pass_t *pass = XMALLOCZ(inline_functions_pass_t);
1369 pass->maxsize = maxsize;
1370 pass->inline_threshold = inline_threshold;
1371 pass->after_inline_opt = after_inline_opt;
/* Fall back to the default pass name when the caller passes NULL. */
1373 return def_prog_pass_constructor(
1374 &pass->pass, name ? name : "inline_functions",
1375 inline_functions_wrapper);
/* Initialize the inliner's debug module (the 'dbg' handle used by the
 * DB() statistics/trace output in this file). */
1378 void firm_init_inline(void)
1380 FIRM_DBG_REGISTER(dbg, "firm.opt.inline");