/*
 * Copyright (C) 1995-2011 University of Karlsruhe.  All rights reserved.
 *
 * This file is part of libFirm.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License
 * Agreement provided with the Software.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

/**
 * @file
 * @brief   Dead node elimination and Procedure Inlining.
 * @author  Michael Beck, Goetz Lindenmaier
 */
#include "irgraph_t.h"
#include "iroptimize.h"
#include "irbackedge_t.h"
#include "analyze_irg_args.h"
#include "iredges_t.h"
#include "iropt_dbg.h"
#include "irnodemap.h"

DEBUG_ONLY(static firm_dbg_module_t *dbg;)
/*------------------------------------------------------------------*/
/* Routines for dead node elimination / copying garbage collection  */
/*------------------------------------------------------------------*/
/**
 * Remember the new node in the old node by using a field all nodes have.
 */
static void set_new_node(ir_node *node, ir_node *new_node)
{
	set_irn_link(node, new_node);
}

/**
 * Get this new node, before the old node is forgotten.
 */
static inline ir_node *get_new_node(ir_node *old_node)
{
	assert(irn_visited(old_node));
	return (ir_node*) get_irn_link(old_node);
}
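/* Together these two helpers implement the old->new map used while copying:
 * copy_node_inline() records each copy via set_new_node(), and
 * set_preds_inline() retrieves it via get_new_node() when rewiring inputs. */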
/*--------------------------------------------------------------------*/
/* Functionality for inlining                                         */
/*--------------------------------------------------------------------*/
/**
 * Copy a node for inlining.  Updates attributes that change when
 * inlining but not for dead node elimination.
 *
 * Copies the node by calling copy_node() and then updates the entity if
 * it's a local one.  env must be a pointer to the frame type of the
 * inlined procedure.  The new entities must be in the link fields of
 * the old entities.
 */
static void copy_node_inline(ir_node *node, void *env)
{
	ir_graph *new_irg  = (ir_graph*) env;
	ir_node  *new_node = irn_copy_into_irg(node, new_irg);

	set_new_node(node, new_node);
	if (is_Sel(node)) {
		ir_graph  *old_irg        = get_irn_irg(node);
		ir_type   *old_frame_type = get_irg_frame_type(old_irg);
		ir_entity *old_entity     = get_Sel_entity(node);
		assert(is_Sel(new_node));
		/* use copied entities from the new frame */
		if (get_entity_owner(old_entity) == old_frame_type) {
			ir_entity *new_entity = (ir_entity*)get_entity_link(old_entity);
			assert(new_entity != NULL);
			set_Sel_entity(new_node, new_entity);
		}
	} else if (is_Block(new_node)) {
		new_node->attr.block.irg.irg = new_irg;
	}
}
static void set_preds_inline(ir_node *node, void *env)
{
	irn_rewire_inputs(node);

	/* move constants into start block */
	ir_node *new_node = get_new_node(node);
	if (is_irn_start_block_placed(new_node)) {
		ir_graph *new_irg     = (ir_graph *) env;
		ir_node  *start_block = get_irg_start_block(new_irg);
		set_nodes_block(new_node, start_block);
	}
}
/**
 * Walker: checks whether a node prevents inlining of the graph:
 * a block whose address was taken, a value-type or parameter access
 * on the frame, or a stack Alloc.
 */
static void find_addr(ir_node *node, void *env)
{
	bool *allow_inline = (bool*)env;

	if (is_Block(node) && get_Block_entity(node)) {
		/* Currently we cannot handle blocks whose address was taken
		 * correctly when inlining. */
		*allow_inline = false;
	} else if (is_Sel(node)) {
		ir_graph *irg = current_ir_graph;
		if (get_Sel_ptr(node) == get_irg_frame(irg)) {
			/* access to frame */
			ir_entity *ent = get_Sel_entity(node);
			if (get_entity_owner(ent) != get_irg_frame_type(irg)) {
				/* access to value_type */
				*allow_inline = false;
			}
			if (is_parameter_entity(ent)) {
				*allow_inline = false;
			}
		}
	} else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
		/* Refuse to inline a call using alloca unless the user explicitly
		 * forces it, as this may change the program's memory overhead
		 * drastically when the function using alloca is called in a loop.
		 * For the GCC sources in SPEC2000, inlining into schedule_block
		 * caused it to require 2GB of RAM instead of 256MB.
		 *
		 * Sadly this is true for our implementation as well.
		 * Moreover, we cannot differentiate between alloca() and VLA yet,
		 * so this disables inlining of functions using VLA (which are
		 * completely safe).
		 *
		 * Two possible solutions:
		 * - add a flag to the Alloc node for "real" alloca() calls
		 * - add a new Stack-Restore node at the end of a function using
		 *   alloca()
		 */
		*allow_inline = false;
	}
}
/**
 * Check if we can inline a given call.
 * Currently, we cannot inline two cases:
 * - calls with compound arguments or results
 * - graphs that take the address of a parameter
 *
 * These conditions are checked here.
 */
static bool can_inline(ir_node *call, ir_graph *called_graph)
{
	ir_entity *called      = get_irg_entity(called_graph);
	ir_type   *called_type = get_entity_type(called);
	ir_type   *call_type   = get_Call_type(call);
	size_t     n_params    = get_method_n_params(called_type);
	size_t     n_arguments = get_method_n_params(call_type);
	size_t     n_res       = get_method_n_ress(called_type);
	mtp_additional_properties props = get_entity_additional_properties(called);
	size_t     i;

	if (props & mtp_property_noinline)
		return false;

	if (n_arguments != n_params) {
		/* this is a bad feature of C: without a prototype, we can
		 * call a function with fewer parameters than needed.  Currently
		 * we don't support this, although we could use Unknown then. */
		return false;
	}
	if (n_res != get_method_n_ress(call_type)) {
		return false;
	}
	/* Argh, compiling C has some bad consequences:
	 * When the argument types do not match the declared parameter types,
	 * the behaviour is implementation defined.  We support inlining if the
	 * bit size of the types matches AND the same arithmetic is used. */
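	/* For example, with a 32-bit int and a 32-bit long, an argument of the
	 * one type can be passed for a parameter of the other: both modes have
	 * 32 bits and integer arithmetic, so the bits are simply reinterpreted.
	 * Passing a 32-bit float for a 32-bit int is rejected, because the
	 * arithmetic differs although the size matches. */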
	for (i = 0; i < n_params; ++i) {
		ir_type *param_tp = get_method_param_type(called_type, i);
		ir_type *arg_tp   = get_method_param_type(call_type, i);

		if (param_tp != arg_tp) {
			ir_mode *pmode = get_type_mode(param_tp);
			ir_mode *amode = get_type_mode(arg_tp);

			if (pmode == NULL || amode == NULL)
				return false;
			if (get_mode_size_bits(pmode) != get_mode_size_bits(amode))
				return false;
			if (get_mode_arithmetic(pmode) != get_mode_arithmetic(amode))
				return false;
			/* otherwise we can simply "reinterpret" the bits */
		}
	}
	for (i = 0; i < n_res; ++i) {
		ir_type *decl_res_tp = get_method_res_type(called_type, i);
		ir_type *used_res_tp = get_method_res_type(call_type, i);

		if (decl_res_tp != used_res_tp) {
			ir_mode *decl_mode = get_type_mode(decl_res_tp);
			ir_mode *used_mode = get_type_mode(used_res_tp);
			if (decl_mode == NULL || used_mode == NULL)
				return false;
			if (get_mode_size_bits(decl_mode) != get_mode_size_bits(used_mode))
				return false;
			if (get_mode_arithmetic(decl_mode) != get_mode_arithmetic(used_mode))
				return false;
			/* otherwise we can "reinterpret" the bits */
		}
	}

	/* check parameters for compound arguments */
	for (i = 0; i < n_params; ++i) {
		ir_type *p_type = get_method_param_type(call_type, i);

		if (is_compound_type(p_type))
			return false;
	}

	/* check results for compound arguments */
	for (i = 0; i < n_res; ++i) {
		ir_type *r_type = get_method_res_type(call_type, i);

		if (is_compound_type(r_type))
			return false;
	}

	bool res = true;
	irg_walk_graph(called_graph, find_addr, NULL, &res);

	return res;
}
enum exc_mode {
	exc_handler,   /**< There is a handler. */
	exc_no_handler /**< Exception handling not represented. */
};
/**
 * Copy all entities of the stack frame of one irg to the stack frame of
 * another.  Sets the entity links of the old entities to the copies.
 */
static void copy_frame_entities(ir_graph *from, ir_graph *to)
{
	ir_type *from_frame = get_irg_frame_type(from);
	ir_type *to_frame   = get_irg_frame_type(to);
	size_t   n_members  = get_class_n_members(from_frame);
	size_t   i;

	assert(from_frame != to_frame);

	for (i = 0; i < n_members; ++i) {
		ir_entity *old_ent = get_class_member(from_frame, i);
		ir_entity *new_ent = copy_entity_own(old_ent, to_frame);
		set_entity_link(old_ent, new_ent);
		assert(!is_parameter_entity(old_ent));
	}
}
/* Inlines a method at the given call site. */
int inline_method(ir_node *call, ir_graph *called_graph)
{
	/* we cannot inline some types of calls */
	if (! can_inline(call, called_graph))
		return 0;

	/* We cannot inline a recursive call.  The graph must be copied before
	 * the call to inline_method() using create_irg_copy(). */
	ir_graph *irg = get_irn_irg(call);
	if (called_graph == irg)
		return 0;

	ir_entity *ent = get_irg_entity(called_graph);
	ir_type   *mtp = get_entity_type(ent);
	ir_type   *ctp = get_Call_type(call);
	int   n_params = get_method_n_params(mtp);

	ir_graph *rem = current_ir_graph;
	current_ir_graph = irg;

	DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));

	/* optimizations can cause problems when allocating new nodes */
	int rem_opt = get_opt_optimize();
	set_optimize(0);

	/* Handle graph state */
	assert(get_irg_pinned(irg) == op_pin_state_pinned);
	assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
	clear_irg_properties(irg, IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE
	                     | IR_GRAPH_PROPERTY_CONSISTENT_ENTITY_USAGE);
	set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
	clear_irg_properties(irg, IR_GRAPH_PROPERTY_CONSISTENT_ENTITY_USAGE);
	edges_deactivate(irg);

	/* here we know we WILL inline, so inform the statistics */
	hook_inline(call, called_graph);

	/* -- Decide how to handle exception control flow: Is there a handler
	   for the Call node, or do we branch directly to End on an exception?
	   exc_handler:    There is a handler.
	   exc_no_handler: Exception handling is not represented in Firm. -- */
	ir_node *Xproj = NULL;
	for (ir_node *proj = (ir_node*)get_irn_link(call); proj != NULL;
	     proj = (ir_node*)get_irn_link(proj)) {
		long proj_nr = get_Proj_proj(proj);
		if (proj_nr == pn_Call_X_except) Xproj = proj;
	}
	enum exc_mode exc_handling = Xproj != NULL ? exc_handler : exc_no_handler;
	/* create the argument tuple */
	ir_node **args_in = ALLOCAN(ir_node*, n_params);

	ir_node *block = get_nodes_block(call);
	for (int i = n_params - 1; i >= 0; --i) {
		ir_node *arg      = get_Call_param(call, i);
		ir_type *param_tp = get_method_param_type(mtp, i);
		ir_mode *mode     = get_type_mode(param_tp);

		if (mode != get_irn_mode(arg)) {
			arg = new_r_Conv(block, arg, mode);
		}
		args_in[i] = arg;
	}

	/* Pre_call simulates the Start node within the calling procedure and
	 * later replaces the Start node of the called graph.  Post_call is the
	 * old Call node and collects the results of the called graph.  Both
	 * will end up being a tuple. */
	ir_node *post_bl = get_nodes_block(call);
	/* XxMxPxPxPxT of Start + parameter of Call */
	ir_node *in[pn_Start_max+1];
	in[pn_Start_M]              = get_Call_mem(call);
	in[pn_Start_X_initial_exec] = new_r_Jmp(post_bl);
	in[pn_Start_P_frame_base]   = get_irg_frame(irg);
	in[pn_Start_T_args]         = new_r_Tuple(post_bl, n_params, args_in);
	ir_node *pre_call  = new_r_Tuple(post_bl, pn_Start_max+1, in);
	ir_node *post_call = call;
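	/* Sketch of the Start replacement: after copying, a Proj on the callee's
	 * Start becomes a Proj on pre_call and folds to the matching in[] entry,
	 * e.g. Proj(pre_call, pn_Start_T_args) yields the argument Tuple built
	 * above and the initial memory resolves to the Call's memory input. */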
	/* -- The new block gets the ins of the old block, pre_call and all its
	      predecessors and all Phi nodes. -- */
	part_block(pre_call);

	/* increment visited flag for later walk */
	inc_irg_visited(called_graph);

	/* link some nodes to nodes in the current graph, so that instead of
	 * being copied the linked nodes will get used.
	 * This way the copier uses the created Tuple instead of copying the
	 * Start node, and similarly for singleton nodes like NoMem and Bad.
	 * Note: this prevents predecessors from being copied - only do it for
	 * nodes without predecessors */
	ir_node *start_block = get_irg_start_block(called_graph);
	set_new_node(start_block, get_nodes_block(pre_call));
	mark_irn_visited(start_block);

	ir_node *start = get_irg_start(called_graph);
	set_new_node(start, pre_call);
	mark_irn_visited(start);

	ir_node *nomem = get_irg_no_mem(called_graph);
	set_new_node(nomem, get_irg_no_mem(irg));
	mark_irn_visited(nomem);

	/* the entity link is used to link entities on the old stack frame to
	 * their copies on the new stack frame */
	irp_reserve_resources(irp, IRP_RESOURCE_ENTITY_LINK);

	/* copy entities and nodes */
	assert(!irn_visited(get_irg_end(called_graph)));
	copy_frame_entities(called_graph, irg);
	irg_walk_core(get_irg_end(called_graph), copy_node_inline, set_preds_inline,
	              irg);

	irp_free_resources(irp, IRP_RESOURCE_ENTITY_LINK);
	/* -- Merge the end of the inlined procedure with the call site -- */
	/* We will turn the old Call node into a Tuple with the following
	   predecessors:
	   0: Phi of all Memories of Return statements.
	   1: Jmp from new Block that merges the control flow from all exception
	      predecessors of the old end block.
	   2: Tuple of all arguments.
	   3: Phi of Exception memories.
	   In case the old Call directly branches to End on an exception we don't
	   need the block merging all exceptions nor the Phi of the exception
	   memories. */
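	/* A caller-side Proj(call, pn_Call_T_result), for instance, keeps
	 * working: after turn_into_tuple() below it selects the corresponding
	 * Tuple predecessor, i.e. the result tuple assembled from the callee's
	 * Return values. */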
	/* Precompute some values */
	ir_node *end_bl = get_new_node(get_irg_end_block(called_graph));
	ir_node *end    = get_new_node(get_irg_end(called_graph));
	int      arity  = get_Block_n_cfgpreds(end_bl); /* arity = n_exc + n_ret */
	int      n_res  = get_method_n_ress(get_Call_type(call));

	ir_node **res_pred = XMALLOCN(ir_node*, n_res);
	ir_node **cf_pred  = XMALLOCN(ir_node*, arity);

	/* archive keepalives */
	int irn_arity = get_irn_arity(end);
	for (int i = 0; i < irn_arity; i++) {
		ir_node *ka = get_End_keepalive(end, i);
		if (!is_Bad(ka))
			add_End_keepalive(get_irg_end(irg), ka);
	}
	/* replace Return nodes by Jump nodes */
	int n_ret = 0;
	for (int i = 0; i < arity; i++) {
		ir_node *ret = get_Block_cfgpred(end_bl, i);
		if (is_Return(ret)) {
			ir_node *block = get_nodes_block(ret);
			cf_pred[n_ret] = new_r_Jmp(block);
			n_ret++;
		}
	}
	set_irn_in(post_bl, n_ret, cf_pred);
	/* build a Tuple for all results of the method.
	 * add Phi node if there was more than one Return. */
	turn_into_tuple(post_call, pn_Call_max+1);
	/* First the Memory-Phi */
	int n_mem_phi = 0;
	for (int i = 0; i < arity; i++) {
		ir_node *ret = get_Block_cfgpred(end_bl, i);
		if (is_Return(ret)) {
			cf_pred[n_mem_phi++] = get_Return_mem(ret);
		} else if (is_Call(ret)) {
			/* memory output for some exceptions is directly connected to End */
			cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 3);
		} else if (is_fragile_op(ret)) {
			/* We rely that all cfops have the memory output at the same position. */
			cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 0);
		} else if (is_Raise(ret)) {
			cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 1);
		}
	}
	ir_node *phi = new_r_Phi(post_bl, n_mem_phi, cf_pred, mode_M);
	set_Tuple_pred(call, pn_Call_M, phi);
	/* Conserve Phi-list for further inlinings -- but might be optimized */
	if (get_nodes_block(phi) == post_bl) {
		set_irn_link(phi, get_irn_link(post_bl));
		set_irn_link(post_bl, phi);
	}
	/* Now the real results */
	if (n_res > 0) {
		for (int j = 0; j < n_res; j++) {
			ir_type *res_type = get_method_res_type(ctp, j);
			ir_mode *res_mode = get_type_mode(res_type);
			int n_ret = 0;
			for (int i = 0; i < arity; i++) {
				ir_node *ret = get_Block_cfgpred(end_bl, i);
				if (is_Return(ret)) {
					ir_node *res = get_Return_res(ret, j);
					if (get_irn_mode(res) != res_mode) {
						ir_node *block = get_nodes_block(res);
						res = new_r_Conv(block, res, res_mode);
					}
					cf_pred[n_ret] = res;
					n_ret++;
				}
			}
			if (n_ret > 0) {
				phi = new_r_Phi(post_bl, n_ret, cf_pred, res_mode);
			} else {
				phi = new_r_Bad(irg, res_mode);
			}
			res_pred[j] = phi;
			/* Conserve Phi-list for further inlinings -- but might be optimized */
			if (get_nodes_block(phi) == post_bl) {
				set_Phi_next(phi, get_Block_phis(post_bl));
				set_Block_phis(post_bl, phi);
			}
		}
		ir_node *result_tuple = new_r_Tuple(post_bl, n_res, res_pred);
		set_Tuple_pred(call, pn_Call_T_result, result_tuple);
	} else {
		set_Tuple_pred(call, pn_Call_T_result, new_r_Bad(irg, mode_T));
	}
	/* handle the regular call */
	set_Tuple_pred(call, pn_Call_X_regular, new_r_Jmp(post_bl));
	/* Finally the exception control flow.
	   We have two possible situations:
	   First, the Call branches to an exception handler:
	   We need to add a Phi node to collect the memory containing the
	   exception objects.  Further we need to add another block to get a
	   correct representation of this Phi.  To this block we add a Jmp that
	   resolves into the X output of the Call when the Call is turned into
	   a tuple.
	   Second, there is no exception edge:
	   Just add all inlined exception branches to the End node. */
	if (exc_handling == exc_handler) {
		int n_exc = 0;
		for (int i = 0; i < arity; i++) {
			ir_node *ret = get_Block_cfgpred(end_bl, i);
			ir_node *irn = skip_Proj(ret);
			if (is_fragile_op(irn) || is_Raise(irn)) {
				cf_pred[n_exc] = ret;
				++n_exc;
			}
		}
		if (n_exc > 0) {
			if (n_exc == 1) {
				set_Tuple_pred(call, pn_Call_X_except, cf_pred[0]);
			} else {
				ir_node *block = new_r_Block(irg, n_exc, cf_pred);
				set_Tuple_pred(call, pn_Call_X_except, new_r_Jmp(block));
			}
		} else {
			set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg, mode_X));
		}
	} else {
		/* assert(exc_handling == exc_no_handler || there are no exceptions) */
		int n_exc = 0;
		for (int i = 0; i < arity; i++) {
			ir_node *ret = get_Block_cfgpred(end_bl, i);
			ir_node *irn = skip_Proj(ret);

			if (is_fragile_op(irn) || is_Raise(irn)) {
				cf_pred[n_exc] = ret;
				n_exc++;
			}
		}
		ir_node  *main_end_bl       = get_irg_end_block(irg);
		int       main_end_bl_arity = get_irn_arity(main_end_bl);
		ir_node **end_preds         = XMALLOCN(ir_node*, n_exc + main_end_bl_arity);

		for (int i = 0; i < main_end_bl_arity; ++i)
			end_preds[i] = get_irn_n(main_end_bl, i);
		for (int i = 0; i < n_exc; ++i)
			end_preds[main_end_bl_arity + i] = cf_pred[i];
		set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
		set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg, mode_X));
		free(end_preds);
	}
	free(res_pred);
	free(cf_pred);
	/* -- Turn CSE back on. -- */
	set_optimize(rem_opt);
	current_ir_graph = rem;

	return 1;
}
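/* A minimal usage sketch (hypothetical caller; inline_into() below does the
 * same dance):
 *
 *     collect_phiprojs(irg);
 *     if (inline_method(call_node, callee_irg)) {
 *         // the Call is now a Tuple; graph properties were invalidated
 *     }
 */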
/********************************************************************/
/* Apply inlining to small methods.                                 */
/********************************************************************/

static struct obstack temp_obst;
/** Represents a possible inlinable call in a graph. */
typedef struct call_entry {
	ir_node   *call;        /**< The Call node. */
	ir_graph  *callee;      /**< The callee IR-graph. */
	list_head list;         /**< List head for linking the next one. */
	int       loop_depth;   /**< The loop depth of this call. */
	int       benefice;     /**< The calculated benefice of this call. */
	unsigned  local_adr:1;  /**< Set if this call gets an address of a local variable. */
	unsigned  all_const:1;  /**< Set if this call has only constant parameters. */
} call_entry;
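/* Lifecycle of a call_entry: collect_calls2() allocates one per call with a
 * known callee, calc_inline_benefice() fills in benefice/local_adr/all_const,
 * and inline_into() feeds the entries through a priority queue, duplicating
 * them for calls that appear anew after an inlining. */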
/**
 * Returns the irg called from a Call node.  If the irg is not
 * known, NULL is returned.
 *
 * @param call  the call node
 */
static ir_graph *get_call_called_irg(ir_node *call)
{
	ir_node *addr = get_Call_ptr(call);
	if (is_SymConst_addr_ent(addr)) {
		ir_entity *ent = get_SymConst_entity(addr);
		/* we don't know which function gets finally bound to a weak symbol */
		if (get_entity_linkage(ent) & IR_LINKAGE_WEAK)
			return NULL;

		return get_entity_irg(ent);
	}

	return NULL;
}
/**
 * Environment for inlining irgs.
 */
typedef struct {
	list_head calls;             /**< List of all call nodes in this graph. */
	unsigned  *local_weights;    /**< Once allocated, the beneficial weight for transmitting local addresses. */
	unsigned  n_nodes;           /**< Number of nodes in graph except Id, Tuple, Proj, Start, End. */
	unsigned  n_blocks;          /**< Number of Blocks in graph without Start and End block. */
	unsigned  n_nodes_orig;      /**< for statistics */
	unsigned  n_call_nodes;      /**< Number of Call nodes in the graph. */
	unsigned  n_call_nodes_orig; /**< for statistics */
	unsigned  n_callers;         /**< Number of known graphs that call this graph. */
	unsigned  n_callers_orig;    /**< for statistics */
	unsigned  got_inline:1;      /**< Set, if at least one call inside this graph was inlined. */
	unsigned  recursive:1;       /**< Set, if this function is self recursive. */
} inline_irg_env;
/**
 * Allocate a new environment for inlining.
 */
static inline_irg_env *alloc_inline_irg_env(void)
{
	inline_irg_env *env    = OALLOC(&temp_obst, inline_irg_env);
	INIT_LIST_HEAD(&env->calls);
	env->local_weights     = NULL;
	env->n_nodes           = -2; /* do not count Start, End */
	env->n_blocks          = -2; /* do not count Start, End Block */
	env->n_nodes_orig      = -2; /* do not count Start, End */
	env->n_call_nodes      = 0;
	env->n_call_nodes_orig = 0;
	env->n_callers         = 0;
	env->n_callers_orig    = 0;
	env->got_inline        = 0;
	env->recursive         = 0;
	return env;
}
typedef struct walker_env {
	inline_irg_env *x;   /**< the inline environment */
	char ignore_runtime; /**< the ignore runtime flag */
	char ignore_callers; /**< if set, do not update the callers data */
} wenv_t;
/**
 * post-walker: collect all calls in the inline-environment
 * of a graph and sum some statistics.
 */
static void collect_calls2(ir_node *call, void *ctx)
{
	wenv_t         *env = (wenv_t*)ctx;
	inline_irg_env *x   = env->x;
	unsigned       code = get_irn_opcode(call);
	ir_graph       *callee;
	call_entry     *entry;

	/* count meaningful nodes in irg */
	if (code != iro_Proj && code != iro_Tuple && code != iro_Sync) {
		if (code != iro_Block) {
			++x->n_nodes;
			++x->n_nodes_orig;
		} else {
			++x->n_blocks;
		}
	}

	if (code != iro_Call) return;

	/* check, if it's a runtime call */
	if (env->ignore_runtime) {
		ir_node *symc = get_Call_ptr(call);

		if (is_SymConst_addr_ent(symc)) {
			ir_entity *ent = get_SymConst_entity(symc);

			if (get_entity_additional_properties(ent) & mtp_property_runtime)
				return;
		}
	}

	/* collect all call nodes */
	++x->n_call_nodes;
	++x->n_call_nodes_orig;

	callee = get_call_called_irg(call);
	if (callee != NULL) {
		if (! env->ignore_callers) {
			inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);
			/* count all static callers */
			++callee_env->n_callers;
			++callee_env->n_callers_orig;
		}
		if (callee == current_ir_graph)
			x->recursive = 1;

		/* link it in the list of possible inlinable entries */
		entry = OALLOC(&temp_obst, call_entry);
		entry->call       = call;
		entry->callee     = callee;
		entry->loop_depth = get_irn_loop(get_nodes_block(call))->depth;
		entry->benefice   = 0;
		entry->local_adr  = 0;
		entry->all_const  = 0;

		list_add_tail(&entry->list, &x->calls);
	}
}
/**
 * Duplicate a call entry.
 *
 * @param entry             the original entry to duplicate
 * @param new_call          the new call node
 * @param loop_depth_delta  delta value for the loop depth
 */
static call_entry *duplicate_call_entry(const call_entry *entry,
                                        ir_node *new_call, int loop_depth_delta)
{
	call_entry *nentry = OALLOC(&temp_obst, call_entry);
	nentry->call       = new_call;
	nentry->callee     = entry->callee;
	nentry->benefice   = entry->benefice;
	nentry->loop_depth = entry->loop_depth + loop_depth_delta;
	nentry->local_adr  = entry->local_adr;
	nentry->all_const  = entry->all_const;

	return nentry;
}
/**
 * Calculate the parameter weights for transmitting the address of a local variable.
 */
static unsigned calc_method_local_weight(ir_node *arg)
{
	int      j;
	unsigned v, weight = 0;

	for (unsigned i = get_irn_n_outs(arg); i-- > 0; ) {
		ir_node *succ = get_irn_out(arg, i);

		switch (get_irn_opcode(succ)) {
		case iro_Load:
		case iro_Store:
			/* Loads and Stores can be removed */
			weight += 3;
			break;
		case iro_Sel:
			/* check if all args are constant */
			for (j = get_Sel_n_indexs(succ) - 1; j >= 0; --j) {
				ir_node *idx = get_Sel_index(succ, j);
				if (!is_Const(idx))
					return 0;
			}
			/* Check users on this Sel. Note: if a 0 is returned here, there was
			   some unsupported node. */
			v = calc_method_local_weight(succ);
			if (v == 0)
				return 0;
			/* we can kill one Sel with constant indexes, this is cheap */
			weight += v + 1;
			break;
		case iro_Id:
			/* when looking backward we might find Id nodes */
			weight += calc_method_local_weight(succ);
			break;
		case iro_Tuple:
			/* unoptimized tuple */
			for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
				ir_node *pred = get_Tuple_pred(succ, j);
				if (pred == arg) {
					/* look for Proj(j) */
					for (unsigned k = get_irn_n_outs(succ); k-- > 0; ) {
						ir_node *succ_succ = get_irn_out(succ, k);
						if (is_Proj(succ_succ)) {
							if (get_Proj_proj(succ_succ) == j) {
								weight += calc_method_local_weight(succ_succ);
							}
						} else {
							/* this should NOT happen */
							return 0;
						}
					}
				}
			}
			break;
		default:
			/* any other node: unsupported yet or bad. */
			return 0;
		}
	}
	return weight;
}
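/* The weight thus approximates how many nodes scalar replacement can remove
 * after inlining: each Load/Store through the passed address counts a
 * little, constant-index Sels are free to kill, and any user we cannot
 * analyze makes the whole argument worthless (weight 0). */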
/**
 * Calculate the parameter weights for transmitting the address of a local variable.
 */
static void analyze_irg_local_weights(inline_irg_env *env, ir_graph *irg)
{
	ir_entity *ent = get_irg_entity(irg);
	ir_type   *mtp;
	size_t    nparams;
	long      proj_nr;
	ir_node   *irg_args, *arg;

	mtp     = get_entity_type(ent);
	nparams = get_method_n_params(mtp);

	/* allocate a new array. currently used as 'analysed' flag */
	env->local_weights = NEW_ARR_D(unsigned, &temp_obst, nparams);

	/* If the method has no parameters we have nothing to do. */
	if (nparams <= 0)
		return;

	assure_irg_outs(irg);
	irg_args = get_irg_args(irg);
	for (unsigned i = get_irn_n_outs(irg_args); i-- > 0; ) {
		arg     = get_irn_out(irg_args, i);
		proj_nr = get_Proj_proj(arg);
		env->local_weights[proj_nr] = calc_method_local_weight(arg);
	}
}
/**
 * Calculate the benefice for transmitting a local variable address.
 * After inlining, the local variable might be transformed into an
 * SSA variable by scalar_replacement().
 */
static unsigned get_method_local_adress_weight(ir_graph *callee, size_t pos)
{
	inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);

	if (env->local_weights == NULL)
		analyze_irg_local_weights(env, callee);

	if (pos < ARR_LEN(env->local_weights))
		return env->local_weights[pos];
	return 0;
}
/**
 * Calculate a benefice value for inlining the given call.
 *
 * @param entry   the call entry we have to inspect
 * @param callee  the called graph
 */
static int calc_inline_benefice(call_entry *entry, ir_graph *callee)
{
	ir_node   *call = entry->call;
	ir_entity *ent  = get_irg_entity(callee);
	ir_type   *callee_frame;
	size_t    i, n_members, n_params;
	ir_node   *frame_ptr;
	ir_type   *mtp;
	int       weight = 0;
	int       all_const;
	unsigned  cc, v;

	inline_irg_env *callee_env;

	mtp_additional_properties props = get_entity_additional_properties(ent);
	if (props & mtp_property_noinline) {
		DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden\n",
		    call, callee));
		return entry->benefice = INT_MIN;
	}

	callee_frame = get_irg_frame_type(callee);
	n_members = get_class_n_members(callee_frame);
	for (i = 0; i < n_members; ++i) {
		ir_entity *frame_ent = get_class_member(callee_frame, i);
		if (is_parameter_entity(frame_ent)) {
			// TODO inliner should handle parameter entities by inserting Store operations
			DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden due to parameter entity\n", call, callee));
			add_entity_additional_properties(ent, mtp_property_noinline);
			return entry->benefice = INT_MIN;
		}
	}

	if (props & mtp_property_noreturn) {
		DB((dbg, LEVEL_2, "In %+F Call to %+F: not inlining noreturn or weak\n",
		    call, callee));
		return entry->benefice = INT_MIN;
	}

	/* costs for every passed parameter */
	n_params = get_Call_n_params(call);
	mtp      = get_entity_type(ent);
	cc       = get_method_calling_convention(mtp);
	if (cc & cc_reg_param) {
		/* register parameter, smaller costs for register parameters */
		size_t max_regs = cc & ~cc_bits;

		if (max_regs < n_params)
			weight += max_regs * 2 + (n_params - max_regs) * 5;
		else
			weight += n_params * 2;
	} else {
		/* parameters are passed on the stack */
		weight += 5 * n_params;
	}
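	/* Worked example: 5 parameters with a calling convention that passes at
	 * most 2 in registers cost 2*2 + 3*5 = 19; all 5 on the stack would
	 * cost 5*5 = 25, all 5 in registers only 5*2 = 10.  Higher costs at the
	 * call site mean a higher benefice for inlining the call away. */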
	/* constant parameters improve the benefice */
	frame_ptr = get_irg_frame(current_ir_graph);
	all_const = 1;
	for (i = 0; i < n_params; ++i) {
		ir_node *param = get_Call_param(call, i);

		if (is_Const(param)) {
			weight += get_method_param_weight(ent, i);
		} else {
			all_const = 0;
			if (is_SymConst(param))
				weight += get_method_param_weight(ent, i);
			else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
				/* An address of a local variable is transmitted.  After
				 * inlining, scalar_replacement might be able to remove the
				 * local variable, so honor this. */
				v = get_method_local_adress_weight(callee, i);
				weight += v;
				if (v > 0)
					entry->local_adr = 1;
			}
		}
	}
	entry->all_const = all_const;

	callee_env = (inline_irg_env*)get_irg_link(callee);
	if (callee_env->n_callers == 1 &&
	    callee != current_ir_graph &&
	    !entity_is_externally_visible(ent)) {
		weight += 700;
	}

	/* give a bonus for functions with one block */
	if (callee_env->n_blocks == 1)
		weight = weight * 3 / 2;

	/* and one for small non-recursive functions: we want them to be inlined in mostly every case */
	if (callee_env->n_nodes < 30 && !callee_env->recursive)
		weight += 2000;

	/* and finally for leaf functions: they do not increase the register pressure
	   because of callee-save registers */
	if (callee_env->n_call_nodes == 0)
		weight += 400;

	/* it's important to inline inner loops first */
	if (entry->loop_depth > 30)
		weight += 30 * 1024;
	else
		weight += entry->loop_depth * 1024;

	/* All arguments constant is probably a good sign, give an extra bonus */
	if (all_const)
		weight += 1024;

	return entry->benefice = weight;
}
typedef struct walk_env_t {
	ir_graph **irgs;
	size_t   last_irg;
} walk_env_t;

/**
 * Callgraph walker, collect all visited graphs.
 */
static void callgraph_walker(ir_graph *irg, void *data)
{
	walk_env_t *env = (walk_env_t *)data;
	env->irgs[env->last_irg++] = irg;
}

/**
 * Creates an inline order for all graphs.
 *
 * @return the list of graphs.
 */
static ir_graph **create_irg_list(void)
{
	ir_entity **free_methods;
	size_t     n_irgs = get_irp_n_irgs();
	walk_env_t env;

	cgana(&free_methods);
	xfree(free_methods);

	compute_callgraph();

	env.irgs     = XMALLOCNZ(ir_graph*, n_irgs);
	env.last_irg = 0;

	callgraph_walk(NULL, callgraph_walker, &env);
	assert(n_irgs == env.last_irg);

	return env.irgs;
}
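/* Note: callgraph_walk() runs callgraph_walker() as a post-walk callback, so
 * the returned array lists callees before their callers where the callgraph
 * permits; inline_functions() below therefore processes graphs roughly
 * bottom-up. */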
/**
 * Push a call onto the priority list if its benefice is big enough.
 *
 * @param pqueue            the priority queue of calls
 * @param call              the call entry
 * @param inline_threshold  the threshold value
 */
static void maybe_push_call(pqueue_t *pqueue, call_entry *call,
                            int inline_threshold)
{
	ir_graph *callee   = call->callee;
	int       benefice = calc_inline_benefice(call, callee);
	DB((dbg, LEVEL_2, "In %+F Call %+F to %+F has benefice %d\n",
	    get_irn_irg(call->call), call->call, callee, benefice));

	ir_entity *ent = get_irg_entity(callee);
	mtp_additional_properties props = get_entity_additional_properties(ent);
	if (!(props & mtp_property_always_inline) && benefice < inline_threshold) {
		return;
	}

	pqueue_put(pqueue, call, benefice);
}
/**
 * Try to inline calls into a graph.
 *
 * @param irg               the graph into which we inline
 * @param maxsize           do NOT inline if the size of irg gets
 *                          bigger than this amount
 * @param inline_threshold  threshold value for the inline decision
 * @param copied_graphs     map containing the copies of recursive graphs
 */
static void inline_into(ir_graph *irg, unsigned maxsize,
                        int inline_threshold, pmap *copied_graphs)
{
	int phiproj_computed = 0;
	inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
	wenv_t   wenv;
	pqueue_t *pqueue;

	if (env->n_call_nodes == 0)
		return;

	if (env->n_nodes > maxsize) {
		DB((dbg, LEVEL_2, "%+F: too big (%d)\n", irg, env->n_nodes));
		return;
	}

	current_ir_graph = irg;
	ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

	/* put irgs into the pqueue */
	pqueue = new_pqueue();

	list_for_each_entry(call_entry, curr_call, &env->calls, list) {
		assert(is_Call(curr_call->call));
		maybe_push_call(pqueue, curr_call, inline_threshold);
	}

	/* note that the list of possible calls is updated during the process */
	while (!pqueue_empty(pqueue)) {
		int             did_inline;
		call_entry      *curr_call  = (call_entry*)pqueue_pop_front(pqueue);
		ir_graph        *callee     = curr_call->callee;
		ir_node         *call_node  = curr_call->call;
		inline_irg_env  *callee_env = (inline_irg_env*)get_irg_link(callee);
		ir_entity       *ent        = get_irg_entity(callee);
		mtp_additional_properties props
			= get_entity_additional_properties(ent);
		ir_graph        *calleee;
		int             loop_depth;

		if (!(props & mtp_property_always_inline)
		    && env->n_nodes + callee_env->n_nodes > maxsize) {
			DB((dbg, LEVEL_2, "%+F: too big (%d) + %+F (%d)\n", irg,
			    env->n_nodes, callee, callee_env->n_nodes));
			continue;
		}
		calleee = pmap_get(ir_graph, copied_graphs, callee);
		if (calleee != NULL) {
			int benefice = curr_call->benefice;
			/*
			 * Reduce the weight for recursive functions IFF not all
			 * arguments are const.  Inlining recursive functions is
			 * rarely good.
			 */
			if (!curr_call->all_const)
				benefice -= 2000;
			if (benefice < inline_threshold)
				continue;

			/*
			 * Remap callee if we have a copy.
			 */
			callee     = calleee;
			callee_env = (inline_irg_env*)get_irg_link(callee);
		}

		if (current_ir_graph == callee) {
			/*
			 * Recursive call: we cannot directly inline because we cannot
			 * walk the graph and change it.  So we have to make a copy of
			 * the graph first.
			 */
			int benefice = curr_call->benefice;
			ir_graph *copy;

			/*
			 * Reduce the weight for recursive functions IFF not all
			 * arguments are const.  Inlining recursive functions is
			 * rarely good.
			 */
			if (!curr_call->all_const)
				benefice -= 2000;
			if (benefice < inline_threshold)
				continue;

			ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

			/*
			 * No copy yet, create one.
			 * Note that recursive methods are never leaf functions, so it
			 * is sufficient to test this condition here.
			 */
			copy = create_irg_copy(callee);

			/* create_irg_copy() destroys the Proj links, recompute them */
			phiproj_computed = 0;

			ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

			/* allocate a new environment */
			callee_env = alloc_inline_irg_env();
			set_irg_link(copy, callee_env);

			assure_irg_properties(copy, IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO);
			memset(&wenv, 0, sizeof(wenv));
			wenv.x              = callee_env;
			wenv.ignore_callers = 1;
			irg_walk_graph(copy, NULL, collect_calls2, &wenv);

			/*
			 * Enter the entity of the original graph.  This is needed
			 * for inline_method().  However, note that ent->irg still
			 * points to callee, NOT to copy.
			 */
			set_irg_entity(copy, get_irg_entity(callee));

			pmap_insert(copied_graphs, callee, copy);
			callee = copy;

			/* we have only one caller: the original graph */
			callee_env->n_callers      = 1;
			callee_env->n_callers_orig = 1;
		}

		if (! phiproj_computed) {
			phiproj_computed = 1;
			collect_phiprojs(current_ir_graph);
		}
		did_inline = inline_method(call_node, callee);
		if (!did_inline)
			continue;

		/* call was inlined, Phi/Projs for current graph must be recomputed */
		phiproj_computed = 0;

		/* remove it from the caller list */
		list_del(&curr_call->list);

		/* callee was inlined.  Append its call list. */
		env->got_inline = 1;
		--env->n_call_nodes;

		/* we just generated a bunch of new calls */
		loop_depth = curr_call->loop_depth;
		list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
			inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
			ir_node        *new_call;
			call_entry     *new_entry;

			/* after we have inlined callee, all called methods inside
			 * callee are now called once more */
			++penv->n_callers;

			/* Note that the src list points to Call nodes in the inlined graph,
			 * but we need Call nodes in our graph.  Luckily the inliner leaves
			 * this information in the link field. */
			new_call = (ir_node*)get_irn_link(centry->call);
			if (get_irn_irg(new_call) != irg) {
				/* centry->call has not been copied, which means it is dead.
				 * This might happen during inlining, if a const function,
				 * which cannot be inlined, is only used as an unused argument
				 * of another function, which is inlined. */
				continue;
			}
			assert(is_Call(new_call));

			new_entry = duplicate_call_entry(centry, new_call, loop_depth);
			list_add_tail(&new_entry->list, &env->calls);
			maybe_push_call(pqueue, new_entry, inline_threshold);
		}

		env->n_call_nodes += callee_env->n_call_nodes;
		env->n_nodes += callee_env->n_nodes;
		--callee_env->n_callers;
	}
	ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
	del_pqueue(pqueue);
}
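/* Because maybe_push_call() is also applied to the duplicated call entries
 * above, a call that becomes attractive only once its surroundings were
 * inlined is reconsidered within the same pass, giving transitive inlining
 * up to the maxsize limit. */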
/*
 * Heuristic inliner.  Calculates a benefice value for every call and inlines
 * those calls with a value higher than the threshold.
 */
void inline_functions(unsigned maxsize, int inline_threshold,
                      opt_ptr after_inline_opt)
{
	inline_irg_env *env;
	size_t         i, n_irgs;
	ir_graph       *rem;
	wenv_t         wenv;
	pmap           *copied_graphs;
	pmap_entry     *pm_entry;
	ir_graph       **irgs;

	rem = current_ir_graph;
	obstack_init(&temp_obst);

	irgs = create_irg_list();

	/* a map for the copied graphs, used to inline recursive calls */
	copied_graphs = pmap_create();

	/* extend all irgs by a temporary data structure for inlining. */
	n_irgs = get_irp_n_irgs();
	for (i = 0; i < n_irgs; ++i)
		set_irg_link(irgs[i], alloc_inline_irg_env());

	/* Pre-compute information in temporary data structure. */
	wenv.ignore_runtime = 0;
	wenv.ignore_callers = 0;
	for (i = 0; i < n_irgs; ++i) {
		ir_graph *irg = irgs[i];

		free_callee_info(irg);

		wenv.x = (inline_irg_env*)get_irg_link(irg);
		assure_loopinfo(irg);
		irg_walk_graph(irg, NULL, collect_calls2, &wenv);
	}

	/* -- and now inline. -- */
	for (i = 0; i < n_irgs; ++i) {
		ir_graph *irg = irgs[i];

		inline_into(irg, maxsize, inline_threshold, copied_graphs);
	}

	for (i = 0; i < n_irgs; ++i) {
		ir_graph *irg = irgs[i];

		env = (inline_irg_env*)get_irg_link(irg);
		if (env->got_inline && after_inline_opt != NULL) {
			/* this irg got calls inlined: optimize it */
			after_inline_opt(irg);
		}
		if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
			DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
			    env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
			    env->n_callers_orig, env->n_callers,
			    get_entity_name(get_irg_entity(irg))));
		}
	}

	/* kill the copied graphs: we don't need them anymore */
	foreach_pmap(copied_graphs, pm_entry) {
		ir_graph *copy = (ir_graph*)pm_entry->value;

		/* reset the entity, otherwise it will be deleted in the next step */
		set_irg_entity(copy, NULL);
		free_ir_graph(copy);
	}
	pmap_destroy(copied_graphs);

	xfree(irgs);

	obstack_free(&temp_obst, NULL);
	current_ir_graph = rem;
}
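/* A minimal usage sketch (hypothetical driver code; the size limit 750 and
 * the cleanup callback are placeholders, any opt_ptr will do):
 *
 *     firm_init_inline();
 *     inline_functions(750, 0, my_after_inline_cleanup);
 *
 * With inline_threshold 0 every call whose computed benefice is non-negative
 * (or whose callee is marked always_inline) becomes an inline candidate. */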
typedef struct inline_functions_pass_t {
	ir_prog_pass_t pass;
	unsigned       maxsize;
	int            inline_threshold;
	opt_ptr        after_inline_opt;
} inline_functions_pass_t;
/**
 * Wrapper to run inline_functions() as an ir_prog pass.
 */
static int inline_functions_wrapper(ir_prog *irp, void *context)
{
	inline_functions_pass_t *pass = (inline_functions_pass_t*)context;

	(void)irp;
	inline_functions(pass->maxsize, pass->inline_threshold,
	                 pass->after_inline_opt);
	return 0;
}
/* create an ir_prog pass for inline_functions */
ir_prog_pass_t *inline_functions_pass(
	const char *name, unsigned maxsize, int inline_threshold,
	opt_ptr after_inline_opt)
{
	inline_functions_pass_t *pass = XMALLOCZ(inline_functions_pass_t);

	pass->maxsize          = maxsize;
	pass->inline_threshold = inline_threshold;
	pass->after_inline_opt = after_inline_opt;

	return def_prog_pass_constructor(
		&pass->pass, name ? name : "inline_functions",
		inline_functions_wrapper);
}
void firm_init_inline(void)
{
	FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
}