/*
 * Copyright (C) 1995-2011 University of Karlsruhe.  All rights reserved.
 *
 * This file is part of libFirm.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License
 * Agreement provided with the Software.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

/**
 * @file
 * @brief    Dead node elimination and Procedure Inlining.
 * @author   Michael Beck, Goetz Lindenmaier
 * @version  $Id$
 */
#include "config.h"

#include <limits.h>
#include <stdbool.h>
#include <assert.h>

#include "irnode_t.h"
#include "irgraph_t.h"
#include "irprog_t.h"

#include "iroptimize.h"
#include "ircons_t.h"
#include "iropt_t.h"
#include "irgopt.h"
#include "irgmod.h"
#include "irgwalk.h"

#include "array_t.h"
#include "list.h"
#include "pset.h"
#include "pmap.h"
#include "pdeq.h"
#include "xmalloc.h"
#include "pqueue.h"

#include "irouts.h"
#include "irloop_t.h"
#include "irbackedge_t.h"
#include "opt_init.h"
#include "cgana.h"
#include "trouts.h"
#include "error.h"

#include "analyze_irg_args.h"
#include "iredges_t.h"
#include "irflag_t.h"
#include "irhooks.h"
#include "irtools.h"
#include "iropt_dbg.h"
#include "irpass_t.h"
#include "irphase_t.h"
DEBUG_ONLY(static firm_dbg_module_t *dbg;)

/*------------------------------------------------------------------*/
/* Routines for dead node elimination / copying garbage collection  */
/* of the obstack.                                                  */
/*------------------------------------------------------------------*/

/**
 * Remember the new node in the old node by using a field all nodes have.
 */
static void set_new_node(ir_node *node, ir_node *new_node)
{
        set_irn_link(node, new_node);
}

/**
 * Get this new node, before the old node is forgotten.
 */
static inline ir_node *get_new_node(ir_node *old_node)
{
        assert(irn_visited(old_node));
        return (ir_node*) get_irn_link(old_node);
}
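
/*
 * Illustrative sketch (comment only, not part of the build): during copying,
 * each node of the source graph is paired with its copy via the link field:
 *
 *   set_new_node(old_node, new_node);  // remember the copy
 *   mark_irn_visited(old_node);        // get_new_node() asserts this
 *   ...
 *   ir_node *copy = get_new_node(old_node);
 *
 * The pre-linking of the Start block, Start, Bad and NoMem in inline_method()
 * below uses exactly this pattern.
 */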

/*--------------------------------------------------------------------*/
/*  Functionality for inlining                                         */
/*--------------------------------------------------------------------*/

/**
 * Copy node for inlining.  Updates attributes that change when
 * inlining but not for dead node elimination.
 *
 * Copies the node by calling copy_node() and then updates the entity if
 * it's a local one.  env must be a pointer to the graph into which we
 * inline.  The new entities must be in the link fields of the old
 * entities.
 */
static void copy_node_inline(ir_node *node, void *env)
{
        ir_graph *new_irg  = (ir_graph*) env;
        ir_node  *new_node = irn_copy_into_irg(node, new_irg);

        set_new_node(node, new_node);
        if (is_Sel(node)) {
                ir_graph  *old_irg        = get_irn_irg(node);
                ir_type   *old_frame_type = get_irg_frame_type(old_irg);
                ir_entity *old_entity     = get_Sel_entity(node);
                assert(is_Sel(new_node));
                /* use copied entities from the new frame */
                if (get_entity_owner(old_entity) == old_frame_type) {
                        ir_entity *new_entity = (ir_entity*)get_entity_link(old_entity);
                        assert(new_entity != NULL);
                        set_Sel_entity(new_node, new_entity);
                }
        } else if (is_Block(new_node)) {
                new_node->attr.block.irg.irg = new_irg;
        }
}

static void set_preds_inline(ir_node *node, void *env)
{
        ir_node *new_node;

        irn_rewire_inputs(node);

        /* move constants into start block */
        new_node = get_new_node(node);
        if (is_irn_constlike(new_node)) {
                ir_graph *new_irg     = (ir_graph *) env;
                ir_node  *start_block = get_irg_start_block(new_irg);
                set_nodes_block(new_node, start_block);
        }
}

/**
 * Walker: checks whether inlining is allowed.  Refuses graphs that take
 * the address of a parameter (access to the value parameter area on the
 * frame) and graphs that use stack allocation.
 */
static void find_addr(ir_node *node, void *env)
{
        bool *allow_inline = (bool*)env;

        if (is_Sel(node)) {
                ir_graph *irg = current_ir_graph;
                if (get_Sel_ptr(node) == get_irg_frame(irg)) {
                        /* access to frame */
                        ir_entity *ent = get_Sel_entity(node);
                        if (get_entity_owner(ent) != get_irg_frame_type(irg)) {
                                /* access to value_type */
                                *allow_inline = false;
                        }
                }
        } else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
                /* From GCC:
                 * Refuse to inline an alloca call unless the user explicitly forced
                 * it, as this may change the program's memory overhead drastically
                 * when the function using alloca is called in a loop.  In the GCC
                 * present in SPEC2000, inlining into schedule_block caused it to
                 * require 2GB of RAM instead of 256MB.
                 *
                 * Sadly this is true for our implementation as well.
                 * Moreover, we cannot differentiate between alloca() and VLAs yet,
                 * so this disables inlining of functions using VLAs (which would be
                 * completely safe).
                 *
                 * Two possible solutions:
                 * - add a flag to the Alloc node for "real" alloca() calls
                 * - add a new Stack-Restore node at the end of a function using
                 *   alloca()
                 */
                *allow_inline = false;
        }
}

/**
 * Check if we can inline a given call.
 * Currently, we cannot inline two cases:
 * - calls with compound arguments or results
 * - graphs that take the address of a parameter
 *
 * These conditions are checked here.
 */
static bool can_inline(ir_node *call, ir_graph *called_graph)
{
        ir_entity          *called      = get_irg_entity(called_graph);
        ir_type            *called_type = get_entity_type(called);
        ir_type            *call_type   = get_Call_type(call);
        size_t              n_params    = get_method_n_params(called_type);
        size_t              n_arguments = get_method_n_params(call_type);
        size_t              n_res       = get_method_n_ress(called_type);
        irg_inline_property prop        = get_irg_inline_property(called_graph);
        size_t              i;
        bool                res;

        if (prop == irg_inline_forbidden)
                return false;

        if (n_arguments != n_params) {
                /* this is a bad feature of C: without a prototype, we can
                 * call a function with fewer parameters than needed. Currently
                 * we don't support this, although we could use Unknown then. */
                return false;
        }
        if (n_res != get_method_n_ress(call_type)) {
                return false;
        }

        /* Argh, compiling C has some bad consequences:
         * The call type and the type of the called entity can differ; it is
         * implementation defined what happens in that case.
         * We support inlining if the bit size of the types matches AND
         * the same arithmetic is used. */
        for (i = 0; i < n_params; ++i) {
                ir_type *param_tp = get_method_param_type(called_type, i);
                ir_type *arg_tp   = get_method_param_type(call_type, i);

                if (param_tp != arg_tp) {
                        ir_mode *pmode = get_type_mode(param_tp);
                        ir_mode *amode = get_type_mode(arg_tp);

                        if (pmode == NULL || amode == NULL)
                                return false;
                        if (get_mode_size_bits(pmode) != get_mode_size_bits(amode))
                                return false;
                        if (get_mode_arithmetic(pmode) != get_mode_arithmetic(amode))
                                return false;
                        /* otherwise we can simply "reinterpret" the bits */
                }
        }
        for (i = 0; i < n_res; ++i) {
                ir_type *decl_res_tp = get_method_res_type(called_type, i);
                ir_type *used_res_tp = get_method_res_type(call_type, i);

                if (decl_res_tp != used_res_tp) {
                        ir_mode *decl_mode = get_type_mode(decl_res_tp);
                        ir_mode *used_mode = get_type_mode(used_res_tp);
                        if (decl_mode == NULL || used_mode == NULL)
                                return false;
                        if (get_mode_size_bits(decl_mode) != get_mode_size_bits(used_mode))
                                return false;
                        if (get_mode_arithmetic(decl_mode) != get_mode_arithmetic(used_mode))
                                return false;
                        /* otherwise we can "reinterpret" the bits */
                }
        }

        /* check parameters for compound types */
        for (i = 0; i < n_params; ++i) {
                ir_type *p_type = get_method_param_type(call_type, i);

                if (is_compound_type(p_type))
                        return false;
        }

        /* check results for compound types */
        for (i = 0; i < n_res; ++i) {
                ir_type *r_type = get_method_res_type(call_type, i);

                if (is_compound_type(r_type))
                        return false;
        }

        res = true;
        irg_walk_graph(called_graph, find_addr, NULL, &res);

        return res;
}
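
/*
 * Usage sketch (illustrative only; <call> stands for some Call node with a
 * statically known callee): can_inline() is a pure query and is also invoked
 * as the first step of inline_method(), so a separate call is only useful to
 * skip the remaining setup work early on a negative answer:
 *
 *   ir_graph *callee = get_call_called_irg(call);
 *   if (callee != NULL && can_inline(call, callee)) {
 *           // worth trying: inline_method(call, callee)
 *   }
 */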

enum exc_mode {
        exc_handler,    /**< There is a handler. */
        exc_no_handler  /**< Exception handling not represented. */
};

/**
 * Copy all entities of the stack frame of one irg to the stack frame of
 * another.  Sets the entity links of the old entities to the copies.
 */
static void copy_frame_entities(ir_graph *from, ir_graph *to)
{
        ir_type *from_frame = get_irg_frame_type(from);
        ir_type *to_frame   = get_irg_frame_type(to);
        size_t   n_members  = get_class_n_members(from_frame);
        size_t   i;
        assert(from_frame != to_frame);

        for (i = 0; i < n_members; ++i) {
                ir_entity *old_ent = get_class_member(from_frame, i);
                ir_entity *new_ent = copy_entity_own(old_ent, to_frame);
                set_entity_link(old_ent, new_ent);
        }
}

/* Inlines a method at the given call site. */
int inline_method(ir_node *call, ir_graph *called_graph)
{
        ir_node       *pre_call;
        ir_node       *post_call, *post_bl;
        ir_node       *in[pn_Start_max];
        ir_node       *end, *end_bl, *block;
        ir_node       **res_pred;
        ir_node       **cf_pred;
        ir_node       **args_in;
        ir_node       *ret, *phi;
        int           arity, n_ret, n_exc, n_res, i, j, rem_opt;
        int           irn_arity, n_params;
        int           n_mem_phi;
        enum exc_mode exc_handling;
        ir_type       *mtp;
        ir_type       *ctp;
        ir_entity     *ent;
        ir_graph      *rem;
        ir_graph      *irg = get_irn_irg(call);

        /* we cannot inline some types of calls */
        if (! can_inline(call, called_graph))
                return 0;

        /* We cannot inline a recursive call. The graph must be copied before
         * the call to inline_method() using create_irg_copy(). */
        if (called_graph == irg)
                return 0;

        ent      = get_irg_entity(called_graph);
        mtp      = get_entity_type(ent);
        ctp      = get_Call_type(call);
        n_params = get_method_n_params(mtp);
        n_res    = get_method_n_ress(mtp);

        rem = current_ir_graph;
        current_ir_graph = irg;

        DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));

        /* optimizations can cause problems when allocating new nodes */
        rem_opt = get_opt_optimize();
        set_optimize(0);

        /* Handle graph state */
        assert(get_irg_phase_state(irg) != phase_building);
        assert(get_irg_pinned(irg) == op_pin_state_pinned);
        assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
        set_irg_outs_inconsistent(irg);
        set_irg_extblk_inconsistent(irg);
        set_irg_doms_inconsistent(irg);
        set_irg_loopinfo_inconsistent(irg);
        set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
        set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);
        edges_deactivate(irg);

        /* here we know we WILL inline, so inform the statistics */
        hook_inline(call, called_graph);

        /* -- Decide how to handle exception control flow: Is there a handler
           for the Call node, or do we branch directly to End on an exception?
           exc_handling:
           exc_handler:    There is a handler.
           exc_no_handler: Exception handling not represented in Firm. -- */
        {
                ir_node *Xproj = NULL;
                ir_node *proj;
                for (proj = (ir_node*)get_irn_link(call); proj != NULL;
                     proj = (ir_node*)get_irn_link(proj)) {
                        long proj_nr = get_Proj_proj(proj);
                        if (proj_nr == pn_Call_X_except) Xproj = proj;
                }
                exc_handling = Xproj != NULL ? exc_handler : exc_no_handler;
        }

        /* create the argument tuple */
        args_in = ALLOCAN(ir_node*, n_params);

        block = get_nodes_block(call);
        for (i = n_params - 1; i >= 0; --i) {
                ir_node *arg      = get_Call_param(call, i);
                ir_type *param_tp = get_method_param_type(mtp, i);
                ir_mode *mode     = get_type_mode(param_tp);

                if (mode != get_irn_mode(arg)) {
                        arg = new_r_Conv(block, arg, mode);
                }
                args_in[i] = arg;
        }

        /* Pre_call is a Tuple that mimics the Start node of the called
         * procedure and later replaces the Start node of the called graph.
         * Post_call is the old Call node and collects the results of the called
         * graph. Both will end up being a tuple. */
        post_bl = get_nodes_block(call);
        /* XxMxPxPxPxT of Start + parameter of Call */
        in[pn_Start_M]              = get_Call_mem(call);
        in[pn_Start_X_initial_exec] = new_r_Jmp(post_bl);
        in[pn_Start_P_frame_base]   = get_irg_frame(irg);
        in[pn_Start_P_tls]          = get_irg_tls(irg);
        in[pn_Start_T_args]         = new_r_Tuple(post_bl, n_params, args_in);
        pre_call = new_r_Tuple(post_bl, pn_Start_max, in);
        post_call = call;

        /* --
           The new block gets the ins of the old block, pre_call and all its
           predecessors and all Phi nodes. -- */
        part_block(pre_call);

        /* increment visited flag for later walk */
        inc_irg_visited(called_graph);

        /* link some nodes to nodes in the current graph so that instead of
         * being copied the linked nodes will get used.
         * This way the copier will use the created Tuple instead of copying
         * the Start node, and similarly for singleton nodes like NoMem and Bad.
         * Note: this will prohibit predecessors from being copied - only do it
         *       for nodes without predecessors */
        {
                ir_node *start_block;
                ir_node *start;
                ir_node *bad;
                ir_node *nomem;

                start_block = get_irg_start_block(called_graph);
                set_new_node(start_block, get_nodes_block(pre_call));
                mark_irn_visited(start_block);

                start = get_irg_start(called_graph);
                set_new_node(start, pre_call);
                mark_irn_visited(start);

                bad = get_irg_bad(called_graph);
                set_new_node(bad, get_irg_bad(irg));
                mark_irn_visited(bad);

                nomem = get_irg_no_mem(called_graph);
                set_new_node(nomem, get_irg_no_mem(irg));
                mark_irn_visited(nomem);
        }

        /* the entity link is used to link entities on the old stack frame to
         * the copies on the new stack frame */
        irp_reserve_resources(irp, IR_RESOURCE_ENTITY_LINK);

        /* copy entities and nodes */
        assert(!irn_visited(get_irg_end(called_graph)));
        copy_frame_entities(called_graph, irg);
        irg_walk_core(get_irg_end(called_graph), copy_node_inline, set_preds_inline,
                      irg);

        irp_free_resources(irp, IR_RESOURCE_ENTITY_LINK);

        /* -- Merge the end of the inlined procedure with the call site -- */
        /* We will turn the old Call node into a Tuple with the following
           predecessors:
           -1:  Block of Tuple.
           0: Phi of all Memories of Return statements.
           1: Jmp from new Block that merges the control flow from all exception
           predecessors of the old end block.
           2: Tuple of all arguments.
           3: Phi of Exception memories.
           In case the old Call directly branches to End on an exception we don't
           need the block merging all exceptions nor the Phi of the exception
           memories.
        */

        /* Precompute some values */
        end_bl = get_new_node(get_irg_end_block(called_graph));
        end    = get_new_node(get_irg_end(called_graph));
        arity  = get_Block_n_cfgpreds(end_bl);    /* arity = n_exc + n_ret  */
        n_res  = get_method_n_ress(get_Call_type(call));

        res_pred = XMALLOCN(ir_node*, n_res);
        cf_pred  = XMALLOCN(ir_node*, arity);

        /* preserve the keepalives */
        irn_arity = get_irn_arity(end);
        for (i = 0; i < irn_arity; i++) {
                ir_node *ka = get_End_keepalive(end, i);
                if (! is_Bad(ka))
                        add_End_keepalive(get_irg_end(irg), ka);
        }

        /* replace Return nodes by Jmp nodes */
        n_ret = 0;
        for (i = 0; i < arity; i++) {
                ir_node *ret;
                ret = get_Block_cfgpred(end_bl, i);
                if (is_Return(ret)) {
                        ir_node *block = get_nodes_block(ret);
                        cf_pred[n_ret] = new_r_Jmp(block);
                        n_ret++;
                }
        }
        set_irn_in(post_bl, n_ret, cf_pred);

        /* build a Tuple for all results of the method.
         * Add a Phi node if there was more than one Return. */
        turn_into_tuple(post_call, pn_Call_max);
        /* First the Memory-Phi */
        n_mem_phi = 0;
        for (i = 0; i < arity; i++) {
                ret = get_Block_cfgpred(end_bl, i);
                if (is_Return(ret)) {
                        cf_pred[n_mem_phi++] = get_Return_mem(ret);
                }
                /* memory output for some exceptions is directly connected to End */
                if (is_Call(ret)) {
                        cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 3);
                } else if (is_fragile_op(ret)) {
                        /* We rely on all fragile cfops having the memory output at
                         * the same position. */
                        cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 0);
                } else if (is_Raise(ret)) {
                        cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 1);
                }
        }
        phi = new_r_Phi(post_bl, n_mem_phi, cf_pred, mode_M);
        set_Tuple_pred(call, pn_Call_M, phi);
        /* Conserve Phi-list for further inlinings -- but might be optimized */
        if (get_nodes_block(phi) == post_bl) {
                set_irn_link(phi, get_irn_link(post_bl));
                set_irn_link(post_bl, phi);
        }
        /* Now the real results */
        if (n_res > 0) {
                ir_node *result_tuple;
                for (j = 0; j < n_res; j++) {
                        ir_type *res_type = get_method_res_type(ctp, j);
                        ir_mode *res_mode = get_type_mode(res_type);
                        n_ret = 0;
                        for (i = 0; i < arity; i++) {
                                ret = get_Block_cfgpred(end_bl, i);
                                if (is_Return(ret)) {
                                        ir_node *res = get_Return_res(ret, j);
                                        if (get_irn_mode(res) != res_mode) {
                                                ir_node *block = get_nodes_block(res);
                                                res = new_r_Conv(block, res, res_mode);
                                        }
                                        cf_pred[n_ret] = res;
                                        n_ret++;
                                }
                        }
                        if (n_ret > 0) {
                                ir_mode *mode = get_irn_mode(cf_pred[0]);
                                phi = new_r_Phi(post_bl, n_ret, cf_pred, mode);
                        } else {
                                phi = new_r_Bad(irg);
                        }
                        res_pred[j] = phi;
                        /* Conserve Phi-list for further inlinings -- but might be optimized */
                        if (get_nodes_block(phi) == post_bl) {
                                set_Phi_next(phi, get_Block_phis(post_bl));
                                set_Block_phis(post_bl, phi);
                        }
                }
                result_tuple = new_r_Tuple(post_bl, n_res, res_pred);
                set_Tuple_pred(call, pn_Call_T_result, result_tuple);
        } else {
                set_Tuple_pred(call, pn_Call_T_result, new_r_Bad(irg));
        }
        /* handle the regular call */
        set_Tuple_pred(call, pn_Call_X_regular, new_r_Jmp(post_bl));

        /* Finally the exception control flow.
           We have two possible situations:
           First, if the Call branches to an exception handler:
           We need to add a Phi node to collect the memory containing the
           exception objects.  Further we need to add another block to get a
           correct representation of this Phi.  To this block we add a Jmp that
           resolves into the X output of the Call when the Call is turned into
           a tuple.
           Second: There is no exception edge. Just add all inlined exception
           branches to the End node.
         */
        if (exc_handling == exc_handler) {
                n_exc = 0;
                for (i = 0; i < arity; i++) {
                        ir_node *ret, *irn;
                        ret = get_Block_cfgpred(end_bl, i);
                        irn = skip_Proj(ret);
                        if (is_fragile_op(irn) || is_Raise(irn)) {
                                cf_pred[n_exc] = ret;
                                ++n_exc;
                        }
                }
                if (n_exc > 0) {
                        if (n_exc == 1) {
                                /* simple fix */
                                set_Tuple_pred(call, pn_Call_X_except, cf_pred[0]);
                        } else {
                                ir_node *block = new_r_Block(irg, n_exc, cf_pred);
                                set_Tuple_pred(call, pn_Call_X_except, new_r_Jmp(block));
                        }
                } else {
                        set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg));
                }
        } else {
                ir_node *main_end_bl;
                int main_end_bl_arity;
                ir_node **end_preds;

                /* here exc_handling == exc_no_handler or there are no exceptions */
                n_exc = 0;
                for (i = 0; i < arity; i++) {
                        ir_node *ret = get_Block_cfgpred(end_bl, i);
                        ir_node *irn = skip_Proj(ret);

                        if (is_fragile_op(irn) || is_Raise(irn)) {
                                cf_pred[n_exc] = ret;
                                n_exc++;
                        }
                }
                main_end_bl       = get_irg_end_block(irg);
                main_end_bl_arity = get_irn_arity(main_end_bl);
                end_preds         = XMALLOCN(ir_node*, n_exc + main_end_bl_arity);

                for (i = 0; i < main_end_bl_arity; ++i)
                        end_preds[i] = get_irn_n(main_end_bl, i);
                for (i = 0; i < n_exc; ++i)
                        end_preds[main_end_bl_arity + i] = cf_pred[i];
                set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
                set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg));
                free(end_preds);
        }
        free(res_pred);
        free(cf_pred);

        /* -- Turn optimizations back on. -- */
        set_optimize(rem_opt);
        current_ir_graph = rem;

        return 1;
}
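
/*
 * Minimal caller sketch (illustrative, mirroring inline_small_irgs() below):
 * inline_method() expects the caller to have reserved the link/Phi-list
 * resources and collected the Phi/Proj lists:
 *
 *   ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
 *   collect_phiprojs(irg);
 *   if (inline_method(call, callee)) {
 *           // Phi/Proj lists of irg are stale now; recompute them with
 *           // collect_phiprojs() before inlining the next call
 *   }
 *   ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
 */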

/********************************************************************/
/* Apply inlining to small methods.                                 */
/********************************************************************/

static struct obstack  temp_obst;

/** Represents a possibly inlinable call in a graph. */
typedef struct call_entry {
        ir_node    *call;       /**< The Call node. */
        ir_graph   *callee;     /**< The callee IR-graph. */
        list_head  list;        /**< List head for linking the next one. */
        int        loop_depth;  /**< The loop depth of this call. */
        int        benefice;    /**< The calculated benefice of this call. */
        unsigned   local_adr:1; /**< Set if this call gets an address of a local variable. */
        unsigned   all_const:1; /**< Set if this call has only constant parameters. */
} call_entry;

/**
 * environment for inlining small irgs
 */
typedef struct inline_env_t {
        struct obstack obst;  /**< An obstack where call_entries are allocated on. */
        list_head      calls; /**< The call entry list. */
} inline_env_t;

/**
 * Returns the irg called from a Call node. If the irg is not
 * known, NULL is returned.
 *
 * @param call  the call node
 */
static ir_graph *get_call_called_irg(ir_node *call)
{
        ir_node *addr;

        addr = get_Call_ptr(call);
        if (is_Global(addr)) {
                ir_entity *ent = get_Global_entity(addr);
                /* we don't know which function gets finally bound to a weak symbol */
                if (get_entity_linkage(ent) & IR_LINKAGE_WEAK)
                        return NULL;

                return get_entity_irg(ent);
        }

        return NULL;
}

/**
 * Walker: Collect all calls to known graphs inside a graph.
 */
static void collect_calls(ir_node *call, void *env)
{
        if (is_Call(call)) {
                ir_graph *called_irg = get_call_called_irg(call);

                if (called_irg != NULL) {
                        /* The Call node calls a locally defined method.  Remember to inline. */
                        inline_env_t *ienv  = (inline_env_t*)env;
                        call_entry   *entry = OALLOC(&ienv->obst, call_entry);
                        entry->call       = call;
                        entry->callee     = called_irg;
                        entry->loop_depth = 0;
                        entry->benefice   = 0;
                        entry->local_adr  = 0;
                        entry->all_const  = 0;

                        list_add_tail(&entry->list, &ienv->calls);
                }
        }
}

/**
 * Inlines all small methods at call sites where the called address comes
 * from a Const node that references the entity representing the called
 * method.
 * The size argument is a rough measure for the code size of the method:
 * Methods where the obstack containing the firm graph is smaller than
 * size are inlined.
 */
void inline_small_irgs(ir_graph *irg, int size)
{
        ir_graph *rem = current_ir_graph;
        inline_env_t env;
        call_entry *entry;

        current_ir_graph = irg;
        /* Handle graph state */
        assert(get_irg_phase_state(irg) != phase_building);
        free_callee_info(irg);

        /* Find Call nodes to inline.
           (We cannot inline during a walk of the graph, as inlining the same
           method several times changes the visited flag of the walked graph:
           after the first inlining visited of the callee equals visited of
           the caller.  With the next inlining both are increased.) */
        obstack_init(&env.obst);
        INIT_LIST_HEAD(&env.calls);
        irg_walk_graph(irg, NULL, collect_calls, &env);

        if (! list_empty(&env.calls)) {
                /* There are calls to inline */
                ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
                collect_phiprojs(irg);

                list_for_each_entry(call_entry, entry, &env.calls, list) {
                        ir_graph            *callee = entry->callee;
                        irg_inline_property prop    = get_irg_inline_property(callee);

                        if (prop == irg_inline_forbidden) {
                                continue;
                        }

                        if (prop >= irg_inline_forced ||
                            _obstack_memory_used(callee->obst) - (int)obstack_room(callee->obst) < size) {
                                inline_method(entry->call, callee);
                        }
                }
                ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
        }
        obstack_free(&env.obst, NULL);
        current_ir_graph = rem;
}
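
/*
 * Example (illustrative; the threshold is an arbitrary choice): inline every
 * callee whose graph obstack occupies less than roughly 2000 bytes:
 *
 *   inline_small_irgs(irg, 2000);
 *
 * Note that size is measured in obstack bytes, not in node counts, so useful
 * values depend on the host pointer size and node layout.
 */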

typedef struct inline_small_irgs_pass_t {
        ir_graph_pass_t pass;
        int            size;
} inline_small_irgs_pass_t;

/**
 * Wrapper to run inline_small_irgs() as a pass.
 */
static int inline_small_irgs_wrapper(ir_graph *irg, void *context)
{
        inline_small_irgs_pass_t *pass = (inline_small_irgs_pass_t*)context;

        inline_small_irgs(irg, pass->size);
        return 0;
}

/* create a pass for inline_small_irgs() */
ir_graph_pass_t *inline_small_irgs_pass(const char *name, int size)
{
        inline_small_irgs_pass_t *pass = XMALLOCZ(inline_small_irgs_pass_t);

        pass->size = size;
        return def_graph_pass_constructor(
                &pass->pass, name ? name : "inline_small_irgs", inline_small_irgs_wrapper);
}

/**
 * Environment for inlining irgs.
 */
typedef struct {
        list_head calls;             /**< List of all call nodes in this graph. */
        unsigned  *local_weights;    /**< Once allocated, the beneficial weight for transmitting local addresses. */
        unsigned  n_nodes;           /**< Number of nodes in graph except Id, Tuple, Proj, Start, End. */
        unsigned  n_blocks;          /**< Number of Blocks in graph without Start and End block. */
        unsigned  n_nodes_orig;      /**< for statistics */
        unsigned  n_call_nodes;      /**< Number of Call nodes in the graph. */
        unsigned  n_call_nodes_orig; /**< for statistics */
        unsigned  n_callers;         /**< Number of known graphs that call this graph. */
        unsigned  n_callers_orig;    /**< for statistics */
        unsigned  got_inline:1;      /**< Set, if at least one call inside this graph was inlined. */
        unsigned  recursive:1;       /**< Set, if this function is self recursive. */
} inline_irg_env;

/**
 * Allocate a new environment for inlining.
 */
static inline_irg_env *alloc_inline_irg_env(void)
{
        inline_irg_env *env    = OALLOC(&temp_obst, inline_irg_env);
        INIT_LIST_HEAD(&env->calls);
        env->local_weights     = NULL;
        env->n_nodes           = -2; /* do not count Start, End */
        env->n_blocks          = -2; /* do not count Start and End block */
        env->n_nodes_orig      = -2; /* do not count Start, End */
        env->n_call_nodes      = 0;
        env->n_call_nodes_orig = 0;
        env->n_callers         = 0;
        env->n_callers_orig    = 0;
        env->got_inline        = 0;
        env->recursive         = 0;
        return env;
}

typedef struct walker_env {
        inline_irg_env *x;     /**< the inline environment */
        char ignore_runtime;   /**< the ignore runtime flag */
        char ignore_callers;   /**< if set, do not update caller info */
} wenv_t;

/**
 * post-walker: collect all calls in the inline-environment
 * of a graph and sum some statistics.
 */
static void collect_calls2(ir_node *call, void *ctx)
{
        wenv_t         *env = (wenv_t*)ctx;
        inline_irg_env *x = env->x;
        unsigned        code = get_irn_opcode(call);
        ir_graph       *callee;
        call_entry     *entry;

        /* count meaningful nodes in irg */
        if (code != iro_Proj && code != iro_Tuple && code != iro_Sync) {
                if (code != iro_Block) {
                        ++x->n_nodes;
                        ++x->n_nodes_orig;
                } else {
                        ++x->n_blocks;
                }
        }

        if (code != iro_Call) return;

        /* check if it's a runtime call */
        if (env->ignore_runtime) {
                ir_node *symc = get_Call_ptr(call);

                if (is_Global(symc)) {
                        ir_entity *ent = get_Global_entity(symc);

                        if (get_entity_additional_properties(ent) & mtp_property_runtime)
                                return;
                }
        }

        /* collect all call nodes */
        ++x->n_call_nodes;
        ++x->n_call_nodes_orig;

        callee = get_call_called_irg(call);
        if (callee != NULL) {
                if (! env->ignore_callers) {
                        inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);
                        /* count all static callers */
                        ++callee_env->n_callers;
                        ++callee_env->n_callers_orig;
                }
                if (callee == current_ir_graph)
                        x->recursive = 1;

                /* link it in the list of possible inlinable entries */
                entry = OALLOC(&temp_obst, call_entry);
                entry->call       = call;
                entry->callee     = callee;
                entry->loop_depth = get_irn_loop(get_nodes_block(call))->depth;
                entry->benefice   = 0;
                entry->local_adr  = 0;
                entry->all_const  = 0;

                list_add_tail(&entry->list, &x->calls);
        }
}

/**
 * Returns TRUE if the number of call nodes is 0 in the irg's environment,
 * hence this irg is a leaf: it calls no other graphs.
 */
inline static int is_leave(ir_graph *irg)
{
        inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
        return env->n_call_nodes == 0;
}

/**
 * Returns TRUE if the number of nodes in the callee is
 * smaller than size in the irg's environment.
 */
inline static int is_smaller(ir_graph *callee, unsigned size)
{
        inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);
        return env->n_nodes < size;
}

/**
 * Duplicate a call entry.
 *
 * @param entry     the original entry to duplicate
 * @param new_call  the new call node
 * @param loop_depth_delta
 *                  delta value for the loop depth
 */
static call_entry *duplicate_call_entry(const call_entry *entry,
                                        ir_node *new_call, int loop_depth_delta)
{
        call_entry *nentry = OALLOC(&temp_obst, call_entry);
        nentry->call       = new_call;
        nentry->callee     = entry->callee;
        nentry->benefice   = entry->benefice;
        nentry->loop_depth = entry->loop_depth + loop_depth_delta;
        nentry->local_adr  = entry->local_adr;
        nentry->all_const  = entry->all_const;

        return nentry;
}

/**
 * Append all call nodes of the source environment to the list of the
 * destination environment.
 *
 * @param dst         destination environment
 * @param src         source environment
 * @param loop_depth  the loop depth of the call that is replaced by the src list
 */
static void append_call_list(inline_irg_env *dst, inline_irg_env *src, int loop_depth)
{
        call_entry *entry, *nentry;

        /* Note that the src list points to Call nodes in the inlined graph, but
           we need Call nodes in our graph. Luckily the inliner leaves this
           information in the link field. */
        list_for_each_entry(call_entry, entry, &src->calls, list) {
                nentry = duplicate_call_entry(entry, (ir_node*)get_irn_link(entry->call), loop_depth);
                list_add_tail(&nentry->list, &dst->calls);
        }
        dst->n_call_nodes += src->n_call_nodes;
        dst->n_nodes      += src->n_nodes;
}

/*
 * Inlines small leaf ("leave") methods at call sites where the called
 * address comes from a Const node that references the entity representing
 * the called method.
 * The size argument is a rough measure for the code size of the method:
 * Methods where the obstack containing the firm graph is smaller than
 * size are inlined.
 */
void inline_leave_functions(unsigned maxsize, unsigned leavesize,
                            unsigned size, int ignore_runtime)
{
        inline_irg_env   *env;
        ir_graph         *irg;
        size_t           i, n_irgs;
        ir_graph         *rem;
        int              did_inline;
        wenv_t           wenv;
        call_entry       *entry, *next;
        const call_entry *centry;
        pmap             *copied_graphs;
        pmap_entry       *pm_entry;

        rem = current_ir_graph;
        obstack_init(&temp_obst);

        /* a map for the copied graphs, used to inline recursive calls */
        copied_graphs = pmap_create();

        /* extend all irgs by a temporary data structure for inlining. */
        n_irgs = get_irp_n_irgs();
        for (i = 0; i < n_irgs; ++i)
                set_irg_link(get_irp_irg(i), alloc_inline_irg_env());

        /* Pre-compute information in temporary data structure. */
        wenv.ignore_runtime = ignore_runtime;
        wenv.ignore_callers = 0;
        for (i = 0; i < n_irgs; ++i) {
                ir_graph *irg = get_irp_irg(i);

                assert(get_irg_phase_state(irg) != phase_building);
                free_callee_info(irg);

                assure_cf_loop(irg);
                wenv.x = (inline_irg_env*)get_irg_link(irg);
                irg_walk_graph(irg, NULL, collect_calls2, &wenv);
        }

        /* -- and now inline. -- */

        /* Inline leaves recursively -- we might construct new leaves. */
        do {
                did_inline = 0;

                for (i = 0; i < n_irgs; ++i) {
                        ir_node *call;
                        int phiproj_computed = 0;

                        current_ir_graph = get_irp_irg(i);
                        env              = (inline_irg_env*)get_irg_link(current_ir_graph);

                        ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
                        list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
                                ir_graph            *callee;
                                irg_inline_property  prop;

                                if (env->n_nodes > maxsize)
                                        break;

                                call   = entry->call;
                                callee = entry->callee;

                                prop = get_irg_inline_property(callee);
                                if (prop == irg_inline_forbidden) {
                                        continue;
                                }

                                if (is_leave(callee) && (
                                    is_smaller(callee, leavesize) || prop >= irg_inline_forced)) {
                                        if (!phiproj_computed) {
                                                phiproj_computed = 1;
                                                collect_phiprojs(current_ir_graph);
                                        }
                                        did_inline = inline_method(call, callee);

                                        if (did_inline) {
                                                inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);

                                                /* call was inlined, Phi/Projs for current graph must be recomputed */
                                                phiproj_computed = 0;

                                                /* Do some statistics */
                                                env->got_inline = 1;
                                                --env->n_call_nodes;
                                                env->n_nodes += callee_env->n_nodes;
                                                --callee_env->n_callers;

                                                /* remove this call from the list */
                                                list_del(&entry->list);
                                                continue;
                                        }
                                }
                        }
                        ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
                }
        } while (did_inline);

        /* inline other small functions. */
        for (i = 0; i < n_irgs; ++i) {
                ir_node *call;
                int phiproj_computed = 0;

                current_ir_graph = get_irp_irg(i);
                env              = (inline_irg_env*)get_irg_link(current_ir_graph);

                ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

                /* note that the list of possible calls is updated during the process */
                list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
                        irg_inline_property prop;
                        ir_graph            *callee;
                        pmap_entry          *e;

                        call   = entry->call;
                        callee = entry->callee;

                        prop = get_irg_inline_property(callee);
                        if (prop == irg_inline_forbidden) {
                                continue;
                        }

                        e = pmap_find(copied_graphs, callee);
                        if (e != NULL) {
                                /*
                                 * Remap callee if we have a copy.
                                 * FIXME: Should we do this only for recursive Calls?
                                 */
                                callee = (ir_graph*)e->value;
                        }

                        if (prop >= irg_inline_forced ||
                            (is_smaller(callee, size) && env->n_nodes < maxsize) /* small function */) {
                                if (current_ir_graph == callee) {
                                        /*
                                         * Recursive call: we cannot directly inline because we cannot
                                         * walk the graph and change it. So we have to make a copy of
                                         * the graph first.
                                         */

                                        inline_irg_env *callee_env;
                                        ir_graph       *copy;

                                        ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

                                        /*
                                         * No copy yet, create one.
                                         * Note that recursive methods are never leaves, so it is
                                         * sufficient to test this condition here.
                                         */
                                        copy = create_irg_copy(callee);

                                        /* create_irg_copy() destroys the Proj links, recompute them */
                                        phiproj_computed = 0;

                                        ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

                                        /* allocate new environment */
                                        callee_env = alloc_inline_irg_env();
                                        set_irg_link(copy, callee_env);

                                        assure_cf_loop(copy);
                                        wenv.x              = callee_env;
                                        wenv.ignore_callers = 1;
                                        irg_walk_graph(copy, NULL, collect_calls2, &wenv);

                                        /*
                                         * Enter the entity of the original graph. This is needed
                                         * for inline_method(). However, note that ent->irg still points
                                         * to callee, NOT to copy.
                                         */
                                        set_irg_entity(copy, get_irg_entity(callee));

                                        pmap_insert(copied_graphs, callee, copy);
                                        callee = copy;

                                        /* we have only one caller: the original graph */
                                        callee_env->n_callers      = 1;
                                        callee_env->n_callers_orig = 1;
                                }
                                if (! phiproj_computed) {
                                        phiproj_computed = 1;
                                        collect_phiprojs(current_ir_graph);
                                }
                                did_inline = inline_method(call, callee);
                                if (did_inline) {
                                        inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);

                                        /* call was inlined, Phi/Projs for current graph must be recomputed */
                                        phiproj_computed = 0;

                                        /* callee was inlined. Append its call list. */
                                        env->got_inline = 1;
                                        --env->n_call_nodes;
                                        append_call_list(env, callee_env, entry->loop_depth);
                                        --callee_env->n_callers;

                                        /* after we have inlined callee, all called methods inside
                                           callee are now called once more */
                                        list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
                                                inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
                                                ++penv->n_callers;
                                        }

                                        /* remove this call from the list */
                                        list_del(&entry->list);
                                        continue;
                                }
                        }
                }
                ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
        }

        for (i = 0; i < n_irgs; ++i) {
                irg = get_irp_irg(i);
                env = (inline_irg_env*)get_irg_link(irg);

                if (env->got_inline) {
                        optimize_graph_df(irg);
                        optimize_cf(irg);
                }
                if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
                        DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
                        env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
                        env->n_callers_orig, env->n_callers,
                        get_entity_name(get_irg_entity(irg))));
                }
        }

        /* kill the copied graphs: we don't need them anymore */
        foreach_pmap(copied_graphs, pm_entry) {
                ir_graph *copy = (ir_graph*)pm_entry->value;

                /* reset the entity, otherwise it will be deleted in the next step ... */
                set_irg_entity(copy, NULL);
                free_ir_graph(copy);
        }
        pmap_destroy(copied_graphs);

        obstack_free(&temp_obst, NULL);
        current_ir_graph = rem;
}
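
/*
 * Example (illustrative; all thresholds are arbitrary choices): let each
 * graph grow to at most ~750 nodes, inlining leaf functions of up to 300
 * nodes and other callees of up to 250 nodes, with no special treatment of
 * runtime calls:
 *
 *   inline_leave_functions(750, 300, 250, 0);
 */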

typedef struct inline_leave_functions_pass_t {
        ir_prog_pass_t pass;
        unsigned       maxsize;
        unsigned       leavesize;
        unsigned       size;
        int            ignore_runtime;
} inline_leave_functions_pass_t;

/**
 * Wrapper to run inline_leave_functions() as an ir_prog pass.
 */
static int inline_leave_functions_wrapper(ir_prog *irp, void *context)
{
        inline_leave_functions_pass_t *pass = (inline_leave_functions_pass_t*)context;

        (void)irp;
        inline_leave_functions(
                pass->maxsize, pass->leavesize,
                pass->size, pass->ignore_runtime);
        return 0;
}

/* create a pass for inline_leave_functions() */
ir_prog_pass_t *inline_leave_functions_pass(
        const char *name, unsigned maxsize, unsigned leavesize,
        unsigned size, int ignore_runtime)
{
        inline_leave_functions_pass_t *pass = XMALLOCZ(inline_leave_functions_pass_t);

        pass->maxsize        = maxsize;
        pass->leavesize      = leavesize;
        pass->size           = size;
        pass->ignore_runtime = ignore_runtime;

        return def_prog_pass_constructor(
                &pass->pass,
                name ? name : "inline_leave_functions",
                inline_leave_functions_wrapper);
}
1246 /**
1247  * Calculate the parameter weights for transmitting the address of a local variable.
1248  */
static unsigned calc_method_local_weight(ir_node *arg)
{
	int      i, j, k;
	unsigned v, weight = 0;

	for (i = get_irn_n_outs(arg) - 1; i >= 0; --i) {
		ir_node *succ = get_irn_out(arg, i);

		switch (get_irn_opcode(succ)) {
		case iro_Load:
		case iro_Store:
			/* Loads and Stores can be removed */
			weight += 3;
			break;
		case iro_Sel:
			/* check if all indexes are constant */
			for (j = get_Sel_n_indexs(succ) - 1; j >= 0; --j) {
				ir_node *idx = get_Sel_index(succ, j);
				if (! is_Const(idx))
					return 0;
			}
			/* Check users of this Sel. Note: if a 0 is returned here, there was
			   some unsupported node. */
			v = calc_method_local_weight(succ);
			if (v == 0)
				return 0;
			/* we can kill one Sel with constant indexes, this is cheap */
			weight += v + 1;
			break;
		case iro_Id:
			/* when looking backward we might find Id nodes */
			weight += calc_method_local_weight(succ);
			break;
		case iro_Tuple:
			/* unoptimized tuple */
			for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
				ir_node *pred = get_Tuple_pred(succ, j);
				if (pred == arg) {
					/* look for Proj(j) */
					for (k = get_irn_n_outs(succ) - 1; k >= 0; --k) {
						ir_node *succ_succ = get_irn_out(succ, k);
						if (is_Proj(succ_succ)) {
							if (get_Proj_proj(succ_succ) == j) {
								/* found */
								weight += calc_method_local_weight(succ_succ);
							}
						} else {
							/* this should NOT happen */
							return 0;
						}
					}
				}
			}
			break;
		default:
			/* any other node: unsupported yet or bad. */
			return 0;
		}
	}
	return weight;
}
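
/* Illustrative example: for caller code like
 *
 *     int x;
 *     f(&x);
 *
 * the address of x arrives in f as an argument.  Every Load/Store user of it
 * adds 3 to the weight, a Sel with constant indexes adds the weight of its
 * own users plus 1, and any unsupported user zeroes the whole weight, since
 * scalar replacement would then be impossible. */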

/**
 * Calculate and cache the weights for all parameters of the given graph.
 */
static void analyze_irg_local_weights(inline_irg_env *env, ir_graph *irg)
{
	ir_entity *ent = get_irg_entity(irg);
	ir_type  *mtp;
	size_t   nparams;
	int      i;
	long     proj_nr;
	ir_node  *irg_args, *arg;

	mtp      = get_entity_type(ent);
	nparams  = get_method_n_params(mtp);

	/* allocate a new array; a non-NULL pointer serves as the 'analysed' flag */
	env->local_weights = NEW_ARR_D(unsigned, &temp_obst, nparams);

	/* If the method has no parameters, there is nothing to do. */
	if (nparams == 0)
		return;

	assure_irg_outs(irg);
	irg_args = get_irg_args(irg);
	for (i = get_irn_n_outs(irg_args) - 1; i >= 0; --i) {
		arg     = get_irn_out(irg_args, i);
		proj_nr = get_Proj_proj(arg);
		env->local_weights[proj_nr] = calc_method_local_weight(arg);
	}
}

/**
 * Calculate the benefice for transmitting a local variable address.
 * After inlining, the local variable might be transformed into an
 * SSA variable by scalar_replacement().
 */
static unsigned get_method_local_address_weight(ir_graph *callee, size_t pos)
{
	inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);

	if (env->local_weights == NULL)
		analyze_irg_local_weights(env, callee);

	if (pos < ARR_LEN(env->local_weights))
		return env->local_weights[pos];
	return 0;
}

/**
 * Calculate a benefice value for inlining the given call.
 *
 * @param entry      the call entry describing the call to inspect
 * @param callee     the called graph
 */
static int calc_inline_benefice(call_entry *entry, ir_graph *callee)
{
	ir_node   *call = entry->call;
	ir_entity *ent  = get_irg_entity(callee);
	ir_node   *frame_ptr;
	ir_type   *mtp;
	int       weight = 0;
	int       i, n_params, all_const;
	unsigned  cc, v;
	irg_inline_property prop;

	inline_irg_env *callee_env;

	prop = get_irg_inline_property(callee);
	if (prop == irg_inline_forbidden) {
		DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden\n",
		    call, callee));
		return entry->benefice = INT_MIN;
	}

	if (get_irg_additional_properties(callee) & mtp_property_noreturn) {
		DB((dbg, LEVEL_2, "In %+F Call to %+F: not inlining noreturn\n",
		    call, callee));
		return entry->benefice = INT_MIN;
	}

	/* costs for every passed parameter */
	n_params = get_Call_n_params(call);
	mtp      = get_entity_type(ent);
	cc       = get_method_calling_convention(mtp);
	if (cc & cc_reg_param) {
		/* register parameters cause smaller costs */
		int max_regs = cc & ~cc_bits;

		if (max_regs < n_params)
			weight += max_regs * 2 + (n_params - max_regs) * 5;
		else
			weight += n_params * 2;
	} else {
		/* parameters are passed on the stack */
		weight += 5 * n_params;
	}

	/* constant parameters improve the benefice */
	frame_ptr = get_irg_frame(current_ir_graph);
	all_const = 1;
	for (i = 0; i < n_params; ++i) {
		ir_node *param = get_Call_param(call, i);

		if (is_Const(param)) {
			weight += get_method_param_weight(ent, i);
		} else {
			all_const = 0;
			if (is_SymConst(param))
				weight += get_method_param_weight(ent, i);
			else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
				/*
				 * An address of a local variable is transmitted. After
				 * inlining, scalar_replacement might be able to remove the
				 * local variable, so honor this.
				 */
				v = get_method_local_address_weight(callee, i);
				weight += v;
				if (v > 0)
					entry->local_adr = 1;
			}
		}
	}
	entry->all_const = all_const;

	callee_env = (inline_irg_env*)get_irg_link(callee);
	if (callee_env->n_callers == 1 &&
	    callee != current_ir_graph &&
	    !entity_is_externally_visible(ent)) {
		weight += 700;
	}

	/* give a bonus for functions with one block */
	if (callee_env->n_blocks == 1)
		weight = weight * 3 / 2;

	/* and one for small non-recursive functions: we want them to be inlined
	   in almost every case */
	if (callee_env->n_nodes < 30 && !callee_env->recursive)
		weight += 2000;

	/* and finally for leaves: they do not increase the register pressure
	   because of callee-save registers */
	if (callee_env->n_call_nodes == 0)
		weight += 400;

	/* it's important to inline inner loops first */
	if (entry->loop_depth > 30)
		weight += 30 * 1024;
	else
		weight += entry->loop_depth * 1024;

	/*
	 * All arguments constant is probably a good sign, give an extra bonus
	 */
	if (all_const)
		weight += 1024;

	return entry->benefice = weight;
}
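
/* Worked example (all numbers illustrative; assumes get_method_param_weight()
 * returns 0): a single-block, non-recursive leaf callee with fewer than 30
 * nodes, two constant stack parameters, exactly one caller, not externally
 * visible, called at loop depth 1:
 *
 *     weight  = 5 * 2          ->   10  (stack parameters)
 *     weight += 700            ->  710  (single local caller)
 *     weight  = weight * 3 / 2 -> 1065  (one block)
 *     weight += 2000           -> 3065  (small, non-recursive)
 *     weight += 400            -> 3465  (leaf function)
 *     weight += 1 * 1024       -> 4489  (loop depth)
 *     weight += 1024           -> 5513  (all arguments constant)
 */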

typedef struct walk_env_t {
	ir_graph **irgs;
	size_t   last_irg;
} walk_env_t;

/**
 * Callgraph walker, collect all visited graphs.
 */
static void callgraph_walker(ir_graph *irg, void *data)
{
	walk_env_t *env = (walk_env_t *)data;
	env->irgs[env->last_irg++] = irg;
}

/**
 * Creates an inline order for all graphs.
 *
 * @return the list of graphs.
 */
static ir_graph **create_irg_list(void)
{
	ir_entity  **free_methods;
	size_t     n_irgs = get_irp_n_irgs();
	walk_env_t env;

	cgana(&free_methods);
	xfree(free_methods);

	compute_callgraph();

	env.irgs     = XMALLOCNZ(ir_graph*, n_irgs);
	env.last_irg = 0;

	callgraph_walk(NULL, callgraph_walker, &env);
	assert(n_irgs == env.last_irg);

	return env.irgs;
}

/**
 * Push a call onto the priority queue if its benefice is big enough.
 *
 * @param pqueue   the priority queue of calls
 * @param call     the call entry
 * @param inline_threshold
 *                 the threshold value
 */
static void maybe_push_call(pqueue_t *pqueue, call_entry *call,
                            int inline_threshold)
{
	ir_graph            *callee  = call->callee;
	irg_inline_property prop     = get_irg_inline_property(callee);
	int                 benefice = calc_inline_benefice(call, callee);

	DB((dbg, LEVEL_2, "In %+F Call %+F to %+F has benefice %d\n",
	    get_irn_irg(call->call), call->call, callee, benefice));

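	/* graphs marked irg_inline_forced are queued regardless of the threshold */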
	if (prop < irg_inline_forced && benefice < inline_threshold) {
		return;
	}

	pqueue_put(pqueue, call, benefice);
}

/**
 * Try to inline calls into a graph.
 *
 * @param irg      the graph into which we inline
 * @param maxsize  do NOT inline if the size of irg gets
 *                 bigger than this amount
 * @param inline_threshold
 *                 threshold value for inline decision
 * @param copied_graphs
 *                 map containing copies of recursive graphs
 */
static void inline_into(ir_graph *irg, unsigned maxsize,
                        int inline_threshold, pmap *copied_graphs)
{
	int            phiproj_computed = 0;
	inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
	call_entry     *curr_call;
	wenv_t         wenv;
	pqueue_t       *pqueue;

	if (env->n_call_nodes == 0)
		return;

	if (env->n_nodes > maxsize) {
		DB((dbg, LEVEL_2, "%+F: too big (%d)\n", irg, env->n_nodes));
		return;
	}

	current_ir_graph = irg;
	ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

	/* put the calls into the priority queue */
	pqueue = new_pqueue();

	list_for_each_entry(call_entry, curr_call, &env->calls, list) {
		assert(is_Call(curr_call->call));
		maybe_push_call(pqueue, curr_call, inline_threshold);
	}

	/* note that the list of possible calls is updated during the process */
	while (!pqueue_empty(pqueue)) {
		int                 did_inline;
		call_entry          *curr_call  = (call_entry*)pqueue_pop_front(pqueue);
		ir_graph            *callee     = curr_call->callee;
		ir_node             *call_node  = curr_call->call;
		inline_irg_env      *callee_env = (inline_irg_env*)get_irg_link(callee);
		irg_inline_property prop        = get_irg_inline_property(callee);
		int                 loop_depth;
		const call_entry    *centry;
		pmap_entry          *e;

		if ((prop < irg_inline_forced) && env->n_nodes + callee_env->n_nodes > maxsize) {
			DB((dbg, LEVEL_2, "%+F: too big (%d) + %+F (%d)\n", irg,
						env->n_nodes, callee, callee_env->n_nodes));
			continue;
		}

		e = pmap_find(copied_graphs, callee);
		if (e != NULL) {
			int benefice = curr_call->benefice;
			/*
			 * Reduce the weight for recursive functions if not all arguments
			 * are constant; inlining recursive functions is rarely good.
			 */
			if (!curr_call->all_const)
				benefice -= 2000;
			if (benefice < inline_threshold)
				continue;

			/*
			 * Remap callee if we have a copy.
			 */
			callee     = (ir_graph*)e->value;
			callee_env = (inline_irg_env*)get_irg_link(callee);
		}

		if (current_ir_graph == callee) {
			/*
			 * Recursive call: we cannot directly inline because we cannot
			 * walk the graph and change it. So we have to make a copy of
			 * the graph first.
			 */
			int benefice = curr_call->benefice;
			ir_graph *copy;

			/*
			 * Reduce the weight for recursive functions if not all arguments
			 * are constant; inlining recursive functions is rarely good.
			 */
			if (!curr_call->all_const)
				benefice -= 2000;
			if (benefice < inline_threshold)
				continue;

			ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

			/*
			 * No copy yet, create one.
			 * Note that recursive methods are never leaves, so it is
			 * sufficient to test this condition here.
			 */
			copy = create_irg_copy(callee);

			/* create_irg_copy() destroys the Proj links, recompute them */
			phiproj_computed = 0;

			ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

			/* allocate a new environment */
			callee_env = alloc_inline_irg_env();
			set_irg_link(copy, callee_env);

			assure_cf_loop(copy);
			wenv.x              = callee_env;
			wenv.ignore_callers = 1;
			irg_walk_graph(copy, NULL, collect_calls2, &wenv);

			/*
			 * Enter the entity of the original graph. This is needed
			 * for inline_method(). However, note that ent->irg still points
			 * to callee, NOT to copy.
			 */
			set_irg_entity(copy, get_irg_entity(callee));

			pmap_insert(copied_graphs, callee, copy);
			callee = copy;

			/* we have only one caller: the original graph */
			callee_env->n_callers      = 1;
			callee_env->n_callers_orig = 1;
		}
		if (! phiproj_computed) {
			phiproj_computed = 1;
			collect_phiprojs(current_ir_graph);
		}
		did_inline = inline_method(call_node, callee);
		if (!did_inline)
			continue;

		/* call was inlined, Phi/Projs for current graph must be recomputed */
		phiproj_computed = 0;

		/* remove it from the caller list */
		list_del(&curr_call->list);

		/* callee was inlined; append its call list. */
		env->got_inline = 1;
		--env->n_call_nodes;

		/* we just generated a bunch of new calls */
		loop_depth = curr_call->loop_depth;
		list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
			inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
			ir_node        *new_call;
			call_entry     *new_entry;

			/* after we have inlined callee, all called methods inside
			 * callee are now called once more */
			++penv->n_callers;

			/* Note that the src list points to Call nodes in the inlined graph,
			 * but we need Call nodes in our graph. Luckily the inliner leaves
			 * this information in the link field. */
			new_call = (ir_node*)get_irn_link(centry->call);
			assert(is_Call(new_call));

			new_entry = duplicate_call_entry(centry, new_call, loop_depth);
			list_add_tail(&new_entry->list, &env->calls);
			maybe_push_call(pqueue, new_entry, inline_threshold);
		}

		env->n_call_nodes += callee_env->n_call_nodes;
		env->n_nodes += callee_env->n_nodes;
		--callee_env->n_callers;
	}
	ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
	del_pqueue(pqueue);
}
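
/* Note: calls discovered inside an inlined body are appended to the caller's
 * call list and re-queued above, so inlining proceeds transitively until the
 * queue is drained or the graph grows past maxsize. */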

/*
 * Heuristic inliner. Calculates a benefice value for every call and inlines
 * those calls with a value higher than the threshold.
 */
void inline_functions(unsigned maxsize, int inline_threshold,
                      opt_ptr after_inline_opt)
{
	inline_irg_env   *env;
	size_t           i, n_irgs;
	ir_graph         *rem;
	wenv_t           wenv;
	pmap             *copied_graphs;
	pmap_entry       *pm_entry;
	ir_graph         **irgs;

	rem = current_ir_graph;
	obstack_init(&temp_obst);

	irgs = create_irg_list();

	/* a map for the copied graphs, used to inline recursive calls */
	copied_graphs = pmap_create();

	/* extend all irgs by a temporary data structure for inlining. */
	n_irgs = get_irp_n_irgs();
	for (i = 0; i < n_irgs; ++i)
		set_irg_link(irgs[i], alloc_inline_irg_env());

	/* Pre-compute information in temporary data structure. */
	wenv.ignore_runtime = 0;
	wenv.ignore_callers = 0;
	for (i = 0; i < n_irgs; ++i) {
		ir_graph *irg = irgs[i];

		free_callee_info(irg);

		wenv.x = (inline_irg_env*)get_irg_link(irg);
		assure_cf_loop(irg);
		irg_walk_graph(irg, NULL, collect_calls2, &wenv);
	}

	/* -- and now inline. -- */
	for (i = 0; i < n_irgs; ++i) {
		ir_graph *irg = irgs[i];

		inline_into(irg, maxsize, inline_threshold, copied_graphs);
	}

	for (i = 0; i < n_irgs; ++i) {
		ir_graph *irg = irgs[i];

		env = (inline_irg_env*)get_irg_link(irg);
		if (env->got_inline && after_inline_opt != NULL) {
			/* this irg got calls inlined: optimize it */
			after_inline_opt(irg);
		}
		if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
			DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
			env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
			env->n_callers_orig, env->n_callers,
			get_entity_name(get_irg_entity(irg))));
		}
	}

	/* kill the copied graphs: we don't need them anymore */
	foreach_pmap(copied_graphs, pm_entry) {
		ir_graph *copy = (ir_graph*)pm_entry->value;

		/* reset the entity, otherwise it will be deleted in the next step ... */
		set_irg_entity(copy, NULL);
		free_ir_graph(copy);
	}
	pmap_destroy(copied_graphs);

	xfree(irgs);

	obstack_free(&temp_obst, NULL);
	current_ir_graph = rem;
}
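
/* Usage sketch (illustrative values): inline with threshold 0 and clean up
 * every changed graph afterwards; optimize_graph_df() from iroptimize.h is
 * one possible opt_ptr callback:
 *
 *     inline_functions(750, 0, optimize_graph_df);
 */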

typedef struct inline_functions_pass_t {
	ir_prog_pass_t pass;
	unsigned       maxsize;
	int            inline_threshold;
	opt_ptr        after_inline_opt;
} inline_functions_pass_t;

/**
 * Wrapper to run inline_functions() as an ir_prog pass.
 */
static int inline_functions_wrapper(ir_prog *irp, void *context)
{
	inline_functions_pass_t *pass = (inline_functions_pass_t*)context;

	(void)irp;
	inline_functions(pass->maxsize, pass->inline_threshold,
	                 pass->after_inline_opt);
	return 0;
}

/* create an ir_prog pass for inline_functions() */
ir_prog_pass_t *inline_functions_pass(
	const char *name, unsigned maxsize, int inline_threshold,
	opt_ptr after_inline_opt)
{
	inline_functions_pass_t *pass = XMALLOCZ(inline_functions_pass_t);

	pass->maxsize          = maxsize;
	pass->inline_threshold = inline_threshold;
	pass->after_inline_opt = after_inline_opt;

	return def_prog_pass_constructor(
		&pass->pass, name ? name : "inline_functions",
		inline_functions_wrapper);
}
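
/* Usage sketch: the same heuristic packaged as a pass (illustrative values):
 *
 *     ir_prog_pass_t *p = inline_functions_pass(NULL, 750, 0, optimize_graph_df);
 */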

void firm_init_inline(void)
{
	FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
}