/*
 * Copyright (C) 1995-2011 University of Karlsruhe.  All rights reserved.
 *
 * This file is part of libFirm.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License
 * Agreement provided with the Software.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

/**
 * @file
 * @brief    Dead node elimination and Procedure Inlining.
 * @author   Michael Beck, Goetz Lindenmaier
 * @version  $Id$
 */
#include "config.h"

#include <limits.h>
#include <stdbool.h>
#include <assert.h>

#include "irnode_t.h"
#include "irgraph_t.h"
#include "irprog_t.h"

#include "iroptimize.h"
#include "ircons_t.h"
#include "iropt_t.h"
#include "irgopt.h"
#include "irgmod.h"
#include "irgwalk.h"

#include "array_t.h"
#include "list.h"
#include "pset.h"
#include "pmap.h"
#include "pdeq.h"
#include "xmalloc.h"
#include "pqueue.h"

#include "irouts.h"
#include "irloop_t.h"
#include "irbackedge_t.h"
#include "opt_init.h"
#include "cgana.h"
#include "trouts.h"
#include "error.h"

#include "analyze_irg_args.h"
#include "iredges_t.h"
#include "irflag_t.h"
#include "irhooks.h"
#include "irtools.h"
#include "iropt_dbg.h"
#include "irpass_t.h"
#include "irphase_t.h"

DEBUG_ONLY(static firm_dbg_module_t *dbg;)

/*------------------------------------------------------------------*/
/* Routines for dead node elimination / copying garbage collection  */
/* of the obstack.                                                  */
/*------------------------------------------------------------------*/

/**
 * Remember the new node in the old node by using a field all nodes have.
 */
static void set_new_node(ir_node *node, ir_node *new_node)
{
        set_irn_link(node, new_node);
}

/**
 * Get this new node, before the old node is forgotten.
 */
static inline ir_node *get_new_node(ir_node *old_node)
{
        assert(irn_visited(old_node));
        return (ir_node*) get_irn_link(old_node);
}
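
/*
 * Illustrative usage sketch of the link-field protocol above (exposition
 * only, not part of the optimization): assuming a node "old" whose copy
 * "copy" was just created, the copier does
 *
 *     set_new_node(old, copy);
 *     mark_irn_visited(old);
 *     ...
 *     ir_node *copy_again = get_new_node(old);   // yields "copy"
 *
 * The visited flag acts as the guard: get_new_node() asserts it, so a
 * node must be marked visited before its copy may be looked up.
 */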

/*--------------------------------------------------------------------*/
/*  Functionality for inlining                                         */
/*--------------------------------------------------------------------*/

/**
 * Copy node for inlining.  Updates attributes that change when
 * inlining but not for dead node elimination.
 *
 * Copies the node by calling irn_copy_into_irg() and then updates the
 * entity if it's a local one.  env must be the target graph of the copy.
 * The new entities must be in the link fields of the old entities.
 */
static void copy_node_inline(ir_node *node, void *env)
{
        ir_graph *new_irg  = (ir_graph*) env;
        ir_node  *new_node = irn_copy_into_irg(node, new_irg);

        set_new_node(node, new_node);
        if (is_Sel(node)) {
                ir_graph  *old_irg        = get_irn_irg(node);
                ir_type   *old_frame_type = get_irg_frame_type(old_irg);
                ir_entity *old_entity     = get_Sel_entity(node);
                assert(is_Sel(new_node));
                /* use copied entities from the new frame */
                if (get_entity_owner(old_entity) == old_frame_type) {
                        ir_entity *new_entity = (ir_entity*)get_entity_link(old_entity);
                        assert(new_entity != NULL);
                        set_Sel_entity(new_node, new_entity);
                }
        } else if (is_Block(new_node)) {
                new_node->attr.block.irg.irg = new_irg;
        }
}

static void set_preds_inline(ir_node *node, void *env)
{
        ir_node *new_node;

        irn_rewire_inputs(node);

        /* move constants into start block */
        new_node = get_new_node(node);
        if (is_irn_constlike(new_node)) {
                ir_graph *new_irg     = (ir_graph *) env;
                ir_node  *start_block = get_irg_start_block(new_irg);
                set_nodes_block(new_node, start_block);
        }
}
/**
 * Walker: checks if P_value_arg_base is used (i.e. the address of a value
 * parameter is taken) or a stack Alloc occurs; either one forbids inlining.
 */
static void find_addr(ir_node *node, void *env)
{
        bool *allow_inline = (bool*)env;

        if (is_Sel(node)) {
                ir_graph *irg = current_ir_graph;
                if (get_Sel_ptr(node) == get_irg_frame(irg)) {
                        /* access to frame */
                        ir_entity *ent = get_Sel_entity(node);
                        if (get_entity_owner(ent) != get_irg_frame_type(irg)) {
                                /* access to value_type */
                                *allow_inline = false;
                        }
                }
        } else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
                /* From GCC:
                 * Refuse to inline an alloca call unless the user explicitly forced
                 * it, as this may change the program's memory overhead drastically
                 * when the function using alloca is called in a loop.  In the GCC
                 * version present in SPEC2000, inlining into schedule_block caused
                 * it to require 2GB of RAM instead of 256MB.
                 *
                 * Sadly this is true for our implementation as well.
                 * Moreover, we cannot differentiate between alloca() and VLA yet, so
                 * this disables inlining of functions using VLA (which are completely
                 * safe).
                 *
                 * Two solutions:
                 * - add a flag to the Alloc node for "real" alloca() calls
                 * - add a new Stack-Restore node at the end of a function using
                 *   alloca()
                 */
                *allow_inline = false;
        }
}

/**
 * Check if we can inline a given call.
 * Currently, we cannot inline two cases:
 * - calls with compound arguments or results
 * - graphs that take the address of a parameter
 *
 * These conditions are checked here.
 */
static bool can_inline(ir_node *call, ir_graph *called_graph)
{
        ir_entity          *called      = get_irg_entity(called_graph);
        ir_type            *called_type = get_entity_type(called);
        ir_type            *call_type   = get_Call_type(call);
        size_t              n_params    = get_method_n_params(called_type);
        size_t              n_arguments = get_method_n_params(call_type);
        size_t              n_res       = get_method_n_ress(called_type);
        irg_inline_property prop        = get_irg_inline_property(called_graph);
        size_t              i;
        bool                res;

        if (prop == irg_inline_forbidden)
                return false;

        if (n_arguments != n_params) {
                /* this is a bad feature of C: without a prototype, we can
                 * call a function with fewer parameters than needed. Currently
                 * we don't support this, although we could use Unknown then. */
                return false;
        }
        if (n_res != get_method_n_ress(call_type)) {
                return false;
        }

        /* Argh, compiling C has some bad consequences:
         * If the type of a parameter and the type of the corresponding
         * argument differ, it is implementation dependent what happens.
         * We support inlining if the bit size of the types matches AND
         * the same arithmetic is used. */
        for (i = 0; i < n_params; ++i) {
                ir_type *param_tp = get_method_param_type(called_type, i);
                ir_type *arg_tp   = get_method_param_type(call_type, i);

                if (param_tp != arg_tp) {
                        ir_mode *pmode = get_type_mode(param_tp);
                        ir_mode *amode = get_type_mode(arg_tp);

                        if (pmode == NULL || amode == NULL)
                                return false;
                        if (get_mode_size_bits(pmode) != get_mode_size_bits(amode))
                                return false;
                        if (get_mode_arithmetic(pmode) != get_mode_arithmetic(amode))
                                return false;
                        /* otherwise we can simply "reinterpret" the bits */
                }
        }
        for (i = 0; i < n_res; ++i) {
                ir_type *decl_res_tp = get_method_res_type(called_type, i);
                ir_type *used_res_tp = get_method_res_type(call_type, i);

                if (decl_res_tp != used_res_tp) {
                        ir_mode *decl_mode = get_type_mode(decl_res_tp);
                        ir_mode *used_mode = get_type_mode(used_res_tp);
                        if (decl_mode == NULL || used_mode == NULL)
                                return false;
                        if (get_mode_size_bits(decl_mode) != get_mode_size_bits(used_mode))
                                return false;
                        if (get_mode_arithmetic(decl_mode) != get_mode_arithmetic(used_mode))
                                return false;
                        /* otherwise we can "reinterpret" the bits */
                }
        }

        /* check parameters for compound arguments */
        for (i = 0; i < n_params; ++i) {
                ir_type *p_type = get_method_param_type(call_type, i);

                if (is_compound_type(p_type))
                        return false;
        }

        /* check results for compound arguments */
        for (i = 0; i < n_res; ++i) {
                ir_type *r_type = get_method_res_type(call_type, i);

                if (is_compound_type(r_type))
                        return false;
        }

        res = true;
        irg_walk_graph(called_graph, find_addr, NULL, &res);

        return res;
}
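
/*
 * Illustrative C-level example of the prototype problem rejected above
 * (a made-up snippet for exposition, not taken from any test suite):
 *
 *     int f();                          // no prototype
 *     int g(void) { return f(1, 2); }
 *
 * Depending on the frontend, the method type of f may list no parameters
 * while the Call type in g lists two; then n_arguments != n_params and
 * can_inline() returns false.
 */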

enum exc_mode {
        exc_handler,    /**< There is a handler. */
        exc_no_handler  /**< Exception handling not represented. */
};

/**
 * Copy all entities of the stack frame of one irg to the stack frame of
 * another.  Sets the entity links of the old entities to the copies.
 */
static void copy_frame_entities(ir_graph *from, ir_graph *to)
{
        ir_type *from_frame = get_irg_frame_type(from);
        ir_type *to_frame   = get_irg_frame_type(to);
        size_t   n_members  = get_class_n_members(from_frame);
        size_t   i;
        assert(from_frame != to_frame);

        for (i = 0; i < n_members; ++i) {
                ir_entity *old_ent = get_class_member(from_frame, i);
                ir_entity *new_ent = copy_entity_own(old_ent, to_frame);
                set_entity_link(old_ent, new_ent);
        }
}
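
/*
 * Sketch of the entity-link protocol (exposition only): after
 * copy_frame_entities(from, to), every old frame entity carries its copy
 * in its link field, which copy_node_inline() consumes when it rewires a
 * Sel node:
 *
 *     ir_entity *new_ent = (ir_entity*)get_entity_link(old_ent);
 *
 * Callers must bracket this with irp_reserve_resources(irp,
 * IR_RESOURCE_ENTITY_LINK) and irp_free_resources(), as inline_method()
 * below does.
 */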

/* Inlines a method at the given call site. */
int inline_method(ir_node *call, ir_graph *called_graph)
{
        ir_node       *pre_call;
        ir_node       *post_call, *post_bl;
        ir_node       *in[pn_Start_max];
        ir_node       *end, *end_bl, *block;
        ir_node       **res_pred;
        ir_node       **cf_pred;
        ir_node       **args_in;
        ir_node       *ret, *phi;
        int           arity, n_ret, n_exc, n_res, i, j, rem_opt;
        int           irn_arity, n_params;
        int           n_mem_phi;
        enum exc_mode exc_handling;
        ir_type       *mtp;
        ir_type       *ctp;
        ir_entity     *ent;
        ir_graph      *rem;
        ir_graph      *irg = get_irn_irg(call);

        /* we cannot inline some types of calls */
        if (! can_inline(call, called_graph))
                return 0;

        /* We cannot inline a recursive call. The graph must be copied before
         * the call to inline_method() using create_irg_copy(). */
        if (called_graph == irg)
                return 0;

        ent      = get_irg_entity(called_graph);
        mtp      = get_entity_type(ent);
        ctp      = get_Call_type(call);
        n_params = get_method_n_params(mtp);
        n_res    = get_method_n_ress(mtp);

        rem = current_ir_graph;
        current_ir_graph = irg;

        DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));

        /* optimizations can cause problems when allocating new nodes */
        rem_opt = get_opt_optimize();
        set_optimize(0);

        /* Handle graph state */
        assert(get_irg_phase_state(irg) != phase_building);
        assert(get_irg_pinned(irg) == op_pin_state_pinned);
        assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
        set_irg_outs_inconsistent(irg);
        set_irg_extblk_inconsistent(irg);
        set_irg_doms_inconsistent(irg);
        set_irg_loopinfo_inconsistent(irg);
        set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
        set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);
        edges_deactivate(irg);

        /* here we know we WILL inline, so inform the statistics */
        hook_inline(call, called_graph);

        /* -- Decide how to handle exception control flow: Is there a handler
           for the Call node, or do we branch directly to End on an exception?
           exc_handling:
           exc_handler     There is a handler.
           exc_no_handler  Exception handling is not represented in Firm. -- */
        {
                ir_node *Xproj = NULL;
                ir_node *proj;
                for (proj = (ir_node*)get_irn_link(call); proj != NULL;
                     proj = (ir_node*)get_irn_link(proj)) {
                        long proj_nr = get_Proj_proj(proj);
                        if (proj_nr == pn_Call_X_except) Xproj = proj;
                }
                exc_handling = Xproj != NULL ? exc_handler : exc_no_handler;
        }

        /* create the argument tuple */
        args_in = ALLOCAN(ir_node*, n_params);

        block = get_nodes_block(call);
        for (i = n_params - 1; i >= 0; --i) {
                ir_node *arg      = get_Call_param(call, i);
                ir_type *param_tp = get_method_param_type(mtp, i);
                ir_mode *mode     = get_type_mode(param_tp);

                if (mode != get_irn_mode(arg)) {
                        arg = new_r_Conv(block, arg, mode);
                }
                args_in[i] = arg;
        }

        /* Pre_call is a Tuple that collects the memory, the initial control
         * flow, the frame pointer and the arguments of the procedure and later
         * replaces the Start node of the called graph.
         * Post_call is the old Call node and collects the results of the called
         * graph. Both will end up being a tuple. */
        post_bl = get_nodes_block(call);
        /* M, X_initial_exec, P_frame_base and T_args of Start + parameters of the Call */
        in[pn_Start_M]              = get_Call_mem(call);
        in[pn_Start_X_initial_exec] = new_r_Jmp(post_bl);
        in[pn_Start_P_frame_base]   = get_irg_frame(irg);
        in[pn_Start_T_args]         = new_r_Tuple(post_bl, n_params, args_in);
        pre_call = new_r_Tuple(post_bl, pn_Start_max, in);
        post_call = call;

        /* --
           The new block gets the ins of the old block, pre_call and all its
           predecessors and all Phi nodes. -- */
        part_block(pre_call);

        /* increment visited flag for later walk */
        inc_irg_visited(called_graph);

        /* Link some nodes to nodes in the current graph, so the copier uses
         * the linked nodes instead of making new copies.
         * This way the copier will use the created Tuple instead of copying
         * the Start node, similarly for singleton nodes like NoMem and Bad.
         * Note: this prohibits predecessors from being copied - only do it
         *       for nodes without predecessors */
        {
                ir_node *start_block;
                ir_node *start;
                ir_node *nomem;

                start_block = get_irg_start_block(called_graph);
                set_new_node(start_block, get_nodes_block(pre_call));
                mark_irn_visited(start_block);

                start = get_irg_start(called_graph);
                set_new_node(start, pre_call);
                mark_irn_visited(start);

                nomem = get_irg_no_mem(called_graph);
                set_new_node(nomem, get_irg_no_mem(irg));
                mark_irn_visited(nomem);
        }

        /* the entity link is used to link entities on the old stack frame to
         * the new stack frame */
        irp_reserve_resources(irp, IR_RESOURCE_ENTITY_LINK);

        /* copy entities and nodes */
        assert(!irn_visited(get_irg_end(called_graph)));
        copy_frame_entities(called_graph, irg);
        irg_walk_core(get_irg_end(called_graph), copy_node_inline, set_preds_inline,
                      irg);

        irp_free_resources(irp, IR_RESOURCE_ENTITY_LINK);

        /* -- Merge the end of the inlined procedure with the call site -- */
        /* We will turn the old Call node into a Tuple with the following
           predecessors:
           -1: Block of Tuple.
           0: Phi of all Memories of Return statements.
           1: Jmp from new Block that merges the control flow from all exception
              predecessors of the old end block.
           2: Tuple of all arguments.
           3: Phi of Exception memories.
           In case the old Call directly branches to End on an exception we don't
           need the block merging all exceptions nor the Phi of the exception
           memories.
        */

        /* Precompute some values */
        end_bl = get_new_node(get_irg_end_block(called_graph));
        end    = get_new_node(get_irg_end(called_graph));
        arity  = get_Block_n_cfgpreds(end_bl);    /* arity = n_exc + n_ret  */
        n_res  = get_method_n_ress(get_Call_type(call));

        res_pred = XMALLOCN(ir_node*, n_res);
        cf_pred  = XMALLOCN(ir_node*, arity);

        /* take over the keep-alive edges of the inlined graph */
        irn_arity = get_irn_arity(end);
        for (i = 0; i < irn_arity; i++) {
                ir_node *ka = get_End_keepalive(end, i);
                if (! is_Bad(ka))
                        add_End_keepalive(get_irg_end(irg), ka);
        }

        /* replace Return nodes by Jump nodes */
        n_ret = 0;
        for (i = 0; i < arity; i++) {
                ir_node *ret;
                ret = get_Block_cfgpred(end_bl, i);
                if (is_Return(ret)) {
                        ir_node *block = get_nodes_block(ret);
                        cf_pred[n_ret] = new_r_Jmp(block);
                        n_ret++;
                }
        }
        set_irn_in(post_bl, n_ret, cf_pred);

        /* build a Tuple for all results of the method.
         * add Phi node if there was more than one Return. */
        turn_into_tuple(post_call, pn_Call_max);
        /* First the Memory-Phi */
        n_mem_phi = 0;
        for (i = 0; i < arity; i++) {
                ret = get_Block_cfgpred(end_bl, i);
                if (is_Return(ret)) {
                        cf_pred[n_mem_phi++] = get_Return_mem(ret);
                }
                /* memory output for some exceptions is directly connected to End */
                if (is_Call(ret)) {
                        cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 3);
                } else if (is_fragile_op(ret)) {
                        /* We rely on all fragile cf ops having the memory output at the same position. */
                        cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 0);
                } else if (is_Raise(ret)) {
                        cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 1);
                }
        }
        phi = new_r_Phi(post_bl, n_mem_phi, cf_pred, mode_M);
        set_Tuple_pred(call, pn_Call_M, phi);
        /* Conserve the Phi-list for further inlinings -- but might be optimized away */
        if (get_nodes_block(phi) == post_bl) {
                set_irn_link(phi, get_irn_link(post_bl));
                set_irn_link(post_bl, phi);
        }
        /* Now the real results */
        if (n_res > 0) {
                ir_node *result_tuple;
                for (j = 0; j < n_res; j++) {
                        ir_type *res_type = get_method_res_type(ctp, j);
                        ir_mode *res_mode = get_type_mode(res_type);
                        n_ret = 0;
                        for (i = 0; i < arity; i++) {
                                ret = get_Block_cfgpred(end_bl, i);
                                if (is_Return(ret)) {
                                        ir_node *res = get_Return_res(ret, j);
                                        if (get_irn_mode(res) != res_mode) {
                                                ir_node *block = get_nodes_block(res);
                                                res = new_r_Conv(block, res, res_mode);
                                        }
                                        cf_pred[n_ret] = res;
                                        n_ret++;
                                }
                        }
                        if (n_ret > 0) {
                                phi = new_r_Phi(post_bl, n_ret, cf_pred, res_mode);
                        } else {
                                phi = new_r_Bad(irg, res_mode);
                        }
                        res_pred[j] = phi;
                        /* Conserve the Phi-list for further inlinings -- but might be optimized away */
                        if (get_nodes_block(phi) == post_bl) {
                                set_Phi_next(phi, get_Block_phis(post_bl));
                                set_Block_phis(post_bl, phi);
                        }
                }
                result_tuple = new_r_Tuple(post_bl, n_res, res_pred);
                set_Tuple_pred(call, pn_Call_T_result, result_tuple);
        } else {
                set_Tuple_pred(call, pn_Call_T_result, new_r_Bad(irg, mode_T));
        }
        /* handle the regular call */
        set_Tuple_pred(call, pn_Call_X_regular, new_r_Jmp(post_bl));

        /* Finally the exception control flow.
           We have two possible situations:
           First, the Call branches to an exception handler:
           We need to add a Phi node to collect the memory containing the
           exception objects.  Further we need to add another block to get a
           correct representation of this Phi.  To this block we add a Jmp that
           resolves into the X output of the Call when the Call is turned into
           a tuple.
           Second, there is no exception edge: just add all inlined exception
           branches to the End node.
         */
        if (exc_handling == exc_handler) {
                n_exc = 0;
                for (i = 0; i < arity; i++) {
                        ir_node *ret, *irn;
                        ret = get_Block_cfgpred(end_bl, i);
                        irn = skip_Proj(ret);
                        if (is_fragile_op(irn) || is_Raise(irn)) {
                                cf_pred[n_exc] = ret;
                                ++n_exc;
                        }
                }
                if (n_exc > 0) {
                        if (n_exc == 1) {
                                /* simple fix */
                                set_Tuple_pred(call, pn_Call_X_except, cf_pred[0]);
                        } else {
                                ir_node *block = new_r_Block(irg, n_exc, cf_pred);
                                set_Tuple_pred(call, pn_Call_X_except, new_r_Jmp(block));
                        }
                } else {
                        set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg, mode_X));
                }
        } else {
                ir_node *main_end_bl;
                int main_end_bl_arity;
                ir_node **end_preds;

                /* exc_no_handler: add all inlined exception branches to End */
                n_exc = 0;
                for (i = 0; i < arity; i++) {
                        ir_node *ret = get_Block_cfgpred(end_bl, i);
                        ir_node *irn = skip_Proj(ret);

                        if (is_fragile_op(irn) || is_Raise(irn)) {
                                cf_pred[n_exc] = ret;
                                n_exc++;
                        }
                }
                main_end_bl       = get_irg_end_block(irg);
                main_end_bl_arity = get_irn_arity(main_end_bl);
                end_preds         = XMALLOCN(ir_node*, n_exc + main_end_bl_arity);

                for (i = 0; i < main_end_bl_arity; ++i)
                        end_preds[i] = get_irn_n(main_end_bl, i);
                for (i = 0; i < n_exc; ++i)
                        end_preds[main_end_bl_arity + i] = cf_pred[i];
                set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
                set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg, mode_X));
                free(end_preds);
        }
        free(res_pred);
        free(cf_pred);

        /* -- Turn the optimizations back on. -- */
        set_optimize(rem_opt);
        current_ir_graph = rem;

        return 1;
}
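
/*
 * Minimal usage sketch for inline_method() (hedged: it mirrors what
 * inline_small_irgs() below does; error handling omitted):
 *
 *     ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_PHI_LIST);
 *     collect_phiprojs(irg);           // inline_method() needs Proj/Phi lists
 *     if (inline_method(call, callee))
 *             ...                      // the call site was replaced
 *     ir_free_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_PHI_LIST);
 *
 * inline_method() itself re-checks can_inline(), so no separate check is
 * required at the call site.
 */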

/********************************************************************/
/* Apply inlining to small methods.                                 */
/********************************************************************/

static struct obstack  temp_obst;

/** Represents a possibly inlinable call in a graph. */
typedef struct call_entry {
        ir_node    *call;       /**< The Call node. */
        ir_graph   *callee;     /**< The callee IR-graph. */
        list_head  list;        /**< List head for linking the next one. */
        int        loop_depth;  /**< The loop depth of this call. */
        int        benefice;    /**< The calculated benefice of this call. */
        unsigned   local_adr:1; /**< Set if this call gets an address of a local variable. */
        unsigned   all_const:1; /**< Set if this call has only constant parameters. */
} call_entry;

/**
 * environment for inlining small irgs
 */
typedef struct inline_env_t {
        struct obstack obst;  /**< An obstack where call_entries are allocated on. */
        list_head      calls; /**< The call entry list. */
} inline_env_t;

/**
 * Returns the irg called from a Call node. If the irg is not
 * known, NULL is returned.
 *
 * @param call  the call node
 */
static ir_graph *get_call_called_irg(ir_node *call)
{
        ir_node *addr;

        addr = get_Call_ptr(call);
        if (is_Global(addr)) {
                ir_entity *ent = get_Global_entity(addr);
                /* we don't know which function finally gets bound to a weak symbol */
                if (get_entity_linkage(ent) & IR_LINKAGE_WEAK)
                        return NULL;

                return get_entity_irg(ent);
        }

        return NULL;
}
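
/*
 * Behaviour sketch for get_call_called_irg() (assumed typical cases,
 * exposition only):
 *
 *     Call(SymConst(&f), ...)      -> the ir_graph of f, if one exists
 *     Call(Load(...), ...)         -> NULL (indirect call)
 *     f carries IR_LINKAGE_WEAK    -> NULL (may be rebound at link time)
 */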

/**
 * Walker: Collect all calls to known graphs inside a graph.
 */
static void collect_calls(ir_node *call, void *env)
{
        (void) env;
        if (is_Call(call)) {
                ir_graph *called_irg = get_call_called_irg(call);

                if (called_irg != NULL) {
                        /* The Call node calls a locally defined method.  Remember to inline. */
                        inline_env_t *ienv  = (inline_env_t*)env;
                        call_entry   *entry = OALLOC(&ienv->obst, call_entry);
                        entry->call       = call;
                        entry->callee     = called_irg;
                        entry->loop_depth = 0;
                        entry->benefice   = 0;
                        entry->local_adr  = 0;
                        entry->all_const  = 0;

                        list_add_tail(&entry->list, &ienv->calls);
                }
        }
}

/**
 * Inlines all small methods at call sites where the called address comes
 * from a Const node that references the entity representing the called
 * method.
 * The size argument is a rough measure for the code size of the method:
 * Methods where the obstack containing the firm graph is smaller than
 * size are inlined.
 */
void inline_small_irgs(ir_graph *irg, int size)
{
        ir_graph *rem = current_ir_graph;
        inline_env_t env;
        call_entry *entry;

        current_ir_graph = irg;
        /* Handle graph state */
        assert(get_irg_phase_state(irg) != phase_building);
        free_callee_info(irg);

        /* Find Call nodes to inline.
           (We cannot inline during a walk of the graph, as inlining the same
           method several times changes the visited flag of the walked graph:
           after the first inlining visited of the callee equals visited of
           the caller.  With the next inlining both are increased.) */
        obstack_init(&env.obst);
        INIT_LIST_HEAD(&env.calls);
        irg_walk_graph(irg, NULL, collect_calls, &env);

        if (! list_empty(&env.calls)) {
                /* There are calls to inline */
                ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
                collect_phiprojs(irg);

                list_for_each_entry(call_entry, entry, &env.calls, list) {
                        ir_graph            *callee = entry->callee;
                        irg_inline_property prop    = get_irg_inline_property(callee);

                        if (prop == irg_inline_forbidden) {
                                continue;
                        }

                        if (prop >= irg_inline_forced ||
                            _obstack_memory_used(callee->obst) - (int)obstack_room(callee->obst) < size) {
                                inline_method(entry->call, callee);
                        }
                }
                ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
        }
        obstack_free(&env.obst, NULL);
        current_ir_graph = rem;
}

typedef struct inline_small_irgs_pass_t {
        ir_graph_pass_t pass;
        int            size;
} inline_small_irgs_pass_t;

/**
 * Wrapper to run inline_small_irgs() as a pass.
 */
static int inline_small_irgs_wrapper(ir_graph *irg, void *context)
{
        inline_small_irgs_pass_t *pass = (inline_small_irgs_pass_t*)context;

        inline_small_irgs(irg, pass->size);
        return 0;
}

/* create a pass for inline_small_irgs() */
ir_graph_pass_t *inline_small_irgs_pass(const char *name, int size)
{
        inline_small_irgs_pass_t *pass = XMALLOCZ(inline_small_irgs_pass_t);

        pass->size = size;
        return def_graph_pass_constructor(
                &pass->pass, name ? name : "inline_small_irgs", inline_small_irgs_wrapper);
}

/**
 * Environment for inlining irgs.
 */
typedef struct {
        list_head calls;             /**< List of all call nodes in this graph. */
        unsigned  *local_weights;    /**< Once allocated, the beneficial weight for transmitting local addresses. */
        unsigned  n_nodes;           /**< Number of nodes in graph except Id, Tuple, Proj, Start, End. */
        unsigned  n_blocks;          /**< Number of Blocks in graph without Start and End block. */
        unsigned  n_nodes_orig;      /**< for statistics */
        unsigned  n_call_nodes;      /**< Number of Call nodes in the graph. */
        unsigned  n_call_nodes_orig; /**< for statistics */
        unsigned  n_callers;         /**< Number of known graphs that call this graph. */
        unsigned  n_callers_orig;    /**< for statistics */
        unsigned  got_inline:1;      /**< Set, if at least one call inside this graph was inlined. */
        unsigned  recursive:1;       /**< Set, if this function is self recursive. */
} inline_irg_env;

/**
 * Allocate a new environment for inlining.
 */
static inline_irg_env *alloc_inline_irg_env(void)
{
        inline_irg_env *env    = OALLOC(&temp_obst, inline_irg_env);
        INIT_LIST_HEAD(&env->calls);
        env->local_weights     = NULL;
        env->n_nodes           = -2; /* do not count Start, End */
        env->n_blocks          = -2; /* do not count Start, End Block */
        env->n_nodes_orig      = -2; /* do not count Start, End */
        env->n_call_nodes      = 0;
        env->n_call_nodes_orig = 0;
        env->n_callers         = 0;
        env->n_callers_orig    = 0;
        env->got_inline        = 0;
        env->recursive         = 0;
        return env;
}

typedef struct walker_env {
        inline_irg_env *x;     /**< the inline environment */
        char ignore_runtime;   /**< the ignore runtime flag */
        char ignore_callers;   /**< if set, do not update the callers data */
} wenv_t;

/**
 * post-walker: collect all calls in the inline-environment
 * of a graph and sum some statistics.
 */
static void collect_calls2(ir_node *call, void *ctx)
{
        wenv_t         *env = (wenv_t*)ctx;
        inline_irg_env *x = env->x;
        unsigned        code = get_irn_opcode(call);
        ir_graph       *callee;
        call_entry     *entry;

        /* count meaningful nodes in irg */
        if (code != iro_Proj && code != iro_Tuple && code != iro_Sync) {
                if (code != iro_Block) {
                        ++x->n_nodes;
                        ++x->n_nodes_orig;
                } else {
                        ++x->n_blocks;
                }
        }

        if (code != iro_Call) return;

        /* check, if it's a runtime call */
        if (env->ignore_runtime) {
                ir_node *symc = get_Call_ptr(call);

                if (is_Global(symc)) {
                        ir_entity *ent = get_Global_entity(symc);

                        if (get_entity_additional_properties(ent) & mtp_property_runtime)
                                return;
                }
        }

        /* collect all call nodes */
        ++x->n_call_nodes;
        ++x->n_call_nodes_orig;

        callee = get_call_called_irg(call);
        if (callee != NULL) {
                if (! env->ignore_callers) {
                        inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);
                        /* count all static callers */
                        ++callee_env->n_callers;
                        ++callee_env->n_callers_orig;
                }
                if (callee == current_ir_graph)
                        x->recursive = 1;

                /* link it into the list of possibly inlinable entries */
                entry = OALLOC(&temp_obst, call_entry);
                entry->call       = call;
                entry->callee     = callee;
                entry->loop_depth = get_irn_loop(get_nodes_block(call))->depth;
                entry->benefice   = 0;
                entry->local_adr  = 0;
                entry->all_const  = 0;

                list_add_tail(&entry->list, &x->calls);
        }
}

/**
 * Returns TRUE if the graph contains no Call nodes, hence this irg
 * is a leaf in the call graph.
 */
inline static int is_leave(ir_graph *irg)
{
        inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
        return env->n_call_nodes == 0;
}

/**
 * Returns TRUE if the number of nodes in the callee is
 * smaller than size in the irg's environment.
 */
inline static int is_smaller(ir_graph *callee, unsigned size)
{
        inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);
        return env->n_nodes < size;
}

/**
 * Duplicate a call entry.
 *
 * @param entry     the original entry to duplicate
 * @param new_call  the new call node
 * @param loop_depth_delta
 *                  delta value for the loop depth
 */
static call_entry *duplicate_call_entry(const call_entry *entry,
                                        ir_node *new_call, int loop_depth_delta)
{
        call_entry *nentry = OALLOC(&temp_obst, call_entry);
        nentry->call       = new_call;
        nentry->callee     = entry->callee;
        nentry->benefice   = entry->benefice;
        nentry->loop_depth = entry->loop_depth + loop_depth_delta;
        nentry->local_adr  = entry->local_adr;
        nentry->all_const  = entry->all_const;

        return nentry;
}

/**
 * Append all call nodes of the source environment to the list of the
 * destination environment.
 *
 * @param dst         destination environment
 * @param src         source environment
 * @param loop_depth  the loop depth of the call that is replaced by the src list
 */
static void append_call_list(inline_irg_env *dst, inline_irg_env *src, int loop_depth)
{
        call_entry *entry, *nentry;

        /* Note that the src list points to Call nodes in the inlined graph, but
           we need Call nodes in our graph. Luckily the inliner leaves this
           information in the link field. */
        list_for_each_entry(call_entry, entry, &src->calls, list) {
                nentry = duplicate_call_entry(entry, (ir_node*)get_irn_link(entry->call), loop_depth);
                list_add_tail(&nentry->list, &dst->calls);
        }
        dst->n_call_nodes += src->n_call_nodes;
        dst->n_nodes      += src->n_nodes;
}

/*
 * Inlines small leaf methods at call sites where the called address comes
 * from a Const node that references the entity representing the called
 * method.
 * The size arguments are rough measures for the code size of the methods:
 * leaf methods with fewer than leavesize nodes and other methods with fewer
 * than size nodes are inlined, as long as the caller does not grow beyond
 * maxsize nodes.
 */
void inline_leave_functions(unsigned maxsize, unsigned leavesize,
                            unsigned size, int ignore_runtime)
{
        inline_irg_env   *env;
        ir_graph         *irg;
        size_t           i, n_irgs;
        ir_graph         *rem;
        int              did_inline;
        wenv_t           wenv;
        call_entry       *entry, *next;
        const call_entry *centry;
        pmap             *copied_graphs;
        pmap_entry       *pm_entry;

        rem = current_ir_graph;
        obstack_init(&temp_obst);

        /* a map for the copied graphs, used to inline recursive calls */
        copied_graphs = pmap_create();

        /* extend all irgs by a temporary data structure for inlining. */
        n_irgs = get_irp_n_irgs();
        for (i = 0; i < n_irgs; ++i)
                set_irg_link(get_irp_irg(i), alloc_inline_irg_env());

        /* Pre-compute information in temporary data structure. */
        wenv.ignore_runtime = ignore_runtime;
        wenv.ignore_callers = 0;
        for (i = 0; i < n_irgs; ++i) {
                ir_graph *irg = get_irp_irg(i);

                assert(get_irg_phase_state(irg) != phase_building);
                free_callee_info(irg);

                assure_cf_loop(irg);
                wenv.x = (inline_irg_env*)get_irg_link(irg);
                irg_walk_graph(irg, NULL, collect_calls2, &wenv);
        }

        /* -- and now inline. -- */

        /* Inline leaves recursively -- we might construct new leaves. */
        do {
                did_inline = 0;

                for (i = 0; i < n_irgs; ++i) {
                        ir_node *call;
                        int phiproj_computed = 0;

                        current_ir_graph = get_irp_irg(i);
                        env              = (inline_irg_env*)get_irg_link(current_ir_graph);

                        ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
                        list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
                                ir_graph            *callee;
                                irg_inline_property  prop;

                                if (env->n_nodes > maxsize)
                                        break;

                                call   = entry->call;
                                callee = entry->callee;

                                prop = get_irg_inline_property(callee);
                                if (prop == irg_inline_forbidden) {
                                        continue;
                                }

                                if (is_leave(callee) && (
                                    is_smaller(callee, leavesize) || prop >= irg_inline_forced)) {
                                        if (!phiproj_computed) {
                                                phiproj_computed = 1;
                                                collect_phiprojs(current_ir_graph);
                                        }
                                        did_inline = inline_method(call, callee);

                                        if (did_inline) {
                                                inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);

                                                /* call was inlined, Phi/Projs for current graph must be recomputed */
                                                phiproj_computed = 0;

                                                /* Do some statistics */
                                                env->got_inline = 1;
                                                --env->n_call_nodes;
                                                env->n_nodes += callee_env->n_nodes;
                                                --callee_env->n_callers;

                                                /* remove this call from the list */
                                                list_del(&entry->list);
                                                continue;
                                        }
                                }
                        }
                        ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
                }
        } while (did_inline);

        /* inline other small functions. */
        for (i = 0; i < n_irgs; ++i) {
                ir_node *call;
                int phiproj_computed = 0;

                current_ir_graph = get_irp_irg(i);
                env              = (inline_irg_env*)get_irg_link(current_ir_graph);

                ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

                /* note that the list of possible calls is updated during the process */
                list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
                        irg_inline_property prop;
                        ir_graph            *callee;
                        pmap_entry          *e;

                        call   = entry->call;
                        callee = entry->callee;

                        prop = get_irg_inline_property(callee);
                        if (prop == irg_inline_forbidden) {
                                continue;
                        }

                        e = pmap_find(copied_graphs, callee);
                        if (e != NULL) {
                                /*
                                 * Remap callee if we have a copy.
                                 * FIXME: Should we do this only for recursive Calls ?
                                 */
                                callee = (ir_graph*)e->value;
                        }

                        if (prop >= irg_inline_forced ||
                            (is_smaller(callee, size) && env->n_nodes < maxsize) /* small function */) {
                                if (current_ir_graph == callee) {
                                        /*
                                         * Recursive call: we cannot directly inline because we cannot
                                         * walk the graph and change it. So we have to make a copy of
                                         * the graph first.
                                         */

                                        inline_irg_env *callee_env;
                                        ir_graph       *copy;

                                        ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

                                        /*
                                         * No copy yet, create one.
                                         * Note that recursive methods are never leaves, so it is
                                         * sufficient to test this condition here.
                                         */
                                        copy = create_irg_copy(callee);

                                        /* create_irg_copy() destroys the Proj links, recompute them */
                                        phiproj_computed = 0;

                                        ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

                                        /* allocate a new environment */
                                        callee_env = alloc_inline_irg_env();
                                        set_irg_link(copy, callee_env);

                                        assure_cf_loop(copy);
                                        wenv.x              = callee_env;
                                        wenv.ignore_callers = 1;
                                        irg_walk_graph(copy, NULL, collect_calls2, &wenv);

                                        /*
                                         * Enter the entity of the original graph. This is needed
                                         * for inline_method(). However, note that ent->irg still points
                                         * to callee, NOT to copy.
                                         */
                                        set_irg_entity(copy, get_irg_entity(callee));

                                        pmap_insert(copied_graphs, callee, copy);
                                        callee = copy;

                                        /* we have only one caller: the original graph */
                                        callee_env->n_callers      = 1;
                                        callee_env->n_callers_orig = 1;
                                }
                                if (! phiproj_computed) {
                                        phiproj_computed = 1;
                                        collect_phiprojs(current_ir_graph);
                                }
                                did_inline = inline_method(call, callee);
                                if (did_inline) {
                                        inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);

                                        /* call was inlined, Phi/Projs for current graph must be recomputed */
                                        phiproj_computed = 0;

                                        /* callee was inlined. Append its call list. */
                                        env->got_inline = 1;
                                        --env->n_call_nodes;
                                        append_call_list(env, callee_env, entry->loop_depth);
                                        --callee_env->n_callers;

                                        /* after we have inlined callee, all called methods inside
                                           callee are now called once more */
                                        list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
                                                inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
                                                ++penv->n_callers;
                                        }

                                        /* remove this call from the list */
                                        list_del(&entry->list);
                                        continue;
                                }
                        }
                }
                ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
        }

        for (i = 0; i < n_irgs; ++i) {
                irg = get_irp_irg(i);
                env = (inline_irg_env*)get_irg_link(irg);

                if (env->got_inline) {
                        optimize_graph_df(irg);
                        optimize_cf(irg);
                }
                if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
                        DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
                        env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
                        env->n_callers_orig, env->n_callers,
                        get_entity_name(get_irg_entity(irg))));
                }
        }

        /* kill the copied graphs: we don't need them anymore */
        foreach_pmap(copied_graphs, pm_entry) {
                ir_graph *copy = (ir_graph*)pm_entry->value;

                /* reset the entity, otherwise it will be deleted in the next step ... */
                set_irg_entity(copy, NULL);
                free_ir_graph(copy);
        }
        pmap_destroy(copied_graphs);

        obstack_free(&temp_obst, NULL);
        current_ir_graph = rem;
}
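
/*
 * Usage sketch for inline_leave_functions(); the thresholds are made-up
 * example values, not tuned recommendations: inline leaf functions with
 * fewer than 100 nodes and other functions with fewer than 50 nodes, stop
 * growing a caller beyond 750 nodes, and skip calls to runtime functions:
 *
 *     inline_leave_functions(750, 100, 50, 1);
 */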

typedef struct inline_leave_functions_pass_t {
        ir_prog_pass_t pass;
        unsigned       maxsize;
        unsigned       leavesize;
        unsigned       size;
        int            ignore_runtime;
} inline_leave_functions_pass_t;

/**
 * Wrapper to run inline_leave_functions() as an ir_prog pass.
 */
static int inline_leave_functions_wrapper(ir_prog *irp, void *context)
{
        inline_leave_functions_pass_t *pass = (inline_leave_functions_pass_t*)context;

        (void)irp;
        inline_leave_functions(
                pass->maxsize, pass->leavesize,
                pass->size, pass->ignore_runtime);
        return 0;
}

/* create a pass for inline_leave_functions() */
ir_prog_pass_t *inline_leave_functions_pass(
        const char *name, unsigned maxsize, unsigned leavesize,
        unsigned size, int ignore_runtime)
{
        inline_leave_functions_pass_t *pass = XMALLOCZ(inline_leave_functions_pass_t);

        pass->maxsize        = maxsize;
        pass->leavesize      = leavesize;
        pass->size           = size;
        pass->ignore_runtime = ignore_runtime;

        return def_prog_pass_constructor(
                &pass->pass,
                name ? name : "inline_leave_functions",
                inline_leave_functions_wrapper);
}
1238
1239 /**
1240  * Calculate the parameter weights for transmitting the address of a local variable.
1241  */
1242 static unsigned calc_method_local_weight(ir_node *arg)
1243 {
1244         int      i, j, k;
1245         unsigned v, weight = 0;
1246
1247         for (i = get_irn_n_outs(arg) - 1; i >= 0; --i) {
1248                 ir_node *succ = get_irn_out(arg, i);
1249
1250                 switch (get_irn_opcode(succ)) {
1251                 case iro_Load:
1252                 case iro_Store:
1253                         /* Loads and Store can be removed */
1254                         weight += 3;
1255                         break;
1256                 case iro_Sel:
1257                         /* check if all args are constant */
1258                         for (j = get_Sel_n_indexs(succ) - 1; j >= 0; --j) {
1259                                 ir_node *idx = get_Sel_index(succ, j);
1260                                 if (! is_Const(idx))
1261                                         return 0;
1262                         }
1263                         /* Check users on this Sel. Note: if a 0 is returned here, there was
1264                            some unsupported node. */
1265                         v = calc_method_local_weight(succ);
1266                         if (v == 0)
1267                                 return 0;
1268                         /* we can kill one Sel with constant indexes, this is cheap */
1269                         weight += v + 1;
1270                         break;
1271                 case iro_Id:
1272                         /* when looking backward we might find Id nodes */
1273                         weight += calc_method_local_weight(succ);
1274                         break;
1275                 case iro_Tuple:
1276                         /* unoptimized tuple */
1277                         for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
1278                                 ir_node *pred = get_Tuple_pred(succ, j);
1279                                 if (pred == arg) {
1280                                         /* look for Proj(j) */
1281                                         for (k = get_irn_n_outs(succ) - 1; k >= 0; --k) {
1282                                                 ir_node *succ_succ = get_irn_out(succ, k);
1283                                                 if (is_Proj(succ_succ)) {
1284                                                         if (get_Proj_proj(succ_succ) == j) {
1285                                                                 /* found */
1286                                                                 weight += calc_method_local_weight(succ_succ);
1287                                                         }
1288                                                 } else {
1289                                                         /* this should NOT happen */
1290                                                         return 0;
1291                                                 }
1292                                         }
1293                                 }
1294                         }
1295                         break;
1296                 default:
1297                         /* any other node: not handled yet, or a use that prevents scalar replacement */
1298                         return 0;
1299                 }
1300         }
1301         return weight;
1302 }
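/* Worked example (illustrative only): for an address argument whose users
 * are one Load and one Store, the loop above yields 3 + 3 = 6.  If there is
 * additionally a Sel with constant indexes whose only user is a Load, the
 * recursion returns 3 and the Sel contributes 3 + 1, giving a total weight
 * of 10.  A single unsupported user, e.g. the address escaping into another
 * Call, makes the whole computation return 0. */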
1303
1304 /**
1305  * Calculate the parameter weights for all parameters of the given graph.
1306  */
1307 static void analyze_irg_local_weights(inline_irg_env *env, ir_graph *irg)
1308 {
1309         ir_entity *ent = get_irg_entity(irg);
1310         ir_type  *mtp;
1311         size_t   nparams;
1312         int      i;
1313         long     proj_nr;
1314         ir_node  *irg_args, *arg;
1315
1316         mtp      = get_entity_type(ent);
1317         nparams  = get_method_n_params(mtp);
1318
1319         /* allocate a new array; its existence doubles as the 'already analysed' flag */
1320         env->local_weights = NEW_ARR_D(unsigned, &temp_obst, nparams);
1321
1322         /* If the method has no parameters, there is nothing to do. */
1323         if (nparams == 0)
1324                 return;
1325
1326         assure_irg_outs(irg);
1327         irg_args = get_irg_args(irg);
1328         for (i = get_irn_n_outs(irg_args) - 1; i >= 0; --i) {
1329                 arg     = get_irn_out(irg_args, i);
1330                 proj_nr = get_Proj_proj(arg);
1331                 env->local_weights[proj_nr] = calc_method_local_weight(arg);
1332         }
1333 }
1334
1335 /**
1336  * Calculate the benefice for transmitting a local variable address.
1337  * After inlining, the local variable might be transformed into a
1338  * SSA variable by scalar_replacement().
1339  */
1340 static unsigned get_method_local_adress_weight(ir_graph *callee, size_t pos)
1341 {
1342         inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);
1343
1344         if (env->local_weights == NULL)
1345                 analyze_irg_local_weights(env, callee);
1346
1347         if (pos < ARR_LEN(env->local_weights))
1348                 return env->local_weights[pos];
1349         return 0;
1350 }
1351
1352 /**
1353  * Calculate a benefice value for inlining the given call.
1354  *
1355  * @param call       the call node we have to inspect
1356  * @param callee     the called graph
1357  */
1358 static int calc_inline_benefice(call_entry *entry, ir_graph *callee)
1359 {
1360         ir_node   *call = entry->call;
1361         ir_entity *ent  = get_irg_entity(callee);
1362         ir_node   *frame_ptr;
1363         ir_type   *mtp;
1364         int       weight = 0;
1365         int       i, n_params, all_const;
1366         unsigned  cc, v;
1367         irg_inline_property prop;
1368
1369         inline_irg_env *callee_env;
1370
1371         prop = get_irg_inline_property(callee);
1372         if (prop == irg_inline_forbidden) {
1373                 DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden\n",
1374                     call, callee));
1375                 return entry->benefice = INT_MIN;
1376         }
1377
1378         if (get_irg_additional_properties(callee) & mtp_property_noreturn) {
1379                 DB((dbg, LEVEL_2, "In %+F Call to %+F: not inlining noreturn\n",
1380                     call, callee));
1381                 return entry->benefice = INT_MIN;
1382         }
1383
1384         /* costs for every passed parameter */
1385         n_params = get_Call_n_params(call);
1386         mtp      = get_entity_type(ent);
1387         cc       = get_method_calling_convention(mtp);
1388         if (cc & cc_reg_param) {
1389                 /* register calling convention: register parameters are cheaper */
1390                 int max_regs = cc & ~cc_bits;
1391
1392                 if (max_regs < n_params)
1393                         weight += max_regs * 2 + (n_params - max_regs) * 5;
1394                 else
1395                         weight += n_params * 2;
1396         } else {
1397                 /* parameters are passed on the stack */
1398                 weight += 5 * n_params;
1399         }
1400
1401         /* constant parameters improve the benefice */
1402         frame_ptr = get_irg_frame(current_ir_graph);
1403         all_const = 1;
1404         for (i = 0; i < n_params; ++i) {
1405                 ir_node *param = get_Call_param(call, i);
1406
1407                 if (is_Const(param)) {
1408                         weight += get_method_param_weight(ent, i);
1409                 } else {
1410                         all_const = 0;
1411                         if (is_SymConst(param))
1412                                 weight += get_method_param_weight(ent, i);
1413                         else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
1414                                 /*
1415                                  * An address of a local variable is transmitted. After
1416                                  * inlining, scalar_replacement might be able to remove the
1417                                  * local variable, so honor this.
1418                                  */
1419                                 v = get_method_local_adress_weight(callee, i);
1420                                 weight += v;
1421                                 if (v > 0)
1422                                         entry->local_adr = 1;
1423                         }
1424                 }
1425         }
1426         entry->all_const = all_const;
1427
1428         callee_env = (inline_irg_env*)get_irg_link(callee);
1429         if (callee_env->n_callers == 1 &&
1430             callee != current_ir_graph &&
1431             !entity_is_externally_visible(ent)) {
1432                 weight += 700;
1433         }
1434
1435         /* give a bonus for functions with one block */
1436         if (callee_env->n_blocks == 1)
1437                 weight = weight * 3 / 2;
1438
1439         /* and one for small non-recursive functions: we want them to be inlined in almost every case */
1440         if (callee_env->n_nodes < 30 && !callee_env->recursive)
1441                 weight += 2000;
1442
1443         /* and finally for leaves: they do not increase the register pressure
1444            because of callee-save registers */
1445         if (callee_env->n_call_nodes == 0)
1446                 weight += 400;
1447
1448         /* it is important to inline inner loops first */
1449         if (entry->loop_depth > 30)
1450                 weight += 30 * 1024;
1451         else
1452                 weight += entry->loop_depth * 1024;
1453
1454         /*
1455          * All arguments constant is probably a good sign, give an extra bonus
1456          */
1457         if (all_const)
1458                 weight += 1024;
1459
1460         return entry->benefice = weight;
1461 }
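/* Worked example (illustrative numbers only): a call passing 3 non-constant
 * parameters on the stack to an externally visible, single-block callee with
 * 25 nodes, no Call nodes, several callers and loop_depth 0:
 *   stack parameters:     3 * 5      -> weight = 15
 *   one-block bonus:      15 * 3 / 2 -> weight = 22   (integer division)
 *   small non-recursive:  22 + 2000  -> weight = 2022
 *   leaf bonus:           2022 + 400 -> weight = 2422
 * maybe_push_call() below then compares this benefice against the
 * inline_threshold of the current run. */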
1462
1463 typedef struct walk_env_t {
1464         ir_graph **irgs;
1465         size_t   last_irg;
1466 } walk_env_t;
1467
1468 /**
1469  * Callgraph walker, collect all visited graphs.
1470  */
1471 static void callgraph_walker(ir_graph *irg, void *data)
1472 {
1473         walk_env_t *env = (walk_env_t *)data;
1474         env->irgs[env->last_irg++] = irg;
1475 }
1476
1477 /**
1478  * Creates an inline order for all graphs.
1479  *
1480  * @return the list of graphs.
1481  */
1482 static ir_graph **create_irg_list(void)
1483 {
1484         ir_entity  **free_methods;
1485         size_t     n_irgs = get_irp_n_irgs();
1486         walk_env_t env;
1487
1488         cgana(&free_methods);
1489         xfree(free_methods);
1490
1491         compute_callgraph();
1492
1493         env.irgs     = XMALLOCNZ(ir_graph*, n_irgs);
1494         env.last_irg = 0;
1495
1496         callgraph_walk(NULL, callgraph_walker, &env);
1497         assert(n_irgs == env.last_irg);
1498
1499         return env.irgs;
1500 }
1501
1502 /**
1503  * Push a call onto the priority queue if its benefice is big enough.
1504  *
1505  * @param pqueue   the priority queue of calls
1506  * @param call     the call entry
1507  * @param inline_threshold
1508  *                 the threshold value
1509  */
1510 static void maybe_push_call(pqueue_t *pqueue, call_entry *call,
1511                             int inline_threshold)
1512 {
1513         ir_graph            *callee  = call->callee;
1514         irg_inline_property prop     = get_irg_inline_property(callee);
1515         int                 benefice = calc_inline_benefice(call, callee);
1516
1517         DB((dbg, LEVEL_2, "In %+F Call %+F to %+F has benefice %d\n",
1518             get_irn_irg(call->call), call->call, callee, benefice));
1519
1520         if (prop < irg_inline_forced && benefice < inline_threshold) {
1521                 return;
1522         }
1523
1524         pqueue_put(pqueue, call, benefice);
1525 }
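/* Ordering sketch (describes the intended behavior, not a new guarantee):
 * the priority queue delivers queued calls highest benefice first, so with
 * benefices {2422, 800, -100} and an inline_threshold of 0, only the first
 * two calls are queued at all and the one with benefice 2422 is tried first
 * by inline_into().  Calls to irg_inline_forced callees are queued
 * regardless of the threshold. */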
1526
1527 /**
1528  * Try to inline calls into a graph.
1529  *
1530  * @param irg      the graph into which we inline
1531  * @param maxsize  do NOT inline if the size of irg gets
1532  *                 bigger than this amount
1533  * @param inline_threshold
1534  *                 threshold value for inline decision
1535  * @param copied_graphs
1536  *                 map containing copies of recursive graphs
1537  */
1538 static void inline_into(ir_graph *irg, unsigned maxsize,
1539                         int inline_threshold, pmap *copied_graphs)
1540 {
1541         int            phiproj_computed = 0;
1542         inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
1543         call_entry     *curr_call;
1544         wenv_t         wenv;
1545         pqueue_t       *pqueue;
1546
1547         if (env->n_call_nodes == 0)
1548                 return;
1549
1550         if (env->n_nodes > maxsize) {
1551                 DB((dbg, LEVEL_2, "%+F: too big (%d)\n", irg, env->n_nodes));
1552                 return;
1553         }
1554
1555         current_ir_graph = irg;
1556         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1557
1558         /* put all calls of this graph into the pqueue */
1559         pqueue = new_pqueue();
1560
1561         list_for_each_entry(call_entry, curr_call, &env->calls, list) {
1562                 assert(is_Call(curr_call->call));
1563                 maybe_push_call(pqueue, curr_call, inline_threshold);
1564         }
1565
1566         /* note that the list of possible calls is updated during the process */
1567         while (!pqueue_empty(pqueue)) {
1568                 int                 did_inline;
1569                 call_entry          *curr_call  = (call_entry*)pqueue_pop_front(pqueue);
1570                 ir_graph            *callee     = curr_call->callee;
1571                 ir_node             *call_node  = curr_call->call;
1572                 inline_irg_env      *callee_env = (inline_irg_env*)get_irg_link(callee);
1573                 irg_inline_property prop        = get_irg_inline_property(callee);
1574                 int                 loop_depth;
1575                 const call_entry    *centry;
1576                 pmap_entry          *e;
1577
1578                 if ((prop < irg_inline_forced) && env->n_nodes + callee_env->n_nodes > maxsize) {
1579                         DB((dbg, LEVEL_2, "%+F: too big (%d) + %+F (%d)\n", irg,
1580                                                 env->n_nodes, callee, callee_env->n_nodes));
1581                         continue;
1582                 }
1583
1584                 e = pmap_find(copied_graphs, callee);
1585                 if (e != NULL) {
1586                         int benefice = curr_call->benefice;
1587                         /*
1588                          * Reduce the weight for recursive functions if not all arguments are constant:
1589                          * inlining recursive functions is rarely profitable.
1590                          */
1591                         if (!curr_call->all_const)
1592                                 benefice -= 2000;
1593                         if (benefice < inline_threshold)
1594                                 continue;
1595
1596                         /*
1597                          * Remap callee if we have a copy.
1598                          */
1599                         callee     = (ir_graph*)e->value;
1600                         callee_env = (inline_irg_env*)get_irg_link(callee);
1601                 }
1602
1603                 if (current_ir_graph == callee) {
1604                         /*
1605                          * Recursive call: we cannot directly inline because we cannot
1606                          * walk the graph and change it. So we have to make a copy of
1607                          * the graph first.
1608                          */
1609                         int benefice = curr_call->benefice;
1610                         ir_graph *copy;
1611
1612                         /*
1613                          * Reduce the weight for recursive functions if not all arguments are constant:
1614                          * inlining recursive functions is rarely profitable.
1615                          */
1616                         if (!curr_call->all_const)
1617                                 benefice -= 2000;
1618                         if (benefice < inline_threshold)
1619                                 continue;
1620
1621                         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1622
1623                         /*
1624                          * No copy yet, create one.
1625                          * Note that recursive methods are never leaves, so it is
1626                          * sufficient to test this condition here.
1627                          */
1628                         copy = create_irg_copy(callee);
1629
1630                         /* create_irg_copy() destroys the Proj links, recompute them */
1631                         phiproj_computed = 0;
1632
1633                         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1634
1635                         /* allocate a new environment */
1636                         callee_env = alloc_inline_irg_env();
1637                         set_irg_link(copy, callee_env);
1638
1639                         assure_cf_loop(copy);
1640                         memset(&wenv, 0, sizeof(wenv));
1641                         wenv.x              = callee_env;
1642                         wenv.ignore_callers = 1;
1643                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
1644
1645                         /*
1646                          * Enter the entity of the original graph. This is needed
1647                          * for inline_method(). However, note that ent->irg still points
1648                          * to callee, NOT to copy.
1649                          */
1650                         set_irg_entity(copy, get_irg_entity(callee));
1651
1652                         pmap_insert(copied_graphs, callee, copy);
1653                         callee = copy;
1654
1655                         /* we have only one caller: the original graph */
1656                         callee_env->n_callers      = 1;
1657                         callee_env->n_callers_orig = 1;
1658                 }
1659                 if (! phiproj_computed) {
1660                         phiproj_computed = 1;
1661                         collect_phiprojs(current_ir_graph);
1662                 }
1663                 did_inline = inline_method(call_node, callee);
1664                 if (!did_inline)
1665                         continue;
1666
1667                 /* call was inlined, Phi/Projs for current graph must be recomputed */
1668                 phiproj_computed = 0;
1669
1670                 /* remove it from the caller list */
1671                 list_del(&curr_call->list);
1672
1673                 /* callee was inlined; append its call list */
1674                 env->got_inline = 1;
1675                 --env->n_call_nodes;
1676
1677                 /* we just generate a bunch of new calls */
1678                 loop_depth = curr_call->loop_depth;
1679                 list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
1680                         inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
1681                         ir_node        *new_call;
1682                         call_entry     *new_entry;
1683
1684                         /* after we have inlined callee, all called methods inside
1685                          * callee are now called once more */
1686                         ++penv->n_callers;
1687
1688                         /* Note that the src list points to Call nodes in the inlined graph,
1689                          * but we need Call nodes in our graph. Luckily the inliner leaves
1690                          * this information in the link field. */
1691                         new_call = (ir_node*)get_irn_link(centry->call);
1692                         assert(is_Call(new_call));
1693
1694                         new_entry = duplicate_call_entry(centry, new_call, loop_depth);
1695                         list_add_tail(&new_entry->list, &env->calls);
1696                         maybe_push_call(pqueue, new_entry, inline_threshold);
1697                 }
1698
1699                 env->n_call_nodes += callee_env->n_call_nodes;
1700                 env->n_nodes += callee_env->n_nodes;
1701                 --callee_env->n_callers;
1702         }
1703         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1704         del_pqueue(pqueue);
1705 }
1706
1707 /*
1708  * Heuristic inliner. Calculates a benefice value for every call and inlines
1709  * those calls with a value higher than the threshold.
1710  */
1711 void inline_functions(unsigned maxsize, int inline_threshold,
1712                       opt_ptr after_inline_opt)
1713 {
1714         inline_irg_env   *env;
1715         size_t           i, n_irgs;
1716         ir_graph         *rem;
1717         wenv_t           wenv;
1718         pmap             *copied_graphs;
1719         pmap_entry       *pm_entry;
1720         ir_graph         **irgs;
1721
1722         rem = current_ir_graph;
1723         obstack_init(&temp_obst);
1724
1725         irgs = create_irg_list();
1726
1727         /* a map for the copied graphs, used to inline recursive calls */
1728         copied_graphs = pmap_create();
1729
1730         /* extend all irgs by a temporary data structure for inlining. */
1731         n_irgs = get_irp_n_irgs();
1732         for (i = 0; i < n_irgs; ++i)
1733                 set_irg_link(irgs[i], alloc_inline_irg_env());
1734
1735         /* Pre-compute information in temporary data structure. */
1736         wenv.ignore_runtime = 0;
1737         wenv.ignore_callers = 0;
1738         for (i = 0; i < n_irgs; ++i) {
1739                 ir_graph *irg = irgs[i];
1740
1741                 free_callee_info(irg);
1742
1743                 wenv.x = (inline_irg_env*)get_irg_link(irg);
1744                 assure_cf_loop(irg);
1745                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
1746         }
1747
1748         /* -- and now inline. -- */
1749         for (i = 0; i < n_irgs; ++i) {
1750                 ir_graph *irg = irgs[i];
1751
1752                 inline_into(irg, maxsize, inline_threshold, copied_graphs);
1753         }
1754
1755         for (i = 0; i < n_irgs; ++i) {
1756                 ir_graph *irg = irgs[i];
1757
1758                 env = (inline_irg_env*)get_irg_link(irg);
1759                 if (env->got_inline && after_inline_opt != NULL) {
1760                         /* this irg got calls inlined: optimize it */
1761                         after_inline_opt(irg);
1762                 }
1763                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
1764                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
1765                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
1766                         env->n_callers_orig, env->n_callers,
1767                         get_entity_name(get_irg_entity(irg))));
1768                 }
1769         }
1770
1771         /* kill the copied graphs: we don't need them anymore */
1772         foreach_pmap(copied_graphs, pm_entry) {
1773                 ir_graph *copy = (ir_graph*)pm_entry->value;
1774
1775                 /* reset the entity, otherwise it will be deleted in the next step ... */
1776                 set_irg_entity(copy, NULL);
1777                 free_ir_graph(copy);
1778         }
1779         pmap_destroy(copied_graphs);
1780
1781         xfree(irgs);
1782
1783         obstack_free(&temp_obst, NULL);
1784         current_ir_graph = rem;
1785 }
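/* A minimal usage sketch (parameter values are illustrative, not tuned
 * defaults):
 *
 *   inline_functions(750, 0, optimize_graph_df);
 *
 * This inlines as long as no graph grows beyond 750 nodes, accepts calls
 * with a non-negative benefice, and runs optimize_graph_df() from iropt on
 * every graph that received inlined calls.  Any function matching opt_ptr
 * (void (*)(ir_graph*)) can serve as after_inline_opt; pass NULL to skip
 * the post-inline cleanup. */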
1786
1787 typedef struct inline_functions_pass_t {
1788         ir_prog_pass_t pass;
1789         unsigned       maxsize;
1790         int            inline_threshold;
1791         opt_ptr        after_inline_opt;
1792 } inline_functions_pass_t;
1793
1794 /**
1795  * Wrapper to run inline_functions() as an ir_prog pass.
1796  */
1797 static int inline_functions_wrapper(ir_prog *irp, void *context)
1798 {
1799         inline_functions_pass_t *pass = (inline_functions_pass_t*)context;
1800
1801         (void)irp;
1802         inline_functions(pass->maxsize, pass->inline_threshold,
1803                          pass->after_inline_opt);
1804         return 0;
1805 }
1806
1807 /* create an ir_prog pass for inline_functions */
1808 ir_prog_pass_t *inline_functions_pass(
1809           const char *name, unsigned maxsize, int inline_threshold,
1810           opt_ptr after_inline_opt)
1811 {
1812         inline_functions_pass_t *pass = XMALLOCZ(inline_functions_pass_t);
1813
1814         pass->maxsize          = maxsize;
1815         pass->inline_threshold = inline_threshold;
1816         pass->after_inline_opt = after_inline_opt;
1817
1818         return def_prog_pass_constructor(
1819                 &pass->pass, name ? name : "inline_functions",
1820                 inline_functions_wrapper);
1821 }
1822
1823 void firm_init_inline(void)
1824 {
1825         FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
1826 }