Inliner must clear parameter flag of entities
[libfirm] / ir / opt / opt_inline.c
1 /*
2  * Copyright (C) 1995-2011 University of Karlsruhe.  All rights reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief    Dead node elimination and Procedure Inlining.
23  * @author   Michael Beck, Goetz Lindenmaier
24  * @version  $Id$
25  */
26 #include "config.h"
27
28 #include <limits.h>
29 #include <stdbool.h>
30 #include <assert.h>
31
32 #include "irnode_t.h"
33 #include "irgraph_t.h"
34 #include "irprog_t.h"
35
36 #include "iroptimize.h"
37 #include "ircons_t.h"
38 #include "iropt_t.h"
39 #include "irgopt.h"
40 #include "irgmod.h"
41 #include "irgwalk.h"
42
43 #include "array_t.h"
44 #include "list.h"
45 #include "pset.h"
46 #include "pmap.h"
47 #include "pdeq.h"
48 #include "xmalloc.h"
49 #include "pqueue.h"
50
51 #include "irouts.h"
52 #include "irloop_t.h"
53 #include "irbackedge_t.h"
54 #include "opt_init.h"
55 #include "cgana.h"
56 #include "trouts.h"
57 #include "error.h"
58
59 #include "analyze_irg_args.h"
60 #include "iredges_t.h"
61 #include "irflag_t.h"
62 #include "irhooks.h"
63 #include "irtools.h"
64 #include "iropt_dbg.h"
65 #include "irpass_t.h"
66 #include "irphase_t.h"
67
68 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
69
70 /*------------------------------------------------------------------*/
71 /* Routines for dead node elimination / copying garbage collection  */
72 /* of the obstack.                                                  */
73 /*------------------------------------------------------------------*/
74
75 /**
76  * Remember the new node in the old node by using a field all nodes have.
77  */
78 static void set_new_node(ir_node *node, ir_node *new_node)
79 {
80         set_irn_link(node, new_node);
81 }
82
83 /**
84  * Get this new node, before the old node is forgotten.
85  */
86 static inline ir_node *get_new_node(ir_node *old_node)
87 {
88         assert(irn_visited(old_node));
89         return (ir_node*) get_irn_link(old_node);
90 }
91
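/* Usage sketch (mirrors inline_method() below): a pre-walker records each
 * copy via set_new_node(), and the post-walker then resolves predecessors
 * with get_new_node():
 *
 *     irg_walk_core(get_irg_end(called_graph), copy_node_inline,
 *                   set_preds_inline, irg);
 */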
92 /*--------------------------------------------------------------------*/
93 /*  Functionality for inlining                                         */
94 /*--------------------------------------------------------------------*/
95
96 /**
97  * Copy a node for inlining.  Updates attributes that change when
98  * inlining but not for dead node elimination.
99  *
100  * Copies the node by calling irn_copy_into_irg() and then updates the
101  * entity if it is a local one.  env must be the new ir_graph of the
102  * inlined procedure.  The new entities must be in the link fields of
103  * the old entities.
104  */
105 static void copy_node_inline(ir_node *node, void *env)
106 {
107         ir_graph *new_irg  = (ir_graph*) env;
108         ir_node  *new_node = irn_copy_into_irg(node, new_irg);
109
110         set_new_node(node, new_node);
111         if (is_Sel(node)) {
112                 ir_graph  *old_irg        = get_irn_irg(node);
113                 ir_type   *old_frame_type = get_irg_frame_type(old_irg);
114                 ir_entity *old_entity     = get_Sel_entity(node);
115                 assert(is_Sel(new_node));
116                 /* use copied entities from the new frame */
117                 if (get_entity_owner(old_entity) == old_frame_type) {
118                         ir_entity *new_entity = (ir_entity*)get_entity_link(old_entity);
119                         assert(new_entity != NULL);
120                         set_Sel_entity(new_node, new_entity);
121                 }
122         } else if (is_Block(new_node)) {
123                 new_node->attr.block.irg.irg = new_irg;
124         }
125 }
126
127 static void set_preds_inline(ir_node *node, void *env)
128 {
129         ir_node *new_node;
130
131         irn_rewire_inputs(node);
132
133         /* move constants into start block */
134         new_node = get_new_node(node);
135         if (is_irn_constlike(new_node)) {
136                 ir_graph *new_irg     = (ir_graph *) env;
137                 ir_node  *start_block = get_irg_start_block(new_irg);
138                 set_nodes_block(new_node, start_block);
139         }
140 }
141
142 /**
143  * Walker: checks whether the address of a parameter or of a value-type entity on the frame is taken, or a stack Alloc is used; any of these forbids inlining.
144  */
145 static void find_addr(ir_node *node, void *env)
146 {
147         bool *allow_inline = (bool*)env;
148
149         if (is_Sel(node)) {
150                 ir_graph *irg = current_ir_graph;
151                 if (get_Sel_ptr(node) == get_irg_frame(irg)) {
152                         /* access to frame */
153                         ir_entity *ent = get_Sel_entity(node);
154                         if (get_entity_owner(ent) != get_irg_frame_type(irg)) {
155                                 /* access to value_type */
156                                 *allow_inline = false;
157                         }
158                         if (is_parameter_entity(ent)) {
159                                 *allow_inline = false;
160                         }
161                 }
162         } else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
163                 /* From GCC:
164                  * Refuse to inline an alloca call unless the user explicitly forced it,
165                  * as this may change the program's memory overhead drastically when the
166                  * function using alloca is called in a loop.  In the GCC code present in
167                  * SPEC2000, inlining into schedule_block caused it to require 2GB of RAM
168                  * instead of 256MB.
169                  *
170                  * Sadly this is true for our implementation as well.  Moreover, we cannot
171                  * differentiate between alloca() and VLA yet, so this disables inlining
172                  * of functions using VLA (which are completely safe).
173                  *
174                  * Two possible solutions:
175                  * - add a flag to the Alloc node for "real" alloca() calls
176                  * - add a new Stack-Restore node at the end of a function using
177                  *   alloca()
178                  */
179                 *allow_inline = false;
180         }
181 }
182
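/* Illustrative C source (sketch): both functions currently block inlining,
 * since alloca() and a VLA produce the same stack_alloc Alloc node
 * (use() is a hypothetical consumer):
 *
 *     void f(size_t n) { char *p = alloca(n); use(p); }   // real alloca
 *     void g(size_t n) { char  a[n];          use(a); }   // VLA
 */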
183 /**
184  * Check if we can inline a given call.
185  * Currently, we cannot inline two cases:
186  * - calls with compound arguments or results
187  * - graphs that take the address of a parameter
188  *
189  * These conditions are checked here.
190  */
191 static bool can_inline(ir_node *call, ir_graph *called_graph)
192 {
193         ir_entity          *called      = get_irg_entity(called_graph);
194         ir_type            *called_type = get_entity_type(called);
195         ir_type            *call_type   = get_Call_type(call);
196         size_t              n_params    = get_method_n_params(called_type);
197         size_t              n_arguments = get_method_n_params(call_type);
198         size_t              n_res       = get_method_n_ress(called_type);
199         irg_inline_property prop        = get_irg_inline_property(called_graph);
200         size_t              i;
201         bool                res;
202
203         if (prop == irg_inline_forbidden)
204                 return false;
205
206         if (n_arguments != n_params) {
207                 /* this is a bad feature of C: without a prototype, we can
208                  * call a function with fewer arguments than it has parameters.
209                  * Currently we don't support this; we could use Unknown then. */
210                 return false;
211         }
212         if (n_res != get_method_n_ress(call_type)) {
213                 return false;
214         }
215
216         /* Argh, compiling C has some bad consequences: if argument and
217          * parameter types differ, what happens is implementation defined.
218          * We support inlining if the bit size of the types matches AND the
219          * same arithmetic is used; then we can simply reinterpret the bits. */
220         for (i = 0; i < n_params; ++i) {
221                 ir_type *param_tp = get_method_param_type(called_type, i);
222                 ir_type *arg_tp   = get_method_param_type(call_type, i);
223
224                 if (param_tp != arg_tp) {
225                         ir_mode *pmode = get_type_mode(param_tp);
226                         ir_mode *amode = get_type_mode(arg_tp);
227
228                         if (pmode == NULL || amode == NULL)
229                                 return false;
230                         if (get_mode_size_bits(pmode) != get_mode_size_bits(amode))
231                                 return false;
232                         if (get_mode_arithmetic(pmode) != get_mode_arithmetic(amode))
233                                 return false;
234                         /* otherwise we can simply "reinterpret" the bits */
235                 }
236         }
237         for (i = 0; i < n_res; ++i) {
238                 ir_type *decl_res_tp = get_method_res_type(called_type, i);
239                 ir_type *used_res_tp = get_method_res_type(call_type, i);
240
241                 if (decl_res_tp != used_res_tp) {
242                         ir_mode *decl_mode = get_type_mode(decl_res_tp);
243                         ir_mode *used_mode = get_type_mode(used_res_tp);
244                         if (decl_mode == NULL || used_mode == NULL)
245                                 return false;
246                         if (get_mode_size_bits(decl_mode) != get_mode_size_bits(used_mode))
247                                 return false;
248                         if (get_mode_arithmetic(decl_mode) != get_mode_arithmetic(used_mode))
249                                 return false;
250                         /* otherwise we can "reinterpret" the bits */
251                 }
252         }
253
254         /* check parameters for compound arguments */
255         for (i = 0; i < n_params; ++i) {
256                 ir_type *p_type = get_method_param_type(call_type, i);
257
258                 if (is_compound_type(p_type))
259                         return false;
260         }
261
262         /* check results for compound arguments */
263         for (i = 0; i < n_res; ++i) {
264                 ir_type *r_type = get_method_res_type(call_type, i);
265
266                 if (is_compound_type(r_type))
267                         return false;
268         }
269
270         res = true;
271         irg_walk_graph(called_graph, find_addr, NULL, &res);
272
273         return res;
274 }
275
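/* Illustrative C source (sketch) for the two rejected cases:
 *
 *     struct big { int a[16]; };
 *     int f(struct big b) { return b.a[0]; }   // compound argument
 *     int g(int x)        { int *p = &x;       // address of parameter:
 *                           return *p; }       // x becomes a frame entity
 */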
276 enum exc_mode {
277         exc_handler,    /**< There is a handler. */
278         exc_no_handler  /**< Exception handling not represented. */
279 };
280
281 /**
282  * Copy all entities on the stack frame of one irg to the stack frame of
283  * another.  Sets the entity links of the old entities to the copies.
284  */
285 static void copy_frame_entities(ir_graph *from, ir_graph *to)
286 {
287         ir_type *from_frame = get_irg_frame_type(from);
288         ir_type *to_frame   = get_irg_frame_type(to);
289         size_t   n_members  = get_class_n_members(from_frame);
290         size_t   i;
291         assert(from_frame != to_frame);
292
293         for (i = 0; i < n_members; ++i) {
294                 ir_entity *old_ent = get_class_member(from_frame, i);
295                 ir_entity *new_ent = copy_entity_own(old_ent, to_frame);
296                 set_entity_link(old_ent, new_ent);
297                 /* since the entity is inlined now, this is not a parameter entity */
298                 new_ent->is_parameter = 0;
299         }
300 }
301
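/* Sketch of the entity-link protocol as used by inline_method() below:
 *
 *     irp_reserve_resources(irp, IRP_RESOURCE_ENTITY_LINK);
 *     copy_frame_entities(called_graph, irg);   // old_ent->link = new_ent
 *     // ... irg_walk_core() copies the nodes; copy_node_inline() remaps
 *     // the Sel entities through the link fields ...
 *     irp_free_resources(irp, IRP_RESOURCE_ENTITY_LINK);
 */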
302 /* Inlines a method at the given call site. */
303 int inline_method(ir_node *call, ir_graph *called_graph)
304 {
305         ir_node       *pre_call;
306         ir_node       *post_call, *post_bl;
307         ir_node       *in[pn_Start_max+1];
308         ir_node       *end, *end_bl, *block;
309         ir_node       **res_pred;
310         ir_node       **cf_pred;
311         ir_node       **args_in;
312         ir_node       *ret, *phi;
313         int           arity, n_ret, n_exc, n_res, i, j, rem_opt;
314         int           irn_arity, n_params;
315         int           n_mem_phi;
316         enum exc_mode exc_handling;
317         ir_type       *mtp;
318         ir_type       *ctp;
319         ir_entity     *ent;
320         ir_graph      *rem;
321         ir_graph      *irg = get_irn_irg(call);
322
323         /* we cannot inline some types of calls */
324         if (! can_inline(call, called_graph))
325                 return 0;
326
327         /* We cannot inline a recursive call. The graph must be copied before
328          * the call to inline_method() using create_irg_copy(). */
329         if (called_graph == irg)
330                 return 0;
331
332         ent      = get_irg_entity(called_graph);
333         mtp      = get_entity_type(ent);
334         ctp      = get_Call_type(call);
335         n_params = get_method_n_params(mtp);
336         n_res    = get_method_n_ress(mtp);
337
338         rem = current_ir_graph;
339         current_ir_graph = irg;
340
341         DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));
342
343         /* optimizations can cause problems when allocating new nodes */
344         rem_opt = get_opt_optimize();
345         set_optimize(0);
346
347         /* Handle graph state */
348         assert(get_irg_phase_state(irg) != phase_building);
349         assert(get_irg_pinned(irg) == op_pin_state_pinned);
350         assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
351         set_irg_extblk_inconsistent(irg);
352         set_irg_doms_inconsistent(irg);
353         set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
354         set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);
355         edges_deactivate(irg);
356
357         /* here we know we WILL inline, so inform the statistics */
358         hook_inline(call, called_graph);
359
360         /* -- Decide how to handle exception control flow: Is there a handler
361            for the Call node, or do we branch directly to End on an exception?
362            exc_handling:
363            exc_handler    There is a handler.
364            exc_no_handler Exception handling not represented in Firm. -- */
365         {
366                 ir_node *Xproj = NULL;
367                 ir_node *proj;
368                 for (proj = (ir_node*)get_irn_link(call); proj != NULL;
369                      proj = (ir_node*)get_irn_link(proj)) {
370                         long proj_nr = get_Proj_proj(proj);
371                         if (proj_nr == pn_Call_X_except) Xproj = proj;
372                 }
373                 exc_handling = Xproj != NULL ? exc_handler : exc_no_handler;
374         }
375
376         /* create the argument tuple */
377         args_in = ALLOCAN(ir_node*, n_params);
378
379         block = get_nodes_block(call);
380         for (i = n_params - 1; i >= 0; --i) {
381                 ir_node *arg      = get_Call_param(call, i);
382                 ir_type *param_tp = get_method_param_type(mtp, i);
383                 ir_mode *mode     = get_type_mode(param_tp);
384
385                 if (mode != get_irn_mode(arg)) {
386                         arg = new_r_Conv(block, arg, mode);
387                 }
388                 args_in[i] = arg;
389         }
390
391         /* Pre_call collects the arguments of the procedure and later replaces
392          * the Start node of the called graph.  Post_call is the old Call node
393          * and collects the results of the called graph.  Both end up as tuples. */
394         post_bl = get_nodes_block(call);
395         /* MxXxPxT of Start + parameters of the Call */
396         in[pn_Start_M]              = get_Call_mem(call);
397         in[pn_Start_X_initial_exec] = new_r_Jmp(post_bl);
398         in[pn_Start_P_frame_base]   = get_irg_frame(irg);
399         in[pn_Start_T_args]         = new_r_Tuple(post_bl, n_params, args_in);
400         pre_call = new_r_Tuple(post_bl, pn_Start_max+1, in);
401         post_call = call;
402
403         /* --
404            The new block gets the ins of the old block, pre_call with all its
405            predecessors, and all Phi nodes. -- */
406         part_block(pre_call);
407
408         /* increment the visited counter for the walk below */
409         inc_irg_visited(called_graph);
410
411         /* Link some nodes to nodes in the current graph so that, instead of
412          * being copied, the linked nodes are used.
413          * This way the copier uses the created Tuple instead of copying the
414          * Start node, and similarly the singleton nodes NoMem and Bad.
415          * Note: this prevents the predecessors from being copied - only do it
416          *       for nodes without predecessors. */
417         {
418                 ir_node *start_block;
419                 ir_node *start;
420                 ir_node *nomem;
421
422                 start_block = get_irg_start_block(called_graph);
423                 set_new_node(start_block, get_nodes_block(pre_call));
424                 mark_irn_visited(start_block);
425
426                 start = get_irg_start(called_graph);
427                 set_new_node(start, pre_call);
428                 mark_irn_visited(start);
429
430                 nomem = get_irg_no_mem(called_graph);
431                 set_new_node(nomem, get_irg_no_mem(irg));
432                 mark_irn_visited(nomem);
433         }
434
435         /* The entity link is used to map entities on the old stack frame to
436          * their copies on the new stack frame. */
437         irp_reserve_resources(irp, IRP_RESOURCE_ENTITY_LINK);
438
439         /* copy entities and nodes */
440         assert(!irn_visited(get_irg_end(called_graph)));
441         copy_frame_entities(called_graph, irg);
442         irg_walk_core(get_irg_end(called_graph), copy_node_inline, set_preds_inline,
443                       irg);
444
445         irp_free_resources(irp, IRP_RESOURCE_ENTITY_LINK);
446
447         /* -- Merge the end of the inlined procedure with the call site -- */
448         /* We will turn the old Call node into a Tuple with the following
449            predecessors:
450            -1:  Block of Tuple.
451            0: Phi of all Memories of Return statements.
452            1: Jmp from new Block that merges the control flow from all exception
453            predecessors of the old end block.
454            2: Tuple of all results.
455            3: Phi of Exception memories.
456            In case the old Call directly branches to End on an exception we don't
457            need the block merging all exceptions nor the Phi of the exception
458            memories.
459         */
460
461         /* Precompute some values */
462         end_bl = get_new_node(get_irg_end_block(called_graph));
463         end    = get_new_node(get_irg_end(called_graph));
464         arity  = get_Block_n_cfgpreds(end_bl);    /* arity = n_exc + n_ret  */
465         n_res  = get_method_n_ress(get_Call_type(call));
466
467         res_pred = XMALLOCN(ir_node*, n_res);
468         cf_pred  = XMALLOCN(ir_node*, arity);
469
470         /* preserve the keepalives of the inlined graph */
471         irn_arity = get_irn_arity(end);
472         for (i = 0; i < irn_arity; i++) {
473                 ir_node *ka = get_End_keepalive(end, i);
474                 if (! is_Bad(ka))
475                         add_End_keepalive(get_irg_end(irg), ka);
476         }
477
478         /* replace Return nodes by Jmp nodes */
479         n_ret = 0;
480         for (i = 0; i < arity; i++) {
481                 ir_node *ret;
482                 ret = get_Block_cfgpred(end_bl, i);
483                 if (is_Return(ret)) {
484                         ir_node *block = get_nodes_block(ret);
485                         cf_pred[n_ret] = new_r_Jmp(block);
486                         n_ret++;
487                 }
488         }
489         set_irn_in(post_bl, n_ret, cf_pred);
490
491         /* build a Tuple for all results of the method.
492          * add Phi node if there was more than one Return. */
493         turn_into_tuple(post_call, pn_Call_max+1);
494         /* First the Memory-Phi */
495         n_mem_phi = 0;
496         for (i = 0; i < arity; i++) {
497                 ret = get_Block_cfgpred(end_bl, i);
498                 if (is_Return(ret)) {
499                         cf_pred[n_mem_phi++] = get_Return_mem(ret);
500                 }
501                 /* memory output for some exceptions is directly connected to End */
502                 if (is_Call(ret)) {
503                         cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 3);
504                 } else if (is_fragile_op(ret)) {
505                         /* We rely on all cfops having the memory output at the same position. */
506                         cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 0);
507                 } else if (is_Raise(ret)) {
508                         cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 1);
509                 }
510         }
511         phi = new_r_Phi(post_bl, n_mem_phi, cf_pred, mode_M);
512         set_Tuple_pred(call, pn_Call_M, phi);
513         /* Conserve Phi-list for further inlinings -- but might be optimized */
514         if (get_nodes_block(phi) == post_bl) {
515                 set_irn_link(phi, get_irn_link(post_bl));
516                 set_irn_link(post_bl, phi);
517         }
518         /* Now the real results */
519         if (n_res > 0) {
520                 ir_node *result_tuple;
521                 for (j = 0; j < n_res; j++) {
522                         ir_type *res_type = get_method_res_type(ctp, j);
523                         ir_mode *res_mode = get_type_mode(res_type);
524                         n_ret = 0;
525                         for (i = 0; i < arity; i++) {
526                                 ret = get_Block_cfgpred(end_bl, i);
527                                 if (is_Return(ret)) {
528                                         ir_node *res = get_Return_res(ret, j);
529                                         if (get_irn_mode(res) != res_mode) {
530                                                 ir_node *block = get_nodes_block(res);
531                                                 res = new_r_Conv(block, res, res_mode);
532                                         }
533                                         cf_pred[n_ret] = res;
534                                         n_ret++;
535                                 }
536                         }
537                         if (n_ret > 0) {
538                                 phi = new_r_Phi(post_bl, n_ret, cf_pred, res_mode);
539                         } else {
540                                 phi = new_r_Bad(irg, res_mode);
541                         }
542                         res_pred[j] = phi;
543                         /* Conserve Phi-list for further inlinings -- but might be optimized */
544                         if (get_nodes_block(phi) == post_bl) {
545                                 set_Phi_next(phi, get_Block_phis(post_bl));
546                                 set_Block_phis(post_bl, phi);
547                         }
548                 }
549                 result_tuple = new_r_Tuple(post_bl, n_res, res_pred);
550                 set_Tuple_pred(call, pn_Call_T_result, result_tuple);
551         } else {
552                 set_Tuple_pred(call, pn_Call_T_result, new_r_Bad(irg, mode_T));
553         }
554         /* handle the regular call */
555         set_Tuple_pred(call, pn_Call_X_regular, new_r_Jmp(post_bl));
556
557         /* Finally the exception control flow.
558            We have two possible situations:
559            First, the Call branches to an exception handler:
560            we need to add a Phi node collecting the memory containing the
561            exception objects, and we need to add another block to get a
562            correct representation of this Phi.  To this block we add a Jmp
563            that resolves into the X output of the Call when the Call is
564            turned into a tuple.
565            Second, there is no exception edge: just add all inlined
566            exception branches to the End node.
567          */
568         if (exc_handling == exc_handler) {
569                 n_exc = 0;
570                 for (i = 0; i < arity; i++) {
571                         ir_node *ret, *irn;
572                         ret = get_Block_cfgpred(end_bl, i);
573                         irn = skip_Proj(ret);
574                         if (is_fragile_op(irn) || is_Raise(irn)) {
575                                 cf_pred[n_exc] = ret;
576                                 ++n_exc;
577                         }
578                 }
579                 if (n_exc > 0) {
580                         if (n_exc == 1) {
581                                 /* simple fix */
582                                 set_Tuple_pred(call, pn_Call_X_except, cf_pred[0]);
583                         } else {
584                                 ir_node *block = new_r_Block(irg, n_exc, cf_pred);
585                                 set_Tuple_pred(call, pn_Call_X_except, new_r_Jmp(block));
586                         }
587                 } else {
588                         set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg, mode_X));
589                 }
590         } else {
591                 ir_node *main_end_bl;
592                 int main_end_bl_arity;
593                 ir_node **end_preds;
594
595                 /* assert(exc_handling == exc_no_handler || there are no exceptions) */
596                 n_exc = 0;
597                 for (i = 0; i < arity; i++) {
598                         ir_node *ret = get_Block_cfgpred(end_bl, i);
599                         ir_node *irn = skip_Proj(ret);
600
601                         if (is_fragile_op(irn) || is_Raise(irn)) {
602                                 cf_pred[n_exc] = ret;
603                                 n_exc++;
604                         }
605                 }
606                 main_end_bl       = get_irg_end_block(irg);
607                 main_end_bl_arity = get_irn_arity(main_end_bl);
608                 end_preds         = XMALLOCN(ir_node*, n_exc + main_end_bl_arity);
609
610                 for (i = 0; i < main_end_bl_arity; ++i)
611                         end_preds[i] = get_irn_n(main_end_bl, i);
612                 for (i = 0; i < n_exc; ++i)
613                         end_preds[main_end_bl_arity + i] = cf_pred[i];
614                 set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
615                 set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg, mode_X));
616                 free(end_preds);
617         }
618         free(res_pred);
619         free(cf_pred);
620
621         /* -- Turn the optimizations back on. -- */
622         set_optimize(rem_opt);
623         current_ir_graph = rem;
624
625         return 1;
626 }
627
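/* Usage sketch (hypothetical caller): inline_method() expects the Proj and
 * Phi lists in the link fields, so collect_phiprojs() must run first, as
 * done in inline_small_irgs() below:
 *
 *     ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_PHI_LIST);
 *     collect_phiprojs(irg);
 *     if (inline_method(call, callee)) {
 *             // the Call node was replaced by a Tuple
 *     }
 *     ir_free_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_PHI_LIST);
 */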
628 /********************************************************************/
629 /* Apply inlining to small methods.                                 */
630 /********************************************************************/
631
632 static struct obstack  temp_obst;
633
634 /** Represents a possibly inlinable call in a graph. */
635 typedef struct call_entry {
636         ir_node    *call;       /**< The Call node. */
637         ir_graph   *callee;     /**< The callee IR-graph. */
638         list_head  list;        /**< List head for linking the next one. */
639         int        loop_depth;  /**< The loop depth of this call. */
640         int        benefice;    /**< The calculated benefice of this call. */
641         unsigned   local_adr:1; /**< Set if this call gets an address of a local variable. */
642         unsigned   all_const:1; /**< Set if this call has only constant parameters. */
643 } call_entry;
644
645 /**
646  * environment for inlining small irgs
647  */
648 typedef struct inline_env_t {
649         struct obstack obst;  /**< An obstack where call_entries are allocated on. */
650         list_head      calls; /**< The call entry list. */
651 } inline_env_t;
652
653 /**
654  * Returns the irg called from a Call node. If the irg is not
655  * known, NULL is returned.
656  *
657  * @param call  the call node
658  */
659 static ir_graph *get_call_called_irg(ir_node *call)
660 {
661         ir_node *addr;
662
663         addr = get_Call_ptr(call);
664         if (is_Global(addr)) {
665                 ir_entity *ent = get_Global_entity(addr);
666                 /* we don't know which function gets finally bound to a weak symbol */
667                 if (get_entity_linkage(ent) & IR_LINKAGE_WEAK)
668                         return NULL;
669
670                 return get_entity_irg(ent);
671         }
672
673         return NULL;
674 }
675
676 /**
677  * Walker: Collect all calls to known graphs inside a graph.
678  */
679 static void collect_calls(ir_node *call, void *env)
680 {
681         (void) env;
682         if (is_Call(call)) {
683                 ir_graph *called_irg = get_call_called_irg(call);
684
685                 if (called_irg != NULL) {
686                         /* The Call node calls a locally defined method.  Remember to inline. */
687                         inline_env_t *ienv  = (inline_env_t*)env;
688                         call_entry   *entry = OALLOC(&ienv->obst, call_entry);
689                         entry->call       = call;
690                         entry->callee     = called_irg;
691                         entry->loop_depth = 0;
692                         entry->benefice   = 0;
693                         entry->local_adr  = 0;
694                         entry->all_const  = 0;
695
696                         list_add_tail(&entry->list, &ienv->calls);
697                 }
698         }
699 }
700
701 /**
702  * Inlines all small methods at call sites where the called address comes
703  * from a SymConst node that references the entity representing the called
704  * method.
705  * The size argument is a rough measure for the code size of the method:
706  * Methods where the obstack containing the firm graph is smaller than
707  * size are inlined.
708  */
709 void inline_small_irgs(ir_graph *irg, int size)
710 {
711         ir_graph *rem = current_ir_graph;
712         inline_env_t env;
713         call_entry *entry;
714
715         current_ir_graph = irg;
716         /* Handle graph state */
717         assert(get_irg_phase_state(irg) != phase_building);
718         free_callee_info(irg);
719
720         /* Find Call nodes to inline.
721            (We cannot inline during a walk of the graph, as inlining the same
722            method several times changes the visited flag of the walked graph:
723            after the first inlining visited of the callee equals visited of
724            the caller.  With the next inlining both are increased.) */
725         obstack_init(&env.obst);
726         INIT_LIST_HEAD(&env.calls);
727         irg_walk_graph(irg, NULL, collect_calls, &env);
728
729         if (! list_empty(&env.calls)) {
730                 /* There are calls to inline */
731                 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
732                 collect_phiprojs(irg);
733
734                 list_for_each_entry(call_entry, entry, &env.calls, list) {
735                         ir_graph            *callee = entry->callee;
736                         irg_inline_property prop    = get_irg_inline_property(callee);
737
738                         if (prop == irg_inline_forbidden) {
739                                 continue;
740                         }
741
742                         if (prop >= irg_inline_forced ||
743                             _obstack_memory_used(callee->obst) - (int)obstack_room(callee->obst) < size) {
744                                 inline_method(entry->call, callee);
745                         }
746                 }
747                 ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
748         }
749         obstack_free(&env.obst, NULL);
750         current_ir_graph = rem;
751 }
752
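/* Usage sketch (hypothetical driver; 40 is an arbitrary byte threshold):
 *
 *     size_t i, n_irgs = get_irp_n_irgs();
 *     for (i = 0; i < n_irgs; ++i)
 *             inline_small_irgs(get_irp_irg(i), 40);
 */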
753 typedef struct inline_small_irgs_pass_t {
754         ir_graph_pass_t pass;
755         int            size;
756 } inline_small_irgs_pass_t;
757
758 /**
759  * Wrapper to run inline_small_irgs() as a pass.
760  */
761 static int inline_small_irgs_wrapper(ir_graph *irg, void *context)
762 {
763         inline_small_irgs_pass_t *pass = (inline_small_irgs_pass_t*)context;
764
765         inline_small_irgs(irg, pass->size);
766         return 0;
767 }
768
769 /* create a pass for inline_small_irgs() */
770 ir_graph_pass_t *inline_small_irgs_pass(const char *name, int size)
771 {
772         inline_small_irgs_pass_t *pass = XMALLOCZ(inline_small_irgs_pass_t);
773
774         pass->size = size;
775         return def_graph_pass_constructor(
776                 &pass->pass, name ? name : "inline_small_irgs", inline_small_irgs_wrapper);
777 }
778
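/* Usage sketch (hypothetical): passing NULL selects the default pass name
 * "inline_small_irgs":
 *
 *     ir_graph_pass_t *pass = inline_small_irgs_pass(NULL, 40);
 */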
779 /**
780  * Environment for inlining irgs.
781  */
782 typedef struct {
783         list_head calls;             /**< List of all call nodes in this graph. */
784         unsigned  *local_weights;    /**< Once allocated, the beneficial weight for transmitting local addresses. */
785         unsigned  n_nodes;           /**< Number of nodes in graph except Id, Tuple, Proj, Start, End. */
786         unsigned  n_blocks;          /**< Number of Blocks in graph without Start and End block. */
787         unsigned  n_nodes_orig;      /**< for statistics */
788         unsigned  n_call_nodes;      /**< Number of Call nodes in the graph. */
789         unsigned  n_call_nodes_orig; /**< for statistics */
790         unsigned  n_callers;         /**< Number of known graphs that call this graph. */
791         unsigned  n_callers_orig;    /**< for statistics */
792         unsigned  got_inline:1;      /**< Set, if at least one call inside this graph was inlined. */
793         unsigned  recursive:1;       /**< Set, if this function is self recursive. */
794 } inline_irg_env;
795
796 /**
797  * Allocate a new environment for inlining.
798  */
799 static inline_irg_env *alloc_inline_irg_env(void)
800 {
801         inline_irg_env *env    = OALLOC(&temp_obst, inline_irg_env);
802         INIT_LIST_HEAD(&env->calls);
803         env->local_weights     = NULL;
804         env->n_nodes           = -2; /* do not count Start, End */
805         env->n_blocks          = -2; /* do not count Start and End block */
806         env->n_nodes_orig      = -2; /* do not count Start, End */
807         env->n_call_nodes      = 0;
808         env->n_call_nodes_orig = 0;
809         env->n_callers         = 0;
810         env->n_callers_orig    = 0;
811         env->got_inline        = 0;
812         env->recursive         = 0;
813         return env;
814 }
815
816 typedef struct walker_env {
817         inline_irg_env *x;     /**< the inline environment */
818         char ignore_runtime;   /**< the ignore runtime flag */
819         char ignore_callers;   /**< if set, do not update the callers data */
820 } wenv_t;
821
822 /**
823  * post-walker: collect all calls in the inline-environment
824  * of a graph and sum some statistics.
825  */
826 static void collect_calls2(ir_node *call, void *ctx)
827 {
828         wenv_t         *env = (wenv_t*)ctx;
829         inline_irg_env *x = env->x;
830         unsigned        code = get_irn_opcode(call);
831         ir_graph       *callee;
832         call_entry     *entry;
833
834         /* count meaningful nodes in irg */
835         if (code != iro_Proj && code != iro_Tuple && code != iro_Sync) {
836                 if (code != iro_Block) {
837                         ++x->n_nodes;
838                         ++x->n_nodes_orig;
839                 } else {
840                         ++x->n_blocks;
841                 }
842         }
843
844         if (code != iro_Call) return;
845
846         /* check, if it's a runtime call */
847         if (env->ignore_runtime) {
848                 ir_node *symc = get_Call_ptr(call);
849
850                 if (is_Global(symc)) {
851                         ir_entity *ent = get_Global_entity(symc);
852
853                         if (get_entity_additional_properties(ent) & mtp_property_runtime)
854                                 return;
855                 }
856         }
857
858         /* collect all call nodes */
859         ++x->n_call_nodes;
860         ++x->n_call_nodes_orig;
861
862         callee = get_call_called_irg(call);
863         if (callee != NULL) {
864                 if (! env->ignore_callers) {
865                         inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);
866                         /* count all static callers */
867                         ++callee_env->n_callers;
868                         ++callee_env->n_callers_orig;
869                 }
870                 if (callee == current_ir_graph)
871                         x->recursive = 1;
872
873                 /* link it in the list of possible inlinable entries */
874                 entry = OALLOC(&temp_obst, call_entry);
875                 entry->call       = call;
876                 entry->callee     = callee;
877                 entry->loop_depth = get_irn_loop(get_nodes_block(call))->depth;
878                 entry->benefice   = 0;
879                 entry->local_adr  = 0;
880                 entry->all_const  = 0;
881
882                 list_add_tail(&entry->list, &x->calls);
883         }
884 }
885
886 /**
887  * Returns TRUE if the number of call nodes is 0 in the irg's environment,
888  * hence this irg is a leaf (it calls no other graphs).
889  */
890 inline static int is_leave(ir_graph *irg)
891 {
892         inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
893         return env->n_call_nodes == 0;
894 }
895
896 /**
897  * Returns TRUE if the number of nodes in the callee is
898  * smaller than size in the irg's environment.
899  */
900 inline static int is_smaller(ir_graph *callee, unsigned size)
901 {
902         inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);
903         return env->n_nodes < size;
904 }
905
906 /**
907  * Duplicate a call entry.
908  *
909  * @param entry     the original entry to duplicate
910  * @param new_call  the new call node
911  * @param loop_depth_delta
912  *                  delta value for the loop depth
913  */
914 static call_entry *duplicate_call_entry(const call_entry *entry,
915                                         ir_node *new_call, int loop_depth_delta)
916 {
917         call_entry *nentry = OALLOC(&temp_obst, call_entry);
918         nentry->call       = new_call;
919         nentry->callee     = entry->callee;
920         nentry->benefice   = entry->benefice;
921         nentry->loop_depth = entry->loop_depth + loop_depth_delta;
922         nentry->local_adr  = entry->local_adr;
923         nentry->all_const  = entry->all_const;
924
925         return nentry;
926 }
927
928 /**
929  * Append all call nodes of the source environment to the call list of the
930  * destination environment.
931  *
932  * @param dst         destination environment
933  * @param src         source environment
934  * @param loop_depth  the loop depth of the call that is replaced by the src list
935  */
936 static void append_call_list(inline_irg_env *dst, inline_irg_env *src, int loop_depth)
937 {
938         call_entry *entry, *nentry;
939
940         /* Note that the src list points to Call nodes in the inlined graph, but
941            we need Call nodes in our graph. Luckily the inliner leaves this information
942            in the link field. */
943         list_for_each_entry(call_entry, entry, &src->calls, list) {
944                 nentry = duplicate_call_entry(entry, (ir_node*)get_irn_link(entry->call), loop_depth);
945                 list_add_tail(&nentry->list, &dst->calls);
946         }
947         dst->n_call_nodes += src->n_call_nodes;
948         dst->n_nodes      += src->n_nodes;
949 }
950
951 /*
952  * Inlines small leaf methods at call sites where the called address comes
953  * from a SymConst node that references the entity representing the called
954  * method.
955  * The size argument is a rough measure for the code size of the method:
956  * Methods where the obstack containing the firm graph is smaller than
957  * size are inlined.
958  */
959 void inline_leave_functions(unsigned maxsize, unsigned leavesize,
960                             unsigned size, int ignore_runtime)
961 {
962         inline_irg_env   *env;
963         ir_graph         *irg;
964         size_t           i, n_irgs;
965         ir_graph         *rem;
966         int              did_inline;
967         wenv_t           wenv;
968         call_entry       *entry, *next;
969         const call_entry *centry;
970         pmap             *copied_graphs;
971         pmap_entry       *pm_entry;
972
973         rem = current_ir_graph;
974         obstack_init(&temp_obst);
975
976         /* a map for the copied graphs, used to inline recursive calls */
977         copied_graphs = pmap_create();
978
979         /* extend all irgs by a temporary data structure for inlining. */
980         n_irgs = get_irp_n_irgs();
981         for (i = 0; i < n_irgs; ++i)
982                 set_irg_link(get_irp_irg(i), alloc_inline_irg_env());
983
984         /* Pre-compute information in temporary data structure. */
985         wenv.ignore_runtime = ignore_runtime;
986         wenv.ignore_callers = 0;
987         for (i = 0; i < n_irgs; ++i) {
988                 ir_graph *irg = get_irp_irg(i);
989
990                 assert(get_irg_phase_state(irg) != phase_building);
991                 free_callee_info(irg);
992
993                 assure_cf_loop(irg);
994                 wenv.x = (inline_irg_env*)get_irg_link(irg);
995                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
996         }
997
998         /* -- and now inline. -- */
999
1000         /* Inline leaf functions repeatedly -- we might construct new leaves. */
1001         do {
1002                 did_inline = 0;
1003
1004                 for (i = 0; i < n_irgs; ++i) {
1005                         ir_node *call;
1006                         int phiproj_computed = 0;
1007
1008                         current_ir_graph = get_irp_irg(i);
1009                         env              = (inline_irg_env*)get_irg_link(current_ir_graph);
1010
1011                         ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1012                         list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
1013                                 ir_graph            *callee;
1014                                 irg_inline_property  prop;
1015
1016                                 if (env->n_nodes > maxsize)
1017                                         break;
1018
1019                                 call   = entry->call;
1020                                 callee = entry->callee;
1021
1022                                 prop = get_irg_inline_property(callee);
1023                                 if (prop == irg_inline_forbidden) {
1024                                         continue;
1025                                 }
1026
1027                                 if (is_leave(callee) && (
1028                                     is_smaller(callee, leavesize) || prop >= irg_inline_forced)) {
1029                                         if (!phiproj_computed) {
1030                                                 phiproj_computed = 1;
1031                                                 collect_phiprojs(current_ir_graph);
1032                                         }
1033                                         did_inline = inline_method(call, callee);
1034
1035                                         if (did_inline) {
1036                                                 inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);
1037
1038                                                 /* call was inlined, Phi/Projs for current graph must be recomputed */
1039                                                 phiproj_computed = 0;
1040
1041                                                 /* Do some statistics */
1042                                                 env->got_inline = 1;
1043                                                 --env->n_call_nodes;
1044                                                 env->n_nodes += callee_env->n_nodes;
1045                                                 --callee_env->n_callers;
1046
1047                                                 /* remove this call from the list */
1048                                                 list_del(&entry->list);
1049                                                 continue;
1050                                         }
1051                                 }
1052                         }
1053                         ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1054                 }
1055         } while (did_inline);
1056
1057         /* inline other small functions. */
1058         for (i = 0; i < n_irgs; ++i) {
1059                 ir_node *call;
1060                 int phiproj_computed = 0;
1061
1062                 current_ir_graph = get_irp_irg(i);
1063                 env              = (inline_irg_env*)get_irg_link(current_ir_graph);
1064
1065                 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1066
1067                 /* note that the list of possible calls is updated during the process */
1068                 list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
1069                         irg_inline_property prop;
1070                         ir_graph            *callee;
1071                         pmap_entry          *e;
1072
1073                         call   = entry->call;
1074                         callee = entry->callee;
1075
1076                         prop = get_irg_inline_property(callee);
1077                         if (prop == irg_inline_forbidden) {
1078                                 continue;
1079                         }
1080
1081                         e = pmap_find(copied_graphs, callee);
1082                         if (e != NULL) {
1083                                 /*
1084                                  * Remap callee if we have a copy.
1085                                  * FIXME: Should we do this only for recursive Calls ?
1086                                  */
1087                                 callee = (ir_graph*)e->value;
1088                         }
1089
1090                         if (prop >= irg_inline_forced ||
1091                             (is_smaller(callee, size) && env->n_nodes < maxsize) /* small function */) {
1092                                 if (current_ir_graph == callee) {
1093                                         /*
1094                                          * Recursive call: we cannot directly inline because we cannot walk
1095                                          * the graph and change it. So we have to make a copy of the graph
1096                                          * first.
1097                                          */
1098
1099                                         inline_irg_env *callee_env;
1100                                         ir_graph       *copy;
1101
1102                                         ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1103
1104                                         /*
1105                                          * No copy yet, create one.
1106                                          * Note that recursive methods are never leaves, so it is sufficient
1107                                          * to test this condition here.
1108                                          */
1109                                         copy = create_irg_copy(callee);
1110
1111                                         /* create_irg_copy() destroys the Proj links, recompute them */
1112                                         phiproj_computed = 0;
1113
1114                                         ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1115
1116                                         /* allocate new environment */
1117                                         callee_env = alloc_inline_irg_env();
1118                                         set_irg_link(copy, callee_env);
1119
1120                                         assure_cf_loop(copy);
1121                                         wenv.x              = callee_env;
1122                                         wenv.ignore_callers = 1;
1123                                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
1124
1125                                         /*
1126                                          * Enter the entity of the original graph. This is needed
1127                                          * for inline_method(). However, note that ent->irg still points
1128                                          * to callee, NOT to copy.
1129                                          */
1130                                         set_irg_entity(copy, get_irg_entity(callee));
1131
1132                                         pmap_insert(copied_graphs, callee, copy);
1133                                         callee = copy;
1134
1135                                         /* we have only one caller: the original graph */
1136                                         callee_env->n_callers      = 1;
1137                                         callee_env->n_callers_orig = 1;
1138                                 }
1139                                 if (! phiproj_computed) {
1140                                         phiproj_computed = 1;
1141                                         collect_phiprojs(current_ir_graph);
1142                                 }
1143                                 did_inline = inline_method(call, callee);
1144                                 if (did_inline) {
1145                                         inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);
1146
1147                                         /* call was inlined, Phi/Projs for current graph must be recomputed */
1148                                         phiproj_computed = 0;
1149
1150                                         /* callee was inlined. Append its call list. */
1151                                         env->got_inline = 1;
1152                                         --env->n_call_nodes;
1153                                         append_call_list(env, callee_env, entry->loop_depth);
1154                                         --callee_env->n_callers;
1155
1156                                         /* after we have inlined callee, all called methods inside callee
1157                                            are now called once more */
1158                                         list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
1159                                                 inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
1160                                                 ++penv->n_callers;
1161                                         }
1162
1163                                         /* remove this call from the list */
1164                                         list_del(&entry->list);
1165                                         continue;
1166                                 }
1167                         }
1168                 }
1169                 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1170         }
1171
1172         for (i = 0; i < n_irgs; ++i) {
1173                 irg = get_irp_irg(i);
1174                 env = (inline_irg_env*)get_irg_link(irg);
1175
1176                 if (env->got_inline) {
1177                         optimize_graph_df(irg);
1178                         optimize_cf(irg);
1179                 }
1180                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
1181                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
1182                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
1183                         env->n_callers_orig, env->n_callers,
1184                         get_entity_name(get_irg_entity(irg))));
1185                 }
1186         }
1187
1188         /* kill the copied graphs: we don't need them anymore */
1189         foreach_pmap(copied_graphs, pm_entry) {
1190                 ir_graph *copy = (ir_graph*)pm_entry->value;
1191
1192                 /* reset the entity, otherwise it will be deleted in the next step ... */
1193                 set_irg_entity(copy, NULL);
1194                 free_ir_graph(copy);
1195         }
1196         pmap_destroy(copied_graphs);
1197
1198         obstack_free(&temp_obst, NULL);
1199         current_ir_graph = rem;
1200 }
1201
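/* Usage sketch (hypothetical, arbitrary thresholds): let graphs grow up to
 * 750 nodes, inline leaf functions below 16 nodes and other functions below
 * 10 nodes, and do not ignore runtime calls:
 *
 *     inline_leave_functions(750, 16, 10, 0);
 */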
1202 typedef struct inline_leave_functions_pass_t {
1203         ir_prog_pass_t pass;
1204         unsigned       maxsize;
1205         unsigned       leavesize;
1206         unsigned       size;
1207         int            ignore_runtime;
1208 } inline_leave_functions_pass_t;
1209
1210 /**
1211  * Wrapper to run inline_leave_functions() as a ir_prog pass.
1212  */
1213 static int inline_leave_functions_wrapper(ir_prog *irp, void *context)
1214 {
1215         inline_leave_functions_pass_t *pass = (inline_leave_functions_pass_t*)context;
1216
1217         (void)irp;
1218         inline_leave_functions(
1219                 pass->maxsize, pass->leavesize,
1220                 pass->size, pass->ignore_runtime);
1221         return 0;
1222 }
1223
1224 /* create a pass for inline_leave_functions() */
1225 ir_prog_pass_t *inline_leave_functions_pass(
1226         const char *name, unsigned maxsize, unsigned leavesize,
1227         unsigned size, int ignore_runtime)
1228 {
1229         inline_leave_functions_pass_t *pass = XMALLOCZ(inline_leave_functions_pass_t);
1230
1231         pass->maxsize        = maxsize;
1232         pass->leavesize      = leavesize;
1233         pass->size           = size;
1234         pass->ignore_runtime = ignore_runtime;
1235
1236         return def_prog_pass_constructor(
1237                 &pass->pass,
1238                 name ? name : "inline_leave_functions",
1239                 inline_leave_functions_wrapper);
1240 }
1241
1242 /**
1243  * Calculate the parameter weights for transmitting the address of a local variable.
1244  */
1245 static unsigned calc_method_local_weight(ir_node *arg)
1246 {
1247         int      i, j, k;
1248         unsigned v, weight = 0;
1249
1250         for (i = get_irn_n_outs(arg) - 1; i >= 0; --i) {
1251                 ir_node *succ = get_irn_out(arg, i);
1252
1253                 switch (get_irn_opcode(succ)) {
1254                 case iro_Load:
1255                 case iro_Store:
1256                         /* Loads and Stores can be removed */
1257                         weight += 3;
1258                         break;
1259                 case iro_Sel:
1260                         /* check if all args are constant */
1261                         for (j = get_Sel_n_indexs(succ) - 1; j >= 0; --j) {
1262                                 ir_node *idx = get_Sel_index(succ, j);
1263                                 if (! is_Const(idx))
1264                                         return 0;
1265                         }
1266                         /* Check users on this Sel. Note: if a 0 is returned here, there was
1267                            some unsupported node. */
1268                         v = calc_method_local_weight(succ);
1269                         if (v == 0)
1270                                 return 0;
1271                         /* we can kill one Sel with constant indexes, this is cheap */
1272                         weight += v + 1;
1273                         break;
1274                 case iro_Id:
1275                         /* when looking backward we might find Id nodes */
1276                         weight += calc_method_local_weight(succ);
1277                         break;
1278                 case iro_Tuple:
1279                         /* unoptimized tuple */
1280                         for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
1281                                 ir_node *pred = get_Tuple_pred(succ, j);
1282                                 if (pred == arg) {
1283                                         /* look for Proj(j) */
1284                                         for (k = get_irn_n_outs(succ) - 1; k >= 0; --k) {
1285                                                 ir_node *succ_succ = get_irn_out(succ, k);
1286                                                 if (is_Proj(succ_succ)) {
1287                                                         if (get_Proj_proj(succ_succ) == j) {
1288                                                                 /* found */
1289                                                                 weight += calc_method_local_weight(succ_succ);
1290                                                         }
1291                                                 } else {
1292                                                         /* this should NOT happen */
1293                                                         return 0;
1294                                                 }
1295                                         }
1296                                 }
1297                         }
1298                         break;
1299                 default:
1300                         /* any other node: unsupported yet or bad. */
1301                         return 0;
1302                 }
1303         }
1304         return weight;
1305 }
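/* Illustrative example (assumed callee): if the transmitted address is only
 * loaded and stored through twice, the weight is 2 * 3 = 6; a single Store
 * through a constant-index Sel of it yields 3 + 1 = 4. */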
1306
/**
 * Calculate and cache the weights of all parameters of irg for
 * transmitting the address of a local variable.
 */
1310 static void analyze_irg_local_weights(inline_irg_env *env, ir_graph *irg)
1311 {
1312         ir_entity *ent = get_irg_entity(irg);
1313         ir_type  *mtp;
1314         size_t   nparams;
1315         int      i;
1316         long     proj_nr;
1317         ir_node  *irg_args, *arg;
1318
1319         mtp      = get_entity_type(ent);
1320         nparams  = get_method_n_params(mtp);
1321
	/* allocate a new array; a non-NULL pointer doubles as the 'analysed' flag */
	env->local_weights = NEW_ARR_D(unsigned, &temp_obst, nparams);
	/* NEW_ARR_D() does not zero the memory: initialize all weights to 0
	 * ("unknown"), because parameters the callee never uses get no Proj
	 * and hence no weight in the loop below. */
	memset(env->local_weights, 0, nparams * sizeof(env->local_weights[0]));

	/* If the method has no parameters, we have nothing to do. */
	if (nparams == 0)
		return;
1328
1329         assure_irg_outs(irg);
1330         irg_args = get_irg_args(irg);
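	/* every user of the Args node is a Proj whose number is the position of
	 * the parameter, so it can be used to index local_weights directly */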
1331         for (i = get_irn_n_outs(irg_args) - 1; i >= 0; --i) {
1332                 arg     = get_irn_out(irg_args, i);
1333                 proj_nr = get_Proj_proj(arg);
1334                 env->local_weights[proj_nr] = calc_method_local_weight(arg);
1335         }
1336 }
1337
/**
 * Calculate the benefice for transmitting a local variable address.
 * After inlining, the local variable might be transformed into an
 * SSA variable by scalar_replacement().
 */
1343 static unsigned get_method_local_adress_weight(ir_graph *callee, size_t pos)
1344 {
1345         inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);
1346
1347         if (env->local_weights == NULL)
1348                 analyze_irg_local_weights(env, callee);
1349
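	/* a Call may pass more arguments than the method type declares (for
	 * instance to a variadic function): positions beyond the analysed
	 * parameters conservatively get weight 0 */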
1350         if (pos < ARR_LEN(env->local_weights))
1351                 return env->local_weights[pos];
1352         return 0;
1353 }
1354
1355 /**
1356  * Calculate a benefice value for inlining the given call.
1357  *
1358  * @param call       the call node we have to inspect
1359  * @param callee     the called graph
1360  */
1361 static int calc_inline_benefice(call_entry *entry, ir_graph *callee)
1362 {
1363         ir_node   *call = entry->call;
1364         ir_entity *ent  = get_irg_entity(callee);
1365         ir_node   *frame_ptr;
1366         ir_type   *mtp;
1367         int       weight = 0;
1368         int       i, n_params, all_const;
1369         unsigned  cc, v;
1370         irg_inline_property prop;
1371
1372         inline_irg_env *callee_env;
1373
1374         prop = get_irg_inline_property(callee);
1375         if (prop == irg_inline_forbidden) {
1376                 DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden\n",
1377                     call, callee));
1378                 return entry->benefice = INT_MIN;
1379         }
1380
	if (get_irg_additional_properties(callee)
	    & (mtp_property_noreturn | mtp_property_weak)) {
		DB((dbg, LEVEL_2, "In %+F Call to %+F: not inlining noreturn or weak\n",
		    call, callee));
1384                 return entry->benefice = INT_MIN;
1385         }
1386
1387         /* costs for every passed parameter */
1388         n_params = get_Call_n_params(call);
1389         mtp      = get_entity_type(ent);
1390         cc       = get_method_calling_convention(mtp);
1391         if (cc & cc_reg_param) {
		/* register calling convention: parameters passed in registers are cheaper */
1393                 int max_regs = cc & ~cc_bits;
1394
1395                 if (max_regs < n_params)
1396                         weight += max_regs * 2 + (n_params - max_regs) * 5;
1397                 else
1398                         weight += n_params * 2;
1399         } else {
		/* all parameters are passed on the stack */
1401                 weight += 5 * n_params;
1402         }
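	/* e.g. with 4 register parameters available, a call with 6 arguments
	 * costs 4*2 + 2*5 = 18, while a pure stack convention costs 6*5 = 30 */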
1403
1404         /* constant parameters improve the benefice */
1405         frame_ptr = get_irg_frame(current_ir_graph);
1406         all_const = 1;
1407         for (i = 0; i < n_params; ++i) {
1408                 ir_node *param = get_Call_param(call, i);
1409
1410                 if (is_Const(param)) {
1411                         weight += get_method_param_weight(ent, i);
1412                 } else {
1413                         all_const = 0;
1414                         if (is_SymConst(param))
1415                                 weight += get_method_param_weight(ent, i);
1416                         else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
1417                                 /*
1418                                  * An address of a local variable is transmitted. After
1419                                  * inlining, scalar_replacement might be able to remove the
1420                                  * local variable, so honor this.
1421                                  */
1422                                 v = get_method_local_adress_weight(callee, i);
1423                                 weight += v;
1424                                 if (v > 0)
1425                                         entry->local_adr = 1;
1426                         }
1427                 }
1428         }
1429         entry->all_const = all_const;
1430
1431         callee_env = (inline_irg_env*)get_irg_link(callee);
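	/* big bonus if this call is the only use of a private function: after
	 * inlining, the callee can probably be removed entirely */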
1432         if (callee_env->n_callers == 1 &&
1433             callee != current_ir_graph &&
1434             !entity_is_externally_visible(ent)) {
1435                 weight += 700;
1436         }
1437
1438         /* give a bonus for functions with one block */
1439         if (callee_env->n_blocks == 1)
1440                 weight = weight * 3 / 2;
1441
	/* and one for small non-recursive functions: we want them to be inlined in almost every case */
1443         if (callee_env->n_nodes < 30 && !callee_env->recursive)
1444                 weight += 2000;
1445
	/* and finally for leaf functions: they do not increase the register
	   pressure because of callee-save registers */
1448         if (callee_env->n_call_nodes == 0)
1449                 weight += 400;
1450
	/* it is important to inline inner loops first */
1452         if (entry->loop_depth > 30)
1453                 weight += 30 * 1024;
1454         else
1455                 weight += entry->loop_depth * 1024;
1456
1457         /*
1458          * All arguments constant is probably a good sign, give an extra bonus
1459          */
1460         if (all_const)
1461                 weight += 1024;
1462
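	/* Illustrative tally (assumed call): two stack parameters (2*5 = 10),
	 * a single-block callee (*3/2 -> 15) with fewer than 30 nodes and no
	 * recursion (+2000) that is a leaf (+400), at loop depth 1 (+1024):
	 * benefice 3439. */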
1463         return entry->benefice = weight;
1464 }
1465
1466 typedef struct walk_env_t {
1467         ir_graph **irgs;
1468         size_t   last_irg;
1469 } walk_env_t;
1470
1471 /**
1472  * Callgraph walker, collect all visited graphs.
1473  */
1474 static void callgraph_walker(ir_graph *irg, void *data)
1475 {
1476         walk_env_t *env = (walk_env_t *)data;
1477         env->irgs[env->last_irg++] = irg;
1478 }
1479
1480 /**
1481  * Creates an inline order for all graphs.
1482  *
1483  * @return the list of graphs.
1484  */
1485 static ir_graph **create_irg_list(void)
1486 {
1487         ir_entity  **free_methods;
1488         size_t     n_irgs = get_irp_n_irgs();
1489         walk_env_t env;
1490
1491         cgana(&free_methods);
1492         xfree(free_methods);
1493
1494         compute_callgraph();
1495
1496         env.irgs     = XMALLOCNZ(ir_graph*, n_irgs);
1497         env.last_irg = 0;
1498
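	/* callgraph_walker is registered as the post-visitor, so callees are
	 * collected before their callers and inlining proceeds bottom-up */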
1499         callgraph_walk(NULL, callgraph_walker, &env);
1500         assert(n_irgs == env.last_irg);
1501
1502         return env.irgs;
1503 }
1504
1505 /**
 * Push a call onto the priority queue if its benefice is big enough.
 *
 * @param pqueue   the priority queue of calls
 * @param call     the call entry
 * @param inline_threshold
 *                 the threshold value
1512  */
1513 static void maybe_push_call(pqueue_t *pqueue, call_entry *call,
1514                             int inline_threshold)
1515 {
1516         ir_graph            *callee  = call->callee;
1517         irg_inline_property prop     = get_irg_inline_property(callee);
1518         int                 benefice = calc_inline_benefice(call, callee);
1519
1520         DB((dbg, LEVEL_2, "In %+F Call %+F to %+F has benefice %d\n",
1521             get_irn_irg(call->call), call->call, callee, benefice));
1522
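	/* calls marked irg_inline_forced are enqueued regardless of the
	 * threshold */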
1523         if (prop < irg_inline_forced && benefice < inline_threshold) {
1524                 return;
1525         }
1526
1527         pqueue_put(pqueue, call, benefice);
1528 }
1529
1530 /**
1531  * Try to inline calls into a graph.
1532  *
1533  * @param irg      the graph into which we inline
1534  * @param maxsize  do NOT inline if the size of irg gets
1535  *                 bigger than this amount
1536  * @param inline_threshold
1537  *                 threshold value for inline decision
1538  * @param copied_graphs
 *                 map containing copies of recursive graphs
1540  */
1541 static void inline_into(ir_graph *irg, unsigned maxsize,
1542                         int inline_threshold, pmap *copied_graphs)
1543 {
1544         int            phiproj_computed = 0;
1545         inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
1546         call_entry     *curr_call;
1547         wenv_t         wenv;
1548         pqueue_t       *pqueue;
1549
1550         if (env->n_call_nodes == 0)
1551                 return;
1552
1553         if (env->n_nodes > maxsize) {
1554                 DB((dbg, LEVEL_2, "%+F: too big (%d)\n", irg, env->n_nodes));
1555                 return;
1556         }
1557
1558         current_ir_graph = irg;
1559         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1560
1561         /* put irgs into the pqueue */
1562         pqueue = new_pqueue();
1563
1564         list_for_each_entry(call_entry, curr_call, &env->calls, list) {
1565                 assert(is_Call(curr_call->call));
1566                 maybe_push_call(pqueue, curr_call, inline_threshold);
1567         }
1568
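	/* pqueue_pop_front() always yields the call with the highest benefice,
	 * so the most promising candidates are inlined while the size budget
	 * lasts */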
1569         /* note that the list of possible calls is updated during the process */
1570         while (!pqueue_empty(pqueue)) {
1571                 int                 did_inline;
1572                 call_entry          *curr_call  = (call_entry*)pqueue_pop_front(pqueue);
1573                 ir_graph            *callee     = curr_call->callee;
1574                 ir_node             *call_node  = curr_call->call;
1575                 inline_irg_env      *callee_env = (inline_irg_env*)get_irg_link(callee);
1576                 irg_inline_property prop        = get_irg_inline_property(callee);
1577                 int                 loop_depth;
1578                 const call_entry    *centry;
1579                 pmap_entry          *e;
1580
1581                 if ((prop < irg_inline_forced) && env->n_nodes + callee_env->n_nodes > maxsize) {
1582                         DB((dbg, LEVEL_2, "%+F: too big (%d) + %+F (%d)\n", irg,
1583                                                 env->n_nodes, callee, callee_env->n_nodes));
1584                         continue;
1585                 }
1586
1587                 e = pmap_find(copied_graphs, callee);
1588                 if (e != NULL) {
1589                         int benefice = curr_call->benefice;
			/*
			 * Reduce the benefice for recursive functions iff not all
			 * arguments are const: inlining recursive functions is
			 * rarely good.
			 */
1594                         if (!curr_call->all_const)
1595                                 benefice -= 2000;
1596                         if (benefice < inline_threshold)
1597                                 continue;
1598
1599                         /*
1600                          * Remap callee if we have a copy.
1601                          */
1602                         callee     = (ir_graph*)e->value;
1603                         callee_env = (inline_irg_env*)get_irg_link(callee);
1604                 }
1605
1606                 if (current_ir_graph == callee) {
1607                         /*
1608                          * Recursive call: we cannot directly inline because we cannot
1609                          * walk the graph and change it. So we have to make a copy of
1610                          * the graph first.
1611                          */
1612                         int benefice = curr_call->benefice;
1613                         ir_graph *copy;
1614
			/*
			 * Reduce the benefice for recursive functions iff not all
			 * arguments are const: inlining recursive functions is
			 * rarely good.
			 */
1619                         if (!curr_call->all_const)
1620                                 benefice -= 2000;
1621                         if (benefice < inline_threshold)
1622                                 continue;
1623
1624                         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1625
1626                         /*
1627                          * No copy yet, create one.
1628                          * Note that recursive methods are never leaves, so it is
1629                          * sufficient to test this condition here.
1630                          */
1631                         copy = create_irg_copy(callee);
1632
1633                         /* create_irg_copy() destroys the Proj links, recompute them */
1634                         phiproj_computed = 0;
1635
1636                         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1637
1638                         /* allocate a new environment */
1639                         callee_env = alloc_inline_irg_env();
1640                         set_irg_link(copy, callee_env);
1641
1642                         assure_cf_loop(copy);
1643                         memset(&wenv, 0, sizeof(wenv));
1644                         wenv.x              = callee_env;
1645                         wenv.ignore_callers = 1;
1646                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
1647
1648                         /*
1649                          * Enter the entity of the original graph. This is needed
1650                          * for inline_method(). However, note that ent->irg still points
1651                          * to callee, NOT to copy.
1652                          */
1653                         set_irg_entity(copy, get_irg_entity(callee));
1654
1655                         pmap_insert(copied_graphs, callee, copy);
1656                         callee = copy;
1657
1658                         /* we have only one caller: the original graph */
1659                         callee_env->n_callers      = 1;
1660                         callee_env->n_callers_orig = 1;
1661                 }
1662                 if (! phiproj_computed) {
1663                         phiproj_computed = 1;
1664                         collect_phiprojs(current_ir_graph);
1665                 }
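		/* inline_method() relies on the Phi and Proj lists computed by
		 * collect_phiprojs() to split the block at the call site */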
1666                 did_inline = inline_method(call_node, callee);
1667                 if (!did_inline)
1668                         continue;
1669
1670                 /* call was inlined, Phi/Projs for current graph must be recomputed */
1671                 phiproj_computed = 0;
1672
1673                 /* remove it from the caller list */
1674                 list_del(&curr_call->list);
1675
		/* callee was inlined: append its call list */
1677                 env->got_inline = 1;
1678                 --env->n_call_nodes;
1679
1680                 /* we just generate a bunch of new calls */
1681                 loop_depth = curr_call->loop_depth;
1682                 list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
1683                         inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
1684                         ir_node        *new_call;
1685                         call_entry     *new_entry;
1686
1687                         /* after we have inlined callee, all called methods inside
1688                          * callee are now called once more */
1689                         ++penv->n_callers;
1690
1691                         /* Note that the src list points to Call nodes in the inlined graph,
1692                          * but we need Call nodes in our graph. Luckily the inliner leaves
1693                          * this information in the link field. */
1694                         new_call = (ir_node*)get_irn_link(centry->call);
1695                         if (get_irn_irg(new_call) != irg) {
			/* centry->call has not been copied, which means it is dead.
			 * This can happen during inlining if a const function that
			 * cannot be inlined is only used as an unused argument of
			 * another function, which is inlined. */
1700                                 continue;
1701                         }
1702                         assert(is_Call(new_call));
1703
1704                         new_entry = duplicate_call_entry(centry, new_call, loop_depth);
1705                         list_add_tail(&new_entry->list, &env->calls);
1706                         maybe_push_call(pqueue, new_entry, inline_threshold);
1707                 }
1708
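		/* account for the inlined body: the caller inherits all of the
		 * callee's remaining calls and nodes, and the callee lost one
		 * caller */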
1709                 env->n_call_nodes += callee_env->n_call_nodes;
1710                 env->n_nodes += callee_env->n_nodes;
1711                 --callee_env->n_callers;
1712         }
1713         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1714         del_pqueue(pqueue);
1715 }
1716
1717 /*
1718  * Heuristic inliner. Calculates a benefice value for every call and inlines
1719  * those calls with a value higher than the threshold.
1720  */
1721 void inline_functions(unsigned maxsize, int inline_threshold,
1722                       opt_ptr after_inline_opt)
1723 {
1724         inline_irg_env   *env;
1725         size_t           i, n_irgs;
1726         ir_graph         *rem;
1727         wenv_t           wenv;
1728         pmap             *copied_graphs;
1729         pmap_entry       *pm_entry;
1730         ir_graph         **irgs;
1731
1732         rem = current_ir_graph;
1733         obstack_init(&temp_obst);
1734
1735         irgs = create_irg_list();
1736
1737         /* a map for the copied graphs, used to inline recursive calls */
1738         copied_graphs = pmap_create();
1739
1740         /* extend all irgs by a temporary data structure for inlining. */
1741         n_irgs = get_irp_n_irgs();
1742         for (i = 0; i < n_irgs; ++i)
1743                 set_irg_link(irgs[i], alloc_inline_irg_env());
1744
1745         /* Pre-compute information in temporary data structure. */
1746         wenv.ignore_runtime = 0;
1747         wenv.ignore_callers = 0;
1748         for (i = 0; i < n_irgs; ++i) {
1749                 ir_graph *irg = irgs[i];
1750
1751                 free_callee_info(irg);
1752
1753                 wenv.x = (inline_irg_env*)get_irg_link(irg);
1754                 assure_cf_loop(irg);
1755                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
1756         }
1757
1758         /* -- and now inline. -- */
1759         for (i = 0; i < n_irgs; ++i) {
1760                 ir_graph *irg = irgs[i];
1761
1762                 inline_into(irg, maxsize, inline_threshold, copied_graphs);
1763         }
1764
1765         for (i = 0; i < n_irgs; ++i) {
1766                 ir_graph *irg = irgs[i];
1767
1768                 env = (inline_irg_env*)get_irg_link(irg);
1769                 if (env->got_inline && after_inline_opt != NULL) {
1770                         /* this irg got calls inlined: optimize it */
1771                         after_inline_opt(irg);
1772                 }
1773                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
1774                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
1775                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
1776                         env->n_callers_orig, env->n_callers,
1777                         get_entity_name(get_irg_entity(irg))));
1778                 }
1779         }
1780
1781         /* kill the copied graphs: we don't need them anymore */
1782         foreach_pmap(copied_graphs, pm_entry) {
1783                 ir_graph *copy = (ir_graph*)pm_entry->value;
1784
1785                 /* reset the entity, otherwise it will be deleted in the next step ... */
1786                 set_irg_entity(copy, NULL);
1787                 free_ir_graph(copy);
1788         }
1789         pmap_destroy(copied_graphs);
1790
1791         xfree(irgs);
1792
1793         obstack_free(&temp_obst, NULL);
1794         current_ir_graph = rem;
1795 }
1796
1797 typedef struct inline_functions_pass_t {
1798         ir_prog_pass_t pass;
1799         unsigned       maxsize;
1800         int            inline_threshold;
1801         opt_ptr        after_inline_opt;
1802 } inline_functions_pass_t;
1803
/**
 * Wrapper to run inline_functions() as an ir_prog pass.
 */
1807 static int inline_functions_wrapper(ir_prog *irp, void *context)
1808 {
1809         inline_functions_pass_t *pass = (inline_functions_pass_t*)context;
1810
1811         (void)irp;
1812         inline_functions(pass->maxsize, pass->inline_threshold,
1813                          pass->after_inline_opt);
1814         return 0;
1815 }
1816
/* create an ir_prog pass for inline_functions */
1818 ir_prog_pass_t *inline_functions_pass(
1819           const char *name, unsigned maxsize, int inline_threshold,
1820           opt_ptr after_inline_opt)
1821 {
1822         inline_functions_pass_t *pass = XMALLOCZ(inline_functions_pass_t);
1823
1824         pass->maxsize          = maxsize;
1825         pass->inline_threshold = inline_threshold;
1826         pass->after_inline_opt = after_inline_opt;
1827
1828         return def_prog_pass_constructor(
1829                 &pass->pass, name ? name : "inline_functions",
1830                 inline_functions_wrapper);
1831 }
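/* Example (illustrative only: the maxsize and threshold values below are
 * assumptions, not tuned defaults):
 *
 *     ir_prog_pass_t *p = inline_functions_pass(NULL, 750, 0, NULL);
 *
 * The returned pass can then be scheduled like any other ir_prog pass. */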
1832
1833 void firm_init_inline(void)
1834 {
1835         FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
1836 }