clarify some node comments
ir/opt/opt_inline.c
1 /*
2  * Copyright (C) 1995-2011 University of Karlsruhe.  All rights reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief    Dead node elimination and Procedure Inlining.
23  * @author   Michael Beck, Goetz Lindenmaier
24  */
25 #include "config.h"
26
27 #include <limits.h>
28 #include <stdbool.h>
29 #include <assert.h>
30
31 #include "irnode_t.h"
32 #include "irgraph_t.h"
33 #include "irprog_t.h"
34
35 #include "iroptimize.h"
36 #include "ircons_t.h"
37 #include "iropt_t.h"
38 #include "irgopt.h"
39 #include "irgmod.h"
40 #include "irgwalk.h"
41
42 #include "array_t.h"
43 #include "list.h"
44 #include "pset.h"
45 #include "pmap.h"
46 #include "pdeq.h"
47 #include "xmalloc.h"
48 #include "pqueue.h"
49
50 #include "irouts.h"
51 #include "irloop_t.h"
52 #include "irbackedge_t.h"
53 #include "opt_init.h"
54 #include "cgana.h"
55 #include "trouts.h"
56 #include "error.h"
57
58 #include "analyze_irg_args.h"
59 #include "iredges_t.h"
60 #include "irflag_t.h"
61 #include "irhooks.h"
62 #include "irtools.h"
63 #include "iropt_dbg.h"
64 #include "irpass_t.h"
65 #include "irnodemap.h"
66
67 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
68
69 /*------------------------------------------------------------------*/
70 /* Routines for dead node elimination / copying garbage collection  */
71 /* of the obstack.                                                  */
72 /*------------------------------------------------------------------*/
73
74 /**
75  * Remember the new node in the old node by using a field all nodes have.
76  */
77 static void set_new_node(ir_node *node, ir_node *new_node)
78 {
79         set_irn_link(node, new_node);
80 }
81
82 /**
83  * Get this new node, before the old node is forgotten.
84  */
85 static inline ir_node *get_new_node(ir_node *old_node)
86 {
87         assert(irn_visited(old_node));
88         return (ir_node*) get_irn_link(old_node);
89 }
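
/*
 * Illustration of this protocol (a minimal sketch; the names node, new_irg
 * and the surrounding walker context are hypothetical): a pre-walker copies
 * each node and records the copy, a post-walker can then look up the copies
 * of the predecessors:
 *
 *     ir_node *copy = irn_copy_into_irg(node, new_irg);
 *     set_new_node(node, copy);
 *     ...
 *     ir_node *pred_copy = get_new_node(get_irn_n(node, 0));
 *
 * Since the link field is (ab)used for this mapping, callers reserve
 * IR_RESOURCE_IRN_LINK while the copy machinery runs.
 */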
90
91 /*--------------------------------------------------------------------*/
92 /*  Functionality for inlining                                         */
93 /*--------------------------------------------------------------------*/
94
95 /**
96  * Copy a node for inlining.  Updates attributes that change when
97  * inlining but not during dead node elimination.
98  *
99  * Copies the node by calling irn_copy_into_irg() and then updates the
100  * entity if it is a local one.  env must be the graph into which we
101  * inline. The new entities must be stored in the link fields of the old
102  * entities.
103  */
104 static void copy_node_inline(ir_node *node, void *env)
105 {
106         ir_graph *new_irg  = (ir_graph*) env;
107         ir_node  *new_node = irn_copy_into_irg(node, new_irg);
108
109         set_new_node(node, new_node);
110         if (is_Sel(node)) {
111                 ir_graph  *old_irg        = get_irn_irg(node);
112                 ir_type   *old_frame_type = get_irg_frame_type(old_irg);
113                 ir_entity *old_entity     = get_Sel_entity(node);
114                 assert(is_Sel(new_node));
115                 /* use copied entities from the new frame */
116                 if (get_entity_owner(old_entity) == old_frame_type) {
117                         ir_entity *new_entity = (ir_entity*)get_entity_link(old_entity);
118                         assert(new_entity != NULL);
119                         set_Sel_entity(new_node, new_entity);
120                 }
121         } else if (is_Block(new_node)) {
122                 new_node->attr.block.irg.irg = new_irg;
123         }
124 }
125
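/**
 * Post-walker: rewires the inputs of a copied node to the copies of its
 * predecessors and moves constant-like nodes into the start block of the
 * target graph, where libFirm places constants.
 */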
126 static void set_preds_inline(ir_node *node, void *env)
127 {
128         ir_node *new_node;
129
130         irn_rewire_inputs(node);
131
132         /* move constants into start block */
133         new_node = get_new_node(node);
134         if (is_irn_constlike(new_node)) {
135                 ir_graph *new_irg     = (ir_graph *) env;
136                 ir_node  *start_block = get_irg_start_block(new_irg);
137                 set_nodes_block(new_node, start_block);
138         }
139 }
140
141 /**
142  * Walker: checks for constructs that prevent inlining: address-taken blocks, accesses to the value argument base or to parameter entities, and dynamic stack allocation.
143  */
144 static void find_addr(ir_node *node, void *env)
145 {
146         bool *allow_inline = (bool*)env;
147
148         if (is_Block(node) && get_Block_entity(node)) {
149                 /*
150                  * Currently we cannot correctly handle blocks whose
151                  * address has been taken when inlining.
152                  */
153                 *allow_inline = false;
154         } else if (is_Sel(node)) {
155                 ir_graph *irg = current_ir_graph;
156                 if (get_Sel_ptr(node) == get_irg_frame(irg)) {
157                         /* access to frame */
158                         ir_entity *ent = get_Sel_entity(node);
159                         if (get_entity_owner(ent) != get_irg_frame_type(irg)) {
160                                 /* access to value_type */
161                                 *allow_inline = false;
162                         }
163                         if (is_parameter_entity(ent)) {
164                                 *allow_inline = false;
165                         }
166                 }
167         } else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
168                 /* From GCC:
169                  * Refuse to inline an alloca call unless the user explicitly forced
170                  * it, as this may change the program's memory overhead drastically
171                  * when the function using alloca is called in a loop.  In GCC,
172                  * inlining into schedule_block caused SPEC2000 to require 2GB of
173                  * RAM instead of 256MB.
174                  *
175                  * Sadly, this is true for our implementation as well.  Moreover, we
176                  * cannot yet differentiate between alloca() and VLAs, so this also
177                  * disables inlining of functions using VLAs (which are completely
178                  * safe).
179                  *
180                  * Two possible solutions:
181                  * - add a flag to the Alloc node for "real" alloca() calls
182                  * - add a Stack-Restore node at the end of functions using alloca()
183                  */
184                 *allow_inline = false;
185         }
186 }
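
/*
 * For illustration, hypothetical C sources that make find_addr() veto
 * inlining:
 *
 *     int f(int x) { return g(&x); }      // address of a parameter entity
 *     int h(int n) { char buf[n]; ... }   // VLA: Alloc with stack_alloc
 *
 * In the first case a parameter entity on the frame is accessed, in the
 * second an Alloc node with stack_alloc is created.
 */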
187
188 /**
189  * Check if we can inline a given call.
190  * Currently, we cannot inline two cases:
191  * - calls with compound arguments or results
192  * - graphs that take the address of a parameter
193  *
194  * These conditions are checked here.
195  */
196 static bool can_inline(ir_node *call, ir_graph *called_graph)
197 {
198         ir_entity          *called      = get_irg_entity(called_graph);
199         ir_type            *called_type = get_entity_type(called);
200         ir_type            *call_type   = get_Call_type(call);
201         size_t              n_params    = get_method_n_params(called_type);
202         size_t              n_arguments = get_method_n_params(call_type);
203         size_t              n_res       = get_method_n_ress(called_type);
204         irg_inline_property prop        = get_irg_inline_property(called_graph);
205         size_t              i;
206         bool                res;
207
208         if (prop == irg_inline_forbidden)
209                 return false;
210
211         if (n_arguments != n_params) {
212                 /* this is a bad feature of C: without a prototype, we can
213                  * call a function with fewer arguments than it has parameters.
214                  * Currently we don't support this; we could use Unknown then. */
215                 return false;
216         }
217         if (n_res != get_method_n_ress(call_type)) {
218                 return false;
219         }
220
221         /* Argh, compiling C has some bad consequences:
222          * when argument and parameter types differ, the behaviour is
223          * implementation-defined. We support inlining if the bit size of the
224          * types matches AND the same arithmetic is used. */
225         for (i = 0; i < n_params; ++i) {
226                 ir_type *param_tp = get_method_param_type(called_type, i);
227                 ir_type *arg_tp   = get_method_param_type(call_type, i);
228
229                 if (param_tp != arg_tp) {
230                         ir_mode *pmode = get_type_mode(param_tp);
231                         ir_mode *amode = get_type_mode(arg_tp);
232
233                         if (pmode == NULL || amode == NULL)
234                                 return false;
235                         if (get_mode_size_bits(pmode) != get_mode_size_bits(amode))
236                                 return false;
237                         if (get_mode_arithmetic(pmode) != get_mode_arithmetic(amode))
238                                 return false;
239                         /* otherwise we can simply "reinterpret" the bits */
240                 }
241         }
242         for (i = 0; i < n_res; ++i) {
243                 ir_type *decl_res_tp = get_method_res_type(called_type, i);
244                 ir_type *used_res_tp = get_method_res_type(call_type, i);
245
246                 if (decl_res_tp != used_res_tp) {
247                         ir_mode *decl_mode = get_type_mode(decl_res_tp);
248                         ir_mode *used_mode = get_type_mode(used_res_tp);
249                         if (decl_mode == NULL || used_mode == NULL)
250                                 return false;
251                         if (get_mode_size_bits(decl_mode) != get_mode_size_bits(used_mode))
252                                 return false;
253                         if (get_mode_arithmetic(decl_mode) != get_mode_arithmetic(used_mode))
254                                 return false;
255                         /* otherwise we can "reinterpret" the bits */
256                 }
257         }
258
259         /* check parameters for compound arguments */
260         for (i = 0; i < n_params; ++i) {
261                 ir_type *p_type = get_method_param_type(call_type, i);
262
263                 if (is_compound_type(p_type))
264                         return false;
265         }
266
267         /* check results for compound return types */
268         for (i = 0; i < n_res; ++i) {
269                 ir_type *r_type = get_method_res_type(call_type, i);
270
271                 if (is_compound_type(r_type))
272                         return false;
273         }
274
275         res = true;
276         irg_walk_graph(called_graph, find_addr, NULL, &res);
277
278         return res;
279 }
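
/*
 * A sketch of the arity check above (hypothetical C source): calls through
 * an unprototyped declaration may pass a different number of arguments than
 * the definition declares, so the call type and the method type disagree:
 *
 *     int f();                           // no prototype
 *     int use(void) { return f(1); }     // call type has 1 parameter
 *     int f(int a, int b) { ... }        // method type has 2 parameters
 *
 * Here n_arguments (1) != n_params (2) and can_inline() returns false.
 */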
280
281 enum exc_mode {
282         exc_handler,    /**< There is a handler. */
283         exc_no_handler  /**< Exception handling not represented. */
284 };
285
286 /**
287  * Copy all entities on the stack frame of one irg to the stack frame of
288  * another. Sets the entity links of the old entities to the copies.
289  */
290 static void copy_frame_entities(ir_graph *from, ir_graph *to)
291 {
292         ir_type *from_frame = get_irg_frame_type(from);
293         ir_type *to_frame   = get_irg_frame_type(to);
294         size_t   n_members  = get_class_n_members(from_frame);
295         size_t   i;
296         assert(from_frame != to_frame);
297
298         for (i = 0; i < n_members; ++i) {
299                 ir_entity *old_ent = get_class_member(from_frame, i);
300                 ir_entity *new_ent = copy_entity_own(old_ent, to_frame);
301                 set_entity_link(old_ent, new_ent);
302                 assert (!is_parameter_entity(old_ent));
303         }
304 }
305
306 /* Inlines a method at the given call site. */
307 int inline_method(ir_node *call, ir_graph *called_graph)
308 {
309         ir_node       *pre_call;
310         ir_node       *post_call, *post_bl;
311         ir_node       *in[pn_Start_max+1];
312         ir_node       *end, *end_bl, *block;
313         ir_node       **res_pred;
314         ir_node       **cf_pred;
315         ir_node       **args_in;
316         ir_node       *ret, *phi;
317         int           arity, n_ret, n_exc, n_res, i, j, rem_opt;
318         int           irn_arity, n_params;
319         int           n_mem_phi;
320         enum exc_mode exc_handling;
321         ir_type       *mtp;
322         ir_type       *ctp;
323         ir_entity     *ent;
324         ir_graph      *rem;
325         ir_graph      *irg = get_irn_irg(call);
326
327         /* we cannot inline some types of calls */
328         if (! can_inline(call, called_graph))
329                 return 0;
330
331         /* We cannot inline a recursive call. The graph must be copied before
332          * the call to inline_method() using create_irg_copy(). */
333         if (called_graph == irg)
334                 return 0;
335
336         ent      = get_irg_entity(called_graph);
337         mtp      = get_entity_type(ent);
338         ctp      = get_Call_type(call);
339         n_params = get_method_n_params(mtp);
340         n_res    = get_method_n_ress(mtp);
341
342         rem = current_ir_graph;
343         current_ir_graph = irg;
344
345         DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));
346
347         /* optimizations can cause problems when allocating new nodes */
348         rem_opt = get_opt_optimize();
349         set_optimize(0);
350
351         /* Handle graph state */
352         assert(get_irg_phase_state(irg) != phase_building);
353         assert(get_irg_pinned(irg) == op_pin_state_pinned);
354         assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
355         clear_irg_state(irg, IR_GRAPH_STATE_CONSISTENT_DOMINANCE
356                            | IR_GRAPH_STATE_VALID_EXTENDED_BLOCKS
357                            | IR_GRAPH_STATE_CONSISTENT_ENTITY_USAGE);
358         set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
359         clear_irg_state(irg, IR_GRAPH_STATE_CONSISTENT_ENTITY_USAGE);
360         edges_deactivate(irg);
361
362         /* here we know we WILL inline, so inform the statistics */
363         hook_inline(call, called_graph);
364
365         /* -- Decide how to handle exception control flow: Is there a handler
366            for the Call node, or do we branch directly to End on an exception?
367            exc_handling:
368            exc_handler    There is a handler.
369            exc_no_handler Exception handling not represented in Firm. -- */
370         {
371                 ir_node *Xproj = NULL;
372                 ir_node *proj;
373                 for (proj = (ir_node*)get_irn_link(call); proj != NULL;
374                      proj = (ir_node*)get_irn_link(proj)) {
375                         long proj_nr = get_Proj_proj(proj);
376                         if (proj_nr == pn_Call_X_except) Xproj = proj;
377                 }
378                 exc_handling = Xproj != NULL ? exc_handler : exc_no_handler;
379         }
380
381         /* create the argument tuple */
382         args_in = ALLOCAN(ir_node*, n_params);
383
384         block = get_nodes_block(call);
385         for (i = n_params - 1; i >= 0; --i) {
386                 ir_node *arg      = get_Call_param(call, i);
387                 ir_type *param_tp = get_method_param_type(mtp, i);
388                 ir_mode *mode     = get_type_mode(param_tp);
389
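                /* can_inline() already verified matching bit size and
                 * arithmetic, so a Conv suffices to adapt the mode */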
390                 if (mode != get_irn_mode(arg)) {
391                         arg = new_r_Conv(block, arg, mode);
392                 }
393                 args_in[i] = arg;
394         }
395
396         /* pre_call is a Tuple that collects memory, frame and arguments and
397          * later replaces the Start node of the called graph.  Post_call is
398          * the old Call node and collects the results. Both end up as tuples. */
399         post_bl = get_nodes_block(call);
400         /* XxMxPxPxPxT of Start + parameter of Call */
401         in[pn_Start_M]              = get_Call_mem(call);
402         in[pn_Start_X_initial_exec] = new_r_Jmp(post_bl);
403         in[pn_Start_P_frame_base]   = get_irg_frame(irg);
404         in[pn_Start_T_args]         = new_r_Tuple(post_bl, n_params, args_in);
405         pre_call = new_r_Tuple(post_bl, pn_Start_max+1, in);
406         post_call = call;
407
408         /* --
409            The new block gets the ins of the old block, pre_call and all its
410            predecessors and all Phi nodes. -- */
411         part_block(pre_call);
412
413         /* increment visited flag for later walk */
414         inc_irg_visited(called_graph);
415
416         /* Link some nodes to nodes in the current graph so that, instead of
417          * being copied, the linked nodes get used.
418          * This way the copier uses the created Tuple instead of copying the
419          * Start node, and similarly for singleton nodes like NoMem and Bad.
420          * Note: this prohibits the predecessors from being copied - only do
421          *       it for nodes without predecessors */
422         {
423                 ir_node *start_block;
424                 ir_node *start;
425                 ir_node *nomem;
426
427                 start_block = get_irg_start_block(called_graph);
428                 set_new_node(start_block, get_nodes_block(pre_call));
429                 mark_irn_visited(start_block);
430
431                 start = get_irg_start(called_graph);
432                 set_new_node(start, pre_call);
433                 mark_irn_visited(start);
434
435                 nomem = get_irg_no_mem(called_graph);
436                 set_new_node(nomem, get_irg_no_mem(irg));
437                 mark_irn_visited(nomem);
438         }
439
440         /* the entity link is used to map entities of the old stack frame to
441          * their copies on the new stack frame */
442         irp_reserve_resources(irp, IRP_RESOURCE_ENTITY_LINK);
443
444         /* copy entities and nodes */
445         assert(!irn_visited(get_irg_end(called_graph)));
446         copy_frame_entities(called_graph, irg);
447         irg_walk_core(get_irg_end(called_graph), copy_node_inline, set_preds_inline,
448                       irg);
449
450         irp_free_resources(irp, IRP_RESOURCE_ENTITY_LINK);
451
452         /* -- Merge the end of the inlined procedure with the call site -- */
453         /* We will turn the old Call node into a Tuple with the following
454            predecessors:
455            -1:  Block of Tuple.
456            0: Phi of all Memories of Return statements.
457            1: Jmp from new Block that merges the control flow from all exception
458            predecessors of the old end block.
459            2: Tuple of all results.
460            3: Phi of Exception memories.
461            In case the old Call directly branches to End on an exception we don't
462            need the block merging all exceptions nor the Phi of the exception
463            memories.
464         */
465
466         /* Precompute some values */
467         end_bl = get_new_node(get_irg_end_block(called_graph));
468         end    = get_new_node(get_irg_end(called_graph));
469         arity  = get_Block_n_cfgpreds(end_bl);    /* arity = n_exc + n_ret  */
470         n_res  = get_method_n_ress(get_Call_type(call));
471
472         res_pred = XMALLOCN(ir_node*, n_res);
473         cf_pred  = XMALLOCN(ir_node*, arity);
474
475         /* take over the keep-alive edges of the inlined graph */
476         irn_arity = get_irn_arity(end);
477         for (i = 0; i < irn_arity; i++) {
478                 ir_node *ka = get_End_keepalive(end, i);
479                 if (! is_Bad(ka))
480                         add_End_keepalive(get_irg_end(irg), ka);
481         }
482
483         /* replace Return nodes by Jump nodes */
484         n_ret = 0;
485         for (i = 0; i < arity; i++) {
486                 ir_node *ret;
487                 ret = get_Block_cfgpred(end_bl, i);
488                 if (is_Return(ret)) {
489                         ir_node *block = get_nodes_block(ret);
490                         cf_pred[n_ret] = new_r_Jmp(block);
491                         n_ret++;
492                 }
493         }
494         set_irn_in(post_bl, n_ret, cf_pred);
495
496         /* build a Tuple for all results of the method.
497          * Add Phi nodes if there was more than one Return. */
498         turn_into_tuple(post_call, pn_Call_max+1);
499         /* First the Memory-Phi */
500         n_mem_phi = 0;
501         for (i = 0; i < arity; i++) {
502                 ret = get_Block_cfgpred(end_bl, i);
503                 if (is_Return(ret)) {
504                         cf_pred[n_mem_phi++] = get_Return_mem(ret);
505                 }
506                 /* memory output for some exceptions is directly connected to End */
507                 if (is_Call(ret)) {
508                         cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 3);
509                 } else if (is_fragile_op(ret)) {
510                                 /* We rely on all cfops having their memory output at the same position. */
511                         cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 0);
512                 } else if (is_Raise(ret)) {
513                         cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 1);
514                 }
515         }
516         phi = new_r_Phi(post_bl, n_mem_phi, cf_pred, mode_M);
517         set_Tuple_pred(call, pn_Call_M, phi);
518         /* Conserve Phi-list for further inlinings -- but might be optimized */
519         if (get_nodes_block(phi) == post_bl) {
520                 set_irn_link(phi, get_irn_link(post_bl));
521                 set_irn_link(post_bl, phi);
522         }
523         /* Now the real results */
524         if (n_res > 0) {
525                 ir_node *result_tuple;
526                 for (j = 0; j < n_res; j++) {
527                         ir_type *res_type = get_method_res_type(ctp, j);
528                         ir_mode *res_mode = get_type_mode(res_type);
529                         n_ret = 0;
530                         for (i = 0; i < arity; i++) {
531                                 ret = get_Block_cfgpred(end_bl, i);
532                                 if (is_Return(ret)) {
533                                         ir_node *res = get_Return_res(ret, j);
534                                         if (get_irn_mode(res) != res_mode) {
535                                                 ir_node *block = get_nodes_block(res);
536                                                 res = new_r_Conv(block, res, res_mode);
537                                         }
538                                         cf_pred[n_ret] = res;
539                                         n_ret++;
540                                 }
541                         }
542                         if (n_ret > 0) {
543                                 phi = new_r_Phi(post_bl, n_ret, cf_pred, res_mode);
544                         } else {
545                                 phi = new_r_Bad(irg, res_mode);
546                         }
547                         res_pred[j] = phi;
548                         /* Conserve Phi-list for further inlinings -- but might be optimized */
549                         if (get_nodes_block(phi) == post_bl) {
550                                 set_Phi_next(phi, get_Block_phis(post_bl));
551                                 set_Block_phis(post_bl, phi);
552                         }
553                 }
554                 result_tuple = new_r_Tuple(post_bl, n_res, res_pred);
555                 set_Tuple_pred(call, pn_Call_T_result, result_tuple);
556         } else {
557                 set_Tuple_pred(call, pn_Call_T_result, new_r_Bad(irg, mode_T));
558         }
559         /* handle the regular call */
560         set_Tuple_pred(call, pn_Call_X_regular, new_r_Jmp(post_bl));
561
562         /* Finally the exception control flow.
563            We have two possible situations:
564            First, the Call branches to an exception handler:
565            we need to add a Phi node to collect the memory containing the
566            exception objects.  Further we need to add another block to get a
567            correct representation of this Phi.  To this block we add a Jmp
568            that resolves into the X output of the Call when the Call is
569            turned into a tuple.
570            Second, there is no exception edge: just add all inlined exception
571            branches to the End node.
572          */
573         if (exc_handling == exc_handler) {
574                 n_exc = 0;
575                 for (i = 0; i < arity; i++) {
576                         ir_node *ret, *irn;
577                         ret = get_Block_cfgpred(end_bl, i);
578                         irn = skip_Proj(ret);
579                         if (is_fragile_op(irn) || is_Raise(irn)) {
580                                 cf_pred[n_exc] = ret;
581                                 ++n_exc;
582                         }
583                 }
584                 if (n_exc > 0) {
585                         if (n_exc == 1) {
586                                 /* simple fix */
587                                 set_Tuple_pred(call, pn_Call_X_except, cf_pred[0]);
588                         } else {
589                                 ir_node *block = new_r_Block(irg, n_exc, cf_pred);
590                                 set_Tuple_pred(call, pn_Call_X_except, new_r_Jmp(block));
591                         }
592                 } else {
593                         set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg, mode_X));
594                 }
595         } else {
596                 ir_node *main_end_bl;
597                 int main_end_bl_arity;
598                 ir_node **end_preds;
599
600                 /* here exc_handling == exc_no_handler: route all inlined exception branches to the caller's end block */
601                 n_exc = 0;
602                 for (i = 0; i < arity; i++) {
603                         ir_node *ret = get_Block_cfgpred(end_bl, i);
604                         ir_node *irn = skip_Proj(ret);
605
606                         if (is_fragile_op(irn) || is_Raise(irn)) {
607                                 cf_pred[n_exc] = ret;
608                                 n_exc++;
609                         }
610                 }
611                 main_end_bl       = get_irg_end_block(irg);
612                 main_end_bl_arity = get_irn_arity(main_end_bl);
613                 end_preds         = XMALLOCN(ir_node*, n_exc + main_end_bl_arity);
614
615                 for (i = 0; i < main_end_bl_arity; ++i)
616                         end_preds[i] = get_irn_n(main_end_bl, i);
617                 for (i = 0; i < n_exc; ++i)
618                         end_preds[main_end_bl_arity + i] = cf_pred[i];
619                 set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
620                 set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg, mode_X));
621                 free(end_preds);
622         }
623         free(res_pred);
624         free(cf_pred);
625
626         /* --  Turn optimizations back on. -- */
627         set_optimize(rem_opt);
628         current_ir_graph = rem;
629
630         return 1;
631 }
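
/*
 * Usage sketch for inline_method(): the caller graph needs Proj links and
 * Phi lists, exactly as inline_small_irgs() below sets them up:
 *
 *     ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_PHI_LIST);
 *     collect_phiprojs(irg);
 *     if (inline_method(call, callee)) {
 *             // call site replaced; Phi/Proj information must be
 *             // recomputed before inlining further calls in this graph
 *     }
 *     ir_free_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_PHI_LIST);
 */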
632
633 /********************************************************************/
634 /* Apply inlining to small methods.                                 */
635 /********************************************************************/
636
637 static struct obstack  temp_obst;
638
639 /** Represents a possibly inlinable call in a graph. */
640 typedef struct call_entry {
641         ir_node    *call;       /**< The Call node. */
642         ir_graph   *callee;     /**< The callee IR-graph. */
643         list_head  list;        /**< List head for linking the next one. */
644         int        loop_depth;  /**< The loop depth of this call. */
645         int        benefice;    /**< The calculated benefice of this call. */
646         unsigned   local_adr:1; /**< Set if this call gets an address of a local variable. */
647         unsigned   all_const:1; /**< Set if this call has only constant parameters. */
648 } call_entry;
649
650 /**
651  * environment for inlining small irgs
652  */
653 typedef struct inline_env_t {
654         struct obstack obst;  /**< An obstack where call_entries are allocated on. */
655         list_head      calls; /**< The call entry list. */
656 } inline_env_t;
657
658 /**
659  * Returns the irg called from a Call node. If the irg is not
660  * known, NULL is returned.
661  *
662  * @param call  the call node
663  */
664 static ir_graph *get_call_called_irg(ir_node *call)
665 {
666         ir_node *addr;
667
668         addr = get_Call_ptr(call);
669         if (is_SymConst_addr_ent(addr)) {
670                 ir_entity *ent = get_SymConst_entity(addr);
671                 /* we do not know which function finally gets bound to a weak symbol */
672                 if (get_entity_linkage(ent) & IR_LINKAGE_WEAK)
673                         return NULL;
674
675                 return get_entity_irg(ent);
676         }
677
678         return NULL;
679 }
680
681 /**
682  * Walker: Collect all calls to known graphs inside a graph.
683  */
684 static void collect_calls(ir_node *call, void *env)
685 {
687         if (is_Call(call)) {
688                 ir_graph *called_irg = get_call_called_irg(call);
689
690                 if (called_irg != NULL) {
691                         /* The Call node calls a locally defined method.  Remember to inline. */
692                         inline_env_t *ienv  = (inline_env_t*)env;
693                         call_entry   *entry = OALLOC(&ienv->obst, call_entry);
694                         entry->call       = call;
695                         entry->callee     = called_irg;
696                         entry->loop_depth = 0;
697                         entry->benefice   = 0;
698                         entry->local_adr  = 0;
699                         entry->all_const  = 0;
700
701                         list_add_tail(&entry->list, &ienv->calls);
702                 }
703         }
704 }
705
706 /**
707  * Inlines all small methods at call sites where the called address comes
708  * from a SymConst node that references the entity representing the called
709  * method.
710  * The size argument is a rough measure for the code size of the method:
711  * Methods where the obstack containing the firm graph is smaller than
712  * size are inlined.
713  */
714 void inline_small_irgs(ir_graph *irg, int size)
715 {
716         ir_graph *rem = current_ir_graph;
717         inline_env_t env;
718         call_entry *entry;
719
720         current_ir_graph = irg;
721         /* Handle graph state */
722         assert(get_irg_phase_state(irg) != phase_building);
723         free_callee_info(irg);
724
725         /* Find Call nodes to inline.
726            (We cannot inline during a walk of the graph, as inlining the same
727            method several times changes the visited flag of the walked graph:
728            after the first inlining, the visited value of the callee equals
729            that of the caller.  With the next inlining both are increased.) */
730         obstack_init(&env.obst);
731         INIT_LIST_HEAD(&env.calls);
732         irg_walk_graph(irg, NULL, collect_calls, &env);
733
734         if (! list_empty(&env.calls)) {
735                 /* There are calls to inline */
736                 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
737                 collect_phiprojs(irg);
738
739                 list_for_each_entry(call_entry, entry, &env.calls, list) {
740                         ir_graph            *callee = entry->callee;
741                         irg_inline_property prop    = get_irg_inline_property(callee);
742
743                         if (prop == irg_inline_forbidden) {
744                                 continue;
745                         }
746
747                         if (prop >= irg_inline_forced ||
748                             _obstack_memory_used(callee->obst) - (int)obstack_room(callee->obst) < size) {
749                                 inline_method(entry->call, callee);
750                         }
751                 }
752                 ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
753         }
754         obstack_free(&env.obst, NULL);
755         current_ir_graph = rem;
756 }
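
/*
 * Example invocation (the size threshold is an arbitrary value chosen for
 * illustration; it is compared against the memory used by the callee's
 * graph obstack):
 *
 *     inline_small_irgs(irg, 5000);
 */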
757
758 typedef struct inline_small_irgs_pass_t {
759         ir_graph_pass_t pass;
760         int            size;
761 } inline_small_irgs_pass_t;
762
763 /**
764  * Wrapper to run inline_small_irgs() as a pass.
765  */
766 static int inline_small_irgs_wrapper(ir_graph *irg, void *context)
767 {
768         inline_small_irgs_pass_t *pass = (inline_small_irgs_pass_t*)context;
769
770         inline_small_irgs(irg, pass->size);
771         return 0;
772 }
773
774 /* create a pass for inline_small_irgs() */
775 ir_graph_pass_t *inline_small_irgs_pass(const char *name, int size)
776 {
777         inline_small_irgs_pass_t *pass = XMALLOCZ(inline_small_irgs_pass_t);
778
779         pass->size = size;
780         return def_graph_pass_constructor(
781                 &pass->pass, name ? name : "inline_small_irgs", inline_small_irgs_wrapper);
782 }
783
784 /**
785  * Environment for inlining irgs.
786  */
787 typedef struct {
788         list_head calls;             /**< List of all Call nodes in this graph. */
789         unsigned  *local_weights;    /**< Once allocated, the beneficial weight for transmitting local addresses. */
790         unsigned  n_nodes;           /**< Number of nodes in graph except Id, Tuple, Proj, Start, End. */
791         unsigned  n_blocks;          /**< Number of Blocks in graph without Start and End block. */
792         unsigned  n_nodes_orig;      /**< for statistics */
793         unsigned  n_call_nodes;      /**< Number of Call nodes in the graph. */
794         unsigned  n_call_nodes_orig; /**< for statistics */
795         unsigned  n_callers;         /**< Number of known graphs that call this graph. */
796         unsigned  n_callers_orig;    /**< for statistics */
797         unsigned  got_inline:1;      /**< Set, if at least one call inside this graph was inlined. */
798         unsigned  recursive:1;       /**< Set, if this function is self recursive. */
799 } inline_irg_env;
800
801 /**
802  * Allocate a new environment for inlining.
803  */
804 static inline_irg_env *alloc_inline_irg_env(void)
805 {
806         inline_irg_env *env    = OALLOC(&temp_obst, inline_irg_env);
807         INIT_LIST_HEAD(&env->calls);
808         env->local_weights     = NULL;
809         env->n_nodes           = -2; /* do not count Start, End */
810         env->n_blocks          = -2; /* do not count Start and End blocks */
811         env->n_nodes_orig      = -2; /* do not count Start, End */
812         env->n_call_nodes      = 0;
813         env->n_call_nodes_orig = 0;
814         env->n_callers         = 0;
815         env->n_callers_orig    = 0;
816         env->got_inline        = 0;
817         env->recursive         = 0;
818         return env;
819 }
820
821 typedef struct walker_env {
822         inline_irg_env *x;     /**< the inline environment */
823         char ignore_runtime;   /**< the ignore runtime flag */
824         char ignore_callers;   /**< if set, do not update caller statistics */
825 } wenv_t;
826
827 /**
828  * post-walker: collect all calls in the inline-environment
829  * of a graph and sum some statistics.
830  */
831 static void collect_calls2(ir_node *call, void *ctx)
832 {
833         wenv_t         *env = (wenv_t*)ctx;
834         inline_irg_env *x = env->x;
835         unsigned        code = get_irn_opcode(call);
836         ir_graph       *callee;
837         call_entry     *entry;
838
839         /* count meaningful nodes in irg */
840         if (code != iro_Proj && code != iro_Tuple && code != iro_Sync) {
841                 if (code != iro_Block) {
842                         ++x->n_nodes;
843                         ++x->n_nodes_orig;
844                 } else {
845                         ++x->n_blocks;
846                 }
847         }
848
849         if (code != iro_Call) return;
850
851         /* check whether it is a runtime call */
852         if (env->ignore_runtime) {
853                 ir_node *symc = get_Call_ptr(call);
854
855                 if (is_SymConst_addr_ent(symc)) {
856                         ir_entity *ent = get_SymConst_entity(symc);
857
858                         if (get_entity_additional_properties(ent) & mtp_property_runtime)
859                                 return;
860                 }
861         }
862
863         /* collect all call nodes */
864         ++x->n_call_nodes;
865         ++x->n_call_nodes_orig;
866
867         callee = get_call_called_irg(call);
868         if (callee != NULL) {
869                 if (! env->ignore_callers) {
870                         inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);
871                         /* count all static callers */
872                         ++callee_env->n_callers;
873                         ++callee_env->n_callers_orig;
874                 }
875                 if (callee == current_ir_graph)
876                         x->recursive = 1;
877
878                 /* link it into the list of possibly inlinable entries */
879                 entry = OALLOC(&temp_obst, call_entry);
880                 entry->call       = call;
881                 entry->callee     = callee;
882                 entry->loop_depth = get_irn_loop(get_nodes_block(call))->depth;
883                 entry->benefice   = 0;
884                 entry->local_adr  = 0;
885                 entry->all_const  = 0;
886
887                 list_add_tail(&entry->list, &x->calls);
888         }
889 }
890
891 /**
892  * Returns TRUE if the number of Call nodes is 0 in the irg's environment,
893  * hence this irg is a leaf.
894  */
895 inline static int is_leave(ir_graph *irg)
896 {
897         inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
898         return env->n_call_nodes == 0;
899 }
900
901 /**
902  * Returns TRUE if the number of nodes in the callee is
903  * smaller than size in the irg's environment.
904  */
905 inline static int is_smaller(ir_graph *callee, unsigned size)
906 {
907         inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);
908         return env->n_nodes < size;
909 }
910
911 /**
912  * Duplicate a call entry.
913  *
914  * @param entry     the original entry to duplicate
915  * @param new_call  the new call node
916  * @param loop_depth_delta
917  *                  delta value for the loop depth
918  */
919 static call_entry *duplicate_call_entry(const call_entry *entry,
920                                         ir_node *new_call, int loop_depth_delta)
921 {
922         call_entry *nentry = OALLOC(&temp_obst, call_entry);
923         nentry->call       = new_call;
924         nentry->callee     = entry->callee;
925         nentry->benefice   = entry->benefice;
926         nentry->loop_depth = entry->loop_depth + loop_depth_delta;
927         nentry->local_adr  = entry->local_adr;
928         nentry->all_const  = entry->all_const;
929
930         return nentry;
931 }
932
933 /**
934  * Append all call nodes of the source environment to the list of the destination
935  * environment.
936  *
937  * @param dst         destination environment
938  * @param src         source environment
939  * @param loop_depth  the loop depth of the call that is replaced by the src list
940  */
941 static void append_call_list(inline_irg_env *dst, inline_irg_env *src, int loop_depth)
942 {
943         call_entry *entry, *nentry;
944
945         /* Note that the src list points to Call nodes in the inlined graph, but
946            we need Call nodes in our graph. Luckily the inliner leaves this information
947            in the link field. */
948         list_for_each_entry(call_entry, entry, &src->calls, list) {
949                 nentry = duplicate_call_entry(entry, (ir_node*)get_irn_link(entry->call), loop_depth);
950                 list_add_tail(&nentry->list, &dst->calls);
951         }
952         dst->n_call_nodes += src->n_call_nodes;
953         dst->n_nodes      += src->n_nodes;
954 }
955
956 /*
957  * Inlines small leaf methods at call sites where the called address comes
958  * from a SymConst node that references the entity representing the called
959  * method.
960  * The size argument is a rough measure for the code size of the method:
961  * Methods where the obstack containing the firm graph is smaller than
962  * size are inlined.
963  */
964 void inline_leave_functions(unsigned maxsize, unsigned leavesize,
965                             unsigned size, int ignore_runtime)
966 {
967         inline_irg_env   *env;
968         ir_graph         *irg;
969         size_t           i, n_irgs;
970         ir_graph         *rem;
971         int              did_inline;
972         wenv_t           wenv;
973         call_entry       *entry, *next;
974         const call_entry *centry;
975         pmap             *copied_graphs;
976         pmap_entry       *pm_entry;
977
978         rem = current_ir_graph;
979         obstack_init(&temp_obst);
980
981         /* a map for the copied graphs, used to inline recursive calls */
982         copied_graphs = pmap_create();
983
984         /* extend all irgs by a temporary data structure for inlining. */
985         n_irgs = get_irp_n_irgs();
986         for (i = 0; i < n_irgs; ++i)
987                 set_irg_link(get_irp_irg(i), alloc_inline_irg_env());
988
989         /* Pre-compute information in temporary data structure. */
990         wenv.ignore_runtime = ignore_runtime;
991         wenv.ignore_callers = 0;
992         for (i = 0; i < n_irgs; ++i) {
993                 ir_graph *irg = get_irp_irg(i);
994
995                 assert(get_irg_phase_state(irg) != phase_building);
996                 free_callee_info(irg);
997
998                 assure_loopinfo(irg);
999                 wenv.x = (inline_irg_env*)get_irg_link(irg);
1000                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
1001         }
1002
1003         /* -- and now inline. -- */
1004
1005         /* Inline leaves recursively -- we might construct new leaves. */
1006         do {
1007                 did_inline = 0;
1008
1009                 for (i = 0; i < n_irgs; ++i) {
1010                         ir_node *call;
1011                         int phiproj_computed = 0;
1012
1013                         current_ir_graph = get_irp_irg(i);
1014                         env              = (inline_irg_env*)get_irg_link(current_ir_graph);
1015
1016                         ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1017                         list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
1018                                 ir_graph            *callee;
1019                                 irg_inline_property  prop;
1020
1021                                 if (env->n_nodes > maxsize)
1022                                         break;
1023
1024                                 call   = entry->call;
1025                                 callee = entry->callee;
1026
1027                                 prop = get_irg_inline_property(callee);
1028                                 if (prop == irg_inline_forbidden) {
1029                                         continue;
1030                                 }
1031
1032                                 if (is_leave(callee) && (
1033                                     is_smaller(callee, leavesize) || prop >= irg_inline_forced)) {
1034                                         if (!phiproj_computed) {
1035                                                 phiproj_computed = 1;
1036                                                 collect_phiprojs(current_ir_graph);
1037                                         }
1038                                         did_inline = inline_method(call, callee);
1039
1040                                         if (did_inline) {
1041                                                 inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);
1042
1043                                                 /* call was inlined, Phi/Projs for current graph must be recomputed */
1044                                                 phiproj_computed = 0;
1045
1046                                                 /* Do some statistics */
1047                                                 env->got_inline = 1;
1048                                                 --env->n_call_nodes;
1049                                                 env->n_nodes += callee_env->n_nodes;
1050                                                 --callee_env->n_callers;
1051
1052                                                 /* remove this call from the list */
1053                                                 list_del(&entry->list);
1054                                                 continue;
1055                                         }
1056                                 }
1057                         }
1058                         ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1059                 }
1060         } while (did_inline);
1061
1062         /* inline other small functions. */
1063         for (i = 0; i < n_irgs; ++i) {
1064                 ir_node *call;
1065                 int phiproj_computed = 0;
1066
1067                 current_ir_graph = get_irp_irg(i);
1068                 env              = (inline_irg_env*)get_irg_link(current_ir_graph);
1069
1070                 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1071
1072                 /* note that the list of possible calls is updated during the process */
1073                 list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
1074                         irg_inline_property prop;
1075                         ir_graph            *callee;
1076                         pmap_entry          *e;
1077
1078                         call   = entry->call;
1079                         callee = entry->callee;
1080
1081                         prop = get_irg_inline_property(callee);
1082                         if (prop == irg_inline_forbidden) {
1083                                 continue;
1084                         }
1085
1086                         e = pmap_find(copied_graphs, callee);
1087                         if (e != NULL) {
1088                                 /*
1089                                  * Remap callee if we have a copy.
1090                                  * FIXME: Should we do this only for recursive Calls ?
1091                                  */
1092                                 callee = (ir_graph*)e->value;
1093                         }
1094
1095                         if (prop >= irg_inline_forced ||
1096                             (is_smaller(callee, size) && env->n_nodes < maxsize) /* small function */) {
1097                                 if (current_ir_graph == callee) {
1098                                         /*
1099                                          * Recursive call: we cannot directly inline because we cannot walk
1100                                          * the graph and change it. So we have to make a copy of the graph
1101                                          * first.
1102                                          */
1103
1104                                         inline_irg_env *callee_env;
1105                                         ir_graph       *copy;
1106
1107                                         ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1108
1109                                         /*
1110                                          * No copy yet, create one.
1111                                          * Note that recursive methods are never leaves, so it is sufficient
1112                                          * to test this condition here.
1113                                          */
1114                                         copy = create_irg_copy(callee);
1115
1116                                         /* create_irg_copy() destroys the Proj links, recompute them */
1117                                         phiproj_computed = 0;
1118
1119                                         ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1120
1121                                         /* allocate new environment */
1122                                         callee_env = alloc_inline_irg_env();
1123                                         set_irg_link(copy, callee_env);
1124
1125                                         assure_loopinfo(copy);
1126                                         wenv.x              = callee_env;
1127                                         wenv.ignore_callers = 1;
1128                                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
1129
1130                                         /*
1131                                          * Enter the entity of the original graph. This is needed
1132                                          * for inline_method(). However, note that ent->irg still points
1133                                          * to callee, NOT to copy.
1134                                          */
1135                                         set_irg_entity(copy, get_irg_entity(callee));
1136
1137                                         pmap_insert(copied_graphs, callee, copy);
1138                                         callee = copy;
1139
1140                                         /* we have only one caller: the original graph */
1141                                         callee_env->n_callers      = 1;
1142                                         callee_env->n_callers_orig = 1;
1143                                 }
1144                                 if (! phiproj_computed) {
1145                                         phiproj_computed = 1;
1146                                         collect_phiprojs(current_ir_graph);
1147                                 }
1148                                 did_inline = inline_method(call, callee);
1149                                 if (did_inline) {
1150                                         inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);
1151
1152                                         /* call was inlined, Phi/Projs for current graph must be recomputed */
1153                                         phiproj_computed = 0;
1154
1155                                         /* callee was inlined. Append its call list. */
1156                                         env->got_inline = 1;
1157                                         --env->n_call_nodes;
1158                                         append_call_list(env, callee_env, entry->loop_depth);
1159                                         --callee_env->n_callers;
1160
1161                                         /* after we have inlined callee, all called methods inside callee
1162                                            are now called once more */
1163                                         list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
1164                                                 inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
1165                                                 ++penv->n_callers;
1166                                         }
1167
1168                                         /* remove this call from the list */
1169                                         list_del(&entry->list);
1170                                         continue;
1171                                 }
1172                         }
1173                 }
1174                 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1175         }
1176
1177         for (i = 0; i < n_irgs; ++i) {
1178                 irg = get_irp_irg(i);
1179                 env = (inline_irg_env*)get_irg_link(irg);
1180
1181                 if (env->got_inline) {
1182                         optimize_graph_df(irg);
1183                         optimize_cf(irg);
1184                 }
1185                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
1186                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
1187                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
1188                         env->n_callers_orig, env->n_callers,
1189                         get_entity_name(get_irg_entity(irg))));
1190                 }
1191         }
1192
1193         /* kill the copied graphs: we don't need them anymore */
1194         foreach_pmap(copied_graphs, pm_entry) {
1195                 ir_graph *copy = (ir_graph*)pm_entry->value;
1196
1197                 /* reset the entity, otherwise it will be deleted in the next step ... */
1198                 set_irg_entity(copy, NULL);
1199                 free_ir_graph(copy);
1200         }
1201         pmap_destroy(copied_graphs);
1202
1203         obstack_free(&temp_obst, NULL);
1204         current_ir_graph = rem;
1205 }
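
/*
 * Example invocation (all thresholds are arbitrary illustration values):
 * let graphs grow to at most 750 nodes, inline leaf functions smaller than
 * 400 nodes and other functions smaller than 32 nodes, and do not ignore
 * runtime calls:
 *
 *     inline_leave_functions(750, 400, 32, 0);
 */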
1206
1207 typedef struct inline_leave_functions_pass_t {
1208         ir_prog_pass_t pass;
1209         unsigned       maxsize;
1210         unsigned       leavesize;
1211         unsigned       size;
1212         int            ignore_runtime;
1213 } inline_leave_functions_pass_t;
1214
1215 /**
1216  * Wrapper to run inline_leave_functions() as a ir_prog pass.
1217  */
1218 static int inline_leave_functions_wrapper(ir_prog *irp, void *context)
1219 {
1220         inline_leave_functions_pass_t *pass = (inline_leave_functions_pass_t*)context;
1221
1222         (void)irp;
1223         inline_leave_functions(
1224                 pass->maxsize, pass->leavesize,
1225                 pass->size, pass->ignore_runtime);
1226         return 0;
1227 }
1228
1229 /* create a pass for inline_leave_functions() */
1230 ir_prog_pass_t *inline_leave_functions_pass(
1231         const char *name, unsigned maxsize, unsigned leavesize,
1232         unsigned size, int ignore_runtime)
1233 {
1234         inline_leave_functions_pass_t *pass = XMALLOCZ(inline_leave_functions_pass_t);
1235
1236         pass->maxsize        = maxsize;
1237         pass->leavesize      = leavesize;
1238         pass->size           = size;
1239         pass->ignore_runtime = ignore_runtime;
1240
1241         return def_prog_pass_constructor(
1242                 &pass->pass,
1243                 name ? name : "inline_leave_functions",
1244                 inline_leave_functions_wrapper);
1245 }
1246
1247 /**
1248  * Calculate the weight of all uses of one transmitted address; returns 0 if an unsupported use is found.
1249  */
1250 static unsigned calc_method_local_weight(ir_node *arg)
1251 {
1252         int      i, j, k;
1253         unsigned v, weight = 0;
1254
1255         for (i = get_irn_n_outs(arg) - 1; i >= 0; --i) {
1256                 ir_node *succ = get_irn_out(arg, i);
1257
1258                 switch (get_irn_opcode(succ)) {
1259                 case iro_Load:
1260                 case iro_Store:
1261                         /* Loads and Stores can be removed */
1262                         weight += 3;
1263                         break;
1264                 case iro_Sel:
1265                         /* check if all indexes are constant */
1266                         for (j = get_Sel_n_indexs(succ) - 1; j >= 0; --j) {
1267                                 ir_node *idx = get_Sel_index(succ, j);
1268                                 if (! is_Const(idx))
1269                                         return 0;
1270                         }
1271                         /* Check users on this Sel. Note: if a 0 is returned here, there was
1272                            some unsupported node. */
1273                         v = calc_method_local_weight(succ);
1274                         if (v == 0)
1275                                 return 0;
1276                         /* we can kill one Sel with constant indexes, this is cheap */
1277                         weight += v + 1;
1278                         break;
1279                 case iro_Id:
1280                         /* Id nodes may appear as users: look through them */
1281                         weight += calc_method_local_weight(succ);
1282                         break;
1283                 case iro_Tuple:
1284                         /* unoptimized tuple */
1285                         for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
1286                                 ir_node *pred = get_Tuple_pred(succ, j);
1287                                 if (pred == arg) {
1288                                         /* look for Proj(j) */
1289                                         for (k = get_irn_n_outs(succ) - 1; k >= 0; --k) {
1290                                                 ir_node *succ_succ = get_irn_out(succ, k);
1291                                                 if (is_Proj(succ_succ)) {
1292                                                         if (get_Proj_proj(succ_succ) == j) {
1293                                                                 /* found */
1294                                                                 weight += calc_method_local_weight(succ_succ);
1295                                                         }
1296                                                 } else {
1297                                                         /* this should NOT happen */
1298                                                         return 0;
1299                                                 }
1300                                         }
1301                                 }
1302                         }
1303                         break;
1304                 default:
1305                         /* any other node: unsupported yet or bad. */
1306                         return 0;
1307                 }
1308         }
1309         return weight;
1310 }
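/*
 * Worked example for the weights above (hypothetical graph): an argument
 * whose address is used by one Load (+3), one Store (+3) and one Sel with
 * constant indexes whose only user is a further Load (v = 3, so +v+1 = +4)
 * yields a total weight of 3 + 3 + 4 = 10.  A single non-constant index or
 * any unsupported user collapses the whole result to 0.
 */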
1311
1312 /**
1313  * Calculate and cache the local-address weights for all parameters of the given graph.
1314  */
1315 static void analyze_irg_local_weights(inline_irg_env *env, ir_graph *irg)
1316 {
1317         ir_entity *ent = get_irg_entity(irg);
1318         ir_type  *mtp;
1319         size_t   nparams;
1320         int      i;
1321         long     proj_nr;
1322         ir_node  *irg_args, *arg;
1323
1324         mtp      = get_entity_type(ent);
1325         nparams  = get_method_n_params(mtp);
1326
1327         /* allocate a new array; its presence also serves as the 'analysed' flag */
1328         env->local_weights = NEW_ARR_D(unsigned, &temp_obst, nparams);
1329
1330         /* If the method has no parameters, there is nothing to do. */
1331         if (nparams == 0)
1332                 return;
1333
1334         assure_irg_outs(irg);
1335         irg_args = get_irg_args(irg);
1336         for (i = get_irn_n_outs(irg_args) - 1; i >= 0; --i) {
1337                 arg     = get_irn_out(irg_args, i);
1338                 proj_nr = get_Proj_proj(arg);
1339                 env->local_weights[proj_nr] = calc_method_local_weight(arg);
1340         }
1341 }
1342
1343 /**
1344  * Calculate the benefice for transmitting a local variable address.
1345  * After inlining, the local variable might be transformed into an
1346  * SSA variable by scalar_replacement().
1347  */
1348 static unsigned get_method_local_adress_weight(ir_graph *callee, size_t pos)
1349 {
1350         inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);
1351
1352         if (env->local_weights == NULL)
1353                 analyze_irg_local_weights(env, callee);
1354
1355         if (pos < ARR_LEN(env->local_weights))
1356                 return env->local_weights[pos];
1357         return 0;
1358 }
1359
1360 /**
1361  * Calculate a benefice value for inlining the given call.
1362  *
1363  * @param entry      the call entry whose Call node we have to inspect
1364  * @param callee     the called graph
1365  */
1366 static int calc_inline_benefice(call_entry *entry, ir_graph *callee)
1367 {
1368         ir_node   *call = entry->call;
1369         ir_entity *ent  = get_irg_entity(callee);
1370         ir_type   *callee_frame;
1371         size_t    i, n_members, n_params;
1372         ir_node   *frame_ptr;
1373         ir_type   *mtp;
1374         int       weight = 0;
1375         int       all_const;
1376         unsigned  cc, v;
1377         irg_inline_property prop;
1378
1379         inline_irg_env *callee_env;
1380
1381         prop = get_irg_inline_property(callee);
1382         if (prop == irg_inline_forbidden) {
1383                 DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden\n",
1384                     call, callee));
1385                 return entry->benefice = INT_MIN;
1386         }
1387
1388         callee_frame = get_irg_frame_type(callee);
1389         n_members = get_class_n_members(callee_frame);
1390         for (i = 0; i < n_members; ++i) {
1391                 ir_entity *frame_ent = get_class_member(callee_frame, i);
1392                 if (is_parameter_entity(frame_ent)) {
1393                         // TODO inliner should handle parameter entities by inserting Store operations
1394                         DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden due to parameter entity\n", call, callee));
1395                         set_irg_inline_property(callee, irg_inline_forbidden);
1396                         return entry->benefice = INT_MIN;
1397                 }
1398         }
1399
1400         if (get_irg_additional_properties(callee) & mtp_property_noreturn) {
1401                 DB((dbg, LEVEL_2, "In %+F Call to %+F: not inlining noreturn function\n",
1402                     call, callee));
1403                 return entry->benefice = INT_MIN;
1404         }
1405
1406         /* costs for every passed parameter */
1407         n_params = get_Call_n_params(call);
1408         mtp      = get_entity_type(ent);
1409         cc       = get_method_calling_convention(mtp);
1410         if (cc & cc_reg_param) {
1411                 /* parameters are passed in registers: smaller costs */
1412                 size_t max_regs = cc & ~cc_bits;
1413
1414                 if (max_regs < n_params)
1415                         weight += max_regs * 2 + (n_params - max_regs) * 5;
1416                 else
1417                         weight += n_params * 2;
1418         } else {
1419                 /* parameters are passed on the stack */
1420                 weight += 5 * n_params;
1421         }
1422
1423         /* constant parameters improve the benefice */
1424         frame_ptr = get_irg_frame(current_ir_graph);
1425         all_const = 1;
1426         for (i = 0; i < n_params; ++i) {
1427                 ir_node *param = get_Call_param(call, i);
1428
1429                 if (is_Const(param)) {
1430                         weight += get_method_param_weight(ent, i);
1431                 } else {
1432                         all_const = 0;
1433                         if (is_SymConst(param))
1434                                 weight += get_method_param_weight(ent, i);
1435                         else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
1436                                 /*
1437                                  * An address of a local variable is transmitted. After
1438                                  * inlining, scalar_replacement might be able to remove the
1439                                  * local variable, so honor this.
1440                                  */
1441                                 v = get_method_local_adress_weight(callee, i);
1442                                 weight += v;
1443                                 if (v > 0)
1444                                         entry->local_adr = 1;
1445                         }
1446                 }
1447         }
1448         entry->all_const = all_const;
1449
1450         callee_env = (inline_irg_env*)get_irg_link(callee);
1451         if (callee_env->n_callers == 1 &&
1452             callee != current_ir_graph &&
1453             !entity_is_externally_visible(ent)) {
1454                 weight += 700;
1455         }
1456
1457         /* give a bonus for functions with one block */
1458         if (callee_env->n_blocks == 1)
1459                 weight = weight * 3 / 2;
1460
1461         /* and one for small non-recursive functions: we want them to be inlined in almost every case */
1462         if (callee_env->n_nodes < 30 && !callee_env->recursive)
1463                 weight += 2000;
1464
1465         /* and finally for leaves: they do not increase the register pressure
1466            because of callee-save registers */
1467         if (callee_env->n_call_nodes == 0)
1468                 weight += 400;
1469
1470         /* it is important to inline calls in inner loops first */
1471         if (entry->loop_depth > 30)
1472                 weight += 30 * 1024;
1473         else
1474                 weight += entry->loop_depth * 1024;
1475
1476         /*
1477          * All arguments being constant is probably a good sign: give an extra bonus.
1478          */
1479         if (all_const)
1480                 weight += 1024;
1481
1482         return entry->benefice = weight;
1483 }
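/*
 * Worked example with made-up numbers (ignoring the per-parameter constant
 * weights): a call passing two parameters in registers (2 * 2 = 4) to a
 * static callee with exactly one caller (+700), a single block (* 3/2),
 * fewer than 30 nodes and no recursion (+2000), no calls of its own (+400),
 * at loop depth 1 (+1024), with all arguments constant (+1024) scores
 * 1056 + 2000 + 400 + 1024 + 1024 = 5504, well above a modest threshold.
 */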
1484
1485 typedef struct walk_env_t {
1486         ir_graph **irgs;
1487         size_t   last_irg;
1488 } walk_env_t;
1489
1490 /**
1491  * Callgraph walker, collect all visited graphs.
1492  */
1493 static void callgraph_walker(ir_graph *irg, void *data)
1494 {
1495         walk_env_t *env = (walk_env_t *)data;
1496         env->irgs[env->last_irg++] = irg;
1497 }
1498
1499 /**
1500  * Creates an inline order for all graphs.
1501  *
1502  * @return the list of graphs.
1503  */
1504 static ir_graph **create_irg_list(void)
1505 {
1506         ir_entity  **free_methods;
1507         size_t     n_irgs = get_irp_n_irgs();
1508         walk_env_t env;
1509
1510         cgana(&free_methods);
1511         xfree(free_methods);
1512
1513         compute_callgraph();
1514
1515         env.irgs     = XMALLOCNZ(ir_graph*, n_irgs);
1516         env.last_irg = 0;
1517
1518         callgraph_walk(NULL, callgraph_walker, &env);
1519         assert(n_irgs == env.last_irg);
1520
1521         return env.irgs;
1522 }
1523
1524 /**
1525  * Push a call onto the priority queue if its benefice is big enough.
1526  *
1527  * @param pqueue   the priority queue of calls
1528  * @param call     the call entry
1529  * @param inline_threshold
1530  *                 the threshold value
1531  */
1532 static void maybe_push_call(pqueue_t *pqueue, call_entry *call,
1533                             int inline_threshold)
1534 {
1535         ir_graph            *callee  = call->callee;
1536         irg_inline_property prop     = get_irg_inline_property(callee);
1537         int                 benefice = calc_inline_benefice(call, callee);
1538
1539         DB((dbg, LEVEL_2, "In %+F Call %+F to %+F has benefice %d\n",
1540             get_irn_irg(call->call), call->call, callee, benefice));
1541
1542         if (prop < irg_inline_forced && benefice < inline_threshold) {
1543                 return;
1544         }
1545
1546         pqueue_put(pqueue, call, benefice);
1547 }
1548
1549 /**
1550  * Try to inline calls into a graph.
1551  *
1552  * @param irg      the graph into which we inline
1553  * @param maxsize  do NOT inline if the size of irg gets
1554  *                 bigger than this amount
1555  * @param inline_threshold
1556  *                 threshold value for inline decision
1557  * @param copied_graphs
1558  *                 map containing copies of recursive graphs
1559  */
1560 static void inline_into(ir_graph *irg, unsigned maxsize,
1561                         int inline_threshold, pmap *copied_graphs)
1562 {
1563         int            phiproj_computed = 0;
1564         inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
1565         call_entry     *curr_call;
1566         wenv_t         wenv;
1567         pqueue_t       *pqueue;
1568
1569         if (env->n_call_nodes == 0)
1570                 return;
1571
1572         if (env->n_nodes > maxsize) {
1573                 DB((dbg, LEVEL_2, "%+F: too big (%d)\n", irg, env->n_nodes));
1574                 return;
1575         }
1576
1577         current_ir_graph = irg;
1578         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1579
1580         /* put all calls of this graph into the pqueue */
1581         pqueue = new_pqueue();
1582
1583         list_for_each_entry(call_entry, curr_call, &env->calls, list) {
1584                 assert(is_Call(curr_call->call));
1585                 maybe_push_call(pqueue, curr_call, inline_threshold);
1586         }
1587
1588         /* note that the list of possible calls is updated during the process */
1589         while (!pqueue_empty(pqueue)) {
1590                 int                 did_inline;
1591                 call_entry          *curr_call  = (call_entry*)pqueue_pop_front(pqueue);
1592                 ir_graph            *callee     = curr_call->callee;
1593                 ir_node             *call_node  = curr_call->call;
1594                 inline_irg_env      *callee_env = (inline_irg_env*)get_irg_link(callee);
1595                 irg_inline_property prop        = get_irg_inline_property(callee);
1596                 int                 loop_depth;
1597                 const call_entry    *centry;
1598                 pmap_entry          *e;
1599
1600                 if ((prop < irg_inline_forced) && env->n_nodes + callee_env->n_nodes > maxsize) {
1601                         DB((dbg, LEVEL_2, "%+F: too big (%d) + %+F (%d)\n", irg,
1602                                                 env->n_nodes, callee, callee_env->n_nodes));
1603                         continue;
1604                 }
1605
1606                 e = pmap_find(copied_graphs, callee);
1607                 if (e != NULL) {
1608                         int benefice = curr_call->benefice;
1609                         /*
1610                          * Reduce the weight for recursive functions iff not all arguments
1611                          * are constant: inlining recursive functions is rarely good.
1612                          */
1613                         if (!curr_call->all_const)
1614                                 benefice -= 2000;
1615                         if (benefice < inline_threshold)
1616                                 continue;
1617
1618                         /*
1619                          * Remap callee if we have a copy.
1620                          */
1621                         callee     = (ir_graph*)e->value;
1622                         callee_env = (inline_irg_env*)get_irg_link(callee);
1623                 }
1624
1625                 if (current_ir_graph == callee) {
1626                         /*
1627                          * Recursive call: we cannot directly inline because we cannot
1628                          * walk the graph and change it. So we have to make a copy of
1629                          * the graph first.
1630                          */
1631                         int benefice = curr_call->benefice;
1632                         ir_graph *copy;
1633
1634                         /*
1635                          * Reduce the weight for recursive functions iff not all arguments
1636                          * are constant: inlining recursive functions is rarely good.
1637                          */
1638                         if (!curr_call->all_const)
1639                                 benefice -= 2000;
1640                         if (benefice < inline_threshold)
1641                                 continue;
1642
1643                         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1644
1645                         /*
1646                          * No copy yet, create one.
1647                          * Note that recursive methods are never leaves, so it is
1648                          * sufficient to test this condition here.
1649                          */
1650                         copy = create_irg_copy(callee);
1651
1652                         /* create_irg_copy() destroys the Proj links, recompute them */
1653                         phiproj_computed = 0;
1654
1655                         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1656
1657                         /* allocate a new environment */
1658                         callee_env = alloc_inline_irg_env();
1659                         set_irg_link(copy, callee_env);
1660
1661                         assure_loopinfo(copy);
1662                         memset(&wenv, 0, sizeof(wenv));
1663                         wenv.x              = callee_env;
1664                         wenv.ignore_callers = 1;
1665                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
1666
1667                         /*
1668                          * Enter the entity of the original graph. This is needed
1669                          * for inline_method(). However, note that ent->irg still points
1670                          * to callee, NOT to copy.
1671                          */
1672                         set_irg_entity(copy, get_irg_entity(callee));
1673
1674                         pmap_insert(copied_graphs, callee, copy);
1675                         callee = copy;
1676
1677                         /* we have only one caller: the original graph */
1678                         callee_env->n_callers      = 1;
1679                         callee_env->n_callers_orig = 1;
1680                 }
1681                 if (! phiproj_computed) {
1682                         phiproj_computed = 1;
1683                         collect_phiprojs(current_ir_graph);
1684                 }
1685                 did_inline = inline_method(call_node, callee);
1686                 if (!did_inline)
1687                         continue;
1688
1689                 /* call was inlined, Phi/Projs for current graph must be recomputed */
1690                 phiproj_computed = 0;
1691
1692                 /* remove it from the caller list */
1693                 list_del(&curr_call->list);
1694
1695                 /* callee was inlined. Append its call list. */
1696                 env->got_inline = 1;
1697                 --env->n_call_nodes;
1698
1699                 /* we just generate a bunch of new calls */
1700                 loop_depth = curr_call->loop_depth;
1701                 list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
1702                         inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
1703                         ir_node        *new_call;
1704                         call_entry     *new_entry;
1705
1706                         /* after we have inlined callee, all called methods inside
1707                          * callee are now called once more */
1708                         ++penv->n_callers;
1709
1710                         /* Note that the src list points to Call nodes in the inlined graph,
1711                          * but we need Call nodes in our graph. Luckily the inliner leaves
1712                          * this information in the link field. */
1713                         new_call = (ir_node*)get_irn_link(centry->call);
1714                         if (get_irn_irg(new_call) != irg) {
1715                                 /* centry->call has not been copied, which means it is dead.
1716                          * This might happen during inlining if a const function that
1717                          * cannot be inlined is only used as an unused argument of
1718                          * another function that is inlined. */
1719                                 continue;
1720                         }
1721                         assert(is_Call(new_call));
1722
1723                         new_entry = duplicate_call_entry(centry, new_call, loop_depth);
1724                         list_add_tail(&new_entry->list, &env->calls);
1725                         maybe_push_call(pqueue, new_entry, inline_threshold);
1726                 }
1727
1728                 env->n_call_nodes += callee_env->n_call_nodes;
1729                 env->n_nodes += callee_env->n_nodes;
1730                 --callee_env->n_callers;
1731         }
1732         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1733         del_pqueue(pqueue);
1734 }
1735
1736 /*
1737  * Heuristic inliner. Calculates a benefice value for every call and inlines
1738  * those calls with a value higher than the threshold.
1739  */
1740 void inline_functions(unsigned maxsize, int inline_threshold,
1741                       opt_ptr after_inline_opt)
1742 {
1743         inline_irg_env   *env;
1744         size_t           i, n_irgs;
1745         ir_graph         *rem;
1746         wenv_t           wenv;
1747         pmap             *copied_graphs;
1748         pmap_entry       *pm_entry;
1749         ir_graph         **irgs;
1750
1751         rem = current_ir_graph;
1752         obstack_init(&temp_obst);
1753
1754         irgs = create_irg_list();
1755
1756         /* a map for the copied graphs, used to inline recursive calls */
1757         copied_graphs = pmap_create();
1758
1759         /* extend all irgs by a temporary data structure for inlining. */
1760         n_irgs = get_irp_n_irgs();
1761         for (i = 0; i < n_irgs; ++i)
1762                 set_irg_link(irgs[i], alloc_inline_irg_env());
1763
1764         /* Pre-compute information in temporary data structure. */
1765         wenv.ignore_runtime = 0;
1766         wenv.ignore_callers = 0;
1767         for (i = 0; i < n_irgs; ++i) {
1768                 ir_graph *irg = irgs[i];
1769
1770                 free_callee_info(irg);
1771
1772                 wenv.x = (inline_irg_env*)get_irg_link(irg);
1773                 assure_loopinfo(irg);
1774                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
1775         }
1776
1777         /* -- and now inline. -- */
1778         for (i = 0; i < n_irgs; ++i) {
1779                 ir_graph *irg = irgs[i];
1780
1781                 inline_into(irg, maxsize, inline_threshold, copied_graphs);
1782         }
1783
1784         for (i = 0; i < n_irgs; ++i) {
1785                 ir_graph *irg = irgs[i];
1786
1787                 env = (inline_irg_env*)get_irg_link(irg);
1788                 if (env->got_inline && after_inline_opt != NULL) {
1789                         /* this irg got calls inlined: optimize it */
1790                         after_inline_opt(irg);
1791                 }
1792                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
1793                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
1794                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
1795                         env->n_callers_orig, env->n_callers,
1796                         get_entity_name(get_irg_entity(irg))));
1797                 }
1798         }
1799
1800         /* kill the copied graphs: we don't need them anymore */
1801         foreach_pmap(copied_graphs, pm_entry) {
1802                 ir_graph *copy = (ir_graph*)pm_entry->value;
1803
1804                 /* reset the entity, otherwise it will be deleted in the next step ... */
1805                 set_irg_entity(copy, NULL);
1806                 free_ir_graph(copy);
1807         }
1808         pmap_destroy(copied_graphs);
1809
1810         xfree(irgs);
1811
1812         obstack_free(&temp_obst, NULL);
1813         current_ir_graph = rem;
1814 }
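/*
 * Usage sketch with illustrative values: a driver could run the heuristic
 * inliner with a 750-node growth limit, threshold 0, and a local cleanup
 * of every changed graph afterwards.  optimize_graph_df() is assumed here
 * to match the opt_ptr signature.
 *
 *     inline_functions(750, 0, optimize_graph_df);
 */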
1815
1816 typedef struct inline_functions_pass_t {
1817         ir_prog_pass_t pass;
1818         unsigned       maxsize;
1819         int            inline_threshold;
1820         opt_ptr        after_inline_opt;
1821 } inline_functions_pass_t;
1822
1823 /**
1824  * Wrapper to run inline_functions() as an ir_prog pass.
1825  */
1826 static int inline_functions_wrapper(ir_prog *irp, void *context)
1827 {
1828         inline_functions_pass_t *pass = (inline_functions_pass_t*)context;
1829
1830         (void)irp;
1831         inline_functions(pass->maxsize, pass->inline_threshold,
1832                          pass->after_inline_opt);
1833         return 0;
1834 }
1835
1836 /* create an ir_prog pass for inline_functions() */
1837 ir_prog_pass_t *inline_functions_pass(
1838           const char *name, unsigned maxsize, int inline_threshold,
1839           opt_ptr after_inline_opt)
1840 {
1841         inline_functions_pass_t *pass = XMALLOCZ(inline_functions_pass_t);
1842
1843         pass->maxsize          = maxsize;
1844         pass->inline_threshold = inline_threshold;
1845         pass->after_inline_opt = after_inline_opt;
1846
1847         return def_prog_pass_constructor(
1848                 &pass->pass, name ? name : "inline_functions",
1849                 inline_functions_wrapper);
1850 }
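/*
 * Pass-based sketch (values illustrative, pass-manager wiring assumed):
 * the returned ir_prog pass defers to inline_functions() when scheduled,
 * so it can replace the direct call shown above.
 *
 *     ir_prog_pass_t *p =
 *             inline_functions_pass(NULL, 750, 0, optimize_graph_df);
 */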
1851
1852 void firm_init_inline(void)
1853 {
1854         FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
1855 }