0be240b7304f59da643b5ca2266cfeea444798d2
[libfirm] / ir / opt / opt_inline.c
1 /*
2  * Copyright (C) 1995-2011 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief    Dead node elimination and Procedure Inlining.
23  * @author   Michael Beck, Goetz Lindenmaier
24  * @version  $Id$
25  */
26 #include "config.h"
27
28 #include <limits.h>
29 #include <stdbool.h>
30 #include <assert.h>
31
32 #include "irnode_t.h"
33 #include "irgraph_t.h"
34 #include "irprog_t.h"
35
36 #include "iroptimize.h"
37 #include "ircons_t.h"
38 #include "iropt_t.h"
39 #include "irgopt.h"
40 #include "irgmod.h"
41 #include "irgwalk.h"
42
43 #include "array_t.h"
44 #include "list.h"
45 #include "pset.h"
46 #include "pmap.h"
47 #include "pdeq.h"
48 #include "xmalloc.h"
49 #include "pqueue.h"
50
51 #include "irouts.h"
52 #include "irloop_t.h"
53 #include "irbackedge_t.h"
54 #include "opt_init.h"
55 #include "cgana.h"
56 #include "trouts.h"
57 #include "error.h"
58
59 #include "analyze_irg_args.h"
60 #include "iredges_t.h"
61 #include "irflag_t.h"
62 #include "irhooks.h"
63 #include "irtools.h"
64 #include "iropt_dbg.h"
65 #include "irpass_t.h"
66 #include "irphase_t.h"
67
68 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
69
70 /*------------------------------------------------------------------*/
71 /* Routines for dead node elimination / copying garbage collection  */
72 /* of the obstack.                                                  */
73 /*------------------------------------------------------------------*/
74
75 /**
76  * Remember the new node in the old node by using a field all nodes have.
77  */
78 static void set_new_node(ir_node *node, ir_node *new_node)
79 {
80         set_irn_link(node, new_node);
81 }
82
83 /**
84  * Get this new node, before the old node is forgotten.
85  */
86 static inline ir_node *get_new_node(ir_node *old_node)
87 {
88         assert(irn_visited(old_node));
89         return (ir_node*) get_irn_link(old_node);
90 }
91
92 /*--------------------------------------------------------------------*/
93 /*  Functionality for inlining                                         */
94 /*--------------------------------------------------------------------*/
95
96 /**
97  * Copy node for inlineing.  Updates attributes that change when
98  * inlineing but not for dead node elimination.
99  *
100  * Copies the node by calling copy_node() and then updates the entity if
101  * it's a local one.  env must be a pointer of the frame type of the
102  * inlined procedure. The new entities must be in the link field of
103  * the entities.
104  */
105 static void copy_node_inline(ir_node *node, void *env)
106 {
107         ir_graph *new_irg  = (ir_graph*) env;
108         ir_node  *new_node = irn_copy_into_irg(node, new_irg);
109
110         set_new_node(node, new_node);
111         if (is_Sel(node)) {
112                 ir_graph  *old_irg        = get_irn_irg(node);
113                 ir_type   *old_frame_type = get_irg_frame_type(old_irg);
114                 ir_entity *old_entity     = get_Sel_entity(node);
115                 assert(is_Sel(new_node));
116                 /* use copied entities from the new frame */
117                 if (get_entity_owner(old_entity) == old_frame_type) {
118                         ir_entity *new_entity = (ir_entity*)get_entity_link(old_entity);
119                         assert(new_entity != NULL);
120                         set_Sel_entity(new_node, new_entity);
121                 }
122         } else if (is_Block(new_node)) {
123                 new_node->attr.block.irg.irg = new_irg;
124         }
125 }
126
127 static void set_preds_inline(ir_node *node, void *env)
128 {
129         ir_node *new_node;
130
131         irn_rewire_inputs(node);
132
133         /* move constants into start block */
134         new_node = get_new_node(node);
135         if (is_irn_constlike(new_node)) {
136                 ir_graph *new_irg     = (ir_graph *) env;
137                 ir_node  *start_block = get_irg_start_block(new_irg);
138                 set_nodes_block(new_node, start_block);
139         }
140 }
141
142 /**
143  * Walker: checks if P_value_arg_base is used.
144  */
145 static void find_addr(ir_node *node, void *env)
146 {
147         bool *allow_inline = (bool*)env;
148
149         if (is_Sel(node)) {
150                 ir_graph *irg = current_ir_graph;
151                 if (get_Sel_ptr(node) == get_irg_frame(irg)) {
152                         /* access to frame */
153                         ir_entity *ent = get_Sel_entity(node);
154                         if (get_entity_owner(ent) != get_irg_frame_type(irg)) {
155                                 /* access to value_type */
156                                 *allow_inline = false;
157                         }
158                 }
159         } else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
160                 /* From GCC:
161                  * Refuse to inline alloca call unless user explicitly forced so as this
162                  * may change program's memory overhead drastically when the function
163                  * using alloca is called in loop.  In GCC present in SPEC2000 inlining
164                  * into schedule_block cause it to require 2GB of ram instead of 256MB.
165                  *
166                  * Sorrily this is true with our implementation also.
167                  * Moreover, we cannot differentiate between alloca() and VLA yet, so
168                  * this disables inlining of functions using VLA (which are completely
169                  * save).
170                  *
171                  * 2 Solutions:
172                  * - add a flag to the Alloc node for "real" alloca() calls
173                  * - add a new Stack-Restore node at the end of a function using
174                  *   alloca()
175                  */
176                 *allow_inline = false;
177         }
178 }
179
180 /**
181  * Check if we can inline a given call.
182  * Currently, we cannot inline two cases:
183  * - call with compound arguments
184  * - graphs that take the address of a parameter
185  *
186  * check these conditions here
187  */
188 static bool can_inline(ir_node *call, ir_graph *called_graph)
189 {
190         ir_entity          *called      = get_irg_entity(called_graph);
191         ir_type            *called_type = get_entity_type(called);
192         ir_type            *call_type   = get_Call_type(call);
193         size_t              n_params    = get_method_n_params(called_type);
194         size_t              n_arguments = get_method_n_params(call_type);
195         size_t              n_res       = get_method_n_ress(called_type);
196         irg_inline_property prop        = get_irg_inline_property(called_graph);
197         size_t              i;
198         bool                res;
199
200         if (prop == irg_inline_forbidden)
201                 return false;
202
203         if (n_arguments != n_params) {
204                 /* this is a bad feature of C: without a prototype, we can
205                  * call a function with less parameters than needed. Currently
206                  * we don't support this, although we could use Unknown than. */
207                 return false;
208         }
209         if (n_res != get_method_n_ress(call_type)) {
210                 return false;
211         }
212
213         /* Argh, compiling C has some bad consequences:
214          * It is implementation dependent what happens in that case.
215          * We support inlining, if the bitsize of the types matches AND
216          * the same arithmetic is used. */
217         for (i = 0; i < n_params; ++i) {
218                 ir_type *param_tp = get_method_param_type(called_type, i);
219                 ir_type *arg_tp   = get_method_param_type(call_type, i);
220
221                 if (param_tp != arg_tp) {
222                         ir_mode *pmode = get_type_mode(param_tp);
223                         ir_mode *amode = get_type_mode(arg_tp);
224
225                         if (pmode == NULL || amode == NULL)
226                                 return false;
227                         if (get_mode_size_bits(pmode) != get_mode_size_bits(amode))
228                                 return false;
229                         if (get_mode_arithmetic(pmode) != get_mode_arithmetic(amode))
230                                 return false;
231                         /* otherwise we can simply "reinterpret" the bits */
232                 }
233         }
234         for (i = 0; i < n_res; ++i) {
235                 ir_type *decl_res_tp = get_method_res_type(called_type, i);
236                 ir_type *used_res_tp = get_method_res_type(call_type, i);
237
238                 if (decl_res_tp != used_res_tp) {
239                         ir_mode *decl_mode = get_type_mode(decl_res_tp);
240                         ir_mode *used_mode = get_type_mode(used_res_tp);
241                         if (decl_mode == NULL || used_mode == NULL)
242                                 return false;
243                         if (get_mode_size_bits(decl_mode) != get_mode_size_bits(used_mode))
244                                 return false;
245                         if (get_mode_arithmetic(decl_mode) != get_mode_arithmetic(used_mode))
246                                 return false;
247                         /* otherwise we can "reinterpret" the bits */
248                 }
249         }
250
251         /* check parameters for compound arguments */
252         for (i = 0; i < n_params; ++i) {
253                 ir_type *p_type = get_method_param_type(call_type, i);
254
255                 if (is_compound_type(p_type))
256                         return false;
257         }
258
259         /* check results for compound arguments */
260         for (i = 0; i < n_res; ++i) {
261                 ir_type *r_type = get_method_res_type(call_type, i);
262
263                 if (is_compound_type(r_type))
264                         return false;
265         }
266
267         res = true;
268         irg_walk_graph(called_graph, find_addr, NULL, &res);
269
270         return res;
271 }
272
273 enum exc_mode {
274         exc_handler,    /**< There is a handler. */
275         exc_no_handler  /**< Exception handling not represented. */
276 };
277
278 /**
279  * copy all entities on the stack frame on 1 irg to the stackframe of another.
280  * Sets entity links of the old entities to the copies
281  */
282 static void copy_frame_entities(ir_graph *from, ir_graph *to)
283 {
284         ir_type *from_frame = get_irg_frame_type(from);
285         ir_type *to_frame   = get_irg_frame_type(to);
286         int      n_members  = get_class_n_members(from_frame);
287         int      i;
288         assert(from_frame != to_frame);
289
290         for (i = 0; i < n_members; ++i) {
291                 ir_entity *old_ent = get_class_member(from_frame, i);
292                 ir_entity *new_ent = copy_entity_own(old_ent, to_frame);
293                 set_entity_link(old_ent, new_ent);
294         }
295 }
296
297 /* Inlines a method at the given call site. */
298 int inline_method(ir_node *call, ir_graph *called_graph)
299 {
300         ir_node       *pre_call;
301         ir_node       *post_call, *post_bl;
302         ir_node       *in[pn_Start_max];
303         ir_node       *end, *end_bl, *block;
304         ir_node       **res_pred;
305         ir_node       **cf_pred;
306         ir_node       **args_in;
307         ir_node       *ret, *phi;
308         int           arity, n_ret, n_exc, n_res, i, j, rem_opt;
309         int           irn_arity, n_params;
310         int           n_mem_phi;
311         enum exc_mode exc_handling;
312         ir_type       *mtp;
313         ir_type       *ctp;
314         ir_entity     *ent;
315         ir_graph      *rem;
316         ir_graph      *irg = get_irn_irg(call);
317
318         /* we cannot inline some types of calls */
319         if (! can_inline(call, called_graph))
320                 return 0;
321
322         /* We cannot inline a recursive call. The graph must be copied before
323          * the call the inline_method() using create_irg_copy(). */
324         if (called_graph == irg)
325                 return 0;
326
327         ent      = get_irg_entity(called_graph);
328         mtp      = get_entity_type(ent);
329         ctp      = get_Call_type(call);
330         n_params = get_method_n_params(mtp);
331         n_res    = get_method_n_ress(mtp);
332
333         rem = current_ir_graph;
334         current_ir_graph = irg;
335
336         DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));
337
338         /* optimizations can cause problems when allocating new nodes */
339         rem_opt = get_opt_optimize();
340         set_optimize(0);
341
342         /* Handle graph state */
343         assert(get_irg_phase_state(irg) != phase_building);
344         assert(get_irg_pinned(irg) == op_pin_state_pinned);
345         assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
346         set_irg_outs_inconsistent(irg);
347         set_irg_extblk_inconsistent(irg);
348         set_irg_doms_inconsistent(irg);
349         set_irg_loopinfo_inconsistent(irg);
350         set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
351         set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);
352         edges_deactivate(irg);
353
354         /* here we know we WILL inline, so inform the statistics */
355         hook_inline(call, called_graph);
356
357         /* -- Decide how to handle exception control flow: Is there a handler
358            for the Call node, or do we branch directly to End on an exception?
359            exc_handling:
360            0 There is a handler.
361            2 Exception handling not represented in Firm. -- */
362         {
363                 ir_node *Xproj = NULL;
364                 ir_node *proj;
365                 for (proj = (ir_node*)get_irn_link(call); proj != NULL;
366                      proj = (ir_node*)get_irn_link(proj)) {
367                         long proj_nr = get_Proj_proj(proj);
368                         if (proj_nr == pn_Call_X_except) Xproj = proj;
369                 }
370                 exc_handling = Xproj != NULL ? exc_handler : exc_no_handler;
371         }
372
373         /* create the argument tuple */
374         args_in = ALLOCAN(ir_node*, n_params);
375
376         block = get_nodes_block(call);
377         for (i = n_params - 1; i >= 0; --i) {
378                 ir_node *arg      = get_Call_param(call, i);
379                 ir_type *param_tp = get_method_param_type(mtp, i);
380                 ir_mode *mode     = get_type_mode(param_tp);
381
382                 if (mode != get_irn_mode(arg)) {
383                         arg = new_r_Conv(block, arg, mode);
384                 }
385                 args_in[i] = arg;
386         }
387
388         /* the procedure and later replaces the Start node of the called graph.
389          * Post_call is the old Call node and collects the results of the called
390          * graph. Both will end up being a tuple. */
391         post_bl = get_nodes_block(call);
392         /* XxMxPxPxPxT of Start + parameter of Call */
393         in[pn_Start_M]              = get_Call_mem(call);
394         in[pn_Start_X_initial_exec] = new_r_Jmp(post_bl);
395         in[pn_Start_P_frame_base]   = get_irg_frame(irg);
396         in[pn_Start_P_tls]          = get_irg_tls(irg);
397         in[pn_Start_T_args]         = new_r_Tuple(post_bl, n_params, args_in);
398         pre_call = new_r_Tuple(post_bl, pn_Start_max, in);
399         post_call = call;
400
401         /* --
402            The new block gets the ins of the old block, pre_call and all its
403            predecessors and all Phi nodes. -- */
404         part_block(pre_call);
405
406         /* increment visited flag for later walk */
407         inc_irg_visited(called_graph);
408
409         /* link some nodes to nodes in the current graph so instead of copying
410          * the linked nodes will get used.
411          * So the copier will use the created Tuple instead of copying the start
412          * node, similar for singleton nodes like NoMem and Bad.
413          * Note: this will prohibit predecessors to be copied - only do it for
414          *       nodes without predecessors */
415         {
416                 ir_node *start_block;
417                 ir_node *start;
418                 ir_node *bad;
419                 ir_node *nomem;
420
421                 start_block = get_irg_start_block(called_graph);
422                 set_new_node(start_block, get_nodes_block(pre_call));
423                 mark_irn_visited(start_block);
424
425                 start = get_irg_start(called_graph);
426                 set_new_node(start, pre_call);
427                 mark_irn_visited(start);
428
429                 bad = get_irg_bad(called_graph);
430                 set_new_node(bad, get_irg_bad(irg));
431                 mark_irn_visited(bad);
432
433                 nomem = get_irg_no_mem(called_graph);
434                 set_new_node(nomem, get_irg_no_mem(irg));
435                 mark_irn_visited(nomem);
436         }
437
438         /* entitiy link is used to link entities on old stackframe to the
439          * new stackframe */
440         irp_reserve_resources(irp, IR_RESOURCE_ENTITY_LINK);
441
442         /* copy entities and nodes */
443         assert(!irn_visited(get_irg_end(called_graph)));
444         copy_frame_entities(called_graph, irg);
445         irg_walk_core(get_irg_end(called_graph), copy_node_inline, set_preds_inline,
446                       irg);
447
448         irp_free_resources(irp, IR_RESOURCE_ENTITY_LINK);
449
450         /* -- Merge the end of the inlined procedure with the call site -- */
451         /* We will turn the old Call node into a Tuple with the following
452            predecessors:
453            -1:  Block of Tuple.
454            0: Phi of all Memories of Return statements.
455            1: Jmp from new Block that merges the control flow from all exception
456            predecessors of the old end block.
457            2: Tuple of all arguments.
458            3: Phi of Exception memories.
459            In case the old Call directly branches to End on an exception we don't
460            need the block merging all exceptions nor the Phi of the exception
461            memories.
462         */
463
464         /* Precompute some values */
465         end_bl = get_new_node(get_irg_end_block(called_graph));
466         end    = get_new_node(get_irg_end(called_graph));
467         arity  = get_Block_n_cfgpreds(end_bl);    /* arity = n_exc + n_ret  */
468         n_res  = get_method_n_ress(get_Call_type(call));
469
470         res_pred = XMALLOCN(ir_node*, n_res);
471         cf_pred  = XMALLOCN(ir_node*, arity);
472
473         /* archive keepalives */
474         irn_arity = get_irn_arity(end);
475         for (i = 0; i < irn_arity; i++) {
476                 ir_node *ka = get_End_keepalive(end, i);
477                 if (! is_Bad(ka))
478                         add_End_keepalive(get_irg_end(irg), ka);
479         }
480
481         /* replace Return nodes by Jump nodes */
482         n_ret = 0;
483         for (i = 0; i < arity; i++) {
484                 ir_node *ret;
485                 ret = get_Block_cfgpred(end_bl, i);
486                 if (is_Return(ret)) {
487                         ir_node *block = get_nodes_block(ret);
488                         cf_pred[n_ret] = new_r_Jmp(block);
489                         n_ret++;
490                 }
491         }
492         set_irn_in(post_bl, n_ret, cf_pred);
493
494         /* build a Tuple for all results of the method.
495          * add Phi node if there was more than one Return. */
496         turn_into_tuple(post_call, pn_Call_max);
497         /* First the Memory-Phi */
498         n_mem_phi = 0;
499         for (i = 0; i < arity; i++) {
500                 ret = get_Block_cfgpred(end_bl, i);
501                 if (is_Return(ret)) {
502                         cf_pred[n_mem_phi++] = get_Return_mem(ret);
503                 }
504                 /* memory output for some exceptions is directly connected to End */
505                 if (is_Call(ret)) {
506                         cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 3);
507                 } else if (is_fragile_op(ret)) {
508                         /* We rely that all cfops have the memory output at the same position. */
509                         cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 0);
510                 } else if (is_Raise(ret)) {
511                         cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 1);
512                 }
513         }
514         phi = new_r_Phi(post_bl, n_mem_phi, cf_pred, mode_M);
515         set_Tuple_pred(call, pn_Call_M, phi);
516         /* Conserve Phi-list for further inlinings -- but might be optimized */
517         if (get_nodes_block(phi) == post_bl) {
518                 set_irn_link(phi, get_irn_link(post_bl));
519                 set_irn_link(post_bl, phi);
520         }
521         /* Now the real results */
522         if (n_res > 0) {
523                 ir_node *result_tuple;
524                 for (j = 0; j < n_res; j++) {
525                         ir_type *res_type = get_method_res_type(ctp, j);
526                         ir_mode *res_mode = get_type_mode(res_type);
527                         n_ret = 0;
528                         for (i = 0; i < arity; i++) {
529                                 ret = get_Block_cfgpred(end_bl, i);
530                                 if (is_Return(ret)) {
531                                         ir_node *res = get_Return_res(ret, j);
532                                         if (get_irn_mode(res) != res_mode) {
533                                                 ir_node *block = get_nodes_block(res);
534                                                 res = new_r_Conv(block, res, res_mode);
535                                         }
536                                         cf_pred[n_ret] = res;
537                                         n_ret++;
538                                 }
539                         }
540                         if (n_ret > 0) {
541                                 ir_mode *mode = get_irn_mode(cf_pred[0]);
542                                 phi = new_r_Phi(post_bl, n_ret, cf_pred, mode);
543                         } else {
544                                 phi = new_r_Bad(irg);
545                         }
546                         res_pred[j] = phi;
547                         /* Conserve Phi-list for further inlinings -- but might be optimized */
548                         if (get_nodes_block(phi) == post_bl) {
549                                 set_Phi_next(phi, get_Block_phis(post_bl));
550                                 set_Block_phis(post_bl, phi);
551                         }
552                 }
553                 result_tuple = new_r_Tuple(post_bl, n_res, res_pred);
554                 set_Tuple_pred(call, pn_Call_T_result, result_tuple);
555         } else {
556                 set_Tuple_pred(call, pn_Call_T_result, new_r_Bad(irg));
557         }
558         /* handle the regular call */
559         set_Tuple_pred(call, pn_Call_X_regular, new_r_Jmp(post_bl));
560
561         /* For now, we cannot inline calls with value_base */
562         set_Tuple_pred(call, pn_Call_P_value_res_base, new_r_Bad(irg));
563
564         /* Finally the exception control flow.
565            We have two possible situations:
566            First if the Call branches to an exception handler:
567            We need to add a Phi node to
568            collect the memory containing the exception objects.  Further we need
569            to add another block to get a correct representation of this Phi.  To
570            this block we add a Jmp that resolves into the X output of the Call
571            when the Call is turned into a tuple.
572            Second: There is no exception edge. Just add all inlined exception
573            branches to the End node.
574          */
575         if (exc_handling == exc_handler) {
576                 n_exc = 0;
577                 for (i = 0; i < arity; i++) {
578                         ir_node *ret, *irn;
579                         ret = get_Block_cfgpred(end_bl, i);
580                         irn = skip_Proj(ret);
581                         if (is_fragile_op(irn) || is_Raise(irn)) {
582                                 cf_pred[n_exc] = ret;
583                                 ++n_exc;
584                         }
585                 }
586                 if (n_exc > 0) {
587                         if (n_exc == 1) {
588                                 /* simple fix */
589                                 set_Tuple_pred(call, pn_Call_X_except, cf_pred[0]);
590                         } else {
591                                 ir_node *block = new_r_Block(irg, n_exc, cf_pred);
592                                 set_Tuple_pred(call, pn_Call_X_except, new_r_Jmp(block));
593                         }
594                 } else {
595                         set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg));
596                 }
597         } else {
598                 ir_node *main_end_bl;
599                 int main_end_bl_arity;
600                 ir_node **end_preds;
601
602                 /* assert(exc_handling == 1 || no exceptions. ) */
603                 n_exc = 0;
604                 for (i = 0; i < arity; i++) {
605                         ir_node *ret = get_Block_cfgpred(end_bl, i);
606                         ir_node *irn = skip_Proj(ret);
607
608                         if (is_fragile_op(irn) || is_Raise(irn)) {
609                                 cf_pred[n_exc] = ret;
610                                 n_exc++;
611                         }
612                 }
613                 main_end_bl       = get_irg_end_block(irg);
614                 main_end_bl_arity = get_irn_arity(main_end_bl);
615                 end_preds         = XMALLOCN(ir_node*, n_exc + main_end_bl_arity);
616
617                 for (i = 0; i < main_end_bl_arity; ++i)
618                         end_preds[i] = get_irn_n(main_end_bl, i);
619                 for (i = 0; i < n_exc; ++i)
620                         end_preds[main_end_bl_arity + i] = cf_pred[i];
621                 set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
622                 set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg));
623                 free(end_preds);
624         }
625         free(res_pred);
626         free(cf_pred);
627
628         /* --  Turn CSE back on. -- */
629         set_optimize(rem_opt);
630         current_ir_graph = rem;
631
632         return 1;
633 }
634
635 /********************************************************************/
636 /* Apply inlining to small methods.                                 */
637 /********************************************************************/
638
639 static struct obstack  temp_obst;
640
641 /** Represents a possible inlinable call in a graph. */
642 typedef struct call_entry {
643         ir_node    *call;       /**< The Call node. */
644         ir_graph   *callee;     /**< The callee IR-graph. */
645         list_head  list;        /**< List head for linking the next one. */
646         int        loop_depth;  /**< The loop depth of this call. */
647         int        benefice;    /**< The calculated benefice of this call. */
648         unsigned   local_adr:1; /**< Set if this call gets an address of a local variable. */
649         unsigned   all_const:1; /**< Set if this call has only constant parameters. */
650 } call_entry;
651
652 /**
653  * environment for inlining small irgs
654  */
655 typedef struct inline_env_t {
656         struct obstack obst;  /**< An obstack where call_entries are allocated on. */
657         list_head      calls; /**< The call entry list. */
658 } inline_env_t;
659
660 /**
661  * Returns the irg called from a Call node. If the irg is not
662  * known, NULL is returned.
663  *
664  * @param call  the call node
665  */
666 static ir_graph *get_call_called_irg(ir_node *call)
667 {
668         ir_node *addr;
669
670         addr = get_Call_ptr(call);
671         if (is_Global(addr)) {
672                 ir_entity *ent = get_Global_entity(addr);
673                 /* we don't know which function gets finally bound to a weak symbol */
674                 if (get_entity_linkage(ent) & IR_LINKAGE_WEAK)
675                         return NULL;
676
677                 return get_entity_irg(ent);
678         }
679
680         return NULL;
681 }
682
683 /**
684  * Walker: Collect all calls to known graphs inside a graph.
685  */
686 static void collect_calls(ir_node *call, void *env)
687 {
688         (void) env;
689         if (is_Call(call)) {
690                 ir_graph *called_irg = get_call_called_irg(call);
691
692                 if (called_irg != NULL) {
693                         /* The Call node calls a locally defined method.  Remember to inline. */
694                         inline_env_t *ienv  = (inline_env_t*)env;
695                         call_entry   *entry = OALLOC(&ienv->obst, call_entry);
696                         entry->call       = call;
697                         entry->callee     = called_irg;
698                         entry->loop_depth = 0;
699                         entry->benefice   = 0;
700                         entry->local_adr  = 0;
701                         entry->all_const  = 0;
702
703                         list_add_tail(&entry->list, &ienv->calls);
704                 }
705         }
706 }
707
708 /**
709  * Inlines all small methods at call sites where the called address comes
710  * from a Const node that references the entity representing the called
711  * method.
712  * The size argument is a rough measure for the code size of the method:
713  * Methods where the obstack containing the firm graph is smaller than
714  * size are inlined.
715  */
716 void inline_small_irgs(ir_graph *irg, int size)
717 {
718         ir_graph *rem = current_ir_graph;
719         inline_env_t env;
720         call_entry *entry;
721
722         current_ir_graph = irg;
723         /* Handle graph state */
724         assert(get_irg_phase_state(irg) != phase_building);
725         free_callee_info(irg);
726
727         /* Find Call nodes to inline.
728            (We can not inline during a walk of the graph, as inlining the same
729            method several times changes the visited flag of the walked graph:
730            after the first inlining visited of the callee equals visited of
731            the caller.  With the next inlining both are increased.) */
732         obstack_init(&env.obst);
733         INIT_LIST_HEAD(&env.calls);
734         irg_walk_graph(irg, NULL, collect_calls, &env);
735
736         if (! list_empty(&env.calls)) {
737                 /* There are calls to inline */
738                 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
739                 collect_phiprojs(irg);
740
741                 list_for_each_entry(call_entry, entry, &env.calls, list) {
742                         ir_graph            *callee = entry->callee;
743                         irg_inline_property prop    = get_irg_inline_property(callee);
744
745                         if (prop == irg_inline_forbidden) {
746                                 continue;
747                         }
748
749                         if (prop >= irg_inline_forced ||
750                             _obstack_memory_used(callee->obst) - (int)obstack_room(callee->obst) < size) {
751                                 inline_method(entry->call, callee);
752                         }
753                 }
754                 ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
755         }
756         obstack_free(&env.obst, NULL);
757         current_ir_graph = rem;
758 }
759
760 typedef struct inline_small_irgs_pass_t {
761         ir_graph_pass_t pass;
762         int            size;
763 } inline_small_irgs_pass_t;
764
765 /**
766  * Wrapper to run inline_small_irgs() as a pass.
767  */
768 static int inline_small_irgs_wrapper(ir_graph *irg, void *context)
769 {
770         inline_small_irgs_pass_t *pass = (inline_small_irgs_pass_t*)context;
771
772         inline_small_irgs(irg, pass->size);
773         return 0;
774 }
775
776 /* create a pass for inline_small_irgs() */
777 ir_graph_pass_t *inline_small_irgs_pass(const char *name, int size)
778 {
779         inline_small_irgs_pass_t *pass = XMALLOCZ(inline_small_irgs_pass_t);
780
781         pass->size = size;
782         return def_graph_pass_constructor(
783                 &pass->pass, name ? name : "inline_small_irgs", inline_small_irgs_wrapper);
784 }
785
786 /**
787  * Environment for inlining irgs.
788  */
789 typedef struct {
790         list_head calls;             /**< List of of all call nodes in this graph. */
791         unsigned  *local_weights;    /**< Once allocated, the beneficial weight for transmitting local addresses. */
792         unsigned  n_nodes;           /**< Number of nodes in graph except Id, Tuple, Proj, Start, End. */
793         unsigned  n_blocks;          /**< Number of Blocks in graph without Start and End block. */
794         unsigned  n_nodes_orig;      /**< for statistics */
795         unsigned  n_call_nodes;      /**< Number of Call nodes in the graph. */
796         unsigned  n_call_nodes_orig; /**< for statistics */
797         unsigned  n_callers;         /**< Number of known graphs that call this graphs. */
798         unsigned  n_callers_orig;    /**< for statistics */
799         unsigned  got_inline:1;      /**< Set, if at least one call inside this graph was inlined. */
800         unsigned  recursive:1;       /**< Set, if this function is self recursive. */
801 } inline_irg_env;
802
803 /**
804  * Allocate a new environment for inlining.
805  */
806 static inline_irg_env *alloc_inline_irg_env(void)
807 {
808         inline_irg_env *env    = OALLOC(&temp_obst, inline_irg_env);
809         INIT_LIST_HEAD(&env->calls);
810         env->local_weights     = NULL;
811         env->n_nodes           = -2; /* do not count count Start, End */
812         env->n_blocks          = -2; /* do not count count Start, End Block */
813         env->n_nodes_orig      = -2; /* do not count Start, End */
814         env->n_call_nodes      = 0;
815         env->n_call_nodes_orig = 0;
816         env->n_callers         = 0;
817         env->n_callers_orig    = 0;
818         env->got_inline        = 0;
819         env->recursive         = 0;
820         return env;
821 }
822
823 typedef struct walker_env {
824         inline_irg_env *x;     /**< the inline environment */
825         char ignore_runtime;   /**< the ignore runtime flag */
826         char ignore_callers;   /**< if set, do change callers data */
827 } wenv_t;
828
829 /**
830  * post-walker: collect all calls in the inline-environment
831  * of a graph and sum some statistics.
832  */
833 static void collect_calls2(ir_node *call, void *ctx)
834 {
835         wenv_t         *env = (wenv_t*)ctx;
836         inline_irg_env *x = env->x;
837         unsigned        code = get_irn_opcode(call);
838         ir_graph       *callee;
839         call_entry     *entry;
840
841         /* count meaningful nodes in irg */
842         if (code != iro_Proj && code != iro_Tuple && code != iro_Sync) {
843                 if (code != iro_Block) {
844                         ++x->n_nodes;
845                         ++x->n_nodes_orig;
846                 } else {
847                         ++x->n_blocks;
848                 }
849         }
850
851         if (code != iro_Call) return;
852
853         /* check, if it's a runtime call */
854         if (env->ignore_runtime) {
855                 ir_node *symc = get_Call_ptr(call);
856
857                 if (is_Global(symc)) {
858                         ir_entity *ent = get_Global_entity(symc);
859
860                         if (get_entity_additional_properties(ent) & mtp_property_runtime)
861                                 return;
862                 }
863         }
864
865         /* collect all call nodes */
866         ++x->n_call_nodes;
867         ++x->n_call_nodes_orig;
868
869         callee = get_call_called_irg(call);
870         if (callee != NULL) {
871                 if (! env->ignore_callers) {
872                         inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);
873                         /* count all static callers */
874                         ++callee_env->n_callers;
875                         ++callee_env->n_callers_orig;
876                 }
877                 if (callee == current_ir_graph)
878                         x->recursive = 1;
879
880                 /* link it in the list of possible inlinable entries */
881                 entry = OALLOC(&temp_obst, call_entry);
882                 entry->call       = call;
883                 entry->callee     = callee;
884                 entry->loop_depth = get_irn_loop(get_nodes_block(call))->depth;
885                 entry->benefice   = 0;
886                 entry->local_adr  = 0;
887                 entry->all_const  = 0;
888
889                 list_add_tail(&entry->list, &x->calls);
890         }
891 }
892
893 /**
894  * Returns TRUE if the number of callers is 0 in the irg's environment,
895  * hence this irg is a leave.
896  */
897 inline static int is_leave(ir_graph *irg)
898 {
899         inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
900         return env->n_call_nodes == 0;
901 }
902
903 /**
904  * Returns TRUE if the number of nodes in the callee is
905  * smaller then size in the irg's environment.
906  */
907 inline static int is_smaller(ir_graph *callee, unsigned size)
908 {
909         inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);
910         return env->n_nodes < size;
911 }
912
913 /**
914  * Duplicate a call entry.
915  *
916  * @param entry     the original entry to duplicate
917  * @param new_call  the new call node
918  * @param loop_depth_delta
919  *                  delta value for the loop depth
920  */
921 static call_entry *duplicate_call_entry(const call_entry *entry,
922                                         ir_node *new_call, int loop_depth_delta)
923 {
924         call_entry *nentry = OALLOC(&temp_obst, call_entry);
925         nentry->call       = new_call;
926         nentry->callee     = entry->callee;
927         nentry->benefice   = entry->benefice;
928         nentry->loop_depth = entry->loop_depth + loop_depth_delta;
929         nentry->local_adr  = entry->local_adr;
930         nentry->all_const  = entry->all_const;
931
932         return nentry;
933 }
934
935 /**
936  * Append all call nodes of the source environment to the nodes of in the destination
937  * environment.
938  *
939  * @param dst         destination environment
940  * @param src         source environment
941  * @param loop_depth  the loop depth of the call that is replaced by the src list
942  */
943 static void append_call_list(inline_irg_env *dst, inline_irg_env *src, int loop_depth)
944 {
945         call_entry *entry, *nentry;
946
947         /* Note that the src list points to Call nodes in the inlined graph, but
948            we need Call nodes in our graph. Luckily the inliner leaves this information
949            in the link field. */
950         list_for_each_entry(call_entry, entry, &src->calls, list) {
951                 nentry = duplicate_call_entry(entry, (ir_node*)get_irn_link(entry->call), loop_depth);
952                 list_add_tail(&nentry->list, &dst->calls);
953         }
954         dst->n_call_nodes += src->n_call_nodes;
955         dst->n_nodes      += src->n_nodes;
956 }
957
958 /*
959  * Inlines small leave methods at call sites where the called address comes
960  * from a Const node that references the entity representing the called
961  * method.
962  * The size argument is a rough measure for the code size of the method:
963  * Methods where the obstack containing the firm graph is smaller than
964  * size are inlined.
965  */
966 void inline_leave_functions(unsigned maxsize, unsigned leavesize,
967                             unsigned size, int ignore_runtime)
968 {
969         inline_irg_env   *env;
970         ir_graph         *irg;
971         size_t           i, n_irgs;
972         ir_graph         *rem;
973         int              did_inline;
974         wenv_t           wenv;
975         call_entry       *entry, *next;
976         const call_entry *centry;
977         pmap             *copied_graphs;
978         pmap_entry       *pm_entry;
979
980         rem = current_ir_graph;
981         obstack_init(&temp_obst);
982
983         /* a map for the copied graphs, used to inline recursive calls */
984         copied_graphs = pmap_create();
985
986         /* extend all irgs by a temporary data structure for inlining. */
987         n_irgs = get_irp_n_irgs();
988         for (i = 0; i < n_irgs; ++i)
989                 set_irg_link(get_irp_irg(i), alloc_inline_irg_env());
990
991         /* Pre-compute information in temporary data structure. */
992         wenv.ignore_runtime = ignore_runtime;
993         wenv.ignore_callers = 0;
994         for (i = 0; i < n_irgs; ++i) {
995                 ir_graph *irg = get_irp_irg(i);
996
997                 assert(get_irg_phase_state(irg) != phase_building);
998                 free_callee_info(irg);
999
1000                 assure_cf_loop(irg);
1001                 wenv.x = (inline_irg_env*)get_irg_link(irg);
1002                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
1003         }
1004
1005         /* -- and now inline. -- */
1006
1007         /* Inline leaves recursively -- we might construct new leaves. */
1008         do {
1009                 did_inline = 0;
1010
1011                 for (i = 0; i < n_irgs; ++i) {
1012                         ir_node *call;
1013                         int phiproj_computed = 0;
1014
1015                         current_ir_graph = get_irp_irg(i);
1016                         env              = (inline_irg_env*)get_irg_link(current_ir_graph);
1017
1018                         ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1019                         list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
1020                                 ir_graph            *callee;
1021                                 irg_inline_property  prop;
1022
1023                                 if (env->n_nodes > maxsize)
1024                                         break;
1025
1026                                 call   = entry->call;
1027                                 callee = entry->callee;
1028
1029                                 prop = get_irg_inline_property(callee);
1030                                 if (prop == irg_inline_forbidden) {
1031                                         continue;
1032                                 }
1033
1034                                 if (is_leave(callee) && (
1035                                     is_smaller(callee, leavesize) || prop >= irg_inline_forced)) {
1036                                         if (!phiproj_computed) {
1037                                                 phiproj_computed = 1;
1038                                                 collect_phiprojs(current_ir_graph);
1039                                         }
1040                                         did_inline = inline_method(call, callee);
1041
1042                                         if (did_inline) {
1043                                                 inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);
1044
1045                                                 /* call was inlined, Phi/Projs for current graph must be recomputed */
1046                                                 phiproj_computed = 0;
1047
1048                                                 /* Do some statistics */
1049                                                 env->got_inline = 1;
1050                                                 --env->n_call_nodes;
1051                                                 env->n_nodes += callee_env->n_nodes;
1052                                                 --callee_env->n_callers;
1053
1054                                                 /* remove this call from the list */
1055                                                 list_del(&entry->list);
1056                                                 continue;
1057                                         }
1058                                 }
1059                         }
1060                         ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1061                 }
1062         } while (did_inline);
1063
1064         /* inline other small functions. */
1065         for (i = 0; i < n_irgs; ++i) {
1066                 ir_node *call;
1067                 int phiproj_computed = 0;
1068
1069                 current_ir_graph = get_irp_irg(i);
1070                 env              = (inline_irg_env*)get_irg_link(current_ir_graph);
1071
1072                 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1073
1074                 /* note that the list of possible calls is updated during the process */
1075                 list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
1076                         irg_inline_property prop;
1077                         ir_graph            *callee;
1078                         pmap_entry          *e;
1079
1080                         call   = entry->call;
1081                         callee = entry->callee;
1082
1083                         prop = get_irg_inline_property(callee);
1084                         if (prop == irg_inline_forbidden) {
1085                                 continue;
1086                         }
1087
1088                         e = pmap_find(copied_graphs, callee);
1089                         if (e != NULL) {
1090                                 /*
1091                                  * Remap callee if we have a copy.
1092                                  * FIXME: Should we do this only for recursive Calls ?
1093                                  */
1094                                 callee = (ir_graph*)e->value;
1095                         }
1096
1097                         if (prop >= irg_inline_forced ||
1098                             (is_smaller(callee, size) && env->n_nodes < maxsize) /* small function */) {
1099                                 if (current_ir_graph == callee) {
1100                                         /*
1101                                          * Recursive call: we cannot directly inline because we cannot walk
1102                                          * the graph and change it. So we have to make a copy of the graph
1103                                          * first.
1104                                          */
1105
1106                                         inline_irg_env *callee_env;
1107                                         ir_graph       *copy;
1108
1109                                         ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1110
1111                                         /*
1112                                          * No copy yet, create one.
1113                                          * Note that recursive methods are never leaves, so it is sufficient
1114                                          * to test this condition here.
1115                                          */
1116                                         copy = create_irg_copy(callee);
1117
1118                                         /* create_irg_copy() destroys the Proj links, recompute them */
1119                                         phiproj_computed = 0;
1120
1121                                         ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1122
1123                                         /* allocate new environment */
1124                                         callee_env = alloc_inline_irg_env();
1125                                         set_irg_link(copy, callee_env);
1126
1127                                         assure_cf_loop(copy);
1128                                         wenv.x              = callee_env;
1129                                         wenv.ignore_callers = 1;
1130                                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
1131
1132                                         /*
1133                                          * Enter the entity of the original graph. This is needed
1134                                          * for inline_method(). However, note that ent->irg still points
1135                                          * to callee, NOT to copy.
1136                                          */
1137                                         set_irg_entity(copy, get_irg_entity(callee));
1138
1139                                         pmap_insert(copied_graphs, callee, copy);
1140                                         callee = copy;
1141
1142                                         /* we have only one caller: the original graph */
1143                                         callee_env->n_callers      = 1;
1144                                         callee_env->n_callers_orig = 1;
1145                                 }
1146                                 if (! phiproj_computed) {
1147                                         phiproj_computed = 1;
1148                                         collect_phiprojs(current_ir_graph);
1149                                 }
1150                                 did_inline = inline_method(call, callee);
1151                                 if (did_inline) {
1152                                         inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);
1153
1154                                         /* call was inlined, Phi/Projs for current graph must be recomputed */
1155                                         phiproj_computed = 0;
1156
1157                                         /* callee was inline. Append it's call list. */
1158                                         env->got_inline = 1;
1159                                         --env->n_call_nodes;
1160                                         append_call_list(env, callee_env, entry->loop_depth);
1161                                         --callee_env->n_callers;
1162
1163                                         /* after we have inlined callee, all called methods inside callee
1164                                            are now called once more */
1165                                         list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
1166                                                 inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
1167                                                 ++penv->n_callers;
1168                                         }
1169
1170                                         /* remove this call from the list */
1171                                         list_del(&entry->list);
1172                                         continue;
1173                                 }
1174                         }
1175                 }
1176                 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1177         }
1178
1179         for (i = 0; i < n_irgs; ++i) {
1180                 irg = get_irp_irg(i);
1181                 env = (inline_irg_env*)get_irg_link(irg);
1182
1183                 if (env->got_inline) {
1184                         optimize_graph_df(irg);
1185                         optimize_cf(irg);
1186                 }
1187                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
1188                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
1189                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
1190                         env->n_callers_orig, env->n_callers,
1191                         get_entity_name(get_irg_entity(irg))));
1192                 }
1193         }
1194
1195         /* kill the copied graphs: we don't need them anymore */
1196         foreach_pmap(copied_graphs, pm_entry) {
1197                 ir_graph *copy = (ir_graph*)pm_entry->value;
1198
1199                 /* reset the entity, otherwise it will be deleted in the next step ... */
1200                 set_irg_entity(copy, NULL);
1201                 free_ir_graph(copy);
1202         }
1203         pmap_destroy(copied_graphs);
1204
1205         obstack_free(&temp_obst, NULL);
1206         current_ir_graph = rem;
1207 }
1208
1209 typedef struct inline_leave_functions_pass_t {
1210         ir_prog_pass_t pass;
1211         unsigned       maxsize;
1212         unsigned       leavesize;
1213         unsigned       size;
1214         int            ignore_runtime;
1215 } inline_leave_functions_pass_t;
1216
1217 /**
1218  * Wrapper to run inline_leave_functions() as a ir_prog pass.
1219  */
1220 static int inline_leave_functions_wrapper(ir_prog *irp, void *context)
1221 {
1222         inline_leave_functions_pass_t *pass = (inline_leave_functions_pass_t*)context;
1223
1224         (void)irp;
1225         inline_leave_functions(
1226                 pass->maxsize, pass->leavesize,
1227                 pass->size, pass->ignore_runtime);
1228         return 0;
1229 }
1230
1231 /* create a pass for inline_leave_functions() */
1232 ir_prog_pass_t *inline_leave_functions_pass(
1233         const char *name, unsigned maxsize, unsigned leavesize,
1234         unsigned size, int ignore_runtime)
1235 {
1236         inline_leave_functions_pass_t *pass = XMALLOCZ(inline_leave_functions_pass_t);
1237
1238         pass->maxsize        = maxsize;
1239         pass->leavesize      = leavesize;
1240         pass->size           = size;
1241         pass->ignore_runtime = ignore_runtime;
1242
1243         return def_prog_pass_constructor(
1244                 &pass->pass,
1245                 name ? name : "inline_leave_functions",
1246                 inline_leave_functions_wrapper);
1247 }
1248
1249 /**
1250  * Calculate the parameter weights for transmitting the address of a local variable.
1251  */
1252 static unsigned calc_method_local_weight(ir_node *arg)
1253 {
1254         int      i, j, k;
1255         unsigned v, weight = 0;
1256
1257         for (i = get_irn_n_outs(arg) - 1; i >= 0; --i) {
1258                 ir_node *succ = get_irn_out(arg, i);
1259
1260                 switch (get_irn_opcode(succ)) {
1261                 case iro_Load:
1262                 case iro_Store:
1263                         /* Loads and Store can be removed */
1264                         weight += 3;
1265                         break;
1266                 case iro_Sel:
1267                         /* check if all args are constant */
1268                         for (j = get_Sel_n_indexs(succ) - 1; j >= 0; --j) {
1269                                 ir_node *idx = get_Sel_index(succ, j);
1270                                 if (! is_Const(idx))
1271                                         return 0;
1272                         }
1273                         /* Check users on this Sel. Note: if a 0 is returned here, there was
1274                            some unsupported node. */
1275                         v = calc_method_local_weight(succ);
1276                         if (v == 0)
1277                                 return 0;
1278                         /* we can kill one Sel with constant indexes, this is cheap */
1279                         weight += v + 1;
1280                         break;
1281                 case iro_Id:
1282                         /* when looking backward we might find Id nodes */
1283                         weight += calc_method_local_weight(succ);
1284                         break;
1285                 case iro_Tuple:
1286                         /* unoptimized tuple */
1287                         for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
1288                                 ir_node *pred = get_Tuple_pred(succ, j);
1289                                 if (pred == arg) {
1290                                         /* look for Proj(j) */
1291                                         for (k = get_irn_n_outs(succ) - 1; k >= 0; --k) {
1292                                                 ir_node *succ_succ = get_irn_out(succ, k);
1293                                                 if (is_Proj(succ_succ)) {
1294                                                         if (get_Proj_proj(succ_succ) == j) {
1295                                                                 /* found */
1296                                                                 weight += calc_method_local_weight(succ_succ);
1297                                                         }
1298                                                 } else {
1299                                                         /* this should NOT happen */
1300                                                         return 0;
1301                                                 }
1302                                         }
1303                                 }
1304                         }
1305                         break;
1306                 default:
1307                         /* any other node: unsupported yet or bad. */
1308                         return 0;
1309                 }
1310         }
1311         return weight;
1312 }
1313
1314 /**
1315  * Calculate the parameter weights for transmitting the address of a local variable.
1316  */
1317 static void analyze_irg_local_weights(inline_irg_env *env, ir_graph *irg)
1318 {
1319         ir_entity *ent = get_irg_entity(irg);
1320         ir_type  *mtp;
1321         int      nparams, i, proj_nr;
1322         ir_node  *irg_args, *arg;
1323
1324         mtp      = get_entity_type(ent);
1325         nparams  = get_method_n_params(mtp);
1326
1327         /* allocate a new array. currently used as 'analysed' flag */
1328         env->local_weights = NEW_ARR_D(unsigned, &temp_obst, nparams);
1329
1330         /* If the method haven't parameters we have nothing to do. */
1331         if (nparams <= 0)
1332                 return;
1333
1334         assure_irg_outs(irg);
1335         irg_args = get_irg_args(irg);
1336         for (i = get_irn_n_outs(irg_args) - 1; i >= 0; --i) {
1337                 arg     = get_irn_out(irg_args, i);
1338                 proj_nr = get_Proj_proj(arg);
1339                 env->local_weights[proj_nr] = calc_method_local_weight(arg);
1340         }
1341 }
1342
1343 /**
1344  * Calculate the benefice for transmitting an local variable address.
1345  * After inlining, the local variable might be transformed into a
1346  * SSA variable by scalar_replacement().
1347  */
1348 static unsigned get_method_local_adress_weight(ir_graph *callee, size_t pos)
1349 {
1350         inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);
1351
1352         if (env->local_weights == NULL)
1353                 analyze_irg_local_weights(env, callee);
1354
1355         if (pos < ARR_LEN(env->local_weights))
1356                 return env->local_weights[pos];
1357         return 0;
1358 }
1359
1360 /**
1361  * Calculate a benefice value for inlining the given call.
1362  *
1363  * @param call       the call node we have to inspect
1364  * @param callee     the called graph
1365  */
1366 static int calc_inline_benefice(call_entry *entry, ir_graph *callee)
1367 {
1368         ir_node   *call = entry->call;
1369         ir_entity *ent  = get_irg_entity(callee);
1370         ir_node   *frame_ptr;
1371         ir_type   *mtp;
1372         int       weight = 0;
1373         int       i, n_params, all_const;
1374         unsigned  cc, v;
1375         irg_inline_property prop;
1376
1377         inline_irg_env *callee_env;
1378
1379         prop = get_irg_inline_property(callee);
1380         if (prop == irg_inline_forbidden) {
1381                 DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden\n",
1382                     call, callee));
1383                 return entry->benefice = INT_MIN;
1384         }
1385
1386         if (get_irg_additional_properties(callee) & mtp_property_noreturn) {
1387                 DB((dbg, LEVEL_2, "In %+F Call to %+F: not inlining noreturn or weak\n",
1388                     call, callee));
1389                 return entry->benefice = INT_MIN;
1390         }
1391
1392         /* costs for every passed parameter */
1393         n_params = get_Call_n_params(call);
1394         mtp      = get_entity_type(ent);
1395         cc       = get_method_calling_convention(mtp);
1396         if (cc & cc_reg_param) {
1397                 /* register parameter, smaller costs for register parameters */
1398                 int max_regs = cc & ~cc_bits;
1399
1400                 if (max_regs < n_params)
1401                         weight += max_regs * 2 + (n_params - max_regs) * 5;
1402                 else
1403                         weight += n_params * 2;
1404         } else {
1405                 /* parameters are passed an stack */
1406                 weight += 5 * n_params;
1407         }
1408
1409         /* constant parameters improve the benefice */
1410         frame_ptr = get_irg_frame(current_ir_graph);
1411         all_const = 1;
1412         for (i = 0; i < n_params; ++i) {
1413                 ir_node *param = get_Call_param(call, i);
1414
1415                 if (is_Const(param)) {
1416                         weight += get_method_param_weight(ent, i);
1417                 } else {
1418                         all_const = 0;
1419                         if (is_SymConst(param))
1420                                 weight += get_method_param_weight(ent, i);
1421                         else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
1422                                 /*
1423                                  * An address of a local variable is transmitted. After
1424                                  * inlining, scalar_replacement might be able to remove the
1425                                  * local variable, so honor this.
1426                                  */
1427                                 v = get_method_local_adress_weight(callee, i);
1428                                 weight += v;
1429                                 if (v > 0)
1430                                         entry->local_adr = 1;
1431                         }
1432                 }
1433         }
1434         entry->all_const = all_const;
1435
1436         callee_env = (inline_irg_env*)get_irg_link(callee);
1437         if (callee_env->n_callers == 1 &&
1438             callee != current_ir_graph &&
1439             !entity_is_externally_visible(ent)) {
1440                 weight += 700;
1441         }
1442
1443         /* give a bonus for functions with one block */
1444         if (callee_env->n_blocks == 1)
1445                 weight = weight * 3 / 2;
1446
1447         /* and one for small non-recursive functions: we want them to be inlined in mostly every case */
1448         if (callee_env->n_nodes < 30 && !callee_env->recursive)
1449                 weight += 2000;
1450
1451         /* and finally for leaves: they do not increase the register pressure
1452            because of callee safe registers */
1453         if (callee_env->n_call_nodes == 0)
1454                 weight += 400;
1455
1456         /** it's important to inline inner loops first */
1457         if (entry->loop_depth > 30)
1458                 weight += 30 * 1024;
1459         else
1460                 weight += entry->loop_depth * 1024;
1461
1462         /*
1463          * All arguments constant is probably a good sign, give an extra bonus
1464          */
1465         if (all_const)
1466                 weight += 1024;
1467
1468         return entry->benefice = weight;
1469 }
1470
1471 typedef struct walk_env_t {
1472         ir_graph **irgs;
1473         size_t   last_irg;
1474 } walk_env_t;
1475
1476 /**
1477  * Callgraph walker, collect all visited graphs.
1478  */
1479 static void callgraph_walker(ir_graph *irg, void *data)
1480 {
1481         walk_env_t *env = (walk_env_t *)data;
1482         env->irgs[env->last_irg++] = irg;
1483 }
1484
1485 /**
1486  * Creates an inline order for all graphs.
1487  *
1488  * @return the list of graphs.
1489  */
1490 static ir_graph **create_irg_list(void)
1491 {
1492         ir_entity  **free_methods;
1493         size_t     n_irgs = get_irp_n_irgs();
1494         walk_env_t env;
1495
1496         cgana(&free_methods);
1497         xfree(free_methods);
1498
1499         compute_callgraph();
1500
1501         env.irgs     = XMALLOCNZ(ir_graph*, n_irgs);
1502         env.last_irg = 0;
1503
1504         callgraph_walk(NULL, callgraph_walker, &env);
1505         assert(n_irgs == env.last_irg);
1506
1507         return env.irgs;
1508 }
1509
1510 /**
1511  * Push a call onto the priority list if its benefice is big enough.
1512  *
1513  * @param pqueue   the priority queue of calls
1514  * @param call     the call entry
1515  * @param inlien_threshold
1516  *                 the threshold value
1517  */
1518 static void maybe_push_call(pqueue_t *pqueue, call_entry *call,
1519                             int inline_threshold)
1520 {
1521         ir_graph            *callee  = call->callee;
1522         irg_inline_property prop     = get_irg_inline_property(callee);
1523         int                 benefice = calc_inline_benefice(call, callee);
1524
1525         DB((dbg, LEVEL_2, "In %+F Call %+F to %+F has benefice %d\n",
1526             get_irn_irg(call->call), call->call, callee, benefice));
1527
1528         if (prop < irg_inline_forced && benefice < inline_threshold) {
1529                 return;
1530         }
1531
1532         pqueue_put(pqueue, call, benefice);
1533 }
1534
1535 /**
1536  * Try to inline calls into a graph.
1537  *
1538  * @param irg      the graph into which we inline
1539  * @param maxsize  do NOT inline if the size of irg gets
1540  *                 bigger than this amount
1541  * @param inline_threshold
1542  *                 threshold value for inline decision
1543  * @param copied_graphs
1544  *                 map containing copied of recursive graphs
1545  */
1546 static void inline_into(ir_graph *irg, unsigned maxsize,
1547                         int inline_threshold, pmap *copied_graphs)
1548 {
1549         int            phiproj_computed = 0;
1550         inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
1551         call_entry     *curr_call;
1552         wenv_t         wenv;
1553         pqueue_t       *pqueue;
1554
1555         if (env->n_call_nodes == 0)
1556                 return;
1557
1558         if (env->n_nodes > maxsize) {
1559                 DB((dbg, LEVEL_2, "%+F: too big (%d)\n", irg, env->n_nodes));
1560                 return;
1561         }
1562
1563         current_ir_graph = irg;
1564         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1565
1566         /* put irgs into the pqueue */
1567         pqueue = new_pqueue();
1568
1569         list_for_each_entry(call_entry, curr_call, &env->calls, list) {
1570                 assert(is_Call(curr_call->call));
1571                 maybe_push_call(pqueue, curr_call, inline_threshold);
1572         }
1573
1574         /* note that the list of possible calls is updated during the process */
1575         while (!pqueue_empty(pqueue)) {
1576                 int                 did_inline;
1577                 call_entry          *curr_call  = (call_entry*)pqueue_pop_front(pqueue);
1578                 ir_graph            *callee     = curr_call->callee;
1579                 ir_node             *call_node  = curr_call->call;
1580                 inline_irg_env      *callee_env = (inline_irg_env*)get_irg_link(callee);
1581                 irg_inline_property prop        = get_irg_inline_property(callee);
1582                 int                 loop_depth;
1583                 const call_entry    *centry;
1584                 pmap_entry          *e;
1585
1586                 if ((prop < irg_inline_forced) && env->n_nodes + callee_env->n_nodes > maxsize) {
1587                         DB((dbg, LEVEL_2, "%+F: too big (%d) + %+F (%d)\n", irg,
1588                                                 env->n_nodes, callee, callee_env->n_nodes));
1589                         continue;
1590                 }
1591
1592                 e = pmap_find(copied_graphs, callee);
1593                 if (e != NULL) {
1594                         int benefice = curr_call->benefice;
1595                         /*
1596                          * Reduce the weight for recursive function IFF not all arguments are const.
1597                          * inlining recursive functions is rarely good.
1598                          */
1599                         if (!curr_call->all_const)
1600                                 benefice -= 2000;
1601                         if (benefice < inline_threshold)
1602                                 continue;
1603
1604                         /*
1605                          * Remap callee if we have a copy.
1606                          */
1607                         callee     = (ir_graph*)e->value;
1608                         callee_env = (inline_irg_env*)get_irg_link(callee);
1609                 }
1610
1611                 if (current_ir_graph == callee) {
1612                         /*
1613                          * Recursive call: we cannot directly inline because we cannot
1614                          * walk the graph and change it. So we have to make a copy of
1615                          * the graph first.
1616                          */
1617                         int benefice = curr_call->benefice;
1618                         ir_graph *copy;
1619
1620                         /*
1621                          * Reduce the weight for recursive function IFF not all arguments are const.
1622                          * inlining recursive functions is rarely good.
1623                          */
1624                         if (!curr_call->all_const)
1625                                 benefice -= 2000;
1626                         if (benefice < inline_threshold)
1627                                 continue;
1628
1629                         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1630
1631                         /*
1632                          * No copy yet, create one.
1633                          * Note that recursive methods are never leaves, so it is
1634                          * sufficient to test this condition here.
1635                          */
1636                         copy = create_irg_copy(callee);
1637
1638                         /* create_irg_copy() destroys the Proj links, recompute them */
1639                         phiproj_computed = 0;
1640
1641                         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1642
1643                         /* allocate a new environment */
1644                         callee_env = alloc_inline_irg_env();
1645                         set_irg_link(copy, callee_env);
1646
1647                         assure_cf_loop(copy);
1648                         wenv.x              = callee_env;
1649                         wenv.ignore_callers = 1;
1650                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
1651
1652                         /*
1653                          * Enter the entity of the original graph. This is needed
1654                          * for inline_method(). However, note that ent->irg still points
1655                          * to callee, NOT to copy.
1656                          */
1657                         set_irg_entity(copy, get_irg_entity(callee));
1658
1659                         pmap_insert(copied_graphs, callee, copy);
1660                         callee = copy;
1661
1662                         /* we have only one caller: the original graph */
1663                         callee_env->n_callers      = 1;
1664                         callee_env->n_callers_orig = 1;
1665                 }
1666                 if (! phiproj_computed) {
1667                         phiproj_computed = 1;
1668                         collect_phiprojs(current_ir_graph);
1669                 }
1670                 did_inline = inline_method(call_node, callee);
1671                 if (!did_inline)
1672                         continue;
1673
1674                 /* call was inlined, Phi/Projs for current graph must be recomputed */
1675                 phiproj_computed = 0;
1676
1677                 /* remove it from the caller list */
1678                 list_del(&curr_call->list);
1679
1680                 /* callee was inline. Append it's call list. */
1681                 env->got_inline = 1;
1682                 --env->n_call_nodes;
1683
1684                 /* we just generate a bunch of new calls */
1685                 loop_depth = curr_call->loop_depth;
1686                 list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
1687                         inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
1688                         ir_node        *new_call;
1689                         call_entry     *new_entry;
1690
1691                         /* after we have inlined callee, all called methods inside
1692                          * callee are now called once more */
1693                         ++penv->n_callers;
1694
1695                         /* Note that the src list points to Call nodes in the inlined graph,
1696                          * but we need Call nodes in our graph. Luckily the inliner leaves
1697                          * this information in the link field. */
1698                         new_call = (ir_node*)get_irn_link(centry->call);
1699                         assert(is_Call(new_call));
1700
1701                         new_entry = duplicate_call_entry(centry, new_call, loop_depth);
1702                         list_add_tail(&new_entry->list, &env->calls);
1703                         maybe_push_call(pqueue, new_entry, inline_threshold);
1704                 }
1705
1706                 env->n_call_nodes += callee_env->n_call_nodes;
1707                 env->n_nodes += callee_env->n_nodes;
1708                 --callee_env->n_callers;
1709         }
1710         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1711         del_pqueue(pqueue);
1712 }
1713
1714 /*
1715  * Heuristic inliner. Calculates a benefice value for every call and inlines
1716  * those calls with a value higher than the threshold.
1717  */
1718 void inline_functions(unsigned maxsize, int inline_threshold,
1719                       opt_ptr after_inline_opt)
1720 {
1721         inline_irg_env   *env;
1722         size_t           i, n_irgs;
1723         ir_graph         *rem;
1724         wenv_t           wenv;
1725         pmap             *copied_graphs;
1726         pmap_entry       *pm_entry;
1727         ir_graph         **irgs;
1728
1729         rem = current_ir_graph;
1730         obstack_init(&temp_obst);
1731
1732         irgs = create_irg_list();
1733
1734         /* a map for the copied graphs, used to inline recursive calls */
1735         copied_graphs = pmap_create();
1736
1737         /* extend all irgs by a temporary data structure for inlining. */
1738         n_irgs = get_irp_n_irgs();
1739         for (i = 0; i < n_irgs; ++i)
1740                 set_irg_link(irgs[i], alloc_inline_irg_env());
1741
1742         /* Pre-compute information in temporary data structure. */
1743         wenv.ignore_runtime = 0;
1744         wenv.ignore_callers = 0;
1745         for (i = 0; i < n_irgs; ++i) {
1746                 ir_graph *irg = irgs[i];
1747
1748                 free_callee_info(irg);
1749
1750                 wenv.x = (inline_irg_env*)get_irg_link(irg);
1751                 assure_cf_loop(irg);
1752                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
1753         }
1754
1755         /* -- and now inline. -- */
1756         for (i = 0; i < n_irgs; ++i) {
1757                 ir_graph *irg = irgs[i];
1758
1759                 inline_into(irg, maxsize, inline_threshold, copied_graphs);
1760         }
1761
1762         for (i = 0; i < n_irgs; ++i) {
1763                 ir_graph *irg = irgs[i];
1764
1765                 env = (inline_irg_env*)get_irg_link(irg);
1766                 if (env->got_inline && after_inline_opt != NULL) {
1767                         /* this irg got calls inlined: optimize it */
1768                         after_inline_opt(irg);
1769                 }
1770                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
1771                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
1772                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
1773                         env->n_callers_orig, env->n_callers,
1774                         get_entity_name(get_irg_entity(irg))));
1775                 }
1776         }
1777
1778         /* kill the copied graphs: we don't need them anymore */
1779         foreach_pmap(copied_graphs, pm_entry) {
1780                 ir_graph *copy = (ir_graph*)pm_entry->value;
1781
1782                 /* reset the entity, otherwise it will be deleted in the next step ... */
1783                 set_irg_entity(copy, NULL);
1784                 free_ir_graph(copy);
1785         }
1786         pmap_destroy(copied_graphs);
1787
1788         xfree(irgs);
1789
1790         obstack_free(&temp_obst, NULL);
1791         current_ir_graph = rem;
1792 }
1793
1794 typedef struct inline_functions_pass_t {
1795         ir_prog_pass_t pass;
1796         unsigned       maxsize;
1797         int            inline_threshold;
1798         opt_ptr        after_inline_opt;
1799 } inline_functions_pass_t;
1800
1801 /**
1802  * Wrapper to run inline_functions() as a ir_prog pass.
1803  */
1804 static int inline_functions_wrapper(ir_prog *irp, void *context)
1805 {
1806         inline_functions_pass_t *pass = (inline_functions_pass_t*)context;
1807
1808         (void)irp;
1809         inline_functions(pass->maxsize, pass->inline_threshold,
1810                          pass->after_inline_opt);
1811         return 0;
1812 }
1813
1814 /* create a ir_prog pass for inline_functions */
1815 ir_prog_pass_t *inline_functions_pass(
1816           const char *name, unsigned maxsize, int inline_threshold,
1817           opt_ptr after_inline_opt)
1818 {
1819         inline_functions_pass_t *pass = XMALLOCZ(inline_functions_pass_t);
1820
1821         pass->maxsize          = maxsize;
1822         pass->inline_threshold = inline_threshold;
1823         pass->after_inline_opt = after_inline_opt;
1824
1825         return def_prog_pass_constructor(
1826                 &pass->pass, name ? name : "inline_functions",
1827                 inline_functions_wrapper);
1828 }
1829
1830 void firm_init_inline(void)
1831 {
1832         FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
1833 }