Code cleanup
[libfirm] / ir / opt / opt_inline.c
/*
 * Copyright (C) 1995-2008 University of Karlsruhe.  All rights reserved.
 *
 * This file is part of libFirm.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License
 * Agreement provided with the Software.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

/**
 * @file
 * @brief    Dead node elimination and Procedure Inlining.
 * @author   Michael Beck, Goetz Lindenmaier
 * @version  $Id$
 */
#include "config.h"

#include <limits.h>
#include <stdbool.h>
#include <assert.h>

#include "irnode_t.h"
#include "irgraph_t.h"
#include "irprog_t.h"

#include "iroptimize.h"
#include "ircons_t.h"
#include "iropt_t.h"
#include "irgopt.h"
#include "irgmod.h"
#include "irgwalk.h"

#include "array_t.h"
#include "list.h"
#include "pset.h"
#include "pmap.h"
#include "pdeq.h"
#include "xmalloc.h"
#include "pqueue.h"

#include "irouts.h"
#include "irloop_t.h"
#include "irbackedge_t.h"
#include "opt_init.h"
#include "cgana.h"
#include "trouts.h"
#include "error.h"

#include "analyze_irg_args.h"
#include "iredges_t.h"
#include "irflag_t.h"
#include "irhooks.h"
#include "irtools.h"
#include "iropt_dbg.h"
#include "irpass_t.h"
#include "irphase_t.h"

DEBUG_ONLY(static firm_dbg_module_t *dbg;)

/*------------------------------------------------------------------*/
/* Routines for dead node elimination / copying garbage collection  */
/* of the obstack.                                                  */
/*------------------------------------------------------------------*/

/**
 * Remember the new node in the old node by using a field all nodes have.
 */
static void set_new_node(ir_node *node, ir_node *new_node)
{
        set_irn_link(node, new_node);
}

/**
 * Get this new node, before the old node is forgotten.
 */
static inline ir_node *get_new_node(ir_node *old_node)
{
        assert(irn_visited(old_node));
        return (ir_node*) get_irn_link(old_node);
}
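
/* Both helpers use the node's link field to store the old->new mapping,
 * so the inliner below reserves IR_RESOURCE_IRN_LINK around the copying
 * (see inline_small_irgs() and inline_leave_functions()). */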

/*--------------------------------------------------------------------*/
/*  Functionality for inlining                                         */
/*--------------------------------------------------------------------*/

/**
 * Copy node for inlining.  Updates attributes that change when
 * inlining but not for dead node elimination.
 *
 * Copies the node by calling copy_node() and then updates the entity if
 * it's a local one.  env must be the ir_graph the node is copied into.
 * The copied entities must be stored in the link fields of the old frame
 * entities.
 */
static void copy_node_inline(ir_node *node, void *env)
{
        ir_graph *new_irg  = (ir_graph*) env;
        ir_node  *new_node = irn_copy_into_irg(node, new_irg);

        set_new_node(node, new_node);
        if (is_Sel(node)) {
                ir_graph  *old_irg        = get_irn_irg(node);
                ir_type   *old_frame_type = get_irg_frame_type(old_irg);
                ir_entity *old_entity     = get_Sel_entity(node);
                assert(is_Sel(new_node));
                /* use copied entities from the new frame */
                if (get_entity_owner(old_entity) == old_frame_type) {
                        ir_entity *new_entity = (ir_entity*)get_entity_link(old_entity);
                        assert(new_entity != NULL);
                        set_Sel_entity(new_node, new_entity);
                }
        } else if (is_Block(new_node)) {
                new_node->attr.block.irg.irg = new_irg;
        }
}

static void set_preds_inline(ir_node *node, void *env)
{
        ir_node *new_node;

        irn_rewire_inputs(node);

        /* move constants into start block */
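        /* (Firm keeps constant-like nodes in the start block so that they
         * dominate all uses; the copies must follow that convention in the
         * new graph as well.) */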
        new_node = get_new_node(node);
        if (is_irn_constlike(new_node)) {
                ir_graph *new_irg     = (ir_graph *) env;
                ir_node  *start_block = get_irg_start_block(new_irg);
                set_nodes_block(new_node, start_block);
        }
}

/**
 * Walker: checks whether the value-argument area (P_value_arg_base) is
 * accessed or a stack Alloc (alloca) is used; either forbids inlining.
 */
static void find_addr(ir_node *node, void *env)
{
        bool *allow_inline = (bool*)env;

        if (is_Sel(node)) {
                ir_graph *irg = current_ir_graph;
                if (get_Sel_ptr(node) == get_irg_frame(irg)) {
                        /* access to frame */
                        ir_entity *ent = get_Sel_entity(node);
                        if (get_entity_owner(ent) != get_irg_frame_type(irg)) {
                                /* access to value_type */
                                *allow_inline = false;
                        }
                }
        } else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
                /* From GCC:
                 * Refuse to inline an alloca call unless the user explicitly
                 * forced it, as this may change the program's memory overhead
                 * drastically when the function using alloca is called in a
                 * loop.  In the GCC contained in SPEC2000, inlining into
                 * schedule_block caused it to require 2GB of RAM instead of
                 * 256MB.
                 *
                 * Sadly, this is true for our implementation as well.
                 * Moreover, we cannot differentiate between alloca() and VLA
                 * yet, so this disables inlining of functions using VLA
                 * (which are completely safe).
                 *
                 * Two solutions:
                 * - add a flag to the Alloc node for "real" alloca() calls
                 * - add a new Stack-Restore node at the end of a function
                 *   using alloca()
                 */
                *allow_inline = false;
        }
}

/**
 * Check if we can inline a given call.
 * Currently, we cannot inline two cases:
 * - calls with compound arguments or results
 * - graphs that take the address of a parameter
 *
 * These conditions are checked here.
 */
static bool can_inline(ir_node *call, ir_graph *called_graph)
{
        ir_entity          *called      = get_irg_entity(called_graph);
        ir_type            *called_type = get_entity_type(called);
        ir_type            *call_type   = get_Call_type(call);
        int                 n_params    = get_method_n_params(called_type);
        int                 n_arguments = get_method_n_params(call_type);
        int                 n_res       = get_method_n_ress(called_type);
        irg_inline_property prop        = get_irg_inline_property(called_graph);
        int                 i;
        bool                res;

        if (prop == irg_inline_forbidden)
                return false;

        if (n_arguments != n_params) {
                /* this is a bad feature of C: without a prototype, we can
                 * call a function with fewer parameters than needed. Currently
                 * we don't support this, although we could use Unknown then. */
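                /* Example (hypothetical C source): with only the K&R-style
                 * declaration "int f();" in scope, a caller may write "f(1)"
                 * while f is defined as "int f(int a, int b)"; the call type
                 * then has fewer parameters than the method type. */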
                return false;
        }
        if (n_res != get_method_n_ress(call_type)) {
                return false;
        }

        /* Argh, compiling C has some bad consequences:
         * it is implementation-defined what happens when argument and
         * parameter types do not match.  We support inlining if the bit size
         * of the types matches AND the same arithmetic is used. */
        for (i = n_params - 1; i >= 0; --i) {
                ir_type *param_tp = get_method_param_type(called_type, i);
                ir_type *arg_tp   = get_method_param_type(call_type, i);

                if (param_tp != arg_tp) {
                        ir_mode *pmode = get_type_mode(param_tp);
                        ir_mode *amode = get_type_mode(arg_tp);

                        if (pmode == NULL || amode == NULL)
                                return false;
                        if (get_mode_size_bits(pmode) != get_mode_size_bits(amode))
                                return false;
                        if (get_mode_arithmetic(pmode) != get_mode_arithmetic(amode))
                                return false;
                        /* otherwise we can simply "reinterpret" the bits */
                }
        }
        for (i = n_res - 1; i >= 0; --i) {
                ir_type *decl_res_tp = get_method_res_type(called_type, i);
                ir_type *used_res_tp = get_method_res_type(call_type, i);

                if (decl_res_tp != used_res_tp) {
                        ir_mode *decl_mode = get_type_mode(decl_res_tp);
                        ir_mode *used_mode = get_type_mode(used_res_tp);
                        if (decl_mode == NULL || used_mode == NULL)
                                return false;
                        if (get_mode_size_bits(decl_mode) != get_mode_size_bits(used_mode))
                                return false;
                        if (get_mode_arithmetic(decl_mode) != get_mode_arithmetic(used_mode))
                                return false;
                        /* otherwise we can "reinterpret" the bits */
                }
        }

        /* check parameters for compound arguments */
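        /* (e.g., hypothetically, a C struct passed by value produces a
         * compound parameter type here) */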
        for (i = 0; i < n_params; ++i) {
                ir_type *p_type = get_method_param_type(call_type, i);

                if (is_compound_type(p_type))
                        return false;
        }

        /* check results for compound types */
        for (i = 0; i < n_res; ++i) {
                ir_type *r_type = get_method_res_type(call_type, i);

                if (is_compound_type(r_type))
                        return false;
        }

        res = true;
        irg_walk_graph(called_graph, find_addr, NULL, &res);

        return res;
}

enum exc_mode {
        exc_handler,    /**< There is a handler. */
        exc_no_handler  /**< Exception handling not represented. */
};

/**
 * Copy all entities of the stack frame of one irg to the stack frame of
 * another.  Sets the entity links of the old entities to the copies.
 */
static void copy_frame_entities(ir_graph *from, ir_graph *to)
{
        ir_type *from_frame = get_irg_frame_type(from);
        ir_type *to_frame   = get_irg_frame_type(to);
        int      n_members  = get_class_n_members(from_frame);
        int      i;
        assert(from_frame != to_frame);

        for (i = 0; i < n_members; ++i) {
                ir_entity *old_ent = get_class_member(from_frame, i);
                ir_entity *new_ent = copy_entity_own(old_ent, to_frame);
                set_entity_link(old_ent, new_ent);
        }
}

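/*
 * Minimal usage sketch for inline_method() (hypothetical caller; it mirrors
 * what inline_small_irgs() further below does).  The caller must hold the
 * link and Phi-list resources and have collected the Phi/Proj lists:
 *
 *     ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
 *     collect_phiprojs(irg);
 *     if (callee != NULL && inline_method(call, callee)) {
 *             ... the Call node has been turned into a Tuple ...
 *     }
 *     ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
 */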
/* Inlines a method at the given call site. */
int inline_method(ir_node *call, ir_graph *called_graph)
{
        ir_node       *pre_call;
        ir_node       *post_call, *post_bl;
        ir_node       *in[pn_Start_max];
        ir_node       *end, *end_bl, *block;
        ir_node       **res_pred;
        ir_node       **cf_pred;
        ir_node       **args_in;
        ir_node       *ret, *phi;
        int           arity, n_ret, n_exc, n_res, i, j, rem_opt;
        int           irn_arity, n_params;
        int           n_mem_phi;
        enum exc_mode exc_handling;
        ir_type       *mtp;
        ir_type       *ctp;
        ir_entity     *ent;
        ir_graph      *rem;
        ir_graph      *irg = get_irn_irg(call);

        /* we cannot inline some types of calls */
        if (! can_inline(call, called_graph))
                return 0;

        /* We cannot inline a recursive call. The graph must be copied before
         * the call to inline_method() using create_irg_copy(). */
        if (called_graph == irg)
                return 0;

        ent      = get_irg_entity(called_graph);
        mtp      = get_entity_type(ent);
        ctp      = get_Call_type(call);
        n_params = get_method_n_params(mtp);
        n_res    = get_method_n_ress(mtp);

        rem = current_ir_graph;
        current_ir_graph = irg;

        DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));

        /* optimizations can cause problems when allocating new nodes */
        rem_opt = get_opt_optimize();
        set_optimize(0);

        /* Handle graph state */
        assert(get_irg_phase_state(irg) != phase_building);
        assert(get_irg_pinned(irg) == op_pin_state_pinned);
        assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
        set_irg_outs_inconsistent(irg);
        set_irg_extblk_inconsistent(irg);
        set_irg_doms_inconsistent(irg);
        set_irg_loopinfo_inconsistent(irg);
        set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
        set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);
        edges_deactivate(irg);

        /* here we know we WILL inline, so inform the statistics */
        hook_inline(call, called_graph);

        /* -- Decide how to handle exception control flow: Is there a handler
           for the Call node, or do we branch directly to End on an exception?
           exc_handling:
           exc_handler:    There is a handler.
           exc_no_handler: Exception handling is not represented in Firm. -- */
        {
                ir_node *Xproj = NULL;
                ir_node *proj;
                for (proj = (ir_node*)get_irn_link(call); proj != NULL;
                     proj = (ir_node*)get_irn_link(proj)) {
                        long proj_nr = get_Proj_proj(proj);
                        if (proj_nr == pn_Call_X_except) Xproj = proj;
                }
                exc_handling = Xproj != NULL ? exc_handler : exc_no_handler;
        }

        /* create the argument tuple */
        args_in = ALLOCAN(ir_node*, n_params);

        block = get_nodes_block(call);
        for (i = n_params - 1; i >= 0; --i) {
                ir_node *arg      = get_Call_param(call, i);
                ir_type *param_tp = get_method_param_type(mtp, i);
                ir_mode *mode     = get_type_mode(param_tp);

                if (mode != get_irn_mode(arg)) {
                        arg = new_r_Conv(block, arg, mode);
                }
                args_in[i] = arg;
        }
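        /* (The Conv above merely adapts the mode: can_inline() already
         * guaranteed matching bit size and arithmetic, so the value is
         * effectively reinterpreted.) */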

        /* Pre_call is a Tuple that replaces the Start node of the called
         * graph; it collects the memory, the frame base, the TLS pointer and
         * the arguments of the Call. Post_call is the old Call node and
         * collects the results of the called graph. Both will end up being
         * a tuple. */
        post_bl = get_nodes_block(call);
        /* XxMxPxPxPxT of Start + parameter of Call */
        in[pn_Start_M]              = get_Call_mem(call);
        in[pn_Start_X_initial_exec] = new_r_Jmp(post_bl);
        in[pn_Start_P_frame_base]   = get_irg_frame(irg);
        in[pn_Start_P_tls]          = get_irg_tls(irg);
        in[pn_Start_T_args]         = new_r_Tuple(post_bl, n_params, args_in);
        pre_call = new_r_Tuple(post_bl, pn_Start_max, in);
        post_call = call;

        /* --
           The new block gets the ins of the old block, pre_call and all its
           predecessors and all Phi nodes. -- */
        part_block(pre_call);

        /* increment visited flag for later walk */
        inc_irg_visited(called_graph);

        /* link some nodes to nodes in the current graph, so that instead of
         * being copied the linked nodes will get used.
         * Thus the copier will use the created Tuple instead of copying the
         * Start node, and similarly for singleton nodes like NoMem and Bad.
         * Note: this will prohibit predecessors from being copied - only do
         *       it for nodes without predecessors */
        {
                ir_node *start_block;
                ir_node *start;
                ir_node *bad;
                ir_node *nomem;

                start_block = get_irg_start_block(called_graph);
                set_new_node(start_block, get_nodes_block(pre_call));
                mark_irn_visited(start_block);

                start = get_irg_start(called_graph);
                set_new_node(start, pre_call);
                mark_irn_visited(start);

                bad = get_irg_bad(called_graph);
                set_new_node(bad, get_irg_bad(irg));
                mark_irn_visited(bad);

                nomem = get_irg_no_mem(called_graph);
                set_new_node(nomem, get_irg_no_mem(irg));
                mark_irn_visited(nomem);
        }

        /* the entity link is used to link entities on the old stack frame to
         * the new stack frame */
        irp_reserve_resources(irp, IR_RESOURCE_ENTITY_LINK);

        /* copy entities and nodes */
        assert(!irn_visited(get_irg_end(called_graph)));
        copy_frame_entities(called_graph, irg);
        irg_walk_core(get_irg_end(called_graph), copy_node_inline, set_preds_inline,
                      irg);

        irp_free_resources(irp, IR_RESOURCE_ENTITY_LINK);

        /* -- Merge the end of the inlined procedure with the call site -- */
        /* We will turn the old Call node into a Tuple with the following
           predecessors:
           -1: Block of Tuple.
           0:  Phi of all Memories of Return statements.
           1:  Jmp from new Block that merges the control flow from all
               exception predecessors of the old end block.
           2:  Tuple of all arguments.
           3:  Phi of Exception memories.
           In case the old Call directly branches to End on an exception we
           don't need the block merging all exceptions nor the Phi of the
           exception memories.
        */

        /* Precompute some values */
        end_bl = get_new_node(get_irg_end_block(called_graph));
        end    = get_new_node(get_irg_end(called_graph));
        arity  = get_Block_n_cfgpreds(end_bl);    /* arity = n_exc + n_ret  */
        n_res  = get_method_n_ress(get_Call_type(call));

        res_pred = XMALLOCN(ir_node*, n_res);
        cf_pred  = XMALLOCN(ir_node*, arity);

        /* preserve the keepalives of the inlined graph */
        irn_arity = get_irn_arity(end);
        for (i = 0; i < irn_arity; i++) {
                ir_node *ka = get_End_keepalive(end, i);
                if (! is_Bad(ka))
                        add_End_keepalive(get_irg_end(irg), ka);
        }

        /* replace Return nodes by Jump nodes */
        n_ret = 0;
        for (i = 0; i < arity; i++) {
                ir_node *ret;
                ret = get_Block_cfgpred(end_bl, i);
                if (is_Return(ret)) {
                        ir_node *block = get_nodes_block(ret);
                        cf_pred[n_ret] = new_r_Jmp(block);
                        n_ret++;
                }
        }
        set_irn_in(post_bl, n_ret, cf_pred);
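        /* post_bl, the block holding the code behind the former Call, is now
         * entered once per Return of the inlined graph */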

        /* build a Tuple for all results of the method.
         * Add a Phi node if there was more than one Return. */
        turn_into_tuple(post_call, pn_Call_max);
        /* First the Memory-Phi */
        n_mem_phi = 0;
        for (i = 0; i < arity; i++) {
                ret = get_Block_cfgpred(end_bl, i);
                if (is_Return(ret)) {
                        cf_pred[n_mem_phi++] = get_Return_mem(ret);
                }
                /* memory output for some exceptions is directly connected to End */
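                /* (The literal Proj numbers below (3, 0, 1) select the memory
                 * output of a Call, of other fragile ops and of a Raise,
                 * respectively; they mirror this Firm version's pn_* layout.) */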
                if (is_Call(ret)) {
                        cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 3);
                } else if (is_fragile_op(ret)) {
                        /* We rely on all fragile cfops having their memory output at the same position. */
                        cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 0);
                } else if (is_Raise(ret)) {
                        cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 1);
                }
        }
        phi = new_r_Phi(post_bl, n_mem_phi, cf_pred, mode_M);
        set_Tuple_pred(call, pn_Call_M, phi);
        /* Preserve the Phi list for further inlinings -- but it might be optimized away */
        if (get_nodes_block(phi) == post_bl) {
                set_irn_link(phi, get_irn_link(post_bl));
                set_irn_link(post_bl, phi);
        }
        /* Now the real results */
        if (n_res > 0) {
                ir_node *result_tuple;
                for (j = 0; j < n_res; j++) {
                        ir_type *res_type = get_method_res_type(ctp, j);
                        ir_mode *res_mode = get_type_mode(res_type);
                        n_ret = 0;
                        for (i = 0; i < arity; i++) {
                                ret = get_Block_cfgpred(end_bl, i);
                                if (is_Return(ret)) {
                                        ir_node *res = get_Return_res(ret, j);
                                        if (get_irn_mode(res) != res_mode) {
                                                ir_node *block = get_nodes_block(res);
                                                res = new_r_Conv(block, res, res_mode);
                                        }
                                        cf_pred[n_ret] = res;
                                        n_ret++;
                                }
                        }
                        if (n_ret > 0) {
                                ir_mode *mode = get_irn_mode(cf_pred[0]);
                                phi = new_r_Phi(post_bl, n_ret, cf_pred, mode);
                        } else {
                                phi = new_r_Bad(irg);
                        }
                        res_pred[j] = phi;
                        /* Preserve the Phi list for further inlinings -- but it might be optimized away */
                        if (get_nodes_block(phi) == post_bl) {
                                set_Phi_next(phi, get_Block_phis(post_bl));
                                set_Block_phis(post_bl, phi);
                        }
                }
                result_tuple = new_r_Tuple(post_bl, n_res, res_pred);
                set_Tuple_pred(call, pn_Call_T_result, result_tuple);
        } else {
                set_Tuple_pred(call, pn_Call_T_result, new_r_Bad(irg));
        }
        /* handle the regular call */
        set_Tuple_pred(call, pn_Call_X_regular, new_r_Jmp(post_bl));

        /* For now, we cannot inline calls with value_base */
        set_Tuple_pred(call, pn_Call_P_value_res_base, new_r_Bad(irg));

        /* Finally the exception control flow.
           We have two possible situations:
           First, the Call branches to an exception handler: we need to add
           a Phi node that collects the memory containing the exception
           objects.  Further we need to add another block to get a correct
           representation of this Phi.  To this block we add a Jmp that
           resolves into the X output of the Call when the Call is turned
           into a tuple.
           Second, there is no exception edge: just add all inlined exception
           branches to the End node.
         */
        if (exc_handling == exc_handler) {
                n_exc = 0;
                for (i = 0; i < arity; i++) {
                        ir_node *ret, *irn;
                        ret = get_Block_cfgpred(end_bl, i);
                        irn = skip_Proj(ret);
                        if (is_fragile_op(irn) || is_Raise(irn)) {
                                cf_pred[n_exc] = ret;
                                ++n_exc;
                        }
                }
                if (n_exc > 0) {
                        if (n_exc == 1) {
                                /* simple fix */
                                set_Tuple_pred(call, pn_Call_X_except, cf_pred[0]);
                        } else {
                                ir_node *block = new_r_Block(irg, n_exc, cf_pred);
                                set_Tuple_pred(call, pn_Call_X_except, new_r_Jmp(block));
                        }
                } else {
                        set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg));
                }
        } else {
                ir_node *main_end_bl;
                int main_end_bl_arity;
                ir_node **end_preds;

                /* exc_handling == exc_no_handler: add all inlined exception
                 * branches to the End block of the caller */
                n_exc = 0;
                for (i = 0; i < arity; i++) {
                        ir_node *ret = get_Block_cfgpred(end_bl, i);
                        ir_node *irn = skip_Proj(ret);

                        if (is_fragile_op(irn) || is_Raise(irn)) {
                                cf_pred[n_exc] = ret;
                                n_exc++;
                        }
                }
                main_end_bl       = get_irg_end_block(irg);
                main_end_bl_arity = get_irn_arity(main_end_bl);
                end_preds         = XMALLOCN(ir_node*, n_exc + main_end_bl_arity);

                for (i = 0; i < main_end_bl_arity; ++i)
                        end_preds[i] = get_irn_n(main_end_bl, i);
                for (i = 0; i < n_exc; ++i)
                        end_preds[main_end_bl_arity + i] = cf_pred[i];
                set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
                set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg));
                free(end_preds);
        }
        free(res_pred);
        free(cf_pred);

        /* -- Turn the optimizations back on. -- */
        set_optimize(rem_opt);
        current_ir_graph = rem;

        return 1;
}

/********************************************************************/
/* Apply inlining to small methods.                                 */
/********************************************************************/

static struct obstack  temp_obst;

/** Represents a possibly inlinable call in a graph. */
typedef struct call_entry {
        ir_node    *call;       /**< The Call node. */
        ir_graph   *callee;     /**< The callee IR-graph. */
        list_head  list;        /**< List head for linking the next one. */
        int        loop_depth;  /**< The loop depth of this call. */
        int        benefice;    /**< The calculated benefice of this call. */
        unsigned   local_adr:1; /**< Set if this call gets an address of a local variable. */
        unsigned   all_const:1; /**< Set if this call has only constant parameters. */
} call_entry;

/**
 * Environment for inlining small irgs.
 */
typedef struct inline_env_t {
        struct obstack obst;  /**< An obstack where call_entries are allocated on. */
        list_head      calls; /**< The call entry list. */
} inline_env_t;

/**
 * Returns the irg called from a Call node. If the irg is not
 * known, NULL is returned.
 *
 * @param call  the call node
 */
static ir_graph *get_call_called_irg(ir_node *call)
{
        ir_node *addr;

        addr = get_Call_ptr(call);
        if (is_Global(addr)) {
                ir_entity *ent = get_Global_entity(addr);
                /* we don't know which function finally gets bound to a weak symbol */
                if (get_entity_linkage(ent) & IR_LINKAGE_WEAK)
                        return NULL;

                return get_entity_irg(ent);
        }

        return NULL;
}

/**
 * Walker: Collect all calls to known graphs inside a graph.
 */
static void collect_calls(ir_node *call, void *env)
{
        if (is_Call(call)) {
                ir_graph *called_irg = get_call_called_irg(call);

                if (called_irg != NULL) {
                        /* The Call node calls a locally defined method.  Remember to inline. */
                        inline_env_t *ienv  = (inline_env_t*)env;
                        call_entry   *entry = OALLOC(&ienv->obst, call_entry);
                        entry->call       = call;
                        entry->callee     = called_irg;
                        entry->loop_depth = 0;
                        entry->benefice   = 0;
                        entry->local_adr  = 0;
                        entry->all_const  = 0;

                        list_add_tail(&entry->list, &ienv->calls);
                }
        }
}

/**
 * Inlines all small methods at call sites where the called address comes
 * from a Const node that references the entity representing the called
 * method.
 * The size argument is a rough measure for the code size of the method:
 * Methods where the obstack containing the firm graph is smaller than
 * size are inlined.
 */
void inline_small_irgs(ir_graph *irg, int size)
{
        ir_graph *rem = current_ir_graph;
        inline_env_t env;
        call_entry *entry;

        current_ir_graph = irg;
        /* Handle graph state */
        assert(get_irg_phase_state(irg) != phase_building);
        free_callee_info(irg);

        /* Find Call nodes to inline.
           (We cannot inline during a walk of the graph, as inlining the same
           method several times changes the visited flag of the walked graph:
           after the first inlining the visited flag of the callee equals the
           visited flag of the caller.  With the next inlining both are
           increased.) */
        obstack_init(&env.obst);
        INIT_LIST_HEAD(&env.calls);
        irg_walk_graph(irg, NULL, collect_calls, &env);

        if (! list_empty(&env.calls)) {
                /* There are calls to inline */
                ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
                collect_phiprojs(irg);

                list_for_each_entry(call_entry, entry, &env.calls, list) {
                        ir_graph            *callee = entry->callee;
                        irg_inline_property prop    = get_irg_inline_property(callee);

                        if (prop == irg_inline_forbidden) {
                                continue;
                        }

                        if (prop >= irg_inline_forced ||
                            _obstack_memory_used(callee->obst) - (int)obstack_room(callee->obst) < size) {
                                inline_method(entry->call, callee);
                        }
                }
                ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
        }
        obstack_free(&env.obst, NULL);
        current_ir_graph = rem;
}

typedef struct inline_small_irgs_pass_t {
        ir_graph_pass_t pass;
        int            size;
} inline_small_irgs_pass_t;

/**
 * Wrapper to run inline_small_irgs() as a pass.
 */
static int inline_small_irgs_wrapper(ir_graph *irg, void *context)
{
        inline_small_irgs_pass_t *pass = (inline_small_irgs_pass_t*)context;

        inline_small_irgs(irg, pass->size);
        return 0;
}

/* create a pass for inline_small_irgs() */
ir_graph_pass_t *inline_small_irgs_pass(const char *name, int size)
{
        inline_small_irgs_pass_t *pass = XMALLOCZ(inline_small_irgs_pass_t);

        pass->size = size;
        return def_graph_pass_constructor(
                &pass->pass, name ? name : "inline_small_irgs", inline_small_irgs_wrapper);
}

/**
 * Environment for inlining irgs.
 */
typedef struct {
        list_head calls;             /**< List of all call nodes in this graph. */
        unsigned  *local_weights;    /**< Once allocated, the beneficial weight for transmitting local addresses. */
        unsigned  n_nodes;           /**< Number of nodes in graph except Id, Tuple, Proj, Start, End. */
        unsigned  n_blocks;          /**< Number of Blocks in graph without Start and End block. */
        unsigned  n_nodes_orig;      /**< for statistics */
        unsigned  n_call_nodes;      /**< Number of Call nodes in the graph. */
        unsigned  n_call_nodes_orig; /**< for statistics */
        unsigned  n_callers;         /**< Number of known graphs that call this graph. */
        unsigned  n_callers_orig;    /**< for statistics */
        unsigned  got_inline:1;      /**< Set, if at least one call inside this graph was inlined. */
        unsigned  recursive:1;       /**< Set, if this function is self recursive. */
} inline_irg_env;

/**
 * Allocate a new environment for inlining.
 */
static inline_irg_env *alloc_inline_irg_env(void)
{
        inline_irg_env *env    = OALLOC(&temp_obst, inline_irg_env);
        INIT_LIST_HEAD(&env->calls);
        env->local_weights     = NULL;
        env->n_nodes           = -2; /* do not count Start, End */
        env->n_blocks          = -2; /* do not count Start and End block */
        env->n_nodes_orig      = -2; /* do not count Start, End */
        env->n_call_nodes      = 0;
        env->n_call_nodes_orig = 0;
        env->n_callers         = 0;
        env->n_callers_orig    = 0;
        env->got_inline        = 0;
        env->recursive         = 0;
        return env;
}

typedef struct walker_env {
        inline_irg_env *x;     /**< the inline environment */
        char ignore_runtime;   /**< the ignore runtime flag */
        char ignore_callers;   /**< if set, do not update the callers data */
} wenv_t;

/**
 * post-walker: collect all calls in the inline-environment
 * of a graph and sum some statistics.
 */
static void collect_calls2(ir_node *call, void *ctx)
{
        wenv_t         *env = (wenv_t*)ctx;
        inline_irg_env *x = env->x;
        unsigned        code = get_irn_opcode(call);
        ir_graph       *callee;
        call_entry     *entry;

        /* count meaningful nodes in irg */
        if (code != iro_Proj && code != iro_Tuple && code != iro_Sync) {
                if (code != iro_Block) {
                        ++x->n_nodes;
                        ++x->n_nodes_orig;
                } else {
                        ++x->n_blocks;
                }
        }

        if (code != iro_Call) return;

        /* check if it's a runtime call */
        if (env->ignore_runtime) {
                ir_node *symc = get_Call_ptr(call);

                if (is_Global(symc)) {
                        ir_entity *ent = get_Global_entity(symc);

                        if (get_entity_additional_properties(ent) & mtp_property_runtime)
                                return;
                }
        }

        /* collect all call nodes */
        ++x->n_call_nodes;
        ++x->n_call_nodes_orig;

        callee = get_call_called_irg(call);
        if (callee != NULL) {
                if (! env->ignore_callers) {
                        inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);
                        /* count all static callers */
                        ++callee_env->n_callers;
                        ++callee_env->n_callers_orig;
                }
                if (callee == current_ir_graph)
                        x->recursive = 1;

                /* link it in the list of possibly inlinable entries */
                entry = OALLOC(&temp_obst, call_entry);
                entry->call       = call;
                entry->callee     = callee;
                entry->loop_depth = get_irn_loop(get_nodes_block(call))->depth;
                entry->benefice   = 0;
                entry->local_adr  = 0;
                entry->all_const  = 0;

                list_add_tail(&entry->list, &x->calls);
        }
}

/**
 * Returns TRUE if the irg's environment counts zero Call nodes,
 * hence this irg is a leaf (it calls no other graphs).
 */
inline static int is_leave(ir_graph *irg)
{
        inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
        return env->n_call_nodes == 0;
}

/**
 * Returns TRUE if the number of nodes in the callee is
 * smaller than size in the irg's environment.
 */
inline static int is_smaller(ir_graph *callee, unsigned size)
{
        inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);
        return env->n_nodes < size;
}

/**
 * Duplicate a call entry.
 *
 * @param entry     the original entry to duplicate
 * @param new_call  the new call node
 * @param loop_depth_delta
 *                  delta value for the loop depth
 */
static call_entry *duplicate_call_entry(const call_entry *entry,
                                        ir_node *new_call, int loop_depth_delta)
{
        call_entry *nentry = OALLOC(&temp_obst, call_entry);
        nentry->call       = new_call;
        nentry->callee     = entry->callee;
        nentry->benefice   = entry->benefice;
        nentry->loop_depth = entry->loop_depth + loop_depth_delta;
        nentry->local_adr  = entry->local_adr;
        nentry->all_const  = entry->all_const;

        return nentry;
}

/**
 * Append all call nodes of the source environment to the list of the
 * destination environment.
 *
 * @param dst         destination environment
 * @param src         source environment
 * @param loop_depth  the loop depth of the call that is replaced by the src list
 */
static void append_call_list(inline_irg_env *dst, inline_irg_env *src, int loop_depth)
{
        call_entry *entry, *nentry;

        /* Note that the src list points to Call nodes in the inlined graph, but
           we need Call nodes in our graph. Luckily the inliner leaves this information
           in the link field. */
        list_for_each_entry(call_entry, entry, &src->calls, list) {
                nentry = duplicate_call_entry(entry, (ir_node*)get_irn_link(entry->call), loop_depth);
                list_add_tail(&nentry->list, &dst->calls);
        }
        dst->n_call_nodes += src->n_call_nodes;
        dst->n_nodes      += src->n_nodes;
}

/*
 * Inlines small leaf methods at call sites where the called address comes
 * from a Const node that references the entity representing the called
 * method.
 * The size argument is a rough measure for the code size of the method:
 * Methods where the obstack containing the firm graph is smaller than
 * size are inlined.
 */
void inline_leave_functions(unsigned maxsize, unsigned leavesize,
                            unsigned size, int ignore_runtime)
{
        inline_irg_env   *env;
        ir_graph         *irg;
        int              i, n_irgs;
        ir_graph         *rem;
        int              did_inline;
        wenv_t           wenv;
        call_entry       *entry, *next;
        const call_entry *centry;
        pmap             *copied_graphs;
        pmap_entry       *pm_entry;

        rem = current_ir_graph;
        obstack_init(&temp_obst);

        /* a map for the copied graphs, used to inline recursive calls */
        copied_graphs = pmap_create();

        /* extend all irgs by a temporary data structure for inlining. */
        n_irgs = get_irp_n_irgs();
        for (i = 0; i < n_irgs; ++i)
                set_irg_link(get_irp_irg(i), alloc_inline_irg_env());

        /* Pre-compute information in temporary data structure. */
        wenv.ignore_runtime = ignore_runtime;
        wenv.ignore_callers = 0;
        for (i = 0; i < n_irgs; ++i) {
                ir_graph *irg = get_irp_irg(i);

                assert(get_irg_phase_state(irg) != phase_building);
                free_callee_info(irg);

                assure_cf_loop(irg);
                wenv.x = (inline_irg_env*)get_irg_link(irg);
                irg_walk_graph(irg, NULL, collect_calls2, &wenv);
        }

        /* -- and now inline. -- */

        /* Inline leaf functions recursively -- we might construct new leaves. */
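        /* (Inlining all calls of a leaf into its caller can turn the caller
         * into a leaf itself, hence the fixpoint iteration below.) */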
        do {
                did_inline = 0;

                for (i = 0; i < n_irgs; ++i) {
                        ir_node *call;
                        int phiproj_computed = 0;

                        current_ir_graph = get_irp_irg(i);
                        env              = (inline_irg_env*)get_irg_link(current_ir_graph);

                        ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
                        list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
                                ir_graph            *callee;
                                irg_inline_property  prop;

                                if (env->n_nodes > maxsize)
                                        break;

                                call   = entry->call;
                                callee = entry->callee;

                                prop = get_irg_inline_property(callee);
                                if (prop == irg_inline_forbidden) {
                                        continue;
                                }

                                if (is_leave(callee) && (
                                    is_smaller(callee, leavesize) || prop >= irg_inline_forced)) {
                                        if (!phiproj_computed) {
                                                phiproj_computed = 1;
                                                collect_phiprojs(current_ir_graph);
                                        }
                                        did_inline = inline_method(call, callee);

                                        if (did_inline) {
                                                inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);

                                                /* call was inlined, Phi/Projs for current graph must be recomputed */
                                                phiproj_computed = 0;

                                                /* Do some statistics */
                                                env->got_inline = 1;
                                                --env->n_call_nodes;
                                                env->n_nodes += callee_env->n_nodes;
                                                --callee_env->n_callers;

                                                /* remove this call from the list */
                                                list_del(&entry->list);
                                                continue;
                                        }
                                }
                        }
                        ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
                }
        } while (did_inline);

        /* inline other small functions. */
        for (i = 0; i < n_irgs; ++i) {
                ir_node *call;
                int phiproj_computed = 0;

                current_ir_graph = get_irp_irg(i);
                env              = (inline_irg_env*)get_irg_link(current_ir_graph);

                ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

                /* note that the list of possible calls is updated during the process */
                list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
                        irg_inline_property prop;
                        ir_graph            *callee;
                        pmap_entry          *e;

                        call   = entry->call;
                        callee = entry->callee;

                        prop = get_irg_inline_property(callee);
                        if (prop == irg_inline_forbidden) {
                                continue;
                        }

                        e = pmap_find(copied_graphs, callee);
                        if (e != NULL) {
                                /*
                                 * Remap callee if we have a copy.
                                 * FIXME: Should we do this only for recursive Calls?
                                 */
                                callee = (ir_graph*)e->value;
                        }

                        if (prop >= irg_inline_forced ||
                            (is_smaller(callee, size) && env->n_nodes < maxsize) /* small function */) {
                                if (current_ir_graph == callee) {
                                        /*
                                         * Recursive call: we cannot directly inline because we cannot
                                         * walk the graph and change it. So we have to make a copy of
                                         * the graph first.
                                         */

                                        inline_irg_env *callee_env;
                                        ir_graph       *copy;

                                        ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

                                        /*
                                         * No copy yet, create one.
                                         * Note that recursive methods are never leaves, so it is
                                         * sufficient to test this condition here.
                                         */
                                        copy = create_irg_copy(callee);

                                        /* create_irg_copy() destroys the Proj links, recompute them */
                                        phiproj_computed = 0;

                                        ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

                                        /* allocate a new environment */
                                        callee_env = alloc_inline_irg_env();
                                        set_irg_link(copy, callee_env);

                                        assure_cf_loop(copy);
                                        wenv.x              = callee_env;
                                        wenv.ignore_callers = 1;
                                        irg_walk_graph(copy, NULL, collect_calls2, &wenv);

                                        /*
                                         * Enter the entity of the original graph. This is needed
                                         * for inline_method(). However, note that ent->irg still points
                                         * to callee, NOT to copy.
                                         */
                                        set_irg_entity(copy, get_irg_entity(callee));

                                        pmap_insert(copied_graphs, callee, copy);
                                        callee = copy;

                                        /* we have only one caller: the original graph */
                                        callee_env->n_callers      = 1;
                                        callee_env->n_callers_orig = 1;
                                }
                                if (! phiproj_computed) {
                                        phiproj_computed = 1;
                                        collect_phiprojs(current_ir_graph);
                                }
                                did_inline = inline_method(call, callee);
                                if (did_inline) {
                                        inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);

                                        /* call was inlined, Phi/Projs for current graph must be recomputed */
                                        phiproj_computed = 0;

                                        /* callee was inlined. Append its call list. */
                                        env->got_inline = 1;
                                        --env->n_call_nodes;
                                        append_call_list(env, callee_env, entry->loop_depth);
                                        --callee_env->n_callers;

                                        /* after we have inlined callee, all called methods inside callee
                                           are now called once more */
                                        list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
                                                inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
                                                ++penv->n_callers;
                                        }

                                        /* remove this call from the list */
                                        list_del(&entry->list);
                                        continue;
                                }
                        }
                }
                ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
        }

        for (i = 0; i < n_irgs; ++i) {
                irg = get_irp_irg(i);
                env = (inline_irg_env*)get_irg_link(irg);

                if (env->got_inline) {
                        optimize_graph_df(irg);
                        optimize_cf(irg);
                }
                if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
                        DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
                        env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
                        env->n_callers_orig, env->n_callers,
                        get_entity_name(get_irg_entity(irg))));
                }
        }

        /* kill the copied graphs: we don't need them anymore */
        foreach_pmap(copied_graphs, pm_entry) {
                ir_graph *copy = (ir_graph*)pm_entry->value;

                /* reset the entity, otherwise it will be deleted in the next step ... */
                set_irg_entity(copy, NULL);
                free_ir_graph(copy);
        }
        pmap_destroy(copied_graphs);

        obstack_free(&temp_obst, NULL);
        current_ir_graph = rem;
}

typedef struct inline_leave_functions_pass_t {
        ir_prog_pass_t pass;
        unsigned       maxsize;
        unsigned       leavesize;
        unsigned       size;
        int            ignore_runtime;
} inline_leave_functions_pass_t;

/**
 * Wrapper to run inline_leave_functions() as an ir_prog pass.
 */
static int inline_leave_functions_wrapper(ir_prog *irp, void *context)
{
        inline_leave_functions_pass_t *pass = (inline_leave_functions_pass_t*)context;

        (void)irp;
        inline_leave_functions(
                pass->maxsize, pass->leavesize,
                pass->size, pass->ignore_runtime);
        return 0;
}

/* create a pass for inline_leave_functions() */
ir_prog_pass_t *inline_leave_functions_pass(
        const char *name, unsigned maxsize, unsigned leavesize,
        unsigned size, int ignore_runtime)
{
        inline_leave_functions_pass_t *pass = XMALLOCZ(inline_leave_functions_pass_t);

        pass->maxsize        = maxsize;
        pass->leavesize      = leavesize;
        pass->size           = size;
        pass->ignore_runtime = ignore_runtime;

        return def_prog_pass_constructor(
                &pass->pass,
                name ? name : "inline_leave_functions",
                inline_leave_functions_wrapper);
}
1248
1249 /**
1250  * Calculate the parameter weights for transmitting the address of a local variable.
1251  */
1252 static unsigned calc_method_local_weight(ir_node *arg)
1253 {
1254         int      i, j, k;
1255         unsigned v, weight = 0;
1256
1257         for (i = get_irn_n_outs(arg) - 1; i >= 0; --i) {
1258                 ir_node *succ = get_irn_out(arg, i);
1259
1260                 switch (get_irn_opcode(succ)) {
1261                 case iro_Load:
1262                 case iro_Store:
1263                         /* Loads and Store can be removed */
1264                         weight += 3;
1265                         break;
1266                 case iro_Sel:
1267                         /* check if all args are constant */
1268                         for (j = get_Sel_n_indexs(succ) - 1; j >= 0; --j) {
1269                                 ir_node *idx = get_Sel_index(succ, j);
1270                                 if (! is_Const(idx))
1271                                         return 0;
1272                         }
1273                         /* Check users on this Sel. Note: if a 0 is returned here, there was
1274                            some unsupported node. */
1275                         v = calc_method_local_weight(succ);
1276                         if (v == 0)
1277                                 return 0;
1278                         /* we can kill one Sel with constant indexes, this is cheap */
1279                         weight += v + 1;
1280                         break;
1281                 case iro_Id:
1282                         /* Id nodes may still appear among the users: look through them */
1283                         weight += calc_method_local_weight(succ);
1284                         break;
1285                 case iro_Tuple:
1286                         /* unoptimized tuple */
1287                         for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
1288                                 ir_node *pred = get_Tuple_pred(succ, j);
1289                                 if (pred == arg) {
1290                                         /* look for Proj(j) */
1291                                         for (k = get_irn_n_outs(succ) - 1; k >= 0; --k) {
1292                                                 ir_node *succ_succ = get_irn_out(succ, k);
1293                                                 if (is_Proj(succ_succ)) {
1294                                                         if (get_Proj_proj(succ_succ) == j) {
1295                                                                 /* found */
1296                                                                 weight += calc_method_local_weight(succ_succ);
1297                                                         }
1298                                                 } else {
1299                                                         /* this should NOT happen */
1300                                                         return 0;
1301                                                 }
1302                                         }
1303                                 }
1304                         }
1305                         break;
1306                 default:
1307                         /* any other node: unsupported yet or bad (e.g. the address escapes). */
1308                         return 0;
1309                 }
1310         }
1311         return weight;
1312 }
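
/* A worked example for the weighting above (hedged: this is a source-level
 * reading of the Firm graph, and 'p' is a hypothetical parameter): if the
 * caller passes &x and the callee body is essentially
 *
 *     *p = 23;        // one Store -> +3
 *     return *p;      // one Load  -> +3
 *
 * the argument gets weight 6: both memory operations can disappear once
 * inlining plus scalar_replacement() turn x into an SSA value. */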
1313
1314 /**
1315  * Calculate and cache the local-variable weights for all parameters of the given graph.
1316  */
1317 static void analyze_irg_local_weights(inline_irg_env *env, ir_graph *irg)
1318 {
1319         ir_entity *ent = get_irg_entity(irg);
1320         ir_type  *mtp;
1321         int      nparams, i, proj_nr;
1322         ir_node  *irg_args, *arg;
1323
1324         mtp      = get_entity_type(ent);
1325         nparams  = get_method_n_params(mtp);
1326
1327         /* allocate a new array; its mere existence also serves as the 'analysed' flag */
1328         env->local_weights = NEW_ARR_D(unsigned, &temp_obst, nparams);
1329
1330         /* If the method has no parameters, we have nothing to do. */
1331         if (nparams <= 0)
1332                 return;
1333
1334         assure_irg_outs(irg);
1335         irg_args = get_irg_args(irg);
1336         for (i = get_irn_n_outs(irg_args) - 1; i >= 0; --i) {
1337                 arg     = get_irn_out(irg_args, i);
1338                 proj_nr = get_Proj_proj(arg);
1339                 env->local_weights[proj_nr] = calc_method_local_weight(arg);
1340         }
1341 }
1342
1343 /**
1344  * Calculate the benefice for transmitting a local variable address.
1345  * After inlining, the local variable might be transformed into an
1346  * SSA variable by scalar_replacement().
1347  */
1348 static unsigned get_method_local_address_weight(ir_graph *callee, int pos)
1349 {
1350         inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);
1351
1352         if (env->local_weights != NULL) {
1353                 if (pos < ARR_LEN(env->local_weights))
1354                         return env->local_weights[pos];
1355                 return 0;
1356         }
1357
1358         analyze_irg_local_weights(env, callee);
1359
1360         if (pos < ARR_LEN(env->local_weights))
1361                 return env->local_weights[pos];
1362         return 0;
1363 }
1364
1365 /**
1366  * Calculate a benefice value for inlining the given call.
1367  *
1368  * @param call       the call node we have to inspect
1369  * @param callee     the called graph
1370  */
1371 static int calc_inline_benefice(call_entry *entry, ir_graph *callee)
1372 {
1373         ir_node   *call = entry->call;
1374         ir_entity *ent  = get_irg_entity(callee);
1375         ir_node   *frame_ptr;
1376         ir_type   *mtp;
1377         int       weight = 0;
1378         int       i, n_params, all_const;
1379         unsigned  cc, v;
1380         irg_inline_property prop;
1381
1382         inline_irg_env *callee_env;
1383
1384         prop = get_irg_inline_property(callee);
1385         if (prop == irg_inline_forbidden) {
1386                 DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden\n",
1387                     call, callee));
1388                 return entry->benefice = INT_MIN;
1389         }
1390
1391         if (get_irg_additional_properties(callee) & mtp_property_noreturn) {
1392                 DB((dbg, LEVEL_2, "In %+F Call to %+F: not inlining noreturn\n",
1393                     call, callee));
1394                 return entry->benefice = INT_MIN;
1395         }
1396
1397         /* costs for every passed parameter */
1398         n_params = get_Call_n_params(call);
1399         mtp      = get_entity_type(ent);
1400         cc       = get_method_calling_convention(mtp);
1401         if (cc & cc_reg_param) {
1402                 /* register calling convention: passing parameters in registers is cheaper */
1403                 int max_regs = cc & ~cc_bits;
1404
1405                 if (max_regs < n_params)
1406                         weight += max_regs * 2 + (n_params - max_regs) * 5;
1407                 else
1408                         weight += n_params * 2;
1409         } else {
1410                 /* parameters are passed on the stack */
1411                 weight += 5 * n_params;
1412         }
1413
1414         /* constant parameters improve the benefice */
1415         frame_ptr = get_irg_frame(current_ir_graph);
1416         all_const = 1;
1417         for (i = 0; i < n_params; ++i) {
1418                 ir_node *param = get_Call_param(call, i);
1419
1420                 if (is_Const(param)) {
1421                         weight += get_method_param_weight(ent, i);
1422                 } else {
1423                         all_const = 0;
1424                         if (is_SymConst(param))
1425                                 weight += get_method_param_weight(ent, i);
1426                         else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
1427                                 /*
1428                                  * An address of a local variable is transmitted. After
1429                                  * inlining, scalar_replacement might be able to remove the
1430                                  * local variable, so honor this.
1431                                  */
1432                                 v = get_method_local_address_weight(callee, i);
1433                                 weight += v;
1434                                 if (v > 0)
1435                                         entry->local_adr = 1;
1436                         }
1437                 }
1438         }
1439         entry->all_const = all_const;
1440
1441         callee_env = (inline_irg_env*)get_irg_link(callee);
1442         if (callee_env->n_callers == 1 &&
1443             callee != current_ir_graph &&
1444             !entity_is_externally_visible(ent)) {
1445                 weight += 700;
1446         }
1447
1448         /* give a bonus for functions with one block */
1449         if (callee_env->n_blocks == 1)
1450                 weight = weight * 3 / 2;
1451
1452         /* and one for small non-recursive functions: we want them to be inlined in almost every case */
1453         if (callee_env->n_nodes < 30 && !callee_env->recursive)
1454                 weight += 2000;
1455
1456         /* and finally for leaf functions: they do not increase the register
1457            pressure because of callee-save registers */
1458         if (callee_env->n_call_nodes == 0)
1459                 weight += 400;
1460
1461         /* it is important to inline calls in inner loops first */
1462         if (entry->loop_depth > 30)
1463                 weight += 30 * 1024;
1464         else
1465                 weight += entry->loop_depth * 1024;
1466
1467         /*
1468          * All arguments constant is probably a good sign, give an extra bonus
1469          */
1470         if (all_const)
1471                 weight += 1024;
1472
1473         return entry->benefice = weight;
1474 }
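
/* Worked example of the arithmetic above (the bonuses follow directly from
 * the code; the call itself is hypothetical): a call with two constant
 * register parameters to a 25-node, single-block, non-recursive leaf
 * function at loop depth 1 scores
 *
 *     weight  = 2 * 2           = 4      two register parameters
 *     weight  = 4 * 3 / 2       = 6      single-block bonus
 *     weight += 2000 + 400      = 2406   small non-recursive + leaf
 *     weight += 1*1024 + 1024   = 4454   loop depth + all-const bonus
 *
 * ignoring the per-parameter weights from get_method_param_weight(). */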
1475
1476 static ir_graph **irgs;
1477 static int      last_irg;
1478
1479 /**
1480  * Callgraph walker, collect all visited graphs.
1481  */
1482 static void callgraph_walker(ir_graph *irg, void *data)
1483 {
1484         (void) data;
1485         irgs[last_irg++] = irg;
1486 }
1487
1488 /**
1489  * Creates an inline order for all graphs.
1490  *
1491  * @return the list of graphs.
1492  */
1493 static ir_graph **create_irg_list(void)
1494 {
1495         ir_entity **free_methods;
1496         int       arr_len;
1497         int       n_irgs = get_irp_n_irgs();
1498
1499         cgana(&arr_len, &free_methods);
1500         xfree(free_methods);
1501
1502         compute_callgraph();
1503
1504         last_irg = 0;
1505         irgs     = XMALLOCNZ(ir_graph*, n_irgs);
1506
1507         callgraph_walk(NULL, callgraph_walker, NULL);
1508         assert(n_irgs == last_irg);
1509
1510         return irgs;
1511 }
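
/* Note: callgraph_walker() is registered as the post-visit function of
 * callgraph_walk(), so irgs[] lists callees before their callers and
 * inline_functions() below processes the graphs bottom-up through the
 * call graph. */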
1512
1513 /**
1514  * Push a call onto the priority queue if its benefice is big enough.
1515  *
1516  * @param pqueue   the priority queue of calls
1517  * @param call     the call entry
1518  * @param inline_threshold
1519  *                 the threshold value
1520  */
1521 static void maybe_push_call(pqueue_t *pqueue, call_entry *call,
1522                             int inline_threshold)
1523 {
1524         ir_graph            *callee  = call->callee;
1525         irg_inline_property prop     = get_irg_inline_property(callee);
1526         int                 benefice = calc_inline_benefice(call, callee);
1527
1528         DB((dbg, LEVEL_2, "In %+F Call %+F to %+F has benefice %d\n",
1529             get_irn_irg(call->call), call->call, callee, benefice));
1530
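        /* calls marked irg_inline_forced (or stronger) bypass the threshold
         * test and are always queued; all others must reach the threshold */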
1531         if (prop < irg_inline_forced && benefice < inline_threshold) {
1532                 return;
1533         }
1534
1535         pqueue_put(pqueue, call, benefice);
1536 }
1537
1538 /**
1539  * Try to inline calls into a graph.
1540  *
1541  * @param irg      the graph into which we inline
1542  * @param maxsize  do NOT inline if the size of irg gets
1543  *                 bigger than this amount
1544  * @param inline_threshold
1545  *                 threshold value for inline decision
1546  * @param copied_graphs
1547  *                 map containing copies of recursive graphs
1548  */
1549 static void inline_into(ir_graph *irg, unsigned maxsize,
1550                         int inline_threshold, pmap *copied_graphs)
1551 {
1552         int            phiproj_computed = 0;
1553         inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
1554         call_entry     *curr_call;
1555         wenv_t         wenv;
1556         pqueue_t       *pqueue;
1557
1558         if (env->n_call_nodes == 0)
1559                 return;
1560
1561         if (env->n_nodes > maxsize) {
1562                 DB((dbg, LEVEL_2, "%+F: too big (%d)\n", irg, env->n_nodes));
1563                 return;
1564         }
1565
1566         current_ir_graph = irg;
1567         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1568
1569         /* put all calls of this graph into the pqueue */
1570         pqueue = new_pqueue();
1571
1572         list_for_each_entry(call_entry, curr_call, &env->calls, list) {
1573                 assert(is_Call(curr_call->call));
1574                 maybe_push_call(pqueue, curr_call, inline_threshold);
1575         }
1576
1577         /* note that the list of possible calls is updated during the process */
1578         while (!pqueue_empty(pqueue)) {
1579                 int                 did_inline;
1580                 call_entry          *curr_call  = (call_entry*)pqueue_pop_front(pqueue);
1581                 ir_graph            *callee     = curr_call->callee;
1582                 ir_node             *call_node  = curr_call->call;
1583                 inline_irg_env      *callee_env = (inline_irg_env*)get_irg_link(callee);
1584                 irg_inline_property prop        = get_irg_inline_property(callee);
1585                 int                 loop_depth;
1586                 const call_entry    *centry;
1587                 pmap_entry          *e;
1588
1589                 if ((prop < irg_inline_forced) && env->n_nodes + callee_env->n_nodes > maxsize) {
1590                         DB((dbg, LEVEL_2, "%+F: too big (%d) + %+F (%d)\n", irg,
1591                                                 env->n_nodes, callee, callee_env->n_nodes));
1592                         continue;
1593                 }
1594
1595                 e = pmap_find(copied_graphs, callee);
1596                 if (e != NULL) {
1597                         int benefice = curr_call->benefice;
1598                         /*
1599                          * Reduce the weight for recursive functions if not all arguments
1600                          * are constant: inlining recursive functions is rarely profitable.
1601                          */
1602                         if (!curr_call->all_const)
1603                                 benefice -= 2000;
1604                         if (benefice < inline_threshold)
1605                                 continue;
1606
1607                         /*
1608                          * Remap callee if we have a copy.
1609                          */
1610                         callee     = (ir_graph*)e->value;
1611                         callee_env = (inline_irg_env*)get_irg_link(callee);
1612                 }
1613
1614                 if (current_ir_graph == callee) {
1615                         /*
1616                          * Recursive call: we cannot inline directly, because we cannot
1617                          * walk the graph and modify it at the same time. So we have to
1618                          * make a copy of the graph first.
1619                          */
1620                         int benefice = curr_call->benefice;
1621                         ir_graph *copy;
1622
1623                         /*
1624                          * Reduce the weight for recursive functions if not all arguments
1625                          * are constant: inlining recursive functions is rarely profitable.
1626                          */
1627                         if (!curr_call->all_const)
1628                                 benefice -= 2000;
1629                         if (benefice < inline_threshold)
1630                                 continue;
1631
1632                         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1633
1634                         /*
1635                          * No copy yet, create one.
1636                          * Note that recursive methods are never leaf functions, so it
1637                          * is sufficient to test this condition here.
1638                          */
1639                         copy = create_irg_copy(callee);
1640
1641                         /* create_irg_copy() destroys the Proj links, recompute them */
1642                         phiproj_computed = 0;
1643
1644                         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1645
1646                         /* allocate a new environment */
1647                         callee_env = alloc_inline_irg_env();
1648                         set_irg_link(copy, callee_env);
1649
1650                         assure_cf_loop(copy);
1651                         wenv.x              = callee_env;
1652                         wenv.ignore_callers = 1;
1653                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
1654
1655                         /*
1656                          * Enter the entity of the original graph. This is needed
1657                          * for inline_method(). However, note that ent->irg still points
1658                          * to callee, NOT to copy.
1659                          */
1660                         set_irg_entity(copy, get_irg_entity(callee));
1661
1662                         pmap_insert(copied_graphs, callee, copy);
1663                         callee = copy;
1664
1665                         /* we have only one caller: the original graph */
1666                         callee_env->n_callers      = 1;
1667                         callee_env->n_callers_orig = 1;
1668                 }
1669                 if (! phiproj_computed) {
1670                         phiproj_computed = 1;
1671                         collect_phiprojs(current_ir_graph);
1672                 }
1673                 did_inline = inline_method(call_node, callee);
1674                 if (!did_inline)
1675                         continue;
1676
1677                 /* call was inlined, Phi/Projs for current graph must be recomputed */
1678                 phiproj_computed = 0;
1679
1680                 /* remove it from the caller list */
1681                 list_del(&curr_call->list);
1682
1683                 /* callee was inlined. Append its call list. */
1684                 env->got_inline = 1;
1685                 --env->n_call_nodes;
1686
1687                 /* the inlined body just brought a bunch of new calls into our graph */
1688                 loop_depth = curr_call->loop_depth;
1689                 list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
1690                         inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
1691                         ir_node        *new_call;
1692                         call_entry     *new_entry;
1693
1694                         /* after we have inlined callee, all called methods inside
1695                          * callee are now called once more */
1696                         ++penv->n_callers;
1697
1698                         /* Note that the src list points to Call nodes in the inlined graph,
1699                          * but we need Call nodes in our graph. Luckily the inliner leaves
1700                          * this information in the link field. */
1701                         new_call = (ir_node*)get_irn_link(centry->call);
1702                         assert(is_Call(new_call));
1703
1704                         new_entry = duplicate_call_entry(centry, new_call, loop_depth);
1705                         list_add_tail(&new_entry->list, &env->calls);
1706                         maybe_push_call(pqueue, new_entry, inline_threshold);
1707                 }
1708
1709                 env->n_call_nodes += callee_env->n_call_nodes;
1710                 env->n_nodes += callee_env->n_nodes;
1711                 --callee_env->n_callers;
1712         }
1713         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1714         del_pqueue(pqueue);
1715 }
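
/* Note on the loop above: every successful inline splices the callee's
 * call list into the caller and pushes the duplicated entries back onto
 * the pqueue, so inline_into() iterates until no remaining call passes
 * the threshold, bounded by the maxsize check. */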
1716
1717 /*
1718  * Heuristic inliner. Calculates a benefice value for every call and inlines
1719  * those calls with a value higher than the threshold.
1720  */
1721 void inline_functions(unsigned maxsize, int inline_threshold,
1722                       opt_ptr after_inline_opt)
1723 {
1724         inline_irg_env   *env;
1725         int              i, n_irgs;
1726         ir_graph         *rem;
1727         wenv_t           wenv;
1728         pmap             *copied_graphs;
1729         pmap_entry       *pm_entry;
1730         ir_graph         **irgs;
1731
1732         rem = current_ir_graph;
1733         obstack_init(&temp_obst);
1734
1735         irgs = create_irg_list();
1736
1737         /* a map for the copied graphs, used to inline recursive calls */
1738         copied_graphs = pmap_create();
1739
1740         /* extend all irgs by a temporary data structure for inlining. */
1741         n_irgs = get_irp_n_irgs();
1742         for (i = 0; i < n_irgs; ++i)
1743                 set_irg_link(irgs[i], alloc_inline_irg_env());
1744
1745         /* Pre-compute information in temporary data structure. */
1746         wenv.ignore_runtime = 0;
1747         wenv.ignore_callers = 0;
1748         for (i = 0; i < n_irgs; ++i) {
1749                 ir_graph *irg = irgs[i];
1750
1751                 free_callee_info(irg);
1752
1753                 wenv.x = (inline_irg_env*)get_irg_link(irg);
1754                 assure_cf_loop(irg);
1755                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
1756         }
1757
1758         /* -- and now inline. -- */
1759         for (i = 0; i < n_irgs; ++i) {
1760                 ir_graph *irg = irgs[i];
1761
1762                 inline_into(irg, maxsize, inline_threshold, copied_graphs);
1763         }
1764
1765         for (i = 0; i < n_irgs; ++i) {
1766                 ir_graph *irg = irgs[i];
1767
1768                 env = (inline_irg_env*)get_irg_link(irg);
1769                 if (env->got_inline && after_inline_opt != NULL) {
1770                         /* this irg got calls inlined: optimize it */
1771                         after_inline_opt(irg);
1772                 }
1773                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
1774                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
1775                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
1776                         env->n_callers_orig, env->n_callers,
1777                         get_entity_name(get_irg_entity(irg))));
1778                 }
1779         }
1780
1781         /* kill the copied graphs: we don't need them anymore */
1782         foreach_pmap(copied_graphs, pm_entry) {
1783                 ir_graph *copy = (ir_graph*)pm_entry->value;
1784
1785                 /* reset the entity, otherwise it will be deleted in the next step ... */
1786                 set_irg_entity(copy, NULL);
1787                 free_ir_graph(copy);
1788         }
1789         pmap_destroy(copied_graphs);
1790
1791         xfree(irgs);
1792
1793         obstack_free(&temp_obst, NULL);
1794         current_ir_graph = rem;
1795 }
1796
1797 typedef struct inline_functions_pass_t {
1798         ir_prog_pass_t pass;
1799         unsigned       maxsize;
1800         int            inline_threshold;
1801         opt_ptr        after_inline_opt;
1802 } inline_functions_pass_t;
1803
1804 /**
1805  * Wrapper to run inline_functions() as an ir_prog pass.
1806  */
1807 static int inline_functions_wrapper(ir_prog *irp, void *context)
1808 {
1809         inline_functions_pass_t *pass = (inline_functions_pass_t*)context;
1810
1811         (void)irp;
1812         inline_functions(pass->maxsize, pass->inline_threshold,
1813                          pass->after_inline_opt);
1814         return 0;
1815 }
1816
1817 /* create an ir_prog pass for inline_functions */
1818 ir_prog_pass_t *inline_functions_pass(
1819           const char *name, unsigned maxsize, int inline_threshold,
1820           opt_ptr after_inline_opt)
1821 {
1822         inline_functions_pass_t *pass = XMALLOCZ(inline_functions_pass_t);
1823
1824         pass->maxsize          = maxsize;
1825         pass->inline_threshold = inline_threshold;
1826         pass->after_inline_opt = after_inline_opt;
1827
1828         return def_prog_pass_constructor(
1829                 &pass->pass, name ? name : "inline_functions",
1830                 inline_functions_wrapper);
1831 }
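
/* Usage sketch (the numbers are hypothetical; after_inline_opt may be
 * NULL, in which case no cleanup runs on graphs that received inlined
 * code):
 *
 *     inline_functions(750, 0, NULL);
 *
 * or, wrapped as a pass with the same parameters:
 *
 *     ir_prog_pass_t *p = inline_functions_pass(NULL, 750, 0, NULL);
 */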
1832
1833 void firm_init_inline(void)
1834 {
1835         FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
1836 }