/*
 * Copyright (C) 1995-2011 University of Karlsruhe.  All rights reserved.
 *
 * This file is part of libFirm.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License
 * Agreement provided with the Software.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

/**
 * @file
 * @brief    Dead node elimination and Procedure Inlining.
 * @author   Michael Beck, Goetz Lindenmaier
 */
#include "config.h"

#include <limits.h>
#include <stdbool.h>
#include <assert.h>

#include "irnode_t.h"
#include "irgraph_t.h"
#include "irprog_t.h"

#include "iroptimize.h"
#include "ircons_t.h"
#include "iropt_t.h"
#include "irgopt.h"
#include "irgmod.h"
#include "irgwalk.h"

#include "array_t.h"
#include "list.h"
#include "pset.h"
#include "pmap.h"
#include "pdeq.h"
#include "xmalloc.h"
#include "pqueue.h"

#include "irouts.h"
#include "irloop_t.h"
#include "irbackedge_t.h"
#include "opt_init.h"
#include "cgana.h"
#include "trouts.h"
#include "error.h"

#include "analyze_irg_args.h"
#include "iredges_t.h"
#include "irflag_t.h"
#include "irhooks.h"
#include "irtools.h"
#include "iropt_dbg.h"
#include "irpass_t.h"
#include "irphase_t.h"

DEBUG_ONLY(static firm_dbg_module_t *dbg;)

/*------------------------------------------------------------------*/
/* Routines for dead node elimination / copying garbage collection  */
/* of the obstack.                                                  */
/*------------------------------------------------------------------*/

/**
 * Remember the new node in the old node by using a field all nodes have.
 */
static void set_new_node(ir_node *node, ir_node *new_node)
{
        set_irn_link(node, new_node);
}

/**
 * Get this new node, before the old node is forgotten.
 */
static inline ir_node *get_new_node(ir_node *old_node)
{
        assert(irn_visited(old_node));
        return (ir_node*) get_irn_link(old_node);
}
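
/* The old-to-new mapping lives in the link field of the old node, and the
 * old node's visited flag signals that a mapping has been set (hence the
 * assert above).  The copy walk during inlining below relies on this
 * invariant. */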

/*--------------------------------------------------------------------*/
/*  Functionality for inlining                                        */
/*--------------------------------------------------------------------*/

/**
 * Copy node for inlining.  Updates attributes that change when
 * inlining, but not those relevant only for dead node elimination.
 *
 * Copies the node by calling irn_copy_into_irg() and then updates the
 * entity if it is a local one.  env must be the graph into which we
 * inline.  The copied entities must already be stored in the link fields
 * of the old entities (see copy_frame_entities() below).
 */
static void copy_node_inline(ir_node *node, void *env)
{
        ir_graph *new_irg  = (ir_graph*) env;
        ir_node  *new_node = irn_copy_into_irg(node, new_irg);

        set_new_node(node, new_node);
        if (is_Sel(node)) {
                ir_graph  *old_irg        = get_irn_irg(node);
                ir_type   *old_frame_type = get_irg_frame_type(old_irg);
                ir_entity *old_entity     = get_Sel_entity(node);
                assert(is_Sel(new_node));
                /* use copied entities from the new frame */
                if (get_entity_owner(old_entity) == old_frame_type) {
                        ir_entity *new_entity = (ir_entity*)get_entity_link(old_entity);
                        assert(new_entity != NULL);
                        set_Sel_entity(new_node, new_entity);
                }
        } else if (is_Block(new_node)) {
                new_node->attr.block.irg.irg = new_irg;
        }
}

static void set_preds_inline(ir_node *node, void *env)
{
        ir_node *new_node;

        irn_rewire_inputs(node);

        /* move constants into the start block (constant-like nodes are
         * canonically placed there) */
        new_node = get_new_node(node);
        if (is_irn_constlike(new_node)) {
                ir_graph *new_irg     = (ir_graph *) env;
                ir_node  *start_block = get_irg_start_block(new_irg);
                set_nodes_block(new_node, start_block);
        }
}

/**
 * Walker: checks whether the value parameter area of the frame
 * (P_value_arg_base) is accessed, i.e. the address of a parameter is
 * taken, or whether alloca is used; both forbid inlining.
 */
static void find_addr(ir_node *node, void *env)
{
        bool *allow_inline = (bool*)env;

        if (is_Sel(node)) {
                ir_graph *irg = current_ir_graph;
                if (get_Sel_ptr(node) == get_irg_frame(irg)) {
                        /* access to frame */
                        ir_entity *ent = get_Sel_entity(node);
                        if (get_entity_owner(ent) != get_irg_frame_type(irg)) {
                                /* access to value_type */
                                *allow_inline = false;
                        }
                }
        } else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
                /* From GCC:
                 * Refuse to inline an alloca call unless the user explicitly forced
                 * it, as this may change the program's memory overhead drastically
                 * when the function using alloca is called in a loop.  In the GCC
                 * version present in SPEC2000, inlining into schedule_block caused
                 * it to require 2GB of RAM instead of 256MB.
                 *
                 * Sadly, this is true for our implementation as well.
                 * Moreover, we cannot differentiate between alloca() and VLA yet, so
                 * this disables inlining of functions using VLA (which are completely
                 * safe).
                 *
                 * Two solutions:
                 * - add a flag to the Alloc node for "real" alloca() calls
                 * - add a new Stack-Restore node at the end of a function using
                 *   alloca()
                 */
                *allow_inline = false;
        }
}

/**
 * Check if we can inline a given call.
 * Currently we cannot inline in two cases:
 * - calls with compound arguments or results
 * - graphs that take the address of a parameter
 *
 * These conditions are checked here.
 */
static bool can_inline(ir_node *call, ir_graph *called_graph)
{
        ir_entity          *called      = get_irg_entity(called_graph);
        ir_type            *called_type = get_entity_type(called);
        ir_type            *call_type   = get_Call_type(call);
        size_t              n_params    = get_method_n_params(called_type);
        size_t              n_arguments = get_method_n_params(call_type);
        size_t              n_res       = get_method_n_ress(called_type);
        irg_inline_property prop        = get_irg_inline_property(called_graph);
        size_t              i;
        bool                res;

        if (prop == irg_inline_forbidden)
                return false;

        if (n_arguments != n_params) {
                /* this is a bad feature of C: without a prototype, we can
                 * call a function with fewer parameters than needed. Currently
                 * we don't support this, although we could use Unknown then. */
                return false;
        }
        if (n_res != get_method_n_ress(call_type)) {
                return false;
        }

        /* Argh, compiling C has some bad consequences:
         * It is implementation dependent what happens when the call type and
         * the declared type disagree.  We support inlining if the bit size of
         * the types matches AND the same arithmetic is used; then we can
         * simply "reinterpret" the bits (e.g. an int argument passed for a
         * long parameter is fine on targets where both are 32 bit wide). */
        for (i = 0; i < n_params; ++i) {
                ir_type *param_tp = get_method_param_type(called_type, i);
                ir_type *arg_tp   = get_method_param_type(call_type, i);

                if (param_tp != arg_tp) {
                        ir_mode *pmode = get_type_mode(param_tp);
                        ir_mode *amode = get_type_mode(arg_tp);

                        if (pmode == NULL || amode == NULL)
                                return false;
                        if (get_mode_size_bits(pmode) != get_mode_size_bits(amode))
                                return false;
                        if (get_mode_arithmetic(pmode) != get_mode_arithmetic(amode))
                                return false;
                        /* otherwise we can simply "reinterpret" the bits */
                }
        }
        for (i = 0; i < n_res; ++i) {
                ir_type *decl_res_tp = get_method_res_type(called_type, i);
                ir_type *used_res_tp = get_method_res_type(call_type, i);

                if (decl_res_tp != used_res_tp) {
                        ir_mode *decl_mode = get_type_mode(decl_res_tp);
                        ir_mode *used_mode = get_type_mode(used_res_tp);
                        if (decl_mode == NULL || used_mode == NULL)
                                return false;
                        if (get_mode_size_bits(decl_mode) != get_mode_size_bits(used_mode))
                                return false;
                        if (get_mode_arithmetic(decl_mode) != get_mode_arithmetic(used_mode))
                                return false;
                        /* otherwise we can "reinterpret" the bits */
                }
        }

        /* check parameters for compound arguments */
        for (i = 0; i < n_params; ++i) {
                ir_type *p_type = get_method_param_type(call_type, i);

                if (is_compound_type(p_type))
                        return false;
        }

        /* check results for compound arguments */
        for (i = 0; i < n_res; ++i) {
                ir_type *r_type = get_method_res_type(call_type, i);

                if (is_compound_type(r_type))
                        return false;
        }

        res = true;
        irg_walk_graph(called_graph, find_addr, NULL, &res);

        return res;
}

enum exc_mode {
        exc_handler,    /**< There is a handler. */
        exc_no_handler  /**< Exception handling not represented. */
};

/**
 * Copy all entities of the stack frame of one irg to the stack frame of
 * another.  Sets the entity links of the old entities to the copies.
 */
static void copy_frame_entities(ir_graph *from, ir_graph *to)
{
        ir_type *from_frame = get_irg_frame_type(from);
        ir_type *to_frame   = get_irg_frame_type(to);
        size_t   n_members  = get_class_n_members(from_frame);
        size_t   i;
        assert(from_frame != to_frame);

        for (i = 0; i < n_members; ++i) {
                ir_entity *old_ent = get_class_member(from_frame, i);
                ir_entity *new_ent = copy_entity_own(old_ent, to_frame);
                set_entity_link(old_ent, new_ent);
        }
}

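/*
 * A minimal usage sketch for inline_method(), mirroring what
 * inline_small_irgs() below does.  The caller must have collected the
 * Phi/Proj lists and reserved the corresponding resources beforehand;
 * after a successful inlining these lists are stale and must be
 * recomputed before the next inlining into the same graph:
 *
 *     ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_PHI_LIST);
 *     collect_phiprojs(irg);
 *     if (inline_method(call, callee)) {
 *             // the call site was replaced; recompute the Phi/Proj
 *             // lists before inlining the next call of this graph
 *     }
 *     ir_free_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_PHI_LIST);
 */
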
/* Inlines a method at the given call site. */
int inline_method(ir_node *call, ir_graph *called_graph)
{
        ir_node       *pre_call;
        ir_node       *post_call, *post_bl;
        ir_node       *in[pn_Start_max];
        ir_node       *end, *end_bl, *block;
        ir_node       **res_pred;
        ir_node       **cf_pred;
        ir_node       **args_in;
        ir_node       *ret, *phi;
        int           arity, n_ret, n_exc, n_res, i, j, rem_opt;
        int           irn_arity, n_params;
        int           n_mem_phi;
        enum exc_mode exc_handling;
        ir_type       *mtp;
        ir_type       *ctp;
        ir_entity     *ent;
        ir_graph      *rem;
        ir_graph      *irg = get_irn_irg(call);

        /* we cannot inline some types of calls */
        if (! can_inline(call, called_graph))
                return 0;

        /* We cannot inline a recursive call. The graph must be copied before
         * the call to inline_method() using create_irg_copy(). */
        if (called_graph == irg)
                return 0;

        ent      = get_irg_entity(called_graph);
        mtp      = get_entity_type(ent);
        ctp      = get_Call_type(call);
        n_params = get_method_n_params(mtp);
        n_res    = get_method_n_ress(mtp);

        rem = current_ir_graph;
        current_ir_graph = irg;

        DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));

        /* optimizations can cause problems when allocating new nodes */
        rem_opt = get_opt_optimize();
        set_optimize(0);

        /* Handle graph state */
        assert(get_irg_phase_state(irg) != phase_building);
        assert(get_irg_pinned(irg) == op_pin_state_pinned);
        assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
        set_irg_extblk_inconsistent(irg);
        set_irg_doms_inconsistent(irg);
        set_irg_loopinfo_inconsistent(irg);
        set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
        set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);
        edges_deactivate(irg);

        /* here we know we WILL inline, so inform the statistics */
        hook_inline(call, called_graph);

        /* -- Decide how to handle exception control flow: Is there a handler
           for the Call node, or do we branch directly to End on an exception?
           exc_handling:
           exc_handler:    there is a handler.
           exc_no_handler: exception handling is not represented in Firm. -- */
        {
                ir_node *Xproj = NULL;
                ir_node *proj;
                for (proj = (ir_node*)get_irn_link(call); proj != NULL;
                     proj = (ir_node*)get_irn_link(proj)) {
                        long proj_nr = get_Proj_proj(proj);
                        if (proj_nr == pn_Call_X_except) Xproj = proj;
                }
                exc_handling = Xproj != NULL ? exc_handler : exc_no_handler;
        }

        /* create the argument tuple */
        args_in = ALLOCAN(ir_node*, n_params);

        block = get_nodes_block(call);
        for (i = n_params - 1; i >= 0; --i) {
                ir_node *arg      = get_Call_param(call, i);
                ir_type *param_tp = get_method_param_type(mtp, i);
                ir_mode *mode     = get_type_mode(param_tp);

                if (mode != get_irn_mode(arg)) {
                        arg = new_r_Conv(block, arg, mode);
                }
                args_in[i] = arg;
        }

        /* Pre_call is a Tuple that mimics the outputs of the Start node of
         * the called procedure and later replaces the Start node of the
         * called graph.  Post_call is the old Call node and collects the
         * results of the called graph.  Both will end up being a tuple. */
        post_bl = get_nodes_block(call);
        /* XxMxPxPxPxT of Start + parameter of Call */
        in[pn_Start_M]              = get_Call_mem(call);
        in[pn_Start_X_initial_exec] = new_r_Jmp(post_bl);
        in[pn_Start_P_frame_base]   = get_irg_frame(irg);
        in[pn_Start_T_args]         = new_r_Tuple(post_bl, n_params, args_in);
        pre_call = new_r_Tuple(post_bl, pn_Start_max, in);
        post_call = call;

        /* --
           The new block gets the ins of the old block, pre_call and all its
           predecessors and all Phi nodes. -- */
        part_block(pre_call);

        /* increment visited flag for later walk */
        inc_irg_visited(called_graph);

        /* link some nodes to nodes in the current graph so that instead of
         * being copied, the linked nodes will get used.
         * This way the copier will use the created Tuple instead of copying
         * the Start node, and similarly for singleton nodes like NoMem and Bad.
         * Note: this prevents predecessors from being copied - only do it for
         *       nodes without predecessors */
        {
                ir_node *start_block;
                ir_node *start;
                ir_node *nomem;

                start_block = get_irg_start_block(called_graph);
                set_new_node(start_block, get_nodes_block(pre_call));
                mark_irn_visited(start_block);

                start = get_irg_start(called_graph);
                set_new_node(start, pre_call);
                mark_irn_visited(start);

                nomem = get_irg_no_mem(called_graph);
                set_new_node(nomem, get_irg_no_mem(irg));
                mark_irn_visited(nomem);
        }
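
        /* These pre-seeded mappings work because the copy walk below only
         * visits nodes that are not marked visited yet; for the nodes marked
         * above, get_new_node() directly yields the substitutes. */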

        /* the entity link is used to link entities on the old stack frame to
         * their copies on the new stack frame */
        irp_reserve_resources(irp, IR_RESOURCE_ENTITY_LINK);

        /* copy entities and nodes */
        assert(!irn_visited(get_irg_end(called_graph)));
        copy_frame_entities(called_graph, irg);
        irg_walk_core(get_irg_end(called_graph), copy_node_inline, set_preds_inline,
                      irg);

        irp_free_resources(irp, IR_RESOURCE_ENTITY_LINK);

        /* -- Merge the end of the inlined procedure with the call site -- */
        /* We will turn the old Call node into a Tuple with the following
           predecessors:
           -1:  Block of Tuple.
           0: Phi of all Memories of Return statements.
           1: Jmp from new Block that merges the control flow from all exception
           predecessors of the old end block.
           2: Tuple of all arguments.
           3: Phi of Exception memories.
           In case the old Call directly branches to End on an exception we don't
           need the block merging all exceptions nor the Phi of the exception
           memories.
        */

        /* Precompute some values */
        end_bl = get_new_node(get_irg_end_block(called_graph));
        end    = get_new_node(get_irg_end(called_graph));
        arity  = get_Block_n_cfgpreds(end_bl);    /* arity = n_exc + n_ret  */
        n_res  = get_method_n_ress(get_Call_type(call));

        res_pred = XMALLOCN(ir_node*, n_res);
        cf_pred  = XMALLOCN(ir_node*, arity);

        /* preserve the keepalives of the inlined graph */
        irn_arity = get_irn_arity(end);
        for (i = 0; i < irn_arity; i++) {
                ir_node *ka = get_End_keepalive(end, i);
                if (! is_Bad(ka))
                        add_End_keepalive(get_irg_end(irg), ka);
        }

        /* replace Return nodes by Jmp nodes */
        n_ret = 0;
        for (i = 0; i < arity; i++) {
                ir_node *ret;
                ret = get_Block_cfgpred(end_bl, i);
                if (is_Return(ret)) {
                        ir_node *block = get_nodes_block(ret);
                        cf_pred[n_ret] = new_r_Jmp(block);
                        n_ret++;
                }
        }
        set_irn_in(post_bl, n_ret, cf_pred);

        /* build a Tuple for all results of the method.
         * add Phi node if there was more than one Return. */
        turn_into_tuple(post_call, pn_Call_max);
        /* First the Memory-Phi */
        n_mem_phi = 0;
        for (i = 0; i < arity; i++) {
                ret = get_Block_cfgpred(end_bl, i);
                if (is_Return(ret)) {
                        cf_pred[n_mem_phi++] = get_Return_mem(ret);
                }
                /* memory output for some exceptions is directly connected to End;
                 * the literal Proj numbers below are the positions of the memory
                 * output of the respective operations */
                if (is_Call(ret)) {
                        cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 3);
                } else if (is_fragile_op(ret)) {
                        /* We rely on all cfops having the memory output at the same position. */
                        cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 0);
                } else if (is_Raise(ret)) {
                        cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 1);
                }
        }
        phi = new_r_Phi(post_bl, n_mem_phi, cf_pred, mode_M);
        set_Tuple_pred(call, pn_Call_M, phi);
        /* Conserve Phi-list for further inlinings -- but might be optimized */
        if (get_nodes_block(phi) == post_bl) {
                set_irn_link(phi, get_irn_link(post_bl));
                set_irn_link(post_bl, phi);
        }
        /* Now the real results */
        if (n_res > 0) {
                ir_node *result_tuple;
                for (j = 0; j < n_res; j++) {
                        ir_type *res_type = get_method_res_type(ctp, j);
                        ir_mode *res_mode = get_type_mode(res_type);
                        n_ret = 0;
                        for (i = 0; i < arity; i++) {
                                ret = get_Block_cfgpred(end_bl, i);
                                if (is_Return(ret)) {
                                        ir_node *res = get_Return_res(ret, j);
                                        if (get_irn_mode(res) != res_mode) {
                                                ir_node *block = get_nodes_block(res);
                                                res = new_r_Conv(block, res, res_mode);
                                        }
                                        cf_pred[n_ret] = res;
                                        n_ret++;
                                }
                        }
                        if (n_ret > 0) {
                                phi = new_r_Phi(post_bl, n_ret, cf_pred, res_mode);
                        } else {
                                phi = new_r_Bad(irg, res_mode);
                        }
                        res_pred[j] = phi;
                        /* Conserve Phi-list for further inlinings -- but might be optimized */
                        if (get_nodes_block(phi) == post_bl) {
                                set_Phi_next(phi, get_Block_phis(post_bl));
                                set_Block_phis(post_bl, phi);
                        }
                }
                result_tuple = new_r_Tuple(post_bl, n_res, res_pred);
                set_Tuple_pred(call, pn_Call_T_result, result_tuple);
        } else {
                set_Tuple_pred(call, pn_Call_T_result, new_r_Bad(irg, mode_T));
        }
        /* handle the regular call */
        set_Tuple_pred(call, pn_Call_X_regular, new_r_Jmp(post_bl));

        /* Finally the exception control flow.
           We have two possible situations:
           First if the Call branches to an exception handler:
           We need to add a Phi node to
           collect the memory containing the exception objects.  Further we need
           to add another block to get a correct representation of this Phi.  To
           this block we add a Jmp that resolves into the X output of the Call
           when the Call is turned into a tuple.
           Second: There is no exception edge. Just add all inlined exception
           branches to the End node.
         */
        if (exc_handling == exc_handler) {
                n_exc = 0;
                for (i = 0; i < arity; i++) {
                        ir_node *ret, *irn;
                        ret = get_Block_cfgpred(end_bl, i);
                        irn = skip_Proj(ret);
                        if (is_fragile_op(irn) || is_Raise(irn)) {
                                cf_pred[n_exc] = ret;
                                ++n_exc;
                        }
                }
                if (n_exc > 0) {
                        if (n_exc == 1) {
                                /* simple fix */
                                set_Tuple_pred(call, pn_Call_X_except, cf_pred[0]);
                        } else {
                                ir_node *block = new_r_Block(irg, n_exc, cf_pred);
                                set_Tuple_pred(call, pn_Call_X_except, new_r_Jmp(block));
                        }
                } else {
                        set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg, mode_X));
                }
        } else {
                ir_node *main_end_bl;
                int main_end_bl_arity;
                ir_node **end_preds;

                /* here exc_handling == exc_no_handler (or there are no exceptions) */
                n_exc = 0;
                for (i = 0; i < arity; i++) {
                        ir_node *ret = get_Block_cfgpred(end_bl, i);
                        ir_node *irn = skip_Proj(ret);

                        if (is_fragile_op(irn) || is_Raise(irn)) {
                                cf_pred[n_exc] = ret;
                                n_exc++;
                        }
                }
                main_end_bl       = get_irg_end_block(irg);
                main_end_bl_arity = get_irn_arity(main_end_bl);
                end_preds         = XMALLOCN(ir_node*, n_exc + main_end_bl_arity);

                for (i = 0; i < main_end_bl_arity; ++i)
                        end_preds[i] = get_irn_n(main_end_bl, i);
                for (i = 0; i < n_exc; ++i)
                        end_preds[main_end_bl_arity + i] = cf_pred[i];
                set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
                set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg, mode_X));
                free(end_preds);
        }
        free(res_pred);
        free(cf_pred);

        /* -- Turn the optimizations (CSE etc.) back on. -- */
        set_optimize(rem_opt);
        current_ir_graph = rem;

        return 1;
}
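
/* Note: on success, the old Call node has been turned into a Tuple whose
 * predecessors are wired up above; the out edges were deactivated and
 * several analyses (dominance, loop info, ...) were marked inconsistent
 * at the top of inline_method(), so callers must restore or recompute
 * them as needed. */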

/********************************************************************/
/* Apply inlining to small methods.                                 */
/********************************************************************/

static struct obstack  temp_obst;

/** Represents a possibly inlinable call in a graph. */
typedef struct call_entry {
        ir_node    *call;       /**< The Call node. */
        ir_graph   *callee;     /**< The callee IR-graph. */
        list_head  list;        /**< List head for linking the next one. */
        int        loop_depth;  /**< The loop depth of this call. */
        int        benefice;    /**< The calculated benefice of this call. */
        unsigned   local_adr:1; /**< Set if this call gets an address of a local variable. */
        unsigned   all_const:1; /**< Set if this call has only constant parameters. */
} call_entry;

/**
 * environment for inlining small irgs
 */
typedef struct inline_env_t {
        struct obstack obst;  /**< An obstack on which call_entries are allocated. */
        list_head      calls; /**< The call entry list. */
} inline_env_t;

/**
 * Returns the irg called from a Call node. If the irg is not
 * known, NULL is returned.
 *
 * @param call  the call node
 */
static ir_graph *get_call_called_irg(ir_node *call)
{
        ir_node *addr;

        addr = get_Call_ptr(call);
        if (is_Global(addr)) {
                ir_entity *ent = get_Global_entity(addr);
                /* we don't know which function ultimately gets bound to a weak symbol */
                if (get_entity_linkage(ent) & IR_LINKAGE_WEAK)
                        return NULL;

                return get_entity_irg(ent);
        }

        return NULL;
}

/**
 * Walker: Collect all calls to known graphs inside a graph.
 */
static void collect_calls(ir_node *call, void *env)
{
        if (is_Call(call)) {
                ir_graph *called_irg = get_call_called_irg(call);

                if (called_irg != NULL) {
                        /* The Call node calls a locally defined method.  Remember to inline. */
                        inline_env_t *ienv  = (inline_env_t*)env;
                        call_entry   *entry = OALLOC(&ienv->obst, call_entry);
                        entry->call       = call;
                        entry->callee     = called_irg;
                        entry->loop_depth = 0;
                        entry->benefice   = 0;
                        entry->local_adr  = 0;
                        entry->all_const  = 0;

                        list_add_tail(&entry->list, &ienv->calls);
                }
        }
}

/**
 * Inlines all small methods at call sites where the called address comes
 * from a Const node that references the entity representing the called
 * method.
 * The size argument is a rough measure for the code size of the method:
 * Methods where the obstack containing the firm graph is smaller than
 * size are inlined.
 */
void inline_small_irgs(ir_graph *irg, int size)
{
        ir_graph *rem = current_ir_graph;
        inline_env_t env;
        call_entry *entry;

        current_ir_graph = irg;
        /* Handle graph state */
        assert(get_irg_phase_state(irg) != phase_building);
        free_callee_info(irg);

        /* Find Call nodes to inline.
           (We cannot inline during a walk of the graph, as inlining the same
           method several times changes the visited flag of the walked graph:
           after the first inlining visited of the callee equals visited of
           the caller.  With the next inlining both are increased.) */
        obstack_init(&env.obst);
        INIT_LIST_HEAD(&env.calls);
        irg_walk_graph(irg, NULL, collect_calls, &env);

        if (! list_empty(&env.calls)) {
                /* There are calls to inline */
                ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
                collect_phiprojs(irg);

                list_for_each_entry(call_entry, entry, &env.calls, list) {
                        ir_graph            *callee = entry->callee;
                        irg_inline_property prop    = get_irg_inline_property(callee);

                        if (prop == irg_inline_forbidden) {
                                continue;
                        }

                        if (prop >= irg_inline_forced ||
                            _obstack_memory_used(callee->obst) - (int)obstack_room(callee->obst) < size) {
                                inline_method(entry->call, callee);
                        }
                }
                ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
        }
        obstack_free(&env.obst, NULL);
        current_ir_graph = rem;
}
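
/* Example use of inline_small_irgs(); the threshold value is illustrative
 * only, it is compared against the number of bytes used on the callee's
 * obstack (see above):
 *
 *     inline_small_irgs(irg, 5000);
 */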

typedef struct inline_small_irgs_pass_t {
        ir_graph_pass_t pass;
        int            size;
} inline_small_irgs_pass_t;

/**
 * Wrapper to run inline_small_irgs() as a pass.
 */
static int inline_small_irgs_wrapper(ir_graph *irg, void *context)
{
        inline_small_irgs_pass_t *pass = (inline_small_irgs_pass_t*)context;

        inline_small_irgs(irg, pass->size);
        return 0;
}

/* create a pass for inline_small_irgs() */
ir_graph_pass_t *inline_small_irgs_pass(const char *name, int size)
{
        inline_small_irgs_pass_t *pass = XMALLOCZ(inline_small_irgs_pass_t);

        pass->size = size;
        return def_graph_pass_constructor(
                &pass->pass, name ? name : "inline_small_irgs", inline_small_irgs_wrapper);
}

/**
 * Environment for inlining irgs.
 */
typedef struct {
        list_head calls;             /**< List of all call nodes in this graph. */
        unsigned  *local_weights;    /**< Once allocated, the beneficial weight for transmitting local addresses. */
        unsigned  n_nodes;           /**< Number of nodes in graph, not counting Proj, Tuple, Sync and Block nodes nor Start and End. */
        unsigned  n_blocks;          /**< Number of Blocks in graph without Start and End block. */
        unsigned  n_nodes_orig;      /**< for statistics */
        unsigned  n_call_nodes;      /**< Number of Call nodes in the graph. */
        unsigned  n_call_nodes_orig; /**< for statistics */
        unsigned  n_callers;         /**< Number of known graphs that call this graph. */
        unsigned  n_callers_orig;    /**< for statistics */
        unsigned  got_inline:1;      /**< Set, if at least one call inside this graph was inlined. */
        unsigned  recursive:1;       /**< Set, if this function is self recursive. */
} inline_irg_env;

/**
 * Allocate a new environment for inlining.
 */
static inline_irg_env *alloc_inline_irg_env(void)
{
        inline_irg_env *env    = OALLOC(&temp_obst, inline_irg_env);
        INIT_LIST_HEAD(&env->calls);
        env->local_weights     = NULL;
        env->n_nodes           = -2; /* do not count Start, End; relies on unsigned wraparound */
        env->n_blocks          = -2; /* do not count Start and End block */
        env->n_nodes_orig      = -2; /* do not count Start, End */
        env->n_call_nodes      = 0;
        env->n_call_nodes_orig = 0;
        env->n_callers         = 0;
        env->n_callers_orig    = 0;
        env->got_inline        = 0;
        env->recursive         = 0;
        return env;
}

typedef struct walker_env {
        inline_irg_env *x;     /**< the inline environment */
        char ignore_runtime;   /**< the ignore runtime flag */
        char ignore_callers;   /**< if set, do NOT update the callers data */
} wenv_t;

/**
 * post-walker: collect all calls in the inline-environment
 * of a graph and sum some statistics.
 */
static void collect_calls2(ir_node *call, void *ctx)
{
        wenv_t         *env = (wenv_t*)ctx;
        inline_irg_env *x = env->x;
        unsigned        code = get_irn_opcode(call);
        ir_graph       *callee;
        call_entry     *entry;

        /* count meaningful nodes in irg */
        if (code != iro_Proj && code != iro_Tuple && code != iro_Sync) {
                if (code != iro_Block) {
                        ++x->n_nodes;
                        ++x->n_nodes_orig;
                } else {
                        ++x->n_blocks;
                }
        }

        if (code != iro_Call) return;

        /* check if it is a runtime call */
        if (env->ignore_runtime) {
                ir_node *symc = get_Call_ptr(call);

                if (is_Global(symc)) {
                        ir_entity *ent = get_Global_entity(symc);

                        if (get_entity_additional_properties(ent) & mtp_property_runtime)
                                return;
                }
        }

        /* collect all call nodes */
        ++x->n_call_nodes;
        ++x->n_call_nodes_orig;

        callee = get_call_called_irg(call);
        if (callee != NULL) {
                if (! env->ignore_callers) {
                        inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);
                        /* count all static callers */
                        ++callee_env->n_callers;
                        ++callee_env->n_callers_orig;
                }
                if (callee == current_ir_graph)
                        x->recursive = 1;

                /* link it in the list of possible inlinable entries */
                entry = OALLOC(&temp_obst, call_entry);
                entry->call       = call;
                entry->callee     = callee;
                entry->loop_depth = get_irn_loop(get_nodes_block(call))->depth;
                entry->benefice   = 0;
                entry->local_adr  = 0;
                entry->all_const  = 0;

                list_add_tail(&entry->list, &x->calls);
        }
}

/**
 * Returns TRUE if the number of call nodes is 0 in the irg's environment,
 * hence this irg is a leaf ("leave" in the API names): it calls no other
 * graphs.
 */
inline static int is_leave(ir_graph *irg)
{
        inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
        return env->n_call_nodes == 0;
}

/**
 * Returns TRUE if the number of nodes in the callee is
 * smaller than size in the irg's environment.
 */
inline static int is_smaller(ir_graph *callee, unsigned size)
{
        inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);
        return env->n_nodes < size;
}

/**
 * Duplicate a call entry.
 *
 * @param entry     the original entry to duplicate
 * @param new_call  the new call node
 * @param loop_depth_delta
 *                  delta value for the loop depth
 */
static call_entry *duplicate_call_entry(const call_entry *entry,
                                        ir_node *new_call, int loop_depth_delta)
{
        call_entry *nentry = OALLOC(&temp_obst, call_entry);
        nentry->call       = new_call;
        nentry->callee     = entry->callee;
        nentry->benefice   = entry->benefice;
        nentry->loop_depth = entry->loop_depth + loop_depth_delta;
        nentry->local_adr  = entry->local_adr;
        nentry->all_const  = entry->all_const;

        return nentry;
}

/**
 * Append all call nodes of the source environment to the list of the
 * destination environment.
 *
 * @param dst         destination environment
 * @param src         source environment
 * @param loop_depth  the loop depth of the call that is replaced by the src list
 */
static void append_call_list(inline_irg_env *dst, inline_irg_env *src, int loop_depth)
{
        call_entry *entry, *nentry;

        /* Note that the src list points to Call nodes in the inlined graph, but
           we need Call nodes in our graph. Luckily the inliner leaves this information
           in the link field. */
        list_for_each_entry(call_entry, entry, &src->calls, list) {
                nentry = duplicate_call_entry(entry, (ir_node*)get_irn_link(entry->call), loop_depth);
                list_add_tail(&nentry->list, &dst->calls);
        }
        dst->n_call_nodes += src->n_call_nodes;
        dst->n_nodes      += src->n_nodes;
}

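/* Overview of inline_leave_functions() below:
 *  1. Attach an inline_irg_env to every graph and collect all calls.
 *  2. Repeatedly inline calls to leaf ("leave") functions until a fixpoint
 *     is reached, since inlining can create new leaves.
 *  3. Inline the remaining small functions; a recursive call is handled by
 *     first copying the callee with create_irg_copy() and inlining the copy.
 *  4. Locally optimize all graphs that changed and free the copies. */
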
/*
 * Inlines small leaf ("leave") methods at call sites where the called
 * address comes from a Const node that references the entity representing
 * the called method.
 * The size argument is a rough measure for the code size of the method:
 * Methods where the obstack containing the firm graph is smaller than
 * size are inlined.
 */
void inline_leave_functions(unsigned maxsize, unsigned leavesize,
                            unsigned size, int ignore_runtime)
{
        inline_irg_env   *env;
        ir_graph         *irg;
        size_t           i, n_irgs;
        ir_graph         *rem;
        int              did_inline;
        wenv_t           wenv;
        call_entry       *entry, *next;
        const call_entry *centry;
        pmap             *copied_graphs;
        pmap_entry       *pm_entry;

        rem = current_ir_graph;
        obstack_init(&temp_obst);

        /* a map for the copied graphs, used to inline recursive calls */
        copied_graphs = pmap_create();

        /* extend all irgs by a temporary data structure for inlining. */
        n_irgs = get_irp_n_irgs();
        for (i = 0; i < n_irgs; ++i)
                set_irg_link(get_irp_irg(i), alloc_inline_irg_env());

        /* Pre-compute information in temporary data structure. */
        wenv.ignore_runtime = ignore_runtime;
        wenv.ignore_callers = 0;
        for (i = 0; i < n_irgs; ++i) {
                ir_graph *irg = get_irp_irg(i);

                assert(get_irg_phase_state(irg) != phase_building);
                free_callee_info(irg);

                assure_cf_loop(irg);
                wenv.x = (inline_irg_env*)get_irg_link(irg);
                irg_walk_graph(irg, NULL, collect_calls2, &wenv);
        }

        /* -- and now inline. -- */

        /* Inline leaves recursively -- we might construct new leaves. */
        do {
                did_inline = 0;

                for (i = 0; i < n_irgs; ++i) {
                        ir_node *call;
                        int phiproj_computed = 0;

                        current_ir_graph = get_irp_irg(i);
                        env              = (inline_irg_env*)get_irg_link(current_ir_graph);

                        ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
                        list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
                                ir_graph            *callee;
                                irg_inline_property  prop;

                                if (env->n_nodes > maxsize)
                                        break;

                                call   = entry->call;
                                callee = entry->callee;

                                prop = get_irg_inline_property(callee);
                                if (prop == irg_inline_forbidden) {
                                        continue;
                                }

                                if (is_leave(callee) && (
                                    is_smaller(callee, leavesize) || prop >= irg_inline_forced)) {
                                        if (!phiproj_computed) {
                                                phiproj_computed = 1;
                                                collect_phiprojs(current_ir_graph);
                                        }
                                        did_inline = inline_method(call, callee);

                                        if (did_inline) {
                                                inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);

                                                /* call was inlined, Phi/Projs for current graph must be recomputed */
                                                phiproj_computed = 0;

                                                /* Do some statistics */
                                                env->got_inline = 1;
                                                --env->n_call_nodes;
                                                env->n_nodes += callee_env->n_nodes;
                                                --callee_env->n_callers;

                                                /* remove this call from the list */
                                                list_del(&entry->list);
                                                continue;
                                        }
                                }
                        }
                        ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
                }
        } while (did_inline);

        /* inline other small functions. */
        for (i = 0; i < n_irgs; ++i) {
                ir_node *call;
                int phiproj_computed = 0;

                current_ir_graph = get_irp_irg(i);
                env              = (inline_irg_env*)get_irg_link(current_ir_graph);

                ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

                /* note that the list of possible calls is updated during the process */
                list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
                        irg_inline_property prop;
                        ir_graph            *callee;
                        pmap_entry          *e;

                        call   = entry->call;
                        callee = entry->callee;

                        prop = get_irg_inline_property(callee);
                        if (prop == irg_inline_forbidden) {
                                continue;
                        }

                        e = pmap_find(copied_graphs, callee);
                        if (e != NULL) {
                                /*
                                 * Remap callee if we have a copy.
                                 * FIXME: Should we do this only for recursive Calls ?
                                 */
                                callee = (ir_graph*)e->value;
                        }

                        if (prop >= irg_inline_forced ||
                            (is_smaller(callee, size) && env->n_nodes < maxsize) /* small function */) {
                                if (current_ir_graph == callee) {
                                        /*
                                         * Recursive call: we cannot directly inline because we cannot
                                         * walk the graph and change it at the same time. So we have to
                                         * make a copy of the graph first.
                                         */

                                        inline_irg_env *callee_env;
                                        ir_graph       *copy;

                                        ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

                                        /*
                                         * No copy yet, create one.
                                         * Note that recursive methods are never leaves, so it is
                                         * sufficient to test this condition here.
                                         */
                                        copy = create_irg_copy(callee);

                                        /* create_irg_copy() destroys the Proj links, recompute them */
                                        phiproj_computed = 0;

                                        ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

                                        /* allocate a new environment */
                                        callee_env = alloc_inline_irg_env();
                                        set_irg_link(copy, callee_env);

                                        assure_cf_loop(copy);
                                        wenv.x              = callee_env;
                                        wenv.ignore_callers = 1;
                                        irg_walk_graph(copy, NULL, collect_calls2, &wenv);

                                        /*
                                         * Enter the entity of the original graph. This is needed
                                         * for inline_method(). However, note that ent->irg still points
                                         * to callee, NOT to copy.
                                         */
                                        set_irg_entity(copy, get_irg_entity(callee));

                                        pmap_insert(copied_graphs, callee, copy);
                                        callee = copy;

                                        /* we have only one caller: the original graph */
                                        callee_env->n_callers      = 1;
                                        callee_env->n_callers_orig = 1;
                                }
                                if (! phiproj_computed) {
                                        phiproj_computed = 1;
                                        collect_phiprojs(current_ir_graph);
                                }
                                did_inline = inline_method(call, callee);
                                if (did_inline) {
                                        inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);

                                        /* call was inlined, Phi/Projs for current graph must be recomputed */
                                        phiproj_computed = 0;

                                        /* callee was inlined. Append its call list. */
                                        env->got_inline = 1;
                                        --env->n_call_nodes;
                                        append_call_list(env, callee_env, entry->loop_depth);
                                        --callee_env->n_callers;

                                        /* after we have inlined callee, all called methods inside callee
                                           are now called once more */
                                        list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
                                                inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
                                                ++penv->n_callers;
                                        }

                                        /* remove this call from the list */
                                        list_del(&entry->list);
                                        continue;
                                }
                        }
                }
                ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
        }

        for (i = 0; i < n_irgs; ++i) {
                irg = get_irp_irg(i);
                env = (inline_irg_env*)get_irg_link(irg);

                if (env->got_inline) {
                        optimize_graph_df(irg);
                        optimize_cf(irg);
                }
                if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
                        DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
                        env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
                        env->n_callers_orig, env->n_callers,
                        get_entity_name(get_irg_entity(irg))));
                }
        }

        /* kill the copied graphs: we don't need them anymore */
        foreach_pmap(copied_graphs, pm_entry) {
                ir_graph *copy = (ir_graph*)pm_entry->value;

                /* reset the entity, otherwise it will be deleted in the next step ... */
                set_irg_entity(copy, NULL);
                free_ir_graph(copy);
        }
        pmap_destroy(copied_graphs);

        obstack_free(&temp_obst, NULL);
        current_ir_graph = rem;
}
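
/* Example use of inline_leave_functions() (illustrative threshold values
 * only): inline leaf functions smaller than 16 nodes and other functions
 * smaller than 25 nodes, as long as the calling graph stays below 750
 * nodes, without ignoring runtime calls:
 *
 *     inline_leave_functions(750, 16, 25, 0);
 */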

typedef struct inline_leave_functions_pass_t {
        ir_prog_pass_t pass;
        unsigned       maxsize;
        unsigned       leavesize;
        unsigned       size;
        int            ignore_runtime;
} inline_leave_functions_pass_t;

/**
 * Wrapper to run inline_leave_functions() as an ir_prog pass.
 */
static int inline_leave_functions_wrapper(ir_prog *irp, void *context)
{
        inline_leave_functions_pass_t *pass = (inline_leave_functions_pass_t*)context;

        (void)irp;
        inline_leave_functions(
                pass->maxsize, pass->leavesize,
                pass->size, pass->ignore_runtime);
        return 0;
}

/* create a pass for inline_leave_functions() */
ir_prog_pass_t *inline_leave_functions_pass(
        const char *name, unsigned maxsize, unsigned leavesize,
        unsigned size, int ignore_runtime)
{
        inline_leave_functions_pass_t *pass = XMALLOCZ(inline_leave_functions_pass_t);

        pass->maxsize        = maxsize;
        pass->leavesize      = leavesize;
        pass->size           = size;
        pass->ignore_runtime = ignore_runtime;

        return def_prog_pass_constructor(
                &pass->pass,
                name ? name : "inline_leave_functions",
                inline_leave_functions_wrapper);
}
1237
1238 /**
1239  * Calculate the parameter weights for transmitting the address of a local variable.
1240  */
1241 static unsigned calc_method_local_weight(ir_node *arg)
1242 {
1243         int      i, j, k;
1244         unsigned v, weight = 0;
1245
1246         for (i = get_irn_n_outs(arg) - 1; i >= 0; --i) {
1247                 ir_node *succ = get_irn_out(arg, i);
1248
1249                 switch (get_irn_opcode(succ)) {
1250                 case iro_Load:
1251                 case iro_Store:
1252                         /* Loads and Store can be removed */
1253                         weight += 3;
1254                         break;
1255                 case iro_Sel:
1256                         /* check if all args are constant */
1257                         for (j = get_Sel_n_indexs(succ) - 1; j >= 0; --j) {
1258                                 ir_node *idx = get_Sel_index(succ, j);
1259                                 if (! is_Const(idx))
1260                                         return 0;
1261                         }
1262                         /* Check users on this Sel. Note: if a 0 is returned here, there was
1263                            some unsupported node. */
1264                         v = calc_method_local_weight(succ);
1265                         if (v == 0)
1266                                 return 0;
1267                         /* we can kill one Sel with constant indexes, this is cheap */
1268                         weight += v + 1;
1269                         break;
1270                 case iro_Id:
1271                         /* when looking backward we might find Id nodes */
1272                         weight += calc_method_local_weight(succ);
1273                         break;
1274                 case iro_Tuple:
1275                         /* unoptimized tuple */
1276                         for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
1277                                 ir_node *pred = get_Tuple_pred(succ, j);
1278                                 if (pred == arg) {
1279                                         /* look for Proj(j) */
1280                                         for (k = get_irn_n_outs(succ) - 1; k >= 0; --k) {
1281                                                 ir_node *succ_succ = get_irn_out(succ, k);
1282                                                 if (is_Proj(succ_succ)) {
1283                                                         if (get_Proj_proj(succ_succ) == j) {
1284                                                                 /* found */
1285                                                                 weight += calc_method_local_weight(succ_succ);
1286                                                         }
1287                                                 } else {
1288                                                         /* this should NOT happen */
1289                                                         return 0;
1290                                                 }
1291                                         }
1292                                 }
1293                         }
1294                         break;
1295                 default:
1296                         /* any other node: not yet supported or bad. */
1297                         return 0;
1298                 }
1299         }
1300         return weight;
1301 }
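
/*
 * Worked example for calc_method_local_weight() (illustrative): if the
 * argument's address is used by one Load and one Store, the weight is
 * 3 + 3 = 6.  If it additionally feeds a Sel with constant indexes whose
 * result is loaded once, that Sel contributes its users' weight plus one,
 * i.e. 3 + 1 = 4, for a total of 10.  Any unsupported user collapses the
 * whole weight to 0.
 */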
1302
1303 /**
1304  * Calculate the parameter weights for transmitting the address of a local variable.
1305  */
1306 static void analyze_irg_local_weights(inline_irg_env *env, ir_graph *irg)
1307 {
1308         ir_entity *ent = get_irg_entity(irg);
1309         ir_type  *mtp;
1310         size_t   nparams;
1311         int      i;
1312         long     proj_nr;
1313         ir_node  *irg_args, *arg;
1314
1315         mtp      = get_entity_type(ent);
1316         nparams  = get_method_n_params(mtp);
1317
1318         /* allocate a new array; a non-NULL pointer also serves as the 'analysed' flag */
1319         env->local_weights = NEW_ARR_D(unsigned, &temp_obst, nparams);
1320
1321         /* If the method has no parameters, there is nothing to do. */
1322         if (nparams == 0)
1323                 return;
1324
1325         assure_irg_outs(irg);
1326         irg_args = get_irg_args(irg);
1327         for (i = get_irn_n_outs(irg_args) - 1; i >= 0; --i) {
1328                 arg     = get_irn_out(irg_args, i);
1329                 proj_nr = get_Proj_proj(arg);
1330                 env->local_weights[proj_nr] = calc_method_local_weight(arg);
1331         }
1332 }
1333
1334 /**
1335  * Calculate the benefice for transmitting a local variable address.
1336  * After inlining, the local variable might be transformed into a
1337  * SSA variable by scalar_replacement().
1338  */
1339 static unsigned get_method_local_adress_weight(ir_graph *callee, size_t pos)
1340 {
1341         inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);
1342
1343         if (env->local_weights == NULL)
1344                 analyze_irg_local_weights(env, callee);
1345
1346         if (pos < ARR_LEN(env->local_weights))
1347                 return env->local_weights[pos];
1348         return 0;
1349 }
1350
1351 /**
1352  * Calculate a benefice value for inlining the given call.
1353  *
1354  * @param entry      the call entry we have to inspect
1355  * @param callee     the called graph
1356  */
1357 static int calc_inline_benefice(call_entry *entry, ir_graph *callee)
1358 {
1359         ir_node   *call = entry->call;
1360         ir_entity *ent  = get_irg_entity(callee);
1361         ir_node   *frame_ptr;
1362         ir_type   *mtp;
1363         int       weight = 0;
1364         int       i, n_params, all_const;
1365         unsigned  cc, v;
1366         irg_inline_property prop;
1367
1368         inline_irg_env *callee_env;
1369
1370         prop = get_irg_inline_property(callee);
1371         if (prop == irg_inline_forbidden) {
1372                 DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden\n",
1373                     call, callee));
1374                 return entry->benefice = INT_MIN;
1375         }
1376
1377         if (get_irg_additional_properties(callee) & mtp_property_noreturn) {
1378                 DB((dbg, LEVEL_2, "In %+F Call to %+F: not inlining noreturn\n",
1379                     call, callee));
1380                 return entry->benefice = INT_MIN;
1381         }
1382
1383         /* costs for every passed parameter */
1384         n_params = get_Call_n_params(call);
1385         mtp      = get_entity_type(ent);
1386         cc       = get_method_calling_convention(mtp);
1387         if (cc & cc_reg_param) {
1388                 /* register parameter, smaller costs for register parameters */
1389                 int max_regs = cc & ~cc_bits;
1390
1391                 if (max_regs < n_params)
1392                         weight += max_regs * 2 + (n_params - max_regs) * 5;
1393                 else
1394                         weight += n_params * 2;
1395         } else {
1396                 /* parameters are passed on the stack */
1397                 weight += 5 * n_params;
1398         }
1399
1400         /* constant parameters improve the benefice */
1401         frame_ptr = get_irg_frame(current_ir_graph);
1402         all_const = 1;
1403         for (i = 0; i < n_params; ++i) {
1404                 ir_node *param = get_Call_param(call, i);
1405
1406                 if (is_Const(param)) {
1407                         weight += get_method_param_weight(ent, i);
1408                 } else {
1409                         all_const = 0;
1410                         if (is_SymConst(param))
1411                                 weight += get_method_param_weight(ent, i);
1412                         else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
1413                                 /*
1414                                  * An address of a local variable is transmitted. After
1415                                  * inlining, scalar_replacement might be able to remove the
1416                                  * local variable, so honor this.
1417                                  */
1418                                 v = get_method_local_adress_weight(callee, i);
1419                                 weight += v;
1420                                 if (v > 0)
1421                                         entry->local_adr = 1;
1422                         }
1423                 }
1424         }
1425         entry->all_const = all_const;
1426
1427         callee_env = (inline_irg_env*)get_irg_link(callee);
1428         if (callee_env->n_callers == 1 &&
1429             callee != current_ir_graph &&
1430             !entity_is_externally_visible(ent)) {
1431                 weight += 700;
1432         }
1433
1434         /* give a bonus for functions with one block */
1435         if (callee_env->n_blocks == 1)
1436                 weight = weight * 3 / 2;
1437
1438         /* and one for small non-recursive functions: we want them to be inlined in almost every case */
1439         if (callee_env->n_nodes < 30 && !callee_env->recursive)
1440                 weight += 2000;
1441
1442         /* and finally for leaves: they do not increase the register pressure
1443            because of callee-save registers */
1444         if (callee_env->n_call_nodes == 0)
1445                 weight += 400;
1446
1447         /* it is important to inline inner loops first */
1448         if (entry->loop_depth > 30)
1449                 weight += 30 * 1024;
1450         else
1451                 weight += entry->loop_depth * 1024;
1452
1453         /*
1454          * All arguments constant is probably a good sign, give an extra bonus
1455          */
1456         if (all_const)
1457                 weight += 1024;
1458
1459         return entry->benefice = weight;
1460 }
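
/*
 * Worked example for calc_inline_benefice() (illustrative): a call with two
 * non-constant, stack-passed parameters to a single-block, 20-node,
 * non-recursive leaf function with exactly one, non-visible caller at loop
 * depth 0 scores
 *
 *     (2 * 5 + 700) * 3 / 2 + 2000 + 400 = 3465,
 *
 * which easily passes, e.g., a threshold of 0.
 */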
1461
1462 typedef struct walk_env_t {
1463         ir_graph **irgs;
1464         size_t   last_irg;
1465 } walk_env_t;
1466
1467 /**
1468  * Callgraph walker, collect all visited graphs.
1469  */
1470 static void callgraph_walker(ir_graph *irg, void *data)
1471 {
1472         walk_env_t *env = (walk_env_t *)data;
1473         env->irgs[env->last_irg++] = irg;
1474 }
1475
1476 /**
1477  * Creates an inline order for all graphs.
1478  *
1479  * @return the list of graphs.
1480  */
1481 static ir_graph **create_irg_list(void)
1482 {
1483         ir_entity  **free_methods;
1484         size_t     n_irgs = get_irp_n_irgs();
1485         walk_env_t env;
1486
1487         cgana(&free_methods);
1488         xfree(free_methods);
1489
1490         compute_callgraph();
1491
1492         env.irgs     = XMALLOCNZ(ir_graph*, n_irgs);
1493         env.last_irg = 0;
1494
1495         callgraph_walk(NULL, callgraph_walker, &env);
1496         assert(n_irgs == env.last_irg);
1497
1498         return env.irgs;
1499 }
1500
1501 /**
1502  * Push a call onto the priority queue if its benefice is big enough.
1503  *
1504  * @param pqueue   the priority queue of calls
1505  * @param call     the call entry
1506  * @param inline_threshold
1507  *                 the threshold value
1508  */
1509 static void maybe_push_call(pqueue_t *pqueue, call_entry *call,
1510                             int inline_threshold)
1511 {
1512         ir_graph            *callee  = call->callee;
1513         irg_inline_property prop     = get_irg_inline_property(callee);
1514         int                 benefice = calc_inline_benefice(call, callee);
1515
1516         DB((dbg, LEVEL_2, "In %+F Call %+F to %+F has benefice %d\n",
1517             get_irn_irg(call->call), call->call, callee, benefice));
1518
1519         if (prop < irg_inline_forced && benefice < inline_threshold) {
1520                 return;
1521         }
1522
1523         pqueue_put(pqueue, call, benefice);
1524 }
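
/*
 * Note: a callee whose inline property is at least irg_inline_forced is
 * queued even when its benefice is below the threshold; the benefice then
 * only determines its position in the priority queue.
 */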
1525
1526 /**
1527  * Try to inline calls into a graph.
1528  *
1529  * @param irg      the graph into which we inline
1530  * @param maxsize  do NOT inline if the size of irg gets
1531  *                 bigger than this amount
1532  * @param inline_threshold
1533  *                 threshold value for inline decision
1534  * @param copied_graphs
1535  *                 map containing copies of recursive graphs
1536  */
1537 static void inline_into(ir_graph *irg, unsigned maxsize,
1538                         int inline_threshold, pmap *copied_graphs)
1539 {
1540         int            phiproj_computed = 0;
1541         inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
1542         call_entry     *curr_call;
1543         wenv_t         wenv;
1544         pqueue_t       *pqueue;
1545
1546         if (env->n_call_nodes == 0)
1547                 return;
1548
1549         if (env->n_nodes > maxsize) {
1550                 DB((dbg, LEVEL_2, "%+F: too big (%d)\n", irg, env->n_nodes));
1551                 return;
1552         }
1553
1554         current_ir_graph = irg;
1555         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1556
1557         /* put the graph's calls into the pqueue */
1558         pqueue = new_pqueue();
1559
1560         list_for_each_entry(call_entry, curr_call, &env->calls, list) {
1561                 assert(is_Call(curr_call->call));
1562                 maybe_push_call(pqueue, curr_call, inline_threshold);
1563         }
1564
1565         /* note that the list of possible calls is updated during the process */
1566         while (!pqueue_empty(pqueue)) {
1567                 int                 did_inline;
1568                 call_entry          *curr_call  = (call_entry*)pqueue_pop_front(pqueue);
1569                 ir_graph            *callee     = curr_call->callee;
1570                 ir_node             *call_node  = curr_call->call;
1571                 inline_irg_env      *callee_env = (inline_irg_env*)get_irg_link(callee);
1572                 irg_inline_property prop        = get_irg_inline_property(callee);
1573                 int                 loop_depth;
1574                 const call_entry    *centry;
1575                 pmap_entry          *e;
1576
1577                 if ((prop < irg_inline_forced) && env->n_nodes + callee_env->n_nodes > maxsize) {
1578                         DB((dbg, LEVEL_2, "%+F: too big (%d) + %+F (%d)\n", irg,
1579                                                 env->n_nodes, callee, callee_env->n_nodes));
1580                         continue;
1581                 }
1582
1583                 e = pmap_find(copied_graphs, callee);
1584                 if (e != NULL) {
1585                         int benefice = curr_call->benefice;
1586                         /*
1587                          * Reduce the weight for recursive functions IFF not all arguments are const;
1588                          * inlining recursive functions is rarely good.
1589                          */
1590                         if (!curr_call->all_const)
1591                                 benefice -= 2000;
1592                         if (benefice < inline_threshold)
1593                                 continue;
1594
1595                         /*
1596                          * Remap callee if we have a copy.
1597                          */
1598                         callee     = (ir_graph*)e->value;
1599                         callee_env = (inline_irg_env*)get_irg_link(callee);
1600                 }
1601
1602                 if (current_ir_graph == callee) {
1603                         /*
1604                          * Recursive call: we cannot directly inline because we cannot
1605                          * walk the graph and change it. So we have to make a copy of
1606                          * the graph first.
1607                          */
1608                         int benefice = curr_call->benefice;
1609                         ir_graph *copy;
1610
1611                         /*
1612                          * Reduce the weight for recursive functions IFF not all arguments are const;
1613                          * inlining recursive functions is rarely good.
1614                          */
1615                         if (!curr_call->all_const)
1616                                 benefice -= 2000;
1617                         if (benefice < inline_threshold)
1618                                 continue;
1619
1620                         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1621
1622                         /*
1623                          * No copy yet, create one.
1624                          * Note that recursive methods are never leaves, so it is
1625                          * sufficient to test this condition here.
1626                          */
1627                         copy = create_irg_copy(callee);
1628
1629                         /* create_irg_copy() destroys the Proj links, recompute them */
1630                         phiproj_computed = 0;
1631
1632                         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1633
1634                         /* allocate a new environment */
1635                         callee_env = alloc_inline_irg_env();
1636                         set_irg_link(copy, callee_env);
1637
1638                         assure_cf_loop(copy);
1639                         memset(&wenv, 0, sizeof(wenv));
1640                         wenv.x              = callee_env;
1641                         wenv.ignore_callers = 1;
1642                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
1643
1644                         /*
1645                          * Enter the entity of the original graph. This is needed
1646                          * for inline_method(). However, note that ent->irg still points
1647                          * to callee, NOT to copy.
1648                          */
1649                         set_irg_entity(copy, get_irg_entity(callee));
1650
1651                         pmap_insert(copied_graphs, callee, copy);
1652                         callee = copy;
1653
1654                         /* we have only one caller: the original graph */
1655                         callee_env->n_callers      = 1;
1656                         callee_env->n_callers_orig = 1;
1657                 }
1658                 if (! phiproj_computed) {
1659                         phiproj_computed = 1;
1660                         collect_phiprojs(current_ir_graph);
1661                 }
1662                 did_inline = inline_method(call_node, callee);
1663                 if (!did_inline)
1664                         continue;
1665
1666                 /* call was inlined, Phi/Projs for current graph must be recomputed */
1667                 phiproj_computed = 0;
1668
1669                 /* remove it from the caller list */
1670                 list_del(&curr_call->list);
1671
1672                 /* callee was inlined. Append its call list. */
1673                 env->got_inline = 1;
1674                 --env->n_call_nodes;
1675
1676                 /* we just generate a bunch of new calls */
1677                 loop_depth = curr_call->loop_depth;
1678                 list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
1679                         inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
1680                         ir_node        *new_call;
1681                         call_entry     *new_entry;
1682
1683                         /* after we have inlined callee, all called methods inside
1684                          * callee are now called once more */
1685                         ++penv->n_callers;
1686
1687                         /* Note that the src list points to Call nodes in the inlined graph,
1688                          * but we need Call nodes in our graph. Luckily the inliner leaves
1689                          * this information in the link field. */
1690                         new_call = (ir_node*)get_irn_link(centry->call);
1691                         assert(is_Call(new_call));
1692
1693                         new_entry = duplicate_call_entry(centry, new_call, loop_depth);
1694                         list_add_tail(&new_entry->list, &env->calls);
1695                         maybe_push_call(pqueue, new_entry, inline_threshold);
1696                 }
1697
1698                 env->n_call_nodes += callee_env->n_call_nodes;
1699                 env->n_nodes += callee_env->n_nodes;
1700                 --callee_env->n_callers;
1701         }
1702         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1703         del_pqueue(pqueue);
1704 }
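
/*
 * Note on inline_into(): the queue is refilled with the call entries copied
 * from every inlined callee, so calls that only become visible through
 * inlining are considered in the same pass, subject to the same maxsize and
 * threshold checks.
 */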
1705
1706 /*
1707  * Heuristic inliner. Calculates a benefice value for every call and inlines
1708  * those calls with a value higher than the threshold.
1709  */
1710 void inline_functions(unsigned maxsize, int inline_threshold,
1711                       opt_ptr after_inline_opt)
1712 {
1713         inline_irg_env   *env;
1714         size_t           i, n_irgs;
1715         ir_graph         *rem;
1716         wenv_t           wenv;
1717         pmap             *copied_graphs;
1718         pmap_entry       *pm_entry;
1719         ir_graph         **irgs;
1720
1721         rem = current_ir_graph;
1722         obstack_init(&temp_obst);
1723
1724         irgs = create_irg_list();
1725
1726         /* a map for the copied graphs, used to inline recursive calls */
1727         copied_graphs = pmap_create();
1728
1729         /* extend all irgs by a temporary data structure for inlining. */
1730         n_irgs = get_irp_n_irgs();
1731         for (i = 0; i < n_irgs; ++i)
1732                 set_irg_link(irgs[i], alloc_inline_irg_env());
1733
1734         /* Pre-compute information in temporary data structure. */
1735         wenv.ignore_runtime = 0;
1736         wenv.ignore_callers = 0;
1737         for (i = 0; i < n_irgs; ++i) {
1738                 ir_graph *irg = irgs[i];
1739
1740                 free_callee_info(irg);
1741
1742                 wenv.x = (inline_irg_env*)get_irg_link(irg);
1743                 assure_cf_loop(irg);
1744                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
1745         }
1746
1747         /* -- and now inline. -- */
1748         for (i = 0; i < n_irgs; ++i) {
1749                 ir_graph *irg = irgs[i];
1750
1751                 inline_into(irg, maxsize, inline_threshold, copied_graphs);
1752         }
1753
1754         for (i = 0; i < n_irgs; ++i) {
1755                 ir_graph *irg = irgs[i];
1756
1757                 env = (inline_irg_env*)get_irg_link(irg);
1758                 if (env->got_inline && after_inline_opt != NULL) {
1759                         /* this irg got calls inlined: optimize it */
1760                         after_inline_opt(irg);
1761                 }
1762                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
1763                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
1764                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
1765                         env->n_callers_orig, env->n_callers,
1766                         get_entity_name(get_irg_entity(irg))));
1767                 }
1768         }
1769
1770         /* kill the copied graphs: we don't need them anymore */
1771         foreach_pmap(copied_graphs, pm_entry) {
1772                 ir_graph *copy = (ir_graph*)pm_entry->value;
1773
1774                 /* reset the entity, otherwise it will be deleted in the next step ... */
1775                 set_irg_entity(copy, NULL);
1776                 free_ir_graph(copy);
1777         }
1778         pmap_destroy(copied_graphs);
1779
1780         xfree(irgs);
1781
1782         obstack_free(&temp_obst, NULL);
1783         current_ir_graph = rem;
1784 }
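
/*
 * Usage sketch (illustrative; the values are invented and optimize_graph_df
 * is assumed to match opt_ptr's signature void (*)(ir_graph*)):
 *
 *     inline_functions(750, 0, optimize_graph_df);
 */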
1785
1786 typedef struct inline_functions_pass_t {
1787         ir_prog_pass_t pass;
1788         unsigned       maxsize;
1789         int            inline_threshold;
1790         opt_ptr        after_inline_opt;
1791 } inline_functions_pass_t;
1792
1793 /**
1794  * Wrapper to run inline_functions() as an ir_prog pass.
1795  */
1796 static int inline_functions_wrapper(ir_prog *irp, void *context)
1797 {
1798         inline_functions_pass_t *pass = (inline_functions_pass_t*)context;
1799
1800         (void)irp;
1801         inline_functions(pass->maxsize, pass->inline_threshold,
1802                          pass->after_inline_opt);
1803         return 0;
1804 }
1805
1806 /* create an ir_prog pass for inline_functions */
1807 ir_prog_pass_t *inline_functions_pass(
1808           const char *name, unsigned maxsize, int inline_threshold,
1809           opt_ptr after_inline_opt)
1810 {
1811         inline_functions_pass_t *pass = XMALLOCZ(inline_functions_pass_t);
1812
1813         pass->maxsize          = maxsize;
1814         pass->inline_threshold = inline_threshold;
1815         pass->after_inline_opt = after_inline_opt;
1816
1817         return def_prog_pass_constructor(
1818                 &pass->pass, name ? name : "inline_functions",
1819                 inline_functions_wrapper);
1820 }
1821
1822 void firm_init_inline(void)
1823 {
1824         FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
1825 }