Let collect_nodes cope with Bad nodes
libfirm: ir/opt/opt_inline.c
/*
 * Copyright (C) 1995-2011 University of Karlsruhe.  All rights reserved.
 *
 * This file is part of libFirm.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License
 * Agreement provided with the Software.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

/**
 * @file
 * @brief    Dead node elimination and Procedure Inlining.
 * @author   Michael Beck, Goetz Lindenmaier
 * @version  $Id$
 */
#include "config.h"

#include <limits.h>
#include <stdbool.h>
#include <assert.h>

#include "irnode_t.h"
#include "irgraph_t.h"
#include "irprog_t.h"

#include "iroptimize.h"
#include "ircons_t.h"
#include "iropt_t.h"
#include "irgopt.h"
#include "irgmod.h"
#include "irgwalk.h"

#include "array_t.h"
#include "list.h"
#include "pset.h"
#include "pmap.h"
#include "pdeq.h"
#include "xmalloc.h"
#include "pqueue.h"

#include "irouts.h"
#include "irloop_t.h"
#include "irbackedge_t.h"
#include "opt_init.h"
#include "cgana.h"
#include "trouts.h"
#include "error.h"

#include "analyze_irg_args.h"
#include "iredges_t.h"
#include "irflag_t.h"
#include "irhooks.h"
#include "irtools.h"
#include "iropt_dbg.h"
#include "irpass_t.h"
#include "irphase_t.h"

DEBUG_ONLY(static firm_dbg_module_t *dbg;)

/*------------------------------------------------------------------*/
/* Routines for dead node elimination / copying garbage collection  */
/* of the obstack.                                                  */
/*------------------------------------------------------------------*/

/**
 * Remember the new node in the old node by using a field all nodes have.
 */
static void set_new_node(ir_node *node, ir_node *new_node)
{
        set_irn_link(node, new_node);
}

/**
 * Get this new node, before the old node is forgotten.
 */
static inline ir_node *get_new_node(ir_node *old_node)
{
        assert(irn_visited(old_node));
        return (ir_node*) get_irn_link(old_node);
}

/*--------------------------------------------------------------------*/
/*  Functionality for inlining                                        */
/*--------------------------------------------------------------------*/

/**
 * Copy node for inlining.  Updates attributes that change when
 * inlining but not for dead node elimination.
 *
 * Copies the node by calling irn_copy_into_irg() and then updates the
 * entity if it's a local one.  env must be the new graph into which the
 * node is copied.  The copied entities must be available through the
 * link fields of the old entities.
 */
static void copy_node_inline(ir_node *node, void *env)
{
        ir_graph *new_irg  = (ir_graph*) env;
        ir_node  *new_node = irn_copy_into_irg(node, new_irg);

        set_new_node(node, new_node);
        if (is_Sel(node)) {
                ir_graph  *old_irg        = get_irn_irg(node);
                ir_type   *old_frame_type = get_irg_frame_type(old_irg);
                ir_entity *old_entity     = get_Sel_entity(node);
                assert(is_Sel(new_node));
                /* use copied entities from the new frame */
                if (get_entity_owner(old_entity) == old_frame_type) {
                        ir_entity *new_entity = (ir_entity*)get_entity_link(old_entity);
                        assert(new_entity != NULL);
                        set_Sel_entity(new_node, new_entity);
                }
        } else if (is_Block(new_node)) {
                new_node->attr.block.irg.irg = new_irg;
        }
}

static void set_preds_inline(ir_node *node, void *env)
{
        ir_node *new_node;

        irn_rewire_inputs(node);

        /* move constants into start block */
        new_node = get_new_node(node);
        if (is_irn_constlike(new_node)) {
                ir_graph *new_irg     = (ir_graph *) env;
                ir_node  *start_block = get_irg_start_block(new_irg);
                set_nodes_block(new_node, start_block);
        }
}

/**
 * Walker: checks whether a graph may be inlined; inlining is disallowed if
 * the graph accesses the value-argument part of its frame, takes the
 * address of a parameter entity, or allocates memory on the stack.
 */
static void find_addr(ir_node *node, void *env)
{
        bool *allow_inline = (bool*)env;

        if (is_Sel(node)) {
                ir_graph *irg = current_ir_graph;
                if (get_Sel_ptr(node) == get_irg_frame(irg)) {
                        /* access to frame */
                        ir_entity *ent = get_Sel_entity(node);
                        if (get_entity_owner(ent) != get_irg_frame_type(irg)) {
                                /* access to value_type */
                                *allow_inline = false;
                        }
                        if (is_parameter_entity(ent)) {
                                *allow_inline = false;
                        }
                }
        } else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
                /* From GCC:
                 * Refuse to inline an alloca call unless the user explicitly
                 * forced it, as this may change the program's memory overhead
                 * drastically when the function using alloca is called in a
                 * loop.  In the GCC version present in SPEC2000, inlining into
                 * schedule_block caused it to require 2GB of RAM instead of
                 * 256MB.
                 *
                 * Sadly this is true for our implementation as well.
                 * Moreover, we cannot differentiate between alloca() and VLA
                 * yet, so this disables inlining of functions using VLA (which
                 * are completely safe).
                 *
                 * 2 Solutions:
                 * - add a flag to the Alloc node for "real" alloca() calls
                 * - add a new Stack-Restore node at the end of a function using
                 *   alloca()
                 */
                *allow_inline = false;
        }
}

/**
 * Check if we can inline a given call.
 * Currently, we cannot inline two kinds of calls:
 * - calls with compound arguments or results
 * - calls to graphs that take the address of a parameter
 *
 * These conditions are checked here.
 */
static bool can_inline(ir_node *call, ir_graph *called_graph)
{
        ir_entity          *called      = get_irg_entity(called_graph);
        ir_type            *called_type = get_entity_type(called);
        ir_type            *call_type   = get_Call_type(call);
        size_t              n_params    = get_method_n_params(called_type);
        size_t              n_arguments = get_method_n_params(call_type);
        size_t              n_res       = get_method_n_ress(called_type);
        irg_inline_property prop        = get_irg_inline_property(called_graph);
        size_t              i;
        bool                res;

        if (prop == irg_inline_forbidden)
                return false;

        if (n_arguments != n_params) {
                /* this is a bad feature of C: without a prototype, we can
                 * call a function with fewer parameters than needed. Currently
                 * we don't support this, although we could use Unknown then. */
                return false;
        }
        if (n_res != get_method_n_ress(call_type)) {
                return false;
        }

        /* Argh, compiling C has some bad consequences:
         * It is implementation dependent what happens when argument and
         * parameter types do not match exactly.  We support inlining if the
         * bit size of the types matches AND the same arithmetic is used. */
        for (i = 0; i < n_params; ++i) {
                ir_type *param_tp = get_method_param_type(called_type, i);
                ir_type *arg_tp   = get_method_param_type(call_type, i);

                if (param_tp != arg_tp) {
                        ir_mode *pmode = get_type_mode(param_tp);
                        ir_mode *amode = get_type_mode(arg_tp);

                        if (pmode == NULL || amode == NULL)
                                return false;
                        if (get_mode_size_bits(pmode) != get_mode_size_bits(amode))
                                return false;
                        if (get_mode_arithmetic(pmode) != get_mode_arithmetic(amode))
                                return false;
                        /* otherwise we can simply "reinterpret" the bits */
                }
        }
        for (i = 0; i < n_res; ++i) {
                ir_type *decl_res_tp = get_method_res_type(called_type, i);
                ir_type *used_res_tp = get_method_res_type(call_type, i);

                if (decl_res_tp != used_res_tp) {
                        ir_mode *decl_mode = get_type_mode(decl_res_tp);
                        ir_mode *used_mode = get_type_mode(used_res_tp);
                        if (decl_mode == NULL || used_mode == NULL)
                                return false;
                        if (get_mode_size_bits(decl_mode) != get_mode_size_bits(used_mode))
                                return false;
                        if (get_mode_arithmetic(decl_mode) != get_mode_arithmetic(used_mode))
                                return false;
                        /* otherwise we can "reinterpret" the bits */
                }
        }

        /* check parameters for compound types */
        for (i = 0; i < n_params; ++i) {
                ir_type *p_type = get_method_param_type(call_type, i);

                if (is_compound_type(p_type))
                        return false;
        }

        /* check results for compound types */
        for (i = 0; i < n_res; ++i) {
                ir_type *r_type = get_method_res_type(call_type, i);

                if (is_compound_type(r_type))
                        return false;
        }

        res = true;
        irg_walk_graph(called_graph, find_addr, NULL, &res);

        return res;
}

enum exc_mode {
        exc_handler,    /**< There is a handler. */
        exc_no_handler  /**< Exception handling not represented. */
};

/**
 * Copy all entities of the stack frame of one irg to the stack frame of
 * another.  Sets the entity links of the old entities to the copies.
 */
static void copy_frame_entities(ir_graph *from, ir_graph *to)
{
        ir_type *from_frame = get_irg_frame_type(from);
        ir_type *to_frame   = get_irg_frame_type(to);
        size_t   n_members  = get_class_n_members(from_frame);
        size_t   i;
        assert(from_frame != to_frame);

        for (i = 0; i < n_members; ++i) {
                ir_entity *old_ent = get_class_member(from_frame, i);
                ir_entity *new_ent = copy_entity_own(old_ent, to_frame);
                set_entity_link(old_ent, new_ent);
                assert(!is_parameter_entity(old_ent));
        }
}

/* Inlines a method at the given call site. */
int inline_method(ir_node *call, ir_graph *called_graph)
{
        ir_node       *pre_call;
        ir_node       *post_call, *post_bl;
        ir_node       *in[pn_Start_max+1];
        ir_node       *end, *end_bl, *block;
        ir_node       **res_pred;
        ir_node       **cf_pred;
        ir_node       **args_in;
        ir_node       *ret, *phi;
        int           arity, n_ret, n_exc, n_res, i, j, rem_opt;
        int           irn_arity, n_params;
        int           n_mem_phi;
        enum exc_mode exc_handling;
        ir_type       *mtp;
        ir_type       *ctp;
        ir_entity     *ent;
        ir_graph      *rem;
        ir_graph      *irg = get_irn_irg(call);

        /* we cannot inline some types of calls */
        if (! can_inline(call, called_graph))
                return 0;

        /* We cannot inline a recursive call. The graph must be copied before
         * the call to inline_method() using create_irg_copy(). */
        if (called_graph == irg)
                return 0;

        ent      = get_irg_entity(called_graph);
        mtp      = get_entity_type(ent);
        ctp      = get_Call_type(call);
        n_params = get_method_n_params(mtp);
        n_res    = get_method_n_ress(mtp);

        rem = current_ir_graph;
        current_ir_graph = irg;

        DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));

        /* optimizations can cause problems when allocating new nodes */
        rem_opt = get_opt_optimize();
        set_optimize(0);

        /* Handle graph state */
        assert(get_irg_phase_state(irg) != phase_building);
        assert(get_irg_pinned(irg) == op_pin_state_pinned);
        assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
        set_irg_extblk_inconsistent(irg);
        set_irg_doms_inconsistent(irg);
        set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
        set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);
        edges_deactivate(irg);

        /* here we know we WILL inline, so inform the statistics */
        hook_inline(call, called_graph);

        /* -- Decide how to handle exception control flow: Is there a handler
           for the Call node, or do we branch directly to End on an exception?
           exc_handling:
           exc_handler:    There is a handler.
           exc_no_handler: Exception handling not represented in Firm. -- */
        {
                ir_node *Xproj = NULL;
                ir_node *proj;
                for (proj = (ir_node*)get_irn_link(call); proj != NULL;
                     proj = (ir_node*)get_irn_link(proj)) {
                        long proj_nr = get_Proj_proj(proj);
                        if (proj_nr == pn_Call_X_except) Xproj = proj;
                }
                exc_handling = Xproj != NULL ? exc_handler : exc_no_handler;
        }
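        /* Note: the Proj list walked above hangs off the Call's link field;
         * it is expected to have been set up by an earlier collect_phiprojs()
         * run, as the inlining drivers further down in this file do. */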

        /* create the argument tuple */
        args_in = ALLOCAN(ir_node*, n_params);

        block = get_nodes_block(call);
        for (i = n_params - 1; i >= 0; --i) {
                ir_node *arg      = get_Call_param(call, i);
                ir_type *param_tp = get_method_param_type(mtp, i);
                ir_mode *mode     = get_type_mode(param_tp);

                if (mode != get_irn_mode(arg)) {
                        arg = new_r_Conv(block, arg, mode);
                }
                args_in[i] = arg;
        }

        /* Pre_call is a Tuple that simulates the Start node of the called
         * procedure and later replaces the Start node of the called graph.
         * Post_call is the old Call node and collects the results of the called
         * graph. Both will end up being a tuple. */
        post_bl = get_nodes_block(call);
        /* XxMxPxPxT of Start + parameter of Call */
        in[pn_Start_M]              = get_Call_mem(call);
        in[pn_Start_X_initial_exec] = new_r_Jmp(post_bl);
        in[pn_Start_P_frame_base]   = get_irg_frame(irg);
        in[pn_Start_T_args]         = new_r_Tuple(post_bl, n_params, args_in);
        pre_call = new_r_Tuple(post_bl, pn_Start_max+1, in);
        post_call = call;

        /* --
           The new block gets the ins of the old block, pre_call and all its
           predecessors and all Phi nodes. -- */
        part_block(pre_call);

        /* increment visited flag for later walk */
        inc_irg_visited(called_graph);

        /* link some nodes to nodes in the current graph so that, instead of
         * being copied, the linked nodes will be used.
         * This way the copier will use the created Tuple instead of copying
         * the Start node, and similarly for singleton nodes like NoMem and Bad.
         * Note: this prevents predecessors from being copied - only do it for
         *       nodes without predecessors */
        {
                ir_node *start_block;
                ir_node *start;
                ir_node *nomem;

                start_block = get_irg_start_block(called_graph);
                set_new_node(start_block, get_nodes_block(pre_call));
                mark_irn_visited(start_block);

                start = get_irg_start(called_graph);
                set_new_node(start, pre_call);
                mark_irn_visited(start);

                nomem = get_irg_no_mem(called_graph);
                set_new_node(nomem, get_irg_no_mem(irg));
                mark_irn_visited(nomem);
        }

        /* the entity link is used to link entities on the old stack frame to
         * the new stack frame */
        irp_reserve_resources(irp, IRP_RESOURCE_ENTITY_LINK);

        /* copy entities and nodes */
        assert(!irn_visited(get_irg_end(called_graph)));
        copy_frame_entities(called_graph, irg);
        irg_walk_core(get_irg_end(called_graph), copy_node_inline, set_preds_inline,
                      irg);

        irp_free_resources(irp, IRP_RESOURCE_ENTITY_LINK);

        /* -- Merge the end of the inlined procedure with the call site -- */
        /* We will turn the old Call node into a Tuple with the following
           predecessors:
           -1:  Block of Tuple.
           0: Phi of all Memories of Return statements.
           1: Jmp from new Block that merges the control flow from all exception
           predecessors of the old end block.
           2: Tuple of all arguments.
           3: Phi of Exception memories.
           In case the old Call directly branches to End on an exception we don't
           need the block merging all exceptions nor the Phi of the exception
           memories.
        */

        /* Precompute some values */
        end_bl = get_new_node(get_irg_end_block(called_graph));
        end    = get_new_node(get_irg_end(called_graph));
        arity  = get_Block_n_cfgpreds(end_bl);    /* arity = n_exc + n_ret  */
        n_res  = get_method_n_ress(get_Call_type(call));

        res_pred = XMALLOCN(ir_node*, n_res);
        cf_pred  = XMALLOCN(ir_node*, arity);

        /* preserve the keepalives of the inlined graph */
        irn_arity = get_irn_arity(end);
        for (i = 0; i < irn_arity; i++) {
                ir_node *ka = get_End_keepalive(end, i);
                if (! is_Bad(ka))
                        add_End_keepalive(get_irg_end(irg), ka);
        }

        /* replace Return nodes by Jmp nodes */
        n_ret = 0;
        for (i = 0; i < arity; i++) {
                ir_node *ret;
                ret = get_Block_cfgpred(end_bl, i);
                if (is_Return(ret)) {
                        ir_node *block = get_nodes_block(ret);
                        cf_pred[n_ret] = new_r_Jmp(block);
                        n_ret++;
                }
        }
        set_irn_in(post_bl, n_ret, cf_pred);

        /* build a Tuple for all results of the method.
         * Add Phi nodes if there was more than one Return. */
        turn_into_tuple(post_call, pn_Call_max+1);
        /* First the Memory-Phi */
        n_mem_phi = 0;
        for (i = 0; i < arity; i++) {
                ret = get_Block_cfgpred(end_bl, i);
                if (is_Return(ret)) {
                        cf_pred[n_mem_phi++] = get_Return_mem(ret);
                }
                /* memory output for some exceptions is directly connected to End */
                if (is_Call(ret)) {
                        cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 3);
                } else if (is_fragile_op(ret)) {
                        /* We rely on all fragile ops having the memory output at the same position. */
                        cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 0);
                } else if (is_Raise(ret)) {
                        cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 1);
                }
        }
        phi = new_r_Phi(post_bl, n_mem_phi, cf_pred, mode_M);
        set_Tuple_pred(call, pn_Call_M, phi);
        /* Conserve Phi-list for further inlinings -- but might be optimized */
        if (get_nodes_block(phi) == post_bl) {
                set_irn_link(phi, get_irn_link(post_bl));
                set_irn_link(post_bl, phi);
        }
        /* Now the real results */
        if (n_res > 0) {
                ir_node *result_tuple;
                for (j = 0; j < n_res; j++) {
                        ir_type *res_type = get_method_res_type(ctp, j);
                        ir_mode *res_mode = get_type_mode(res_type);
                        n_ret = 0;
                        for (i = 0; i < arity; i++) {
                                ret = get_Block_cfgpred(end_bl, i);
                                if (is_Return(ret)) {
                                        ir_node *res = get_Return_res(ret, j);
                                        if (get_irn_mode(res) != res_mode) {
                                                ir_node *block = get_nodes_block(res);
                                                res = new_r_Conv(block, res, res_mode);
                                        }
                                        cf_pred[n_ret] = res;
                                        n_ret++;
                                }
                        }
                        if (n_ret > 0) {
                                phi = new_r_Phi(post_bl, n_ret, cf_pred, res_mode);
                        } else {
                                phi = new_r_Bad(irg, res_mode);
                        }
                        res_pred[j] = phi;
                        /* Conserve Phi-list for further inlinings -- but might be optimized */
                        if (get_nodes_block(phi) == post_bl) {
                                set_Phi_next(phi, get_Block_phis(post_bl));
                                set_Block_phis(post_bl, phi);
                        }
                }
                result_tuple = new_r_Tuple(post_bl, n_res, res_pred);
                set_Tuple_pred(call, pn_Call_T_result, result_tuple);
        } else {
                set_Tuple_pred(call, pn_Call_T_result, new_r_Bad(irg, mode_T));
        }
        /* handle the regular call */
        set_Tuple_pred(call, pn_Call_X_regular, new_r_Jmp(post_bl));

        /* Finally the exception control flow.
           We have two possible situations:
           First, the Call branches to an exception handler:
           We need to add a Phi node to collect the memory containing the
           exception objects.  Further we need to add another block to get a
           correct representation of this Phi.  To this block we add a Jmp that
           resolves into the X output of the Call when the Call is turned into
           a tuple.
           Second: There is no exception edge.  Just add all inlined exception
           branches to the End node.
         */
        if (exc_handling == exc_handler) {
                n_exc = 0;
                for (i = 0; i < arity; i++) {
                        ir_node *ret, *irn;
                        ret = get_Block_cfgpred(end_bl, i);
                        irn = skip_Proj(ret);
                        if (is_fragile_op(irn) || is_Raise(irn)) {
                                cf_pred[n_exc] = ret;
                                ++n_exc;
                        }
                }
                if (n_exc > 0) {
                        if (n_exc == 1) {
                                /* simple fix */
                                set_Tuple_pred(call, pn_Call_X_except, cf_pred[0]);
                        } else {
                                ir_node *block = new_r_Block(irg, n_exc, cf_pred);
                                set_Tuple_pred(call, pn_Call_X_except, new_r_Jmp(block));
                        }
                } else {
                        set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg, mode_X));
                }
        } else {
                ir_node *main_end_bl;
                int main_end_bl_arity;
                ir_node **end_preds;

                /* here exc_handling == exc_no_handler: the inlined exception
                 * branches become predecessors of the caller's end block */
                n_exc = 0;
                for (i = 0; i < arity; i++) {
                        ir_node *ret = get_Block_cfgpred(end_bl, i);
                        ir_node *irn = skip_Proj(ret);

                        if (is_fragile_op(irn) || is_Raise(irn)) {
                                cf_pred[n_exc] = ret;
                                n_exc++;
                        }
                }
                main_end_bl       = get_irg_end_block(irg);
                main_end_bl_arity = get_irn_arity(main_end_bl);
                end_preds         = XMALLOCN(ir_node*, n_exc + main_end_bl_arity);

                for (i = 0; i < main_end_bl_arity; ++i)
                        end_preds[i] = get_irn_n(main_end_bl, i);
                for (i = 0; i < n_exc; ++i)
                        end_preds[main_end_bl_arity + i] = cf_pred[i];
                set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
                set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg, mode_X));
                free(end_preds);
        }
        free(res_pred);
        free(cf_pred);

        /* -- Turn optimizations back on. -- */
        set_optimize(rem_opt);
        current_ir_graph = rem;

        return 1;
}
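
/* A note on the calling protocol (a sketch derived from the asserts above and
 * the drivers below, not a formal contract): before inline_method() is called,
 * the caller's graph must be pinned and past the construction phase,
 * IR_RESOURCE_IRN_LINK and IR_RESOURCE_PHI_LIST must be reserved, and
 * collect_phiprojs() must have been run, so that the Proj and Phi lists used
 * above are valid.  See inline_small_irgs() below for a driver that follows
 * this protocol. */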

/********************************************************************/
/* Apply inlining to small methods.                                 */
/********************************************************************/

static struct obstack  temp_obst;

/** Represents a possibly inlinable call in a graph. */
typedef struct call_entry {
        ir_node    *call;       /**< The Call node. */
        ir_graph   *callee;     /**< The callee IR-graph. */
        list_head  list;        /**< List head for linking the next one. */
        int        loop_depth;  /**< The loop depth of this call. */
        int        benefice;    /**< The calculated benefice of this call. */
        unsigned   local_adr:1; /**< Set if this call gets an address of a local variable. */
        unsigned   all_const:1; /**< Set if this call has only constant parameters. */
} call_entry;

/**
 * environment for inlining small irgs
 */
typedef struct inline_env_t {
        struct obstack obst;  /**< An obstack on which call_entries are allocated. */
        list_head      calls; /**< The call entry list. */
} inline_env_t;

/**
 * Returns the irg called from a Call node. If the irg is not
 * known, NULL is returned.
 *
 * @param call  the call node
 */
static ir_graph *get_call_called_irg(ir_node *call)
{
        ir_node *addr;

        addr = get_Call_ptr(call);
        if (is_Global(addr)) {
                ir_entity *ent = get_Global_entity(addr);
                /* we don't know which function finally gets bound to a weak symbol */
                if (get_entity_linkage(ent) & IR_LINKAGE_WEAK)
                        return NULL;

                return get_entity_irg(ent);
        }

        return NULL;
}

/**
 * Walker: Collect all calls to known graphs inside a graph.
 */
static void collect_calls(ir_node *call, void *env)
{
        if (is_Call(call)) {
                ir_graph *called_irg = get_call_called_irg(call);

                if (called_irg != NULL) {
                        /* The Call node calls a locally defined method.  Remember to inline. */
                        inline_env_t *ienv  = (inline_env_t*)env;
                        call_entry   *entry = OALLOC(&ienv->obst, call_entry);
                        entry->call       = call;
                        entry->callee     = called_irg;
                        entry->loop_depth = 0;
                        entry->benefice   = 0;
                        entry->local_adr  = 0;
                        entry->all_const  = 0;

                        list_add_tail(&entry->list, &ienv->calls);
                }
        }
}

/**
 * Inlines all small methods at call sites where the called address comes
 * from a SymConst node that references the entity representing the called
 * method.
 * The size argument is a rough measure for the code size of the method:
 * Methods where the obstack containing the firm graph is smaller than
 * size are inlined.
 */
void inline_small_irgs(ir_graph *irg, int size)
{
        ir_graph *rem = current_ir_graph;
        inline_env_t env;
        call_entry *entry;

        current_ir_graph = irg;
        /* Handle graph state */
        assert(get_irg_phase_state(irg) != phase_building);
        free_callee_info(irg);

        /* Find Call nodes to inline.
           (We cannot inline during a walk of the graph, as inlining the same
           method several times changes the visited flag of the walked graph:
           after the first inlining the visited flag of the callee equals that
           of the caller.  With the next inlining both are increased.) */
        obstack_init(&env.obst);
        INIT_LIST_HEAD(&env.calls);
        irg_walk_graph(irg, NULL, collect_calls, &env);

        if (! list_empty(&env.calls)) {
                /* There are calls to inline */
                ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
                collect_phiprojs(irg);

                list_for_each_entry(call_entry, entry, &env.calls, list) {
                        ir_graph            *callee = entry->callee;
                        irg_inline_property prop    = get_irg_inline_property(callee);

                        if (prop == irg_inline_forbidden) {
                                continue;
                        }

                        if (prop >= irg_inline_forced ||
                            _obstack_memory_used(callee->obst) - (int)obstack_room(callee->obst) < size) {
                                inline_method(entry->call, callee);
                        }
                }
                ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
        }
        obstack_free(&env.obst, NULL);
        current_ir_graph = rem;
}

typedef struct inline_small_irgs_pass_t {
        ir_graph_pass_t pass;
        int            size;
} inline_small_irgs_pass_t;

/**
 * Wrapper to run inline_small_irgs() as a pass.
 */
static int inline_small_irgs_wrapper(ir_graph *irg, void *context)
{
        inline_small_irgs_pass_t *pass = (inline_small_irgs_pass_t*)context;

        inline_small_irgs(irg, pass->size);
        return 0;
}

/* create a pass for inline_small_irgs() */
ir_graph_pass_t *inline_small_irgs_pass(const char *name, int size)
{
        inline_small_irgs_pass_t *pass = XMALLOCZ(inline_small_irgs_pass_t);

        pass->size = size;
        return def_graph_pass_constructor(
                &pass->pass, name ? name : "inline_small_irgs", inline_small_irgs_wrapper);
}
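
/* Usage sketch (illustrative only; the threshold 50 is a made-up value):
 *
 *     ir_graph_pass_t *pass = inline_small_irgs_pass(NULL, 50);
 *
 * Passing NULL as name selects the default "inline_small_irgs"; the returned
 * pass can then be handed to libFirm's pass management machinery. */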

/**
 * Environment for inlining irgs.
 */
typedef struct {
        list_head calls;             /**< List of all call nodes in this graph. */
        unsigned  *local_weights;    /**< Once allocated, the beneficial weight for transmitting local addresses. */
        unsigned  n_nodes;           /**< Number of nodes in graph except Id, Tuple, Proj, Start, End. */
        unsigned  n_blocks;          /**< Number of Blocks in graph without Start and End block. */
        unsigned  n_nodes_orig;      /**< for statistics */
        unsigned  n_call_nodes;      /**< Number of Call nodes in the graph. */
        unsigned  n_call_nodes_orig; /**< for statistics */
        unsigned  n_callers;         /**< Number of known graphs that call this graph. */
        unsigned  n_callers_orig;    /**< for statistics */
        unsigned  got_inline:1;      /**< Set, if at least one call inside this graph was inlined. */
        unsigned  recursive:1;       /**< Set, if this function is self recursive. */
} inline_irg_env;

/**
 * Allocate a new environment for inlining.
 */
static inline_irg_env *alloc_inline_irg_env(void)
{
        inline_irg_env *env    = OALLOC(&temp_obst, inline_irg_env);
        INIT_LIST_HEAD(&env->calls);
        env->local_weights     = NULL;
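        /* The counters below start at -2 (wrapping, as the fields are
         * unsigned) so that the Start and End nodes/blocks, which the
         * collecting walker counts as well, cancel out of the totals. */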
        env->n_nodes           = -2; /* do not count Start, End */
        env->n_blocks          = -2; /* do not count Start, End Block */
        env->n_nodes_orig      = -2; /* do not count Start, End */
        env->n_call_nodes      = 0;
        env->n_call_nodes_orig = 0;
        env->n_callers         = 0;
        env->n_callers_orig    = 0;
        env->got_inline        = 0;
        env->recursive         = 0;
        return env;
}

typedef struct walker_env {
        inline_irg_env *x;     /**< the inline environment */
        char ignore_runtime;   /**< the ignore runtime flag */
        char ignore_callers;   /**< if set, do not update the caller counts of callees */
} wenv_t;

/**
 * post-walker: collect all calls in the inline-environment
 * of a graph and sum some statistics.
 */
static void collect_calls2(ir_node *call, void *ctx)
{
        wenv_t         *env = (wenv_t*)ctx;
        inline_irg_env *x = env->x;
        unsigned        code = get_irn_opcode(call);
        ir_graph       *callee;
        call_entry     *entry;

        /* count meaningful nodes in irg */
        if (code != iro_Proj && code != iro_Tuple && code != iro_Sync) {
                if (code != iro_Block) {
                        ++x->n_nodes;
                        ++x->n_nodes_orig;
                } else {
                        ++x->n_blocks;
                }
        }

        if (code != iro_Call) return;

        /* check if this is a runtime call */
        if (env->ignore_runtime) {
                ir_node *symc = get_Call_ptr(call);

                if (is_Global(symc)) {
                        ir_entity *ent = get_Global_entity(symc);

                        if (get_entity_additional_properties(ent) & mtp_property_runtime)
                                return;
                }
        }

        /* collect all call nodes */
        ++x->n_call_nodes;
        ++x->n_call_nodes_orig;

        callee = get_call_called_irg(call);
        if (callee != NULL) {
                if (! env->ignore_callers) {
                        inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);
                        /* count all static callers */
                        ++callee_env->n_callers;
                        ++callee_env->n_callers_orig;
                }
                if (callee == current_ir_graph)
                        x->recursive = 1;

                /* add it to the list of possibly inlinable entries */
                entry = OALLOC(&temp_obst, call_entry);
                entry->call       = call;
                entry->callee     = callee;
                entry->loop_depth = get_irn_loop(get_nodes_block(call))->depth;
                entry->benefice   = 0;
                entry->local_adr  = 0;
                entry->all_const  = 0;

                list_add_tail(&entry->list, &x->calls);
        }
}

/**
 * Returns TRUE if there are no Call nodes in the irg's environment,
 * hence this irg is a leaf (it calls no other graphs).
 */
inline static int is_leave(ir_graph *irg)
{
        inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
        return env->n_call_nodes == 0;
}

/**
 * Returns TRUE if the number of nodes in the callee is
 * smaller than size in the irg's environment.
 */
inline static int is_smaller(ir_graph *callee, unsigned size)
{
        inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);
        return env->n_nodes < size;
}

/**
 * Duplicate a call entry.
 *
 * @param entry     the original entry to duplicate
 * @param new_call  the new call node
 * @param loop_depth_delta
 *                  delta value for the loop depth
 */
static call_entry *duplicate_call_entry(const call_entry *entry,
                                        ir_node *new_call, int loop_depth_delta)
{
        call_entry *nentry = OALLOC(&temp_obst, call_entry);
        nentry->call       = new_call;
        nentry->callee     = entry->callee;
        nentry->benefice   = entry->benefice;
        nentry->loop_depth = entry->loop_depth + loop_depth_delta;
        nentry->local_adr  = entry->local_adr;
        nentry->all_const  = entry->all_const;

        return nentry;
}

/**
 * Append all call nodes of the source environment to the call list of the
 * destination environment.
 *
 * @param dst         destination environment
 * @param src         source environment
 * @param loop_depth  the loop depth of the call that is replaced by the src list
 */
static void append_call_list(inline_irg_env *dst, inline_irg_env *src, int loop_depth)
{
        call_entry *entry, *nentry;

        /* Note that the src list points to Call nodes in the inlined graph, but
           we need Call nodes in our graph. Luckily the inliner leaves this information
           in the link field. */
        list_for_each_entry(call_entry, entry, &src->calls, list) {
                nentry = duplicate_call_entry(entry, (ir_node*)get_irn_link(entry->call), loop_depth);
                list_add_tail(&nentry->list, &dst->calls);
        }
        dst->n_call_nodes += src->n_call_nodes;
        dst->n_nodes      += src->n_nodes;
}

/*
 * Inlines small leaf ("leave") methods at call sites where the called
 * address comes from a SymConst node that references the entity
 * representing the called method.
 * The size argument is a rough measure for the code size of the method:
 * Methods where the obstack containing the firm graph is smaller than
 * size are inlined.
 */
void inline_leave_functions(unsigned maxsize, unsigned leavesize,
                            unsigned size, int ignore_runtime)
{
        inline_irg_env   *env;
        ir_graph         *irg;
        size_t           i, n_irgs;
        ir_graph         *rem;
        int              did_inline;
        wenv_t           wenv;
        call_entry       *entry, *next;
        const call_entry *centry;
        pmap             *copied_graphs;
        pmap_entry       *pm_entry;

        rem = current_ir_graph;
        obstack_init(&temp_obst);

        /* a map for the copied graphs, used to inline recursive calls */
        copied_graphs = pmap_create();

        /* extend all irgs by a temporary data structure for inlining. */
        n_irgs = get_irp_n_irgs();
        for (i = 0; i < n_irgs; ++i)
                set_irg_link(get_irp_irg(i), alloc_inline_irg_env());

        /* Pre-compute information in temporary data structure. */
        wenv.ignore_runtime = ignore_runtime;
        wenv.ignore_callers = 0;
        for (i = 0; i < n_irgs; ++i) {
                ir_graph *irg = get_irp_irg(i);

                assert(get_irg_phase_state(irg) != phase_building);
                free_callee_info(irg);

                assure_cf_loop(irg);
                wenv.x = (inline_irg_env*)get_irg_link(irg);
                irg_walk_graph(irg, NULL, collect_calls2, &wenv);
        }

        /* -- and now inline. -- */

        /* Inline leaves recursively -- we might construct new leaves. */
        do {
                did_inline = 0;

                for (i = 0; i < n_irgs; ++i) {
                        ir_node *call;
                        int phiproj_computed = 0;

                        current_ir_graph = get_irp_irg(i);
                        env              = (inline_irg_env*)get_irg_link(current_ir_graph);

                        ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
                        list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
                                ir_graph            *callee;
                                irg_inline_property  prop;

                                if (env->n_nodes > maxsize)
                                        break;

                                call   = entry->call;
                                callee = entry->callee;

                                prop = get_irg_inline_property(callee);
                                if (prop == irg_inline_forbidden) {
                                        continue;
                                }

                                if (is_leave(callee) && (
                                    is_smaller(callee, leavesize) || prop >= irg_inline_forced)) {
                                        if (!phiproj_computed) {
                                                phiproj_computed = 1;
                                                collect_phiprojs(current_ir_graph);
                                        }
                                        did_inline = inline_method(call, callee);

                                        if (did_inline) {
                                                inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);

                                                /* call was inlined, Phi/Projs for current graph must be recomputed */
                                                phiproj_computed = 0;

                                                /* Do some statistics */
                                                env->got_inline = 1;
                                                --env->n_call_nodes;
                                                env->n_nodes += callee_env->n_nodes;
                                                --callee_env->n_callers;

                                                /* remove this call from the list */
                                                list_del(&entry->list);
                                                continue;
                                        }
                                }
                        }
                        ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
                }
        } while (did_inline);

        /* inline other small functions. */
        for (i = 0; i < n_irgs; ++i) {
                ir_node *call;
                int phiproj_computed = 0;

                current_ir_graph = get_irp_irg(i);
                env              = (inline_irg_env*)get_irg_link(current_ir_graph);

                ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

                /* note that the list of possible calls is updated during the process */
                list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
                        irg_inline_property prop;
                        ir_graph            *callee;
                        pmap_entry          *e;

                        call   = entry->call;
                        callee = entry->callee;

                        prop = get_irg_inline_property(callee);
                        if (prop == irg_inline_forbidden) {
                                continue;
                        }

                        e = pmap_find(copied_graphs, callee);
                        if (e != NULL) {
                                /*
                                 * Remap callee if we have a copy.
                                 * FIXME: Should we do this only for recursive Calls?
                                 */
                                callee = (ir_graph*)e->value;
                        }

                        if (prop >= irg_inline_forced ||
                            (is_smaller(callee, size) && env->n_nodes < maxsize) /* small function */) {
                                if (current_ir_graph == callee) {
                                        /*
                                         * Recursive call: we cannot directly inline because we cannot
                                         * walk the graph and change it. So we have to make a copy of
                                         * the graph first.
                                         */

                                        inline_irg_env *callee_env;
                                        ir_graph       *copy;

                                        ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

                                        /*
                                         * No copy yet, create one.
                                         * Note that recursive methods are never leaves, so it is
                                         * sufficient to test this condition here.
                                         */
                                        copy = create_irg_copy(callee);

                                        /* create_irg_copy() destroys the Proj links, recompute them */
                                        phiproj_computed = 0;

                                        ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

                                        /* allocate new environment */
                                        callee_env = alloc_inline_irg_env();
                                        set_irg_link(copy, callee_env);

                                        assure_cf_loop(copy);
                                        wenv.x              = callee_env;
                                        wenv.ignore_callers = 1;
                                        irg_walk_graph(copy, NULL, collect_calls2, &wenv);

                                        /*
                                         * Enter the entity of the original graph. This is needed
                                         * for inline_method(). However, note that ent->irg still points
                                         * to callee, NOT to copy.
                                         */
                                        set_irg_entity(copy, get_irg_entity(callee));

                                        pmap_insert(copied_graphs, callee, copy);
                                        callee = copy;

                                        /* we have only one caller: the original graph */
                                        callee_env->n_callers      = 1;
                                        callee_env->n_callers_orig = 1;
                                }
                                if (! phiproj_computed) {
                                        phiproj_computed = 1;
                                        collect_phiprojs(current_ir_graph);
                                }
                                did_inline = inline_method(call, callee);
                                if (did_inline) {
                                        inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);

                                        /* call was inlined, Phi/Projs for current graph must be recomputed */
                                        phiproj_computed = 0;

                                        /* callee was inlined; append its call list. */
                                        env->got_inline = 1;
                                        --env->n_call_nodes;
                                        append_call_list(env, callee_env, entry->loop_depth);
                                        --callee_env->n_callers;

                                        /* after we have inlined callee, all called methods inside callee
                                           are now called once more */
                                        list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
                                                inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
                                                ++penv->n_callers;
                                        }

                                        /* remove this call from the list */
                                        list_del(&entry->list);
                                        continue;
                                }
                        }
                }
                ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
        }

        for (i = 0; i < n_irgs; ++i) {
                irg = get_irp_irg(i);
                env = (inline_irg_env*)get_irg_link(irg);

                if (env->got_inline) {
                        optimize_graph_df(irg);
                        optimize_cf(irg);
                }
                if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
                        DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
                        env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
                        env->n_callers_orig, env->n_callers,
                        get_entity_name(get_irg_entity(irg))));
                }
        }

        /* kill the copied graphs: we don't need them anymore */
        foreach_pmap(copied_graphs, pm_entry) {
                ir_graph *copy = (ir_graph*)pm_entry->value;

                /* reset the entity, otherwise it will be deleted in the next step ... */
                set_irg_entity(copy, NULL);
                free_ir_graph(copy);
        }
        pmap_destroy(copied_graphs);

        obstack_free(&temp_obst, NULL);
        current_ir_graph = rem;
}
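
/* Usage sketch (illustrative values only):
 *
 *     inline_leave_functions(750, 400, 250, 0);
 *
 * This would inline leaf functions smaller than 400 nodes and other functions
 * smaller than 250 nodes, while letting no caller grow beyond roughly 750
 * nodes; with ignore_runtime == 0, calls to runtime functions are considered
 * as well. */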

typedef struct inline_leave_functions_pass_t {
        ir_prog_pass_t pass;
        unsigned       maxsize;
        unsigned       leavesize;
        unsigned       size;
        int            ignore_runtime;
} inline_leave_functions_pass_t;

/**
 * Wrapper to run inline_leave_functions() as an ir_prog pass.
 */
static int inline_leave_functions_wrapper(ir_prog *irp, void *context)
{
        inline_leave_functions_pass_t *pass = (inline_leave_functions_pass_t*)context;

        (void)irp;
        inline_leave_functions(
                pass->maxsize, pass->leavesize,
                pass->size, pass->ignore_runtime);
        return 0;
}

/* create a pass for inline_leave_functions() */
ir_prog_pass_t *inline_leave_functions_pass(
        const char *name, unsigned maxsize, unsigned leavesize,
        unsigned size, int ignore_runtime)
{
        inline_leave_functions_pass_t *pass = XMALLOCZ(inline_leave_functions_pass_t);

        pass->maxsize        = maxsize;
        pass->leavesize      = leavesize;
        pass->size           = size;
        pass->ignore_runtime = ignore_runtime;

        return def_prog_pass_constructor(
                &pass->pass,
                name ? name : "inline_leave_functions",
                inline_leave_functions_wrapper);
}

/**
 * Calculate the weight for the uses of the given node, assumed to be (part
 * of) a transmitted address of a local variable; returns 0 if an
 * unsupported use is encountered.
 */
static unsigned calc_method_local_weight(ir_node *arg)
{
        int      i, j, k;
        unsigned v, weight = 0;

        for (i = get_irn_n_outs(arg) - 1; i >= 0; --i) {
                ir_node *succ = get_irn_out(arg, i);

                switch (get_irn_opcode(succ)) {
                case iro_Load:
                case iro_Store:
                        /* Loads and Stores can be removed */
1256                         weight += 3;
1257                         break;
1258                 case iro_Sel:
1259                         /* check if all args are constant */
1260                         for (j = get_Sel_n_indexs(succ) - 1; j >= 0; --j) {
1261                                 ir_node *idx = get_Sel_index(succ, j);
1262                                 if (! is_Const(idx))
1263                                         return 0;
1264                         }
1265                         /* Check users on this Sel. Note: if a 0 is returned here, there was
1266                            some unsupported node. */
1267                         v = calc_method_local_weight(succ);
1268                         if (v == 0)
1269                                 return 0;
1270                         /* we can kill one Sel with constant indexes, this is cheap */
1271                         weight += v + 1;
1272                         break;
1273                 case iro_Id:
1274                         /* when looking backward we might find Id nodes */
1275                         weight += calc_method_local_weight(succ);
1276                         break;
1277                 case iro_Tuple:
1278                         /* unoptimized tuple */
1279                         for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
1280                                 ir_node *pred = get_Tuple_pred(succ, j);
1281                                 if (pred == arg) {
1282                                         /* look for Proj(j) */
1283                                         for (k = get_irn_n_outs(succ) - 1; k >= 0; --k) {
1284                                                 ir_node *succ_succ = get_irn_out(succ, k);
1285                                                 if (is_Proj(succ_succ)) {
1286                                                         if (get_Proj_proj(succ_succ) == j) {
1287                                                                 /* found */
1288                                                                 weight += calc_method_local_weight(succ_succ);
1289                                                         }
1290                                                 } else {
1291                                                         /* this should NOT happen */
1292                                                         return 0;
1293                                                 }
1294                                         }
1295                                 }
1296                         }
1297                         break;
1298                 default:
1299                 /* any other node: not supported yet or Bad. */
1300                         return 0;
1301                 }
1302         }
1303         return weight;
1304 }
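
/*
 * A small worked example of the weighting above (hypothetical IR): if the
 * argument's address feeds exactly one Load and one Store, the weight is
 * 3 + 3 = 6.  A Sel with constant indexes contributes the weight of its
 * own users plus 1; any user not handled above makes the function return
 * 0, i.e. the whole chain counts as non-removable.
 */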
1305
1306 /**
1307  * Calculate, for every parameter of irg, the weight for transmitting the address of a local variable.
1308  */
1309 static void analyze_irg_local_weights(inline_irg_env *env, ir_graph *irg)
1310 {
1311         ir_entity *ent = get_irg_entity(irg);
1312         ir_type  *mtp;
1313         size_t   nparams;
1314         int      i;
1315         long     proj_nr;
1316         ir_node  *irg_args, *arg;
1317
1318         mtp      = get_entity_type(ent);
1319         nparams  = get_method_n_params(mtp);
1320
1321         /* allocate a new array; its mere existence serves as the 'analysed' flag */
1322         env->local_weights = NEW_ARR_D(unsigned, &temp_obst, nparams);
1323
1324         /* If the method has no parameters, we have nothing to do. */
1325         if (nparams == 0)
1326                 return;
1327
1328         assure_irg_outs(irg);
1329         irg_args = get_irg_args(irg);
1330         for (i = get_irn_n_outs(irg_args) - 1; i >= 0; --i) {
1331                 arg     = get_irn_out(irg_args, i);
1332                 proj_nr = get_Proj_proj(arg);
1333                 env->local_weights[proj_nr] = calc_method_local_weight(arg);
1334         }
1335 }
1336
1337 /**
1338  * Calculate the benefice for transmitting a local variable address.
1339  * After inlining, the local variable might be transformed into a
1340  * SSA variable by scalar_replacement().
1341  */
1342 static unsigned get_method_local_address_weight(ir_graph *callee, size_t pos)
1343 {
1344         inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);
1345
1346         if (env->local_weights == NULL)
1347                 analyze_irg_local_weights(env, callee);
1348
1349         if (pos < ARR_LEN(env->local_weights))
1350                 return env->local_weights[pos];
1351         return 0;
1352 }
1353
1354 /**
1355  * Calculate a benefice value for inlining the given call.
1356  *
1357  * @param entry      the call entry containing the call node to inspect
1358  * @param callee     the called graph
1359  */
1360 static int calc_inline_benefice(call_entry *entry, ir_graph *callee)
1361 {
1362         ir_node   *call = entry->call;
1363         ir_entity *ent  = get_irg_entity(callee);
1364         ir_type   *callee_frame;
1365         size_t    i, n_members, n_params;
1366         ir_node   *frame_ptr;
1367         ir_type   *mtp;
1368         int       weight = 0;
1369         int       all_const;
1370         unsigned  cc, v;
1371         irg_inline_property prop;
1372
1373         inline_irg_env *callee_env;
1374
1375         prop = get_irg_inline_property(callee);
1376         if (prop == irg_inline_forbidden) {
1377                 DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden\n",
1378                     call, callee));
1379                 return entry->benefice = INT_MIN;
1380         }
1381
1382         callee_frame = get_irg_frame_type(callee);
1383         n_members = get_class_n_members(callee_frame);
1384         for (i = 0; i < n_members; ++i) {
1385                 ir_entity *frame_ent = get_class_member(callee_frame, i);
1386                 if (is_parameter_entity(frame_ent)) {
1387                         /* TODO: the inliner should handle parameter entities by inserting Store operations */
1388                         DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden due to parameter entity\n", call, callee));
1389                         set_irg_inline_property(callee, irg_inline_forbidden);
1390                         return entry->benefice = INT_MIN;
1391                 }
1392         }
1393
1394         if (get_irg_additional_properties(callee) & mtp_property_noreturn) {
1395                 DB((dbg, LEVEL_2, "In %+F Call to %+F: not inlining noreturn\n",
1396                     call, callee));
1397                 return entry->benefice = INT_MIN;
1398         }
1399
1400         /* costs for every passed parameter */
1401         n_params = get_Call_n_params(call);
1402         mtp      = get_entity_type(ent);
1403         cc       = get_method_calling_convention(mtp);
1404         if (cc & cc_reg_param) {
1405                 /* register parameters cause smaller costs than stack parameters */
1406                 size_t max_regs = cc & ~cc_bits;
1407
1408                 if (max_regs < n_params)
1409                         weight += max_regs * 2 + (n_params - max_regs) * 5;
1410                 else
1411                         weight += n_params * 2;
1412         } else {
1413                 /* parameters are passed on the stack */
1414                 weight += 5 * n_params;
1415         }
1416
1417         /* constant parameters improve the benefice */
1418         frame_ptr = get_irg_frame(current_ir_graph);
1419         all_const = 1;
1420         for (i = 0; i < n_params; ++i) {
1421                 ir_node *param = get_Call_param(call, i);
1422
1423                 if (is_Const(param)) {
1424                         weight += get_method_param_weight(ent, i);
1425                 } else {
1426                         all_const = 0;
1427                         if (is_SymConst(param))
1428                                 weight += get_method_param_weight(ent, i);
1429                         else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
1430                                 /*
1431                                  * An address of a local variable is transmitted. After
1432                                  * inlining, scalar_replacement might be able to remove the
1433                                  * local variable, so honor this.
1434                                  */
1435                                 v = get_method_local_address_weight(callee, i);
1436                                 weight += v;
1437                                 if (v > 0)
1438                                         entry->local_adr = 1;
1439                         }
1440                 }
1441         }
1442         entry->all_const = all_const;
1443
1444         callee_env = (inline_irg_env*)get_irg_link(callee);
1445         if (callee_env->n_callers == 1 &&
1446             callee != current_ir_graph &&
1447             !entity_is_externally_visible(ent)) {
1448                 weight += 700;
1449         }
1450
1451         /* give a bonus for functions with one block */
1452         if (callee_env->n_blocks == 1)
1453                 weight = weight * 3 / 2;
1454
1455         /* and one for small non-recursive functions: we want them to be inlined in almost every case */
1456         if (callee_env->n_nodes < 30 && !callee_env->recursive)
1457                 weight += 2000;
1458
1459         /* and finally for leaves: they do not increase the register pressure
1460            because of callee-save registers */
1461         if (callee_env->n_call_nodes == 0)
1462                 weight += 400;
1463
1464         /* it's important to inline inner loops first */
1465         if (entry->loop_depth > 30)
1466                 weight += 30 * 1024;
1467         else
1468                 weight += entry->loop_depth * 1024;
1469
1470         /*
1471          * All arguments constant is probably a good sign, give an extra bonus
1472          */
1473         if (all_const)
1474                 weight += 1024;
1475
1476         return entry->benefice = weight;
1477 }
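
/*
 * Worked example of the benefice computed above (made-up callee): a static
 * function with a single caller, one block, 20 nodes, no calls and two
 * constant stack parameters of parameter weight 0, called at loop depth 1:
 *
 *   weight  = 2 * 5           two parameters passed on the stack  ->   10
 *   weight += 700             only caller, not externally visible ->  710
 *   weight  = weight * 3 / 2  single-block bonus                  -> 1065
 *   weight += 2000            small and non-recursive             -> 3065
 *   weight += 400             leaf function                       -> 3465
 *   weight += 1 * 1024        loop depth                          -> 4489
 *   weight += 1024            all arguments constant              -> 5513
 */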
1478
1479 typedef struct walk_env_t {
1480         ir_graph **irgs;
1481         size_t   last_irg;
1482 } walk_env_t;
1483
1484 /**
1485  * Callgraph walker, collect all visited graphs.
1486  */
1487 static void callgraph_walker(ir_graph *irg, void *data)
1488 {
1489         walk_env_t *env = (walk_env_t *)data;
1490         env->irgs[env->last_irg++] = irg;
1491 }
1492
1493 /**
1494  * Creates an inline order for all graphs.
1495  *
1496  * @return the list of graphs.
1497  */
1498 static ir_graph **create_irg_list(void)
1499 {
1500         ir_entity  **free_methods;
1501         size_t     n_irgs = get_irp_n_irgs();
1502         walk_env_t env;
1503
1504         cgana(&free_methods);
1505         xfree(free_methods);
1506
1507         compute_callgraph();
1508
1509         env.irgs     = XMALLOCNZ(ir_graph*, n_irgs);
1510         env.last_irg = 0;
1511
1512         callgraph_walk(NULL, callgraph_walker, &env);
1513         assert(n_irgs == env.last_irg);
1514
1515         return env.irgs;
1516 }
1517
1518 /**
1519  * Push a call onto the priority queue if its benefice is big enough.
1520  *
1521  * @param pqueue   the priority queue of calls
1522  * @param call     the call entry
1523  * @param inline_threshold
1524  *                 the threshold value
1525  */
1526 static void maybe_push_call(pqueue_t *pqueue, call_entry *call,
1527                             int inline_threshold)
1528 {
1529         ir_graph            *callee  = call->callee;
1530         irg_inline_property prop     = get_irg_inline_property(callee);
1531         int                 benefice = calc_inline_benefice(call, callee);
1532
1533         DB((dbg, LEVEL_2, "In %+F Call %+F to %+F has benefice %d\n",
1534             get_irn_irg(call->call), call->call, callee, benefice));
1535
1536         if (prop < irg_inline_forced && benefice < inline_threshold) {
1537                 return;
1538         }
1539
1540         pqueue_put(pqueue, call, benefice);
1541 }
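
/*
 * Note on the rule above: with inline_threshold = 0, a call whose benefice
 * came out negative (e.g. the INT_MIN assigned to forbidden callees) is
 * dropped here; only a callee marked irg_inline_forced bypasses the
 * threshold check.
 */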
1542
1543 /**
1544  * Try to inline calls into a graph.
1545  *
1546  * @param irg      the graph into which we inline
1547  * @param maxsize  do NOT inline if the size of irg gets
1548  *                 bigger than this amount
1549  * @param inline_threshold
1550  *                 threshold value for inline decision
1551  * @param copied_graphs
1552  *                 map containing copies of recursive graphs
1553  */
1554 static void inline_into(ir_graph *irg, unsigned maxsize,
1555                         int inline_threshold, pmap *copied_graphs)
1556 {
1557         int            phiproj_computed = 0;
1558         inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
1559         call_entry     *curr_call;
1560         wenv_t         wenv;
1561         pqueue_t       *pqueue;
1562
1563         if (env->n_call_nodes == 0)
1564                 return;
1565
1566         if (env->n_nodes > maxsize) {
1567                 DB((dbg, LEVEL_2, "%+F: too big (%d)\n", irg, env->n_nodes));
1568                 return;
1569         }
1570
1571         current_ir_graph = irg;
1572         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1573
1574         /* put all calls of this graph into the pqueue */
1575         pqueue = new_pqueue();
1576
1577         list_for_each_entry(call_entry, curr_call, &env->calls, list) {
1578                 assert(is_Call(curr_call->call));
1579                 maybe_push_call(pqueue, curr_call, inline_threshold);
1580         }
1581
1582         /* note that the list of possible calls is updated during the process */
1583         while (!pqueue_empty(pqueue)) {
1584                 int                 did_inline;
1585                 call_entry          *curr_call  = (call_entry*)pqueue_pop_front(pqueue);
1586                 ir_graph            *callee     = curr_call->callee;
1587                 ir_node             *call_node  = curr_call->call;
1588                 inline_irg_env      *callee_env = (inline_irg_env*)get_irg_link(callee);
1589                 irg_inline_property prop        = get_irg_inline_property(callee);
1590                 int                 loop_depth;
1591                 const call_entry    *centry;
1592                 pmap_entry          *e;
1593
1594                 if ((prop < irg_inline_forced) && env->n_nodes + callee_env->n_nodes > maxsize) {
1595                         DB((dbg, LEVEL_2, "%+F: too big (%d) + %+F (%d)\n", irg,
1596                                                 env->n_nodes, callee, callee_env->n_nodes));
1597                         continue;
1598                 }
1599
1600                 e = pmap_find(copied_graphs, callee);
1601                 if (e != NULL) {
1602                         int benefice = curr_call->benefice;
1603                         /*
1604                          * Reduce the weight for recursive functions iff not all arguments are
1605                          * const: inlining recursive functions is rarely good.
1606                          */
1607                         if (!curr_call->all_const)
1608                                 benefice -= 2000;
1609                         if (benefice < inline_threshold)
1610                                 continue;
1611
1612                         /*
1613                          * Remap callee if we have a copy.
1614                          */
1615                         callee     = (ir_graph*)e->value;
1616                         callee_env = (inline_irg_env*)get_irg_link(callee);
1617                 }
1618
1619                 if (current_ir_graph == callee) {
1620                         /*
1621                          * Recursive call: we cannot directly inline because we cannot
1622                          * walk the graph and change it. So we have to make a copy of
1623                          * the graph first.
1624                          */
1625                         int benefice = curr_call->benefice;
1626                         ir_graph *copy;
1627
1628                         /*
1629                          * Reduce the weight for recursive functions iff not all arguments are
1630                          * const: inlining recursive functions is rarely good.
1631                          */
1632                         if (!curr_call->all_const)
1633                                 benefice -= 2000;
1634                         if (benefice < inline_threshold)
1635                                 continue;
1636
1637                         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1638
1639                         /*
1640                          * No copy yet, create one.
1641                          * Note that recursive methods are never leaves, so it is
1642                          * sufficient to test this condition here.
1643                          */
1644                         copy = create_irg_copy(callee);
1645
1646                         /* create_irg_copy() destroys the Proj links, recompute them */
1647                         phiproj_computed = 0;
1648
1649                         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1650
1651                         /* allocate a new environment */
1652                         callee_env = alloc_inline_irg_env();
1653                         set_irg_link(copy, callee_env);
1654
1655                         assure_cf_loop(copy);
1656                         memset(&wenv, 0, sizeof(wenv));
1657                         wenv.x              = callee_env;
1658                         wenv.ignore_callers = 1;
1659                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
1660
1661                         /*
1662                          * Enter the entity of the original graph. This is needed
1663                          * for inline_method(). However, note that ent->irg still points
1664                          * to callee, NOT to copy.
1665                          */
1666                         set_irg_entity(copy, get_irg_entity(callee));
1667
1668                         pmap_insert(copied_graphs, callee, copy);
1669                         callee = copy;
1670
1671                         /* we have only one caller: the original graph */
1672                         callee_env->n_callers      = 1;
1673                         callee_env->n_callers_orig = 1;
1674                 }
1675                 if (! phiproj_computed) {
1676                         phiproj_computed = 1;
1677                         collect_phiprojs(current_ir_graph);
1678                 }
1679                 did_inline = inline_method(call_node, callee);
1680                 if (!did_inline)
1681                         continue;
1682
1683                 /* call was inlined, Phi/Projs for current graph must be recomputed */
1684                 phiproj_computed = 0;
1685
1686                 /* remove it from the caller list */
1687                 list_del(&curr_call->list);
1688
1689                 /* callee was inlined; append its call list. */
1690                 env->got_inline = 1;
1691                 --env->n_call_nodes;
1692
1693                 /* we just generate a bunch of new calls */
1694                 loop_depth = curr_call->loop_depth;
1695                 list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
1696                         inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
1697                         ir_node        *new_call;
1698                         call_entry     *new_entry;
1699
1700                         /* after we have inlined callee, all called methods inside
1701                          * callee are now called once more */
1702                         ++penv->n_callers;
1703
1704                         /* Note that the src list points to Call nodes in the inlined graph,
1705                          * but we need Call nodes in our graph. Luckily the inliner leaves
1706                          * this information in the link field. */
1707                         new_call = (ir_node*)get_irn_link(centry->call);
1708                         if (get_irn_irg(new_call) != irg) {
1709                                 /* centry->call has not been copied, which means it is dead.
1710                                  * This can happen during inlining if a const function that
1711                                  * cannot be inlined is only used as an unused argument of
1712                                  * another function that is inlined. */
1713                                 continue;
1714                         }
1715                         assert(is_Call(new_call));
1716
1717                         new_entry = duplicate_call_entry(centry, new_call, loop_depth);
1718                         list_add_tail(&new_entry->list, &env->calls);
1719                         maybe_push_call(pqueue, new_entry, inline_threshold);
1720                 }
1721
1722                 env->n_call_nodes += callee_env->n_call_nodes;
1723                 env->n_nodes += callee_env->n_nodes;
1724                 --callee_env->n_callers;
1725         }
1726         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1727         del_pqueue(pqueue);
1728 }
1729
1730 /*
1731  * Heuristic inliner. Calculates a benefice value for every call and inlines
1732  * those calls whose benefice reaches the threshold.
1733  */
1734 void inline_functions(unsigned maxsize, int inline_threshold,
1735                       opt_ptr after_inline_opt)
1736 {
1737         inline_irg_env   *env;
1738         size_t           i, n_irgs;
1739         ir_graph         *rem;
1740         wenv_t           wenv;
1741         pmap             *copied_graphs;
1742         pmap_entry       *pm_entry;
1743         ir_graph         **irgs;
1744
1745         rem = current_ir_graph;
1746         obstack_init(&temp_obst);
1747
1748         irgs = create_irg_list();
1749
1750         /* a map for the copied graphs, used to inline recursive calls */
1751         copied_graphs = pmap_create();
1752
1753         /* extend all irgs by a temporary data structure for inlining. */
1754         n_irgs = get_irp_n_irgs();
1755         for (i = 0; i < n_irgs; ++i)
1756                 set_irg_link(irgs[i], alloc_inline_irg_env());
1757
1758         /* Pre-compute information in temporary data structure. */
1759         wenv.ignore_runtime = 0;
1760         wenv.ignore_callers = 0;
1761         for (i = 0; i < n_irgs; ++i) {
1762                 ir_graph *irg = irgs[i];
1763
1764                 free_callee_info(irg);
1765
1766                 wenv.x = (inline_irg_env*)get_irg_link(irg);
1767                 assure_cf_loop(irg);
1768                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
1769         }
1770
1771         /* -- and now inline. -- */
1772         for (i = 0; i < n_irgs; ++i) {
1773                 ir_graph *irg = irgs[i];
1774
1775                 inline_into(irg, maxsize, inline_threshold, copied_graphs);
1776         }
1777
1778         for (i = 0; i < n_irgs; ++i) {
1779                 ir_graph *irg = irgs[i];
1780
1781                 env = (inline_irg_env*)get_irg_link(irg);
1782                 if (env->got_inline && after_inline_opt != NULL) {
1783                         /* this irg got calls inlined: optimize it */
1784                         after_inline_opt(irg);
1785                 }
1786                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
1787                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
1788                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
1789                         env->n_callers_orig, env->n_callers,
1790                         get_entity_name(get_irg_entity(irg))));
1791                 }
1792         }
1793
1794         /* kill the copied graphs: we don't need them anymore */
1795         foreach_pmap(copied_graphs, pm_entry) {
1796                 ir_graph *copy = (ir_graph*)pm_entry->value;
1797
1798                 /* reset the entity, otherwise it will be deleted in the next step ... */
1799                 set_irg_entity(copy, NULL);
1800                 free_ir_graph(copy);
1801         }
1802         pmap_destroy(copied_graphs);
1803
1804         xfree(irgs);
1805
1806         obstack_free(&temp_obst, NULL);
1807         current_ir_graph = rem;
1808 }
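
/*
 * Typical invocation (a sketch; the limits are illustrative, not tuned
 * defaults):
 *
 *   inline_functions(750, 0, local_optimize_graph);
 *
 * This inlines every call whose benefice is at least 0, never grows a
 * graph beyond 750 nodes and runs local_optimize_graph() on each graph
 * that had calls inlined into it.
 */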
1809
1810 typedef struct inline_functions_pass_t {
1811         ir_prog_pass_t pass;
1812         unsigned       maxsize;
1813         int            inline_threshold;
1814         opt_ptr        after_inline_opt;
1815 } inline_functions_pass_t;
1816
1817 /**
1818  * Wrapper to run inline_functions() as an ir_prog pass.
1819  */
1820 static int inline_functions_wrapper(ir_prog *irp, void *context)
1821 {
1822         inline_functions_pass_t *pass = (inline_functions_pass_t*)context;
1823
1824         (void)irp;
1825         inline_functions(pass->maxsize, pass->inline_threshold,
1826                          pass->after_inline_opt);
1827         return 0;
1828 }
1829
1830 /* create an ir_prog pass for inline_functions() */
1831 ir_prog_pass_t *inline_functions_pass(
1832           const char *name, unsigned maxsize, int inline_threshold,
1833           opt_ptr after_inline_opt)
1834 {
1835         inline_functions_pass_t *pass = XMALLOCZ(inline_functions_pass_t);
1836
1837         pass->maxsize          = maxsize;
1838         pass->inline_threshold = inline_threshold;
1839         pass->after_inline_opt = after_inline_opt;
1840
1841         return def_prog_pass_constructor(
1842                 &pass->pass, name ? name : "inline_functions",
1843                 inline_functions_wrapper);
1844 }
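
/*
 * The pass variant wraps the same entry point, e.g. (values illustrative):
 *
 *   ir_prog_pass_t *p =
 *           inline_functions_pass(NULL, 750, 0, local_optimize_graph);
 *
 * The returned pass can then be scheduled like any other ir_prog pass.
 */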
1845
1846 void firm_init_inline(void)
1847 {
1848         FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
1849 }