[libfirm] / ir / opt / opt_inline.c
1 /*
2  * Copyright (C) 1995-2011 University of Karlsruhe.  All rights reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief    Dead node elimination and Procedure Inlining.
23  * @author   Michael Beck, Goetz Lindenmaier
24  */
25 #include "config.h"
26
27 #include <limits.h>
28 #include <stdbool.h>
29 #include <assert.h>
30
31 #include "irnode_t.h"
32 #include "irgraph_t.h"
33 #include "irprog_t.h"
34
35 #include "iroptimize.h"
36 #include "ircons_t.h"
37 #include "iropt_t.h"
38 #include "irgopt.h"
39 #include "irgmod.h"
40 #include "irgwalk.h"
41
42 #include "array_t.h"
43 #include "list.h"
44 #include "pset.h"
45 #include "pmap.h"
46 #include "pdeq.h"
47 #include "xmalloc.h"
48 #include "pqueue.h"
49
50 #include "irouts.h"
51 #include "irloop_t.h"
52 #include "irbackedge_t.h"
53 #include "opt_init.h"
54 #include "cgana.h"
55 #include "trouts.h"
56 #include "error.h"
57
58 #include "analyze_irg_args.h"
59 #include "iredges_t.h"
60 #include "irflag_t.h"
61 #include "irhooks.h"
62 #include "irtools.h"
63 #include "iropt_dbg.h"
64 #include "irpass_t.h"
65 #include "irnodemap.h"
66
67 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
68
69 /*------------------------------------------------------------------*/
70 /* Routines for dead node elimination / copying garbage collection  */
71 /* of the obstack.                                                  */
72 /*------------------------------------------------------------------*/
73
74 /**
75  * Remember the new node in the old node by using a field all nodes have.
76  */
77 static void set_new_node(ir_node *node, ir_node *new_node)
78 {
79         set_irn_link(node, new_node);
80 }
81
82 /**
83  * Get this new node, before the old node is forgotten.
84  */
85 static inline ir_node *get_new_node(ir_node *old_node)
86 {
87         assert(irn_visited(old_node));
88         return (ir_node*) get_irn_link(old_node);
89 }
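
/*
 * A short sketch (not part of the original file) of the copy protocol built
 * on these two helpers: the copier stores every copy in the link field of
 * its original,
 *
 *     set_new_node(old, irn_copy_into_irg(old, new_irg));
 *
 * and a second pass rewires predecessors through get_new_node().  Nodes
 * that must not be copied (Start, NoMem, ...) are pre-seeded with
 * set_new_node() and marked visited; see inline_method() below.
 */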
90
91 /*--------------------------------------------------------------------*/
92 /*  Functionality for inlining                                         */
93 /*--------------------------------------------------------------------*/
94
95 /**
96  * Copy a node for inlining.  Updates attributes that change when
97  * inlining but not for dead node elimination.
98  *
99  * Copies the node by calling irn_copy_into_irg() and then updates the
100  * entity if it is a local one.  env must be the graph into which we
101  * inline. The copied entities must already be in the link fields of
102  * the old entities.
103  */
104 static void copy_node_inline(ir_node *node, void *env)
105 {
106         ir_graph *new_irg  = (ir_graph*) env;
107         ir_node  *new_node = irn_copy_into_irg(node, new_irg);
108
109         set_new_node(node, new_node);
110         if (is_Sel(node)) {
111                 ir_graph  *old_irg        = get_irn_irg(node);
112                 ir_type   *old_frame_type = get_irg_frame_type(old_irg);
113                 ir_entity *old_entity     = get_Sel_entity(node);
114                 assert(is_Sel(new_node));
115                 /* use copied entities from the new frame */
116                 if (get_entity_owner(old_entity) == old_frame_type) {
117                         ir_entity *new_entity = (ir_entity*)get_entity_link(old_entity);
118                         assert(new_entity != NULL);
119                         set_Sel_entity(new_node, new_entity);
120                 }
121         } else if (is_Block(new_node)) {
122                 new_node->attr.block.irg.irg = new_irg;
123         }
124 }
125
126 static void set_preds_inline(ir_node *node, void *env)
127 {
128         ir_node *new_node;
129
130         irn_rewire_inputs(node);
131
132         /* move constants into start block */
133         new_node = get_new_node(node);
134         if (is_irn_constlike(new_node)) {
135                 ir_graph *new_irg     = (ir_graph *) env;
136                 ir_node  *start_block = get_irg_start_block(new_irg);
137                 set_nodes_block(new_node, start_block);
138         }
139 }
140
141 /**
142  * Walker: checks for constructs that currently prevent inlining: taken block addresses, frame accesses to value-type or parameter entities, and stack Alloc (alloca).
143  */
144 static void find_addr(ir_node *node, void *env)
145 {
146         bool *allow_inline = (bool*)env;
147
148         if (is_Block(node) && get_Block_entity(node)) {
149                /*
150                 * Currently we cannot correctly handle blocks whose address
151                 * is taken during inlining.
152                 */
153                 *allow_inline = false;
154         } else if (is_Sel(node)) {
155                 ir_graph *irg = current_ir_graph;
156                 if (get_Sel_ptr(node) == get_irg_frame(irg)) {
157                         /* access to frame */
158                         ir_entity *ent = get_Sel_entity(node);
159                         if (get_entity_owner(ent) != get_irg_frame_type(irg)) {
160                                 /* access to value_type */
161                                 *allow_inline = false;
162                         }
163                         if (is_parameter_entity(ent)) {
164                                 *allow_inline = false;
165                         }
166                 }
167         } else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
168                 /* From GCC:
169                  * Refuse to inline an alloca call unless the user explicitly forced
170                  * it, as this may change the program's memory overhead drastically
171                  * when the function using alloca is called in a loop.  For the GCC
172                  * present in SPEC2000, inlining into schedule_block caused it to
173                  * require 2GB of RAM instead of 256MB.
174                  *
175                  * Sadly this is true for our implementation as well.
176                  * Moreover, we cannot differentiate between alloca() and VLA yet, so
177                  * this disables inlining of functions using VLA (which are completely safe).
178                  *
179                  * 2 Solutions:
180                  * - add a flag to the Alloc node for "real" alloca() calls
181                  * - add a new Stack-Restore node at the end of a function using
182                  *   alloca()
183                  */
184                 *allow_inline = false;
185         }
186 }
187
188 /**
189  * Check if we can inline a given call.
190  * Currently, we cannot inline the following cases:
191  * - calls with compound arguments or results
192  * - calls whose argument or result types do not match the callee's
193  *   (unless the bits can simply be reinterpreted)
194  * - graphs that take the address of a parameter or use alloca
195  */
196 static bool can_inline(ir_node *call, ir_graph *called_graph)
197 {
198         ir_entity          *called      = get_irg_entity(called_graph);
199         ir_type            *called_type = get_entity_type(called);
200         ir_type            *call_type   = get_Call_type(call);
201         size_t              n_params    = get_method_n_params(called_type);
202         size_t              n_arguments = get_method_n_params(call_type);
203         size_t              n_res       = get_method_n_ress(called_type);
204         mtp_additional_properties props = get_entity_additional_properties(called);
205         size_t              i;
206         bool                res;
207
208         if (props & mtp_property_noinline)
209                 return false;
210
211         if (n_arguments != n_params) {
212                /* this is a bad feature of C: without a prototype, we can
213                 * call a function with fewer arguments than parameters. Currently
214                 * we don't support this, although we could use Unknown then. */
215                 return false;
216         }
217         if (n_res != get_method_n_ress(call_type)) {
218                 return false;
219         }
220
221        /* Argh, compiling C has some bad consequences:
222         * it is implementation-defined what happens when argument and
223         * parameter types do not match. We support inlining if the bit size
224         * of the types matches AND the same arithmetic is used. */
225         for (i = 0; i < n_params; ++i) {
226                 ir_type *param_tp = get_method_param_type(called_type, i);
227                 ir_type *arg_tp   = get_method_param_type(call_type, i);
228
229                 if (param_tp != arg_tp) {
230                         ir_mode *pmode = get_type_mode(param_tp);
231                         ir_mode *amode = get_type_mode(arg_tp);
232
233                         if (pmode == NULL || amode == NULL)
234                                 return false;
235                         if (get_mode_size_bits(pmode) != get_mode_size_bits(amode))
236                                 return false;
237                         if (get_mode_arithmetic(pmode) != get_mode_arithmetic(amode))
238                                 return false;
239                         /* otherwise we can simply "reinterpret" the bits */
240                 }
241         }
242         for (i = 0; i < n_res; ++i) {
243                 ir_type *decl_res_tp = get_method_res_type(called_type, i);
244                 ir_type *used_res_tp = get_method_res_type(call_type, i);
245
246                 if (decl_res_tp != used_res_tp) {
247                         ir_mode *decl_mode = get_type_mode(decl_res_tp);
248                         ir_mode *used_mode = get_type_mode(used_res_tp);
249                         if (decl_mode == NULL || used_mode == NULL)
250                                 return false;
251                         if (get_mode_size_bits(decl_mode) != get_mode_size_bits(used_mode))
252                                 return false;
253                         if (get_mode_arithmetic(decl_mode) != get_mode_arithmetic(used_mode))
254                                 return false;
255                         /* otherwise we can "reinterpret" the bits */
256                 }
257         }
258
259         /* check parameters for compound arguments */
260         for (i = 0; i < n_params; ++i) {
261                 ir_type *p_type = get_method_param_type(call_type, i);
262
263                 if (is_compound_type(p_type))
264                         return false;
265         }
266
267        /* check results for compound return types */
268         for (i = 0; i < n_res; ++i) {
269                 ir_type *r_type = get_method_res_type(call_type, i);
270
271                 if (is_compound_type(r_type))
272                         return false;
273         }
274
275         res = true;
276         irg_walk_graph(called_graph, find_addr, NULL, &res);
277
278         return res;
279 }
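
/*
 * Illustrative example (an assumption, not from the original source): on a
 * target where int and long are both 32-bit integers using the same
 * arithmetic, a call through the prototype "void f(long)" to a callee
 * defined as "void f(int)" passes the mode checks in can_inline() and the
 * argument bits are simply reinterpreted.  A same-sized float/int mismatch
 * is rejected because the arithmetic differs.
 */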
280
281 enum exc_mode {
282         exc_handler,    /**< There is a handler. */
283         exc_no_handler  /**< Exception handling not represented. */
284 };
285
286 /**
287  * Copy all entities of the stack frame of one irg to the stack frame of
288  * another. Sets the entity links of the old entities to their copies.
289  */
290 static void copy_frame_entities(ir_graph *from, ir_graph *to)
291 {
292         ir_type *from_frame = get_irg_frame_type(from);
293         ir_type *to_frame   = get_irg_frame_type(to);
294         size_t   n_members  = get_class_n_members(from_frame);
295         size_t   i;
296         assert(from_frame != to_frame);
297
298         for (i = 0; i < n_members; ++i) {
299                 ir_entity *old_ent = get_class_member(from_frame, i);
300                 ir_entity *new_ent = copy_entity_own(old_ent, to_frame);
301                 set_entity_link(old_ent, new_ent);
302                 assert (!is_parameter_entity(old_ent));
303         }
304 }
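
/*
 * Note (added for clarity): the entity links set here are consumed by
 * copy_node_inline() above, which redirects Sel nodes on the old frame to
 * the copied entities on the new frame.
 */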
305
306 /* Inlines a method at the given call site. */
307 int inline_method(ir_node *call, ir_graph *called_graph)
308 {
309         /* we cannot inline some types of calls */
310         if (! can_inline(call, called_graph))
311                 return 0;
312
313         /* We cannot inline a recursive call. The graph must be copied before
314         * the call to inline_method() using create_irg_copy(). */
315         ir_graph *irg = get_irn_irg(call);
316         if (called_graph == irg)
317                 return 0;
318
319         ir_entity *ent      = get_irg_entity(called_graph);
320         ir_type   *mtp      = get_entity_type(ent);
321         ir_type   *ctp      = get_Call_type(call);
322         int        n_params = get_method_n_params(mtp);
323
324         ir_graph *rem = current_ir_graph;
325         current_ir_graph = irg;
326
327         DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));
328
329         /* optimizations can cause problems when allocating new nodes */
330         int rem_opt = get_opt_optimize();
331         set_optimize(0);
332
333         /* Handle graph state */
334         assert(get_irg_phase_state(irg) != phase_building);
335         assert(get_irg_pinned(irg) == op_pin_state_pinned);
336         assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
337         clear_irg_properties(irg, IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE
338                            | IR_GRAPH_PROPERTY_CONSISTENT_ENTITY_USAGE);
339         set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
340         clear_irg_properties(irg, IR_GRAPH_PROPERTY_CONSISTENT_ENTITY_USAGE);
341         edges_deactivate(irg);
342
343         /* here we know we WILL inline, so inform the statistics */
344         hook_inline(call, called_graph);
345
346         /* -- Decide how to handle exception control flow: Is there a handler
347            for the Call node, or do we branch directly to End on an exception?
348            exc_handling:
349           exc_handler:    There is a handler.
350           exc_no_handler: Exception handling not represented in Firm. -- */
351         ir_node *Xproj = NULL;
352         for (ir_node *proj = (ir_node*)get_irn_link(call); proj != NULL;
353                  proj = (ir_node*)get_irn_link(proj)) {
354                 long proj_nr = get_Proj_proj(proj);
355                 if (proj_nr == pn_Call_X_except) Xproj = proj;
356         }
357         enum exc_mode exc_handling = Xproj != NULL ? exc_handler : exc_no_handler;
358
359         /* create the argument tuple */
360         ir_node **args_in = ALLOCAN(ir_node*, n_params);
361
362         ir_node *block = get_nodes_block(call);
363         for (int i = n_params - 1; i >= 0; --i) {
364                 ir_node *arg      = get_Call_param(call, i);
365                 ir_type *param_tp = get_method_param_type(mtp, i);
366                 ir_mode *mode     = get_type_mode(param_tp);
367
368                 if (mode != get_irn_mode(arg)) {
369                         arg = new_r_Conv(block, arg, mode);
370                 }
371                 args_in[i] = arg;
372         }
373
374        /* pre_call is a Tuple gathering the memory, initial control flow, frame
375         * base and arguments of the call; it later replaces the Start node of the
376         * called graph. Post_call is the old Call node and collects the results
377         * of the called graph. Both will end up being a tuple. */
377         ir_node *post_bl = get_nodes_block(call);
378         /* XxMxPxPxPxT of Start + parameter of Call */
379         ir_node *in[pn_Start_max+1];
380         in[pn_Start_M]              = get_Call_mem(call);
381         in[pn_Start_X_initial_exec] = new_r_Jmp(post_bl);
382         in[pn_Start_P_frame_base]   = get_irg_frame(irg);
383         in[pn_Start_T_args]         = new_r_Tuple(post_bl, n_params, args_in);
384         ir_node *pre_call = new_r_Tuple(post_bl, pn_Start_max+1, in);
385         ir_node *post_call = call;
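
        /* Sketch of the substitution (added for clarity): pre_call mirrors the
         * projection layout of a Start node (M, X_initial_exec, P_frame_base,
         * T_args), so Projs of the callee's Start can be rewired to Projs of
         * this Tuple unchanged; post_call will later be turned into a Tuple
         * mirroring the Call's projection layout. */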
386
387         /* --
388            The new block gets the ins of the old block, pre_call and all its
389            predecessors and all Phi nodes. -- */
390         part_block(pre_call);
391
392         /* increment visited flag for later walk */
393         inc_irg_visited(called_graph);
394
395        /* Link some nodes to nodes in the current graph, so the linked nodes
396         * will be used instead of being copied.
397         * This way the copier will use the created Tuple instead of copying the
398         * Start node, and similarly for singleton nodes like NoMem and Bad.
399         * Note: this prevents the predecessors from being copied - only do it
400         *       for nodes without predecessors */
401         ir_node *start_block = get_irg_start_block(called_graph);
402         set_new_node(start_block, get_nodes_block(pre_call));
403         mark_irn_visited(start_block);
404
405         ir_node *start = get_irg_start(called_graph);
406         set_new_node(start, pre_call);
407         mark_irn_visited(start);
408
409         ir_node *nomem = get_irg_no_mem(called_graph);
410         set_new_node(nomem, get_irg_no_mem(irg));
411         mark_irn_visited(nomem);
412
413        /* the entity link is used to map entities of the old stack frame to
414         * the new stack frame */
415         irp_reserve_resources(irp, IRP_RESOURCE_ENTITY_LINK);
416
417         /* copy entities and nodes */
418         assert(!irn_visited(get_irg_end(called_graph)));
419         copy_frame_entities(called_graph, irg);
420         irg_walk_core(get_irg_end(called_graph), copy_node_inline, set_preds_inline,
421                       irg);
422
423         irp_free_resources(irp, IRP_RESOURCE_ENTITY_LINK);
424
425         /* -- Merge the end of the inlined procedure with the call site -- */
426        /* We will turn the old Call node into a Tuple with the following
427           predecessors:
428           -1:                Block of the Tuple.
429           pn_Call_M:         Phi of the memories of all Returns and of all
430                              inlined operations with memory connected to End.
431           pn_Call_T_result:  Tuple of all result values.
432           pn_Call_X_regular: Jmp to the code after the Call.
433           pn_Call_X_except:  Jmp from a new Block that merges the control flow
434                              from all exception predecessors of the old end block.
435           In case the old Call directly branches to End on an exception we need
436           neither the block merging all exceptions nor the Phi of the exception memories.
437        */
438
439         /* Precompute some values */
440         ir_node *end_bl = get_new_node(get_irg_end_block(called_graph));
441         ir_node *end    = get_new_node(get_irg_end(called_graph));
442         int      arity  = get_Block_n_cfgpreds(end_bl); /* arity = n_exc + n_ret  */
443         int      n_res  = get_method_n_ress(get_Call_type(call));
444
445         ir_node **res_pred = XMALLOCN(ir_node*, n_res);
446         ir_node **cf_pred  = XMALLOCN(ir_node*, arity);
447
448        /* take over the keep-alives of the inlined graph */
449         int irn_arity = get_irn_arity(end);
450         for (int i = 0; i < irn_arity; i++) {
451                 ir_node *ka = get_End_keepalive(end, i);
452                 if (! is_Bad(ka))
453                         add_End_keepalive(get_irg_end(irg), ka);
454         }
455
456         /* replace Return nodes by Jump nodes */
457         int n_ret = 0;
458         for (int i = 0; i < arity; i++) {
459                 ir_node *ret = get_Block_cfgpred(end_bl, i);
460                 if (is_Return(ret)) {
461                         ir_node *block = get_nodes_block(ret);
462                         cf_pred[n_ret] = new_r_Jmp(block);
463                         n_ret++;
464                 }
465         }
466         set_irn_in(post_bl, n_ret, cf_pred);
467
468         /* build a Tuple for all results of the method.
469          * add Phi node if there was more than one Return. */
470         turn_into_tuple(post_call, pn_Call_max+1);
471         /* First the Memory-Phi */
472         int n_mem_phi = 0;
473         for (int i = 0; i < arity; i++) {
474                 ir_node *ret = get_Block_cfgpred(end_bl, i);
475                 if (is_Return(ret)) {
476                         cf_pred[n_mem_phi++] = get_Return_mem(ret);
477                 }
478                 /* memory output for some exceptions is directly connected to End */
479                 if (is_Call(ret)) {
480                         cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 3);
481                 } else if (is_fragile_op(ret)) {
482                        /* We rely on all fragile ops having the memory output at the same position. */
483                         cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 0);
484                 } else if (is_Raise(ret)) {
485                         cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 1);
486                 }
487         }
488         ir_node *phi = new_r_Phi(post_bl, n_mem_phi, cf_pred, mode_M);
489         set_Tuple_pred(call, pn_Call_M, phi);
490         /* Conserve Phi-list for further inlinings -- but might be optimized */
491         if (get_nodes_block(phi) == post_bl) {
492                 set_irn_link(phi, get_irn_link(post_bl));
493                 set_irn_link(post_bl, phi);
494         }
495         /* Now the real results */
496         if (n_res > 0) {
497                 for (int j = 0; j < n_res; j++) {
498                         ir_type *res_type = get_method_res_type(ctp, j);
499                         ir_mode *res_mode = get_type_mode(res_type);
500                         int n_ret = 0;
501                         for (int i = 0; i < arity; i++) {
502                                 ir_node *ret = get_Block_cfgpred(end_bl, i);
503                                 if (is_Return(ret)) {
504                                         ir_node *res = get_Return_res(ret, j);
505                                         if (get_irn_mode(res) != res_mode) {
506                                                 ir_node *block = get_nodes_block(res);
507                                                 res = new_r_Conv(block, res, res_mode);
508                                         }
509                                         cf_pred[n_ret] = res;
510                                         n_ret++;
511                                 }
512                         }
513                         if (n_ret > 0) {
514                                 phi = new_r_Phi(post_bl, n_ret, cf_pred, res_mode);
515                         } else {
516                                 phi = new_r_Bad(irg, res_mode);
517                         }
518                         res_pred[j] = phi;
519                         /* Conserve Phi-list for further inlinings -- but might be optimized */
520                         if (get_nodes_block(phi) == post_bl) {
521                                 set_Phi_next(phi, get_Block_phis(post_bl));
522                                 set_Block_phis(post_bl, phi);
523                         }
524                 }
525                 ir_node *result_tuple = new_r_Tuple(post_bl, n_res, res_pred);
526                 set_Tuple_pred(call, pn_Call_T_result, result_tuple);
527         } else {
528                 set_Tuple_pred(call, pn_Call_T_result, new_r_Bad(irg, mode_T));
529         }
530         /* handle the regular call */
531         set_Tuple_pred(call, pn_Call_X_regular, new_r_Jmp(post_bl));
532
533         /* Finally the exception control flow.
534            We have two possible situations:
535            First if the Call branches to an exception handler:
536            We need to add a Phi node to
537            collect the memory containing the exception objects.  Further we need
538            to add another block to get a correct representation of this Phi.  To
539            this block we add a Jmp that resolves into the X output of the Call
540            when the Call is turned into a tuple.
541            Second: There is no exception edge. Just add all inlined exception
542            branches to the End node.
543          */
544         if (exc_handling == exc_handler) {
545                 int n_exc = 0;
546                 for (int i = 0; i < arity; i++) {
547                         ir_node *ret = get_Block_cfgpred(end_bl, i);
548                         ir_node *irn = skip_Proj(ret);
549                         if (is_fragile_op(irn) || is_Raise(irn)) {
550                                 cf_pred[n_exc] = ret;
551                                 ++n_exc;
552                         }
553                 }
554                 if (n_exc > 0) {
555                         if (n_exc == 1) {
556                                 /* simple fix */
557                                 set_Tuple_pred(call, pn_Call_X_except, cf_pred[0]);
558                         } else {
559                                 ir_node *block = new_r_Block(irg, n_exc, cf_pred);
560                                 set_Tuple_pred(call, pn_Call_X_except, new_r_Jmp(block));
561                         }
562                 } else {
563                         set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg, mode_X));
564                 }
565         } else {
566                /* exc_handling == exc_no_handler: add all inlined exception control flow to the end block of the caller */
567                 int n_exc = 0;
568                 for (int i = 0; i < arity; i++) {
569                         ir_node *ret = get_Block_cfgpred(end_bl, i);
570                         ir_node *irn = skip_Proj(ret);
571
572                         if (is_fragile_op(irn) || is_Raise(irn)) {
573                                 cf_pred[n_exc] = ret;
574                                 n_exc++;
575                         }
576                 }
577                 ir_node  *main_end_bl       = get_irg_end_block(irg);
578                 int       main_end_bl_arity = get_irn_arity(main_end_bl);
579                 ir_node **end_preds         = XMALLOCN(ir_node*, n_exc+main_end_bl_arity);
580
581                 for (int i = 0; i < main_end_bl_arity; ++i)
582                         end_preds[i] = get_irn_n(main_end_bl, i);
583                 for (int i = 0; i < n_exc; ++i)
584                         end_preds[main_end_bl_arity + i] = cf_pred[i];
585                 set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
586                 set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg, mode_X));
587                 free(end_preds);
588         }
589         free(res_pred);
590         free(cf_pred);
591
592        /* --  Turn optimizations back on. -- */
593         set_optimize(rem_opt);
594         current_ir_graph = rem;
595
596         return 1;
597 }
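
/*
 * Minimal usage sketch for inline_method() (mirroring what
 * inline_small_irgs() below does); the caller must collect the Phi/Proj
 * lists first, since inline_method() relies on the link fields:
 *
 *     ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_PHI_LIST);
 *     collect_phiprojs(irg);
 *     if (inline_method(call, callee))
 *             ...  (the Call is now a Tuple; links must be recomputed)
 *     ir_free_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_PHI_LIST);
 */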
598
599 /********************************************************************/
600 /* Apply inlining to small methods.                                 */
601 /********************************************************************/
602
603 static struct obstack  temp_obst;
604
605 /** Represents a possibly inlinable call in a graph. */
606 typedef struct call_entry {
607         ir_node    *call;       /**< The Call node. */
608         ir_graph   *callee;     /**< The callee IR-graph. */
609         list_head  list;        /**< List head for linking the next one. */
610         int        loop_depth;  /**< The loop depth of this call. */
611         int        benefice;    /**< The calculated benefice of this call. */
612         unsigned   local_adr:1; /**< Set if this call gets an address of a local variable. */
613         unsigned   all_const:1; /**< Set if this call has only constant parameters. */
614 } call_entry;
615
616 /**
617  * environment for inlining small irgs
618  */
619 typedef struct inline_env_t {
620         struct obstack obst;  /**< An obstack where call_entries are allocated on. */
621         list_head      calls; /**< The call entry list. */
622 } inline_env_t;
623
624 /**
625  * Returns the irg called from a Call node. If the irg is not
626  * known, NULL is returned.
627  *
628  * @param call  the call node
629  */
630 static ir_graph *get_call_called_irg(ir_node *call)
631 {
632         ir_node *addr;
633
634         addr = get_Call_ptr(call);
635         if (is_SymConst_addr_ent(addr)) {
636                 ir_entity *ent = get_SymConst_entity(addr);
637                /* we don't know which function finally gets bound to a weak symbol */
638                 if (get_entity_linkage(ent) & IR_LINKAGE_WEAK)
639                         return NULL;
640
641                 return get_entity_irg(ent);
642         }
643
644         return NULL;
645 }
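
/*
 * Note (added for clarity): only direct calls are resolved here, i.e. Call
 * nodes whose address is a SymConst referencing an entity with a known
 * graph.  Indirect calls through function pointers, calls to external
 * entities and calls to weak symbols yield NULL and are therefore never
 * considered for inlining.
 */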
646
647 /**
648  * Walker: Collect all calls to known graphs inside a graph.
649  */
650 static void collect_calls(ir_node *call, void *env)
651 {
652         (void) env;
653         if (is_Call(call)) {
654                 ir_graph *called_irg = get_call_called_irg(call);
655
656                 if (called_irg != NULL) {
657                         /* The Call node calls a locally defined method.  Remember to inline. */
658                         inline_env_t *ienv  = (inline_env_t*)env;
659                         call_entry   *entry = OALLOC(&ienv->obst, call_entry);
660                         entry->call       = call;
661                         entry->callee     = called_irg;
662                         entry->loop_depth = 0;
663                         entry->benefice   = 0;
664                         entry->local_adr  = 0;
665                         entry->all_const  = 0;
666
667                         list_add_tail(&entry->list, &ienv->calls);
668                 }
669         }
670 }
671
672 /**
673  * Inlines all small methods at call sites where the called address comes
674  * from a SymConst node that references the entity representing the called
675  * method.
676  * The size argument is a rough measure for the code size of the method:
677  * Methods where the obstack containing the firm graph is smaller than
678  * size are inlined.
679  */
680 void inline_small_irgs(ir_graph *irg, int size)
681 {
682         ir_graph *rem = current_ir_graph;
683         inline_env_t env;
684
685         current_ir_graph = irg;
686         /* Handle graph state */
687         assert(get_irg_phase_state(irg) != phase_building);
688         free_callee_info(irg);
689
690         /* Find Call nodes to inline.
691           (We cannot inline during a walk of the graph, as inlining the same
692           method several times changes the visited flag of the walked graph:
693           after the first inlining, visited of the callee equals visited of
694            the caller.  With the next inlining both are increased.) */
695         obstack_init(&env.obst);
696         INIT_LIST_HEAD(&env.calls);
697         irg_walk_graph(irg, NULL, collect_calls, &env);
698
699         if (! list_empty(&env.calls)) {
700                 /* There are calls to inline */
701                 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
702                 collect_phiprojs(irg);
703
704                 list_for_each_entry(call_entry, entry, &env.calls, list) {
705                         ir_graph  *callee = entry->callee;
706                         ir_entity *called = get_irg_entity(callee);
707                         mtp_additional_properties props
708                                 = get_entity_additional_properties(called);
709
710                         if (props & mtp_property_noinline)
711                                 continue;
712
713                         if ((props & mtp_property_always_inline) ||
714                             _obstack_memory_used(callee->obst) - (int)obstack_room(callee->obst) < size) {
715                                 inline_method(entry->call, callee);
716                         }
717                 }
718                 ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
719         }
720         obstack_free(&env.obst, NULL);
721         current_ir_graph = rem;
722 }
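
/*
 * Usage sketch (an assumption about typical driver code, not part of the
 * original file): either call inline_small_irgs(irg, size) directly per
 * graph, or schedule it through the pass constructor below:
 *
 *     ir_graph_pass_t *p = inline_small_irgs_pass(NULL, 100);
 *     ...add p to a graph pass manager and run it...
 */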
723
724 typedef struct inline_small_irgs_pass_t {
725         ir_graph_pass_t pass;
726         int            size;
727 } inline_small_irgs_pass_t;
728
729 /**
730  * Wrapper to run inline_small_irgs() as a pass.
731  */
732 static int inline_small_irgs_wrapper(ir_graph *irg, void *context)
733 {
734         inline_small_irgs_pass_t *pass = (inline_small_irgs_pass_t*)context;
735
736         inline_small_irgs(irg, pass->size);
737         return 0;
738 }
739
740 /* create a pass for inline_small_irgs() */
741 ir_graph_pass_t *inline_small_irgs_pass(const char *name, int size)
742 {
743         inline_small_irgs_pass_t *pass = XMALLOCZ(inline_small_irgs_pass_t);
744
745         pass->size = size;
746         return def_graph_pass_constructor(
747                 &pass->pass, name ? name : "inline_small_irgs", inline_small_irgs_wrapper);
748 }
749
750 /**
751  * Environment for inlining irgs.
752  */
753 typedef struct {
754        list_head calls;             /**< List of all call nodes in this graph. */
755         unsigned  *local_weights;    /**< Once allocated, the beneficial weight for transmitting local addresses. */
756        unsigned  n_nodes;           /**< Number of nodes in the graph, not counting Proj, Tuple and Sync nodes nor Start and End. */
757         unsigned  n_blocks;          /**< Number of Blocks in graph without Start and End block. */
758         unsigned  n_nodes_orig;      /**< for statistics */
759         unsigned  n_call_nodes;      /**< Number of Call nodes in the graph. */
760         unsigned  n_call_nodes_orig; /**< for statistics */
761        unsigned  n_callers;         /**< Number of known graphs that call this graph. */
762         unsigned  n_callers_orig;    /**< for statistics */
763         unsigned  got_inline:1;      /**< Set, if at least one call inside this graph was inlined. */
764         unsigned  recursive:1;       /**< Set, if this function is self recursive. */
765 } inline_irg_env;
766
767 /**
768  * Allocate a new environment for inlining.
769  */
770 static inline_irg_env *alloc_inline_irg_env(void)
771 {
772         inline_irg_env *env    = OALLOC(&temp_obst, inline_irg_env);
773         INIT_LIST_HEAD(&env->calls);
774         env->local_weights     = NULL;
775        env->n_nodes           = -2; /* do not count Start, End */
776        env->n_blocks          = -2; /* do not count Start, End Block */
777         env->n_nodes_orig      = -2; /* do not count Start, End */
778         env->n_call_nodes      = 0;
779         env->n_call_nodes_orig = 0;
780         env->n_callers         = 0;
781         env->n_callers_orig    = 0;
782         env->got_inline        = 0;
783         env->recursive         = 0;
784         return env;
785 }
786
787 typedef struct walker_env {
788         inline_irg_env *x;     /**< the inline environment */
789         char ignore_runtime;   /**< the ignore runtime flag */
790        char ignore_callers;   /**< if set, do not update caller statistics */
791 } wenv_t;
792
793 /**
794  * post-walker: collect all calls in the inline-environment
795  * of a graph and sum some statistics.
796  */
797 static void collect_calls2(ir_node *call, void *ctx)
798 {
799         wenv_t         *env = (wenv_t*)ctx;
800         inline_irg_env *x = env->x;
801         unsigned        code = get_irn_opcode(call);
802         ir_graph       *callee;
803         call_entry     *entry;
804
805         /* count meaningful nodes in irg */
806         if (code != iro_Proj && code != iro_Tuple && code != iro_Sync) {
807                 if (code != iro_Block) {
808                         ++x->n_nodes;
809                         ++x->n_nodes_orig;
810                 } else {
811                         ++x->n_blocks;
812                 }
813         }
814
815         if (code != iro_Call) return;
816
817         /* check, if it's a runtime call */
818         if (env->ignore_runtime) {
819                 ir_node *symc = get_Call_ptr(call);
820
821                 if (is_SymConst_addr_ent(symc)) {
822                         ir_entity *ent = get_SymConst_entity(symc);
823
824                         if (get_entity_additional_properties(ent) & mtp_property_runtime)
825                                 return;
826                 }
827         }
828
829         /* collect all call nodes */
830         ++x->n_call_nodes;
831         ++x->n_call_nodes_orig;
832
833         callee = get_call_called_irg(call);
834         if (callee != NULL) {
835                 if (! env->ignore_callers) {
836                         inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);
837                         /* count all static callers */
838                         ++callee_env->n_callers;
839                         ++callee_env->n_callers_orig;
840                 }
841                 if (callee == current_ir_graph)
842                         x->recursive = 1;
843
844                 /* link it in the list of possible inlinable entries */
845                 entry = OALLOC(&temp_obst, call_entry);
846                 entry->call       = call;
847                 entry->callee     = callee;
848                 entry->loop_depth = get_irn_loop(get_nodes_block(call))->depth;
849                 entry->benefice   = 0;
850                 entry->local_adr  = 0;
851                 entry->all_const  = 0;
852
853                 list_add_tail(&entry->list, &x->calls);
854         }
855 }
856
857 /**
858  * Returns TRUE if the number of call nodes is 0 in the irg's environment,
859  * hence this irg is a leaf (it calls nothing itself).
860  */
861 inline static int is_leaf(ir_graph *irg)
862 {
863         inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
864         return env->n_call_nodes == 0;
865 }
866
867 /**
868  * Returns TRUE if the number of nodes in the callee is
869  * smaller than size in the irg's environment.
870  */
871 inline static int is_smaller(ir_graph *callee, unsigned size)
872 {
873         inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);
874         return env->n_nodes < size;
875 }
876
877 /**
878  * Duplicate a call entry.
879  *
880  * @param entry     the original entry to duplicate
881  * @param new_call  the new call node
882  * @param loop_depth_delta
883  *                  delta value for the loop depth
884  */
885 static call_entry *duplicate_call_entry(const call_entry *entry,
886                                         ir_node *new_call, int loop_depth_delta)
887 {
888         call_entry *nentry = OALLOC(&temp_obst, call_entry);
889         nentry->call       = new_call;
890         nentry->callee     = entry->callee;
891         nentry->benefice   = entry->benefice;
892         nentry->loop_depth = entry->loop_depth + loop_depth_delta;
893         nentry->local_adr  = entry->local_adr;
894         nentry->all_const  = entry->all_const;
895
896         return nentry;
897 }
898
899 /**
900  * Append all call nodes of the source environment to the call list of the destination
901  * environment.
902  *
903  * @param dst         destination environment
904  * @param src         source environment
905  * @param loop_depth  the loop depth of the call that is replaced by the src list
906  */
907 static void append_call_list(inline_irg_env *dst, inline_irg_env *src, int loop_depth)
908 {
909         call_entry *nentry;
910
911         /* Note that the src list points to Call nodes in the inlined graph, but
912            we need Call nodes in our graph. Luckily the inliner leaves this information
913            in the link field. */
914         list_for_each_entry(call_entry, entry, &src->calls, list) {
915                 nentry = duplicate_call_entry(entry, (ir_node*)get_irn_link(entry->call), loop_depth);
916                 list_add_tail(&nentry->list, &dst->calls);
917         }
918         dst->n_call_nodes += src->n_call_nodes;
919         dst->n_nodes      += src->n_nodes;
920 }
921
922 /*
923  * Inlines small leaf methods at call sites where the called address comes
924  * from a SymConst node that references the entity representing the called
925  * method.
926  * The size arguments are rough measures for the code size of a method:
927  * leaf methods with fewer than leafsize nodes and other methods with fewer
928  * than size nodes are inlined, as long as the caller stays below maxsize.
929  */
930 void inline_leaf_functions(unsigned maxsize, unsigned leafsize,
931                            unsigned size, int ignore_runtime)
932 {
933         inline_irg_env   *env;
934         ir_graph         *irg;
935         size_t           i, n_irgs;
936         ir_graph         *rem;
937         int              did_inline;
938         wenv_t           wenv;
939         pmap             *copied_graphs;
940         pmap_entry       *pm_entry;
941
942         rem = current_ir_graph;
943         obstack_init(&temp_obst);
944
945         /* a map for the copied graphs, used to inline recursive calls */
946         copied_graphs = pmap_create();
947
948         /* extend all irgs by a temporary data structure for inlining. */
949         n_irgs = get_irp_n_irgs();
950         for (i = 0; i < n_irgs; ++i)
951                 set_irg_link(get_irp_irg(i), alloc_inline_irg_env());
952
953         /* Pre-compute information in temporary data structure. */
954         wenv.ignore_runtime = ignore_runtime;
955         wenv.ignore_callers = 0;
956         for (i = 0; i < n_irgs; ++i) {
957                 ir_graph *irg = get_irp_irg(i);
958
959                 assert(get_irg_phase_state(irg) != phase_building);
960                 free_callee_info(irg);
961
962                 assure_irg_properties(irg,
963                         IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO);
964                 wenv.x = (inline_irg_env*)get_irg_link(irg);
965                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
966                 confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_ALL);
967         }
968
969         /* -- and now inline. -- */
970
971        /* Inline leaves recursively -- we might construct new leaves. */
972         do {
973                 did_inline = 0;
974
975                 for (i = 0; i < n_irgs; ++i) {
976                         int phiproj_computed = 0;
977
978                         current_ir_graph = get_irp_irg(i);
979                         env              = (inline_irg_env*)get_irg_link(current_ir_graph);
980
981                         ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
982                         list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
983                                 if (env->n_nodes > maxsize)
984                                         break;
985
986                                 ir_node   *call   = entry->call;
987                                 ir_graph  *callee = entry->callee;
988                                 ir_entity *called = get_irg_entity(callee);
989                                 mtp_additional_properties props
990                                         = get_entity_additional_properties(called);
991                                 if (props & mtp_property_noinline)
992                                         continue;
993
994                                 if (is_leaf(callee) && (
995                                     is_smaller(callee, leafsize) || (props & mtp_property_always_inline))) {
996                                         if (!phiproj_computed) {
997                                                 phiproj_computed = 1;
998                                                 collect_phiprojs(current_ir_graph);
999                                         }
1000                                         did_inline = inline_method(call, callee);
1001
1002                                         if (did_inline) {
1003                                                 inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);
1004
1005                                                 /* call was inlined, Phi/Projs for current graph must be recomputed */
1006                                                 phiproj_computed = 0;
1007
1008                                                 /* Do some statistics */
1009                                                 env->got_inline = 1;
1010                                                 --env->n_call_nodes;
1011                                                 env->n_nodes += callee_env->n_nodes;
1012                                                 --callee_env->n_callers;
1013
1014                                                 /* remove this call from the list */
1015                                                 list_del(&entry->list);
1016                                                 continue;
1017                                         }
1018                                 }
1019                         }
1020                         ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1021                 }
1022         } while (did_inline);
1023
1024         /* inline other small functions. */
1025         for (i = 0; i < n_irgs; ++i) {
1026                 int phiproj_computed = 0;
1027
1028                 current_ir_graph = get_irp_irg(i);
1029                 env              = (inline_irg_env*)get_irg_link(current_ir_graph);
1030
1031                 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1032
1033                 /* note that the list of possible calls is updated during the process */
1034                 list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
1035                         ir_node   *call   = entry->call;
1036                         ir_graph  *callee = entry->callee;
1037                         ir_entity *called = get_irg_entity(callee);
1038
1039                         mtp_additional_properties props = get_entity_additional_properties(called);
1040                         if (props & mtp_property_noinline)
1041                                 continue;
1042
1043                        ir_graph *callee_copy = pmap_get(ir_graph, copied_graphs, callee);
1044                        if (callee_copy != NULL) {
1045                                /*
1046                                 * Remap callee if we have a copy.
1047                                 * FIXME: Should we do this only for recursive Calls ?
1048                                 */
1049                                callee = callee_copy;
1050                        }
1051
1052                         if ((props & mtp_property_always_inline) ||
1053                             (is_smaller(callee, size) && env->n_nodes < maxsize) /* small function */) {
1054                                 if (current_ir_graph == callee) {
1055                                         /*
1056                                          * Recursive call: we cannot directly inline because we cannot walk
1057                                          * the graph and change it. So we have to make a copy of the graph
1058                                          * first.
1059                                          */
1060
1061                                         inline_irg_env *callee_env;
1062                                         ir_graph       *copy;
1063
1064                                         ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1065
1066                                         /*
1067                                          * No copy yet, create one.
1068                                          * Note that recursive methods are never leafs, so it is sufficient
1069                                         * Note that recursive methods are never leaves, so it is sufficient
1070                                          */
1071                                         copy = create_irg_copy(callee);
1072
1073                                         /* create_irg_copy() destroys the Proj links, recompute them */
1074                                         phiproj_computed = 0;
1075
1076                                         ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1077
1078                                         /* allocate new environment */
1079                                         callee_env = alloc_inline_irg_env();
1080                                         set_irg_link(copy, callee_env);
1081
1082                                         assure_irg_properties(copy,
1083                                                 IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO);
1084                                         wenv.x              = callee_env;
1085                                         wenv.ignore_callers = 1;
1086                                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
1087
1088                                         /*
1089                                          * Enter the entity of the original graph. This is needed
1090                                          * for inline_method(). However, note that ent->irg still points
1091                                          * to callee, NOT to copy.
1092                                          */
1093                                         set_irg_entity(copy, get_irg_entity(callee));
1094
1095                                         pmap_insert(copied_graphs, callee, copy);
1096                                         callee = copy;
1097
1098                                         /* we have only one caller: the original graph */
1099                                         callee_env->n_callers      = 1;
1100                                         callee_env->n_callers_orig = 1;
1101                                 }
1102                                 if (! phiproj_computed) {
1103                                         phiproj_computed = 1;
1104                                         collect_phiprojs(current_ir_graph);
1105                                 }
1106                                 did_inline = inline_method(call, callee);
1107                                 if (did_inline) {
1108                                         inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);
1109
1110                                         /* call was inlined, Phi/Projs for current graph must be recomputed */
1111                                         phiproj_computed = 0;
1112
1113                                        /* callee was inlined. Append its call list. */
1114                                         env->got_inline = 1;
1115                                         --env->n_call_nodes;
1116                                         append_call_list(env, callee_env, entry->loop_depth);
1117                                         --callee_env->n_callers;
1118
1119                                         /* after we have inlined callee, all called methods inside callee
1120                                            are now called once more */
1121                                         list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
1122                                                 inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
1123                                                 ++penv->n_callers;
1124                                         }
1125
1126                                         /* remove this call from the list */
1127                                         list_del(&entry->list);
1128                                         continue;
1129                                 }
1130                         }
1131                 }
1132                 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1133         }
1134
1135         for (i = 0; i < n_irgs; ++i) {
1136                 irg = get_irp_irg(i);
1137                 env = (inline_irg_env*)get_irg_link(irg);
1138
1139                 if (env->got_inline) {
1140                         optimize_graph_df(irg);
1141                         optimize_cf(irg);
1142                 }
1143                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
1144                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
1145                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
1146                         env->n_callers_orig, env->n_callers,
1147                         get_entity_name(get_irg_entity(irg))));
1148                 }
1149         }
1150
1151         /* kill the copied graphs: we don't need them anymore */
1152         foreach_pmap(copied_graphs, pm_entry) {
1153                 ir_graph *copy = (ir_graph*)pm_entry->value;
1154
1155                 /* reset the entity, otherwise it will be deleted in the next step ... */
1156                 set_irg_entity(copy, NULL);
1157                 free_ir_graph(copy);
1158         }
1159         pmap_destroy(copied_graphs);
1160
1161         obstack_free(&temp_obst, NULL);
1162         current_ir_graph = rem;
1163 }
1164
1165 typedef struct inline_leaf_functions_pass_t {
1166         ir_prog_pass_t pass;
1167         unsigned       maxsize;
1168         unsigned       leafsize;
1169         unsigned       size;
1170         int            ignore_runtime;
1171 } inline_leaf_functions_pass_t;
1172
1173 /**
1174  * Wrapper to run inline_leaf_functions() as an ir_prog pass.
1175  */
1176 static int inline_leaf_functions_wrapper(ir_prog *irp, void *context)
1177 {
1178         inline_leaf_functions_pass_t *pass = (inline_leaf_functions_pass_t*)context;
1179
1180         (void)irp;
1181         inline_leaf_functions(
1182                 pass->maxsize, pass->leafsize,
1183                 pass->size, pass->ignore_runtime);
1184         return 0;
1185 }
1186
1187 /* create a pass for inline_leaf_functions() */
1188 ir_prog_pass_t *inline_leaf_functions_pass(
1189         const char *name, unsigned maxsize, unsigned leafsize,
1190         unsigned size, int ignore_runtime)
1191 {
1192         inline_leaf_functions_pass_t *pass = XMALLOCZ(inline_leaf_functions_pass_t);
1193
1194         pass->maxsize        = maxsize;
1195         pass->leafsize       = leafsize;
1196         pass->size           = size;
1197         pass->ignore_runtime = ignore_runtime;
1198
1199         return def_prog_pass_constructor(
1200                 &pass->pass,
1201                 name ? name : "inline_leaf_functions",
1202                 inline_leaf_functions_wrapper);
1203 }
1204
1205 /**
1206  * Calculate the parameter weights for transmitting the address of a local variable.
1207  */
1208 static unsigned calc_method_local_weight(ir_node *arg)
1209 {
1210         int      j;
1211         unsigned v, weight = 0;
1212
1213         for (unsigned i = get_irn_n_outs(arg); i-- > 0; ) {
1214                 ir_node *succ = get_irn_out(arg, i);
1215
1216                 switch (get_irn_opcode(succ)) {
1217                 case iro_Load:
1218                 case iro_Store:
1219                        /* Loads and Stores can be removed */
1220                         weight += 3;
1221                         break;
1222                 case iro_Sel:
1223                        /* check if all Sel indexes are constant */
1224                         for (j = get_Sel_n_indexs(succ) - 1; j >= 0; --j) {
1225                                 ir_node *idx = get_Sel_index(succ, j);
1226                                 if (! is_Const(idx))
1227                                         return 0;
1228                         }
1229                         /* Check users on this Sel. Note: if a 0 is returned here, there was
1230                            some unsupported node. */
1231                         v = calc_method_local_weight(succ);
1232                         if (v == 0)
1233                                 return 0;
1234                         /* we can kill one Sel with constant indexes, this is cheap */
1235                         weight += v + 1;
1236                         break;
1237                 case iro_Id:
1238                         /* when looking backward we might find Id nodes */
1239                         weight += calc_method_local_weight(succ);
1240                         break;
1241                 case iro_Tuple:
1242                         /* unoptimized tuple */
1243                         for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
1244                                 ir_node *pred = get_Tuple_pred(succ, j);
1245                                 if (pred == arg) {
1246                                         /* look for Proj(j) */
1247                                         for (unsigned k = get_irn_n_outs(succ); k-- > 0; ) {
1248                                                 ir_node *succ_succ = get_irn_out(succ, k);
1249                                                 if (is_Proj(succ_succ)) {
1250                                                         if (get_Proj_proj(succ_succ) == j) {
1251                                                                 /* found */
1252                                                                 weight += calc_method_local_weight(succ_succ);
1253                                                         }
1254                                                 } else {
1255                                                         /* this should NOT happen */
1256                                                         return 0;
1257                                                 }
1258                                         }
1259                                 }
1260                         }
1261                         break;
1262                 default:
1263                         /* any other node: unsupported yet or bad. */
1264                         return 0;
1265                 }
1266         }
1267         return weight;
1268 }
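
/*
 * Worked example (illustrative): an argument address used by two Loads and
 * by one Sel with a single constant index whose result feeds a Store gives
 *
 *     weight = 3 (Load) + 3 (Load) + (3 (Store) + 1 (Sel)) = 10
 *
 * If any user is unsupported (e.g. the address escapes into a Call), the
 * whole computation collapses to 0 and no weight is credited.
 */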
1269
1270 /**
1271  * Calculate the weights for transmitting local variable addresses for all parameters of the given graph.
1272  */
1273 static void analyze_irg_local_weights(inline_irg_env *env, ir_graph *irg)
1274 {
1275         ir_entity *ent = get_irg_entity(irg);
1276         ir_type  *mtp;
1277         size_t   nparams;
1278         long     proj_nr;
1279         ir_node  *irg_args, *arg;
1280
1281         mtp      = get_entity_type(ent);
1282         nparams  = get_method_n_params(mtp);
1283
1284         /* allocate a new array; its presence also serves as the 'already analysed' flag */
1285         env->local_weights = NEW_ARR_D(unsigned, &temp_obst, nparams);
1286
1287         /* If the method has no parameters, there is nothing to do. */
1288         if (nparams == 0)
1289                 return;
1290
1291         assure_irg_outs(irg);
1292         irg_args = get_irg_args(irg);
1293         for (unsigned i = get_irn_n_outs(irg_args); i-- > 0; ) {
1294                 arg     = get_irn_out(irg_args, i);
1295                 proj_nr = get_Proj_proj(arg);
1296                 env->local_weights[proj_nr] = calc_method_local_weight(arg);
1297         }
1298 }
1299
1300 /**
1301  * Calculate the benefice for transmitting a local variable address.
1302  * After inlining, the local variable might be transformed into an
1303  * SSA variable by scalar_replacement().
1304  */
1305 static unsigned get_method_local_address_weight(ir_graph *callee, size_t pos)
1306 {
1307         inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);
1308
1309         if (env->local_weights == NULL)
1310                 analyze_irg_local_weights(env, callee);
1311
1312         if (pos < ARR_LEN(env->local_weights))
1313                 return env->local_weights[pos];
1314         return 0;
1315 }
1316
1317 /**
1318  * Calculate a benefice value for inlining the given call.
1319  *
1320  * @param call       the call node we have to inspect
1321  * @param callee     the called graph
1322  */
1323 static int calc_inline_benefice(call_entry *entry, ir_graph *callee)
1324 {
1325         ir_node   *call = entry->call;
1326         ir_entity *ent  = get_irg_entity(callee);
1327         ir_type   *callee_frame;
1328         size_t    i, n_members, n_params;
1329         ir_node   *frame_ptr;
1330         ir_type   *mtp;
1331         int       weight = 0;
1332         int       all_const;
1333         unsigned  cc, v;
1334
1335         inline_irg_env *callee_env;
1336
1337         mtp_additional_properties props = get_entity_additional_properties(ent);
1338         if (props & mtp_property_noinline) {
1339                 DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden\n",
1340                     call, callee));
1341                 return entry->benefice = INT_MIN;
1342         }
1343
1344         callee_frame = get_irg_frame_type(callee);
1345         n_members = get_class_n_members(callee_frame);
1346         for (i = 0; i < n_members; ++i) {
1347                 ir_entity *frame_ent = get_class_member(callee_frame, i);
1348                 if (is_parameter_entity(frame_ent)) {
1349                         // TODO inliner should handle parameter entities by inserting Store operations
1350                         DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden due to parameter entity\n", call, callee));
1351                         add_entity_additional_properties(ent, mtp_property_noinline);
1352                         return entry->benefice = INT_MIN;
1353                 }
1354         }
1355
1356         if (props & mtp_property_noreturn) {
1357                 DB((dbg, LEVEL_2, "In %+F Call to %+F: not inlining noreturn\n",
1358                     call, callee));
1359                 return entry->benefice = INT_MIN;
1360         }
1361
1362         /* costs for every passed parameter */
1363         n_params = get_Call_n_params(call);
1364         mtp      = get_entity_type(ent);
1365         cc       = get_method_calling_convention(mtp);
1366         if (cc & cc_reg_param) {
1367                 /* register parameter, smaller costs for register parameters */
1368                 size_t max_regs = cc & ~cc_bits;
1369
1370                 if (max_regs < n_params)
1371                         weight += max_regs * 2 + (n_params - max_regs) * 5;
1372                 else
1373                         weight += n_params * 2;
1374         } else {
1375                 /* parameters are passed on the stack */
1376                 weight += 5 * n_params;
1377         }
1378
1379         /* constant parameters improve the benefice */
1380         frame_ptr = get_irg_frame(current_ir_graph);
1381         all_const = 1;
1382         for (i = 0; i < n_params; ++i) {
1383                 ir_node *param = get_Call_param(call, i);
1384
1385                 if (is_Const(param)) {
1386                         weight += get_method_param_weight(ent, i);
1387                 } else {
1388                         all_const = 0;
1389                         if (is_SymConst(param))
1390                                 weight += get_method_param_weight(ent, i);
1391                         else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
1392                                 /*
1393                                  * An address of a local variable is transmitted. After
1394                                  * inlining, scalar_replacement might be able to remove the
1395                                  * local variable, so honor this.
1396                                  */
1397                                 v = get_method_local_address_weight(callee, i);
1398                                 weight += v;
1399                                 if (v > 0)
1400                                         entry->local_adr = 1;
1401                         }
1402                 }
1403         }
1404         entry->all_const = all_const;
1405
1406         callee_env = (inline_irg_env*)get_irg_link(callee);
1407         if (callee_env->n_callers == 1 &&
1408             callee != current_ir_graph &&
1409             !entity_is_externally_visible(ent)) {
1410                 weight += 700;
1411         }
1412
1413         /* give a bonus for functions with one block */
1414         if (callee_env->n_blocks == 1)
1415                 weight = weight * 3 / 2;
1416
1417         /* and one for small non-recursive functions: we want them to be inlined in almost every case */
1418         if (callee_env->n_nodes < 30 && !callee_env->recursive)
1419                 weight += 2000;
1420
1421         /* and finally for leaf functions: they do not increase the register
1422            pressure because of callee-save registers */
1423         if (callee_env->n_call_nodes == 0)
1424                 weight += 400;
1425
1426         /* it is important to inline inner loops first */
1427         if (entry->loop_depth > 30)
1428                 weight += 30 * 1024;
1429         else
1430                 weight += entry->loop_depth * 1024;
1431
1432         /*
1433          * All arguments being constant is probably a good sign: give an extra bonus.
1434          */
1435         if (all_const)
1436                 weight += 1024;
1437
1438         return entry->benefice = weight;
1439 }
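
/*
 * Worked example (illustrative): a call passing 3 constant parameters on the
 * stack to a single-block, non-recursive, call-free callee of 20 nodes at
 * loop depth 1, assuming the callee has several callers and ignoring the
 * get_method_param_weight() contributions:
 *
 *     weight  = 3 * 5      = 15      stack parameters
 *     weight  = 15 * 3 / 2 = 22      single-block bonus
 *     weight += 2000       = 2022    small non-recursive function
 *     weight += 400        = 2422    leaf function, no calls
 *     weight += 1 * 1024   = 3446    loop depth
 *     weight += 1024       = 4470    all arguments constant
 */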
1440
1441 typedef struct walk_env_t {
1442         ir_graph **irgs;
1443         size_t   last_irg;
1444 } walk_env_t;
1445
1446 /**
1447  * Callgraph walker, collect all visited graphs.
1448  */
1449 static void callgraph_walker(ir_graph *irg, void *data)
1450 {
1451         walk_env_t *env = (walk_env_t *)data;
1452         env->irgs[env->last_irg++] = irg;
1453 }
1454
1455 /**
1456  * Creates an inline order for all graphs.
1457  *
1458  * @return the list of graphs.
1459  */
1460 static ir_graph **create_irg_list(void)
1461 {
1462         ir_entity  **free_methods;
1463         size_t     n_irgs = get_irp_n_irgs();
1464         walk_env_t env;
1465
1466         cgana(&free_methods);
1467         xfree(free_methods);
1468
1469         compute_callgraph();
1470
1471         env.irgs     = XMALLOCNZ(ir_graph*, n_irgs);
1472         env.last_irg = 0;
1473
1474         callgraph_walk(NULL, callgraph_walker, &env);
1475         assert(n_irgs == env.last_irg);
1476
1477         free_callgraph();
1478
1479         return env.irgs;
1480 }
1481
1482 /**
1483  * Push a call onto the priority list if its benefice is big enough.
1484  *
1485  * @param pqueue   the priority queue of calls
1486  * @param call     the call entry
1487  * @param inline_threshold
1488  *                 the threshold value
1489  */
1490 static void maybe_push_call(pqueue_t *pqueue, call_entry *call,
1491                             int inline_threshold)
1492 {
1493         ir_graph *callee   = call->callee;
1494         int       benefice = calc_inline_benefice(call, callee);
1495
1496         DB((dbg, LEVEL_2, "In %+F Call %+F to %+F has benefice %d\n",
1497             get_irn_irg(call->call), call->call, callee, benefice));
1498
1499         ir_entity                *ent   = get_irg_entity(callee);
1500         mtp_additional_properties props = get_entity_additional_properties(ent);
1501         if (!(props & mtp_property_always_inline) && benefice < inline_threshold) {
1502                 return;
1503         }
1504
1505         pqueue_put(pqueue, call, benefice);
1506 }
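
/*
 * Example (illustrative): with inline_threshold = 0, only calls whose
 * computed benefice is non-negative are queued; callees marked with
 * mtp_property_always_inline bypass the threshold check entirely.
 */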
1507
1508 /**
1509  * Try to inline calls into a graph.
1510  *
1511  * @param irg      the graph into which we inline
1512  * @param maxsize  do NOT inline if the size of irg gets
1513  *                 bigger than this amount
1514  * @param inline_threshold
1515  *                 threshold value for inline decision
1516  * @param copied_graphs
1517  *                 map containing copies of recursive graphs
1518  */
1519 static void inline_into(ir_graph *irg, unsigned maxsize,
1520                         int inline_threshold, pmap *copied_graphs)
1521 {
1522         int            phiproj_computed = 0;
1523         inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
1524         wenv_t         wenv;
1525         pqueue_t       *pqueue;
1526
1527         if (env->n_call_nodes == 0)
1528                 return;
1529
1530         if (env->n_nodes > maxsize) {
1531                 DB((dbg, LEVEL_2, "%+F: too big (%d)\n", irg, env->n_nodes));
1532                 return;
1533         }
1534
1535         current_ir_graph = irg;
1536         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1537
1538         /* put this graph's calls into the pqueue */
1539         pqueue = new_pqueue();
1540
1541         list_for_each_entry(call_entry, curr_call, &env->calls, list) {
1542                 assert(is_Call(curr_call->call));
1543                 maybe_push_call(pqueue, curr_call, inline_threshold);
1544         }
1545
1546         /* note that the list of possible calls is updated during the process */
1547         while (!pqueue_empty(pqueue)) {
1548                 int                 did_inline;
1549                 call_entry          *curr_call  = (call_entry*)pqueue_pop_front(pqueue);
1550                 ir_graph            *callee     = curr_call->callee;
1551                 ir_node             *call_node  = curr_call->call;
1552                 inline_irg_env      *callee_env = (inline_irg_env*)get_irg_link(callee);
1553                 ir_entity           *ent        = get_irg_entity(callee);
1554                 mtp_additional_properties props
1555                         = get_entity_additional_properties(ent);
1556                 ir_graph            *callee_copy;
1557                 int                 loop_depth;
1558
1559                 if (!(props & mtp_property_always_inline)
1560                     && env->n_nodes + callee_env->n_nodes > maxsize) {
1561                         DB((dbg, LEVEL_2, "%+F: too big (%d) + %+F (%d)\n", irg,
1562                                                 env->n_nodes, callee, callee_env->n_nodes));
1563                         continue;
1564                 }
1565
1566                 callee_copy = pmap_get(ir_graph, copied_graphs, callee);
1567                 if (callee_copy != NULL) {
1568                         int benefice = curr_call->benefice;
1569                         /*
1570                          * Reduce the benefice for a recursive function unless all arguments
1571                          * are constant: inlining recursive functions is rarely profitable.
1572                          */
1573                         if (!curr_call->all_const)
1574                                 benefice -= 2000;
1575                         if (benefice < inline_threshold)
1576                                 continue;
1577
1578                         /*
1579                          * Remap callee if we have a copy.
1580                          */
1581                         callee     = callee_copy;
1582                         callee_env = (inline_irg_env*)get_irg_link(callee);
1583                 }
1584
1585                 if (current_ir_graph == callee) {
1586                         /*
1587                          * Recursive call: we cannot directly inline because we cannot
1588                          * walk the graph and change it. So we have to make a copy of
1589                          * the graph first.
1590                          */
1591                         int benefice = curr_call->benefice;
1592                         ir_graph *copy;
1593
1594                         /*
1595                          * Reduce the benefice for a recursive function unless all arguments
1596                          * are constant: inlining recursive functions is rarely profitable.
1597                          */
1598                         if (!curr_call->all_const)
1599                                 benefice -= 2000;
1600                         if (benefice < inline_threshold)
1601                                 continue;
1602
1603                         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1604
1605                         /*
1606                          * No copy yet, create one.
1607                          * Note that recursive methods are never leaf functions, so it is
1608                          * sufficient to test this condition here.
1609                          */
1610                         copy = create_irg_copy(callee);
1611
1612                         /* create_irg_copy() destroys the Proj links, recompute them */
1613                         phiproj_computed = 0;
1614
1615                         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1616
1617                         /* allocate a new environment */
1618                         callee_env = alloc_inline_irg_env();
1619                         set_irg_link(copy, callee_env);
1620
1621                         assure_irg_properties(copy, IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO);
1622                         memset(&wenv, 0, sizeof(wenv));
1623                         wenv.x              = callee_env;
1624                         wenv.ignore_callers = 1;
1625                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
1626
1627                         /*
1628                          * Enter the entity of the original graph. This is needed
1629                          * for inline_method(). However, note that ent->irg still points
1630                          * to callee, NOT to copy.
1631                          */
1632                         set_irg_entity(copy, get_irg_entity(callee));
1633
1634                         pmap_insert(copied_graphs, callee, copy);
1635                         callee = copy;
1636
1637                         /* we have only one caller: the original graph */
1638                         callee_env->n_callers      = 1;
1639                         callee_env->n_callers_orig = 1;
1640                 }
1641                 if (! phiproj_computed) {
1642                         phiproj_computed = 1;
1643                         collect_phiprojs(current_ir_graph);
1644                 }
1645                 did_inline = inline_method(call_node, callee);
1646                 if (!did_inline)
1647                         continue;
1648
1649                 /* call was inlined, Phi/Projs for current graph must be recomputed */
1650                 phiproj_computed = 0;
1651
1652                 /* remove it from the caller list */
1653                 list_del(&curr_call->list);
1654
1655                 /* callee was inlined; append its call list. */
1656                 env->got_inline = 1;
1657                 --env->n_call_nodes;
1658
1659                 /* we just generate a bunch of new calls */
1660                 loop_depth = curr_call->loop_depth;
1661                 list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
1662                         inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
1663                         ir_node        *new_call;
1664                         call_entry     *new_entry;
1665
1666                         /* after we have inlined callee, all called methods inside
1667                          * callee are now called once more */
1668                         ++penv->n_callers;
1669
1670                         /* Note that the src list points to Call nodes in the inlined graph,
1671                          * but we need Call nodes in our graph. Luckily the inliner leaves
1672                          * this information in the link field. */
1673                         new_call = (ir_node*)get_irn_link(centry->call);
1674                         if (get_irn_irg(new_call) != irg) {
1675                                 /* centry->call has not been copied, which means it is dead.
1676                                  * This might happen during inlining, if a const function
1677                                  * that cannot be inlined is used only as an unused argument
1678                                  * of another function, which is inlined. */
1679                                 continue;
1680                         }
1681                         assert(is_Call(new_call));
1682
1683                         new_entry = duplicate_call_entry(centry, new_call, loop_depth);
1684                         list_add_tail(&new_entry->list, &env->calls);
1685                         maybe_push_call(pqueue, new_entry, inline_threshold);
1686                 }
1687
1688                 env->n_call_nodes += callee_env->n_call_nodes;
1689                 env->n_nodes += callee_env->n_nodes;
1690                 --callee_env->n_callers;
1691         }
1692         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1693         del_pqueue(pqueue);
1694 }
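
/*
 * Sketch of the loop above (illustrative pseudocode): the priority queue
 * always yields the most beneficial call first, and inlining one call can
 * feed new candidates back into the queue:
 *
 *     queue = all calls in irg with sufficient benefice
 *     while queue is not empty:
 *         call = pop most beneficial entry
 *         if inlining would grow irg beyond maxsize: skip
 *         if the callee is irg itself:               inline a private copy
 *         inline the call; re-queue the call entries copied out of the callee
 */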
1695
1696 /*
1697  * Heuristic inliner. Calculates a benefice value for every call and inlines
1698  * those calls with a value higher than the threshold.
1699  */
1700 void inline_functions(unsigned maxsize, int inline_threshold,
1701                       opt_ptr after_inline_opt)
1702 {
1703         inline_irg_env   *env;
1704         size_t           i, n_irgs;
1705         ir_graph         *rem;
1706         wenv_t           wenv;
1707         pmap             *copied_graphs;
1708         pmap_entry       *pm_entry;
1709         ir_graph         **irgs;
1710
1711         rem = current_ir_graph;
1712         obstack_init(&temp_obst);
1713
1714         irgs = create_irg_list();
1715
1716         /* a map for the copied graphs, used to inline recursive calls */
1717         copied_graphs = pmap_create();
1718
1719         /* extend all irgs by a temporary data structure for inlining. */
1720         n_irgs = get_irp_n_irgs();
1721         for (i = 0; i < n_irgs; ++i)
1722                 set_irg_link(irgs[i], alloc_inline_irg_env());
1723
1724         /* Pre-compute information in temporary data structure. */
1725         wenv.ignore_runtime = 0;
1726         wenv.ignore_callers = 0;
1727         for (i = 0; i < n_irgs; ++i) {
1728                 ir_graph *irg = irgs[i];
1729
1730                 free_callee_info(irg);
1731
1732                 wenv.x = (inline_irg_env*)get_irg_link(irg);
1733                 assure_loopinfo(irg);
1734                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
1735         }
1736
1737         /* -- and now inline. -- */
1738         for (i = 0; i < n_irgs; ++i) {
1739                 ir_graph *irg = irgs[i];
1740
1741                 inline_into(irg, maxsize, inline_threshold, copied_graphs);
1742         }
1743
1744         for (i = 0; i < n_irgs; ++i) {
1745                 ir_graph *irg = irgs[i];
1746
1747                 env = (inline_irg_env*)get_irg_link(irg);
1748                 if (env->got_inline && after_inline_opt != NULL) {
1749                         /* this irg got calls inlined: optimize it */
1750                         after_inline_opt(irg);
1751                 }
1752                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
1753                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
1754                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
1755                         env->n_callers_orig, env->n_callers,
1756                         get_entity_name(get_irg_entity(irg))));
1757                 }
1758         }
1759
1760         /* kill the copied graphs: we don't need them anymore */
1761         foreach_pmap(copied_graphs, pm_entry) {
1762                 ir_graph *copy = (ir_graph*)pm_entry->value;
1763
1764                 /* reset the entity, otherwise it will be deleted in the next step ... */
1765                 set_irg_entity(copy, NULL);
1766                 free_ir_graph(copy);
1767         }
1768         pmap_destroy(copied_graphs);
1769
1770         xfree(irgs);
1771
1772         obstack_free(&temp_obst, NULL);
1773         current_ir_graph = rem;
1774 }
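
/*
 * Usage sketch (illustrative): a typical invocation with a size budget of
 * 750 nodes per graph and threshold 0; after_inline_opt may be any
 * void(*)(ir_graph*) cleanup callback, or NULL to skip the post-inline
 * optimization step.
 *
 *     inline_functions(750, 0, NULL);
 */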
1775
1776 typedef struct inline_functions_pass_t {
1777         ir_prog_pass_t pass;
1778         unsigned       maxsize;
1779         int            inline_threshold;
1780         opt_ptr        after_inline_opt;
1781 } inline_functions_pass_t;
1782
1783 /**
1784  * Wrapper to run inline_functions() as an ir_prog pass.
1785  */
1786 static int inline_functions_wrapper(ir_prog *irp, void *context)
1787 {
1788         inline_functions_pass_t *pass = (inline_functions_pass_t*)context;
1789
1790         (void)irp;
1791         inline_functions(pass->maxsize, pass->inline_threshold,
1792                          pass->after_inline_opt);
1793         return 0;
1794 }
1795
1796 /* create an ir_prog pass for inline_functions */
1797 ir_prog_pass_t *inline_functions_pass(
1798           const char *name, unsigned maxsize, int inline_threshold,
1799           opt_ptr after_inline_opt)
1800 {
1801         inline_functions_pass_t *pass = XMALLOCZ(inline_functions_pass_t);
1802
1803         pass->maxsize          = maxsize;
1804         pass->inline_threshold = inline_threshold;
1805         pass->after_inline_opt = after_inline_opt;
1806
1807         return def_prog_pass_constructor(
1808                 &pass->pass, name ? name : "inline_functions",
1809                 inline_functions_wrapper);
1810 }
1811
1812 void firm_init_inline(void)
1813 {
1814         FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
1815 }