ia32: Do not ignore the floating point control word anymore and make it callee-save.
[libfirm] / ir / opt / opt_inline.c
/*
 * Copyright (C) 1995-2011 University of Karlsruhe.  All rights reserved.
 *
 * This file is part of libFirm.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License
 * Agreement provided with the Software.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

/**
 * @file
 * @brief    Dead node elimination and Procedure Inlining.
 * @author   Michael Beck, Goetz Lindenmaier
 */
#include "config.h"

#include <limits.h>
#include <stdbool.h>
#include <assert.h>

#include "irnode_t.h"
#include "irgraph_t.h"
#include "irprog_t.h"

#include "iroptimize.h"
#include "ircons_t.h"
#include "iropt_t.h"
#include "irgopt.h"
#include "irgmod.h"
#include "irgwalk.h"

#include "array_t.h"
#include "list.h"
#include "pset.h"
#include "pmap.h"
#include "pdeq.h"
#include "xmalloc.h"
#include "pqueue.h"

#include "irouts.h"
#include "irloop_t.h"
#include "irbackedge_t.h"
#include "opt_init.h"
#include "cgana.h"
#include "trouts.h"
#include "error.h"

#include "analyze_irg_args.h"
#include "iredges_t.h"
#include "irflag_t.h"
#include "irhooks.h"
#include "irtools.h"
#include "iropt_dbg.h"
#include "irpass_t.h"
#include "irnodemap.h"

DEBUG_ONLY(static firm_dbg_module_t *dbg;)

/*------------------------------------------------------------------*/
/* Routines for dead node elimination / copying garbage collection  */
/* of the obstack.                                                  */
/*------------------------------------------------------------------*/

/**
 * Remember the new node in the old node by using a field all nodes have.
 */
static void set_new_node(ir_node *node, ir_node *new_node)
{
        set_irn_link(node, new_node);
}

/**
 * Get this new node, before the old node is forgotten.
 */
static inline ir_node *get_new_node(ir_node *old_node)
{
        assert(irn_visited(old_node));
        return (ir_node*) get_irn_link(old_node);
}

/*--------------------------------------------------------------------*/
/*  Functionality for inlining                                        */
/*--------------------------------------------------------------------*/

/**
 * Copy node for inlining.  Updates attributes that change when
 * inlining but not for dead node elimination.
 *
 * Copies the node by calling irn_copy_into_irg() and then updates the
 * entity if it's a local one.  env must be the new graph the node is
 * copied into.  The new entities must be in the link fields of the old
 * entities.
 */
static void copy_node_inline(ir_node *node, void *env)
{
        ir_graph *new_irg  = (ir_graph*) env;
        ir_node  *new_node = irn_copy_into_irg(node, new_irg);

        set_new_node(node, new_node);
        if (is_Sel(node)) {
                ir_graph  *old_irg        = get_irn_irg(node);
                ir_type   *old_frame_type = get_irg_frame_type(old_irg);
                ir_entity *old_entity     = get_Sel_entity(node);
                assert(is_Sel(new_node));
                /* use copied entities from the new frame */
                if (get_entity_owner(old_entity) == old_frame_type) {
                        ir_entity *new_entity = (ir_entity*)get_entity_link(old_entity);
                        assert(new_entity != NULL);
                        set_Sel_entity(new_node, new_entity);
                }
        } else if (is_Block(new_node)) {
                new_node->attr.block.irg.irg = new_irg;
        }
}

static void set_preds_inline(ir_node *node, void *env)
{
        ir_node *new_node;

        irn_rewire_inputs(node);

        /* move constants into start block */
        new_node = get_new_node(node);
        if (is_irn_constlike(new_node)) {
                ir_graph *new_irg     = (ir_graph *) env;
                ir_node  *start_block = get_irg_start_block(new_irg);
                set_nodes_block(new_node, start_block);
        }
}

/**
 * Walker: checks for constructs that prevent inlining: address-taken
 * blocks, accesses to value-parameter or parameter entities on the frame,
 * and stack allocations.
 */
static void find_addr(ir_node *node, void *env)
{
        bool *allow_inline = (bool*)env;

        if (is_Block(node) && get_Block_entity(node)) {
                /* Currently we cannot correctly handle blocks whose address
                 * was taken when inlining. */
                *allow_inline = false;
        } else if (is_Sel(node)) {
                ir_graph *irg = current_ir_graph;
                if (get_Sel_ptr(node) == get_irg_frame(irg)) {
                        /* access to frame */
                        ir_entity *ent = get_Sel_entity(node);
                        if (get_entity_owner(ent) != get_irg_frame_type(irg)) {
                                /* access to value_type */
                                *allow_inline = false;
                        }
                        if (is_parameter_entity(ent)) {
                                *allow_inline = false;
                        }
                }
        } else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
                /* From GCC:
                 * Refuse to inline alloca calls unless the user explicitly forced it,
                 * as this may change the program's memory overhead drastically when
                 * the function using alloca is called in a loop.  In the GCC version
                 * present in SPEC2000, inlining into schedule_block caused it to
                 * require 2GB of RAM instead of 256MB.
                 *
                 * Unfortunately this is true for our implementation as well.
                 * Moreover, we cannot differentiate between alloca() and VLA yet, so
                 * this disables inlining of functions using VLA (which are completely
                 * safe).
                 *
                 * 2 solutions:
                 * - add a flag to the Alloc node for "real" alloca() calls
                 * - add a new Stack-Restore node at the end of a function using
                 *   alloca()
                 */
                *allow_inline = false;
        }
}

/**
 * Check if we can inline a given call.
 * Currently, we cannot inline two cases:
 * - calls with compound arguments or results
 * - graphs that take the address of a parameter
 *
 * These conditions are checked here.
 */
static bool can_inline(ir_node *call, ir_graph *called_graph)
{
        ir_entity          *called      = get_irg_entity(called_graph);
        ir_type            *called_type = get_entity_type(called);
        ir_type            *call_type   = get_Call_type(call);
        size_t              n_params    = get_method_n_params(called_type);
        size_t              n_arguments = get_method_n_params(call_type);
        size_t              n_res       = get_method_n_ress(called_type);
        mtp_additional_properties props = get_entity_additional_properties(called);
        size_t              i;
        bool                res;

        if (props & mtp_property_noinline)
                return false;

        if (n_arguments != n_params) {
                /* this is a bad feature of C: without a prototype, we can
                 * call a function with fewer arguments than it has parameters.
                 * Currently we don't support this, although we could use
                 * Unknown then. */
                return false;
        }
        if (n_res != get_method_n_ress(call_type)) {
                return false;
        }

        /* Argh, compiling C has some bad consequences: when argument and
         * parameter types mismatch, it is implementation dependent what
         * happens.  We support inlining if the bit size of the types matches
         * AND the same arithmetic is used. */
        for (i = 0; i < n_params; ++i) {
                ir_type *param_tp = get_method_param_type(called_type, i);
                ir_type *arg_tp   = get_method_param_type(call_type, i);

                if (param_tp != arg_tp) {
                        ir_mode *pmode = get_type_mode(param_tp);
                        ir_mode *amode = get_type_mode(arg_tp);

                        if (pmode == NULL || amode == NULL)
                                return false;
                        if (get_mode_size_bits(pmode) != get_mode_size_bits(amode))
                                return false;
                        if (get_mode_arithmetic(pmode) != get_mode_arithmetic(amode))
                                return false;
                        /* otherwise we can simply "reinterpret" the bits */
                }
        }
        for (i = 0; i < n_res; ++i) {
                ir_type *decl_res_tp = get_method_res_type(called_type, i);
                ir_type *used_res_tp = get_method_res_type(call_type, i);

                if (decl_res_tp != used_res_tp) {
                        ir_mode *decl_mode = get_type_mode(decl_res_tp);
                        ir_mode *used_mode = get_type_mode(used_res_tp);
                        if (decl_mode == NULL || used_mode == NULL)
                                return false;
                        if (get_mode_size_bits(decl_mode) != get_mode_size_bits(used_mode))
                                return false;
                        if (get_mode_arithmetic(decl_mode) != get_mode_arithmetic(used_mode))
                                return false;
                        /* otherwise we can "reinterpret" the bits */
                }
        }

        /* check parameters for compound arguments */
        for (i = 0; i < n_params; ++i) {
                ir_type *p_type = get_method_param_type(call_type, i);

                if (is_compound_type(p_type))
                        return false;
        }

        /* check results for compound arguments */
        for (i = 0; i < n_res; ++i) {
                ir_type *r_type = get_method_res_type(call_type, i);

                if (is_compound_type(r_type))
                        return false;
        }

        res = true;
        irg_walk_graph(called_graph, find_addr, NULL, &res);

        return res;
}
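
/* Illustration (added example, not from the original sources): a C fragment
 * whose call site can_inline() rejects.  Without a visible prototype, the
 * call below passes an int where a double parameter is declared, so the
 * parameter modes differ in size and arithmetic and the mode checks above
 * return false:
 *
 *   double f();                        // no prototype visible here
 *   double g(void) { return f(42); }   // call type: f(int)
 *   double f(double x) { return x; }   // called type: f(double)
 */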

enum exc_mode {
        exc_handler,    /**< There is a handler. */
        exc_no_handler  /**< Exception handling not represented. */
};

/**
 * Copy all entities of the stack frame of one irg to the stack frame of
 * another.  Sets the entity links of the old entities to the copies.
 */
static void copy_frame_entities(ir_graph *from, ir_graph *to)
{
        ir_type *from_frame = get_irg_frame_type(from);
        ir_type *to_frame   = get_irg_frame_type(to);
        size_t   n_members  = get_class_n_members(from_frame);
        size_t   i;
        assert(from_frame != to_frame);

        for (i = 0; i < n_members; ++i) {
                ir_entity *old_ent = get_class_member(from_frame, i);
                ir_entity *new_ent = copy_entity_own(old_ent, to_frame);
                set_entity_link(old_ent, new_ent);
                assert(!is_parameter_entity(old_ent));
        }
}

/* Inlines a method at the given call site. */
int inline_method(ir_node *call, ir_graph *called_graph)
{
        /* we cannot inline some types of calls */
        if (! can_inline(call, called_graph))
                return 0;

        /* We cannot inline a recursive call. The graph must be copied before
         * the call to inline_method() using create_irg_copy(). */
        ir_graph *irg = get_irn_irg(call);
        if (called_graph == irg)
                return 0;

        ir_entity *ent      = get_irg_entity(called_graph);
        ir_type   *mtp      = get_entity_type(ent);
        ir_type   *ctp      = get_Call_type(call);
        int        n_params = get_method_n_params(mtp);

        ir_graph *rem = current_ir_graph;
        current_ir_graph = irg;

        DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));

        /* optimizations can cause problems when allocating new nodes */
        int rem_opt = get_opt_optimize();
        set_optimize(0);

        /* Handle graph state */
        assert(get_irg_pinned(irg) == op_pin_state_pinned);
        assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
        clear_irg_properties(irg, IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE
                           | IR_GRAPH_PROPERTY_CONSISTENT_ENTITY_USAGE);
        set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
        clear_irg_properties(irg, IR_GRAPH_PROPERTY_CONSISTENT_ENTITY_USAGE);
        edges_deactivate(irg);

        /* here we know we WILL inline, so inform the statistics */
        hook_inline(call, called_graph);

        /* -- Decide how to handle exception control flow: Is there a handler
           for the Call node, or do we branch directly to End on an exception?
           exc_handling:
           exc_handler:    There is a handler.
           exc_no_handler: Exception handling is not represented in Firm. -- */
        ir_node *Xproj = NULL;
        for (ir_node *proj = (ir_node*)get_irn_link(call); proj != NULL;
                 proj = (ir_node*)get_irn_link(proj)) {
                long proj_nr = get_Proj_proj(proj);
                if (proj_nr == pn_Call_X_except) Xproj = proj;
        }
        enum exc_mode exc_handling = Xproj != NULL ? exc_handler : exc_no_handler;

        /* create the argument tuple */
        ir_node **args_in = ALLOCAN(ir_node*, n_params);

        ir_node *block = get_nodes_block(call);
        for (int i = n_params - 1; i >= 0; --i) {
                ir_node *arg      = get_Call_param(call, i);
                ir_type *param_tp = get_method_param_type(mtp, i);
                ir_mode *mode     = get_type_mode(param_tp);

                if (mode != get_irn_mode(arg)) {
                        arg = new_r_Conv(block, arg, mode);
                }
                args_in[i] = arg;
        }

        /* Pre_call is a Tuple that provides what the Start node of the called
         * procedure provided and later replaces the Start node of the called
         * graph.  Post_call is the old Call node and collects the results of
         * the called graph.  Both will end up being a tuple. */
        ir_node *post_bl = get_nodes_block(call);
        /* XxMxPxPxPxT of Start + parameter of Call */
        ir_node *in[pn_Start_max+1];
        in[pn_Start_M]              = get_Call_mem(call);
        in[pn_Start_X_initial_exec] = new_r_Jmp(post_bl);
        in[pn_Start_P_frame_base]   = get_irg_frame(irg);
        in[pn_Start_T_args]         = new_r_Tuple(post_bl, n_params, args_in);
        ir_node *pre_call = new_r_Tuple(post_bl, pn_Start_max+1, in);
        ir_node *post_call = call;

        /* --
           The new block gets the ins of the old block, pre_call and all its
           predecessors and all Phi nodes. -- */
        part_block(pre_call);

        /* increment visited flag for later walk */
        inc_irg_visited(called_graph);

        /* link some nodes to nodes in the current graph so instead of copying
         * the linked nodes will get used.
         * So the copier will use the created Tuple instead of copying the start
         * node, similar for singleton nodes like NoMem and Bad.
         * Note: this will prohibit predecessors from being copied - only do it
         *       for nodes without predecessors */
        ir_node *start_block = get_irg_start_block(called_graph);
        set_new_node(start_block, get_nodes_block(pre_call));
        mark_irn_visited(start_block);

        ir_node *start = get_irg_start(called_graph);
        set_new_node(start, pre_call);
        mark_irn_visited(start);

        ir_node *nomem = get_irg_no_mem(called_graph);
        set_new_node(nomem, get_irg_no_mem(irg));
        mark_irn_visited(nomem);

        /* the entity link is used to link entities on the old stack frame to
         * the new stack frame */
        irp_reserve_resources(irp, IRP_RESOURCE_ENTITY_LINK);

        /* copy entities and nodes */
        assert(!irn_visited(get_irg_end(called_graph)));
        copy_frame_entities(called_graph, irg);
        irg_walk_core(get_irg_end(called_graph), copy_node_inline, set_preds_inline,
                      irg);

        irp_free_resources(irp, IRP_RESOURCE_ENTITY_LINK);

        /* -- Merge the end of the inlined procedure with the call site -- */
        /* We will turn the old Call node into a Tuple with the following
           predecessors:
           -1: Block of Tuple.
           0: Phi of all Memories of Return statements.
           1: Jmp from new Block that merges the control flow from all exception
              predecessors of the old end block.
           2: Tuple of all arguments.
           3: Phi of Exception memories.
           In case the old Call directly branches to End on an exception we don't
           need the block merging all exceptions nor the Phi of the exception
           memories.
        */

        /* Precompute some values */
        ir_node *end_bl = get_new_node(get_irg_end_block(called_graph));
        ir_node *end    = get_new_node(get_irg_end(called_graph));
        int      arity  = get_Block_n_cfgpreds(end_bl); /* arity = n_exc + n_ret  */
        int      n_res  = get_method_n_ress(get_Call_type(call));

        ir_node **res_pred = XMALLOCN(ir_node*, n_res);
        ir_node **cf_pred  = XMALLOCN(ir_node*, arity);

        /* preserve the keepalives of the inlined graph */
        int irn_arity = get_irn_arity(end);
        for (int i = 0; i < irn_arity; i++) {
                ir_node *ka = get_End_keepalive(end, i);
                if (! is_Bad(ka))
                        add_End_keepalive(get_irg_end(irg), ka);
        }

        /* replace Return nodes by Jmp nodes */
        int n_ret = 0;
        for (int i = 0; i < arity; i++) {
                ir_node *ret = get_Block_cfgpred(end_bl, i);
                if (is_Return(ret)) {
                        ir_node *block = get_nodes_block(ret);
                        cf_pred[n_ret] = new_r_Jmp(block);
                        n_ret++;
                }
        }
        set_irn_in(post_bl, n_ret, cf_pred);

        /* build a Tuple for all results of the method.
         * add Phi node if there was more than one Return. */
        turn_into_tuple(post_call, pn_Call_max+1);
        /* First the Memory-Phi */
        int n_mem_phi = 0;
        for (int i = 0; i < arity; i++) {
                ir_node *ret = get_Block_cfgpred(end_bl, i);
                if (is_Return(ret)) {
                        cf_pred[n_mem_phi++] = get_Return_mem(ret);
                }
                /* memory output for some exceptions is directly connected to End */
                if (is_Call(ret)) {
                        cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 3);
                } else if (is_fragile_op(ret)) {
                        /* We rely on all cfops having the memory output at the same position. */
                        cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 0);
                } else if (is_Raise(ret)) {
                        cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 1);
                }
        }
        ir_node *phi = new_r_Phi(post_bl, n_mem_phi, cf_pred, mode_M);
        set_Tuple_pred(call, pn_Call_M, phi);
        /* Conserve Phi-list for further inlinings -- but might be optimized */
        if (get_nodes_block(phi) == post_bl) {
                set_irn_link(phi, get_irn_link(post_bl));
                set_irn_link(post_bl, phi);
        }
        /* Now the real results */
        if (n_res > 0) {
                for (int j = 0; j < n_res; j++) {
                        ir_type *res_type = get_method_res_type(ctp, j);
                        ir_mode *res_mode = get_type_mode(res_type);
                        int n_ret = 0;
                        for (int i = 0; i < arity; i++) {
                                ir_node *ret = get_Block_cfgpred(end_bl, i);
                                if (is_Return(ret)) {
                                        ir_node *res = get_Return_res(ret, j);
                                        if (get_irn_mode(res) != res_mode) {
                                                ir_node *block = get_nodes_block(res);
                                                res = new_r_Conv(block, res, res_mode);
                                        }
                                        cf_pred[n_ret] = res;
                                        n_ret++;
                                }
                        }
                        if (n_ret > 0) {
                                phi = new_r_Phi(post_bl, n_ret, cf_pred, res_mode);
                        } else {
                                phi = new_r_Bad(irg, res_mode);
                        }
                        res_pred[j] = phi;
                        /* Conserve Phi-list for further inlinings -- but might be optimized */
                        if (get_nodes_block(phi) == post_bl) {
                                set_Phi_next(phi, get_Block_phis(post_bl));
                                set_Block_phis(post_bl, phi);
                        }
                }
                ir_node *result_tuple = new_r_Tuple(post_bl, n_res, res_pred);
                set_Tuple_pred(call, pn_Call_T_result, result_tuple);
        } else {
                set_Tuple_pred(call, pn_Call_T_result, new_r_Bad(irg, mode_T));
        }
        /* handle the regular call */
        set_Tuple_pred(call, pn_Call_X_regular, new_r_Jmp(post_bl));

        /* Finally the exception control flow.
           We have two possible situations:
           First, the Call branches to an exception handler:
           We need to add a Phi node to collect the memory containing the
           exception objects.  Further we need to add another block to get a
           correct representation of this Phi.  To this block we add a Jmp that
           resolves into the X output of the Call when the Call is turned into
           a tuple.
           Second: There is no exception edge. Just add all inlined exception
           branches to the End node.
         */
        if (exc_handling == exc_handler) {
                int n_exc = 0;
                for (int i = 0; i < arity; i++) {
                        ir_node *ret = get_Block_cfgpred(end_bl, i);
                        ir_node *irn = skip_Proj(ret);
                        if (is_fragile_op(irn) || is_Raise(irn)) {
                                cf_pred[n_exc] = ret;
                                ++n_exc;
                        }
                }
                if (n_exc > 0) {
                        if (n_exc == 1) {
                                /* simple fix */
                                set_Tuple_pred(call, pn_Call_X_except, cf_pred[0]);
                        } else {
                                ir_node *block = new_r_Block(irg, n_exc, cf_pred);
                                set_Tuple_pred(call, pn_Call_X_except, new_r_Jmp(block));
                        }
                } else {
                        set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg, mode_X));
                }
        } else {
                /* exc_handling == exc_no_handler: add the inlined exception
                 * branches to the End node of the caller. */
                int n_exc = 0;
                for (int i = 0; i < arity; i++) {
                        ir_node *ret = get_Block_cfgpred(end_bl, i);
                        ir_node *irn = skip_Proj(ret);

                        if (is_fragile_op(irn) || is_Raise(irn)) {
                                cf_pred[n_exc] = ret;
                                n_exc++;
                        }
                }
                ir_node  *main_end_bl       = get_irg_end_block(irg);
                int       main_end_bl_arity = get_irn_arity(main_end_bl);
                ir_node **end_preds         = XMALLOCN(ir_node*, n_exc+main_end_bl_arity);

                for (int i = 0; i < main_end_bl_arity; ++i)
                        end_preds[i] = get_irn_n(main_end_bl, i);
                for (int i = 0; i < n_exc; ++i)
                        end_preds[main_end_bl_arity + i] = cf_pred[i];
                set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
                set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg, mode_X));
                free(end_preds);
        }
        free(res_pred);
        free(cf_pred);

        /* -- Turn the optimizations back on. -- */
        set_optimize(rem_opt);
        current_ir_graph = rem;

        return 1;
}
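
/* Usage sketch (added example, not from the original sources): inlining one
 * known call site by hand, mirroring what inline_small_irgs() below does.
 * inline_method() relies on the Proj links and Phi lists, so the caller must
 * reserve the resources and run collect_phiprojs() first:
 *
 *   ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_PHI_LIST);
 *   collect_phiprojs(irg);
 *   if (inline_method(call, callee)) {
 *       // the Call node was replaced by the callee's body
 *   }
 *   ir_free_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_PHI_LIST);
 */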

/********************************************************************/
/* Apply inlining to small methods.                                 */
/********************************************************************/

static struct obstack  temp_obst;

/** Represents a possibly inlinable call in a graph. */
typedef struct call_entry {
        ir_node    *call;       /**< The Call node. */
        ir_graph   *callee;     /**< The callee IR-graph. */
        list_head  list;        /**< List head for linking the next one. */
        int        loop_depth;  /**< The loop depth of this call. */
        int        benefice;    /**< The calculated benefice of this call. */
        unsigned   local_adr:1; /**< Set if this call gets an address of a local variable. */
        unsigned   all_const:1; /**< Set if this call has only constant parameters. */
} call_entry;

/**
 * Environment for inlining small irgs.
 */
typedef struct inline_env_t {
        struct obstack obst;  /**< An obstack where call_entries are allocated on. */
        list_head      calls; /**< The call entry list. */
} inline_env_t;

/**
 * Returns the irg called from a Call node. If the irg is not
 * known, NULL is returned.
 *
 * @param call  the call node
 */
static ir_graph *get_call_called_irg(ir_node *call)
{
        ir_node *addr;

        addr = get_Call_ptr(call);
        if (is_SymConst_addr_ent(addr)) {
                ir_entity *ent = get_SymConst_entity(addr);
                /* we don't know which function finally gets bound to a weak symbol */
                if (get_entity_linkage(ent) & IR_LINKAGE_WEAK)
                        return NULL;

                return get_entity_irg(ent);
        }

        return NULL;
}

/**
 * Walker: Collect all calls to known graphs inside a graph.
 */
static void collect_calls(ir_node *call, void *env)
{
        (void) env;
        if (is_Call(call)) {
                ir_graph *called_irg = get_call_called_irg(call);

                if (called_irg != NULL) {
                        /* The Call node calls a locally defined method.  Remember to inline. */
                        inline_env_t *ienv  = (inline_env_t*)env;
                        call_entry   *entry = OALLOC(&ienv->obst, call_entry);
                        entry->call       = call;
                        entry->callee     = called_irg;
                        entry->loop_depth = 0;
                        entry->benefice   = 0;
                        entry->local_adr  = 0;
                        entry->all_const  = 0;

                        list_add_tail(&entry->list, &ienv->calls);
                }
        }
}

/**
 * Inlines all small methods at call sites where the called address comes
 * from a SymConst node that references the entity representing the called
 * method.
 * The size argument is a rough measure for the code size of the method:
 * Methods where the obstack containing the firm graph is smaller than
 * size are inlined.
 */
void inline_small_irgs(ir_graph *irg, int size)
{
        ir_graph *rem = current_ir_graph;
        inline_env_t env;

        current_ir_graph = irg;
        free_callee_info(irg);

        /* Find Call nodes to inline.
           (We cannot inline during a walk of the graph, as inlining the same
           method several times changes the visited flag of the walked graph:
           after the first inlining visited of the callee equals visited of
           the caller.  With the next inlining both are increased.) */
        obstack_init(&env.obst);
        INIT_LIST_HEAD(&env.calls);
        irg_walk_graph(irg, NULL, collect_calls, &env);

        if (! list_empty(&env.calls)) {
                /* There are calls to inline */
                ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
                collect_phiprojs(irg);

                list_for_each_entry(call_entry, entry, &env.calls, list) {
                        ir_graph  *callee = entry->callee;
                        ir_entity *called = get_irg_entity(callee);
                        mtp_additional_properties props
                                = get_entity_additional_properties(called);

                        if (props & mtp_property_noinline)
                                continue;

                        struct obstack *const obst = get_irg_obstack(callee);
                        if ((props & mtp_property_always_inline) ||
                            _obstack_memory_used(obst) - (int)obstack_room(obst) < size) {
                                inline_method(entry->call, callee);
                        }
                }
                ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
        }
        obstack_free(&env.obst, NULL);
        current_ir_graph = rem;
}
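
/* Usage sketch (added example, not from the original sources): apply
 * inline_small_irgs() to every graph of the program with a rough size
 * threshold of 1000 obstack bytes (the number is an arbitrary example):
 *
 *   for (size_t i = 0; i < get_irp_n_irgs(); ++i)
 *       inline_small_irgs(get_irp_irg(i), 1000);
 */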

typedef struct inline_small_irgs_pass_t {
        ir_graph_pass_t pass;
        int            size;
} inline_small_irgs_pass_t;

/**
 * Wrapper to run inline_small_irgs() as a pass.
 */
static int inline_small_irgs_wrapper(ir_graph *irg, void *context)
{
        inline_small_irgs_pass_t *pass = (inline_small_irgs_pass_t*)context;

        inline_small_irgs(irg, pass->size);
        return 0;
}

/* create a pass for inline_small_irgs() */
ir_graph_pass_t *inline_small_irgs_pass(const char *name, int size)
{
        inline_small_irgs_pass_t *pass = XMALLOCZ(inline_small_irgs_pass_t);

        pass->size = size;
        return def_graph_pass_constructor(
                &pass->pass, name ? name : "inline_small_irgs", inline_small_irgs_wrapper);
}
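
/* Usage sketch (added example, not from the original sources): running the
 * wrapper through a graph pass manager.  The manager functions are assumed
 * to follow the usual irpass API; the exact names may differ between
 * libFirm versions:
 *
 *   ir_graph_pass_manager_t *mgr = new_graph_pass_mgr("inline", 0, 1);
 *   ir_graph_pass_mgr_add(mgr, inline_small_irgs_pass(NULL, 1000));
 *   ir_graph_pass_mgr_run(mgr);
 *   term_graph_pass_mgr(mgr);
 */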

/**
 * Environment for inlining irgs.
 */
typedef struct {
        list_head calls;             /**< List of all call nodes in this graph. */
        unsigned  *local_weights;    /**< Once allocated, the beneficial weight for transmitting local addresses. */
        unsigned  n_nodes;           /**< Number of nodes in graph except Id, Tuple, Proj, Start, End. */
        unsigned  n_blocks;          /**< Number of Blocks in graph without Start and End block. */
        unsigned  n_nodes_orig;      /**< for statistics */
        unsigned  n_call_nodes;      /**< Number of Call nodes in the graph. */
        unsigned  n_call_nodes_orig; /**< for statistics */
        unsigned  n_callers;         /**< Number of known graphs that call this graph. */
        unsigned  n_callers_orig;    /**< for statistics */
        unsigned  got_inline:1;      /**< Set, if at least one call inside this graph was inlined. */
        unsigned  recursive:1;       /**< Set, if this function is self recursive. */
} inline_irg_env;

/**
 * Allocate a new environment for inlining.
 */
static inline_irg_env *alloc_inline_irg_env(void)
{
        inline_irg_env *env    = OALLOC(&temp_obst, inline_irg_env);
        INIT_LIST_HEAD(&env->calls);
        env->local_weights     = NULL;
        env->n_nodes           = -2; /* do not count Start, End */
        env->n_blocks          = -2; /* do not count Start and End block */
        env->n_nodes_orig      = -2; /* do not count Start, End */
        env->n_call_nodes      = 0;
        env->n_call_nodes_orig = 0;
        env->n_callers         = 0;
        env->n_callers_orig    = 0;
        env->got_inline        = 0;
        env->recursive         = 0;
        return env;
}

typedef struct walker_env {
        inline_irg_env *x;     /**< the inline environment */
        char ignore_runtime;   /**< the ignore runtime flag */
        char ignore_callers;   /**< if set, do not update the callers data */
} wenv_t;

/**
 * post-walker: collect all calls in the inline-environment
 * of a graph and sum some statistics.
 */
static void collect_calls2(ir_node *call, void *ctx)
{
        wenv_t         *env = (wenv_t*)ctx;
        inline_irg_env *x = env->x;
        unsigned        code = get_irn_opcode(call);
        ir_graph       *callee;
        call_entry     *entry;

        /* count meaningful nodes in irg */
        if (code != iro_Proj && code != iro_Tuple && code != iro_Sync) {
                if (code != iro_Block) {
                        ++x->n_nodes;
                        ++x->n_nodes_orig;
                } else {
                        ++x->n_blocks;
                }
        }

        if (code != iro_Call) return;

        /* check if it's a runtime call */
        if (env->ignore_runtime) {
                ir_node *symc = get_Call_ptr(call);

                if (is_SymConst_addr_ent(symc)) {
                        ir_entity *ent = get_SymConst_entity(symc);

                        if (get_entity_additional_properties(ent) & mtp_property_runtime)
                                return;
                }
        }

        /* collect all call nodes */
        ++x->n_call_nodes;
        ++x->n_call_nodes_orig;

        callee = get_call_called_irg(call);
        if (callee != NULL) {
                if (! env->ignore_callers) {
                        inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);
                        /* count all static callers */
                        ++callee_env->n_callers;
                        ++callee_env->n_callers_orig;
                }
                if (callee == current_ir_graph)
                        x->recursive = 1;

                /* link it in the list of possible inlinable entries */
                entry = OALLOC(&temp_obst, call_entry);
                entry->call       = call;
                entry->callee     = callee;
                entry->loop_depth = get_irn_loop(get_nodes_block(call))->depth;
                entry->benefice   = 0;
                entry->local_adr  = 0;
                entry->all_const  = 0;

                list_add_tail(&entry->list, &x->calls);
        }
}

/**
 * Returns TRUE if the irg's environment counts no Call nodes,
 * hence this irg is a leaf.
 */
inline static int is_leaf(ir_graph *irg)
{
        inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
        return env->n_call_nodes == 0;
}

/**
 * Returns TRUE if the number of nodes in the callee is
 * smaller than size in the irg's environment.
 */
inline static int is_smaller(ir_graph *callee, unsigned size)
{
        inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);
        return env->n_nodes < size;
}

/**
 * Duplicate a call entry.
 *
 * @param entry     the original entry to duplicate
 * @param new_call  the new call node
 * @param loop_depth_delta
 *                  delta value for the loop depth
 */
static call_entry *duplicate_call_entry(const call_entry *entry,
                                        ir_node *new_call, int loop_depth_delta)
{
        call_entry *nentry = OALLOC(&temp_obst, call_entry);
        nentry->call       = new_call;
        nentry->callee     = entry->callee;
        nentry->benefice   = entry->benefice;
        nentry->loop_depth = entry->loop_depth + loop_depth_delta;
        nentry->local_adr  = entry->local_adr;
        nentry->all_const  = entry->all_const;

        return nentry;
}

/**
 * Append all call nodes of the source environment to the call list of the
 * destination environment.
 *
 * @param dst         destination environment
 * @param src         source environment
 * @param loop_depth  the loop depth of the call that is replaced by the src list
 */
static void append_call_list(inline_irg_env *dst, inline_irg_env *src, int loop_depth)
{
        call_entry *nentry;

        /* Note that the src list points to Call nodes in the inlined graph, but
           we need Call nodes in our graph. Luckily the inliner leaves this information
           in the link field. */
        list_for_each_entry(call_entry, entry, &src->calls, list) {
                nentry = duplicate_call_entry(entry, (ir_node*)get_irn_link(entry->call), loop_depth);
                list_add_tail(&nentry->list, &dst->calls);
        }
        dst->n_call_nodes += src->n_call_nodes;
        dst->n_nodes      += src->n_nodes;
}

/*
 * Inlines small leaf methods at call sites where the called address comes
 * from a SymConst node that references the entity representing the called
 * method.
 * The sizes are rough measures for the code size of a method, counted in
 * IR nodes: leaf methods smaller than leafsize and other methods smaller
 * than size are inlined, as long as the caller does not grow beyond
 * maxsize nodes.
 */
void inline_leaf_functions(unsigned maxsize, unsigned leafsize,
                           unsigned size, int ignore_runtime)
{
        inline_irg_env   *env;
        ir_graph         *irg;
        size_t           i, n_irgs;
        ir_graph         *rem;
        int              did_inline;
        wenv_t           wenv;
        pmap             *copied_graphs;
        pmap_entry       *pm_entry;

        rem = current_ir_graph;
        obstack_init(&temp_obst);

        /* a map for the copied graphs, used to inline recursive calls */
        copied_graphs = pmap_create();

        /* extend all irgs by a temporary data structure for inlining. */
        n_irgs = get_irp_n_irgs();
        for (i = 0; i < n_irgs; ++i)
                set_irg_link(get_irp_irg(i), alloc_inline_irg_env());

        /* Pre-compute information in temporary data structure. */
        wenv.ignore_runtime = ignore_runtime;
        wenv.ignore_callers = 0;
        for (i = 0; i < n_irgs; ++i) {
                ir_graph *irg = get_irp_irg(i);

                free_callee_info(irg);

                assure_irg_properties(irg,
                        IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO);
                wenv.x = (inline_irg_env*)get_irg_link(irg);
                irg_walk_graph(irg, NULL, collect_calls2, &wenv);
                confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_ALL);
        }

        /* -- and now inline. -- */

        /* Inline leaf functions recursively -- we might construct new leaves. */
        do {
                did_inline = 0;

                for (i = 0; i < n_irgs; ++i) {
                        int phiproj_computed = 0;

                        current_ir_graph = get_irp_irg(i);
                        env              = (inline_irg_env*)get_irg_link(current_ir_graph);

                        ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
                        list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
                                if (env->n_nodes > maxsize)
                                        break;

                                ir_node   *call   = entry->call;
                                ir_graph  *callee = entry->callee;
                                ir_entity *called = get_irg_entity(callee);
                                mtp_additional_properties props
                                        = get_entity_additional_properties(called);
                                if (props & mtp_property_noinline)
                                        continue;

                                if (is_leaf(callee) && (
                                    is_smaller(callee, leafsize) || (props & mtp_property_always_inline))) {
                                        if (!phiproj_computed) {
                                                phiproj_computed = 1;
                                                collect_phiprojs(current_ir_graph);
                                        }
                                        did_inline = inline_method(call, callee);

                                        if (did_inline) {
                                                inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);

                                                /* call was inlined, Phi/Projs for current graph must be recomputed */
                                                phiproj_computed = 0;

                                                /* Do some statistics */
                                                env->got_inline = 1;
                                                --env->n_call_nodes;
                                                env->n_nodes += callee_env->n_nodes;
                                                --callee_env->n_callers;

                                                /* remove this call from the list */
                                                list_del(&entry->list);
                                                continue;
                                        }
                                }
                        }
                        ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
                }
        } while (did_inline);

        /* inline other small functions. */
        for (i = 0; i < n_irgs; ++i) {
                int phiproj_computed = 0;

                current_ir_graph = get_irp_irg(i);
                env              = (inline_irg_env*)get_irg_link(current_ir_graph);

                ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

                /* note that the list of possible calls is updated during the process */
                list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
                        ir_node   *call   = entry->call;
                        ir_graph  *callee = entry->callee;
                        ir_entity *called = get_irg_entity(callee);

                        mtp_additional_properties props = get_entity_additional_properties(called);
                        if (props & mtp_property_noinline)
                                continue;

                        ir_graph *calleee = pmap_get(ir_graph, copied_graphs, callee);
                        if (calleee != NULL) {
                                /*
                                 * Remap callee if we have a copy.
                                 * FIXME: Should we do this only for recursive Calls?
                                 */
                                callee = calleee;
                        }

                        if ((props & mtp_property_always_inline) ||
                            (is_smaller(callee, size) && env->n_nodes < maxsize) /* small function */) {
                                if (current_ir_graph == callee) {
                                        /*
                                         * Recursive call: we cannot directly inline because we cannot
                                         * walk the graph and change it. So we have to make a copy of
                                         * the graph first.
                                         */

                                        inline_irg_env *callee_env;
                                        ir_graph       *copy;

                                        ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

                                        /*
                                         * No copy yet, create one.
                                         * Note that recursive methods are never leaves, so it is
                                         * sufficient to test this condition here.
                                         */
                                        copy = create_irg_copy(callee);

                                        /* create_irg_copy() destroys the Proj links, recompute them */
                                        phiproj_computed = 0;

                                        ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

                                        /* allocate a new environment */
                                        callee_env = alloc_inline_irg_env();
                                        set_irg_link(copy, callee_env);

                                        assure_irg_properties(copy,
                                                IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO);
                                        wenv.x              = callee_env;
                                        wenv.ignore_callers = 1;
                                        irg_walk_graph(copy, NULL, collect_calls2, &wenv);

                                        /*
                                         * Enter the entity of the original graph. This is needed
                                         * for inline_method(). However, note that ent->irg still points
                                         * to callee, NOT to copy.
                                         */
                                        set_irg_entity(copy, get_irg_entity(callee));

                                        pmap_insert(copied_graphs, callee, copy);
                                        callee = copy;

                                        /* we have only one caller: the original graph */
                                        callee_env->n_callers      = 1;
                                        callee_env->n_callers_orig = 1;
                                }
                                if (! phiproj_computed) {
                                        phiproj_computed = 1;
                                        collect_phiprojs(current_ir_graph);
                                }
                                did_inline = inline_method(call, callee);
                                if (did_inline) {
                                        inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);

                                        /* call was inlined, Phi/Projs for current graph must be recomputed */
                                        phiproj_computed = 0;

                                        /* callee was inlined. Append its call list. */
                                        env->got_inline = 1;
                                        --env->n_call_nodes;
                                        append_call_list(env, callee_env, entry->loop_depth);
                                        --callee_env->n_callers;

                                        /* after we have inlined callee, all called methods inside callee
                                           are now called once more */
                                        list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
                                                inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
                                                ++penv->n_callers;
                                        }

                                        /* remove this call from the list */
                                        list_del(&entry->list);
                                        continue;
                                }
                        }
                }
                ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
        }

        for (i = 0; i < n_irgs; ++i) {
                irg = get_irp_irg(i);
                env = (inline_irg_env*)get_irg_link(irg);

                if (env->got_inline) {
                        optimize_graph_df(irg);
                        optimize_cf(irg);
                }
                if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
                        DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
                        env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
                        env->n_callers_orig, env->n_callers,
                        get_entity_name(get_irg_entity(irg))));
                }
        }

        /* kill the copied graphs: we don't need them anymore */
        foreach_pmap(copied_graphs, pm_entry) {
                ir_graph *copy = (ir_graph*)pm_entry->value;

                /* reset the entity, otherwise it will be deleted in the next step ... */
                set_irg_entity(copy, NULL);
                free_ir_graph(copy);
        }
        pmap_destroy(copied_graphs);

        obstack_free(&temp_obst, NULL);
        current_ir_graph = rem;
}
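
/* Usage sketch (added example, not from the original sources): a typical
 * invocation.  The thresholds are arbitrary example values: stop growing a
 * caller beyond 750 nodes, inline leaf callees up to 300 nodes and other
 * callees up to 250 nodes, and skip calls to runtime functions:
 *
 *   inline_leaf_functions(750, 300, 250, 1);
 */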

typedef struct inline_leaf_functions_pass_t {
        ir_prog_pass_t pass;
        unsigned       maxsize;
        unsigned       leafsize;
        unsigned       size;
        int            ignore_runtime;
} inline_leaf_functions_pass_t;

/**
 * Wrapper to run inline_leaf_functions() as an ir_prog pass.
 */
static int inline_leaf_functions_wrapper(ir_prog *irp, void *context)
{
        inline_leaf_functions_pass_t *pass = (inline_leaf_functions_pass_t*)context;

        (void)irp;
        inline_leaf_functions(
                pass->maxsize, pass->leafsize,
                pass->size, pass->ignore_runtime);
        return 0;
}

/* create a pass for inline_leaf_functions() */
ir_prog_pass_t *inline_leaf_functions_pass(
        const char *name, unsigned maxsize, unsigned leafsize,
        unsigned size, int ignore_runtime)
{
        inline_leaf_functions_pass_t *pass = XMALLOCZ(inline_leaf_functions_pass_t);

        pass->maxsize        = maxsize;
        pass->leafsize       = leafsize;
        pass->size           = size;
        pass->ignore_runtime = ignore_runtime;

        return def_prog_pass_constructor(
                &pass->pass,
                name ? name : "inline_leaf_functions",
                inline_leaf_functions_wrapper);
}
1201
1202 /**
1203  * Calculate the parameter weights for transmitting the address of a local variable.
1204  */
1205 static unsigned calc_method_local_weight(ir_node *arg)
1206 {
1207         int      j;
1208         unsigned v, weight = 0;
1209
1210         for (unsigned i = get_irn_n_outs(arg); i-- > 0; ) {
1211                 ir_node *succ = get_irn_out(arg, i);
1212
1213                 switch (get_irn_opcode(succ)) {
1214                 case iro_Load:
1215                 case iro_Store:
1216                         /* Loads and Store can be removed */
1217                         weight += 3;
1218                         break;
1219                 case iro_Sel:
1220                         /* check if all args are constant */
1221                         for (j = get_Sel_n_indexs(succ) - 1; j >= 0; --j) {
1222                                 ir_node *idx = get_Sel_index(succ, j);
1223                                 if (! is_Const(idx))
1224                                         return 0;
1225                         }
1226                         /* Check users on this Sel. Note: if a 0 is returned here, there was
1227                            some unsupported node. */
1228                         v = calc_method_local_weight(succ);
1229                         if (v == 0)
1230                                 return 0;
1231                         /* we can kill one Sel with constant indexes, this is cheap */
1232                         weight += v + 1;
1233                         break;
1234                 case iro_Id:
1235                         /* when looking backward we might find Id nodes */
1236                         weight += calc_method_local_weight(succ);
1237                         break;
1238                 case iro_Tuple:
1239                         /* unoptimized tuple */
1240                         for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
1241                                 ir_node *pred = get_Tuple_pred(succ, j);
1242                                 if (pred == arg) {
1243                                         /* look for Proj(j) */
1244                                         for (unsigned k = get_irn_n_outs(succ); k-- > 0; ) {
1245                                                 ir_node *succ_succ = get_irn_out(succ, k);
1246                                                 if (is_Proj(succ_succ)) {
1247                                                         if (get_Proj_proj(succ_succ) == j) {
1248                                                                 /* found */
1249                                                                 weight += calc_method_local_weight(succ_succ);
1250                                                         }
1251                                                 } else {
1252                                                         /* this should NOT happen */
1253                                                         return 0;
1254                                                 }
1255                                         }
1256                                 }
1257                         }
1258                         break;
1259                 default:
1260                         /* any other node: not supported yet or bad */
1261                         return 0;
1262                 }
1263         }
1264         return weight;
1265 }
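
/* Worked example for the weight computation above (hypothetical IR): the
 * address of a local struct is transmitted and the callee accesses it via
 * one Sel with constant indexes, followed by one Load and one Store.  The
 * Load and Store contribute 3 + 3 = 6, the Sel adds its users' weight plus
 * 1, so the total weight is 7.  A single unsupported user, e.g. the address
 * escaping into another Call, makes the whole chain return 0. */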
1266
1267 /**
1268  * Calculate the parameter weights for transmitting the address of a local variable.
1269  */
1270 static void analyze_irg_local_weights(inline_irg_env *env, ir_graph *irg)
1271 {
1272         ir_entity *ent = get_irg_entity(irg);
1273         ir_type  *mtp;
1274         size_t   nparams;
1275         long     proj_nr;
1276         ir_node  *irg_args, *arg;
1277
1278         mtp      = get_entity_type(ent);
1279         nparams  = get_method_n_params(mtp);
1280
1281         /* allocate a new array; non-NULL also serves as the 'analysed' flag */
1282         env->local_weights = NEW_ARR_D(unsigned, &temp_obst, nparams);
1283         memset(env->local_weights, 0, nparams * sizeof(env->local_weights[0]));
1284         /* If the method has no parameters we have nothing to do. */
1285         if (nparams == 0)
1286                 return;
1287
1288         assure_irg_outs(irg);
1289         irg_args = get_irg_args(irg);
1290         for (unsigned i = get_irn_n_outs(irg_args); i-- > 0; ) {
1291                 arg     = get_irn_out(irg_args, i);
1292                 proj_nr = get_Proj_proj(arg);
1293                 env->local_weights[proj_nr] = calc_method_local_weight(arg);
1294         }
1295 }
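
/* Example of the mapping above: for a method int f(int *p, int x), the Args
 * node has one Proj per used parameter; the Proj with number 0 stands for p,
 * so the weight of p's uses ends up in local_weights[0].  Parameters that
 * are never used have no Proj and keep the weight 0 set by the memset. */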
1296
1297 /**
1298  * Calculate the benefice for transmitting a local variable address.
1299  * After inlining, the local variable might be transformed into an
1300  * SSA variable by scalar_replacement().
1301  */
1302 static unsigned get_method_local_adress_weight(ir_graph *callee, size_t pos)
1303 {
1304         inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);
1305
1306         if (env->local_weights == NULL)
1307                 analyze_irg_local_weights(env, callee);
1308
1309         if (pos < ARR_LEN(env->local_weights))
1310                 return env->local_weights[pos];
1311         return 0;
1312 }
1313
1314 /**
1315  * Calculate a benefice value for inlining the given call.
1316  *
1317  * @param call       the call node we have to inspect
1318  * @param callee     the called graph
1319  */
1320 static int calc_inline_benefice(call_entry *entry, ir_graph *callee)
1321 {
1322         ir_node   *call = entry->call;
1323         ir_entity *ent  = get_irg_entity(callee);
1324         ir_type   *callee_frame;
1325         size_t    i, n_members, n_params;
1326         ir_node   *frame_ptr;
1327         ir_type   *mtp;
1328         int       weight = 0;
1329         int       all_const;
1330         unsigned  cc, v;
1331
1332         inline_irg_env *callee_env;
1333
1334         mtp_additional_properties props = get_entity_additional_properties(ent);
1335         if (props & mtp_property_noinline) {
1336                 DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden\n",
1337                     call, callee));
1338                 return entry->benefice = INT_MIN;
1339         }
1340
1341         callee_frame = get_irg_frame_type(callee);
1342         n_members = get_class_n_members(callee_frame);
1343         for (i = 0; i < n_members; ++i) {
1344                 ir_entity *frame_ent = get_class_member(callee_frame, i);
1345                 if (is_parameter_entity(frame_ent)) {
1346                         /* TODO: the inliner should handle parameter entities by inserting Store operations */
1347                         DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden due to parameter entity\n", call, callee));
1348                         add_entity_additional_properties(ent, mtp_property_noinline);
1349                         return entry->benefice = INT_MIN;
1350                 }
1351         }
1352
1353         if (props & mtp_property_noreturn) {
1354                 DB((dbg, LEVEL_2, "In %+F Call to %+F: not inlining noreturn\n",
1355                     call, callee));
1356                 return entry->benefice = INT_MIN;
1357         }
1358
1359         /* costs for every passed parameter */
1360         n_params = get_Call_n_params(call);
1361         mtp      = get_entity_type(ent);
1362         cc       = get_method_calling_convention(mtp);
1363         if (cc & cc_reg_param) {
1364                 /* register calling convention: smaller costs for register parameters */
1365                 size_t max_regs = cc & ~cc_bits;
1366
1367                 if (max_regs < n_params)
1368                         weight += max_regs * 2 + (n_params - max_regs) * 5;
1369                 else
1370                         weight += n_params * 2;
1371         } else {
1372                 /* parameters are passed on the stack */
1373                 weight += 5 * n_params;
1374         }
1375
1376         /* constant parameters improve the benefice */
1377         frame_ptr = get_irg_frame(current_ir_graph);
1378         all_const = 1;
1379         for (i = 0; i < n_params; ++i) {
1380                 ir_node *param = get_Call_param(call, i);
1381
1382                 if (is_Const(param)) {
1383                         weight += get_method_param_weight(ent, i);
1384                 } else {
1385                         all_const = 0;
1386                         if (is_SymConst(param))
1387                                 weight += get_method_param_weight(ent, i);
1388                         else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
1389                                 /*
1390                                  * An address of a local variable is transmitted. After
1391                                  * inlining, scalar_replacement might be able to remove the
1392                                  * local variable, so honor this.
1393                                  */
1394                                 v = get_method_local_adress_weight(callee, i);
1395                                 weight += v;
1396                                 if (v > 0)
1397                                         entry->local_adr = 1;
1398                         }
1399                 }
1400         }
1401         entry->all_const = all_const;
1402
1403         callee_env = (inline_irg_env*)get_irg_link(callee);
1404         if (callee_env->n_callers == 1 &&
1405             callee != current_ir_graph &&
1406             !entity_is_externally_visible(ent)) {
1407                 weight += 700;
1408         }
1409
1410         /* give a bonus for functions with one block */
1411         if (callee_env->n_blocks == 1)
1412                 weight = weight * 3 / 2;
1413
1414         /* and a bonus for small non-recursive functions: we want them inlined in almost every case */
1415         if (callee_env->n_nodes < 30 && !callee_env->recursive)
1416                 weight += 2000;
1417
1418         /* and finally a bonus for leaf functions: they do not increase the
1419            register pressure because of callee-save registers */
1420         if (callee_env->n_call_nodes == 0)
1421                 weight += 400;
1422
1423         /* it's important to inline inner loops first */
1424         if (entry->loop_depth > 30)
1425                 weight += 30 * 1024;
1426         else
1427                 weight += entry->loop_depth * 1024;
1428
1429         /*
1430          * All arguments constant is probably a good sign, give an extra bonus
1431          */
1432         if (all_const)
1433                 weight += 1024;
1434
1435         return entry->benefice = weight;
1436 }
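
/* Worked example (the scenario is hypothetical, the numbers follow the rules
 * above): a call passing two constant parameters on the stack to a static,
 * single-block leaf callee with fewer than 30 nodes, exactly one caller,
 * loop depth 0, and get_method_param_weight() returning 0 for both
 * parameters: 2 * 5 = 10, +700 (single static caller) = 710, * 3 / 2
 * (one block) = 1065, +2000 (small, non-recursive) = 3065, +400 (leaf)
 * = 3465, +1024 (all arguments constant) = 4489. */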
1437
1438 typedef struct walk_env_t {
1439         ir_graph **irgs;
1440         size_t   last_irg;
1441 } walk_env_t;
1442
1443 /**
1444  * Callgraph walker, collect all visited graphs.
1445  */
1446 static void callgraph_walker(ir_graph *irg, void *data)
1447 {
1448         walk_env_t *env = (walk_env_t *)data;
1449         env->irgs[env->last_irg++] = irg;
1450 }
1451
1452 /**
1453  * Creates an inline order for all graphs.
1454  *
1455  * @return the list of graphs.
1456  */
1457 static ir_graph **create_irg_list(void)
1458 {
1459         ir_entity  **free_methods;
1460         size_t     n_irgs = get_irp_n_irgs();
1461         walk_env_t env;
1462
1463         cgana(&free_methods);
1464         xfree(free_methods);
1465
1466         compute_callgraph();
1467
1468         env.irgs     = XMALLOCNZ(ir_graph*, n_irgs);
1469         env.last_irg = 0;
1470
1471         callgraph_walk(NULL, callgraph_walker, &env);
1472         assert(n_irgs == env.last_irg);
1473
1474         free_callgraph();
1475
1476         return env.irgs;
1477 }
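
/* Note: since callgraph_walker() is registered as the post-visitor of the
 * callgraph walk, callees tend to appear in the list before their callers,
 * so inline_into() usually processes a graph after the graphs it calls have
 * already been handled. */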
1478
1479 /**
1480  * Push a call onto the priority queue if its benefice is big enough.
1481  *
1482  * @param pqueue   the priority queue of calls
1483  * @param call     the call entry
1484  * @param inline_threshold
1485  *                 the threshold value
1486  */
1487 static void maybe_push_call(pqueue_t *pqueue, call_entry *call,
1488                             int inline_threshold)
1489 {
1490         ir_graph *callee   = call->callee;
1491         int       benefice = calc_inline_benefice(call, callee);
1492
1493         DB((dbg, LEVEL_2, "In %+F Call %+F to %+F has benefice %d\n",
1494             get_irn_irg(call->call), call->call, callee, benefice));
1495
1496         ir_entity                *ent   = get_irg_entity(callee);
1497         mtp_additional_properties props = get_entity_additional_properties(ent);
1498         if (!(props & mtp_property_always_inline) && benefice < inline_threshold) {
1499                 return;
1500         }
1501
1502         pqueue_put(pqueue, call, benefice);
1503 }
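
/* For example, with inline_threshold = 0 every call whose benefice came out
 * negative (including those rejected with INT_MIN above) stays out of the
 * queue, unless the callee carries mtp_property_always_inline. */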
1504
1505 /**
1506  * Try to inline calls into a graph.
1507  *
1508  * @param irg      the graph into which we inline
1509  * @param maxsize  do NOT inline if the size of irg gets
1510  *                 bigger than this amount
1511  * @param inline_threshold
1512  *                 threshold value for inline decision
1513  * @param copied_graphs
1514  *                 map containing copies of recursive graphs
1515  */
1516 static void inline_into(ir_graph *irg, unsigned maxsize,
1517                         int inline_threshold, pmap *copied_graphs)
1518 {
1519         int            phiproj_computed = 0;
1520         inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
1521         wenv_t         wenv;
1522         pqueue_t       *pqueue;
1523
1524         if (env->n_call_nodes == 0)
1525                 return;
1526
1527         if (env->n_nodes > maxsize) {
1528                 DB((dbg, LEVEL_2, "%+F: too big (%d)\n", irg, env->n_nodes));
1529                 return;
1530         }
1531
1532         current_ir_graph = irg;
1533         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1534
1535         /* put the calls of the graph into the priority queue */
1536         pqueue = new_pqueue();
1537
1538         list_for_each_entry(call_entry, curr_call, &env->calls, list) {
1539                 assert(is_Call(curr_call->call));
1540                 maybe_push_call(pqueue, curr_call, inline_threshold);
1541         }
1542
1543         /* note that the list of possible calls is updated during the process */
1544         while (!pqueue_empty(pqueue)) {
1545                 int                 did_inline;
1546                 call_entry          *curr_call  = (call_entry*)pqueue_pop_front(pqueue);
1547                 ir_graph            *callee     = curr_call->callee;
1548                 ir_node             *call_node  = curr_call->call;
1549                 inline_irg_env      *callee_env = (inline_irg_env*)get_irg_link(callee);
1550                 ir_entity           *ent        = get_irg_entity(callee);
1551                 mtp_additional_properties props
1552                         = get_entity_additional_properties(ent);
1553                 ir_graph            *callee_copy;
1554                 int                 loop_depth;
1555
1556                 if (!(props & mtp_property_always_inline)
1557                     && env->n_nodes + callee_env->n_nodes > maxsize) {
1558                         DB((dbg, LEVEL_2, "%+F: too big (%d) + %+F (%d)\n", irg,
1559                                                 env->n_nodes, callee, callee_env->n_nodes));
1560                         continue;
1561                 }
1562
1563                 callee_copy = pmap_get(ir_graph, copied_graphs, callee);
1564                 if (callee_copy != NULL) {
1565                         int benefice = curr_call->benefice;
1566                         /*
1567                          * Reduce the weight for recursive functions if not all arguments are const;
1568                          * inlining recursive functions is rarely good.
1569                          */
1570                         if (!curr_call->all_const)
1571                                 benefice -= 2000;
1572                         if (benefice < inline_threshold)
1573                                 continue;
1574
1575                         /*
1576                          * Remap callee if we have a copy.
1577                          */
1578                         callee     = callee_copy;
1579                         callee_env = (inline_irg_env*)get_irg_link(callee);
1580                 }
1581
1582                 if (current_ir_graph == callee) {
1583                         /*
1584                          * Recursive call: we cannot directly inline because we cannot
1585                          * walk the graph and change it. So we have to make a copy of
1586                          * the graph first.
1587                          */
1588                         int benefice = curr_call->benefice;
1589                         ir_graph *copy;
1590
1591                         /*
1592                          * Reduce the weight for recursive functions if not all arguments are const;
1593                          * inlining recursive functions is rarely good.
1594                          */
1595                         if (!curr_call->all_const)
1596                                 benefice -= 2000;
1597                         if (benefice < inline_threshold)
1598                                 continue;
1599
1600                         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1601
1602                         /*
1603                          * No copy yet, create one.
1604                          * Note that recursive methods are never leafs, so it is
1605                          * sufficient to test this condition here.
1606                          */
1607                         copy = create_irg_copy(callee);
1608
1609                         /* create_irg_copy() destroys the Proj links, recompute them */
1610                         phiproj_computed = 0;
1611
1612                         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1613
1614                         /* allocate a new environment */
1615                         callee_env = alloc_inline_irg_env();
1616                         set_irg_link(copy, callee_env);
1617
1618                         assure_irg_properties(copy, IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO);
1619                         memset(&wenv, 0, sizeof(wenv));
1620                         wenv.x              = callee_env;
1621                         wenv.ignore_callers = 1;
1622                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
1623
1624                         /*
1625                          * Enter the entity of the original graph. This is needed
1626                          * for inline_method(). However, note that ent->irg still points
1627                          * to callee, NOT to copy.
1628                          */
1629                         set_irg_entity(copy, get_irg_entity(callee));
1630
1631                         pmap_insert(copied_graphs, callee, copy);
1632                         callee = copy;
1633
1634                         /* we have only one caller: the original graph */
1635                         callee_env->n_callers      = 1;
1636                         callee_env->n_callers_orig = 1;
1637                 }
1638                 if (! phiproj_computed) {
1639                         phiproj_computed = 1;
1640                         collect_phiprojs(current_ir_graph);
1641                 }
1642                 did_inline = inline_method(call_node, callee);
1643                 if (!did_inline)
1644                         continue;
1645
1646                 /* call was inlined, Phi/Projs for current graph must be recomputed */
1647                 phiproj_computed = 0;
1648
1649                 /* remove it from the caller list */
1650                 list_del(&curr_call->list);
1651
1652                 /* callee was inlined. Append its call list. */
1653                 env->got_inline = 1;
1654                 --env->n_call_nodes;
1655
1656                 /* we just generate a bunch of new calls */
1657                 loop_depth = curr_call->loop_depth;
1658                 list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
1659                         inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
1660                         ir_node        *new_call;
1661                         call_entry     *new_entry;
1662
1663                         /* after we have inlined callee, all called methods inside
1664                          * callee are now called once more */
1665                         ++penv->n_callers;
1666
1667                         /* Note that the src list points to Call nodes in the inlined graph,
1668                          * but we need Call nodes in our graph. Luckily the inliner leaves
1669                          * this information in the link field. */
1670                         new_call = (ir_node*)get_irn_link(centry->call);
1671                         if (get_irn_irg(new_call) != irg) {
1672                         /* centry->call has not been copied, which means it is dead.
1673                          * This might happen during inlining if a const function
1674                          * that cannot be inlined is only used as an unused argument
1675                          * of another function, which is inlined. */
1676                                 continue;
1677                         }
1678                         assert(is_Call(new_call));
1679
1680                         new_entry = duplicate_call_entry(centry, new_call, loop_depth);
1681                         list_add_tail(&new_entry->list, &env->calls);
1682                         maybe_push_call(pqueue, new_entry, inline_threshold);
1683                 }
1684
1685                 env->n_call_nodes += callee_env->n_call_nodes;
1686                 env->n_nodes += callee_env->n_nodes;
1687                 --callee_env->n_callers;
1688         }
1689         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1690         del_pqueue(pqueue);
1691 }
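
/* Example of how the queue is refilled: assume A() calls B() and B() calls
 * C().  Once B has been inlined into A, the copy of B's call to C now lies
 * in A; it is recovered through the link field of the original Call node,
 * duplicated as a new call_entry with the surrounding loop depth taken into
 * account, and offered to the queue again, so C may be inlined into A too. */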
1692
1693 /*
1694  * Heuristic inliner. Calculates a benefice value for every call and inlines
1695  * those calls with a value higher than the threshold.
1696  */
1697 void inline_functions(unsigned maxsize, int inline_threshold,
1698                       opt_ptr after_inline_opt)
1699 {
1700         inline_irg_env   *env;
1701         size_t           i, n_irgs;
1702         ir_graph         *rem;
1703         wenv_t           wenv;
1704         pmap             *copied_graphs;
1705         pmap_entry       *pm_entry;
1706         ir_graph         **irgs;
1707
1708         rem = current_ir_graph;
1709         obstack_init(&temp_obst);
1710
1711         irgs = create_irg_list();
1712
1713         /* a map for the copied graphs, used to inline recursive calls */
1714         copied_graphs = pmap_create();
1715
1716         /* extend all irgs by a temporary data structure for inlining. */
1717         n_irgs = get_irp_n_irgs();
1718         for (i = 0; i < n_irgs; ++i)
1719                 set_irg_link(irgs[i], alloc_inline_irg_env());
1720
1721         /* Pre-compute information in temporary data structure. */
1722         wenv.ignore_runtime = 0;
1723         wenv.ignore_callers = 0;
1724         for (i = 0; i < n_irgs; ++i) {
1725                 ir_graph *irg = irgs[i];
1726
1727                 free_callee_info(irg);
1728
1729                 wenv.x = (inline_irg_env*)get_irg_link(irg);
1730                 assure_loopinfo(irg);
1731                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
1732         }
1733
1734         /* -- and now inline. -- */
1735         for (i = 0; i < n_irgs; ++i) {
1736                 ir_graph *irg = irgs[i];
1737
1738                 inline_into(irg, maxsize, inline_threshold, copied_graphs);
1739         }
1740
1741         for (i = 0; i < n_irgs; ++i) {
1742                 ir_graph *irg = irgs[i];
1743
1744                 env = (inline_irg_env*)get_irg_link(irg);
1745                 if (env->got_inline && after_inline_opt != NULL) {
1746                         /* this irg got calls inlined: optimize it */
1747                         after_inline_opt(irg);
1748                 }
1749                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
1750                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
1751                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
1752                         env->n_callers_orig, env->n_callers,
1753                         get_entity_name(get_irg_entity(irg))));
1754                 }
1755         }
1756
1757         /* kill the copied graphs: we don't need them anymore */
1758         foreach_pmap(copied_graphs, pm_entry) {
1759                 ir_graph *copy = (ir_graph*)pm_entry->value;
1760
1761                 /* reset the entity, otherwise it will be deleted in the next step ... */
1762                 set_irg_entity(copy, NULL);
1763                 free_ir_graph(copy);
1764         }
1765         pmap_destroy(copied_graphs);
1766
1767         xfree(irgs);
1768
1769         obstack_free(&temp_obst, NULL);
1770         current_ir_graph = rem;
1771 }
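
/* A minimal usage sketch (the values are hypothetical): inline all calls
 * with a non-negative benefice while keeping every graph below 750 nodes,
 * then clean up each graph that changed:
 *
 *     static void my_after_inline(ir_graph *irg)
 *     {
 *             local_optimize_graph(irg);
 *     }
 *     ...
 *     inline_functions(750, 0, my_after_inline);
 */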
1772
1773 typedef struct inline_functions_pass_t {
1774         ir_prog_pass_t pass;
1775         unsigned       maxsize;
1776         int            inline_threshold;
1777         opt_ptr        after_inline_opt;
1778 } inline_functions_pass_t;
1779
1780 /**
1781  * Wrapper to run inline_functions() as an ir_prog pass.
1782  */
1783 static int inline_functions_wrapper(ir_prog *irp, void *context)
1784 {
1785         inline_functions_pass_t *pass = (inline_functions_pass_t*)context;
1786
1787         (void)irp;
1788         inline_functions(pass->maxsize, pass->inline_threshold,
1789                          pass->after_inline_opt);
1790         return 0;
1791 }
1792
1793 /* create an ir_prog pass for inline_functions */
1794 ir_prog_pass_t *inline_functions_pass(
1795           const char *name, unsigned maxsize, int inline_threshold,
1796           opt_ptr after_inline_opt)
1797 {
1798         inline_functions_pass_t *pass = XMALLOCZ(inline_functions_pass_t);
1799
1800         pass->maxsize          = maxsize;
1801         pass->inline_threshold = inline_threshold;
1802         pass->after_inline_opt = after_inline_opt;
1803
1804         return def_prog_pass_constructor(
1805                 &pass->pass, name ? name : "inline_functions",
1806                 inline_functions_wrapper);
1807 }
1808
1809 void firm_init_inline(void)
1810 {
1811         FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
1812 }