1 /*
2  * Copyright (C) 1995-2011 University of Karlsruhe.  All rights reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file     opt_inline.c
22  * @brief    Dead node elimination and procedure inlining.
23  * @author   Michael Beck, Goetz Lindenmaier
24  */
25 #include "config.h"
26
27 #include <limits.h>
28 #include <stdbool.h>
29 #include <assert.h>
30
31 #include "irnode_t.h"
32 #include "irgraph_t.h"
33 #include "irprog_t.h"
34
35 #include "iroptimize.h"
36 #include "ircons_t.h"
37 #include "iropt_t.h"
38 #include "irgopt.h"
39 #include "irgmod.h"
40 #include "irgwalk.h"
41
42 #include "array_t.h"
43 #include "list.h"
44 #include "pset.h"
45 #include "pmap.h"
46 #include "pdeq.h"
47 #include "xmalloc.h"
48 #include "pqueue.h"
49
50 #include "irouts.h"
51 #include "irloop_t.h"
52 #include "irbackedge_t.h"
53 #include "opt_init.h"
54 #include "cgana.h"
55 #include "trouts.h"
56 #include "error.h"
57
58 #include "analyze_irg_args.h"
59 #include "iredges_t.h"
60 #include "irflag_t.h"
61 #include "irhooks.h"
62 #include "irtools.h"
63 #include "iropt_dbg.h"
64 #include "irpass_t.h"
65 #include "irnodemap.h"
66
67 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
68
69 /*------------------------------------------------------------------*/
70 /* Routines for dead node elimination / copying garbage collection  */
71 /* of the obstack.                                                  */
72 /*------------------------------------------------------------------*/
73
74 /**
75  * Remember the new node in the old node by using a field all nodes have.
76  */
77 static void set_new_node(ir_node *node, ir_node *new_node)
78 {
79         set_irn_link(node, new_node);
80 }
81
82 /**
83  * Get this new node, before the old node is forgotten.
84  */
85 static inline ir_node *get_new_node(ir_node *old_node)
86 {
87         assert(irn_visited(old_node));
88         return (ir_node*) get_irn_link(old_node);
89 }
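
/* Illustrative sketch (not called anywhere) of the old->new mapping protocol
 * used by the copying walks in this file: every copied node is registered
 * via set_new_node(), and predecessors are later resolved through
 * get_new_node():
 *
 *     ir_node *copy = irn_copy_into_irg(node, new_irg);
 *     set_new_node(node, copy);   // remember the copy in the link field
 *     ...
 *     // while rewiring: map an old predecessor to its copy
 *     ir_node *pred_copy = get_new_node(get_irn_n(node, i));
 */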
90
91 /*--------------------------------------------------------------------*/
92 /*  Functionality for inlining                                         */
93 /*--------------------------------------------------------------------*/
94
95 /**
96  * Copy a node for inlining.  Updates attributes that change when
97  * inlining but not for dead node elimination.
98  *
99  * Copies the node by calling irn_copy_into_irg() and then updates the
100  * entity if it is a local one.  env must be a pointer to the graph the
101  * node is copied into.  The copied entities must be stored in the link
102  * fields of the original entities.
103  */
104 static void copy_node_inline(ir_node *node, void *env)
105 {
106         ir_graph *new_irg  = (ir_graph*) env;
107         ir_node  *new_node = irn_copy_into_irg(node, new_irg);
108
109         set_new_node(node, new_node);
110         if (is_Sel(node)) {
111                 ir_graph  *old_irg        = get_irn_irg(node);
112                 ir_type   *old_frame_type = get_irg_frame_type(old_irg);
113                 ir_entity *old_entity     = get_Sel_entity(node);
114                 assert(is_Sel(new_node));
115                 /* use copied entities from the new frame */
116                 if (get_entity_owner(old_entity) == old_frame_type) {
117                         ir_entity *new_entity = (ir_entity*)get_entity_link(old_entity);
118                         assert(new_entity != NULL);
119                         set_Sel_entity(new_node, new_entity);
120                 }
121         } else if (is_Block(new_node)) {
122                 new_node->attr.block.irg.irg = new_irg;
123         }
124 }
125
126 static void set_preds_inline(ir_node *node, void *env)
127 {
128         ir_node *new_node;
129
130         irn_rewire_inputs(node);
131
132         /* move constants into start block */
133         new_node = get_new_node(node);
134         if (is_irn_constlike(new_node)) {
135                 ir_graph *new_irg     = (ir_graph *) env;
136                 ir_node  *start_block = get_irg_start_block(new_irg);
137                 set_nodes_block(new_node, start_block);
138         }
139 }
140
141 /**
142  * Walker: checks for constructs that prevent inlining.
143  */
144 static void find_addr(ir_node *node, void *env)
145 {
146         bool *allow_inline = (bool*)env;
147
148         if (is_Block(node) && get_Block_entity(node)) {
149                 /*
150                  * Currently we cannot correctly handle inlining of
151                  * blocks whose address has been taken.
152                  */
153                 *allow_inline = false;
154         } else if (is_Sel(node)) {
155                 ir_graph *irg = current_ir_graph;
156                 if (get_Sel_ptr(node) == get_irg_frame(irg)) {
157                         /* access to frame */
158                         ir_entity *ent = get_Sel_entity(node);
159                         if (get_entity_owner(ent) != get_irg_frame_type(irg)) {
160                                 /* access to value_type */
161                                 *allow_inline = false;
162                         }
163                         if (is_parameter_entity(ent)) {
164                                 *allow_inline = false;
165                         }
166                 }
167         } else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
168                 /* From GCC:
169                  * Refuse to inline an alloca call unless the user explicitly forced
170                  * it, as this may drastically increase the program's memory overhead
171                  * when the function using alloca is called in a loop.  In the GCC
172                  * shipped with SPEC2000, inlining into schedule_block made it require
173                  * 2GB of RAM instead of 256MB.
174                  *
175                  * Sadly this is true for our implementation as well.  Moreover, we
176                  * cannot yet differentiate between alloca() and VLAs, so this also
177                  * disables inlining of functions using VLAs (which would be safe).
178                  *
179                  * Two possible solutions:
180                  * - add a flag to the Alloc node for "real" alloca() calls
181                  * - add a new Stack-Restore node at the end of a function using
182                  *   alloca()
183                  */
184                 *allow_inline = false;
185         }
186 }
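
/* Typical use of this walker, as done by can_inline() below: assume inlining
 * is allowed and let the walker veto it:
 *
 *     bool allow_inline = true;
 *     irg_walk_graph(called_graph, find_addr, NULL, &allow_inline);
 *     if (!allow_inline)
 *             return false;
 */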
187
188 /**
189  * Check if we can inline a given call.
190  * Currently, we cannot inline two cases:
191  * - calls with compound (struct/union) arguments or results
192  * - graphs that take the address of a parameter
193  *
194  * These conditions are checked here.
195  */
196 static bool can_inline(ir_node *call, ir_graph *called_graph)
197 {
198         ir_entity          *called      = get_irg_entity(called_graph);
199         ir_type            *called_type = get_entity_type(called);
200         ir_type            *call_type   = get_Call_type(call);
201         size_t              n_params    = get_method_n_params(called_type);
202         size_t              n_arguments = get_method_n_params(call_type);
203         size_t              n_res       = get_method_n_ress(called_type);
204         irg_inline_property prop        = get_irg_inline_property(called_graph);
205         size_t              i;
206         bool                res;
207
208         if (prop == irg_inline_forbidden)
209                 return false;
210
211         if (n_arguments != n_params) {
212                 /* this is a bad feature of C: without a prototype, we can
213                  * call a function with fewer parameters than needed. Currently
214                  * we don't support this, although we could use Unknown then. */
215                 return false;
216         }
217         if (n_res != get_method_n_ress(call_type)) {
218                 return false;
219         }
220
221         /* Argh, compiling C has some bad consequences:
222          * if argument and parameter types disagree, the behaviour is
223          * implementation-defined.  We support inlining if the bit size of
224          * the types matches AND the same arithmetic is used. */
225         for (i = 0; i < n_params; ++i) {
226                 ir_type *param_tp = get_method_param_type(called_type, i);
227                 ir_type *arg_tp   = get_method_param_type(call_type, i);
228
229                 if (param_tp != arg_tp) {
230                         ir_mode *pmode = get_type_mode(param_tp);
231                         ir_mode *amode = get_type_mode(arg_tp);
232
233                         if (pmode == NULL || amode == NULL)
234                                 return false;
235                         if (get_mode_size_bits(pmode) != get_mode_size_bits(amode))
236                                 return false;
237                         if (get_mode_arithmetic(pmode) != get_mode_arithmetic(amode))
238                                 return false;
239                         /* otherwise we can simply "reinterpret" the bits */
240                 }
241         }
242         for (i = 0; i < n_res; ++i) {
243                 ir_type *decl_res_tp = get_method_res_type(called_type, i);
244                 ir_type *used_res_tp = get_method_res_type(call_type, i);
245
246                 if (decl_res_tp != used_res_tp) {
247                         ir_mode *decl_mode = get_type_mode(decl_res_tp);
248                         ir_mode *used_mode = get_type_mode(used_res_tp);
249                         if (decl_mode == NULL || used_mode == NULL)
250                                 return false;
251                         if (get_mode_size_bits(decl_mode) != get_mode_size_bits(used_mode))
252                                 return false;
253                         if (get_mode_arithmetic(decl_mode) != get_mode_arithmetic(used_mode))
254                                 return false;
255                         /* otherwise we can "reinterpret" the bits */
256                 }
257         }
258
259         /* check parameters for compound arguments */
260         for (i = 0; i < n_params; ++i) {
261                 ir_type *p_type = get_method_param_type(call_type, i);
262
263                 if (is_compound_type(p_type))
264                         return false;
265         }
266
267         /* check results for compound arguments */
268         for (i = 0; i < n_res; ++i) {
269                 ir_type *r_type = get_method_res_type(call_type, i);
270
271                 if (is_compound_type(r_type))
272                         return false;
273         }
274
275         res = true;
276         irg_walk_graph(called_graph, find_addr, NULL, &res);
277
278         return res;
279 }
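
/* Example for the mode checks above: calling a function defined with an
 * `int` parameter through a prototype declaring `long` is still inlinable
 * on a target where both are 32-bit integers using the same arithmetic
 * (the bits are simply reinterpreted), while an `int`/`float` mismatch is
 * rejected: even with equal bit size the arithmetic differs. */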
280
281 enum exc_mode {
282         exc_handler,    /**< There is a handler. */
283         exc_no_handler  /**< Exception handling not represented. */
284 };
285
286 /**
287  * Copy all entities on the stack frame of one irg to the stack frame of
288  * another.  Sets the entity links of the old entities to their copies.
289  */
290 static void copy_frame_entities(ir_graph *from, ir_graph *to)
291 {
292         ir_type *from_frame = get_irg_frame_type(from);
293         ir_type *to_frame   = get_irg_frame_type(to);
294         size_t   n_members  = get_class_n_members(from_frame);
295         size_t   i;
296         assert(from_frame != to_frame);
297
298         for (i = 0; i < n_members; ++i) {
299                 ir_entity *old_ent = get_class_member(from_frame, i);
300                 ir_entity *new_ent = copy_entity_own(old_ent, to_frame);
301                 set_entity_link(old_ent, new_ent);
302                 assert(!is_parameter_entity(old_ent));
303         }
304 }
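
/* After copy_frame_entities() every entity e on the old frame satisfies
 * get_entity_link(e) == its copy on the new frame, which is exactly what
 * copy_node_inline() above relies on when rewriting Sel nodes. */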
305
306 /* Inlines a method at the given call site. */
307 int inline_method(ir_node *call, ir_graph *called_graph)
308 {
309         ir_node       *pre_call;
310         ir_node       *post_call, *post_bl;
311         ir_node       *in[pn_Start_max+1];
312         ir_node       *end, *end_bl, *block;
313         ir_node       **res_pred;
314         ir_node       **cf_pred;
315         ir_node       **args_in;
316         ir_node       *ret, *phi;
317         int           arity, n_ret, n_exc, n_res, i, j, rem_opt;
318         int           irn_arity, n_params;
319         int           n_mem_phi;
320         enum exc_mode exc_handling;
321         ir_type       *mtp;
322         ir_type       *ctp;
323         ir_entity     *ent;
324         ir_graph      *rem;
325         ir_graph      *irg = get_irn_irg(call);
326
327         /* we cannot inline some types of calls */
328         if (! can_inline(call, called_graph))
329                 return 0;
330
331         /* We cannot inline a recursive call. The graph must be copied before
332          * the call to inline_method() using create_irg_copy(). */
333         if (called_graph == irg)
334                 return 0;
335
336         ent      = get_irg_entity(called_graph);
337         mtp      = get_entity_type(ent);
338         ctp      = get_Call_type(call);
339         n_params = get_method_n_params(mtp);
340         n_res    = get_method_n_ress(mtp);
341
342         rem = current_ir_graph;
343         current_ir_graph = irg;
344
345         DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));
346
347         /* optimizations can cause problems when allocating new nodes */
348         rem_opt = get_opt_optimize();
349         set_optimize(0);
350
351         /* Handle graph state */
352         assert(get_irg_phase_state(irg) != phase_building);
353         assert(get_irg_pinned(irg) == op_pin_state_pinned);
354         assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
355         clear_irg_properties(irg, IR_GRAPH_PROPERTY_CONSISTENT_DOMINANCE
356                            | IR_GRAPH_PROPERTY_CONSISTENT_ENTITY_USAGE);
357         set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
358         clear_irg_properties(irg, IR_GRAPH_PROPERTY_CONSISTENT_ENTITY_USAGE);
359         edges_deactivate(irg);
360
361         /* here we know we WILL inline, so inform the statistics */
362         hook_inline(call, called_graph);
363
364         /* -- Decide how to handle exception control flow: Is there a handler
365            for the Call node, or do we branch directly to End on an exception?
366            exc_handling:
367            exc_handler     There is a handler.
368            exc_no_handler  Exception handling is not represented in Firm. -- */
369         {
370                 ir_node *Xproj = NULL;
371                 ir_node *proj;
372                 for (proj = (ir_node*)get_irn_link(call); proj != NULL;
373                      proj = (ir_node*)get_irn_link(proj)) {
374                         long proj_nr = get_Proj_proj(proj);
375                         if (proj_nr == pn_Call_X_except) Xproj = proj;
376                 }
377                 exc_handling = Xproj != NULL ? exc_handler : exc_no_handler;
378         }
379
380         /* create the argument tuple */
381         args_in = ALLOCAN(ir_node*, n_params);
382
383         block = get_nodes_block(call);
384         for (i = n_params - 1; i >= 0; --i) {
385                 ir_node *arg      = get_Call_param(call, i);
386                 ir_type *param_tp = get_method_param_type(mtp, i);
387                 ir_mode *mode     = get_type_mode(param_tp);
388
389                 if (mode != get_irn_mode(arg)) {
390                         arg = new_r_Conv(block, arg, mode);
391                 }
392                 args_in[i] = arg;
393         }
394
395         /* pre_call represents the start values of the called procedure and
396          * later replaces its Start node. post_call is the old Call node and
397          * collects the results of the called graph. Both become Tuples. */
398         post_bl = get_nodes_block(call);
399         /* XxMxPxPxPxT of Start + parameter of Call */
400         in[pn_Start_M]              = get_Call_mem(call);
401         in[pn_Start_X_initial_exec] = new_r_Jmp(post_bl);
402         in[pn_Start_P_frame_base]   = get_irg_frame(irg);
403         in[pn_Start_T_args]         = new_r_Tuple(post_bl, n_params, args_in);
404         pre_call = new_r_Tuple(post_bl, pn_Start_max+1, in);
405         post_call = call;
406
407         /* --
408            The new block gets the ins of the old block, pre_call and all its
409            predecessors and all Phi nodes. -- */
410         part_block(pre_call);
411
412         /* increment visited flag for later walk */
413         inc_irg_visited(called_graph);
414
415         /* link some nodes to nodes in the current graph, so that instead of
416          * being copied the linked nodes are used.
417          * This way the copier uses the created Tuple instead of copying the
418          * Start node, and similarly for singleton nodes like NoMem and Bad.
419          * Note: this prohibits the predecessors from being copied - only do
420          *       this for nodes without predecessors */
421         {
422                 ir_node *start_block;
423                 ir_node *start;
424                 ir_node *nomem;
425
426                 start_block = get_irg_start_block(called_graph);
427                 set_new_node(start_block, get_nodes_block(pre_call));
428                 mark_irn_visited(start_block);
429
430                 start = get_irg_start(called_graph);
431                 set_new_node(start, pre_call);
432                 mark_irn_visited(start);
433
434                 nomem = get_irg_no_mem(called_graph);
435                 set_new_node(nomem, get_irg_no_mem(irg));
436                 mark_irn_visited(nomem);
437         }
438
439         /* the entity link is used to link entities on the old stack frame to
440          * their copies on the new one */
441         irp_reserve_resources(irp, IRP_RESOURCE_ENTITY_LINK);
442
443         /* copy entities and nodes */
444         assert(!irn_visited(get_irg_end(called_graph)));
445         copy_frame_entities(called_graph, irg);
446         irg_walk_core(get_irg_end(called_graph), copy_node_inline, set_preds_inline,
447                       irg);
448
449         irp_free_resources(irp, IRP_RESOURCE_ENTITY_LINK);
450
451         /* -- Merge the end of the inlined procedure with the call site -- */
452         /* We will turn the old Call node into a Tuple with the following
453            predecessors:
454            -1:  Block of Tuple.
455            0: Phi of all Memories of Return statements.
456            1: Jmp from new Block that merges the control flow from all exception
457            predecessors of the old end block.
458            2: Tuple of all arguments.
459            3: Phi of Exception memories.
460            In case the old Call directly branches to End on an exception we don't
461            need the block merging all exceptions nor the Phi of the exception
462            memories.
463         */
464
465         /* Precompute some values */
466         end_bl = get_new_node(get_irg_end_block(called_graph));
467         end    = get_new_node(get_irg_end(called_graph));
468         arity  = get_Block_n_cfgpreds(end_bl);    /* arity = n_exc + n_ret  */
469         n_res  = get_method_n_ress(get_Call_type(call));
470
471         res_pred = XMALLOCN(ir_node*, n_res);
472         cf_pred  = XMALLOCN(ir_node*, arity);
473
474         /* preserve the keepalives of the inlined graph */
475         irn_arity = get_irn_arity(end);
476         for (i = 0; i < irn_arity; i++) {
477                 ir_node *ka = get_End_keepalive(end, i);
478                 if (! is_Bad(ka))
479                         add_End_keepalive(get_irg_end(irg), ka);
480         }
481
482         /* replace Return nodes by Jmp nodes */
483         n_ret = 0;
484         for (i = 0; i < arity; i++) {
485                 ir_node *ret;
486                 ret = get_Block_cfgpred(end_bl, i);
487                 if (is_Return(ret)) {
488                         ir_node *block = get_nodes_block(ret);
489                         cf_pred[n_ret] = new_r_Jmp(block);
490                         n_ret++;
491                 }
492         }
493         set_irn_in(post_bl, n_ret, cf_pred);
494
495         /* build a Tuple for all results of the method.
496          * add Phi node if there was more than one Return. */
497         turn_into_tuple(post_call, pn_Call_max+1);
498         /* First the Memory-Phi */
499         n_mem_phi = 0;
500         for (i = 0; i < arity; i++) {
501                 ret = get_Block_cfgpred(end_bl, i);
502                 if (is_Return(ret)) {
503                         cf_pred[n_mem_phi++] = get_Return_mem(ret);
504                 }
505                 /* memory output for some exceptions is directly connected to End */
506                 if (is_Call(ret)) {
507                         cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 3);
508                 } else if (is_fragile_op(ret)) {
509                         /* We rely on all cfops having the memory output at the same position. */
510                         cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 0);
511                 } else if (is_Raise(ret)) {
512                         cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 1);
513                 }
514         }
515         phi = new_r_Phi(post_bl, n_mem_phi, cf_pred, mode_M);
516         set_Tuple_pred(call, pn_Call_M, phi);
517         /* Conserve Phi-list for further inlinings -- but might be optimized */
518         if (get_nodes_block(phi) == post_bl) {
519                 set_irn_link(phi, get_irn_link(post_bl));
520                 set_irn_link(post_bl, phi);
521         }
522         /* Now the real results */
523         if (n_res > 0) {
524                 ir_node *result_tuple;
525                 for (j = 0; j < n_res; j++) {
526                         ir_type *res_type = get_method_res_type(ctp, j);
527                         ir_mode *res_mode = get_type_mode(res_type);
528                         n_ret = 0;
529                         for (i = 0; i < arity; i++) {
530                                 ret = get_Block_cfgpred(end_bl, i);
531                                 if (is_Return(ret)) {
532                                         ir_node *res = get_Return_res(ret, j);
533                                         if (get_irn_mode(res) != res_mode) {
534                                                 ir_node *block = get_nodes_block(res);
535                                                 res = new_r_Conv(block, res, res_mode);
536                                         }
537                                         cf_pred[n_ret] = res;
538                                         n_ret++;
539                                 }
540                         }
541                         if (n_ret > 0) {
542                                 phi = new_r_Phi(post_bl, n_ret, cf_pred, res_mode);
543                         } else {
544                                 phi = new_r_Bad(irg, res_mode);
545                         }
546                         res_pred[j] = phi;
547                         /* Conserve Phi-list for further inlinings -- but might be optimized */
548                         if (get_nodes_block(phi) == post_bl) {
549                                 set_Phi_next(phi, get_Block_phis(post_bl));
550                                 set_Block_phis(post_bl, phi);
551                         }
552                 }
553                 result_tuple = new_r_Tuple(post_bl, n_res, res_pred);
554                 set_Tuple_pred(call, pn_Call_T_result, result_tuple);
555         } else {
556                 set_Tuple_pred(call, pn_Call_T_result, new_r_Bad(irg, mode_T));
557         }
558         /* handle the regular call */
559         set_Tuple_pred(call, pn_Call_X_regular, new_r_Jmp(post_bl));
560
561         /* Finally the exception control flow.
562            We have two possible situations:
563            First if the Call branches to an exception handler:
564            We need to add a Phi node to
565            collect the memory containing the exception objects.  Further we need
566            to add another block to get a correct representation of this Phi.  To
567            this block we add a Jmp that resolves into the X output of the Call
568            when the Call is turned into a tuple.
569            Second: There is no exception edge. Just add all inlined exception
570            branches to the End node.
571          */
572         if (exc_handling == exc_handler) {
573                 n_exc = 0;
574                 for (i = 0; i < arity; i++) {
575                         ir_node *ret, *irn;
576                         ret = get_Block_cfgpred(end_bl, i);
577                         irn = skip_Proj(ret);
578                         if (is_fragile_op(irn) || is_Raise(irn)) {
579                                 cf_pred[n_exc] = ret;
580                                 ++n_exc;
581                         }
582                 }
583                 if (n_exc > 0) {
584                         if (n_exc == 1) {
585                                 /* simple fix */
586                                 set_Tuple_pred(call, pn_Call_X_except, cf_pred[0]);
587                         } else {
588                                 ir_node *block = new_r_Block(irg, n_exc, cf_pred);
589                                 set_Tuple_pred(call, pn_Call_X_except, new_r_Jmp(block));
590                         }
591                 } else {
592                         set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg, mode_X));
593                 }
594         } else {
595                 ir_node *main_end_bl;
596                 int main_end_bl_arity;
597                 ir_node **end_preds;
598
599                 /* exc_handling == exc_no_handler: add all exception edges to End */
600                 n_exc = 0;
601                 for (i = 0; i < arity; i++) {
602                         ir_node *ret = get_Block_cfgpred(end_bl, i);
603                         ir_node *irn = skip_Proj(ret);
604
605                         if (is_fragile_op(irn) || is_Raise(irn)) {
606                                 cf_pred[n_exc] = ret;
607                                 n_exc++;
608                         }
609                 }
610                 main_end_bl       = get_irg_end_block(irg);
611                 main_end_bl_arity = get_irn_arity(main_end_bl);
612                 end_preds         = XMALLOCN(ir_node*, n_exc + main_end_bl_arity);
613
614                 for (i = 0; i < main_end_bl_arity; ++i)
615                         end_preds[i] = get_irn_n(main_end_bl, i);
616                 for (i = 0; i < n_exc; ++i)
617                         end_preds[main_end_bl_arity + i] = cf_pred[i];
618                 set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
619                 set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg, mode_X));
620                 free(end_preds);
621         }
622         free(res_pred);
623         free(cf_pred);
624
625         /* --  Turn the optimizations back on. -- */
626         set_optimize(rem_opt);
627         current_ir_graph = rem;
628
629         return 1;
630 }
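
/* Minimal usage sketch for inline_method() (assuming `irg` is the pinned
 * caller graph and `callee` was resolved, e.g. via get_call_called_irg()).
 * As the inliners below do, the caller must provide Phi lists and Proj
 * links and recompute them after a successful inline:
 *
 *     ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_PHI_LIST);
 *     collect_phiprojs(irg);
 *     if (inline_method(call, callee)) {
 *             // Phi/Proj links of irg are stale now; call collect_phiprojs()
 *             // again before inlining the next call in this graph.
 *     }
 *     ir_free_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_PHI_LIST);
 */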
631
632 /********************************************************************/
633 /* Apply inlining to small methods.                                 */
634 /********************************************************************/
635
636 static struct obstack  temp_obst;
637
638 /** Represents a possibly inlinable call in a graph. */
639 typedef struct call_entry {
640         ir_node    *call;       /**< The Call node. */
641         ir_graph   *callee;     /**< The callee IR-graph. */
642         list_head  list;        /**< List head for linking the next one. */
643         int        loop_depth;  /**< The loop depth of this call. */
644         int        benefice;    /**< The calculated benefice of this call. */
645         unsigned   local_adr:1; /**< Set if this call gets an address of a local variable. */
646         unsigned   all_const:1; /**< Set if this call has only constant parameters. */
647 } call_entry;
648
649 /**
650  * environment for inlining small irgs
651  */
652 typedef struct inline_env_t {
653         struct obstack obst;  /**< An obstack where call_entries are allocated on. */
654         list_head      calls; /**< The call entry list. */
655 } inline_env_t;
656
657 /**
658  * Returns the irg called from a Call node. If the irg is not
659  * known, NULL is returned.
660  *
661  * @param call  the call node
662  */
663 static ir_graph *get_call_called_irg(ir_node *call)
664 {
665         ir_node *addr;
666
667         addr = get_Call_ptr(call);
668         if (is_SymConst_addr_ent(addr)) {
669                 ir_entity *ent = get_SymConst_entity(addr);
670                 /* we don't know which function gets finally bound to a weak symbol */
671                 if (get_entity_linkage(ent) & IR_LINKAGE_WEAK)
672                         return NULL;
673
674                 return get_entity_irg(ent);
675         }
676
677         return NULL;
678 }
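
/* Usage sketch: resolve the callee of a Call before considering it for
 * inlining; weak symbols deliberately yield NULL since they may be rebound
 * at link time:
 *
 *     ir_graph *callee = get_call_called_irg(call);
 *     if (callee != NULL)
 *             ... // candidate for inline_method(call, callee)
 */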
679
680 /**
681  * Walker: Collect all calls to known graphs inside a graph.
682  */
683 static void collect_calls(ir_node *call, void *env)
684 {
686         if (is_Call(call)) {
687                 ir_graph *called_irg = get_call_called_irg(call);
688
689                 if (called_irg != NULL) {
690                         /* The Call node calls a locally defined method.  Remember to inline. */
691                         inline_env_t *ienv  = (inline_env_t*)env;
692                         call_entry   *entry = OALLOC(&ienv->obst, call_entry);
693                         entry->call       = call;
694                         entry->callee     = called_irg;
695                         entry->loop_depth = 0;
696                         entry->benefice   = 0;
697                         entry->local_adr  = 0;
698                         entry->all_const  = 0;
699
700                         list_add_tail(&entry->list, &ienv->calls);
701                 }
702         }
703 }
704
705 /**
706  * Inlines all small methods at call sites where the called address comes
707  * from a SymConst node that references the entity representing the called
708  * method.
709  * The size argument is a rough measure for the code size of the method:
710  * Methods where the obstack containing the firm graph is smaller than
711  * size are inlined.
712  */
713 void inline_small_irgs(ir_graph *irg, int size)
714 {
715         ir_graph *rem = current_ir_graph;
716         inline_env_t env;
717         call_entry *entry;
718
719         current_ir_graph = irg;
720         /* Handle graph state */
721         assert(get_irg_phase_state(irg) != phase_building);
722         free_callee_info(irg);
723
724         /* Find Call nodes to inline.
725            (We cannot inline during a walk of the graph, as inlining the same
726            method several times changes the visited flag of the walked graph:
727            after the first inlining visited of the callee equals visited of
728            the caller.  With the next inlining both are increased.) */
729         obstack_init(&env.obst);
730         INIT_LIST_HEAD(&env.calls);
731         irg_walk_graph(irg, NULL, collect_calls, &env);
732
733         if (! list_empty(&env.calls)) {
734                 /* There are calls to inline */
735                 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
736                 collect_phiprojs(irg);
737
738                 list_for_each_entry(call_entry, entry, &env.calls, list) {
739                         ir_graph            *callee = entry->callee;
740                         irg_inline_property prop    = get_irg_inline_property(callee);
741
742                         if (prop == irg_inline_forbidden) {
743                                 continue;
744                         }
745
746                         if (prop >= irg_inline_forced ||
747                             _obstack_memory_used(callee->obst) - (int)obstack_room(callee->obst) < size) {
748                                 inline_method(entry->call, callee);
749                         }
750                 }
751                 ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
752         }
753         obstack_free(&env.obst, NULL);
754         current_ir_graph = rem;
755 }
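
/* Example invocation (the size threshold is a rough obstack-byte measure,
 * so useful values are frontend-specific; 5000 below is only illustrative):
 *
 *     inline_small_irgs(irg, 5000);
 */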
756
757 typedef struct inline_small_irgs_pass_t {
758         ir_graph_pass_t pass;
759         int            size;
760 } inline_small_irgs_pass_t;
761
762 /**
763  * Wrapper to run inline_small_irgs() as a pass.
764  */
765 static int inline_small_irgs_wrapper(ir_graph *irg, void *context)
766 {
767         inline_small_irgs_pass_t *pass = (inline_small_irgs_pass_t*)context;
768
769         inline_small_irgs(irg, pass->size);
770         return 0;
771 }
772
773 /* create a pass for inline_small_irgs() */
774 ir_graph_pass_t *inline_small_irgs_pass(const char *name, int size)
775 {
776         inline_small_irgs_pass_t *pass = XMALLOCZ(inline_small_irgs_pass_t);
777
778         pass->size = size;
779         return def_graph_pass_constructor(
780                 &pass->pass, name ? name : "inline_small_irgs", inline_small_irgs_wrapper);
781 }
782
783 /**
784  * Environment for inlining irgs.
785  */
786 typedef struct {
787         list_head calls;             /**< List of all call nodes in this graph. */
788         unsigned  *local_weights;    /**< Once allocated, the beneficial weight for transmitting local addresses. */
789         unsigned  n_nodes;           /**< Number of nodes in graph except Proj, Tuple, Sync, Start, End. */
790         unsigned  n_blocks;          /**< Number of Blocks in graph without Start and End block. */
791         unsigned  n_nodes_orig;      /**< for statistics */
792         unsigned  n_call_nodes;      /**< Number of Call nodes in the graph. */
793         unsigned  n_call_nodes_orig; /**< for statistics */
794         unsigned  n_callers;         /**< Number of known graphs that call this graph. */
795         unsigned  n_callers_orig;    /**< for statistics */
796         unsigned  got_inline:1;      /**< Set, if at least one call inside this graph was inlined. */
797         unsigned  recursive:1;       /**< Set, if this function is self recursive. */
798 } inline_irg_env;
799
800 /**
801  * Allocate a new environment for inlining.
802  */
803 static inline_irg_env *alloc_inline_irg_env(void)
804 {
805         inline_irg_env *env    = OALLOC(&temp_obst, inline_irg_env);
806         INIT_LIST_HEAD(&env->calls);
807         env->local_weights     = NULL;
808         env->n_nodes           = -2; /* do not count Start, End */
809         env->n_blocks          = -2; /* do not count Start, End Block */
810         env->n_nodes_orig      = -2; /* do not count Start, End */
811         env->n_call_nodes      = 0;
812         env->n_call_nodes_orig = 0;
813         env->n_callers         = 0;
814         env->n_callers_orig    = 0;
815         env->got_inline        = 0;
816         env->recursive         = 0;
817         return env;
818 }
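
/* Note on the -2 initializers above: the counters are unsigned, so -2 wraps
 * around. The later walk in collect_calls2() counts the Start and End nodes
 * (and blocks) like any others, and the wrap-around cancels them out again. */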
819
820 typedef struct walker_env {
821         inline_irg_env *x;     /**< the inline environment */
822         char ignore_runtime;   /**< the ignore runtime flag */
823         char ignore_callers;   /**< if set, do not update caller statistics */
824 } wenv_t;
825
826 /**
827  * post-walker: collect all calls in the inline-environment
828  * of a graph and sum some statistics.
829  */
830 static void collect_calls2(ir_node *call, void *ctx)
831 {
832         wenv_t         *env = (wenv_t*)ctx;
833         inline_irg_env *x = env->x;
834         unsigned        code = get_irn_opcode(call);
835         ir_graph       *callee;
836         call_entry     *entry;
837
838         /* count meaningful nodes in irg */
839         if (code != iro_Proj && code != iro_Tuple && code != iro_Sync) {
840                 if (code != iro_Block) {
841                         ++x->n_nodes;
842                         ++x->n_nodes_orig;
843                 } else {
844                         ++x->n_blocks;
845                 }
846         }
847
848         if (code != iro_Call) return;
849
850         /* check, if it's a runtime call */
851         if (env->ignore_runtime) {
852                 ir_node *symc = get_Call_ptr(call);
853
854                 if (is_SymConst_addr_ent(symc)) {
855                         ir_entity *ent = get_SymConst_entity(symc);
856
857                         if (get_entity_additional_properties(ent) & mtp_property_runtime)
858                                 return;
859                 }
860         }
861
862         /* collect all call nodes */
863         ++x->n_call_nodes;
864         ++x->n_call_nodes_orig;
865
866         callee = get_call_called_irg(call);
867         if (callee != NULL) {
868                 if (! env->ignore_callers) {
869                         inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);
870                         /* count all static callers */
871                         ++callee_env->n_callers;
872                         ++callee_env->n_callers_orig;
873                 }
874                 if (callee == current_ir_graph)
875                         x->recursive = 1;
876
877                 /* link it in the list of possible inlinable entries */
878                 entry = OALLOC(&temp_obst, call_entry);
879                 entry->call       = call;
880                 entry->callee     = callee;
881                 entry->loop_depth = get_irn_loop(get_nodes_block(call))->depth;
882                 entry->benefice   = 0;
883                 entry->local_adr  = 0;
884                 entry->all_const  = 0;
885
886                 list_add_tail(&entry->list, &x->calls);
887         }
888 }
889
890 /**
891  * Returns TRUE if the number of Call nodes is 0 in the irg's environment,
892  * hence this irg is a leaf (it calls no other graphs).
893  */
894 inline static int is_leaf(ir_graph *irg)
895 {
896         inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
897         return env->n_call_nodes == 0;
898 }
899
900 /**
901  * Returns TRUE if the number of nodes in the callee's environment is
902  * smaller than size.
903  */
904 inline static int is_smaller(ir_graph *callee, unsigned size)
905 {
906         inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);
907         return env->n_nodes < size;
908 }
909
910 /**
911  * Duplicate a call entry.
912  *
913  * @param entry     the original entry to duplicate
914  * @param new_call  the new call node
915  * @param loop_depth_delta
916  *                  delta value for the loop depth
917  */
918 static call_entry *duplicate_call_entry(const call_entry *entry,
919                                         ir_node *new_call, int loop_depth_delta)
920 {
921         call_entry *nentry = OALLOC(&temp_obst, call_entry);
922         nentry->call       = new_call;
923         nentry->callee     = entry->callee;
924         nentry->benefice   = entry->benefice;
925         nentry->loop_depth = entry->loop_depth + loop_depth_delta;
926         nentry->local_adr  = entry->local_adr;
927         nentry->all_const  = entry->all_const;
928
929         return nentry;
930 }
931
932 /**
933  * Append all call nodes of the source environment to those of the destination
934  * environment.
935  *
936  * @param dst         destination environment
937  * @param src         source environment
938  * @param loop_depth  the loop depth of the call that is replaced by the src list
939  */
940 static void append_call_list(inline_irg_env *dst, inline_irg_env *src, int loop_depth)
941 {
942         call_entry *entry, *nentry;
943
944         /* Note that the src list points to Call nodes in the inlined graph, but
945            we need Call nodes in our graph. Luckily the inliner leaves this information
946            in the link field. */
947         list_for_each_entry(call_entry, entry, &src->calls, list) {
948                 nentry = duplicate_call_entry(entry, (ir_node*)get_irn_link(entry->call), loop_depth);
949                 list_add_tail(&nentry->list, &dst->calls);
950         }
951         dst->n_call_nodes += src->n_call_nodes;
952         dst->n_nodes      += src->n_nodes;
953 }
954
955 /*
956  * Inlines small leaf methods at call sites where the called address comes
957  * from a SymConst node that references the entity representing the called
958  * method.
959  * The size argument is a rough measure for the code size of the method:
960  * Methods where the obstack containing the firm graph is smaller than
961  * size are inlined.
962  */
963 void inline_leaf_functions(unsigned maxsize, unsigned leafsize,
964                            unsigned size, int ignore_runtime)
965 {
966         inline_irg_env   *env;
967         ir_graph         *irg;
968         size_t           i, n_irgs;
969         ir_graph         *rem;
970         int              did_inline;
971         wenv_t           wenv;
972         call_entry       *entry, *next;
973         const call_entry *centry;
974         pmap             *copied_graphs;
975         pmap_entry       *pm_entry;
976
977         rem = current_ir_graph;
978         obstack_init(&temp_obst);
979
980         /* a map for the copied graphs, used to inline recursive calls */
981         copied_graphs = pmap_create();
982
983         /* extend all irgs by a temporary data structure for inlining. */
984         n_irgs = get_irp_n_irgs();
985         for (i = 0; i < n_irgs; ++i)
986                 set_irg_link(get_irp_irg(i), alloc_inline_irg_env());
987
988         /* Pre-compute information in temporary data structure. */
989         wenv.ignore_runtime = ignore_runtime;
990         wenv.ignore_callers = 0;
991         for (i = 0; i < n_irgs; ++i) {
992                 ir_graph *irg = get_irp_irg(i);
993
994                 assert(get_irg_phase_state(irg) != phase_building);
995                 free_callee_info(irg);
996
997                 assure_irg_properties(irg,
998                         IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO);
999                 wenv.x = (inline_irg_env*)get_irg_link(irg);
1000                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
1001                 confirm_irg_properties(irg, IR_GRAPH_PROPERTIES_ALL);
1002         }
1003
1004         /* -- and now inline. -- */
1005
1006         /* Inline leaves recursively -- we might construct new leaves. */
1007         do {
1008                 did_inline = 0;
1009
1010                 for (i = 0; i < n_irgs; ++i) {
1011                         ir_node *call;
1012                         int phiproj_computed = 0;
1013
1014                         current_ir_graph = get_irp_irg(i);
1015                         env              = (inline_irg_env*)get_irg_link(current_ir_graph);
1016
1017                         ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1018                         list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
1019                                 ir_graph            *callee;
1020                                 irg_inline_property  prop;
1021
1022                                 if (env->n_nodes > maxsize)
1023                                         break;
1024
1025                                 call   = entry->call;
1026                                 callee = entry->callee;
1027
1028                                 prop = get_irg_inline_property(callee);
1029                                 if (prop == irg_inline_forbidden) {
1030                                         continue;
1031                                 }
1032
1033                                 if (is_leaf(callee) && (
1034                                     is_smaller(callee, leafsize) || prop >= irg_inline_forced)) {
1035                                         if (!phiproj_computed) {
1036                                                 phiproj_computed = 1;
1037                                                 collect_phiprojs(current_ir_graph);
1038                                         }
1039                                         did_inline = inline_method(call, callee);
1040
1041                                         if (did_inline) {
1042                                                 inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);
1043
1044                                                 /* call was inlined, Phi/Projs for current graph must be recomputed */
1045                                                 phiproj_computed = 0;
1046
1047                                                 /* Do some statistics */
1048                                                 env->got_inline = 1;
1049                                                 --env->n_call_nodes;
1050                                                 env->n_nodes += callee_env->n_nodes;
1051                                                 --callee_env->n_callers;
1052
1053                                                 /* remove this call from the list */
1054                                                 list_del(&entry->list);
1055                                                 continue;
1056                                         }
1057                                 }
1058                         }
1059                         ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1060                 }
1061         } while (did_inline);
1062
1063         /* inline other small functions. */
1064         for (i = 0; i < n_irgs; ++i) {
1065                 ir_node *call;
1066                 int phiproj_computed = 0;
1067
1068                 current_ir_graph = get_irp_irg(i);
1069                 env              = (inline_irg_env*)get_irg_link(current_ir_graph);
1070
1071                 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1072
1073                 /* note that the list of possible calls is updated during the process */
1074                 list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
1075                         irg_inline_property prop;
1076                         ir_graph            *callee;
1077                         ir_graph            *callee_copy;
1078
1079                         call   = entry->call;
1080                         callee = entry->callee;
1081
1082                         prop = get_irg_inline_property(callee);
1083                         if (prop == irg_inline_forbidden) {
1084                                 continue;
1085                         }
1086
1087                         callee_copy = pmap_get(ir_graph, copied_graphs, callee);
1088                         if (callee_copy != NULL) {
1089                                 /*
1090                                  * Remap callee if we have a copy.
1091                                  * FIXME: Should we do this only for recursive Calls ?
1092                                  */
1093                                 callee = callee_copy;
1094                         }
1095
1096                         if (prop >= irg_inline_forced ||
1097                             (is_smaller(callee, size) && env->n_nodes < maxsize) /* small function */) {
1098                                 if (current_ir_graph == callee) {
1099                                         /*
1100                                          * Recursive call: we cannot directly inline because we cannot walk
1101                                          * the graph and change it. So we have to make a copy of the graph
1102                                          * first.
1103                                          */
1104
1105                                         inline_irg_env *callee_env;
1106                                         ir_graph       *copy;
1107
1108                                         ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1109
1110                                         /*
1111                                          * No copy yet, create one.
1112                                          * Note that recursive methods are never leaves, so it is sufficient
1113                                          * to test this condition here.
1114                                          */
1115                                         copy = create_irg_copy(callee);
1116
1117                                         /* create_irg_copy() destroys the Proj links, recompute them */
1118                                         phiproj_computed = 0;
1119
1120                                         ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1121
1122                                         /* allocate new environment */
1123                                         callee_env = alloc_inline_irg_env();
1124                                         set_irg_link(copy, callee_env);
1125
1126                                         assure_irg_properties(copy,
1127                                                 IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO);
1128                                         wenv.x              = callee_env;
1129                                         wenv.ignore_callers = 1;
1130                                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
1131
1132                                         /*
1133                                          * Enter the entity of the original graph. This is needed
1134                                          * for inline_method(). However, note that ent->irg still points
1135                                          * to callee, NOT to copy.
1136                                          */
1137                                         set_irg_entity(copy, get_irg_entity(callee));
1138
1139                                         pmap_insert(copied_graphs, callee, copy);
1140                                         callee = copy;
1141
1142                                         /* we have only one caller: the original graph */
1143                                         callee_env->n_callers      = 1;
1144                                         callee_env->n_callers_orig = 1;
1145                                 }
1146                                 if (! phiproj_computed) {
1147                                         phiproj_computed = 1;
1148                                         collect_phiprojs(current_ir_graph);
1149                                 }
1150                                 did_inline = inline_method(call, callee);
1151                                 if (did_inline) {
1152                                         inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);
1153
1154                                         /* call was inlined, Phi/Projs for current graph must be recomputed */
1155                                         phiproj_computed = 0;
1156
1157                                         /* callee was inlined. Append its call list. */
1158                                         env->got_inline = 1;
1159                                         --env->n_call_nodes;
1160                                         append_call_list(env, callee_env, entry->loop_depth);
1161                                         --callee_env->n_callers;
1162
1163                                         /* after we have inlined callee, all called methods inside callee
1164                                            are now called once more */
1165                                         list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
1166                                                 inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
1167                                                 ++penv->n_callers;
1168                                         }
1169
1170                                         /* remove this call from the list */
1171                                         list_del(&entry->list);
1172                                         continue;
1173                                 }
1174                         }
1175                 }
1176                 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1177         }
1178
1179         for (i = 0; i < n_irgs; ++i) {
1180                 irg = get_irp_irg(i);
1181                 env = (inline_irg_env*)get_irg_link(irg);
1182
1183                 if (env->got_inline) {
1184                         optimize_graph_df(irg);
1185                         optimize_cf(irg);
1186                 }
1187                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
1188                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
1189                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
1190                         env->n_callers_orig, env->n_callers,
1191                         get_entity_name(get_irg_entity(irg))));
1192                 }
1193         }
1194
1195         /* kill the copied graphs: we don't need them anymore */
1196         foreach_pmap(copied_graphs, pm_entry) {
1197                 ir_graph *copy = (ir_graph*)pm_entry->value;
1198
1199                 /* reset the entity, otherwise it will be deleted in the next step ... */
1200                 set_irg_entity(copy, NULL);
1201                 free_ir_graph(copy);
1202         }
1203         pmap_destroy(copied_graphs);
1204
1205         obstack_free(&temp_obst, NULL);
1206         current_ir_graph = rem;
1207 }
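
/* Example invocation with illustrative thresholds (all node counts, not
 * bytes): let graphs grow to about 750 nodes, inline leaves below 16 nodes
 * and other functions below 25 nodes, and do not skip runtime calls:
 *
 *     inline_leaf_functions(750, 16, 25, 0);
 */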
1208
1209 typedef struct inline_leaf_functions_pass_t {
1210         ir_prog_pass_t pass;
1211         unsigned       maxsize;
1212         unsigned       leafsize;
1213         unsigned       size;
1214         int            ignore_runtime;
1215 } inline_leaf_functions_pass_t;
1216
1217 /**
1218  * Wrapper to run inline_leaf_functions() as a ir_prog pass.
1219  */
1220 static int inline_leaf_functions_wrapper(ir_prog *irp, void *context)
1221 {
1222         inline_leaf_functions_pass_t *pass = (inline_leaf_functions_pass_t*)context;
1223
1224         (void)irp;
1225         inline_leaf_functions(
1226                 pass->maxsize, pass->leafsize,
1227                 pass->size, pass->ignore_runtime);
1228         return 0;
1229 }
1230
1231 /* create a pass for inline_leaf_functions() */
1232 ir_prog_pass_t *inline_leaf_functions_pass(
1233         const char *name, unsigned maxsize, unsigned leafsize,
1234         unsigned size, int ignore_runtime)
1235 {
1236         inline_leaf_functions_pass_t *pass = XMALLOCZ(inline_leaf_functions_pass_t);
1237
1238         pass->maxsize        = maxsize;
1239         pass->leafsize       = leafsize;
1240         pass->size           = size;
1241         pass->ignore_runtime = ignore_runtime;
1242
1243         return def_prog_pass_constructor(
1244                 &pass->pass,
1245                 name ? name : "inline_leaf_functions",
1246                 inline_leaf_functions_wrapper);
1247 }
1248
1249 /**
1250  * Calculate the parameter weights for transmitting the address of a local variable.
1251  */
1252 static unsigned calc_method_local_weight(ir_node *arg)
1253 {
1254         int      i, j, k;
1255         unsigned v, weight = 0;
1256
1257         for (i = get_irn_n_outs(arg) - 1; i >= 0; --i) {
1258                 ir_node *succ = get_irn_out(arg, i);
1259
1260                 switch (get_irn_opcode(succ)) {
1261                 case iro_Load:
1262                 case iro_Store:
1263                         /* Loads and Stores can be removed */
1264                         weight += 3;
1265                         break;
1266                 case iro_Sel:
1267                         /* check if all args are constant */
1268                         for (j = get_Sel_n_indexs(succ) - 1; j >= 0; --j) {
1269                                 ir_node *idx = get_Sel_index(succ, j);
1270                                 if (! is_Const(idx))
1271                                         return 0;
1272                         }
1273                         /* Check users on this Sel. Note: if a 0 is returned here, there was
1274                            some unsupported node. */
1275                         v = calc_method_local_weight(succ);
1276                         if (v == 0)
1277                                 return 0;
1278                         /* we can kill one Sel with constant indexes, this is cheap */
1279                         weight += v + 1;
1280                         break;
1281                 case iro_Id:
1282                         /* when looking backward we might find Id nodes */
1283                         weight += calc_method_local_weight(succ);
1284                         break;
1285                 case iro_Tuple:
1286                         /* unoptimized tuple */
1287                         for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
1288                                 ir_node *pred = get_Tuple_pred(succ, j);
1289                                 if (pred == arg) {
1290                                         /* look for Proj(j) */
1291                                         for (k = get_irn_n_outs(succ) - 1; k >= 0; --k) {
1292                                                 ir_node *succ_succ = get_irn_out(succ, k);
1293                                                 if (is_Proj(succ_succ)) {
1294                                                         if (get_Proj_proj(succ_succ) == j) {
1295                                                                 /* found */
1296                                                                 weight += calc_method_local_weight(succ_succ);
1297                                                         }
1298                                                 } else {
1299                                                         /* this should NOT happen */
1300                                                         return 0;
1301                                                 }
1302                                         }
1303                                 }
1304                         }
1305                         break;
1306                 default:
1307                         /* any other node: unsupported yet or bad. */
1308                         return 0;
1309                 }
1310         }
1311         return weight;
1312 }
1313
1314 /**
1315  * Calculate the parameter weights for transmitting the address of a local variable.
1316  */
1317 static void analyze_irg_local_weights(inline_irg_env *env, ir_graph *irg)
1318 {
1319         ir_entity *ent = get_irg_entity(irg);
1320         ir_type  *mtp;
1321         size_t   nparams;
1322         int      i;
1323         long     proj_nr;
1324         ir_node  *irg_args, *arg;
1325
1326         mtp      = get_entity_type(ent);
1327         nparams  = get_method_n_params(mtp);
1328
1329         /* allocate a new array. currently used as 'analysed' flag */
1330         env->local_weights = NEW_ARR_D(unsigned, &temp_obst, nparams);
1331
1332         /* If the method haven't parameters we have nothing to do. */
1333         if (nparams <= 0)
1334                 return;
1335
1336         assure_irg_outs(irg);
1337         irg_args = get_irg_args(irg);
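        /* every user of the Args node is a Proj whose number selects one parameter */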
        for (i = get_irn_n_outs(irg_args) - 1; i >= 0; --i) {
                arg     = get_irn_out(irg_args, i);
                proj_nr = get_Proj_proj(arg);
                env->local_weights[proj_nr] = calc_method_local_weight(arg);
        }
}

/**
 * Calculate the benefice for transmitting a local variable address.
 * After inlining, the local variable might be transformed into an
 * SSA variable by scalar_replacement().
 */
static unsigned get_method_local_adress_weight(ir_graph *callee, size_t pos)
{
        inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);

        if (env->local_weights == NULL)
                analyze_irg_local_weights(env, callee);

        if (pos < ARR_LEN(env->local_weights))
                return env->local_weights[pos];
        return 0;
}

/**
 * Calculate a benefice value for inlining the given call.
 *
 * @param entry      the call entry containing the call node to inspect
 * @param callee     the called graph
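 * @return the computed benefice, which is also stored in entry->benefice;
 *         INT_MIN marks calls that must never be inlined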
 */
static int calc_inline_benefice(call_entry *entry, ir_graph *callee)
{
        ir_node   *call = entry->call;
        ir_entity *ent  = get_irg_entity(callee);
        ir_type   *callee_frame;
        size_t    i, n_members, n_params;
        ir_node   *frame_ptr;
        ir_type   *mtp;
        int       weight = 0;
        int       all_const;
        unsigned  cc, v;
        irg_inline_property prop;

        inline_irg_env *callee_env;

        prop = get_irg_inline_property(callee);
        if (prop == irg_inline_forbidden) {
                DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden\n",
                    call, callee));
                return entry->benefice = INT_MIN;
        }

        callee_frame = get_irg_frame_type(callee);
        n_members = get_class_n_members(callee_frame);
        for (i = 0; i < n_members; ++i) {
                ir_entity *frame_ent = get_class_member(callee_frame, i);
                if (is_parameter_entity(frame_ent)) {
                        // TODO inliner should handle parameter entities by inserting Store operations
                        DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden due to parameter entity\n", call, callee));
                        set_irg_inline_property(callee, irg_inline_forbidden);
                        return entry->benefice = INT_MIN;
                }
        }

        if (get_irg_additional_properties(callee) & (mtp_property_noreturn | mtp_property_weak)) {
                DB((dbg, LEVEL_2, "In %+F Call to %+F: not inlining noreturn or weak\n",
                    call, callee));
                return entry->benefice = INT_MIN;
        }

        /* costs for every passed parameter */
        n_params = get_Call_n_params(call);
        mtp      = get_entity_type(ent);
        cc       = get_method_calling_convention(mtp);
        if (cc & cc_reg_param) {
                /* register parameters cause smaller costs */
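                /* the non-flag bits of the calling convention encode how many
                 * parameters are passed in registers */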
                size_t max_regs = cc & ~cc_bits;

                if (max_regs < n_params)
                        weight += max_regs * 2 + (n_params - max_regs) * 5;
                else
                        weight += n_params * 2;
        } else {
                /* parameters are passed on the stack */
                weight += 5 * n_params;
        }

        /* constant parameters improve the benefice */
        frame_ptr = get_irg_frame(current_ir_graph);
        all_const = 1;
        for (i = 0; i < n_params; ++i) {
                ir_node *param = get_Call_param(call, i);

                if (is_Const(param)) {
                        weight += get_method_param_weight(ent, i);
                } else {
                        all_const = 0;
                        if (is_SymConst(param))
                                weight += get_method_param_weight(ent, i);
                        else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
                                /*
                                 * An address of a local variable is transmitted. After
                                 * inlining, scalar_replacement might be able to remove the
                                 * local variable, so honor this.
                                 */
                                v = get_method_local_adress_weight(callee, i);
                                weight += v;
                                if (v > 0)
                                        entry->local_adr = 1;
                        }
                }
        }
        entry->all_const = all_const;

        callee_env = (inline_irg_env*)get_irg_link(callee);
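        /* if we are the only caller of a private function, the callee can
         * probably be removed entirely after inlining, so reward this case */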
        if (callee_env->n_callers == 1 &&
            callee != current_ir_graph &&
            !entity_is_externally_visible(ent)) {
                weight += 700;
        }

        /* give a bonus for functions with one block */
        if (callee_env->n_blocks == 1)
                weight = weight * 3 / 2;

        /* and one for small non-recursive functions: we want them to be inlined in almost every case */
        if (callee_env->n_nodes < 30 && !callee_env->recursive)
                weight += 2000;

        /* and finally for leaf functions: they do not increase the register pressure
           because of callee-save registers */
        if (callee_env->n_call_nodes == 0)
                weight += 400;

        /* it's important to inline inner loops first */
        if (entry->loop_depth > 30)
                weight += 30 * 1024;
        else
                weight += entry->loop_depth * 1024;

        /*
         * All arguments constant is probably a good sign, give an extra bonus
         */
        if (all_const)
                weight += 1024;

        return entry->benefice = weight;
}

typedef struct walk_env_t {
        ir_graph **irgs;
        size_t   last_irg;
} walk_env_t;

/**
 * Callgraph walker, collect all visited graphs.
 */
static void callgraph_walker(ir_graph *irg, void *data)
{
        walk_env_t *env = (walk_env_t *)data;
        env->irgs[env->last_irg++] = irg;
}

/**
 * Creates an inline order for all graphs.
 *
 * @return the list of graphs.
 */
static ir_graph **create_irg_list(void)
{
        ir_entity  **free_methods;
        size_t     n_irgs = get_irp_n_irgs();
        walk_env_t env;

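        /* we only need cgana() for its side effect of computing call targets;
         * the list of free methods itself is not used here */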
        cgana(&free_methods);
        xfree(free_methods);

        compute_callgraph();

        env.irgs     = XMALLOCNZ(ir_graph*, n_irgs);
        env.last_irg = 0;

        callgraph_walk(NULL, callgraph_walker, &env);
        assert(n_irgs == env.last_irg);

        free_callgraph();

        return env.irgs;
}

/**
 * Push a call onto the priority list if its benefice is big enough.
 *
 * @param pqueue   the priority queue of calls
 * @param call     the call entry
 * @param inline_threshold
 *                 the threshold value
 */
static void maybe_push_call(pqueue_t *pqueue, call_entry *call,
                            int inline_threshold)
{
        ir_graph            *callee  = call->callee;
        irg_inline_property prop     = get_irg_inline_property(callee);
        int                 benefice = calc_inline_benefice(call, callee);

        DB((dbg, LEVEL_2, "In %+F Call %+F to %+F has benefice %d\n",
            get_irn_irg(call->call), call->call, callee, benefice));

        if (prop < irg_inline_forced && benefice < inline_threshold) {
                return;
        }

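        /* the benefice serves as the priority: the calls with the highest
         * benefice are popped (and thus inlined) first */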
        pqueue_put(pqueue, call, benefice);
}

/**
 * Try to inline calls into a graph.
 *
 * @param irg      the graph into which we inline
 * @param maxsize  do NOT inline if the size of irg gets
 *                 bigger than this amount
 * @param inline_threshold
 *                 threshold value for inline decision
 * @param copied_graphs
 *                 map containing copies of recursive graphs
 */
static void inline_into(ir_graph *irg, unsigned maxsize,
                        int inline_threshold, pmap *copied_graphs)
{
        int            phiproj_computed = 0;
        inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
        call_entry     *curr_call;
        wenv_t         wenv;
        pqueue_t       *pqueue;

        if (env->n_call_nodes == 0)
                return;

        if (env->n_nodes > maxsize) {
                DB((dbg, LEVEL_2, "%+F: too big (%d)\n", irg, env->n_nodes));
                return;
        }

        current_ir_graph = irg;
        ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

        /* put all calls of this graph into the priority queue */
        pqueue = new_pqueue();

        list_for_each_entry(call_entry, curr_call, &env->calls, list) {
                assert(is_Call(curr_call->call));
                maybe_push_call(pqueue, curr_call, inline_threshold);
        }

        /* note that the list of possible calls is updated during the process */
        while (!pqueue_empty(pqueue)) {
                int                 did_inline;
                call_entry          *curr_call  = (call_entry*)pqueue_pop_front(pqueue);
                ir_graph            *callee     = curr_call->callee;
                ir_node             *call_node  = curr_call->call;
                inline_irg_env      *callee_env = (inline_irg_env*)get_irg_link(callee);
                irg_inline_property prop        = get_irg_inline_property(callee);
                ir_graph            *callee_copy;
                int                 loop_depth;
                const call_entry    *centry;

                if ((prop < irg_inline_forced) && env->n_nodes + callee_env->n_nodes > maxsize) {
                        DB((dbg, LEVEL_2, "%+F: too big (%d) + %+F (%d)\n", irg,
                                                env->n_nodes, callee, callee_env->n_nodes));
                        continue;
                }

                callee_copy = pmap_get(ir_graph, copied_graphs, callee);
                if (callee_copy != NULL) {
                        int benefice = curr_call->benefice;
                        /*
                         * Reduce the benefice for recursive functions iff not all arguments
                         * are constant: inlining recursive functions is rarely profitable.
                         */
                        if (!curr_call->all_const)
                                benefice -= 2000;
                        if (benefice < inline_threshold)
                                continue;

                        /*
                         * Remap callee if we have a copy.
                         */
                        callee     = callee_copy;
                        callee_env = (inline_irg_env*)get_irg_link(callee);
                }

                if (current_ir_graph == callee) {
                        /*
                         * Recursive call: we cannot directly inline because we cannot
                         * walk the graph and change it. So we have to make a copy of
                         * the graph first.
                         */
                        int benefice = curr_call->benefice;
                        ir_graph *copy;

                        /*
                         * Reduce the benefice for recursive functions iff not all arguments
                         * are constant: inlining recursive functions is rarely profitable.
                         */
                        if (!curr_call->all_const)
                                benefice -= 2000;
                        if (benefice < inline_threshold)
                                continue;

                        ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

                        /*
                         * No copy yet, create one.
                         * Note that recursive methods are never leaf functions, so it is
                         * sufficient to test this condition here.
                         */
                        copy = create_irg_copy(callee);

                        /* create_irg_copy() destroys the Proj links, recompute them */
                        phiproj_computed = 0;

                        ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

                        /* allocate a new environment */
                        callee_env = alloc_inline_irg_env();
                        set_irg_link(copy, callee_env);

                        assure_irg_properties(copy, IR_GRAPH_PROPERTY_CONSISTENT_LOOPINFO);
                        memset(&wenv, 0, sizeof(wenv));
                        wenv.x              = callee_env;
                        wenv.ignore_callers = 1;
                        irg_walk_graph(copy, NULL, collect_calls2, &wenv);

                        /*
                         * Enter the entity of the original graph. This is needed
                         * for inline_method(). However, note that ent->irg still points
                         * to callee, NOT to copy.
                         */
                        set_irg_entity(copy, get_irg_entity(callee));

                        pmap_insert(copied_graphs, callee, copy);
                        callee = copy;

                        /* we have only one caller: the original graph */
                        callee_env->n_callers      = 1;
                        callee_env->n_callers_orig = 1;
                }
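                /* Phi/Proj lists are computed lazily: they are invalidated by graph
                 * copying as well as by every successful inlining step */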
                if (! phiproj_computed) {
                        phiproj_computed = 1;
                        collect_phiprojs(current_ir_graph);
                }
                did_inline = inline_method(call_node, callee);
                if (!did_inline)
                        continue;

                /* call was inlined, Phi/Projs for current graph must be recomputed */
                phiproj_computed = 0;

                /* remove it from the caller list */
                list_del(&curr_call->list);

                /* callee was inlined. Append its call list. */
                env->got_inline = 1;
                --env->n_call_nodes;

                /* we just generated a bunch of new calls */
                loop_depth = curr_call->loop_depth;
                list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
                        inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
                        ir_node        *new_call;
                        call_entry     *new_entry;

                        /* after we have inlined callee, all called methods inside
                         * callee are now called once more */
                        ++penv->n_callers;

                        /* Note that the src list points to Call nodes in the inlined graph,
                         * but we need Call nodes in our graph. Luckily the inliner leaves
                         * this information in the link field. */
                        new_call = (ir_node*)get_irn_link(centry->call);
                        if (get_irn_irg(new_call) != irg) {
                                /* centry->call has not been copied, which means it is dead.
                                 * This might happen during inlining, if a const function
                                 * that cannot be inlined is only used as an unused argument
                                 * of another function, which is inlined. */
                                continue;
                        }
                        assert(is_Call(new_call));

                        new_entry = duplicate_call_entry(centry, new_call, loop_depth);
                        list_add_tail(&new_entry->list, &env->calls);
                        maybe_push_call(pqueue, new_entry, inline_threshold);
                }

                env->n_call_nodes += callee_env->n_call_nodes;
                env->n_nodes += callee_env->n_nodes;
                --callee_env->n_callers;
        }
        ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
        del_pqueue(pqueue);
}

/*
 * Heuristic inliner. Calculates a benefice value for every call and inlines
 * those calls with a value higher than the threshold.
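 *
 * A typical invocation might look like this (the values are illustrative,
 * and optimize_graph_df is just one possible after-inline optimization):
 *
 *     inline_functions(750, 0, optimize_graph_df);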
 */
void inline_functions(unsigned maxsize, int inline_threshold,
                      opt_ptr after_inline_opt)
{
        inline_irg_env   *env;
        size_t           i, n_irgs;
        ir_graph         *rem;
        wenv_t           wenv;
        pmap             *copied_graphs;
        pmap_entry       *pm_entry;
        ir_graph         **irgs;

        rem = current_ir_graph;
        obstack_init(&temp_obst);

        irgs = create_irg_list();

        /* a map for the copied graphs, used to inline recursive calls */
        copied_graphs = pmap_create();

        /* extend all irgs by a temporary data structure for inlining. */
        n_irgs = get_irp_n_irgs();
        for (i = 0; i < n_irgs; ++i)
                set_irg_link(irgs[i], alloc_inline_irg_env());

        /* Pre-compute information in temporary data structure. */
        wenv.ignore_runtime = 0;
        wenv.ignore_callers = 0;
        for (i = 0; i < n_irgs; ++i) {
                ir_graph *irg = irgs[i];

                free_callee_info(irg);

                wenv.x = (inline_irg_env*)get_irg_link(irg);
                assure_loopinfo(irg);
                irg_walk_graph(irg, NULL, collect_calls2, &wenv);
        }

        /* -- and now inline. -- */
        for (i = 0; i < n_irgs; ++i) {
                ir_graph *irg = irgs[i];

                inline_into(irg, maxsize, inline_threshold, copied_graphs);
        }

        for (i = 0; i < n_irgs; ++i) {
                ir_graph *irg = irgs[i];

                env = (inline_irg_env*)get_irg_link(irg);
                if (env->got_inline && after_inline_opt != NULL) {
                        /* this irg got calls inlined: optimize it */
                        after_inline_opt(irg);
                }
                if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
                        DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
                        env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
                        env->n_callers_orig, env->n_callers,
                        get_entity_name(get_irg_entity(irg))));
                }
        }

        /* kill the copied graphs: we don't need them anymore */
        foreach_pmap(copied_graphs, pm_entry) {
                ir_graph *copy = (ir_graph*)pm_entry->value;

                /* reset the entity, otherwise it will be deleted in the next step ... */
                set_irg_entity(copy, NULL);
                free_ir_graph(copy);
        }
        pmap_destroy(copied_graphs);

        xfree(irgs);

        obstack_free(&temp_obst, NULL);
        current_ir_graph = rem;
}

typedef struct inline_functions_pass_t {
        ir_prog_pass_t pass;
        unsigned       maxsize;
        int            inline_threshold;
        opt_ptr        after_inline_opt;
} inline_functions_pass_t;

/**
 * Wrapper to run inline_functions() as an ir_prog pass.
 */
static int inline_functions_wrapper(ir_prog *irp, void *context)
{
        inline_functions_pass_t *pass = (inline_functions_pass_t*)context;

        (void)irp;
        inline_functions(pass->maxsize, pass->inline_threshold,
                         pass->after_inline_opt);
        return 0;
}

/* create an ir_prog pass for inline_functions() */
ir_prog_pass_t *inline_functions_pass(
          const char *name, unsigned maxsize, int inline_threshold,
          opt_ptr after_inline_opt)
{
        inline_functions_pass_t *pass = XMALLOCZ(inline_functions_pass_t);

        pass->maxsize          = maxsize;
        pass->inline_threshold = inline_threshold;
        pass->after_inline_opt = after_inline_opt;

        return def_prog_pass_constructor(
                &pass->pass, name ? name : "inline_functions",
                inline_functions_wrapper);
}

void firm_init_inline(void)
{
        FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
}