critical_edges: set NO_CRITICAL_EDGES graph flag
[libfirm] ir/opt/opt_inline.c
/*
 * Copyright (C) 1995-2011 University of Karlsruhe.  All rights reserved.
 *
 * This file is part of libFirm.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License
 * Agreement provided with the Software.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

/**
 * @file
 * @brief    Dead node elimination and Procedure Inlining.
 * @author   Michael Beck, Goetz Lindenmaier
 * @version  $Id$
 */
#include "config.h"

#include <limits.h>
#include <stdbool.h>
#include <assert.h>

#include "irnode_t.h"
#include "irgraph_t.h"
#include "irprog_t.h"

#include "iroptimize.h"
#include "ircons_t.h"
#include "iropt_t.h"
#include "irgopt.h"
#include "irgmod.h"
#include "irgwalk.h"

#include "array_t.h"
#include "list.h"
#include "pset.h"
#include "pmap.h"
#include "pdeq.h"
#include "xmalloc.h"
#include "pqueue.h"

#include "irouts.h"
#include "irloop_t.h"
#include "irbackedge_t.h"
#include "opt_init.h"
#include "cgana.h"
#include "trouts.h"
#include "error.h"

#include "analyze_irg_args.h"
#include "iredges_t.h"
#include "irflag_t.h"
#include "irhooks.h"
#include "irtools.h"
#include "iropt_dbg.h"
#include "irpass_t.h"
#include "irnodemap.h"
DEBUG_ONLY(static firm_dbg_module_t *dbg;)

/*------------------------------------------------------------------*/
/* Routines for dead node elimination / copying garbage collection  */
/* of the obstack.                                                  */
/*------------------------------------------------------------------*/

/**
 * Remember the new node in the old node by using a field all nodes have.
 */
static void set_new_node(ir_node *node, ir_node *new_node)
{
	set_irn_link(node, new_node);
}

/**
 * Get this new node, before the old node is forgotten.
 */
static inline ir_node *get_new_node(ir_node *old_node)
{
	assert(irn_visited(old_node));
	return (ir_node*) get_irn_link(old_node);
}
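
/* The link field doubles as the old->new node map while copying; callers
 * therefore reserve IR_RESOURCE_IRN_LINK around inlining (see the
 * ir_reserve_resources() calls further below). */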

/*--------------------------------------------------------------------*/
/*  Functionality for inlining                                        */
/*--------------------------------------------------------------------*/

/**
 * Copy node for inlining.  Updates attributes that change when
 * inlining but not for dead node elimination.
 *
 * Copies the node by calling irn_copy_into_irg() and then updates the
 * entity if it is a local one.  env must be the target ir_graph into
 * which we inline.  The new entities must be in the link fields of the
 * old frame entities.
 */
static void copy_node_inline(ir_node *node, void *env)
{
	ir_graph *new_irg  = (ir_graph*) env;
	ir_node  *new_node = irn_copy_into_irg(node, new_irg);

	set_new_node(node, new_node);
	if (is_Sel(node)) {
		ir_graph  *old_irg        = get_irn_irg(node);
		ir_type   *old_frame_type = get_irg_frame_type(old_irg);
		ir_entity *old_entity     = get_Sel_entity(node);
		assert(is_Sel(new_node));
		/* use copied entities from the new frame */
		if (get_entity_owner(old_entity) == old_frame_type) {
			ir_entity *new_entity = (ir_entity*)get_entity_link(old_entity);
			assert(new_entity != NULL);
			set_Sel_entity(new_node, new_entity);
		}
	} else if (is_Block(new_node)) {
		new_node->attr.block.irg.irg = new_irg;
	}
}

static void set_preds_inline(ir_node *node, void *env)
{
	ir_node *new_node;

	irn_rewire_inputs(node);

	/* move constants into start block */
	new_node = get_new_node(node);
	if (is_irn_constlike(new_node)) {
		ir_graph *new_irg     = (ir_graph *) env;
		ir_node  *start_block = get_irg_start_block(new_irg);
		set_nodes_block(new_node, start_block);
	}
}
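
/* Moving constants is deliberate: in libFirm, constant-like nodes
 * conventionally live in the start block, where they dominate all uses;
 * placing the copies there keeps the inlined graph in canonical form. */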

/**
 * Walker: checks for nodes that forbid inlining: accesses to value-type
 * or parameter entities on the frame, and stack allocation (alloca).
 */
static void find_addr(ir_node *node, void *env)
{
	bool *allow_inline = (bool*)env;

	if (is_Sel(node)) {
		ir_graph *irg = current_ir_graph;
		if (get_Sel_ptr(node) == get_irg_frame(irg)) {
			/* access to frame */
			ir_entity *ent = get_Sel_entity(node);
			if (get_entity_owner(ent) != get_irg_frame_type(irg)) {
				/* access to value_type */
				*allow_inline = false;
			}
			if (is_parameter_entity(ent)) {
				*allow_inline = false;
			}
		}
	} else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
		/* From GCC:
		 * Refuse to inline an alloca call unless the user explicitly forced
		 * it, as this may change the program's memory overhead drastically
		 * when the function using alloca is called in a loop.  In the GCC
		 * present in SPEC2000, inlining into schedule_block caused it to
		 * require 2GB of RAM instead of 256MB.
		 *
		 * Sadly this is true for our implementation as well.
		 * Moreover, we cannot differentiate between alloca() and VLA yet, so
		 * this disables inlining of functions using VLA (which are completely
		 * safe).
		 *
		 * 2 solutions:
		 * - add a flag to the Alloc node for "real" alloca() calls
		 * - add a new Stack-Restore node at the end of a function using
		 *   alloca()
		 */
		*allow_inline = false;
	}
}

/**
 * Check if we can inline a given call.
 * Currently, we cannot inline two cases:
 * - calls with compound arguments
 * - graphs that take the address of a parameter
 *
 * These conditions are checked here.
 */
static bool can_inline(ir_node *call, ir_graph *called_graph)
{
	ir_entity          *called      = get_irg_entity(called_graph);
	ir_type            *called_type = get_entity_type(called);
	ir_type            *call_type   = get_Call_type(call);
	size_t              n_params    = get_method_n_params(called_type);
	size_t              n_arguments = get_method_n_params(call_type);
	size_t              n_res       = get_method_n_ress(called_type);
	irg_inline_property prop        = get_irg_inline_property(called_graph);
	size_t              i;
	bool                res;

	if (prop == irg_inline_forbidden)
		return false;

	if (n_arguments != n_params) {
		/* this is a bad feature of C: without a prototype, we can
		 * call a function with fewer parameters than needed. Currently
		 * we don't support this, although we could use Unknown then. */
		return false;
	}
	if (n_res != get_method_n_ress(call_type)) {
		return false;
	}

	/* Argh, compiling C has some bad consequences:
	 * the call type and the declared method type may disagree, and it is
	 * implementation defined what happens then.
	 * We support inlining if the bit size of the types matches AND
	 * the same arithmetic is used. */
	for (i = 0; i < n_params; ++i) {
		ir_type *param_tp = get_method_param_type(called_type, i);
		ir_type *arg_tp   = get_method_param_type(call_type, i);

		if (param_tp != arg_tp) {
			ir_mode *pmode = get_type_mode(param_tp);
			ir_mode *amode = get_type_mode(arg_tp);

			if (pmode == NULL || amode == NULL)
				return false;
			if (get_mode_size_bits(pmode) != get_mode_size_bits(amode))
				return false;
			if (get_mode_arithmetic(pmode) != get_mode_arithmetic(amode))
				return false;
			/* otherwise we can simply "reinterpret" the bits */
		}
	}
	for (i = 0; i < n_res; ++i) {
		ir_type *decl_res_tp = get_method_res_type(called_type, i);
		ir_type *used_res_tp = get_method_res_type(call_type, i);

		if (decl_res_tp != used_res_tp) {
			ir_mode *decl_mode = get_type_mode(decl_res_tp);
			ir_mode *used_mode = get_type_mode(used_res_tp);
			if (decl_mode == NULL || used_mode == NULL)
				return false;
			if (get_mode_size_bits(decl_mode) != get_mode_size_bits(used_mode))
				return false;
			if (get_mode_arithmetic(decl_mode) != get_mode_arithmetic(used_mode))
				return false;
			/* otherwise we can "reinterpret" the bits */
		}
	}

	/* check parameters for compound arguments */
	for (i = 0; i < n_params; ++i) {
		ir_type *p_type = get_method_param_type(call_type, i);

		if (is_compound_type(p_type))
			return false;
	}

	/* check results for compound arguments */
	for (i = 0; i < n_res; ++i) {
		ir_type *r_type = get_method_res_type(call_type, i);

		if (is_compound_type(r_type))
			return false;
	}

	res = true;
	irg_walk_graph(called_graph, find_addr, NULL, &res);

	return res;
}
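
/* A sketch of the C misfeature handled above: with a K&R-style declaration
 * such as "int f();" a call site may pass arguments although the definition
 * takes none, so n_arguments != n_params and can_inline() returns false. */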

enum exc_mode {
	exc_handler,    /**< There is a handler. */
	exc_no_handler  /**< Exception handling not represented. */
};

/**
 * Copy all entities on the stack frame of one irg to the stack frame of
 * another.  Sets the entity links of the old entities to the copies.
 */
static void copy_frame_entities(ir_graph *from, ir_graph *to)
{
	ir_type *from_frame = get_irg_frame_type(from);
	ir_type *to_frame   = get_irg_frame_type(to);
	size_t   n_members  = get_class_n_members(from_frame);
	size_t   i;
	assert(from_frame != to_frame);

	for (i = 0; i < n_members; ++i) {
		ir_entity *old_ent = get_class_member(from_frame, i);
		ir_entity *new_ent = copy_entity_own(old_ent, to_frame);
		set_entity_link(old_ent, new_ent);
		assert(!is_parameter_entity(old_ent));
	}
}
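
/* The entity links set here are consumed by copy_node_inline() above;
 * inline_method() brackets both with irp_reserve_resources(irp,
 * IRP_RESOURCE_ENTITY_LINK) so the link fields may be used as a map. */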

/* Inlines a method at the given call site. */
int inline_method(ir_node *call, ir_graph *called_graph)
{
	ir_node       *pre_call;
	ir_node       *post_call, *post_bl;
	ir_node       *in[pn_Start_max+1];
	ir_node       *end, *end_bl, *block;
	ir_node       **res_pred;
	ir_node       **cf_pred;
	ir_node       **args_in;
	ir_node       *ret, *phi;
	int           arity, n_ret, n_exc, n_res, i, j, rem_opt;
	int           irn_arity, n_params;
	int           n_mem_phi;
	enum exc_mode exc_handling;
	ir_type       *mtp;
	ir_type       *ctp;
	ir_entity     *ent;
	ir_graph      *rem;
	ir_graph      *irg = get_irn_irg(call);

	/* we cannot inline some types of calls */
	if (! can_inline(call, called_graph))
		return 0;

	/* We cannot inline a recursive call. The graph must be copied before
	 * the call to inline_method() using create_irg_copy(). */
	if (called_graph == irg)
		return 0;

	ent      = get_irg_entity(called_graph);
	mtp      = get_entity_type(ent);
	ctp      = get_Call_type(call);
	n_params = get_method_n_params(mtp);
	n_res    = get_method_n_ress(mtp);

	rem = current_ir_graph;
	current_ir_graph = irg;

	DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));

	/* optimizations can cause problems when allocating new nodes */
	rem_opt = get_opt_optimize();
	set_optimize(0);

	/* Handle graph state */
	assert(get_irg_phase_state(irg) != phase_building);
	assert(get_irg_pinned(irg) == op_pin_state_pinned);
	assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
	clear_irg_state(irg, IR_GRAPH_STATE_CONSISTENT_DOMINANCE
	                   | IR_GRAPH_STATE_VALID_EXTENDED_BLOCKS
	                   | IR_GRAPH_STATE_CONSISTENT_ENTITY_USAGE);
	set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
	edges_deactivate(irg);

	/* here we know we WILL inline, so inform the statistics */
	hook_inline(call, called_graph);

	/* -- Decide how to handle exception control flow: Is there a handler
	   for the Call node, or do we branch directly to End on an exception?
	   exc_handling:
	   exc_handler     There is a handler.
	   exc_no_handler  Exception handling is not represented in Firm. -- */
	{
		ir_node *Xproj = NULL;
		ir_node *proj;
		for (proj = (ir_node*)get_irn_link(call); proj != NULL;
		     proj = (ir_node*)get_irn_link(proj)) {
			long proj_nr = get_Proj_proj(proj);
			if (proj_nr == pn_Call_X_except) Xproj = proj;
		}
		exc_handling = Xproj != NULL ? exc_handler : exc_no_handler;
	}

	/* create the argument tuple */
	args_in = ALLOCAN(ir_node*, n_params);

	block = get_nodes_block(call);
	for (i = n_params - 1; i >= 0; --i) {
		ir_node *arg      = get_Call_param(call, i);
		ir_type *param_tp = get_method_param_type(mtp, i);
		ir_mode *mode     = get_type_mode(param_tp);

		if (mode != get_irn_mode(arg)) {
			arg = new_r_Conv(block, arg, mode);
		}
		args_in[i] = arg;
	}
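	/* The Conv above merely reinterprets bits: can_inline() has already
	 * checked that argument and parameter modes agree in size and
	 * arithmetic. */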

	/* Pre_call is a Tuple that will replace the Start node of the called
	 * graph: its entries correspond to the Projs of Start (memory, initial
	 * exec, frame base, argument tuple).
	 * Post_call is the old Call node and collects the results of the called
	 * graph. Both will end up being a tuple. */
	post_bl = get_nodes_block(call);
	/* XxMxPxPxPxT of Start + parameter of Call */
	in[pn_Start_M]              = get_Call_mem(call);
	in[pn_Start_X_initial_exec] = new_r_Jmp(post_bl);
	in[pn_Start_P_frame_base]   = get_irg_frame(irg);
	in[pn_Start_T_args]         = new_r_Tuple(post_bl, n_params, args_in);
	pre_call = new_r_Tuple(post_bl, pn_Start_max+1, in);
	post_call = call;

	/* --
	   The new block gets the ins of the old block, pre_call and all its
	   predecessors and all Phi nodes. -- */
	part_block(pre_call);

	/* increment visited flag for later walk */
	inc_irg_visited(called_graph);

	/* Link some nodes of the called graph to nodes in the current graph,
	 * so the linked nodes are used instead of being copied.
	 * The copier will thus use the created Tuple instead of copying the
	 * Start node, and similarly for singleton nodes like NoMem and Bad.
	 * Note: this will prohibit predecessors from being copied - only do it
	 *       for nodes without predecessors */
	{
		ir_node *start_block;
		ir_node *start;
		ir_node *nomem;

		start_block = get_irg_start_block(called_graph);
		set_new_node(start_block, get_nodes_block(pre_call));
		mark_irn_visited(start_block);

		start = get_irg_start(called_graph);
		set_new_node(start, pre_call);
		mark_irn_visited(start);

		nomem = get_irg_no_mem(called_graph);
		set_new_node(nomem, get_irg_no_mem(irg));
		mark_irn_visited(nomem);
	}

	/* the entity link is used to link entities on the old stack frame to
	 * the new stack frame */
	irp_reserve_resources(irp, IRP_RESOURCE_ENTITY_LINK);

	/* copy entities and nodes */
	assert(!irn_visited(get_irg_end(called_graph)));
	copy_frame_entities(called_graph, irg);
	irg_walk_core(get_irg_end(called_graph), copy_node_inline, set_preds_inline,
	              irg);

	irp_free_resources(irp, IRP_RESOURCE_ENTITY_LINK);

	/* -- Merge the end of the inlined procedure with the call site -- */
	/* We will turn the old Call node into a Tuple with the following
	   predecessors:
	   -1: Block of Tuple.
	   0: Phi of all Memories of Return statements.
	   1: Jmp from new Block that merges the control flow from all exception
	      predecessors of the old end block.
	   2: Tuple of all arguments.
	   3: Phi of Exception memories.
	   In case the old Call directly branches to End on an exception we don't
	   need the block merging all exceptions nor the Phi of the exception
	   memories.
	*/

	/* Precompute some values */
	end_bl = get_new_node(get_irg_end_block(called_graph));
	end    = get_new_node(get_irg_end(called_graph));
	arity  = get_Block_n_cfgpreds(end_bl);    /* arity = n_exc + n_ret  */
	n_res  = get_method_n_ress(get_Call_type(call));

	res_pred = XMALLOCN(ir_node*, n_res);
	cf_pred  = XMALLOCN(ir_node*, arity);

	/* preserve the keepalives of the inlined graph */
	irn_arity = get_irn_arity(end);
	for (i = 0; i < irn_arity; i++) {
		ir_node *ka = get_End_keepalive(end, i);
		if (! is_Bad(ka))
			add_End_keepalive(get_irg_end(irg), ka);
	}

	/* replace Return nodes by Jmp nodes */
	n_ret = 0;
	for (i = 0; i < arity; i++) {
		ir_node *ret;
		ret = get_Block_cfgpred(end_bl, i);
		if (is_Return(ret)) {
			ir_node *block = get_nodes_block(ret);
			cf_pred[n_ret] = new_r_Jmp(block);
			n_ret++;
		}
	}
	set_irn_in(post_bl, n_ret, cf_pred);

	/* build a Tuple for all results of the method.
	 * add a Phi node if there was more than one Return. */
	turn_into_tuple(post_call, pn_Call_max+1);
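	/* From here on the former Call node is a Tuple; each set_Tuple_pred()
	 * below fills in one pn_Call output, transparently rewiring every
	 * existing Proj on the Call to the corresponding inlined value. */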
	/* First the Memory-Phi */
	n_mem_phi = 0;
	for (i = 0; i < arity; i++) {
		ret = get_Block_cfgpred(end_bl, i);
		if (is_Return(ret)) {
			cf_pred[n_mem_phi++] = get_Return_mem(ret);
		}
		/* memory output for some exceptions is directly connected to End */
		if (is_Call(ret)) {
			cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 3);
		} else if (is_fragile_op(ret)) {
			/* We rely on all cfops having the memory output at the same
			 * position. */
			cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 0);
		} else if (is_Raise(ret)) {
			cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 1);
		}
	}
	phi = new_r_Phi(post_bl, n_mem_phi, cf_pred, mode_M);
	set_Tuple_pred(call, pn_Call_M, phi);
	/* Conserve Phi-list for further inlinings -- but might be optimized */
	if (get_nodes_block(phi) == post_bl) {
		set_irn_link(phi, get_irn_link(post_bl));
		set_irn_link(post_bl, phi);
	}
	/* Now the real results */
	if (n_res > 0) {
		ir_node *result_tuple;
		for (j = 0; j < n_res; j++) {
			ir_type *res_type = get_method_res_type(ctp, j);
			ir_mode *res_mode = get_type_mode(res_type);
			n_ret = 0;
			for (i = 0; i < arity; i++) {
				ret = get_Block_cfgpred(end_bl, i);
				if (is_Return(ret)) {
					ir_node *res = get_Return_res(ret, j);
					if (get_irn_mode(res) != res_mode) {
						ir_node *block = get_nodes_block(res);
						res = new_r_Conv(block, res, res_mode);
					}
					cf_pred[n_ret] = res;
					n_ret++;
				}
			}
			if (n_ret > 0) {
				phi = new_r_Phi(post_bl, n_ret, cf_pred, res_mode);
			} else {
				phi = new_r_Bad(irg, res_mode);
			}
			res_pred[j] = phi;
			/* Conserve Phi-list for further inlinings -- but might be optimized */
			if (get_nodes_block(phi) == post_bl) {
				set_Phi_next(phi, get_Block_phis(post_bl));
				set_Block_phis(post_bl, phi);
			}
		}
		result_tuple = new_r_Tuple(post_bl, n_res, res_pred);
		set_Tuple_pred(call, pn_Call_T_result, result_tuple);
	} else {
		set_Tuple_pred(call, pn_Call_T_result, new_r_Bad(irg, mode_T));
	}
	/* handle the regular call */
	set_Tuple_pred(call, pn_Call_X_regular, new_r_Jmp(post_bl));

	/* Finally the exception control flow.
	   We have two possible situations:
	   First, the Call branches to an exception handler:
	   we need to add a Phi node to collect the memory containing the
	   exception objects.  Further we need to add another block to get a
	   correct representation of this Phi.  To this block we add a Jmp that
	   resolves into the X output of the Call when the Call is turned into
	   a tuple.
	   Second, there is no exception edge: just add all inlined exception
	   branches to the End node.
	 */
	if (exc_handling == exc_handler) {
		n_exc = 0;
		for (i = 0; i < arity; i++) {
			ir_node *ret, *irn;
			ret = get_Block_cfgpred(end_bl, i);
			irn = skip_Proj(ret);
			if (is_fragile_op(irn) || is_Raise(irn)) {
				cf_pred[n_exc] = ret;
				++n_exc;
			}
		}
		if (n_exc > 0) {
			if (n_exc == 1) {
				/* simple fix */
				set_Tuple_pred(call, pn_Call_X_except, cf_pred[0]);
			} else {
				ir_node *block = new_r_Block(irg, n_exc, cf_pred);
				set_Tuple_pred(call, pn_Call_X_except, new_r_Jmp(block));
			}
		} else {
			set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg, mode_X));
		}
	} else {
		ir_node *main_end_bl;
		int main_end_bl_arity;
		ir_node **end_preds;

		/* here exc_handling == exc_no_handler: no handler, or no exceptions */
		n_exc = 0;
		for (i = 0; i < arity; i++) {
			ir_node *ret = get_Block_cfgpred(end_bl, i);
			ir_node *irn = skip_Proj(ret);

			if (is_fragile_op(irn) || is_Raise(irn)) {
				cf_pred[n_exc] = ret;
				n_exc++;
			}
		}
		main_end_bl       = get_irg_end_block(irg);
		main_end_bl_arity = get_irn_arity(main_end_bl);
		end_preds         = XMALLOCN(ir_node*, n_exc + main_end_bl_arity);

		for (i = 0; i < main_end_bl_arity; ++i)
			end_preds[i] = get_irn_n(main_end_bl, i);
		for (i = 0; i < n_exc; ++i)
			end_preds[main_end_bl_arity + i] = cf_pred[i];
		set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
		set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg, mode_X));
		free(end_preds);
	}
	free(res_pred);
	free(cf_pred);

	/* -- Turn optimizations back on. -- */
	set_optimize(rem_opt);
	current_ir_graph = rem;

	return 1;
}
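
/* Usage sketch: callers must have Phi lists and Proj links collected first,
 * e.g.
 *
 *     ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_PHI_LIST);
 *     collect_phiprojs(irg);
 *     if (inline_method(call, callee))
 *         ...;  // Phi/Proj info must be recomputed before the next inlining
 *     ir_free_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_PHI_LIST);
 *
 * This mirrors what inline_small_irgs() below does. */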

/********************************************************************/
/* Apply inlining to small methods.                                 */
/********************************************************************/

static struct obstack  temp_obst;

/** Represents a possibly inlinable call in a graph. */
typedef struct call_entry {
	ir_node    *call;       /**< The Call node. */
	ir_graph   *callee;     /**< The callee IR-graph. */
	list_head  list;        /**< List head for linking the next one. */
	int        loop_depth;  /**< The loop depth of this call. */
	int        benefice;    /**< The calculated benefice of this call. */
	unsigned   local_adr:1; /**< Set if this call gets an address of a local variable. */
	unsigned   all_const:1; /**< Set if this call has only constant parameters. */
} call_entry;

/**
 * Environment for inlining small irgs.
 */
typedef struct inline_env_t {
	struct obstack obst;  /**< An obstack where call_entries are allocated on. */
	list_head      calls; /**< The call entry list. */
} inline_env_t;

/**
 * Returns the irg called from a Call node. If the irg is not
 * known, NULL is returned.
 *
 * @param call  the call node
 */
static ir_graph *get_call_called_irg(ir_node *call)
{
	ir_node *addr;

	addr = get_Call_ptr(call);
	if (is_SymConst_addr_ent(addr)) {
		ir_entity *ent = get_SymConst_entity(addr);
		/* we don't know which function finally gets bound to a weak symbol */
		if (get_entity_linkage(ent) & IR_LINKAGE_WEAK)
			return NULL;

		return get_entity_irg(ent);
	}

	return NULL;
}

/**
 * Walker: Collect all calls to known graphs inside a graph.
 */
static void collect_calls(ir_node *call, void *env)
{
	if (is_Call(call)) {
		ir_graph *called_irg = get_call_called_irg(call);

		if (called_irg != NULL) {
			/* The Call node calls a locally defined method.  Remember to inline. */
			inline_env_t *ienv  = (inline_env_t*)env;
			call_entry   *entry = OALLOC(&ienv->obst, call_entry);
			entry->call       = call;
			entry->callee     = called_irg;
			entry->loop_depth = 0;
			entry->benefice   = 0;
			entry->local_adr  = 0;
			entry->all_const  = 0;

			list_add_tail(&entry->list, &ienv->calls);
		}
	}
}

/**
 * Inlines all small methods at call sites where the called address comes
 * from a Const node that references the entity representing the called
 * method.
 * The size argument is a rough measure for the code size of the method:
 * Methods where the obstack containing the firm graph is smaller than
 * size are inlined.
 */
void inline_small_irgs(ir_graph *irg, int size)
{
	ir_graph *rem = current_ir_graph;
	inline_env_t env;
	call_entry *entry;

	current_ir_graph = irg;
	/* Handle graph state */
	assert(get_irg_phase_state(irg) != phase_building);
	free_callee_info(irg);

	/* Find Call nodes to inline.
	   (We cannot inline during a walk of the graph, as inlining the same
	   method several times changes the visited flag of the walked graph:
	   after the first inlining, visited of the callee equals visited of
	   the caller.  With the next inlining both are increased.) */
	obstack_init(&env.obst);
	INIT_LIST_HEAD(&env.calls);
	irg_walk_graph(irg, NULL, collect_calls, &env);

	if (! list_empty(&env.calls)) {
		/* There are calls to inline */
		ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
		collect_phiprojs(irg);

		list_for_each_entry(call_entry, entry, &env.calls, list) {
			ir_graph            *callee = entry->callee;
			irg_inline_property prop    = get_irg_inline_property(callee);

			if (prop == irg_inline_forbidden) {
				continue;
			}

			if (prop >= irg_inline_forced ||
			    _obstack_memory_used(callee->obst) - (int)obstack_room(callee->obst) < size) {
				inline_method(entry->call, callee);
			}
		}
		ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
	}
	obstack_free(&env.obst, NULL);
	current_ir_graph = rem;
}
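
/* Example (sketch; 40 is an arbitrary obstack-size threshold, not a
 * recommended default):
 *
 *     for (size_t i = 0; i < get_irp_n_irgs(); ++i)
 *         inline_small_irgs(get_irp_irg(i), 40);
 */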

typedef struct inline_small_irgs_pass_t {
	ir_graph_pass_t pass;
	int             size;
} inline_small_irgs_pass_t;

/**
 * Wrapper to run inline_small_irgs() as a pass.
 */
static int inline_small_irgs_wrapper(ir_graph *irg, void *context)
{
	inline_small_irgs_pass_t *pass = (inline_small_irgs_pass_t*)context;

	inline_small_irgs(irg, pass->size);
	return 0;
}

/* create a pass for inline_small_irgs() */
ir_graph_pass_t *inline_small_irgs_pass(const char *name, int size)
{
	inline_small_irgs_pass_t *pass = XMALLOCZ(inline_small_irgs_pass_t);

	pass->size = size;
	return def_graph_pass_constructor(
		&pass->pass, name ? name : "inline_small_irgs", inline_small_irgs_wrapper);
}

/**
 * Environment for inlining irgs.
 */
typedef struct {
	list_head calls;             /**< List of all call nodes in this graph. */
	unsigned  *local_weights;    /**< Once allocated, the beneficial weight for transmitting local addresses. */
	unsigned  n_nodes;           /**< Number of nodes in graph except Id, Tuple, Proj, Start, End. */
	unsigned  n_blocks;          /**< Number of Blocks in graph without Start and End block. */
	unsigned  n_nodes_orig;      /**< for statistics */
	unsigned  n_call_nodes;      /**< Number of Call nodes in the graph. */
	unsigned  n_call_nodes_orig; /**< for statistics */
	unsigned  n_callers;         /**< Number of known graphs that call this graph. */
	unsigned  n_callers_orig;    /**< for statistics */
	unsigned  got_inline:1;      /**< Set, if at least one call inside this graph was inlined. */
	unsigned  recursive:1;       /**< Set, if this function is self recursive. */
} inline_irg_env;

/**
 * Allocate a new environment for inlining.
 */
static inline_irg_env *alloc_inline_irg_env(void)
{
	inline_irg_env *env    = OALLOC(&temp_obst, inline_irg_env);
	INIT_LIST_HEAD(&env->calls);
	env->local_weights     = NULL;
	env->n_nodes           = -2; /* do not count Start, End */
	env->n_blocks          = -2; /* do not count Start, End Block */
	env->n_nodes_orig      = -2; /* do not count Start, End */
	env->n_call_nodes      = 0;
	env->n_call_nodes_orig = 0;
	env->n_callers         = 0;
	env->n_callers_orig    = 0;
	env->got_inline        = 0;
	env->recursive         = 0;
	return env;
}

typedef struct walker_env {
	inline_irg_env *x;     /**< the inline environment */
	char ignore_runtime;   /**< the ignore runtime flag */
	char ignore_callers;   /**< if set, do not update the callers data */
} wenv_t;

/**
 * Post-walker: collect all calls in the inline environment
 * of a graph and sum some statistics.
 */
static void collect_calls2(ir_node *call, void *ctx)
{
	wenv_t         *env = (wenv_t*)ctx;
	inline_irg_env *x = env->x;
	unsigned        code = get_irn_opcode(call);
	ir_graph       *callee;
	call_entry     *entry;

	/* count meaningful nodes in irg */
	if (code != iro_Proj && code != iro_Tuple && code != iro_Sync) {
		if (code != iro_Block) {
			++x->n_nodes;
			++x->n_nodes_orig;
		} else {
			++x->n_blocks;
		}
	}

	if (code != iro_Call) return;

	/* check if it is a runtime call */
	if (env->ignore_runtime) {
		ir_node *symc = get_Call_ptr(call);

		if (is_SymConst_addr_ent(symc)) {
			ir_entity *ent = get_SymConst_entity(symc);

			if (get_entity_additional_properties(ent) & mtp_property_runtime)
				return;
		}
	}

	/* collect all call nodes */
	++x->n_call_nodes;
	++x->n_call_nodes_orig;

	callee = get_call_called_irg(call);
	if (callee != NULL) {
		if (! env->ignore_callers) {
			inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);
			/* count all static callers */
			++callee_env->n_callers;
			++callee_env->n_callers_orig;
		}
		if (callee == current_ir_graph)
			x->recursive = 1;

		/* link it in the list of possibly inlinable entries */
		entry = OALLOC(&temp_obst, call_entry);
		entry->call       = call;
		entry->callee     = callee;
		entry->loop_depth = get_irn_loop(get_nodes_block(call))->depth;
		entry->benefice   = 0;
		entry->local_adr  = 0;
		entry->all_const  = 0;

		list_add_tail(&entry->list, &x->calls);
	}
}
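
/* The loop_depth recorded above presumably feeds the inlining heuristics:
 * a call sitting in a deeper loop is executed more often, which makes it a
 * more attractive inlining candidate (compare the benefice field of
 * call_entry). */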

/**
 * Returns TRUE if the number of call nodes is 0 in the irg's environment,
 * hence this irg is a leaf of the call graph (a "leave" in this file).
 */
inline static int is_leave(ir_graph *irg)
{
	inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
	return env->n_call_nodes == 0;
}

/**
 * Returns TRUE if the number of nodes in the callee is
 * smaller than size in the irg's environment.
 */
inline static int is_smaller(ir_graph *callee, unsigned size)
{
	inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);
	return env->n_nodes < size;
}

/**
 * Duplicate a call entry.
 *
 * @param entry     the original entry to duplicate
 * @param new_call  the new call node
 * @param loop_depth_delta
 *                  delta value for the loop depth
 */
static call_entry *duplicate_call_entry(const call_entry *entry,
                                        ir_node *new_call, int loop_depth_delta)
{
	call_entry *nentry = OALLOC(&temp_obst, call_entry);
	nentry->call       = new_call;
	nentry->callee     = entry->callee;
	nentry->benefice   = entry->benefice;
	nentry->loop_depth = entry->loop_depth + loop_depth_delta;
	nentry->local_adr  = entry->local_adr;
	nentry->all_const  = entry->all_const;

	return nentry;
}

/**
 * Append all call nodes of the source environment to the call list of the
 * destination environment.
 *
 * @param dst         destination environment
 * @param src         source environment
 * @param loop_depth  the loop depth of the call that is replaced by the src list
 */
static void append_call_list(inline_irg_env *dst, inline_irg_env *src, int loop_depth)
{
	call_entry *entry, *nentry;

	/* Note that the src list points to Call nodes in the inlined graph, but
	   we need Call nodes in our graph. Luckily the inliner leaves this
	   information in the link field. */
	list_for_each_entry(call_entry, entry, &src->calls, list) {
		nentry = duplicate_call_entry(entry, (ir_node*)get_irn_link(entry->call), loop_depth);
		list_add_tail(&nentry->list, &dst->calls);
	}
	dst->n_call_nodes += src->n_call_nodes;
	dst->n_nodes      += src->n_nodes;
}

/*
 * Inlines small leaf methods ("leaves") at call sites where the called
 * address comes from a Const node that references the entity representing
 * the called method.
 * The size argument is a rough measure for the code size of the method:
 * Methods where the obstack containing the firm graph is smaller than
 * size are inlined.
 */
void inline_leave_functions(unsigned maxsize, unsigned leavesize,
                            unsigned size, int ignore_runtime)
{
	inline_irg_env   *env;
	ir_graph         *irg;
	size_t           i, n_irgs;
	ir_graph         *rem;
	int              did_inline;
	wenv_t           wenv;
	call_entry       *entry, *next;
	const call_entry *centry;
	pmap             *copied_graphs;
	pmap_entry       *pm_entry;

	rem = current_ir_graph;
	obstack_init(&temp_obst);

	/* a map for the copied graphs, used to inline recursive calls */
	copied_graphs = pmap_create();

	/* extend all irgs by a temporary data structure for inlining. */
	n_irgs = get_irp_n_irgs();
	for (i = 0; i < n_irgs; ++i)
		set_irg_link(get_irp_irg(i), alloc_inline_irg_env());

	/* Pre-compute information in temporary data structure. */
	wenv.ignore_runtime = ignore_runtime;
	wenv.ignore_callers = 0;
	for (i = 0; i < n_irgs; ++i) {
		ir_graph *irg = get_irp_irg(i);

		assert(get_irg_phase_state(irg) != phase_building);
		free_callee_info(irg);

		assure_loopinfo(irg);
		wenv.x = (inline_irg_env*)get_irg_link(irg);
		irg_walk_graph(irg, NULL, collect_calls2, &wenv);
	}

	/* -- and now inline. -- */

	/* Inline leaves recursively -- we might construct new leaves. */
	do {
		did_inline = 0;

		for (i = 0; i < n_irgs; ++i) {
			ir_node *call;
			int phiproj_computed = 0;

			current_ir_graph = get_irp_irg(i);
			env              = (inline_irg_env*)get_irg_link(current_ir_graph);

			ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
			list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
				ir_graph            *callee;
				irg_inline_property  prop;

				if (env->n_nodes > maxsize)
					break;

				call   = entry->call;
				callee = entry->callee;

				prop = get_irg_inline_property(callee);
				if (prop == irg_inline_forbidden) {
					continue;
				}

				if (is_leave(callee) && (
				    is_smaller(callee, leavesize) || prop >= irg_inline_forced)) {
					if (!phiproj_computed) {
						phiproj_computed = 1;
						collect_phiprojs(current_ir_graph);
					}
					did_inline = inline_method(call, callee);

					if (did_inline) {
						inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);

						/* call was inlined, Phi/Projs for current graph must be recomputed */
						phiproj_computed = 0;

						/* Do some statistics */
						env->got_inline = 1;
						--env->n_call_nodes;
						env->n_nodes += callee_env->n_nodes;
						--callee_env->n_callers;

						/* remove this call from the list */
						list_del(&entry->list);
						continue;
					}
				}
			}
			ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
		}
	} while (did_inline);

	/* inline other small functions. */
	for (i = 0; i < n_irgs; ++i) {
		ir_node *call;
		int phiproj_computed = 0;

		current_ir_graph = get_irp_irg(i);
		env              = (inline_irg_env*)get_irg_link(current_ir_graph);

		ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

		/* note that the list of possible calls is updated during the process */
		list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
			irg_inline_property prop;
			ir_graph            *callee;
			pmap_entry          *e;

			call   = entry->call;
			callee = entry->callee;

			prop = get_irg_inline_property(callee);
			if (prop == irg_inline_forbidden) {
				continue;
			}

			e = pmap_find(copied_graphs, callee);
			if (e != NULL) {
				/*
				 * Remap callee if we have a copy.
				 * FIXME: Should we do this only for recursive Calls?
				 */
				callee = (ir_graph*)e->value;
			}

			if (prop >= irg_inline_forced ||
			    (is_smaller(callee, size) && env->n_nodes < maxsize) /* small function */) {
				if (current_ir_graph == callee) {
					/*
					 * Recursive call: we cannot directly inline because we
					 * cannot walk the graph and change it. So we have to make
					 * a copy of the graph first.
					 */

					inline_irg_env *callee_env;
					ir_graph       *copy;

					ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

					/*
					 * No copy yet, create one.
					 * Note that recursive methods are never leaves, so it is
					 * sufficient to test this condition here.
					 */
					copy = create_irg_copy(callee);

					/* create_irg_copy() destroys the Proj links, recompute them */
					phiproj_computed = 0;

					ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

					/* allocate a new environment */
					callee_env = alloc_inline_irg_env();
					set_irg_link(copy, callee_env);

					assure_loopinfo(copy);
					wenv.x              = callee_env;
					wenv.ignore_callers = 1;
					irg_walk_graph(copy, NULL, collect_calls2, &wenv);

					/*
					 * Enter the entity of the original graph. This is needed
					 * for inline_method(). However, note that ent->irg still
					 * points to callee, NOT to copy.
					 */
					set_irg_entity(copy, get_irg_entity(callee));

					pmap_insert(copied_graphs, callee, copy);
					callee = copy;

					/* we have only one caller: the original graph */
					callee_env->n_callers      = 1;
					callee_env->n_callers_orig = 1;
				}
				if (! phiproj_computed) {
					phiproj_computed = 1;
					collect_phiprojs(current_ir_graph);
				}
				did_inline = inline_method(call, callee);
				if (did_inline) {
					inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);

					/* call was inlined, Phi/Projs for current graph must be recomputed */
					phiproj_computed = 0;

					/* callee was inlined. Append its call list. */
					env->got_inline = 1;
					--env->n_call_nodes;
					append_call_list(env, callee_env, entry->loop_depth);
					--callee_env->n_callers;

					/* after we have inlined callee, all called methods inside
					   callee are now called once more */
					list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
						inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
						++penv->n_callers;
					}

					/* remove this call from the list */
					list_del(&entry->list);
					continue;
				}
			}
		}
		ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
	}

	for (i = 0; i < n_irgs; ++i) {
		irg = get_irp_irg(i);
		env = (inline_irg_env*)get_irg_link(irg);

		if (env->got_inline) {
			optimize_graph_df(irg);
			optimize_cf(irg);
		}
		if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
			DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
			env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
			env->n_callers_orig, env->n_callers,
			get_entity_name(get_irg_entity(irg))));
		}
	}

	/* kill the copied graphs: we don't need them anymore */
	foreach_pmap(copied_graphs, pm_entry) {
		ir_graph *copy = (ir_graph*)pm_entry->value;

		/* reset the entity, otherwise it will be deleted in the next step ... */
		set_irg_entity(copy, NULL);
		free_ir_graph(copy);
	}
	pmap_destroy(copied_graphs);

	obstack_free(&temp_obst, NULL);
	current_ir_graph = rem;
}
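
/* Usage sketch (the thresholds are illustrative, not prescribed defaults):
 *
 *     inline_leave_functions(750, 16, 25, 0);
 *
 * maxsize bounds the caller's node count during inlining, leavesize bounds
 * leaf callees, size bounds other callees, and ignore_runtime == 0 means
 * calls to runtime functions are considered like any other call. */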

typedef struct inline_leave_functions_pass_t {
	ir_prog_pass_t pass;
	unsigned       maxsize;
	unsigned       leavesize;
	unsigned       size;
	int            ignore_runtime;
} inline_leave_functions_pass_t;

/**
 * Wrapper to run inline_leave_functions() as an ir_prog pass.
 */
static int inline_leave_functions_wrapper(ir_prog *irp, void *context)
{
	inline_leave_functions_pass_t *pass = (inline_leave_functions_pass_t*)context;

	(void)irp;
	inline_leave_functions(
		pass->maxsize, pass->leavesize,
		pass->size, pass->ignore_runtime);
	return 0;
}

/* create a pass for inline_leave_functions() */
ir_prog_pass_t *inline_leave_functions_pass(
	const char *name, unsigned maxsize, unsigned leavesize,
	unsigned size, int ignore_runtime)
{
	inline_leave_functions_pass_t *pass = XMALLOCZ(inline_leave_functions_pass_t);

	pass->maxsize        = maxsize;
	pass->leavesize      = leavesize;
	pass->size           = size;
	pass->ignore_runtime = ignore_runtime;

	return def_prog_pass_constructor(
		&pass->pass,
		name ? name : "inline_leave_functions",
		inline_leave_functions_wrapper);
}
1241
1242 /**
1243  * Calculate the parameter weights for transmitting the address of a local variable.
1244  */
1245 static unsigned calc_method_local_weight(ir_node *arg)
1246 {
1247         int      i, j, k;
1248         unsigned v, weight = 0;
1249
1250         for (i = get_irn_n_outs(arg) - 1; i >= 0; --i) {
1251                 ir_node *succ = get_irn_out(arg, i);
1252
1253                 switch (get_irn_opcode(succ)) {
1254                 case iro_Load:
1255                 case iro_Store:
1256                         /* Loads and Store can be removed */
1257                         weight += 3;
1258                         break;
1259                 case iro_Sel:
1260                         /* check if all args are constant */
1261                         for (j = get_Sel_n_indexs(succ) - 1; j >= 0; --j) {
1262                                 ir_node *idx = get_Sel_index(succ, j);
1263                                 if (! is_Const(idx))
1264                                         return 0;
1265                         }
1266                         /* Check users on this Sel. Note: if a 0 is returned here, there was
1267                            some unsupported node. */
1268                         v = calc_method_local_weight(succ);
1269                         if (v == 0)
1270                                 return 0;
1271                         /* we can kill one Sel with constant indexes, this is cheap */
1272                         weight += v + 1;
1273                         break;
1274                 case iro_Id:
1275                         /* when looking backward we might find Id nodes */
1276                         weight += calc_method_local_weight(succ);
1277                         break;
1278                 case iro_Tuple:
1279                         /* unoptimized tuple */
1280                         for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
1281                                 ir_node *pred = get_Tuple_pred(succ, j);
1282                                 if (pred == arg) {
1283                                         /* look for Proj(j) */
1284                                         for (k = get_irn_n_outs(succ) - 1; k >= 0; --k) {
1285                                                 ir_node *succ_succ = get_irn_out(succ, k);
1286                                                 if (is_Proj(succ_succ)) {
1287                                                         if (get_Proj_proj(succ_succ) == j) {
1288                                                                 /* found */
1289                                                                 weight += calc_method_local_weight(succ_succ);
1290                                                         }
1291                                                 } else {
1292                                                         /* this should NOT happen */
1293                                                         return 0;
1294                                                 }
1295                                         }
1296                                 }
1297                         }
1298                         break;
1299                 default:
1300                         /* any other node: not supported yet, treat as unsafe. */
1301                         return 0;
1302                 }
1303         }
1304         return weight;
1305 }
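
/*
 * Worked example (hypothetical): if 'arg' is the address of a local
 * variable whose only users are one Load and one Store, both hit the
 * iro_Load/iro_Store case above and the weight is 3 + 3 = 6. Should the
 * address also feed a Sel with constant indexes whose own users weigh v,
 * the total grows to 6 + v + 1. A single unsupported user collapses the
 * entire weight to 0.
 */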
1306
1307 /**
1308  * Calculate the weight of every parameter of the given graph for transmitting a local variable address.
1309  */
1310 static void analyze_irg_local_weights(inline_irg_env *env, ir_graph *irg)
1311 {
1312         ir_entity *ent = get_irg_entity(irg);
1313         ir_type  *mtp;
1314         size_t   nparams;
1315         int      i;
1316         long     proj_nr;
1317         ir_node  *irg_args, *arg;
1318
1319         mtp      = get_entity_type(ent);
1320         nparams  = get_method_n_params(mtp);
1321
1322         /* Allocate a new array; its existence doubles as the 'analysed' flag. */
1323         env->local_weights = NEW_ARR_D(unsigned, &temp_obst, nparams);
1324
1325         /* If the method has no parameters, there is nothing to do. */
1326         if (nparams == 0)
1327                 return;
1328
1329         assure_irg_outs(irg);
1330         irg_args = get_irg_args(irg);
1331         for (i = get_irn_n_outs(irg_args) - 1; i >= 0; --i) {
1332                 arg     = get_irn_out(irg_args, i);
1333                 proj_nr = get_Proj_proj(arg);
1334                 env->local_weights[proj_nr] = calc_method_local_weight(arg);
1335         }
1336 }
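
/*
 * Example (hypothetical signature): for a method f(int *p, int q) the
 * Args node carries one Proj per used parameter. Afterwards,
 * env->local_weights[0] holds the weight computed for p and
 * env->local_weights[1] the one for q. A parameter without a Proj
 * (i.e. one that is never used) keeps whatever value NEW_ARR_D left in
 * the array, so only Proj numbers that actually occur are meaningful.
 */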
1337
1338 /**
1339  * Calculate the benefice for transmitting a local variable address.
1340  * After inlining, the local variable might be transformed into an
1341  * SSA variable by scalar_replacement().
1342  */
1343 static unsigned get_method_local_adress_weight(ir_graph *callee, size_t pos)
1344 {
1345         inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);
1346
1347         if (env->local_weights == NULL)
1348                 analyze_irg_local_weights(env, callee);
1349
1350         if (pos < ARR_LEN(env->local_weights))
1351                 return env->local_weights[pos];
1352         return 0;
1353 }
1354
1355 /**
1356  * Calculate a benefice value for inlining the given call.
1357  *
1358  * @param call       the call node we have to inspect
1359  * @param callee     the called graph
1360  */
1361 static int calc_inline_benefice(call_entry *entry, ir_graph *callee)
1362 {
1363         ir_node   *call = entry->call;
1364         ir_entity *ent  = get_irg_entity(callee);
1365         ir_type   *callee_frame;
1366         size_t    i, n_members, n_params;
1367         ir_node   *frame_ptr;
1368         ir_type   *mtp;
1369         int       weight = 0;
1370         int       all_const;
1371         unsigned  cc, v;
1372         irg_inline_property prop;
1373
1374         inline_irg_env *callee_env;
1375
1376         prop = get_irg_inline_property(callee);
1377         if (prop == irg_inline_forbidden) {
1378                 DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden\n",
1379                     call, callee));
1380                 return entry->benefice = INT_MIN;
1381         }
1382
1383         callee_frame = get_irg_frame_type(callee);
1384         n_members = get_class_n_members(callee_frame);
1385         for (i = 0; i < n_members; ++i) {
1386                 ir_entity *frame_ent = get_class_member(callee_frame, i);
1387                 if (is_parameter_entity(frame_ent)) {
1388                         /* TODO: the inliner should handle parameter entities by inserting Store operations */
1389                         DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden due to parameter entity\n", call, callee));
1390                         set_irg_inline_property(callee, irg_inline_forbidden);
1391                         return entry->benefice = INT_MIN;
1392                 }
1393         }
1394
1395         if (get_irg_additional_properties(callee) & mtp_property_noreturn) {
1396                 DB((dbg, LEVEL_2, "In %+F Call to %+F: not inlining noreturn\n",
1397                     call, callee));
1398                 return entry->benefice = INT_MIN;
1399         }
1400
1401         /* costs for every passed parameter */
1402         n_params = get_Call_n_params(call);
1403         mtp      = get_entity_type(ent);
1404         cc       = get_method_calling_convention(mtp);
1405         if (cc & cc_reg_param) {
1406                 /* register calling convention: register parameters are cheaper */
1407                 size_t max_regs = cc & ~cc_bits;
1408
1409                 if (max_regs < n_params)
1410                         weight += max_regs * 2 + (n_params - max_regs) * 5;
1411                 else
1412                         weight += n_params * 2;
1413         } else {
1414                 /* parameters are passed on the stack */
1415                 weight += 5 * n_params;
1416         }
1417
1418         /* constant parameters improve the benefice */
1419         frame_ptr = get_irg_frame(current_ir_graph);
1420         all_const = 1;
1421         for (i = 0; i < n_params; ++i) {
1422                 ir_node *param = get_Call_param(call, i);
1423
1424                 if (is_Const(param)) {
1425                         weight += get_method_param_weight(ent, i);
1426                 } else {
1427                         all_const = 0;
1428                         if (is_SymConst(param))
1429                                 weight += get_method_param_weight(ent, i);
1430                         else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
1431                                 /*
1432                                  * An address of a local variable is transmitted. After
1433                                  * inlining, scalar_replacement might be able to remove the
1434                                  * local variable, so honor this.
1435                                  */
1436                                 v = get_method_local_adress_weight(callee, i);
1437                                 weight += v;
1438                                 if (v > 0)
1439                                         entry->local_adr = 1;
1440                         }
1441                 }
1442         }
1443         entry->all_const = all_const;
1444
1445         callee_env = (inline_irg_env*)get_irg_link(callee);
1446         if (callee_env->n_callers == 1 &&
1447             callee != current_ir_graph &&
1448             !entity_is_externally_visible(ent)) {
1449                 weight += 700;
1450         }
1451
1452         /* give a bonus for functions with one block */
1453         if (callee_env->n_blocks == 1)
1454                 weight = weight * 3 / 2;
1455
1456         /* and one for small non-recursive functions: we want them to be inlined in almost every case */
1457         if (callee_env->n_nodes < 30 && !callee_env->recursive)
1458                 weight += 2000;
1459
1460         /* and finally for leaves: they do not increase the register pressure
1461            because of callee-save registers */
1462         if (callee_env->n_call_nodes == 0)
1463                 weight += 400;
1464
1465         /* it is important to inline inner loops first */
1466         if (entry->loop_depth > 30)
1467                 weight += 30 * 1024;
1468         else
1469                 weight += entry->loop_depth * 1024;
1470
1471         /*
1472          * All arguments being constant is probably a good sign, give an extra bonus
1473          */
1474         if (all_const)
1475                 weight += 1024;
1476
1477         return entry->benefice = weight;
1478 }
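
/*
 * Worked example (all numbers hypothetical): a call passing two
 * non-constant parameters under a register calling convention with at
 * least two register slots starts at 2 * 2 = 4. A callee with exactly
 * one caller that is not externally visible adds 700; a single basic
 * block scales the sum by 3/2 to 1056; fewer than 30 nodes and no
 * recursion add 2000, a leaf callee adds 400, and loop depth 2 adds
 * 2 * 1024. The resulting benefice is 1056 + 2000 + 400 + 2048 = 5504.
 */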
1479
1480 typedef struct walk_env_t {
1481         ir_graph **irgs;
1482         size_t   last_irg;
1483 } walk_env_t;
1484
1485 /**
1486  * Callgraph walker, collect all visited graphs.
1487  */
1488 static void callgraph_walker(ir_graph *irg, void *data)
1489 {
1490         walk_env_t *env = (walk_env_t *)data;
1491         env->irgs[env->last_irg++] = irg;
1492 }
1493
1494 /**
1495  * Creates an inline order for all graphs.
1496  *
1497  * @return the list of graphs.
1498  */
1499 static ir_graph **create_irg_list(void)
1500 {
1501         ir_entity  **free_methods;
1502         size_t     n_irgs = get_irp_n_irgs();
1503         walk_env_t env;
1504
1505         cgana(&free_methods);
1506         xfree(free_methods);
1507
1508         compute_callgraph();
1509
1510         env.irgs     = XMALLOCNZ(ir_graph*, n_irgs);
1511         env.last_irg = 0;
1512
1513         callgraph_walk(NULL, callgraph_walker, &env);
1514         assert(n_irgs == env.last_irg);
1515
1516         return env.irgs;
1517 }
1518
1519 /**
1520  * Push a call onto the priority list if its benefice is big enough.
1521  *
1522  * @param pqueue   the priority queue of calls
1523  * @param call     the call entry
1524  * @param inline_threshold
1525  *                 the threshold value
1526  */
1527 static void maybe_push_call(pqueue_t *pqueue, call_entry *call,
1528                             int inline_threshold)
1529 {
1530         ir_graph            *callee  = call->callee;
1531         irg_inline_property prop     = get_irg_inline_property(callee);
1532         int                 benefice = calc_inline_benefice(call, callee);
1533
1534         DB((dbg, LEVEL_2, "In %+F Call %+F to %+F has benefice %d\n",
1535             get_irn_irg(call->call), call->call, callee, benefice));
1536
1537         if (prop < irg_inline_forced && benefice < inline_threshold) {
1538                 return;
1539         }
1540
1541         pqueue_put(pqueue, call, benefice);
1542 }
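
/*
 * Consequence of the guard above (hypothetical numbers): with an
 * inline_threshold of 0, a forbidden call (benefice INT_MIN) is always
 * dropped, the benefice of 5504 from the example above is queued with
 * priority 5504, and a call to an irg_inline_forced graph is queued
 * even with a negative benefice.
 */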
1543
1544 /**
1545  * Try to inline calls into a graph.
1546  *
1547  * @param irg      the graph into which we inline
1548  * @param maxsize  do NOT inline if the size of irg gets
1549  *                 bigger than this amount
1550  * @param inline_threshold
1551  *                 threshold value for inline decision
1552  * @param copied_graphs
1553  *                 map containing copies of recursive graphs
1554  */
1555 static void inline_into(ir_graph *irg, unsigned maxsize,
1556                         int inline_threshold, pmap *copied_graphs)
1557 {
1558         int            phiproj_computed = 0;
1559         inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
1560         call_entry     *curr_call;
1561         wenv_t         wenv;
1562         pqueue_t       *pqueue;
1563
1564         if (env->n_call_nodes == 0)
1565                 return;
1566
1567         if (env->n_nodes > maxsize) {
1568                 DB((dbg, LEVEL_2, "%+F: too big (%d)\n", irg, env->n_nodes));
1569                 return;
1570         }
1571
1572         current_ir_graph = irg;
1573         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1574
1575         /* put all calls of the graph into the pqueue */
1576         pqueue = new_pqueue();
1577
1578         list_for_each_entry(call_entry, curr_call, &env->calls, list) {
1579                 assert(is_Call(curr_call->call));
1580                 maybe_push_call(pqueue, curr_call, inline_threshold);
1581         }
1582
1583         /* note that the list of possible calls is updated during the process */
1584         while (!pqueue_empty(pqueue)) {
1585                 int                 did_inline;
1586                 call_entry          *curr_call  = (call_entry*)pqueue_pop_front(pqueue);
1587                 ir_graph            *callee     = curr_call->callee;
1588                 ir_node             *call_node  = curr_call->call;
1589                 inline_irg_env      *callee_env = (inline_irg_env*)get_irg_link(callee);
1590                 irg_inline_property prop        = get_irg_inline_property(callee);
1591                 int                 loop_depth;
1592                 const call_entry    *centry;
1593                 pmap_entry          *e;
1594
1595                 if ((prop < irg_inline_forced) && env->n_nodes + callee_env->n_nodes > maxsize) {
1596                         DB((dbg, LEVEL_2, "%+F: too big (%d) + %+F (%d)\n", irg,
1597                                                 env->n_nodes, callee, callee_env->n_nodes));
1598                         continue;
1599                 }
1600
1601                 e = pmap_find(copied_graphs, callee);
1602                 if (e != NULL) {
1603                         int benefice = curr_call->benefice;
1604                         /*
1605                          * Reduce the benefice for recursive functions if not all arguments
1606                          * are constant: inlining recursive functions is rarely profitable.
1607                          */
1608                         if (!curr_call->all_const)
1609                                 benefice -= 2000;
1610                         if (benefice < inline_threshold)
1611                                 continue;
1612
1613                         /*
1614                          * Remap callee if we have a copy.
1615                          */
1616                         callee     = (ir_graph*)e->value;
1617                         callee_env = (inline_irg_env*)get_irg_link(callee);
1618                 }
1619
1620                 if (current_ir_graph == callee) {
1621                         /*
1622                          * Recursive call: we cannot directly inline because we cannot
1623                          * walk the graph and change it. So we have to make a copy of
1624                          * the graph first.
1625                          */
1626                         int benefice = curr_call->benefice;
1627                         ir_graph *copy;
1628
1629                         /*
1630                          * Reduce the benefice for recursive functions if not all arguments
1631                          * are constant: inlining recursive functions is rarely profitable.
1632                          */
1633                         if (!curr_call->all_const)
1634                                 benefice -= 2000;
1635                         if (benefice < inline_threshold)
1636                                 continue;
1637
1638                         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1639
1640                         /*
1641                          * No copy yet, create one.
1642                          * Note that recursive methods are never leaves, so it is
1643                          * sufficient to test this condition here.
1644                          */
1645                         copy = create_irg_copy(callee);
1646
1647                         /* create_irg_copy() destroys the Proj links, recompute them */
1648                         phiproj_computed = 0;
1649
1650                         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1651
1652                         /* allocate a new environment */
1653                         callee_env = alloc_inline_irg_env();
1654                         set_irg_link(copy, callee_env);
1655
1656                         assure_loopinfo(copy);
1657                         memset(&wenv, 0, sizeof(wenv));
1658                         wenv.x              = callee_env;
1659                         wenv.ignore_callers = 1;
1660                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
1661
1662                         /*
1663                          * Enter the entity of the original graph. This is needed
1664                          * for inline_method(). However, note that ent->irg still points
1665                          * to callee, NOT to copy.
1666                          */
1667                         set_irg_entity(copy, get_irg_entity(callee));
1668
1669                         pmap_insert(copied_graphs, callee, copy);
1670                         callee = copy;
1671
1672                         /* we have only one caller: the original graph */
1673                         callee_env->n_callers      = 1;
1674                         callee_env->n_callers_orig = 1;
1675                 }
1676                 if (! phiproj_computed) {
1677                         phiproj_computed = 1;
1678                         collect_phiprojs(current_ir_graph);
1679                 }
1680                 did_inline = inline_method(call_node, callee);
1681                 if (!did_inline)
1682                         continue;
1683
1684                 /* call was inlined, Phi/Projs for current graph must be recomputed */
1685                 phiproj_computed = 0;
1686
1687                 /* remove it from the caller list */
1688                 list_del(&curr_call->list);
1689
1690                 /* callee was inlined. Append its call list. */
1691                 env->got_inline = 1;
1692                 --env->n_call_nodes;
1693
1694                 /* we just generate a bunch of new calls */
1695                 loop_depth = curr_call->loop_depth;
1696                 list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
1697                         inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
1698                         ir_node        *new_call;
1699                         call_entry     *new_entry;
1700
1701                         /* after we have inlined callee, all called methods inside
1702                          * callee are now called once more */
1703                         ++penv->n_callers;
1704
1705                         /* Note that the src list points to Call nodes in the inlined graph,
1706                          * but we need Call nodes in our graph. Luckily the inliner leaves
1707                          * this information in the link field. */
1708                         new_call = (ir_node*)get_irn_link(centry->call);
1709                         if (get_irn_irg(new_call) != irg) {
1710                                 /* centry->call has not been copied, which means it is dead.
1711                                  * This can happen during inlining, if a const function that
1712                                  * cannot be inlined is only used as an unused argument of
1713                                  * another function, which is inlined. */
1714                                 continue;
1715                         }
1716                         assert(is_Call(new_call));
1717
1718                         new_entry = duplicate_call_entry(centry, new_call, loop_depth);
1719                         list_add_tail(&new_entry->list, &env->calls);
1720                         maybe_push_call(pqueue, new_entry, inline_threshold);
1721                 }
1722
1723                 env->n_call_nodes += callee_env->n_call_nodes;
1724                 env->n_nodes += callee_env->n_nodes;
1725                 --callee_env->n_callers;
1726         }
1727         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1728         del_pqueue(pqueue);
1729 }
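
/*
 * Mini-trace (hypothetical call chain): inlining f() into g() removes
 * one Call from g's list but appends copies of all Calls inside f, each
 * one re-entering the priority queue via maybe_push_call() with the
 * loop depth of the original call site added. A chain g -> f -> h can
 * thus be flattened within a single inline_into(g, ...) run, as long as
 * the n_nodes budget 'maxsize' is not exceeded.
 */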
1730
1731 /*
1732  * Heuristic inliner. Calculates a benefice value for every call and inlines
1733  * those calls with a value higher than the threshold.
1734  */
1735 void inline_functions(unsigned maxsize, int inline_threshold,
1736                       opt_ptr after_inline_opt)
1737 {
1738         inline_irg_env   *env;
1739         size_t           i, n_irgs;
1740         ir_graph         *rem;
1741         wenv_t           wenv;
1742         pmap             *copied_graphs;
1743         pmap_entry       *pm_entry;
1744         ir_graph         **irgs;
1745
1746         rem = current_ir_graph;
1747         obstack_init(&temp_obst);
1748
1749         irgs = create_irg_list();
1750
1751         /* a map for the copied graphs, used to inline recursive calls */
1752         copied_graphs = pmap_create();
1753
1754         /* extend all irgs by a temporary data structure for inlining. */
1755         n_irgs = get_irp_n_irgs();
1756         for (i = 0; i < n_irgs; ++i)
1757                 set_irg_link(irgs[i], alloc_inline_irg_env());
1758
1759         /* Pre-compute information in temporary data structure. */
1760         wenv.ignore_runtime = 0;
1761         wenv.ignore_callers = 0;
1762         for (i = 0; i < n_irgs; ++i) {
1763                 ir_graph *irg = irgs[i];
1764
1765                 free_callee_info(irg);
1766
1767                 wenv.x = (inline_irg_env*)get_irg_link(irg);
1768                 assure_loopinfo(irg);
1769                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
1770         }
1771
1772         /* -- and now inline. -- */
1773         for (i = 0; i < n_irgs; ++i) {
1774                 ir_graph *irg = irgs[i];
1775
1776                 inline_into(irg, maxsize, inline_threshold, copied_graphs);
1777         }
1778
1779         for (i = 0; i < n_irgs; ++i) {
1780                 ir_graph *irg = irgs[i];
1781
1782                 env = (inline_irg_env*)get_irg_link(irg);
1783                 if (env->got_inline && after_inline_opt != NULL) {
1784                         /* this irg got calls inlined: optimize it */
1785                         after_inline_opt(irg);
1786                 }
1787                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
1788                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
1789                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
1790                         env->n_callers_orig, env->n_callers,
1791                         get_entity_name(get_irg_entity(irg))));
1792                 }
1793         }
1794
1795         /* kill the copied graphs: we don't need them anymore */
1796         foreach_pmap(copied_graphs, pm_entry) {
1797                 ir_graph *copy = (ir_graph*)pm_entry->value;
1798
1799                 /* reset the entity, otherwise it will be deleted in the next step ... */
1800                 set_irg_entity(copy, NULL);
1801                 free_ir_graph(copy);
1802         }
1803         pmap_destroy(copied_graphs);
1804
1805         xfree(irgs);
1806
1807         obstack_free(&temp_obst, NULL);
1808         current_ir_graph = rem;
1809 }
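
/*
 * Usage sketch (illustrative): a compiler driver might run the
 * heuristic inliner with a 750-node budget, a zero threshold and data
 * flow optimization as cleanup. optimize_graph_df is merely one
 * plausible opt_ptr; any void(ir_graph*) function can be passed.
 *
 *     inline_functions(750, 0, optimize_graph_df);
 */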
1810
1811 typedef struct inline_functions_pass_t {
1812         ir_prog_pass_t pass;
1813         unsigned       maxsize;
1814         int            inline_threshold;
1815         opt_ptr        after_inline_opt;
1816 } inline_functions_pass_t;
1817
1818 /**
1819  * Wrapper to run inline_functions() as an ir_prog pass.
1820  */
1821 static int inline_functions_wrapper(ir_prog *irp, void *context)
1822 {
1823         inline_functions_pass_t *pass = (inline_functions_pass_t*)context;
1824
1825         (void)irp;
1826         inline_functions(pass->maxsize, pass->inline_threshold,
1827                          pass->after_inline_opt);
1828         return 0;
1829 }
1830
1831 /* create an ir_prog pass for inline_functions */
1832 ir_prog_pass_t *inline_functions_pass(
1833           const char *name, unsigned maxsize, int inline_threshold,
1834           opt_ptr after_inline_opt)
1835 {
1836         inline_functions_pass_t *pass = XMALLOCZ(inline_functions_pass_t);
1837
1838         pass->maxsize          = maxsize;
1839         pass->inline_threshold = inline_threshold;
1840         pass->after_inline_opt = after_inline_opt;
1841
1842         return def_prog_pass_constructor(
1843                 &pass->pass, name ? name : "inline_functions",
1844                 inline_functions_wrapper);
1845 }
1846
1847 void firm_init_inline(void)
1848 {
1849         FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
1850 }