give Bad nodes a mode
[libfirm] / ir / opt / opt_inline.c
/*
 * Copyright (C) 1995-2011 University of Karlsruhe.  All rights reserved.
 *
 * This file is part of libFirm.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License
 * Agreement provided with the Software.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

/**
 * @file
 * @brief    Dead node elimination and Procedure Inlining.
 * @author   Michael Beck, Goetz Lindenmaier
 * @version  $Id$
 */
#include "config.h"

#include <limits.h>
#include <stdbool.h>
#include <assert.h>

#include "irnode_t.h"
#include "irgraph_t.h"
#include "irprog_t.h"

#include "iroptimize.h"
#include "ircons_t.h"
#include "iropt_t.h"
#include "irgopt.h"
#include "irgmod.h"
#include "irgwalk.h"

#include "array_t.h"
#include "list.h"
#include "pset.h"
#include "pmap.h"
#include "pdeq.h"
#include "xmalloc.h"
#include "pqueue.h"

#include "irouts.h"
#include "irloop_t.h"
#include "irbackedge_t.h"
#include "opt_init.h"
#include "cgana.h"
#include "trouts.h"
#include "error.h"

#include "analyze_irg_args.h"
#include "iredges_t.h"
#include "irflag_t.h"
#include "irhooks.h"
#include "irtools.h"
#include "iropt_dbg.h"
#include "irpass_t.h"
#include "irphase_t.h"

DEBUG_ONLY(static firm_dbg_module_t *dbg;)

/*------------------------------------------------------------------*/
/* Routines for dead node elimination / copying garbage collection  */
/* of the obstack.                                                  */
/*------------------------------------------------------------------*/

/**
 * Remember the new node in the old node by using a field all nodes have.
 */
static void set_new_node(ir_node *node, ir_node *new_node)
{
        set_irn_link(node, new_node);
}

/**
 * Get this new node, before the old node is forgotten.
 */
static inline ir_node *get_new_node(ir_node *old_node)
{
        assert(irn_visited(old_node));
        return (ir_node*) get_irn_link(old_node);
}
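
/* A minimal sketch (added for illustration, not part of the original file) of
 * the copy protocol these two helpers support, assuming the caller has
 * reserved IR_RESOURCE_IRN_LINK:
 *
 *     ir_node *copy = irn_copy_into_irg(old, new_irg);
 *     set_new_node(old, copy);
 *     mark_irn_visited(old);
 *     ...
 *     ir_node *c = get_new_node(old);   // only valid once old is visited
 */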

/*--------------------------------------------------------------------*/
/*  Functionality for inlining                                        */
/*--------------------------------------------------------------------*/

/**
 * Copy a node for inlining.  Updates attributes that change when
 * inlining but not during dead node elimination.
 *
 * Copies the node by calling irn_copy_into_irg() and then updates the
 * entity if it is a local one.  env must be the graph we inline into.
 * The new entities must be in the link fields of the old frame entities.
 */
static void copy_node_inline(ir_node *node, void *env)
{
        ir_graph *new_irg  = (ir_graph*) env;
        ir_node  *new_node = irn_copy_into_irg(node, new_irg);

        set_new_node(node, new_node);
        if (is_Sel(node)) {
                ir_graph  *old_irg        = get_irn_irg(node);
                ir_type   *old_frame_type = get_irg_frame_type(old_irg);
                ir_entity *old_entity     = get_Sel_entity(node);
                assert(is_Sel(new_node));
                /* use copied entities from the new frame */
                if (get_entity_owner(old_entity) == old_frame_type) {
                        ir_entity *new_entity = (ir_entity*)get_entity_link(old_entity);
                        assert(new_entity != NULL);
                        set_Sel_entity(new_node, new_entity);
                }
        } else if (is_Block(new_node)) {
                new_node->attr.block.irg.irg = new_irg;
        }
}

static void set_preds_inline(ir_node *node, void *env)
{
        ir_node *new_node;

        irn_rewire_inputs(node);

        /* move constants into start block */
        new_node = get_new_node(node);
        if (is_irn_constlike(new_node)) {
                ir_graph *new_irg     = (ir_graph *) env;
                ir_node  *start_block = get_irg_start_block(new_irg);
                set_nodes_block(new_node, start_block);
        }
}

/**
 * Walker: checks whether a graph accesses its value parameters through the
 * frame (historically: whether P_value_arg_base is used) or contains a
 * dynamic stack allocation; both prevent inlining.
 */
static void find_addr(ir_node *node, void *env)
{
        bool *allow_inline = (bool*)env;

        if (is_Sel(node)) {
                ir_graph *irg = current_ir_graph;
                if (get_Sel_ptr(node) == get_irg_frame(irg)) {
                        /* access to frame */
                        ir_entity *ent = get_Sel_entity(node);
                        if (get_entity_owner(ent) != get_irg_frame_type(irg)) {
                                /* access to value_type */
                                *allow_inline = false;
                        }
                }
        } else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
                /* From GCC:
                 * Refuse to inline an alloca call unless the user explicitly forced
                 * it, as this may change the program's memory overhead drastically
                 * when the function using alloca is called in a loop.  In the GCC
                 * present in SPEC2000, inlining into schedule_block caused it to
                 * require 2GB of RAM instead of 256MB.
                 *
                 * Sadly, this is true for our implementation as well.
                 * Moreover, we cannot differentiate between alloca() and VLA yet, so
                 * this disables inlining of functions using VLA (which are completely
                 * safe).
                 *
                 * Two solutions:
                 * - add a flag to the Alloc node for "real" alloca() calls
                 * - add a new Stack-Restore node at the end of a function using
                 *   alloca()
                 */
                *allow_inline = false;
        }
}
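
/* Hypothetical C input (illustrative, not from the original file) that trips
 * the Alloc check above and thus forbids inlining f():
 *
 *     void f(unsigned n) {
 *         for (unsigned i = 0; i < n; ++i) {
 *             char *buf = alloca(n);   // lowered to an Alloc with stack_alloc
 *             use(buf);
 *         }
 *     }
 */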

/**
 * Check if we can inline a given call.
 * Currently, we cannot inline two cases:
 * - calls with compound arguments
 * - graphs that take the address of a parameter
 *
 * check these conditions here
 */
static bool can_inline(ir_node *call, ir_graph *called_graph)
{
        ir_entity          *called      = get_irg_entity(called_graph);
        ir_type            *called_type = get_entity_type(called);
        ir_type            *call_type   = get_Call_type(call);
        size_t              n_params    = get_method_n_params(called_type);
        size_t              n_arguments = get_method_n_params(call_type);
        size_t              n_res       = get_method_n_ress(called_type);
        irg_inline_property prop        = get_irg_inline_property(called_graph);
        size_t              i;
        bool                res;

        if (prop == irg_inline_forbidden)
                return false;

        if (n_arguments != n_params) {
                /* this is a bad feature of C: without a prototype, we can
                 * call a function with fewer arguments than it has parameters.
                 * Currently we don't support this, although we could use
                 * Unknown then. */
                return false;
        }
        if (n_res != get_method_n_ress(call_type)) {
                return false;
        }

        /* Argh, compiling C has some bad consequences:
         * It is implementation dependent what happens when argument and
         * parameter types disagree.
         * We support inlining, if the bit size of the types matches AND
         * the same arithmetic is used. */
        for (i = 0; i < n_params; ++i) {
                ir_type *param_tp = get_method_param_type(called_type, i);
                ir_type *arg_tp   = get_method_param_type(call_type, i);

                if (param_tp != arg_tp) {
                        ir_mode *pmode = get_type_mode(param_tp);
                        ir_mode *amode = get_type_mode(arg_tp);

                        if (pmode == NULL || amode == NULL)
                                return false;
                        if (get_mode_size_bits(pmode) != get_mode_size_bits(amode))
                                return false;
                        if (get_mode_arithmetic(pmode) != get_mode_arithmetic(amode))
                                return false;
                        /* otherwise we can simply "reinterpret" the bits */
                }
        }
        for (i = 0; i < n_res; ++i) {
                ir_type *decl_res_tp = get_method_res_type(called_type, i);
                ir_type *used_res_tp = get_method_res_type(call_type, i);

                if (decl_res_tp != used_res_tp) {
                        ir_mode *decl_mode = get_type_mode(decl_res_tp);
                        ir_mode *used_mode = get_type_mode(used_res_tp);
                        if (decl_mode == NULL || used_mode == NULL)
                                return false;
                        if (get_mode_size_bits(decl_mode) != get_mode_size_bits(used_mode))
                                return false;
                        if (get_mode_arithmetic(decl_mode) != get_mode_arithmetic(used_mode))
                                return false;
                        /* otherwise we can "reinterpret" the bits */
                }
        }

        /* check parameters for compound arguments */
        for (i = 0; i < n_params; ++i) {
                ir_type *p_type = get_method_param_type(call_type, i);

                if (is_compound_type(p_type))
                        return false;
        }

        /* check results for compound arguments */
        for (i = 0; i < n_res; ++i) {
                ir_type *r_type = get_method_res_type(call_type, i);

                if (is_compound_type(r_type))
                        return false;
        }

        res = true;
        irg_walk_graph(called_graph, find_addr, NULL, &res);

        return res;
}
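
/* Hypothetical C example (illustrative) of the prototype mismatch rejected
 * above: the call type carries three parameters while the called type has
 * none, so n_arguments != n_params and can_inline() returns false:
 *
 *     int f();                              // no prototype
 *     int g(void) { return f(1, 2, 3); }    // call with 3 arguments
 */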

enum exc_mode {
        exc_handler,    /**< There is a handler. */
        exc_no_handler  /**< Exception handling not represented. */
};

/**
 * Copy all entities of the stack frame of one irg to the stack frame of
 * another. Sets the entity links of the old entities to the copies.
 */
static void copy_frame_entities(ir_graph *from, ir_graph *to)
{
        ir_type *from_frame = get_irg_frame_type(from);
        ir_type *to_frame   = get_irg_frame_type(to);
        size_t   n_members  = get_class_n_members(from_frame);
        size_t   i;
        assert(from_frame != to_frame);

        for (i = 0; i < n_members; ++i) {
                ir_entity *old_ent = get_class_member(from_frame, i);
                ir_entity *new_ent = copy_entity_own(old_ent, to_frame);
                set_entity_link(old_ent, new_ent);
        }
}

/* Inlines a method at the given call site. */
int inline_method(ir_node *call, ir_graph *called_graph)
{
        ir_node       *pre_call;
        ir_node       *post_call, *post_bl;
        ir_node       *in[pn_Start_max];
        ir_node       *end, *end_bl, *block;
        ir_node       **res_pred;
        ir_node       **cf_pred;
        ir_node       **args_in;
        ir_node       *ret, *phi;
        int           arity, n_ret, n_exc, n_res, i, j, rem_opt;
        int           irn_arity, n_params;
        int           n_mem_phi;
        enum exc_mode exc_handling;
        ir_type       *mtp;
        ir_type       *ctp;
        ir_entity     *ent;
        ir_graph      *rem;
        ir_graph      *irg = get_irn_irg(call);

        /* we cannot inline some types of calls */
        if (! can_inline(call, called_graph))
                return 0;

        /* We cannot inline a recursive call. The graph must be copied before
         * the call to inline_method() using create_irg_copy(). */
        if (called_graph == irg)
                return 0;

        ent      = get_irg_entity(called_graph);
        mtp      = get_entity_type(ent);
        ctp      = get_Call_type(call);
        n_params = get_method_n_params(mtp);
        n_res    = get_method_n_ress(mtp);

        rem = current_ir_graph;
        current_ir_graph = irg;

        DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));

        /* optimizations can cause problems when allocating new nodes */
        rem_opt = get_opt_optimize();
        set_optimize(0);

        /* Handle graph state */
        assert(get_irg_phase_state(irg) != phase_building);
        assert(get_irg_pinned(irg) == op_pin_state_pinned);
        assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
        set_irg_outs_inconsistent(irg);
        set_irg_extblk_inconsistent(irg);
        set_irg_doms_inconsistent(irg);
        set_irg_loopinfo_inconsistent(irg);
        set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
        set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);
        edges_deactivate(irg);

        /* here we know we WILL inline, so inform the statistics */
        hook_inline(call, called_graph);

        /* -- Decide how to handle exception control flow: Is there a handler
           for the Call node, or do we branch directly to End on an exception?
           exc_handling:
           exc_handler:    There is a handler.
           exc_no_handler: Exception handling not represented in Firm. -- */
        {
                ir_node *Xproj = NULL;
                ir_node *proj;
                for (proj = (ir_node*)get_irn_link(call); proj != NULL;
                     proj = (ir_node*)get_irn_link(proj)) {
                        long proj_nr = get_Proj_proj(proj);
                        if (proj_nr == pn_Call_X_except) Xproj = proj;
                }
                exc_handling = Xproj != NULL ? exc_handler : exc_no_handler;
        }

        /* create the argument tuple */
        args_in = ALLOCAN(ir_node*, n_params);

        block = get_nodes_block(call);
        for (i = n_params - 1; i >= 0; --i) {
                ir_node *arg      = get_Call_param(call, i);
                ir_type *param_tp = get_method_param_type(mtp, i);
                ir_mode *mode     = get_type_mode(param_tp);

                if (mode != get_irn_mode(arg)) {
                        arg = new_r_Conv(block, arg, mode);
                }
                args_in[i] = arg;
        }

        /* Pre_call is a Tuple that collects the inputs of the inlined
         * procedure and later replaces the Start node of the called graph.
         * Post_call is the old Call node and collects the results of the called
         * graph. Both will end up being a tuple. */
        post_bl = get_nodes_block(call);
        /* XxMxPxPxPxT of Start + parameter of Call */
        in[pn_Start_M]              = get_Call_mem(call);
        in[pn_Start_X_initial_exec] = new_r_Jmp(post_bl);
        in[pn_Start_P_frame_base]   = get_irg_frame(irg);
        in[pn_Start_T_args]         = new_r_Tuple(post_bl, n_params, args_in);
        pre_call = new_r_Tuple(post_bl, pn_Start_max, in);
        post_call = call;

        /* --
           The new block gets the ins of the old block, pre_call and all its
           predecessors and all Phi nodes. -- */
        part_block(pre_call);

        /* increment visited flag for later walk */
        inc_irg_visited(called_graph);

        /* link some nodes to nodes in the current graph so instead of copying
         * the linked nodes will get used.
         * So the copier will use the created Tuple instead of copying the start
         * node, similar for singleton nodes like NoMem and Bad.
         * Note: this will prevent the predecessors from being copied - only do
         *       it for nodes without predecessors */
        {
                ir_node *start_block;
                ir_node *start;
                ir_node *nomem;

                start_block = get_irg_start_block(called_graph);
                set_new_node(start_block, get_nodes_block(pre_call));
                mark_irn_visited(start_block);

                start = get_irg_start(called_graph);
                set_new_node(start, pre_call);
                mark_irn_visited(start);

                nomem = get_irg_no_mem(called_graph);
                set_new_node(nomem, get_irg_no_mem(irg));
                mark_irn_visited(nomem);
        }

        /* the entity link is used to link entities on the old stack frame to
         * the new stack frame */
        irp_reserve_resources(irp, IR_RESOURCE_ENTITY_LINK);

        /* copy entities and nodes */
        assert(!irn_visited(get_irg_end(called_graph)));
        copy_frame_entities(called_graph, irg);
        irg_walk_core(get_irg_end(called_graph), copy_node_inline, set_preds_inline,
                      irg);

        irp_free_resources(irp, IR_RESOURCE_ENTITY_LINK);

        /* -- Merge the end of the inlined procedure with the call site -- */
        /* We will turn the old Call node into a Tuple with the following
           predecessors:
           -1:  Block of Tuple.
           0: Phi of all Memories of Return statements.
           1: Jmp from new Block that merges the control flow from all exception
           predecessors of the old end block.
           2: Tuple of all arguments.
           3: Phi of Exception memories.
           In case the old Call directly branches to End on an exception we don't
           need the block merging all exceptions nor the Phi of the exception
           memories.
        */

        /* Precompute some values */
        end_bl = get_new_node(get_irg_end_block(called_graph));
        end    = get_new_node(get_irg_end(called_graph));
        arity  = get_Block_n_cfgpreds(end_bl);    /* arity = n_exc + n_ret  */
        n_res  = get_method_n_ress(get_Call_type(call));

        res_pred = XMALLOCN(ir_node*, n_res);
        cf_pred  = XMALLOCN(ir_node*, arity);

        /* take over the keepalives of the inlined graph */
        irn_arity = get_irn_arity(end);
        for (i = 0; i < irn_arity; i++) {
                ir_node *ka = get_End_keepalive(end, i);
                if (! is_Bad(ka))
                        add_End_keepalive(get_irg_end(irg), ka);
        }

        /* replace Return nodes by Jump nodes */
        n_ret = 0;
        for (i = 0; i < arity; i++) {
                ir_node *ret;
                ret = get_Block_cfgpred(end_bl, i);
                if (is_Return(ret)) {
                        ir_node *block = get_nodes_block(ret);
                        cf_pred[n_ret] = new_r_Jmp(block);
                        n_ret++;
                }
        }
        set_irn_in(post_bl, n_ret, cf_pred);

        /* build a Tuple for all results of the method.
         * add Phi node if there was more than one Return. */
        turn_into_tuple(post_call, pn_Call_max);
        /* First the Memory-Phi */
        n_mem_phi = 0;
        for (i = 0; i < arity; i++) {
                ret = get_Block_cfgpred(end_bl, i);
                if (is_Return(ret)) {
                        cf_pred[n_mem_phi++] = get_Return_mem(ret);
                }
                /* memory output for some exceptions is directly connected to End */
                if (is_Call(ret)) {
                        cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 3);
                } else if (is_fragile_op(ret)) {
                        /* We rely on all fragile ops having the memory output at the same position. */
                        cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 0);
                } else if (is_Raise(ret)) {
                        cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 1);
                }
        }
        phi = new_r_Phi(post_bl, n_mem_phi, cf_pred, mode_M);
        set_Tuple_pred(call, pn_Call_M, phi);
        /* Conserve Phi-list for further inlinings -- but might be optimized */
        if (get_nodes_block(phi) == post_bl) {
                set_irn_link(phi, get_irn_link(post_bl));
                set_irn_link(post_bl, phi);
        }
        /* Now the real results */
        if (n_res > 0) {
                ir_node *result_tuple;
                for (j = 0; j < n_res; j++) {
                        ir_type *res_type = get_method_res_type(ctp, j);
                        ir_mode *res_mode = get_type_mode(res_type);
                        n_ret = 0;
                        for (i = 0; i < arity; i++) {
                                ret = get_Block_cfgpred(end_bl, i);
                                if (is_Return(ret)) {
                                        ir_node *res = get_Return_res(ret, j);
                                        if (get_irn_mode(res) != res_mode) {
                                                ir_node *block = get_nodes_block(res);
                                                res = new_r_Conv(block, res, res_mode);
                                        }
                                        cf_pred[n_ret] = res;
                                        n_ret++;
                                }
                        }
                        if (n_ret > 0) {
                                ir_mode *mode = get_irn_mode(cf_pred[0]);
                                phi = new_r_Phi(post_bl, n_ret, cf_pred, mode);
                        } else {
                                /* cf_pred[] is empty here, so derive the mode
                                 * from the result type instead */
                                phi = new_r_Bad(irg, res_mode);
                        }
                        res_pred[j] = phi;
                        /* Conserve Phi-list for further inlinings -- but might be optimized */
                        if (get_nodes_block(phi) == post_bl) {
                                set_Phi_next(phi, get_Block_phis(post_bl));
                                set_Block_phis(post_bl, phi);
                        }
                }
                result_tuple = new_r_Tuple(post_bl, n_res, res_pred);
                set_Tuple_pred(call, pn_Call_T_result, result_tuple);
        } else {
                set_Tuple_pred(call, pn_Call_T_result, new_r_Bad(irg, mode_T));
        }
        /* handle the regular call */
        set_Tuple_pred(call, pn_Call_X_regular, new_r_Jmp(post_bl));

        /* Finally the exception control flow.
           We have two possible situations:
           First, the Call branches to an exception handler:
           We need to add a Phi node to collect the memory containing the
           exception objects.  Further we need to add another block to get a
           correct representation of this Phi.  To this block we add a Jmp that
           resolves into the X output of the Call when the Call is turned into
           a tuple.
           Second, there is no exception edge. Just add all inlined exception
           branches to the End node.
         */
        if (exc_handling == exc_handler) {
                n_exc = 0;
                for (i = 0; i < arity; i++) {
                        ir_node *ret, *irn;
                        ret = get_Block_cfgpred(end_bl, i);
                        irn = skip_Proj(ret);
                        if (is_fragile_op(irn) || is_Raise(irn)) {
                                cf_pred[n_exc] = ret;
                                ++n_exc;
                        }
                }
                if (n_exc > 0) {
                        if (n_exc == 1) {
                                /* simple fix */
                                set_Tuple_pred(call, pn_Call_X_except, cf_pred[0]);
                        } else {
                                ir_node *block = new_r_Block(irg, n_exc, cf_pred);
                                set_Tuple_pred(call, pn_Call_X_except, new_r_Jmp(block));
                        }
                } else {
                        set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg, mode_X));
                }
        } else {
                ir_node *main_end_bl;
                int main_end_bl_arity;
                ir_node **end_preds;

                /* no exception handler: add all inlined exception branches to
                 * the end block of the caller */
                n_exc = 0;
                for (i = 0; i < arity; i++) {
                        ir_node *ret = get_Block_cfgpred(end_bl, i);
                        ir_node *irn = skip_Proj(ret);

                        if (is_fragile_op(irn) || is_Raise(irn)) {
                                cf_pred[n_exc] = ret;
                                n_exc++;
                        }
                }
                main_end_bl       = get_irg_end_block(irg);
                main_end_bl_arity = get_irn_arity(main_end_bl);
                end_preds         = XMALLOCN(ir_node*, n_exc + main_end_bl_arity);

                for (i = 0; i < main_end_bl_arity; ++i)
                        end_preds[i] = get_irn_n(main_end_bl, i);
                for (i = 0; i < n_exc; ++i)
                        end_preds[main_end_bl_arity + i] = cf_pred[i];
                set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
                set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg, mode_X));
                free(end_preds);
        }
        free(res_pred);
        free(cf_pred);

        /* -- Turn optimizations back on. -- */
        set_optimize(rem_opt);
        current_ir_graph = rem;

        return 1;
}
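
/* Usage sketch for inline_method() (added for illustration; it mirrors the
 * setup done by inline_small_irgs() below):
 *
 *     ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
 *     collect_phiprojs(irg);                // Proj/Phi lists are required
 *     if (inline_method(call, callee))
 *         collect_phiprojs(irg);            // lists are stale after inlining
 *     ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
 */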

/********************************************************************/
/* Apply inlining to small methods.                                 */
/********************************************************************/

static struct obstack  temp_obst;

/** Represents a possibly inlinable call in a graph. */
typedef struct call_entry {
        ir_node    *call;       /**< The Call node. */
        ir_graph   *callee;     /**< The callee IR-graph. */
        list_head  list;        /**< List head for linking the next one. */
        int        loop_depth;  /**< The loop depth of this call. */
        int        benefice;    /**< The calculated benefice of this call. */
        unsigned   local_adr:1; /**< Set if this call gets an address of a local variable. */
        unsigned   all_const:1; /**< Set if this call has only constant parameters. */
} call_entry;

/**
 * environment for inlining small irgs
 */
typedef struct inline_env_t {
        struct obstack obst;  /**< An obstack on which call_entries are allocated. */
        list_head      calls; /**< The call entry list. */
} inline_env_t;

/**
 * Returns the irg called from a Call node. If the irg is not
 * known, NULL is returned.
 *
 * @param call  the call node
 */
static ir_graph *get_call_called_irg(ir_node *call)
{
        ir_node *addr;

        addr = get_Call_ptr(call);
        if (is_Global(addr)) {
                ir_entity *ent = get_Global_entity(addr);
                /* we don't know which function gets finally bound to a weak symbol */
                if (get_entity_linkage(ent) & IR_LINKAGE_WEAK)
                        return NULL;

                return get_entity_irg(ent);
        }

        return NULL;
}

/**
 * Walker: Collect all calls to known graphs inside a graph.
 */
static void collect_calls(ir_node *call, void *env)
{
        if (is_Call(call)) {
                ir_graph *called_irg = get_call_called_irg(call);

                if (called_irg != NULL) {
                        /* The Call node calls a locally defined method.  Remember to inline. */
                        inline_env_t *ienv  = (inline_env_t*)env;
                        call_entry   *entry = OALLOC(&ienv->obst, call_entry);
                        entry->call       = call;
                        entry->callee     = called_irg;
                        entry->loop_depth = 0;
                        entry->benefice   = 0;
                        entry->local_adr  = 0;
                        entry->all_const  = 0;

                        list_add_tail(&entry->list, &ienv->calls);
                }
        }
}

/**
 * Inlines all small methods at call sites where the called address comes
 * from a Const node that references the entity representing the called
 * method.
 * The size argument is a rough measure for the code size of the method:
 * Methods where the obstack containing the firm graph is smaller than
 * size are inlined.
 */
void inline_small_irgs(ir_graph *irg, int size)
{
        ir_graph *rem = current_ir_graph;
        inline_env_t env;
        call_entry *entry;

        current_ir_graph = irg;
        /* Handle graph state */
        assert(get_irg_phase_state(irg) != phase_building);
        free_callee_info(irg);

        /* Find Call nodes to inline.
           (We cannot inline during a walk of the graph, as inlining the same
           method several times changes the visited flag of the walked graph:
           after the first inlining visited of the callee equals visited of
           the caller.  With the next inlining both are increased.) */
        obstack_init(&env.obst);
        INIT_LIST_HEAD(&env.calls);
        irg_walk_graph(irg, NULL, collect_calls, &env);

        if (! list_empty(&env.calls)) {
                /* There are calls to inline */
                ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
                collect_phiprojs(irg);

                list_for_each_entry(call_entry, entry, &env.calls, list) {
                        ir_graph            *callee = entry->callee;
                        irg_inline_property prop    = get_irg_inline_property(callee);

                        if (prop == irg_inline_forbidden) {
                                continue;
                        }

                        if (prop >= irg_inline_forced ||
                            _obstack_memory_used(callee->obst) - (int)obstack_room(callee->obst) < size) {
                                inline_method(entry->call, callee);
                        }
                }
                ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
        }
        obstack_free(&env.obst, NULL);
        current_ir_graph = rem;
}
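
/* Minimal driver sketch (added for illustration; the threshold 50 is an
 * arbitrary example value, measured in obstack bytes):
 *
 *     for (size_t i = 0; i < get_irp_n_irgs(); ++i)
 *         inline_small_irgs(get_irp_irg(i), 50);
 */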

typedef struct inline_small_irgs_pass_t {
        ir_graph_pass_t pass;
        int            size;
} inline_small_irgs_pass_t;

/**
 * Wrapper to run inline_small_irgs() as a pass.
 */
static int inline_small_irgs_wrapper(ir_graph *irg, void *context)
{
        inline_small_irgs_pass_t *pass = (inline_small_irgs_pass_t*)context;

        inline_small_irgs(irg, pass->size);
        return 0;
}

/* create a pass for inline_small_irgs() */
ir_graph_pass_t *inline_small_irgs_pass(const char *name, int size)
{
        inline_small_irgs_pass_t *pass = XMALLOCZ(inline_small_irgs_pass_t);

        pass->size = size;
        return def_graph_pass_constructor(
                &pass->pass, name ? name : "inline_small_irgs", inline_small_irgs_wrapper);
}

/**
 * Environment for inlining irgs.
 */
typedef struct {
        list_head calls;             /**< List of all call nodes in this graph. */
        unsigned  *local_weights;    /**< Once allocated, the beneficial weight for transmitting local addresses. */
        unsigned  n_nodes;           /**< Number of nodes in graph except Id, Tuple, Proj, Start, End. */
        unsigned  n_blocks;          /**< Number of Blocks in graph without Start and End block. */
        unsigned  n_nodes_orig;      /**< for statistics */
        unsigned  n_call_nodes;      /**< Number of Call nodes in the graph. */
        unsigned  n_call_nodes_orig; /**< for statistics */
        unsigned  n_callers;         /**< Number of known graphs that call this graph. */
        unsigned  n_callers_orig;    /**< for statistics */
        unsigned  got_inline:1;      /**< Set, if at least one call inside this graph was inlined. */
        unsigned  recursive:1;       /**< Set, if this function is self recursive. */
} inline_irg_env;

/**
 * Allocate a new environment for inlining.
 */
static inline_irg_env *alloc_inline_irg_env(void)
{
        inline_irg_env *env    = OALLOC(&temp_obst, inline_irg_env);
        INIT_LIST_HEAD(&env->calls);
        env->local_weights     = NULL;
        env->n_nodes           = -2; /* do not count Start, End */
        env->n_blocks          = -2; /* do not count Start and End block */
        env->n_nodes_orig      = -2; /* do not count Start, End */
        env->n_call_nodes      = 0;
        env->n_call_nodes_orig = 0;
        env->n_callers         = 0;
        env->n_callers_orig    = 0;
        env->got_inline        = 0;
        env->recursive         = 0;
        return env;
}

typedef struct walker_env {
        inline_irg_env *x;     /**< the inline environment */
        char ignore_runtime;   /**< the ignore runtime flag */
        char ignore_callers;   /**< if set, do not update the callers' data */
} wenv_t;

/**
 * post-walker: collect all calls in the inline-environment
 * of a graph and sum some statistics.
 */
static void collect_calls2(ir_node *call, void *ctx)
{
        wenv_t         *env = (wenv_t*)ctx;
        inline_irg_env *x = env->x;
        unsigned        code = get_irn_opcode(call);
        ir_graph       *callee;
        call_entry     *entry;

        /* count meaningful nodes in irg */
        if (code != iro_Proj && code != iro_Tuple && code != iro_Sync) {
                if (code != iro_Block) {
                        ++x->n_nodes;
                        ++x->n_nodes_orig;
                } else {
                        ++x->n_blocks;
                }
        }

        if (code != iro_Call) return;

        /* check if it's a runtime call */
        if (env->ignore_runtime) {
                ir_node *symc = get_Call_ptr(call);

                if (is_Global(symc)) {
                        ir_entity *ent = get_Global_entity(symc);

                        if (get_entity_additional_properties(ent) & mtp_property_runtime)
                                return;
                }
        }

        /* collect all call nodes */
        ++x->n_call_nodes;
        ++x->n_call_nodes_orig;

        callee = get_call_called_irg(call);
        if (callee != NULL) {
                if (! env->ignore_callers) {
                        inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);
                        /* count all static callers */
                        ++callee_env->n_callers;
                        ++callee_env->n_callers_orig;
                }
                if (callee == current_ir_graph)
                        x->recursive = 1;

                /* add it to the list of possibly inlinable entries */
                entry = OALLOC(&temp_obst, call_entry);
                entry->call       = call;
                entry->callee     = callee;
                entry->loop_depth = get_irn_loop(get_nodes_block(call))->depth;
                entry->benefice   = 0;
                entry->local_adr  = 0;
                entry->all_const  = 0;

                list_add_tail(&entry->list, &x->calls);
        }
}

/**
 * Returns TRUE if the irg's environment contains no Call nodes,
 * hence this irg is a leaf in the call graph.
 */
inline static int is_leave(ir_graph *irg)
{
        inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
        return env->n_call_nodes == 0;
}

/**
 * Returns TRUE if the number of nodes in the callee is
 * smaller than size in the irg's environment.
 */
inline static int is_smaller(ir_graph *callee, unsigned size)
{
        inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);
        return env->n_nodes < size;
}

/**
 * Duplicate a call entry.
 *
 * @param entry     the original entry to duplicate
 * @param new_call  the new call node
 * @param loop_depth_delta
 *                  delta value for the loop depth
 */
static call_entry *duplicate_call_entry(const call_entry *entry,
                                        ir_node *new_call, int loop_depth_delta)
{
        call_entry *nentry = OALLOC(&temp_obst, call_entry);
        nentry->call       = new_call;
        nentry->callee     = entry->callee;
        nentry->benefice   = entry->benefice;
        nentry->loop_depth = entry->loop_depth + loop_depth_delta;
        nentry->local_adr  = entry->local_adr;
        nentry->all_const  = entry->all_const;

        return nentry;
}

/**
 * Append all call nodes of the source environment to the call list of the
 * destination environment.
 *
 * @param dst         destination environment
 * @param src         source environment
 * @param loop_depth  the loop depth of the call that is replaced by the src list
 */
static void append_call_list(inline_irg_env *dst, inline_irg_env *src, int loop_depth)
{
        call_entry *entry, *nentry;

        /* Note that the src list points to Call nodes in the inlined graph, but
           we need Call nodes in our graph. Luckily the inliner leaves this information
           in the link field. */
        list_for_each_entry(call_entry, entry, &src->calls, list) {
                nentry = duplicate_call_entry(entry, (ir_node*)get_irn_link(entry->call), loop_depth);
                list_add_tail(&nentry->list, &dst->calls);
        }
        dst->n_call_nodes += src->n_call_nodes;
        dst->n_nodes      += src->n_nodes;
}
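
/* Note (added for clarity): the loop_depth argument becomes the delta in
 * duplicate_call_entry(), so a call that sat at loop depth d inside the
 * callee is recorded at depth d + loop_depth once it lives in the caller. */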

/*
 * Inlines small leave methods at call sites where the called address comes
 * from a Const node that references the entity representing the called
 * method.
 * The size argument is a rough measure for the code size of the method:
 * Methods where the obstack containing the firm graph is smaller than
 * size are inlined.
 */
void inline_leave_functions(unsigned maxsize, unsigned leavesize,
                            unsigned size, int ignore_runtime)
{
        inline_irg_env   *env;
        ir_graph         *irg;
        size_t           i, n_irgs;
        ir_graph         *rem;
        int              did_inline;
        wenv_t           wenv;
        call_entry       *entry, *next;
        const call_entry *centry;
        pmap             *copied_graphs;
        pmap_entry       *pm_entry;

        rem = current_ir_graph;
        obstack_init(&temp_obst);

        /* a map for the copied graphs, used to inline recursive calls */
        copied_graphs = pmap_create();

        /* extend all irgs by a temporary data structure for inlining. */
        n_irgs = get_irp_n_irgs();
        for (i = 0; i < n_irgs; ++i)
                set_irg_link(get_irp_irg(i), alloc_inline_irg_env());

        /* Pre-compute information in temporary data structure. */
        wenv.ignore_runtime = ignore_runtime;
        wenv.ignore_callers = 0;
        for (i = 0; i < n_irgs; ++i) {
                ir_graph *irg = get_irp_irg(i);

                assert(get_irg_phase_state(irg) != phase_building);
                free_callee_info(irg);

                assure_cf_loop(irg);
                wenv.x = (inline_irg_env*)get_irg_link(irg);
                irg_walk_graph(irg, NULL, collect_calls2, &wenv);
        }

        /* -- and now inline. -- */

        /* Inline leaves recursively -- we might construct new leaves. */
        do {
                did_inline = 0;

                for (i = 0; i < n_irgs; ++i) {
                        ir_node *call;
                        int phiproj_computed = 0;

                        current_ir_graph = get_irp_irg(i);
                        env              = (inline_irg_env*)get_irg_link(current_ir_graph);

                        ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
                        list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
                                ir_graph            *callee;
                                irg_inline_property  prop;

                                if (env->n_nodes > maxsize)
                                        break;

                                call   = entry->call;
                                callee = entry->callee;

                                prop = get_irg_inline_property(callee);
                                if (prop == irg_inline_forbidden) {
                                        continue;
                                }

                                if (is_leave(callee) && (
                                    is_smaller(callee, leavesize) || prop >= irg_inline_forced)) {
                                        if (!phiproj_computed) {
                                                phiproj_computed = 1;
                                                collect_phiprojs(current_ir_graph);
                                        }
                                        did_inline = inline_method(call, callee);

                                        if (did_inline) {
                                                inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);

                                                /* call was inlined, Phi/Projs for current graph must be recomputed */
                                                phiproj_computed = 0;

                                                /* Do some statistics */
                                                env->got_inline = 1;
                                                --env->n_call_nodes;
                                                env->n_nodes += callee_env->n_nodes;
                                                --callee_env->n_callers;

                                                /* remove this call from the list */
                                                list_del(&entry->list);
                                                continue;
                                        }
                                }
                        }
                        ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
                }
        } while (did_inline);

        /* inline other small functions. */
        for (i = 0; i < n_irgs; ++i) {
                ir_node *call;
                int phiproj_computed = 0;

                current_ir_graph = get_irp_irg(i);
                env              = (inline_irg_env*)get_irg_link(current_ir_graph);

                ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

                /* note that the list of possible calls is updated during the process */
                list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
                        irg_inline_property prop;
                        ir_graph            *callee;
                        pmap_entry          *e;

                        call   = entry->call;
                        callee = entry->callee;

                        prop = get_irg_inline_property(callee);
                        if (prop == irg_inline_forbidden) {
                                continue;
                        }

                        e = pmap_find(copied_graphs, callee);
                        if (e != NULL) {
                                /*
                                 * Remap callee if we have a copy.
                                 * FIXME: Should we do this only for recursive Calls ?
                                 */
                                callee = (ir_graph*)e->value;
                        }

                        if (prop >= irg_inline_forced ||
                            (is_smaller(callee, size) && env->n_nodes < maxsize) /* small function */) {
                                if (current_ir_graph == callee) {
                                        /*
                                         * Recursive call: we cannot directly inline because we cannot walk
                                         * the graph and change it. So we have to make a copy of the graph
                                         * first.
                                         */

                                        inline_irg_env *callee_env;
                                        ir_graph       *copy;

                                        ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

                                        /*
                                         * No copy yet, create one.
                                         * Note that recursive methods are never leaves, so it is sufficient
                                         * to test this condition here.
                                         */
                                        copy = create_irg_copy(callee);

                                        /* create_irg_copy() destroys the Proj links, recompute them */
                                        phiproj_computed = 0;

                                        ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

                                        /* allocate new environment */
                                        callee_env = alloc_inline_irg_env();
                                        set_irg_link(copy, callee_env);

                                        assure_cf_loop(copy);
                                        wenv.x              = callee_env;
                                        wenv.ignore_callers = 1;
                                        irg_walk_graph(copy, NULL, collect_calls2, &wenv);

                                        /*
                                         * Enter the entity of the original graph. This is needed
                                         * for inline_method(). However, note that ent->irg still points
                                         * to callee, NOT to copy.
                                         */
                                        set_irg_entity(copy, get_irg_entity(callee));

                                        pmap_insert(copied_graphs, callee, copy);
                                        callee = copy;

                                        /* we have only one caller: the original graph */
                                        callee_env->n_callers      = 1;
                                        callee_env->n_callers_orig = 1;
                                }
                                if (! phiproj_computed) {
                                        phiproj_computed = 1;
                                        collect_phiprojs(current_ir_graph);
                                }
                                did_inline = inline_method(call, callee);
                                if (did_inline) {
                                        inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);

                                        /* call was inlined, Phi/Projs for current graph must be recomputed */
                                        phiproj_computed = 0;

                                        /* callee was inlined. Append its call list. */
                                        env->got_inline = 1;
                                        --env->n_call_nodes;
                                        append_call_list(env, callee_env, entry->loop_depth);
                                        --callee_env->n_callers;

                                        /* after we have inlined callee, all called methods inside callee
                                           are now called once more */
                                        list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
                                                inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
                                                ++penv->n_callers;
                                        }

                                        /* remove this call from the list */
                                        list_del(&entry->list);
                                        continue;
                                }
                        }
                }
                ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
        }

        for (i = 0; i < n_irgs; ++i) {
                irg = get_irp_irg(i);
                env = (inline_irg_env*)get_irg_link(irg);

                if (env->got_inline) {
                        optimize_graph_df(irg);
                        optimize_cf(irg);
                }
                if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
                        DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
                        env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
                        env->n_callers_orig, env->n_callers,
                        get_entity_name(get_irg_entity(irg))));
                }
        }

        /* kill the copied graphs: we don't need them anymore */
        foreach_pmap(copied_graphs, pm_entry) {
                ir_graph *copy = (ir_graph*)pm_entry->value;

                /* reset the entity, otherwise it will be deleted in the next step ... */
                set_irg_entity(copy, NULL);
                free_ir_graph(copy);
        }
        pmap_destroy(copied_graphs);

        obstack_free(&temp_obst, NULL);
        current_ir_graph = rem;
}
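
/* Example invocation (added for illustration; the threshold values are
 * arbitrary samples, not tuned recommendations):
 *
 *     inline_leave_functions(750, 16, 25, 0);
 */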

typedef struct inline_leave_functions_pass_t {
        ir_prog_pass_t pass;
        unsigned       maxsize;
        unsigned       leavesize;
        unsigned       size;
        int            ignore_runtime;
} inline_leave_functions_pass_t;

/**
 * Wrapper to run inline_leave_functions() as an ir_prog pass.
 */
static int inline_leave_functions_wrapper(ir_prog *irp, void *context)
{
        inline_leave_functions_pass_t *pass = (inline_leave_functions_pass_t*)context;

        (void)irp;
        inline_leave_functions(
                pass->maxsize, pass->leavesize,
                pass->size, pass->ignore_runtime);
        return 0;
}

/* create a pass for inline_leave_functions() */
ir_prog_pass_t *inline_leave_functions_pass(
        const char *name, unsigned maxsize, unsigned leavesize,
        unsigned size, int ignore_runtime)
{
        inline_leave_functions_pass_t *pass = XMALLOCZ(inline_leave_functions_pass_t);

        pass->maxsize        = maxsize;
        pass->leavesize      = leavesize;
        pass->size           = size;
        pass->ignore_runtime = ignore_runtime;

        return def_prog_pass_constructor(
                &pass->pass,
                name ? name : "inline_leave_functions",
                inline_leave_functions_wrapper);
}

/**
 * Calculate the parameter weights for transmitting the address of a local variable.
 */
static unsigned calc_method_local_weight(ir_node *arg)
{
        int      i, j, k;
        unsigned v, weight = 0;

        for (i = get_irn_n_outs(arg) - 1; i >= 0; --i) {
                ir_node *succ = get_irn_out(arg, i);

                switch (get_irn_opcode(succ)) {
                case iro_Load:
                case iro_Store:
                        /* Loads and Stores can be removed */
                        weight += 3;
                        break;
                case iro_Sel:
                        /* check if all indexes are constant */
                        for (j = get_Sel_n_indexs(succ) - 1; j >= 0; --j) {
                                ir_node *idx = get_Sel_index(succ, j);
                                if (! is_Const(idx))
                                        return 0;
                        }
                        /* Check users on this Sel. Note: if a 0 is returned here, there was
                           some unsupported node. */
                        v = calc_method_local_weight(succ);
                        if (v == 0)
                                return 0;
                        /* we can kill one Sel with constant indexes, this is cheap */
                        weight += v + 1;
                        break;
                case iro_Id:
                        /* when looking backward we might find Id nodes */
                        weight += calc_method_local_weight(succ);
                        break;
                case iro_Tuple:
                        /* unoptimized tuple */
                        for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
                                ir_node *pred = get_Tuple_pred(succ, j);
                                if (pred == arg) {
                                        /* look for Proj(j) */
                                        for (k = get_irn_n_outs(succ) - 1; k >= 0; --k) {
                                                ir_node *succ_succ = get_irn_out(succ, k);
                                                if (is_Proj(succ_succ)) {
                                                        if (get_Proj_proj(succ_succ) == j) {
                                                                /* found */
                                                                weight += calc_method_local_weight(succ_succ);
                                                        }
                                                } else {
                                                        /* this should NOT happen */
                                                        return 0;
                                                }
                                        }
                                }
                        }
                        break;
                default:
                        /* any other node: unsupported yet or bad. */
                        return 0;
                }
        }
        return weight;
}
1305
1306 /**
1307  * Calculate the parameter weights for all parameters of the given graph.
1308  */
1309 static void analyze_irg_local_weights(inline_irg_env *env, ir_graph *irg)
1310 {
1311         ir_entity *ent = get_irg_entity(irg);
1312         ir_type  *mtp;
1313         size_t   nparams;
1314         int      i;
1315         long     proj_nr;
1316         ir_node  *irg_args, *arg;
1317
1318         mtp      = get_entity_type(ent);
1319         nparams  = get_method_n_params(mtp);
1320
1321         /* allocate a new array; a non-NULL pointer also serves as the 'analysed' flag */
1322         env->local_weights = NEW_ARR_D(unsigned, &temp_obst, nparams);
1323
1324         /* If the method has no parameters, there is nothing to do. */
1325         if (nparams == 0)
1326                 return;
1327
1328         assure_irg_outs(irg);
1329         irg_args = get_irg_args(irg);
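        /* all users of the Args node are Proj nodes; the Proj number selects the parameter */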
1330         for (i = get_irn_n_outs(irg_args) - 1; i >= 0; --i) {
1331                 arg     = get_irn_out(irg_args, i);
1332                 proj_nr = get_Proj_proj(arg);
1333                 env->local_weights[proj_nr] = calc_method_local_weight(arg);
1334         }
1335 }
1336
1337 /**
1338  * Calculate the benefice for transmitting a local variable address.
1339  * After inlining, the local variable might be transformed into a
1340  * SSA variable by scalar_replacement().
1341  */
1342 static unsigned get_method_local_adress_weight(ir_graph *callee, size_t pos)
1343 {
1344         inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);
1345
1346         if (env->local_weights == NULL)
1347                 analyze_irg_local_weights(env, callee);
1348
1349         if (pos < ARR_LEN(env->local_weights))
1350                 return env->local_weights[pos];
1351         return 0;
1352 }
1353
1354 /**
1355  * Calculate a benefice value for inlining the given call.
1356  *
1357  * @param entry      the call entry containing the call node to inspect
1358  * @param callee     the called graph
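 *
 * @return the benefice value, which is also stored in entry->benefice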
1359  */
1360 static int calc_inline_benefice(call_entry *entry, ir_graph *callee)
1361 {
1362         ir_node   *call = entry->call;
1363         ir_entity *ent  = get_irg_entity(callee);
1364         ir_node   *frame_ptr;
1365         ir_type   *mtp;
1366         int       weight = 0;
1367         int       i, n_params, all_const;
1368         unsigned  cc, v;
1369         irg_inline_property prop;
1370
1371         inline_irg_env *callee_env;
1372
1373         prop = get_irg_inline_property(callee);
1374         if (prop == irg_inline_forbidden) {
1375                 DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden\n",
1376                     call, callee));
1377                 return entry->benefice = INT_MIN;
1378         }
1379
1380         if (get_irg_additional_properties(callee) & mtp_property_noreturn) {
1381                 DB((dbg, LEVEL_2, "In %+F Call to %+F: not inlining noreturn\n",
1382                     call, callee));
1383                 return entry->benefice = INT_MIN;
1384         }
1385
1386         /* costs for every passed parameter */
1387         n_params = get_Call_n_params(call);
1388         mtp      = get_entity_type(ent);
1389         cc       = get_method_calling_convention(mtp);
1390         if (cc & cc_reg_param) {
1391                 /* register parameters cause smaller costs than stack parameters */
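                /* masking off the calling-convention flag bits yields the number of register parameters */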
1392                 int max_regs = cc & ~cc_bits;
1393
1394                 if (max_regs < n_params)
1395                         weight += max_regs * 2 + (n_params - max_regs) * 5;
1396                 else
1397                         weight += n_params * 2;
1398         } else {
1399                 /* parameters are passed on the stack */
1400                 weight += 5 * n_params;
1401         }
1402
1403         /* constant parameters improve the benefice */
1404         frame_ptr = get_irg_frame(current_ir_graph);
1405         all_const = 1;
1406         for (i = 0; i < n_params; ++i) {
1407                 ir_node *param = get_Call_param(call, i);
1408
1409                 if (is_Const(param)) {
1410                         weight += get_method_param_weight(ent, i);
1411                 } else {
1412                         all_const = 0;
1413                         if (is_SymConst(param))
1414                                 weight += get_method_param_weight(ent, i);
1415                         else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
1416                                 /*
1417                                  * An address of a local variable is transmitted. After
1418                                  * inlining, scalar_replacement might be able to remove the
1419                                  * local variable, so honor this.
1420                                  */
1421                                 v = get_method_local_adress_weight(callee, i);
1422                                 weight += v;
1423                                 if (v > 0)
1424                                         entry->local_adr = 1;
1425                         }
1426                 }
1427         }
1428         entry->all_const = all_const;
1429
1430         callee_env = (inline_irg_env*)get_irg_link(callee);
1431         if (callee_env->n_callers == 1 &&
1432             callee != current_ir_graph &&
1433             !entity_is_externally_visible(ent)) {
1434                 weight += 700;
1435         }
1436
1437         /* give a bonus for functions with one block */
1438         if (callee_env->n_blocks == 1)
1439                 weight = weight * 3 / 2;
1440
1441         /* and one for small non-recursive functions: we want them to be inlined in almost every case */
1442         if (callee_env->n_nodes < 30 && !callee_env->recursive)
1443                 weight += 2000;
1444
1445         /* and finally for leaves: they do not increase the register pressure
1446            because of callee-save registers */
1447         if (callee_env->n_call_nodes == 0)
1448                 weight += 400;
1449
1450         /* it is important to inline inner loops first */
1451         if (entry->loop_depth > 30)
1452                 weight += 30 * 1024;
1453         else
1454                 weight += entry->loop_depth * 1024;
1455
1456         /*
1457          * All arguments constant is probably a good sign, give an extra bonus
1458          */
1459         if (all_const)
1460                 weight += 1024;
1461
1462         return entry->benefice = weight;
1463 }
1464
1465 typedef struct walk_env_t {
1466         ir_graph **irgs;
1467         size_t   last_irg;
1468 } walk_env_t;
1469
1470 /**
1471  * Callgraph walker, collect all visited graphs.
1472  */
1473 static void callgraph_walker(ir_graph *irg, void *data)
1474 {
1475         walk_env_t *env = (walk_env_t *)data;
1476         env->irgs[env->last_irg++] = irg;
1477 }
1478
1479 /**
1480  * Creates an inline order for all graphs.
1481  *
1482  * @return the list of graphs.
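 *         The graphs are collected by a post-order walk of the callgraph,
 *         so callees tend to appear before their callers.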
1483  */
1484 static ir_graph **create_irg_list(void)
1485 {
1486         ir_entity  **free_methods;
1487         size_t     n_irgs = get_irp_n_irgs();
1488         walk_env_t env;
1489
1490         cgana(&free_methods);
1491         xfree(free_methods);
1492
1493         compute_callgraph();
1494
1495         env.irgs     = XMALLOCNZ(ir_graph*, n_irgs);
1496         env.last_irg = 0;
1497
1498         callgraph_walk(NULL, callgraph_walker, &env);
1499         assert(n_irgs == env.last_irg);
1500
1501         return env.irgs;
1502 }
1503
1504 /**
1505  * Push a call onto the priority queue if its benefice is big enough.
1506  *
1507  * @param pqueue   the priority queue of calls
1508  * @param call     the call entry
1509  * @param inline_threshold
1510  *                 the threshold value
1511  */
1512 static void maybe_push_call(pqueue_t *pqueue, call_entry *call,
1513                             int inline_threshold)
1514 {
1515         ir_graph            *callee  = call->callee;
1516         irg_inline_property prop     = get_irg_inline_property(callee);
1517         int                 benefice = calc_inline_benefice(call, callee);
1518
1519         DB((dbg, LEVEL_2, "In %+F Call %+F to %+F has benefice %d\n",
1520             get_irn_irg(call->call), call->call, callee, benefice));
1521
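        /* calls to forced-inline graphs are queued regardless of the threshold */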
1522         if (prop < irg_inline_forced && benefice < inline_threshold) {
1523                 return;
1524         }
1525
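        /* the benefice serves as the priority: the most promising call is popped first */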
1526         pqueue_put(pqueue, call, benefice);
1527 }
1528
1529 /**
1530  * Try to inline calls into a graph.
1531  *
1532  * @param irg      the graph into which we inline
1533  * @param maxsize  do NOT inline if the size of irg gets
1534  *                 bigger than this amount
1535  * @param inline_threshold
1536  *                 threshold value for inline decision
1537  * @param copied_graphs
1538  *                 map containing copies of recursive graphs
1539  */
1540 static void inline_into(ir_graph *irg, unsigned maxsize,
1541                         int inline_threshold, pmap *copied_graphs)
1542 {
1543         int            phiproj_computed = 0;
1544         inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
1545         call_entry     *curr_call;
1546         wenv_t         wenv;
1547         pqueue_t       *pqueue;
1548
1549         if (env->n_call_nodes == 0)
1550                 return;
1551
1552         if (env->n_nodes > maxsize) {
1553                 DB((dbg, LEVEL_2, "%+F: too big (%d)\n", irg, env->n_nodes));
1554                 return;
1555         }
1556
1557         current_ir_graph = irg;
1558         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1559
1560         /* put all calls of this graph into the priority queue */
1561         pqueue = new_pqueue();
1562
1563         list_for_each_entry(call_entry, curr_call, &env->calls, list) {
1564                 assert(is_Call(curr_call->call));
1565                 maybe_push_call(pqueue, curr_call, inline_threshold);
1566         }
1567
1568         /* note that the list of possible calls is updated during the process */
1569         while (!pqueue_empty(pqueue)) {
1570                 int                 did_inline;
1571                 call_entry          *curr_call  = (call_entry*)pqueue_pop_front(pqueue);
1572                 ir_graph            *callee     = curr_call->callee;
1573                 ir_node             *call_node  = curr_call->call;
1574                 inline_irg_env      *callee_env = (inline_irg_env*)get_irg_link(callee);
1575                 irg_inline_property prop        = get_irg_inline_property(callee);
1576                 int                 loop_depth;
1577                 const call_entry    *centry;
1578                 pmap_entry          *e;
1579
1580                 if ((prop < irg_inline_forced) && env->n_nodes + callee_env->n_nodes > maxsize) {
1581                         DB((dbg, LEVEL_2, "%+F: too big (%d) + %+F (%d)\n", irg,
1582                                                 env->n_nodes, callee, callee_env->n_nodes));
1583                         continue;
1584                 }
1585
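                /* if the callee was already copied for recursive inlining, use the copy */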
1586                 e = pmap_find(copied_graphs, callee);
1587                 if (e != NULL) {
1588                         int benefice = curr_call->benefice;
1589                         /*
1590                          * Reduce the benefice for recursive functions if not all arguments are constant:
1591                          * inlining recursive functions is rarely profitable.
1592                          */
1593                         if (!curr_call->all_const)
1594                                 benefice -= 2000;
1595                         if (benefice < inline_threshold)
1596                                 continue;
1597
1598                         /*
1599                          * Remap callee if we have a copy.
1600                          */
1601                         callee     = (ir_graph*)e->value;
1602                         callee_env = (inline_irg_env*)get_irg_link(callee);
1603                 }
1604
1605                 if (current_ir_graph == callee) {
1606                         /*
1607                          * Recursive call: we cannot directly inline because we cannot
1608                          * walk the graph and change it. So we have to make a copy of
1609                          * the graph first.
1610                          */
1611                         int benefice = curr_call->benefice;
1612                         ir_graph *copy;
1613
1614                         /*
1615                          * Reduce the benefice for recursive functions if not all arguments are constant:
1616                          * inlining recursive functions is rarely profitable.
1617                          */
1618                         if (!curr_call->all_const)
1619                                 benefice -= 2000;
1620                         if (benefice < inline_threshold)
1621                                 continue;
1622
1623                         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1624
1625                         /*
1626                          * No copy yet, create one.
1627                          * Note that recursive methods are never leaves, so it is
1628                          * sufficient to test this condition here.
1629                          */
1630                         copy = create_irg_copy(callee);
1631
1632                         /* create_irg_copy() destroys the Proj links, recompute them */
1633                         phiproj_computed = 0;
1634
1635                         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1636
1637                         /* allocate a new environment */
1638                         callee_env = alloc_inline_irg_env();
1639                         set_irg_link(copy, callee_env);
1640
1641                         assure_cf_loop(copy);
1642                         wenv.x              = callee_env;
1643                         wenv.ignore_callers = 1;
1644                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
1645
1646                         /*
1647                          * Enter the entity of the original graph. This is needed
1648                          * for inline_method(). However, note that ent->irg still points
1649                          * to callee, NOT to copy.
1650                          */
1651                         set_irg_entity(copy, get_irg_entity(callee));
1652
1653                         pmap_insert(copied_graphs, callee, copy);
1654                         callee = copy;
1655
1656                         /* we have only one caller: the original graph */
1657                         callee_env->n_callers      = 1;
1658                         callee_env->n_callers_orig = 1;
1659                 }
1660                 if (! phiproj_computed) {
1661                         phiproj_computed = 1;
1662                         collect_phiprojs(current_ir_graph);
1663                 }
1664                 did_inline = inline_method(call_node, callee);
1665                 if (!did_inline)
1666                         continue;
1667
1668                 /* call was inlined, Phi/Projs for current graph must be recomputed */
1669                 phiproj_computed = 0;
1670
1671                 /* remove it from the caller list */
1672                 list_del(&curr_call->list);
1673
1674                 /* callee was inlined; append its call list. */
1675                 env->got_inline = 1;
1676                 --env->n_call_nodes;
1677
1678                 /* we just generate a bunch of new calls */
1679                 loop_depth = curr_call->loop_depth;
1680                 list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
1681                         inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
1682                         ir_node        *new_call;
1683                         call_entry     *new_entry;
1684
1685                         /* after we have inlined callee, all called methods inside
1686                          * callee are now called once more */
1687                         ++penv->n_callers;
1688
1689                         /* Note that the src list points to Call nodes in the inlined graph,
1690                          * but we need Call nodes in our graph. Luckily the inliner leaves
1691                          * this information in the link field. */
1692                         new_call = (ir_node*)get_irn_link(centry->call);
1693                         assert(is_Call(new_call));
1694
1695                         new_entry = duplicate_call_entry(centry, new_call, loop_depth);
1696                         list_add_tail(&new_entry->list, &env->calls);
1697                         maybe_push_call(pqueue, new_entry, inline_threshold);
1698                 }
1699
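                /* account for the callee's nodes and calls that are now part of this graph */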
1700                 env->n_call_nodes += callee_env->n_call_nodes;
1701                 env->n_nodes += callee_env->n_nodes;
1702                 --callee_env->n_callers;
1703         }
1704         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1705         del_pqueue(pqueue);
1706 }
1707
1708 /*
1709  * Heuristic inliner. Calculates a benefice value for every call and inlines
1710  * those calls with a value higher than the threshold.
1711  */
1712 void inline_functions(unsigned maxsize, int inline_threshold,
1713                       opt_ptr after_inline_opt)
1714 {
1715         inline_irg_env   *env;
1716         size_t           i, n_irgs;
1717         ir_graph         *rem;
1718         wenv_t           wenv;
1719         pmap             *copied_graphs;
1720         pmap_entry       *pm_entry;
1721         ir_graph         **irgs;
1722
1723         rem = current_ir_graph;
1724         obstack_init(&temp_obst);
1725
1726         irgs = create_irg_list();
1727
1728         /* a map for the copied graphs, used to inline recursive calls */
1729         copied_graphs = pmap_create();
1730
1731         /* extend all irgs by a temporary data structure for inlining. */
1732         n_irgs = get_irp_n_irgs();
1733         for (i = 0; i < n_irgs; ++i)
1734                 set_irg_link(irgs[i], alloc_inline_irg_env());
1735
1736         /* Pre-compute information in temporary data structure. */
1737         wenv.ignore_runtime = 0;
1738         wenv.ignore_callers = 0;
1739         for (i = 0; i < n_irgs; ++i) {
1740                 ir_graph *irg = irgs[i];
1741
1742                 free_callee_info(irg);
1743
1744                 wenv.x = (inline_irg_env*)get_irg_link(irg);
1745                 assure_cf_loop(irg);
1746                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
1747         }
1748
1749         /* -- and now inline. -- */
1750         for (i = 0; i < n_irgs; ++i) {
1751                 ir_graph *irg = irgs[i];
1752
1753                 inline_into(irg, maxsize, inline_threshold, copied_graphs);
1754         }
1755
1756         for (i = 0; i < n_irgs; ++i) {
1757                 ir_graph *irg = irgs[i];
1758
1759                 env = (inline_irg_env*)get_irg_link(irg);
1760                 if (env->got_inline && after_inline_opt != NULL) {
1761                         /* this irg got calls inlined: optimize it */
1762                         after_inline_opt(irg);
1763                 }
1764                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
1765                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
1766                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
1767                         env->n_callers_orig, env->n_callers,
1768                         get_entity_name(get_irg_entity(irg))));
1769                 }
1770         }
1771
1772         /* kill the copied graphs: we don't need them anymore */
1773         foreach_pmap(copied_graphs, pm_entry) {
1774                 ir_graph *copy = (ir_graph*)pm_entry->value;
1775
1776                 /* reset the entity, otherwise it will be deleted in the next step ... */
1777                 set_irg_entity(copy, NULL);
1778                 free_ir_graph(copy);
1779         }
1780         pmap_destroy(copied_graphs);
1781
1782         xfree(irgs);
1783
1784         obstack_free(&temp_obst, NULL);
1785         current_ir_graph = rem;
1786 }
1787
1788 typedef struct inline_functions_pass_t {
1789         ir_prog_pass_t pass;
1790         unsigned       maxsize;
1791         int            inline_threshold;
1792         opt_ptr        after_inline_opt;
1793 } inline_functions_pass_t;
1794
1795 /**
1796  * Wrapper to run inline_functions() as an ir_prog pass.
1797  */
1798 static int inline_functions_wrapper(ir_prog *irp, void *context)
1799 {
1800         inline_functions_pass_t *pass = (inline_functions_pass_t*)context;
1801
1802         (void)irp;
1803         inline_functions(pass->maxsize, pass->inline_threshold,
1804                          pass->after_inline_opt);
1805         return 0;
1806 }
1807
1808 /* create an ir_prog pass for inline_functions */
1809 ir_prog_pass_t *inline_functions_pass(
1810           const char *name, unsigned maxsize, int inline_threshold,
1811           opt_ptr after_inline_opt)
1812 {
1813         inline_functions_pass_t *pass = XMALLOCZ(inline_functions_pass_t);
1814
1815         pass->maxsize          = maxsize;
1816         pass->inline_threshold = inline_threshold;
1817         pass->after_inline_opt = after_inline_opt;
1818
1819         return def_prog_pass_constructor(
1820                 &pass->pass, name ? name : "inline_functions",
1821                 inline_functions_wrapper);
1822 }
1823
1824 void firm_init_inline(void)
1825 {
1826         FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
1827 }