call hook_new_entity after copying entity so the firm debugger can catch it
[libfirm] / ir / opt / opt_inline.c
/*
 * Copyright (C) 1995-2011 University of Karlsruhe.  All rights reserved.
 *
 * This file is part of libFirm.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License.
 * Agreement provided with the Software.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

/**
 * @file
 * @brief    Dead node elimination and Procedure Inlining.
 * @author   Michael Beck, Goetz Lindenmaier
 * @version  $Id$
 */
#include "config.h"

#include <limits.h>
#include <stdbool.h>
#include <assert.h>

#include "irnode_t.h"
#include "irgraph_t.h"
#include "irprog_t.h"

#include "iroptimize.h"
#include "ircons_t.h"
#include "iropt_t.h"
#include "irgopt.h"
#include "irgmod.h"
#include "irgwalk.h"

#include "array_t.h"
#include "list.h"
#include "pset.h"
#include "pmap.h"
#include "pdeq.h"
#include "xmalloc.h"
#include "pqueue.h"

#include "irouts.h"
#include "irloop_t.h"
#include "irbackedge_t.h"
#include "opt_init.h"
#include "cgana.h"
#include "trouts.h"
#include "error.h"

#include "analyze_irg_args.h"
#include "iredges_t.h"
#include "irflag_t.h"
#include "irhooks.h"
#include "irtools.h"
#include "iropt_dbg.h"
#include "irpass_t.h"
#include "irphase_t.h"

DEBUG_ONLY(static firm_dbg_module_t *dbg;)

/*------------------------------------------------------------------*/
/* Routines for dead node elimination / copying garbage collection  */
/* of the obstack.                                                  */
/*------------------------------------------------------------------*/

/**
 * Remember the new node in the old node by using a field all nodes have.
 */
static void set_new_node(ir_node *node, ir_node *new_node)
{
	set_irn_link(node, new_node);
}

/**
 * Get this new node, before the old node is forgotten.
 */
static inline ir_node *get_new_node(ir_node *old_node)
{
	assert(irn_visited(old_node));
	return (ir_node*) get_irn_link(old_node);
}

/*--------------------------------------------------------------------*/
/*  Functionality for inlining                                         */
/*--------------------------------------------------------------------*/

/**
 * Copy node for inlining.  Updates attributes that change when
 * inlining but not for dead node elimination.
 *
 * Copies the node by calling copy_node() and then updates the entity if
 * it is a local one.  env must be the graph into which we inline (an
 * ir_graph*).  The new entities must already be stored in the link
 * fields of the old entities.
 */
static void copy_node_inline(ir_node *node, void *env)
{
	ir_graph *new_irg  = (ir_graph*) env;
	ir_node  *new_node = irn_copy_into_irg(node, new_irg);

	set_new_node(node, new_node);
	if (is_Sel(node)) {
		ir_graph  *old_irg        = get_irn_irg(node);
		ir_type   *old_frame_type = get_irg_frame_type(old_irg);
		ir_entity *old_entity     = get_Sel_entity(node);
		assert(is_Sel(new_node));
		/* use copied entities from the new frame */
		if (get_entity_owner(old_entity) == old_frame_type) {
			ir_entity *new_entity = (ir_entity*)get_entity_link(old_entity);
			assert(new_entity != NULL);
			set_Sel_entity(new_node, new_entity);
		}
	} else if (is_Block(new_node)) {
		new_node->attr.block.irg.irg = new_irg;
	}
}

static void set_preds_inline(ir_node *node, void *env)
{
	ir_node *new_node;

	irn_rewire_inputs(node);

	/* move constants into start block */
	new_node = get_new_node(node);
	if (is_irn_constlike(new_node)) {
		ir_graph *new_irg     = (ir_graph *) env;
		ir_node  *start_block = get_irg_start_block(new_irg);
		set_nodes_block(new_node, start_block);
	}
}

/**
 * Walker: checks if the value argument area (P_value_arg_base) or dynamic
 * stack allocation is used, both of which prevent inlining.
 */
static void find_addr(ir_node *node, void *env)
{
	bool *allow_inline = (bool*)env;

	if (is_Sel(node)) {
		ir_graph *irg = current_ir_graph;
		if (get_Sel_ptr(node) == get_irg_frame(irg)) {
			/* access to frame */
			ir_entity *ent = get_Sel_entity(node);
			if (get_entity_owner(ent) != get_irg_frame_type(irg)) {
				/* access to value_type */
				*allow_inline = false;
			}
		}
	} else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
		/* From GCC:
		 * Refuse to inline an alloca call unless the user explicitly forced
		 * it, as this may change the program's memory overhead drastically
		 * when the function using alloca is called in a loop.  In GCC,
		 * inlining into schedule_block caused SPEC2000 to require 2GB of RAM
		 * instead of 256MB.
		 *
		 * Sadly, this is true for our implementation as well.
		 * Moreover, we cannot differentiate between alloca() and VLA yet, so
		 * this disables inlining of functions using VLA (which are completely
		 * safe).
		 *
		 * Two solutions:
		 * - add a flag to the Alloc node for "real" alloca() calls
		 * - add a new Stack-Restore node at the end of a function using
		 *   alloca()
		 */
		*allow_inline = false;
	}
}

/**
 * Check if we can inline a given call.
 * Currently, we cannot inline two cases:
 * - call with compound arguments
 * - graphs that take the address of a parameter
 *
 * check these conditions here
 */
static bool can_inline(ir_node *call, ir_graph *called_graph)
{
	ir_entity          *called      = get_irg_entity(called_graph);
	ir_type            *called_type = get_entity_type(called);
	ir_type            *call_type   = get_Call_type(call);
	size_t              n_params    = get_method_n_params(called_type);
	size_t              n_arguments = get_method_n_params(call_type);
	size_t              n_res       = get_method_n_ress(called_type);
	irg_inline_property prop        = get_irg_inline_property(called_graph);
	size_t              i;
	bool                res;

	if (prop == irg_inline_forbidden)
		return false;

	if (n_arguments != n_params) {
		/* this is a bad feature of C: without a prototype, we can
		 * call a function with fewer parameters than needed. Currently
		 * we don't support this, although we could use Unknown then. */
		return false;
	}
	if (n_res != get_method_n_ress(call_type)) {
		return false;
	}

	/* Argh, compiling C has some bad consequences:
	 * it is implementation dependent what happens when call type and
	 * method type disagree.
	 * We support inlining if the bitsize of the types matches AND
	 * the same arithmetic is used. */
	for (i = 0; i < n_params; ++i) {
		ir_type *param_tp = get_method_param_type(called_type, i);
		ir_type *arg_tp   = get_method_param_type(call_type, i);

		if (param_tp != arg_tp) {
			ir_mode *pmode = get_type_mode(param_tp);
			ir_mode *amode = get_type_mode(arg_tp);

			if (pmode == NULL || amode == NULL)
				return false;
			if (get_mode_size_bits(pmode) != get_mode_size_bits(amode))
				return false;
			if (get_mode_arithmetic(pmode) != get_mode_arithmetic(amode))
				return false;
			/* otherwise we can simply "reinterpret" the bits */
		}
	}
	for (i = 0; i < n_res; ++i) {
		ir_type *decl_res_tp = get_method_res_type(called_type, i);
		ir_type *used_res_tp = get_method_res_type(call_type, i);

		if (decl_res_tp != used_res_tp) {
			ir_mode *decl_mode = get_type_mode(decl_res_tp);
			ir_mode *used_mode = get_type_mode(used_res_tp);
			if (decl_mode == NULL || used_mode == NULL)
				return false;
			if (get_mode_size_bits(decl_mode) != get_mode_size_bits(used_mode))
				return false;
			if (get_mode_arithmetic(decl_mode) != get_mode_arithmetic(used_mode))
				return false;
			/* otherwise we can "reinterpret" the bits */
		}
	}

	/* check parameters for compound arguments */
	for (i = 0; i < n_params; ++i) {
		ir_type *p_type = get_method_param_type(call_type, i);

		if (is_compound_type(p_type))
			return false;
	}

	/* check results for compound arguments */
	for (i = 0; i < n_res; ++i) {
		ir_type *r_type = get_method_res_type(call_type, i);

		if (is_compound_type(r_type))
			return false;
	}

	res = true;
	irg_walk_graph(called_graph, find_addr, NULL, &res);

	return res;
}

enum exc_mode {
	exc_handler,    /**< There is a handler. */
	exc_no_handler  /**< Exception handling not represented. */
};

/**
 * Copy all entities of the stack frame of one irg to the stack frame of
 * another.  Sets the entity links of the old entities to the copies.
 */
static void copy_frame_entities(ir_graph *from, ir_graph *to)
{
	ir_type *from_frame = get_irg_frame_type(from);
	ir_type *to_frame   = get_irg_frame_type(to);
	size_t   n_members  = get_class_n_members(from_frame);
	size_t   i;
	assert(from_frame != to_frame);

	for (i = 0; i < n_members; ++i) {
		ir_entity *old_ent = get_class_member(from_frame, i);
		ir_entity *new_ent = copy_entity_own(old_ent, to_frame);
		set_entity_link(old_ent, new_ent);
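		/* announce the copied entity to the hooks so that e.g. the firm
		 * debugger can catch it (assumes the hook_new_entity() hook from
		 * irhooks.h, which this file already includes) */
		hook_new_entity(new_ent);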
	}
}

/* Inlines a method at the given call site.  The Phi lists and Proj links
 * of the caller must have been computed before (see collect_phiprojs()). */
int inline_method(ir_node *call, ir_graph *called_graph)
{
	ir_node       *pre_call;
	ir_node       *post_call, *post_bl;
	ir_node       *in[pn_Start_max+1];
	ir_node       *end, *end_bl, *block;
	ir_node       **res_pred;
	ir_node       **cf_pred;
	ir_node       **args_in;
	ir_node       *ret, *phi;
	int           arity, n_ret, n_exc, n_res, i, j, rem_opt;
	int           irn_arity, n_params;
	int           n_mem_phi;
	enum exc_mode exc_handling;
	ir_type       *mtp;
	ir_type       *ctp;
	ir_entity     *ent;
	ir_graph      *rem;
	ir_graph      *irg = get_irn_irg(call);

	/* we cannot inline some types of calls */
	if (! can_inline(call, called_graph))
		return 0;

	/* We cannot inline a recursive call. The graph must be copied before
	 * the call to inline_method() using create_irg_copy(). */
	if (called_graph == irg)
		return 0;

	ent      = get_irg_entity(called_graph);
	mtp      = get_entity_type(ent);
	ctp      = get_Call_type(call);
	n_params = get_method_n_params(mtp);
	n_res    = get_method_n_ress(mtp);

	rem = current_ir_graph;
	current_ir_graph = irg;

	DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));

	/* optimizations can cause problems when allocating new nodes */
	rem_opt = get_opt_optimize();
	set_optimize(0);

	/* Handle graph state */
	assert(get_irg_phase_state(irg) != phase_building);
	assert(get_irg_pinned(irg) == op_pin_state_pinned);
	assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
	set_irg_extblk_inconsistent(irg);
	set_irg_doms_inconsistent(irg);
	set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
	set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);
	edges_deactivate(irg);

	/* here we know we WILL inline, so inform the statistics */
	hook_inline(call, called_graph);

	/* -- Decide how to handle exception control flow: Is there a handler
	   for the Call node, or do we branch directly to End on an exception?
	   exc_handling:
	   exc_handler:    There is a handler.
	   exc_no_handler: Exception handling is not represented in Firm. -- */
	{
		ir_node *Xproj = NULL;
		ir_node *proj;
		for (proj = (ir_node*)get_irn_link(call); proj != NULL;
		     proj = (ir_node*)get_irn_link(proj)) {
			long proj_nr = get_Proj_proj(proj);
			if (proj_nr == pn_Call_X_except) Xproj = proj;
		}
		exc_handling = Xproj != NULL ? exc_handler : exc_no_handler;
	}

	/* create the argument tuple */
	args_in = ALLOCAN(ir_node*, n_params);

	block = get_nodes_block(call);
	for (i = n_params - 1; i >= 0; --i) {
		ir_node *arg      = get_Call_param(call, i);
		ir_type *param_tp = get_method_param_type(mtp, i);
		ir_mode *mode     = get_type_mode(param_tp);

		if (mode != get_irn_mode(arg)) {
			arg = new_r_Conv(block, arg, mode);
		}
		args_in[i] = arg;
	}

	/* Pre_call is a Tuple that simulates the Start node of the called
	 * procedure and later replaces the Start node of the called graph.
	 * Post_call is the old Call node and collects the results of the called
	 * graph. Both will end up being a tuple. */
	post_bl = get_nodes_block(call);
	/* XxMxPxPxPxT of Start + parameter of Call */
	in[pn_Start_M]              = get_Call_mem(call);
	in[pn_Start_X_initial_exec] = new_r_Jmp(post_bl);
	in[pn_Start_P_frame_base]   = get_irg_frame(irg);
	in[pn_Start_T_args]         = new_r_Tuple(post_bl, n_params, args_in);
	pre_call = new_r_Tuple(post_bl, pn_Start_max+1, in);
	post_call = call;

	/* --
	   The new block gets the ins of the old block, pre_call and all its
	   predecessors and all Phi nodes. -- */
	part_block(pre_call);

	/* increment visited flag for later walk */
	inc_irg_visited(called_graph);

	/* link some nodes to nodes in the current graph so instead of copying
	 * the linked nodes will get used.
	 * So the copier will use the created Tuple instead of copying the start
	 * node, similar for singleton nodes like NoMem and Bad.
	 * Note: this will prohibit predecessors from being copied - only do it
	 *       for nodes without predecessors */
	{
		ir_node *start_block;
		ir_node *start;
		ir_node *nomem;

		start_block = get_irg_start_block(called_graph);
		set_new_node(start_block, get_nodes_block(pre_call));
		mark_irn_visited(start_block);

		start = get_irg_start(called_graph);
		set_new_node(start, pre_call);
		mark_irn_visited(start);

		nomem = get_irg_no_mem(called_graph);
		set_new_node(nomem, get_irg_no_mem(irg));
		mark_irn_visited(nomem);
	}

	/* the entity link is used to link entities on the old stack frame to
	 * the new stack frame */
	irp_reserve_resources(irp, IRP_RESOURCE_ENTITY_LINK);

	/* copy entities and nodes */
	assert(!irn_visited(get_irg_end(called_graph)));
	copy_frame_entities(called_graph, irg);
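	/* the pre-order callback copies each reachable node into irg, the
	 * post-order callback rewires the inputs of the copies */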
	irg_walk_core(get_irg_end(called_graph), copy_node_inline, set_preds_inline,
	              irg);

	irp_free_resources(irp, IRP_RESOURCE_ENTITY_LINK);

	/* -- Merge the end of the inlined procedure with the call site -- */
	/* We will turn the old Call node into a Tuple with the following
	   predecessors:
	   -1: Block of Tuple.
	   0: Phi of all Memories of Return statements.
	   1: Jmp from new Block that merges the control flow from all exception
	      predecessors of the old end block.
	   2: Tuple of all arguments.
	   3: Phi of Exception memories.
	   In case the old Call directly branches to End on an exception we don't
	   need the block merging all exceptions nor the Phi of the exception
	   memories.
	*/

	/* Precompute some values */
	end_bl = get_new_node(get_irg_end_block(called_graph));
	end    = get_new_node(get_irg_end(called_graph));
	arity  = get_Block_n_cfgpreds(end_bl);    /* arity = n_exc + n_ret  */
	n_res  = get_method_n_ress(get_Call_type(call));

	res_pred = XMALLOCN(ir_node*, n_res);
	cf_pred  = XMALLOCN(ir_node*, arity);

	/* archive keepalives */
	irn_arity = get_irn_arity(end);
	for (i = 0; i < irn_arity; i++) {
		ir_node *ka = get_End_keepalive(end, i);
		if (! is_Bad(ka))
			add_End_keepalive(get_irg_end(irg), ka);
	}

	/* replace Return nodes by Jmp nodes */
	n_ret = 0;
	for (i = 0; i < arity; i++) {
		ir_node *ret;
		ret = get_Block_cfgpred(end_bl, i);
		if (is_Return(ret)) {
			ir_node *block = get_nodes_block(ret);
			cf_pred[n_ret] = new_r_Jmp(block);
			n_ret++;
		}
	}
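	/* the post-call block is now reached through the Jmp nodes that
	 * replaced the Returns of the inlined graph */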
	set_irn_in(post_bl, n_ret, cf_pred);

	/* build a Tuple for all results of the method.
	 * Add Phi nodes if there was more than one Return. */
	turn_into_tuple(post_call, pn_Call_max+1);
	/* First the Memory-Phi */
	n_mem_phi = 0;
	for (i = 0; i < arity; i++) {
		ret = get_Block_cfgpred(end_bl, i);
		if (is_Return(ret)) {
			cf_pred[n_mem_phi++] = get_Return_mem(ret);
		}
		/* memory output for some exceptions is directly connected to End */
		if (is_Call(ret)) {
			cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 3);
		} else if (is_fragile_op(ret)) {
			/* We rely on all fragile cfops having the memory output at the
			 * same position. */
			cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 0);
		} else if (is_Raise(ret)) {
			cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 1);
		}
	}
	phi = new_r_Phi(post_bl, n_mem_phi, cf_pred, mode_M);
	set_Tuple_pred(call, pn_Call_M, phi);
	/* Conserve Phi-list for further inlinings -- but might be optimized */
	if (get_nodes_block(phi) == post_bl) {
		set_irn_link(phi, get_irn_link(post_bl));
		set_irn_link(post_bl, phi);
	}
	/* Now the real results */
	if (n_res > 0) {
		ir_node *result_tuple;
		for (j = 0; j < n_res; j++) {
			ir_type *res_type = get_method_res_type(ctp, j);
			ir_mode *res_mode = get_type_mode(res_type);
			n_ret = 0;
			for (i = 0; i < arity; i++) {
				ret = get_Block_cfgpred(end_bl, i);
				if (is_Return(ret)) {
					ir_node *res = get_Return_res(ret, j);
					if (get_irn_mode(res) != res_mode) {
						ir_node *block = get_nodes_block(res);
						res = new_r_Conv(block, res, res_mode);
					}
					cf_pred[n_ret] = res;
					n_ret++;
				}
			}
			if (n_ret > 0) {
				phi = new_r_Phi(post_bl, n_ret, cf_pred, res_mode);
			} else {
				phi = new_r_Bad(irg, res_mode);
			}
			res_pred[j] = phi;
			/* Conserve Phi-list for further inlinings -- but might be optimized */
			if (get_nodes_block(phi) == post_bl) {
				set_Phi_next(phi, get_Block_phis(post_bl));
				set_Block_phis(post_bl, phi);
			}
		}
		result_tuple = new_r_Tuple(post_bl, n_res, res_pred);
		set_Tuple_pred(call, pn_Call_T_result, result_tuple);
	} else {
		set_Tuple_pred(call, pn_Call_T_result, new_r_Bad(irg, mode_T));
	}
	/* handle the regular call */
	set_Tuple_pred(call, pn_Call_X_regular, new_r_Jmp(post_bl));

	/* Finally the exception control flow.
	   We have two possible situations:
	   First, the Call branches to an exception handler:
	   We need to add a Phi node to collect the memory containing the
	   exception objects.  Further we need to add another block to get a
	   correct representation of this Phi.  To this block we add a Jmp that
	   resolves into the X output of the Call when the Call is turned into
	   a tuple.
	   Second: There is no exception edge. Just add all inlined exception
	   branches to the End node.
	 */
	if (exc_handling == exc_handler) {
		n_exc = 0;
		for (i = 0; i < arity; i++) {
			ir_node *ret, *irn;
			ret = get_Block_cfgpred(end_bl, i);
			irn = skip_Proj(ret);
			if (is_fragile_op(irn) || is_Raise(irn)) {
				cf_pred[n_exc] = ret;
				++n_exc;
			}
		}
		if (n_exc > 0) {
			if (n_exc == 1) {
				/* simple fix */
				set_Tuple_pred(call, pn_Call_X_except, cf_pred[0]);
			} else {
				ir_node *block = new_r_Block(irg, n_exc, cf_pred);
				set_Tuple_pred(call, pn_Call_X_except, new_r_Jmp(block));
			}
		} else {
			set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg, mode_X));
		}
	} else {
		ir_node *main_end_bl;
		int main_end_bl_arity;
		ir_node **end_preds;

		/* assert(exc_handling == exc_no_handler || there are no exceptions) */
		n_exc = 0;
		for (i = 0; i < arity; i++) {
			ir_node *ret = get_Block_cfgpred(end_bl, i);
			ir_node *irn = skip_Proj(ret);

			if (is_fragile_op(irn) || is_Raise(irn)) {
				cf_pred[n_exc] = ret;
				n_exc++;
			}
		}
		main_end_bl       = get_irg_end_block(irg);
		main_end_bl_arity = get_irn_arity(main_end_bl);
		end_preds         = XMALLOCN(ir_node*, n_exc + main_end_bl_arity);

		for (i = 0; i < main_end_bl_arity; ++i)
			end_preds[i] = get_irn_n(main_end_bl, i);
		for (i = 0; i < n_exc; ++i)
			end_preds[main_end_bl_arity + i] = cf_pred[i];
		set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
		set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg, mode_X));
		free(end_preds);
	}
	free(res_pred);
	free(cf_pred);

	/* -- Turn the optimizations back on. -- */
	set_optimize(rem_opt);
	current_ir_graph = rem;

	return 1;
}

/********************************************************************/
/* Apply inlining to small methods.                                 */
/********************************************************************/

static struct obstack  temp_obst;

/** Represents a possibly inlinable call in a graph. */
typedef struct call_entry {
	ir_node    *call;       /**< The Call node. */
	ir_graph   *callee;     /**< The callee IR-graph. */
	list_head  list;        /**< List head for linking the next one. */
	int        loop_depth;  /**< The loop depth of this call. */
	int        benefice;    /**< The calculated benefice of this call. */
	unsigned   local_adr:1; /**< Set if this call gets an address of a local variable. */
	unsigned   all_const:1; /**< Set if this call has only constant parameters. */
} call_entry;

/**
 * environment for inlining small irgs
 */
typedef struct inline_env_t {
	struct obstack obst;  /**< An obstack on which call_entries are allocated. */
	list_head      calls; /**< The call entry list. */
} inline_env_t;

/**
 * Returns the irg called from a Call node. If the irg is not
 * known, NULL is returned.
 *
 * @param call  the call node
 */
static ir_graph *get_call_called_irg(ir_node *call)
{
	ir_node *addr;

	addr = get_Call_ptr(call);
	if (is_Global(addr)) {
		ir_entity *ent = get_Global_entity(addr);
		/* we don't know which function finally gets bound to a weak symbol */
		if (get_entity_linkage(ent) & IR_LINKAGE_WEAK)
			return NULL;

		return get_entity_irg(ent);
	}

	return NULL;
}

/**
 * Walker: Collect all calls to known graphs inside a graph.
 */
static void collect_calls(ir_node *call, void *env)
{
	(void) env;
	if (is_Call(call)) {
		ir_graph *called_irg = get_call_called_irg(call);

		if (called_irg != NULL) {
			/* The Call node calls a locally defined method.  Remember to inline. */
			inline_env_t *ienv  = (inline_env_t*)env;
			call_entry   *entry = OALLOC(&ienv->obst, call_entry);
			entry->call       = call;
			entry->callee     = called_irg;
			entry->loop_depth = 0;
			entry->benefice   = 0;
			entry->local_adr  = 0;
			entry->all_const  = 0;

			list_add_tail(&entry->list, &ienv->calls);
		}
	}
}

/**
 * Inlines all small methods at call sites where the called address comes
 * from a SymConst node that references the entity representing the called
 * method.
 * The size argument is a rough measure for the code size of the method:
 * Methods where the obstack containing the firm graph is smaller than
 * size are inlined.
 */
void inline_small_irgs(ir_graph *irg, int size)
{
	ir_graph *rem = current_ir_graph;
	inline_env_t env;
	call_entry *entry;

	current_ir_graph = irg;
	/* Handle graph state */
	assert(get_irg_phase_state(irg) != phase_building);
	free_callee_info(irg);

	/* Find Call nodes to inline.
	   (We cannot inline during a walk of the graph, as inlining the same
	   method several times changes the visited flag of the walked graph:
	   after the first inlining visited of the callee equals visited of
	   the caller.  With the next inlining both are increased.) */
	obstack_init(&env.obst);
	INIT_LIST_HEAD(&env.calls);
	irg_walk_graph(irg, NULL, collect_calls, &env);

	if (! list_empty(&env.calls)) {
		/* There are calls to inline */
		ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
		collect_phiprojs(irg);

		list_for_each_entry(call_entry, entry, &env.calls, list) {
			ir_graph            *callee = entry->callee;
			irg_inline_property prop    = get_irg_inline_property(callee);

			if (prop == irg_inline_forbidden) {
				continue;
			}

			if (prop >= irg_inline_forced ||
			    _obstack_memory_used(callee->obst) - (int)obstack_room(callee->obst) < size) {
				inline_method(entry->call, callee);
			}
		}
		ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
	}
	obstack_free(&env.obst, NULL);
	current_ir_graph = rem;
}

typedef struct inline_small_irgs_pass_t {
	ir_graph_pass_t pass;
	int            size;
} inline_small_irgs_pass_t;

/**
 * Wrapper to run inline_small_irgs() as a pass.
 */
static int inline_small_irgs_wrapper(ir_graph *irg, void *context)
{
	inline_small_irgs_pass_t *pass = (inline_small_irgs_pass_t*)context;

	inline_small_irgs(irg, pass->size);
	return 0;
}

/* create a pass for inline_small_irgs() */
ir_graph_pass_t *inline_small_irgs_pass(const char *name, int size)
{
	inline_small_irgs_pass_t *pass = XMALLOCZ(inline_small_irgs_pass_t);

	pass->size = size;
	return def_graph_pass_constructor(
		&pass->pass, name ? name : "inline_small_irgs", inline_small_irgs_wrapper);
}

/**
 * Environment for inlining irgs.
 */
typedef struct {
	list_head calls;             /**< List of all call nodes in this graph. */
	unsigned  *local_weights;    /**< Once allocated, the beneficial weight for transmitting local addresses. */
	unsigned  n_nodes;           /**< Number of nodes in graph, not counting Proj, Tuple, Sync and Block nodes; Start and End are subtracted on allocation. */
	unsigned  n_blocks;          /**< Number of Blocks in graph without Start and End block. */
	unsigned  n_nodes_orig;      /**< for statistics */
	unsigned  n_call_nodes;      /**< Number of Call nodes in the graph. */
	unsigned  n_call_nodes_orig; /**< for statistics */
	unsigned  n_callers;         /**< Number of known graphs that call this graph. */
	unsigned  n_callers_orig;    /**< for statistics */
	unsigned  got_inline:1;      /**< Set, if at least one call inside this graph was inlined. */
	unsigned  recursive:1;       /**< Set, if this function is self recursive. */
} inline_irg_env;

/**
 * Allocate a new environment for inlining.
 */
static inline_irg_env *alloc_inline_irg_env(void)
{
	inline_irg_env *env    = OALLOC(&temp_obst, inline_irg_env);
	INIT_LIST_HEAD(&env->calls);
	env->local_weights     = NULL;
	env->n_nodes           = -2; /* do not count Start, End */
	env->n_blocks          = -2; /* do not count Start, End Block */
	env->n_nodes_orig      = -2; /* do not count Start, End */
	env->n_call_nodes      = 0;
	env->n_call_nodes_orig = 0;
	env->n_callers         = 0;
	env->n_callers_orig    = 0;
	env->got_inline        = 0;
	env->recursive         = 0;
	return env;
}

typedef struct walker_env {
	inline_irg_env *x;     /**< the inline environment */
	char ignore_runtime;   /**< the ignore runtime flag */
	char ignore_callers;   /**< if set, do not update the callers data */
} wenv_t;

/**
 * post-walker: collect all calls in the inline-environment
 * of a graph and sum some statistics.
 */
static void collect_calls2(ir_node *call, void *ctx)
{
	wenv_t         *env = (wenv_t*)ctx;
	inline_irg_env *x = env->x;
	unsigned        code = get_irn_opcode(call);
	ir_graph       *callee;
	call_entry     *entry;

	/* count meaningful nodes in irg */
	if (code != iro_Proj && code != iro_Tuple && code != iro_Sync) {
		if (code != iro_Block) {
			++x->n_nodes;
			++x->n_nodes_orig;
		} else {
			++x->n_blocks;
		}
	}

	if (code != iro_Call) return;

	/* check, if it's a runtime call */
	if (env->ignore_runtime) {
		ir_node *symc = get_Call_ptr(call);

		if (is_Global(symc)) {
			ir_entity *ent = get_Global_entity(symc);

			if (get_entity_additional_properties(ent) & mtp_property_runtime)
				return;
		}
	}

	/* collect all call nodes */
	++x->n_call_nodes;
	++x->n_call_nodes_orig;

	callee = get_call_called_irg(call);
	if (callee != NULL) {
		if (! env->ignore_callers) {
			inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);
			/* count all static callers */
			++callee_env->n_callers;
			++callee_env->n_callers_orig;
		}
		if (callee == current_ir_graph)
			x->recursive = 1;

		/* link it in the list of possibly inlinable entries */
		entry = OALLOC(&temp_obst, call_entry);
		entry->call       = call;
		entry->callee     = callee;
		entry->loop_depth = get_irn_loop(get_nodes_block(call))->depth;
		entry->benefice   = 0;
		entry->local_adr  = 0;
		entry->all_const  = 0;

		list_add_tail(&entry->list, &x->calls);
	}
}

/**
 * Returns TRUE if the number of call nodes is 0 in the irg's environment,
 * hence this irg is a leaf.
 */
inline static int is_leave(ir_graph *irg)
{
	inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
	return env->n_call_nodes == 0;
}

/**
 * Returns TRUE if the number of nodes in the callee is
 * smaller than size in the irg's environment.
 */
inline static int is_smaller(ir_graph *callee, unsigned size)
{
	inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);
	return env->n_nodes < size;
}

/**
 * Duplicate a call entry.
 *
 * @param entry     the original entry to duplicate
 * @param new_call  the new call node
 * @param loop_depth_delta
 *                  delta value for the loop depth
 */
static call_entry *duplicate_call_entry(const call_entry *entry,
                                        ir_node *new_call, int loop_depth_delta)
{
	call_entry *nentry = OALLOC(&temp_obst, call_entry);
	nentry->call       = new_call;
	nentry->callee     = entry->callee;
	nentry->benefice   = entry->benefice;
	nentry->loop_depth = entry->loop_depth + loop_depth_delta;
	nentry->local_adr  = entry->local_adr;
	nentry->all_const  = entry->all_const;

	return nentry;
}

/**
 * Append all call nodes of the source environment to the list of the
 * destination environment.
 *
 * @param dst         destination environment
 * @param src         source environment
 * @param loop_depth  the loop depth of the call that is replaced by the src list
 */
static void append_call_list(inline_irg_env *dst, inline_irg_env *src, int loop_depth)
{
	call_entry *entry, *nentry;

	/* Note that the src list points to Call nodes in the inlined graph, but
	   we need Call nodes in our graph. Luckily the inliner leaves this
	   information in the link field. */
	list_for_each_entry(call_entry, entry, &src->calls, list) {
		nentry = duplicate_call_entry(entry, (ir_node*)get_irn_link(entry->call), loop_depth);
		list_add_tail(&nentry->list, &dst->calls);
	}
	dst->n_call_nodes += src->n_call_nodes;
	dst->n_nodes      += src->n_nodes;
}

/*
 * Inlines small leaf methods at call sites where the called address comes
 * from a SymConst node that references the entity representing the called
 * method.
 * The size argument is a rough measure for the code size of the method:
 * Methods where the obstack containing the firm graph is smaller than
 * size are inlined.
 */
void inline_leave_functions(unsigned maxsize, unsigned leavesize,
                            unsigned size, int ignore_runtime)
{
	inline_irg_env   *env;
	ir_graph         *irg;
	size_t           i, n_irgs;
	ir_graph         *rem;
	int              did_inline;
	wenv_t           wenv;
	call_entry       *entry, *next;
	const call_entry *centry;
	pmap             *copied_graphs;
	pmap_entry       *pm_entry;

	rem = current_ir_graph;
	obstack_init(&temp_obst);

	/* a map for the copied graphs, used to inline recursive calls */
	copied_graphs = pmap_create();

	/* extend all irgs by a temporary data structure for inlining. */
	n_irgs = get_irp_n_irgs();
	for (i = 0; i < n_irgs; ++i)
		set_irg_link(get_irp_irg(i), alloc_inline_irg_env());

	/* Pre-compute information in temporary data structure. */
	wenv.ignore_runtime = ignore_runtime;
	wenv.ignore_callers = 0;
	for (i = 0; i < n_irgs; ++i) {
		ir_graph *irg = get_irp_irg(i);

		assert(get_irg_phase_state(irg) != phase_building);
		free_callee_info(irg);

		assure_cf_loop(irg);
		wenv.x = (inline_irg_env*)get_irg_link(irg);
		irg_walk_graph(irg, NULL, collect_calls2, &wenv);
	}

	/* -- and now inline. -- */

	/* Inline leaves recursively -- we might construct new leaves. */
	do {
		did_inline = 0;

		for (i = 0; i < n_irgs; ++i) {
			ir_node *call;
			int phiproj_computed = 0;

			current_ir_graph = get_irp_irg(i);
			env              = (inline_irg_env*)get_irg_link(current_ir_graph);

			ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
			list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
				ir_graph            *callee;
				irg_inline_property  prop;

				if (env->n_nodes > maxsize)
					break;

				call   = entry->call;
				callee = entry->callee;

				prop = get_irg_inline_property(callee);
				if (prop == irg_inline_forbidden) {
					continue;
				}

				if (is_leave(callee) && (
				    is_smaller(callee, leavesize) || prop >= irg_inline_forced)) {
					if (!phiproj_computed) {
						phiproj_computed = 1;
						collect_phiprojs(current_ir_graph);
					}
					did_inline = inline_method(call, callee);

					if (did_inline) {
						inline_irg_env *callee_env = (inline_irg_env*)get_irg_link(callee);

						/* call was inlined, Phi/Projs for current graph must be recomputed */
						phiproj_computed = 0;

						/* Do some statistics */
						env->got_inline = 1;
						--env->n_call_nodes;
						env->n_nodes += callee_env->n_nodes;
						--callee_env->n_callers;

						/* remove this call from the list */
						list_del(&entry->list);
						continue;
					}
				}
			}
			ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
		}
	} while (did_inline);

	/* inline other small functions. */
	for (i = 0; i < n_irgs; ++i) {
		ir_node *call;
		int phiproj_computed = 0;

		current_ir_graph = get_irp_irg(i);
		env              = (inline_irg_env*)get_irg_link(current_ir_graph);

		ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

		/* note that the list of possible calls is updated during the process */
		list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
			irg_inline_property prop;
			ir_graph            *callee;
			pmap_entry          *e;

			call   = entry->call;
			callee = entry->callee;

			prop = get_irg_inline_property(callee);
			if (prop == irg_inline_forbidden) {
				continue;
			}

			e = pmap_find(copied_graphs, callee);
			if (e != NULL) {
				/*
				 * Remap callee if we have a copy.
				 * FIXME: Should we do this only for recursive Calls ?
				 */
				callee = (ir_graph*)e->value;
			}

			if (prop >= irg_inline_forced ||
			    (is_smaller(callee, size) && env->n_nodes < maxsize) /* small function */) {
				if (current_ir_graph == callee) {
					/*
					 * Recursive call: we cannot directly inline because we
					 * cannot walk the graph and change it. So we have to make
					 * a copy of the graph first.
					 */

					inline_irg_env *callee_env;
					ir_graph       *copy;

					ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

					/*
					 * No copy yet, create one.
					 * Note that recursive methods are never leaves, so it is
					 * sufficient to test this condition here.
					 */
					copy = create_irg_copy(callee);

					/* create_irg_copy() destroys the Proj links, recompute them */
					phiproj_computed = 0;

					ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

					/* allocate new environment */
					callee_env = alloc_inline_irg_env();
					set_irg_link(copy, callee_env);

					assure_cf_loop(copy);
					wenv.x              = callee_env;
					wenv.ignore_callers = 1;
					irg_walk_graph(copy, NULL, collect_calls2, &wenv);

					/*
					 * Enter the entity of the original graph. This is needed
					 * for inline_method(). However, note that ent->irg still points
					 * to callee, NOT to copy.
					 */
					set_irg_entity(copy, get_irg_entity(callee));

					pmap_insert(copied_graphs, callee, copy);
					callee = copy;

					/* we have only one caller: the original graph */
					callee_env->n_callers      = 1;
					callee_env->n_callers_orig = 1;
				}
				if (! phiproj_computed) {
					phiproj_computed = 1;
					collect_phiprojs(current_ir_graph);
				}
				did_inline = inline_method(call, callee);
				if (did_inline) {
					inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);

					/* call was inlined, Phi/Projs for current graph must be recomputed */
					phiproj_computed = 0;

					/* callee was inlined. Append its call list. */
					env->got_inline = 1;
					--env->n_call_nodes;
					append_call_list(env, callee_env, entry->loop_depth);
					--callee_env->n_callers;

					/* after we have inlined callee, all called methods inside callee
					   are now called once more */
					list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
						inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
						++penv->n_callers;
					}

					/* remove this call from the list */
					list_del(&entry->list);
					continue;
				}
			}
		}
		ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
	}

	for (i = 0; i < n_irgs; ++i) {
		irg = get_irp_irg(i);
		env = (inline_irg_env*)get_irg_link(irg);

		if (env->got_inline) {
			optimize_graph_df(irg);
			optimize_cf(irg);
		}
		if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
			DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
			env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
			env->n_callers_orig, env->n_callers,
			get_entity_name(get_irg_entity(irg))));
		}
	}

	/* kill the copied graphs: we don't need them anymore */
	foreach_pmap(copied_graphs, pm_entry) {
		ir_graph *copy = (ir_graph*)pm_entry->value;

		/* reset the entity, otherwise it will be deleted in the next step ... */
		set_irg_entity(copy, NULL);
		free_ir_graph(copy);
	}
	pmap_destroy(copied_graphs);

	obstack_free(&temp_obst, NULL);
	current_ir_graph = rem;
}

typedef struct inline_leave_functions_pass_t {
	ir_prog_pass_t pass;
	unsigned       maxsize;
	unsigned       leavesize;
	unsigned       size;
	int            ignore_runtime;
} inline_leave_functions_pass_t;

/**
 * Wrapper to run inline_leave_functions() as an ir_prog pass.
 */
static int inline_leave_functions_wrapper(ir_prog *irp, void *context)
{
	inline_leave_functions_pass_t *pass = (inline_leave_functions_pass_t*)context;

	(void)irp;
	inline_leave_functions(
		pass->maxsize, pass->leavesize,
		pass->size, pass->ignore_runtime);
	return 0;
}

/* create a pass for inline_leave_functions() */
ir_prog_pass_t *inline_leave_functions_pass(
	const char *name, unsigned maxsize, unsigned leavesize,
	unsigned size, int ignore_runtime)
{
	inline_leave_functions_pass_t *pass = XMALLOCZ(inline_leave_functions_pass_t);

	pass->maxsize        = maxsize;
	pass->leavesize      = leavesize;
	pass->size           = size;
	pass->ignore_runtime = ignore_runtime;

	return def_prog_pass_constructor(
		&pass->pass,
		name ? name : "inline_leave_functions",
		inline_leave_functions_wrapper);
}

1237 /**
1238  * Calculate the parameter weights for transmitting the address of a local variable.
1239  */
1240 static unsigned calc_method_local_weight(ir_node *arg)
1241 {
1242         int      i, j, k;
1243         unsigned v, weight = 0;
1244
1245         for (i = get_irn_n_outs(arg) - 1; i >= 0; --i) {
1246                 ir_node *succ = get_irn_out(arg, i);
1247
1248                 switch (get_irn_opcode(succ)) {
1249                 case iro_Load:
1250                 case iro_Store:
1251                         /* Loads and Store can be removed */
1252                         weight += 3;
1253                         break;
1254                 case iro_Sel:
1255                         /* check if all args are constant */
1256                         for (j = get_Sel_n_indexs(succ) - 1; j >= 0; --j) {
1257                                 ir_node *idx = get_Sel_index(succ, j);
1258                                 if (! is_Const(idx))
1259                                         return 0;
1260                         }
1261                         /* Check users on this Sel. Note: if a 0 is returned here, there was
1262                            some unsupported node. */
1263                         v = calc_method_local_weight(succ);
1264                         if (v == 0)
1265                                 return 0;
1266                         /* we can kill one Sel with constant indexes, this is cheap */
1267                         weight += v + 1;
1268                         break;
1269                 case iro_Id:
1270                         /* when looking backward we might find Id nodes */
1271                         weight += calc_method_local_weight(succ);
1272                         break;
1273                 case iro_Tuple:
1274                         /* unoptimized tuple */
1275                         for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
1276                                 ir_node *pred = get_Tuple_pred(succ, j);
1277                                 if (pred == arg) {
1278                                         /* look for Proj(j) */
1279                                         for (k = get_irn_n_outs(succ) - 1; k >= 0; --k) {
1280                                                 ir_node *succ_succ = get_irn_out(succ, k);
1281                                                 if (is_Proj(succ_succ)) {
1282                                                         if (get_Proj_proj(succ_succ) == j) {
1283                                                                 /* found */
1284                                                                 weight += calc_method_local_weight(succ_succ);
1285                                                         }
1286                                                 } else {
1287                                                         /* this should NOT happen */
1288                                                         return 0;
1289                                                 }
1290                                         }
1291                                 }
1292                         }
1293                         break;
1294                 default:
1295                         /* any other node: unsupported yet or bad. */
1296                         return 0;
1297                 }
1298         }
1299         return weight;
1300 }
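
/*
 * A worked example (illustrative only): if the transmitted address is used
 * by one Load and one Store, the weight is 3 + 3 = 6.  If it additionally
 * feeds a Sel with constant indexes whose only user is a Load, that Sel
 * contributes 3 + 1 = 4, for a total of 10.  Any unsupported user forces
 * the result to 0.
 */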
1301
1302 /**
1303  * Calculate the local weights for all parameters of the given graph.
1304  */
1305 static void analyze_irg_local_weights(inline_irg_env *env, ir_graph *irg)
1306 {
1307         ir_entity *ent = get_irg_entity(irg);
1308         ir_type  *mtp;
1309         size_t   nparams;
1310         int      i;
1311         long     proj_nr;
1312         ir_node  *irg_args, *arg;
1313
1314         mtp      = get_entity_type(ent);
1315         nparams  = get_method_n_params(mtp);
1316
1317         /* allocate a new array; its existence doubles as the 'already analysed' flag */
1318         env->local_weights = NEW_ARR_D(unsigned, &temp_obst, nparams);
1319
1320         /* If the method has no parameters, there is nothing to do. */
1321         if (nparams == 0)
1322                 return;
1323
1324         assure_irg_outs(irg);
1325         irg_args = get_irg_args(irg);
1326         for (i = get_irn_n_outs(irg_args) - 1; i >= 0; --i) {
1327                 arg     = get_irn_out(irg_args, i);
1328                 proj_nr = get_Proj_proj(arg);
1329                 env->local_weights[proj_nr] = calc_method_local_weight(arg);
1330         }
1331 }
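
/*
 * Illustrative note: every Proj of the Args node stands for one parameter,
 * so for a method such as int f(int *p, int q) the Proj with number 0
 * represents p, and its users determine local_weights[0].
 */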
1332
1333 /**
1334  * Calculate the benefice for transmitting a local variable address.
1335  * After inlining, the local variable might be transformed into a
1336  * SSA variable by scalar_replacement().
1337  */
1338 static unsigned get_method_local_adress_weight(ir_graph *callee, size_t pos)
1339 {
1340         inline_irg_env *env = (inline_irg_env*)get_irg_link(callee);
1341
1342         if (env->local_weights == NULL)
1343                 analyze_irg_local_weights(env, callee);
1344
1345         if (pos < ARR_LEN(env->local_weights))
1346                 return env->local_weights[pos];
1347         return 0;
1348 }
1349
1350 /**
1351  * Calculate a benefice value for inlining the given call.
1352  *
1353  * @param entry      the call entry containing the call node to inspect
1354  * @param callee     the called graph
1355  */
1356 static int calc_inline_benefice(call_entry *entry, ir_graph *callee)
1357 {
1358         ir_node   *call = entry->call;
1359         ir_entity *ent  = get_irg_entity(callee);
1360         ir_node   *frame_ptr;
1361         ir_type   *mtp;
1362         int       weight = 0;
1363         int       i, n_params, all_const;
1364         unsigned  cc, v;
1365         irg_inline_property prop;
1366
1367         inline_irg_env *callee_env;
1368
1369         prop = get_irg_inline_property(callee);
1370         if (prop == irg_inline_forbidden) {
1371                 DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden\n",
1372                     call, callee));
1373                 return entry->benefice = INT_MIN;
1374         }
1375
1376         if (get_irg_additional_properties(callee) & mtp_property_noreturn) {
1377                 DB((dbg, LEVEL_2, "In %+F Call to %+F: not inlining noreturn\n",
1378                     call, callee));
1379                 return entry->benefice = INT_MIN;
1380         }
1381
1382         /* costs for every passed parameter */
1383         n_params = get_Call_n_params(call);
1384         mtp      = get_entity_type(ent);
1385         cc       = get_method_calling_convention(mtp);
1386         if (cc & cc_reg_param) {
1387                 /* register parameters are cheaper than stack parameters */
1388                 int max_regs = cc & ~cc_bits;
1389
1390                 if (max_regs < n_params)
1391                         weight += max_regs * 2 + (n_params - max_regs) * 5;
1392                 else
1393                         weight += n_params * 2;
1394         } else {
1395                 /* parameters are passed on the stack */
1396                 weight += 5 * n_params;
1397         }
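        /* Illustrative arithmetic for the costs above: with a register
         * calling convention providing 4 registers but 6 parameters the
         * weight is 4*2 + 2*5 = 18; pure stack passing would give 6*5 = 30. */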
1398
1399         /* constant parameters improve the benefice */
1400         frame_ptr = get_irg_frame(current_ir_graph);
1401         all_const = 1;
1402         for (i = 0; i < n_params; ++i) {
1403                 ir_node *param = get_Call_param(call, i);
1404
1405                 if (is_Const(param)) {
1406                         weight += get_method_param_weight(ent, i);
1407                 } else {
1408                         all_const = 0;
1409                         if (is_SymConst(param))
1410                                 weight += get_method_param_weight(ent, i);
1411                         else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
1412                                 /*
1413                                  * An address of a local variable is transmitted. After
1414                                  * inlining, scalar_replacement might be able to remove the
1415                                  * local variable, so honor this.
1416                                  */
1417                                 v = get_method_local_adress_weight(callee, i);
1418                                 weight += v;
1419                                 if (v > 0)
1420                                         entry->local_adr = 1;
1421                         }
1422                 }
1423         }
1424         entry->all_const = all_const;
1425
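        /* bonus if the callee is private, not the current graph, and has
         * exactly one caller: it can probably be removed after inlining */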
1426         callee_env = (inline_irg_env*)get_irg_link(callee);
1427         if (callee_env->n_callers == 1 &&
1428             callee != current_ir_graph &&
1429             !entity_is_externally_visible(ent)) {
1430                 weight += 700;
1431         }
1432
1433         /* give a bonus for functions with one block */
1434         if (callee_env->n_blocks == 1)
1435                 weight = weight * 3 / 2;
1436
1437         /* and one for small non-recursive functions: we want them to be inlined in almost every case */
1438         if (callee_env->n_nodes < 30 && !callee_env->recursive)
1439                 weight += 2000;
1440
1441         /* and finally for leaf functions: they do not increase the register
1442            pressure because of callee-save registers */
1443         if (callee_env->n_call_nodes == 0)
1444                 weight += 400;
1445
1446         /* it is important to inline calls in inner loops first */
1447         if (entry->loop_depth > 30)
1448                 weight += 30 * 1024;
1449         else
1450                 weight += entry->loop_depth * 1024;
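        /* e.g. a call at loop depth 2 gains 2048; the bonus is capped at
         * 30 * 1024 for deeper nests */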
1451
1452         /*
1453          * All arguments constant is probably a good sign, give an extra bonus
1454          */
1455         if (all_const)
1456                 weight += 1024;
1457
1458         return entry->benefice = weight;
1459 }
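
/*
 * A worked example (illustrative only): a call with two non-constant stack
 * parameters starts at weight 2 * 5 = 10.  If the callee consists of a
 * single block (10 * 3 / 2 = 15), has fewer than 30 nodes and is not
 * recursive (+2000), contains no further calls (+400) and the call sits at
 * loop depth 1 (+1024), the resulting benefice is 3439.
 */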
1460
1461 typedef struct walk_env_t {
1462         ir_graph **irgs;
1463         size_t   last_irg;
1464 } walk_env_t;
1465
1466 /**
1467  * Callgraph walker, collect all visited graphs.
1468  */
1469 static void callgraph_walker(ir_graph *irg, void *data)
1470 {
1471         walk_env_t *env = (walk_env_t *)data;
1472         env->irgs[env->last_irg++] = irg;
1473 }
1474
1475 /**
1476  * Creates an inline order for all graphs.
1477  *
1478  * @return the list of graphs.
1479  */
1480 static ir_graph **create_irg_list(void)
1481 {
1482         ir_entity  **free_methods;
1483         size_t     n_irgs = get_irp_n_irgs();
1484         walk_env_t env;
1485
1486         cgana(&free_methods);
1487         xfree(free_methods);
1488
1489         compute_callgraph();
1490
1491         env.irgs     = XMALLOCNZ(ir_graph*, n_irgs);
1492         env.last_irg = 0;
1493
1494         callgraph_walk(NULL, callgraph_walker, &env);
1495         assert(n_irgs == env.last_irg);
1496
1497         return env.irgs;
1498 }
1499
1500 /**
1501  * Push a call onto the priority queue if its benefice is big enough.
1502  *
1503  * @param pqueue   the priority queue of calls
1504  * @param call     the call entry
1505  * @param inline_threshold
1506  *                 the threshold value
1507  */
1508 static void maybe_push_call(pqueue_t *pqueue, call_entry *call,
1509                             int inline_threshold)
1510 {
1511         ir_graph            *callee  = call->callee;
1512         irg_inline_property prop     = get_irg_inline_property(callee);
1513         int                 benefice = calc_inline_benefice(call, callee);
1514
1515         DB((dbg, LEVEL_2, "In %+F Call %+F to %+F has benefice %d\n",
1516             get_irn_irg(call->call), call->call, callee, benefice));
1517
1518         if (prop < irg_inline_forced && benefice < inline_threshold) {
1519                 return;
1520         }
1521
1522         pqueue_put(pqueue, call, benefice);
1523 }
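
/*
 * Illustrative note: with inline_threshold = 0 every call whose benefice is
 * non-negative gets queued; calls to graphs marked irg_inline_forced are
 * queued regardless of their benefice.
 */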
1524
1525 /**
1526  * Try to inline calls into a graph.
1527  *
1528  * @param irg      the graph into which we inline
1529  * @param maxsize  do NOT inline if the size of irg gets
1530  *                 bigger than this amount
1531  * @param inline_threshold
1532  *                 threshold value for inline decision
1533  * @param copied_graphs
1534  *                 map containing copies of recursive graphs
1535  */
1536 static void inline_into(ir_graph *irg, unsigned maxsize,
1537                         int inline_threshold, pmap *copied_graphs)
1538 {
1539         int            phiproj_computed = 0;
1540         inline_irg_env *env = (inline_irg_env*)get_irg_link(irg);
1541         call_entry     *curr_call;
1542         wenv_t         wenv;
1543         pqueue_t       *pqueue;
1544
1545         if (env->n_call_nodes == 0)
1546                 return;
1547
1548         if (env->n_nodes > maxsize) {
1549                 DB((dbg, LEVEL_2, "%+F: too big (%d)\n", irg, env->n_nodes));
1550                 return;
1551         }
1552
1553         current_ir_graph = irg;
1554         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1555
1556         /* put all calls of this graph into the pqueue */
1557         pqueue = new_pqueue();
1558
1559         list_for_each_entry(call_entry, curr_call, &env->calls, list) {
1560                 assert(is_Call(curr_call->call));
1561                 maybe_push_call(pqueue, curr_call, inline_threshold);
1562         }
1563
1564         /* note that the list of possible calls is updated during the process */
1565         while (!pqueue_empty(pqueue)) {
1566                 int                 did_inline;
1567                 call_entry          *curr_call  = (call_entry*)pqueue_pop_front(pqueue);
1568                 ir_graph            *callee     = curr_call->callee;
1569                 ir_node             *call_node  = curr_call->call;
1570                 inline_irg_env      *callee_env = (inline_irg_env*)get_irg_link(callee);
1571                 irg_inline_property prop        = get_irg_inline_property(callee);
1572                 int                 loop_depth;
1573                 const call_entry    *centry;
1574                 pmap_entry          *e;
1575
1576                 if ((prop < irg_inline_forced) && env->n_nodes + callee_env->n_nodes > maxsize) {
1577                         DB((dbg, LEVEL_2, "%+F: too big (%d) + %+F (%d)\n", irg,
1578                                                 env->n_nodes, callee, callee_env->n_nodes));
1579                         continue;
1580                 }
1581
1582                 e = pmap_find(copied_graphs, callee);
1583                 if (e != NULL) {
1584                         int benefice = curr_call->benefice;
1585                         /*
1586                          * Reduce the weight for recursive functions if not all arguments
1587                          * are constant; inlining recursive functions is rarely beneficial.
1588                          */
1589                         if (!curr_call->all_const)
1590                                 benefice -= 2000;
1591                         if (benefice < inline_threshold)
1592                                 continue;
1593
1594                         /*
1595                          * Remap callee if we have a copy.
1596                          */
1597                         callee     = (ir_graph*)e->value;
1598                         callee_env = (inline_irg_env*)get_irg_link(callee);
1599                 }
1600
1601                 if (current_ir_graph == callee) {
1602                         /*
1603                          * Recursive call: we cannot directly inline because we cannot
1604                          * walk the graph and change it. So we have to make a copy of
1605                          * the graph first.
1606                          */
1607                         int benefice = curr_call->benefice;
1608                         ir_graph *copy;
1609
1610                         /*
1611                          * Reduce the weight for recursive functions if not all arguments
1612                          * are constant; inlining recursive functions is rarely beneficial.
1613                          */
1614                         if (!curr_call->all_const)
1615                                 benefice -= 2000;
1616                         if (benefice < inline_threshold)
1617                                 continue;
1618
1619                         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1620
1621                         /*
1622                          * No copy yet, create one.
1623                          * Note that recursive methods are never leaves, so it is
1624                          * sufficient to test this condition here.
1625                          */
1626                         copy = create_irg_copy(callee);
1627
1628                         /* create_irg_copy() destroys the Proj links, recompute them */
1629                         phiproj_computed = 0;
1630
1631                         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1632
1633                         /* allocate a new environment */
1634                         callee_env = alloc_inline_irg_env();
1635                         set_irg_link(copy, callee_env);
1636
1637                         assure_cf_loop(copy);
1638                         memset(&wenv, 0, sizeof(wenv));
1639                         wenv.x              = callee_env;
1640                         wenv.ignore_callers = 1;
1641                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
1642
1643                         /*
1644                          * Enter the entity of the original graph. This is needed
1645                          * for inline_method(). However, note that ent->irg still points
1646                          * to callee, NOT to copy.
1647                          */
1648                         set_irg_entity(copy, get_irg_entity(callee));
1649
1650                         pmap_insert(copied_graphs, callee, copy);
1651                         callee = copy;
1652
1653                         /* we have only one caller: the original graph */
1654                         callee_env->n_callers      = 1;
1655                         callee_env->n_callers_orig = 1;
1656                 }
1657                 if (! phiproj_computed) {
1658                         phiproj_computed = 1;
1659                         collect_phiprojs(current_ir_graph);
1660                 }
1661                 did_inline = inline_method(call_node, callee);
1662                 if (!did_inline)
1663                         continue;
1664
1665                 /* call was inlined, Phi/Projs for current graph must be recomputed */
1666                 phiproj_computed = 0;
1667
1668                 /* remove it from the caller list */
1669                 list_del(&curr_call->list);
1670
1671                 /* callee was inlined. Append its call list. */
1672                 env->got_inline = 1;
1673                 --env->n_call_nodes;
1674
1675                 /* we just generate a bunch of new calls */
1676                 loop_depth = curr_call->loop_depth;
1677                 list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
1678                         inline_irg_env *penv = (inline_irg_env*)get_irg_link(centry->callee);
1679                         ir_node        *new_call;
1680                         call_entry     *new_entry;
1681
1682                         /* after we have inlined callee, all called methods inside
1683                          * callee are now called once more */
1684                         ++penv->n_callers;
1685
1686                         /* Note that the src list points to Call nodes in the inlined graph,
1687                          * but we need Call nodes in our graph. Luckily the inliner leaves
1688                          * this information in the link field. */
1689                         new_call = (ir_node*)get_irn_link(centry->call);
1690                         if (get_irn_irg(new_call) != irg) {
1691                                 /* centry->call has not been copied, which means it is dead.
1692                                  * This might happen during inlining, if a const function,
1693                                  * which cannot be inlined is only used as an unused argument
1694                                  * of another function, which is inlined. */
1695                                 continue;
1696                         }
1697                         assert(is_Call(new_call));
1698
1699                         new_entry = duplicate_call_entry(centry, new_call, loop_depth);
1700                         list_add_tail(&new_entry->list, &env->calls);
1701                         maybe_push_call(pqueue, new_entry, inline_threshold);
1702                 }
1703
1704                 env->n_call_nodes += callee_env->n_call_nodes;
1705                 env->n_nodes += callee_env->n_nodes;
1706                 --callee_env->n_callers;
1707         }
1708         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1709         del_pqueue(pqueue);
1710 }
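
/*
 * Illustrative consequence of the copy mechanism above: a self-recursive
 * function is only ever inlined via a private copy, and only if its
 * benefice minus the 2000-point recursion penalty still reaches the
 * threshold, which in practice mostly happens when all arguments are
 * constant.
 */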
1711
1712 /*
1713  * Heuristic inliner. Calculates a benefice value for every call and inlines
1714  * those calls with a value higher than the threshold.
1715  */
1716 void inline_functions(unsigned maxsize, int inline_threshold,
1717                       opt_ptr after_inline_opt)
1718 {
1719         inline_irg_env   *env;
1720         size_t           i, n_irgs;
1721         ir_graph         *rem;
1722         wenv_t           wenv;
1723         pmap             *copied_graphs;
1724         pmap_entry       *pm_entry;
1725         ir_graph         **irgs;
1726
1727         rem = current_ir_graph;
1728         obstack_init(&temp_obst);
1729
1730         irgs = create_irg_list();
1731
1732         /* a map for the copied graphs, used to inline recursive calls */
1733         copied_graphs = pmap_create();
1734
1735         /* extend all irgs by a temporary data structure for inlining. */
1736         n_irgs = get_irp_n_irgs();
1737         for (i = 0; i < n_irgs; ++i)
1738                 set_irg_link(irgs[i], alloc_inline_irg_env());
1739
1740         /* Pre-compute information in temporary data structure. */
1741         wenv.ignore_runtime = 0;
1742         wenv.ignore_callers = 0;
1743         for (i = 0; i < n_irgs; ++i) {
1744                 ir_graph *irg = irgs[i];
1745
1746                 free_callee_info(irg);
1747
1748                 wenv.x = (inline_irg_env*)get_irg_link(irg);
1749                 assure_cf_loop(irg);
1750                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
1751         }
1752
1753         /* -- and now inline. -- */
1754         for (i = 0; i < n_irgs; ++i) {
1755                 ir_graph *irg = irgs[i];
1756
1757                 inline_into(irg, maxsize, inline_threshold, copied_graphs);
1758         }
1759
1760         for (i = 0; i < n_irgs; ++i) {
1761                 ir_graph *irg = irgs[i];
1762
1763                 env = (inline_irg_env*)get_irg_link(irg);
1764                 if (env->got_inline && after_inline_opt != NULL) {
1765                         /* this irg got calls inlined: optimize it */
1766                         after_inline_opt(irg);
1767                 }
1768                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
1769                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
1770                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
1771                         env->n_callers_orig, env->n_callers,
1772                         get_entity_name(get_irg_entity(irg))));
1773                 }
1774         }
1775
1776         /* kill the copied graphs: we don't need them anymore */
1777         foreach_pmap(copied_graphs, pm_entry) {
1778                 ir_graph *copy = (ir_graph*)pm_entry->value;
1779
1780                 /* reset the entity, otherwise it will be deleted in the next step ... */
1781                 set_irg_entity(copy, NULL);
1782                 free_ir_graph(copy);
1783         }
1784         pmap_destroy(copied_graphs);
1785
1786         xfree(irgs);
1787
1788         obstack_free(&temp_obst, NULL);
1789         current_ir_graph = rem;
1790 }
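
/*
 * Usage sketch (hypothetical driver code, not part of this file; the tuning
 * values are made up):
 *
 *     // inline rather aggressively, then clean up every changed graph
 *     inline_functions(750, 0, optimize_graph_df);
 *
 * optimize_graph_df() is assumed to be declared in iroptimize.h and to
 * match the opt_ptr signature.
 */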
1791
1792 typedef struct inline_functions_pass_t {
1793         ir_prog_pass_t pass;
1794         unsigned       maxsize;
1795         int            inline_threshold;
1796         opt_ptr        after_inline_opt;
1797 } inline_functions_pass_t;
1798
1799 /**
1800  * Wrapper to run inline_functions() as an ir_prog pass.
1801  */
1802 static int inline_functions_wrapper(ir_prog *irp, void *context)
1803 {
1804         inline_functions_pass_t *pass = (inline_functions_pass_t*)context;
1805
1806         (void)irp;
1807         inline_functions(pass->maxsize, pass->inline_threshold,
1808                          pass->after_inline_opt);
1809         return 0;
1810 }
1811
1812 /* create an ir_prog pass for inline_functions */
1813 ir_prog_pass_t *inline_functions_pass(
1814           const char *name, unsigned maxsize, int inline_threshold,
1815           opt_ptr after_inline_opt)
1816 {
1817         inline_functions_pass_t *pass = XMALLOCZ(inline_functions_pass_t);
1818
1819         pass->maxsize          = maxsize;
1820         pass->inline_threshold = inline_threshold;
1821         pass->after_inline_opt = after_inline_opt;
1822
1823         return def_prog_pass_constructor(
1824                 &pass->pass, name ? name : "inline_functions",
1825                 inline_functions_wrapper);
1826 }
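
/*
 * Usage sketch (illustrative; how the pass is scheduled depends on the
 * ir_prog pass manager, see irpass.h for the actual API):
 *
 *     ir_prog_pass_t *p = inline_functions_pass(NULL, 750, 0, NULL);
 *     // add p to an ir_prog pass manager and run the manager
 */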
1827
1828 void firm_init_inline(void)
1829 {
1830         FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
1831 }