/*
 * Copyright (C) 1995-2008 University of Karlsruhe.  All rights reserved.
 *
 * This file is part of libFirm.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file LICENSE.GPL included in the
 * packaging of this file.
 *
 * Licensees holding valid libFirm Professional Edition licenses may use
 * this file in accordance with the libFirm Commercial License
 * Agreement provided with the Software.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

/**
 * @file
 * @brief    Dead node elimination and Procedure Inlining.
 * @author   Michael Beck, Goetz Lindenmaier
 * @version  $Id$
 */
#include "config.h"

#include <limits.h>
#include <stdbool.h>
#include <assert.h>

#include "irnode_t.h"
#include "irgraph_t.h"
#include "irprog_t.h"

#include "iroptimize.h"
#include "ircons_t.h"
#include "iropt_t.h"
#include "irgopt.h"
#include "irgmod.h"
#include "irgwalk.h"

#include "array_t.h"
#include "list.h"
#include "pset.h"
#include "pmap.h"
#include "pdeq.h"
#include "xmalloc.h"
#include "pqueue.h"

#include "irouts.h"
#include "irloop_t.h"
#include "irbackedge_t.h"
#include "opt_init.h"
#include "cgana.h"
#include "trouts.h"
#include "error.h"

#include "analyze_irg_args.h"
#include "iredges_t.h"
#include "irflag_t.h"
#include "irhooks.h"
#include "irtools.h"
#include "iropt_dbg.h"
#include "irpass_t.h"
#include "irphase_t.h"

DEBUG_ONLY(static firm_dbg_module_t *dbg;)

/*------------------------------------------------------------------*/
/* Routines for dead node elimination / copying garbage collection  */
/* of the obstack.                                                  */
/*------------------------------------------------------------------*/

/**
 * Remember the new node in the old node by using a field all nodes have.
 */
static void set_new_node(ir_node *node, ir_node *new_node)
{
        set_irn_link(node, new_node);
}

/**
 * Get this new node, before the old node is forgotten.
 */
static inline ir_node *get_new_node(ir_node *old_node)
{
        assert(irn_visited(old_node));
        return (ir_node*) get_irn_link(old_node);
}
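
/* Note: the old-node -> new-node mapping lives in the per-node link field
 * and is only valid for nodes whose visited flag is set; both are scratch
 * state that the inlining drivers below reserve and reset around each use. */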

/*--------------------------------------------------------------------*/
/*  Functionality for inlining                                         */
/*--------------------------------------------------------------------*/

/**
 * Copy a node for inlining.  Updates attributes that change when
 * inlining, but not those for dead node elimination.
 *
 * Copies the node by calling irn_copy_into_irg() and then updates the
 * entity if it is a local one.  env must be the graph that is inlined
 * into.  The new entities must already be stored in the link fields of
 * the old frame entities (see copy_frame_entities()).
 */
static void copy_node_inline(ir_node *node, void *env)
{
        ir_graph *new_irg  = (ir_graph*) env;
        ir_node  *new_node = irn_copy_into_irg(node, new_irg);

        set_new_node(node, new_node);

        if (is_Sel(node)) {
                ir_graph  *old_irg        = get_irn_irg(node);
                ir_type   *old_frame_type = get_irg_frame_type(old_irg);
                ir_entity *old_entity     = get_Sel_entity(node);
                assert(is_Sel(new_node));
                /* use copied entities from the new frame */
                if (get_entity_owner(old_entity) == old_frame_type) {
                        ir_entity *new_entity = get_entity_link(old_entity);
                        assert(new_entity != NULL);
                        set_Sel_entity(new_node, new_entity);
                }
        } else if (is_Block(new_node)) {
                new_node->attr.block.irg.irg = new_irg;
        }
}

static void set_preds_inline(ir_node *node, void *env)
{
        ir_node *new_node;

        irn_rewire_inputs(node);

        /* move constants into start block */
        new_node = get_new_node(node);
        if (is_irn_constlike(new_node)) {
                ir_graph *new_irg     = (ir_graph *) env;
                ir_node  *start_block = get_irg_start_block(new_irg);
                set_nodes_block(new_node, start_block);
        }
}
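
/* Note: const-like nodes conventionally live in the start block in libFirm;
 * without this fix-up a copied constant would remain in a block of the
 * inlined body, which need not dominate all of its users later on. */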

/**
 * Walker: checks if P_value_arg_base is used.
 */
static void find_addr(ir_node *node, void *env)
{
        bool *allow_inline = env;

        if (is_Sel(node)) {
                ir_graph *irg = current_ir_graph;
                if (get_Sel_ptr(node) == get_irg_frame(irg)) {
                        /* access to frame */
                        ir_entity *ent = get_Sel_entity(node);
                        if (get_entity_owner(ent) != get_irg_frame_type(irg)) {
                                /* access to value_type */
                                *allow_inline = false;
                        }
                }
        } else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
                /* From GCC:
                 * Refuse to inline an alloca call unless the user explicitly
                 * forced it, as this may change the program's memory overhead
                 * drastically when the function using alloca is called in a
                 * loop.  In GCC code present in SPEC2000, inlining into
                 * schedule_block caused it to require 2GB of RAM instead of
                 * 256MB.
                 *
                 * Sadly, this is true for our implementation as well.
                 * Moreover, we cannot differentiate between alloca() and VLA
                 * yet, so this disables inlining of functions using VLA (which
                 * are completely safe).
                 *
                 * Two solutions:
                 * - add a flag to the Alloc node for "real" alloca() calls
                 * - add a new Stack-Restore node at the end of a function using
                 *   alloca()
                 */
                *allow_inline = false;
        }
}

/**
 * Check if we can inline a given call.
 * Currently, we cannot inline two cases:
 * - calls with compound arguments or results
 * - graphs that take the address of a parameter
 *
 * These conditions are checked here.
 */
static bool can_inline(ir_node *call, ir_graph *called_graph)
{
        ir_entity          *called      = get_irg_entity(called_graph);
        ir_type            *called_type = get_entity_type(called);
        ir_type            *call_type   = get_Call_type(call);
        int                 n_params    = get_method_n_params(called_type);
        int                 n_arguments = get_method_n_params(call_type);
        int                 n_res       = get_method_n_ress(called_type);
        irg_inline_property prop        = get_irg_inline_property(called_graph);
        int                 i;
        bool                res;

        if (prop == irg_inline_forbidden)
                return false;

        if (n_arguments != n_params) {
                /* this is a bad feature of C: without a prototype, we can
                 * call a function with fewer arguments than it has parameters.
                 * Currently we do not support this, although we could use
                 * Unknown then. */
                return false;
        }
        if (n_res != get_method_n_ress(call_type)) {
                return false;
        }

        /* Argh, compiling C has some bad consequences:
         * It is implementation defined what happens when argument and
         * parameter types do not match.  We support inlining if the bitsize
         * of the types matches AND the same arithmetic is used. */
        for (i = n_params - 1; i >= 0; --i) {
                ir_type *param_tp = get_method_param_type(called_type, i);
                ir_type *arg_tp   = get_method_param_type(call_type, i);

                if (param_tp != arg_tp) {
                        ir_mode *pmode = get_type_mode(param_tp);
                        ir_mode *amode = get_type_mode(arg_tp);

                        if (pmode == NULL || amode == NULL)
                                return false;
                        if (get_mode_size_bits(pmode) != get_mode_size_bits(amode))
                                return false;
                        if (get_mode_arithmetic(pmode) != get_mode_arithmetic(amode))
                                return false;
                        /* otherwise we can simply "reinterpret" the bits */
                }
        }
        for (i = n_res - 1; i >= 0; --i) {
                ir_type *decl_res_tp = get_method_res_type(called_type, i);
                ir_type *used_res_tp = get_method_res_type(call_type, i);

                if (decl_res_tp != used_res_tp) {
                        ir_mode *decl_mode = get_type_mode(decl_res_tp);
                        ir_mode *used_mode = get_type_mode(used_res_tp);
                        if (decl_mode == NULL || used_mode == NULL)
                                return false;
                        if (get_mode_size_bits(decl_mode) != get_mode_size_bits(used_mode))
                                return false;
                        if (get_mode_arithmetic(decl_mode) != get_mode_arithmetic(used_mode))
                                return false;
                        /* otherwise we can "reinterpret" the bits */
                }
        }

        /* check parameters for compound arguments */
        for (i = 0; i < n_params; ++i) {
                ir_type *p_type = get_method_param_type(call_type, i);

                if (is_compound_type(p_type))
                        return false;
        }

        /* check results for compound return types */
        for (i = 0; i < n_res; ++i) {
                ir_type *r_type = get_method_res_type(call_type, i);

                if (is_compound_type(r_type))
                        return false;
        }

        res = true;
        irg_walk_graph(called_graph, find_addr, NULL, &res);

        return res;
}
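
/* Example: a K&R-style call f(x) against "int f(int, int)" is rejected above
 * because argument and parameter counts differ; a "long" argument passed for
 * an "int" parameter on a target where both are 32-bit integer modes is
 * accepted, since the bits can simply be reinterpreted. */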

enum exc_mode {
        exc_handler,    /**< There is a handler. */
        exc_no_handler  /**< Exception handling not represented. */
};

/**
 * Copy all entities on the stack frame of one irg to the stack frame of
 * another.  Sets the entity links of the old entities to the copies.
 */
static void copy_frame_entities(ir_graph *from, ir_graph *to)
{
        ir_type *from_frame = get_irg_frame_type(from);
        ir_type *to_frame   = get_irg_frame_type(to);
        int      n_members  = get_class_n_members(from_frame);
        int      i;
        assert(from_frame != to_frame);

        for (i = 0; i < n_members; ++i) {
                ir_entity *old_ent = get_class_member(from_frame, i);
                ir_entity *new_ent = copy_entity_own(old_ent, to_frame);
                set_entity_link(old_ent, new_ent);
        }
}
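
/* Note: the entity links set here are consumed by copy_node_inline() when it
 * redirects Sel nodes to the new frame; they stay valid only while
 * inline_method() holds IR_RESOURCE_ENTITY_LINK. */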

/* Inlines a method at the given call site. */
int inline_method(ir_node *call, ir_graph *called_graph)
{
        ir_node       *pre_call;
        ir_node       *post_call, *post_bl;
        ir_node       *in[pn_Start_max];
        ir_node       *end, *end_bl, *block;
        ir_node       **res_pred;
        ir_node       **cf_pred;
        ir_node       **args_in;
        ir_node       *ret, *phi;
        int           arity, n_ret, n_exc, n_res, i, j, rem_opt;
        int           irn_arity, n_params;
        int           n_mem_phi;
        enum exc_mode exc_handling;
        ir_type       *mtp;
        ir_type       *ctp;
        ir_entity     *ent;
        ir_graph      *rem;
        ir_graph      *irg = get_irn_irg(call);

        /* we cannot inline some types of calls */
        if (! can_inline(call, called_graph))
                return 0;

        /* We cannot inline a recursive call.  The graph must be copied
         * before calling inline_method(), using create_irg_copy(). */
        if (called_graph == irg)
                return 0;

        ent      = get_irg_entity(called_graph);
        mtp      = get_entity_type(ent);
        ctp      = get_Call_type(call);
        n_params = get_method_n_params(mtp);
        n_res    = get_method_n_ress(mtp);

        rem = current_ir_graph;
        current_ir_graph = irg;

        DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));

        /* optimizations can cause problems when allocating new nodes */
        rem_opt = get_opt_optimize();
        set_optimize(0);

        /* Handle graph state */
        assert(get_irg_phase_state(irg) != phase_building);
        assert(get_irg_pinned(irg) == op_pin_state_pinned);
        assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
        set_irg_outs_inconsistent(irg);
        set_irg_extblk_inconsistent(irg);
        set_irg_doms_inconsistent(irg);
        set_irg_loopinfo_inconsistent(irg);
        set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
        set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);
        edges_deactivate(irg);

        /* here we know we WILL inline, so inform the statistics */
        hook_inline(call, called_graph);

        /* -- Decide how to handle exception control flow: Is there a handler
           for the Call node, or do we branch directly to End on an exception?
           exc_handler:    there is a handler.
           exc_no_handler: exception handling is not represented in Firm.
           The Proj list walked below was linked to the Call by
           collect_phiprojs(). -- */
        {
                ir_node *Xproj = NULL;
                ir_node *proj;
                for (proj = get_irn_link(call); proj; proj = get_irn_link(proj)) {
                        long proj_nr = get_Proj_proj(proj);
                        if (proj_nr == pn_Call_X_except) Xproj = proj;
                }
                exc_handling = Xproj != NULL ? exc_handler : exc_no_handler;
        }

        /* create the argument tuple */
        args_in = ALLOCAN(ir_node*, n_params);

        block = get_nodes_block(call);
        for (i = n_params - 1; i >= 0; --i) {
                ir_node *arg      = get_Call_param(call, i);
                ir_type *param_tp = get_method_param_type(mtp, i);
                ir_mode *mode     = get_type_mode(param_tp);

                if (mode != get_irn_mode(arg)) {
                        arg = new_r_Conv(block, arg, mode);
                }
                args_in[i] = arg;
        }

        /* Pre_call collects the arguments of the procedure and later replaces
         * the Start node of the called graph.  Post_call is the old Call node
         * and collects the results of the called graph.  Both will end up
         * being a tuple. */
        post_bl = get_nodes_block(call);
        /* XxMxPxPxPxT of Start + parameter of Call */
        in[pn_Start_M]              = get_Call_mem(call);
        in[pn_Start_X_initial_exec] = new_r_Jmp(post_bl);
        in[pn_Start_P_frame_base]   = get_irg_frame(irg);
        in[pn_Start_P_tls]          = get_irg_tls(irg);
        in[pn_Start_T_args]         = new_r_Tuple(post_bl, n_params, args_in);
        pre_call = new_r_Tuple(post_bl, pn_Start_max, in);
        post_call = call;

        /* --
           The new block gets the ins of the old block, pre_call and all its
           predecessors and all Phi nodes. -- */
        part_block(pre_call);

        /* increment visited flag for later walk */
        inc_irg_visited(called_graph);

        /* Link some nodes to nodes in the current graph so that, instead of
         * being copied, the linked nodes will be used: the copier then uses
         * the created Tuple instead of copying the Start node, and similarly
         * for singleton nodes like NoMem and Bad.
         * Note: this prohibits the predecessors from being copied - only do
         *       it for nodes without predecessors. */
        {
                ir_node *start_block;
                ir_node *start;
                ir_node *bad;
                ir_node *nomem;

                start_block = get_irg_start_block(called_graph);
                set_new_node(start_block, get_nodes_block(pre_call));
                mark_irn_visited(start_block);

                start = get_irg_start(called_graph);
                set_new_node(start, pre_call);
                mark_irn_visited(start);

                bad = get_irg_bad(called_graph);
                set_new_node(bad, get_irg_bad(irg));
                mark_irn_visited(bad);

                nomem = get_irg_no_mem(called_graph);
                set_new_node(nomem, get_irg_no_mem(irg));
                mark_irn_visited(nomem);
        }

        /* the entity link is used to map entities of the old stack frame to
         * their copies on the new stack frame */
        irp_reserve_resources(irp, IR_RESOURCE_ENTITY_LINK);

        /* copy entities and nodes */
        assert(!irn_visited(get_irg_end(called_graph)));
        copy_frame_entities(called_graph, irg);
        irg_walk_core(get_irg_end(called_graph), copy_node_inline, set_preds_inline,
                      irg);

        irp_free_resources(irp, IR_RESOURCE_ENTITY_LINK);

        /* -- Merge the end of the inlined procedure with the call site -- */
        /* We will turn the old Call node into a Tuple with the following
           predecessors:
           -1:  Block of Tuple.
           0: Phi of all Memories of Return statements.
           1: Jmp from new Block that merges the control flow from all exception
           predecessors of the old end block.
           2: Tuple of all arguments.
           3: Phi of Exception memories.
           In case the old Call directly branches to End on an exception we don't
           need the block merging all exceptions nor the Phi of the exception
           memories.
        */

        /* Precompute some values */
        end_bl = get_new_node(get_irg_end_block(called_graph));
        end    = get_new_node(get_irg_end(called_graph));
        arity  = get_Block_n_cfgpreds(end_bl);    /* arity = n_exc + n_ret  */
        n_res  = get_method_n_ress(get_Call_type(call));

        res_pred = XMALLOCN(ir_node*, n_res);
        cf_pred  = XMALLOCN(ir_node*, arity);

        /* archive keepalives */
        irn_arity = get_irn_arity(end);
        for (i = 0; i < irn_arity; i++) {
                ir_node *ka = get_End_keepalive(end, i);
                if (! is_Bad(ka))
                        add_End_keepalive(get_irg_end(irg), ka);
        }

        /* replace Return nodes by Jump nodes */
        n_ret = 0;
        for (i = 0; i < arity; i++) {
                ir_node *ret;
                ret = get_Block_cfgpred(end_bl, i);
                if (is_Return(ret)) {
                        ir_node *block = get_nodes_block(ret);
                        cf_pred[n_ret] = new_r_Jmp(block);
                        n_ret++;
                }
        }
        set_irn_in(post_bl, n_ret, cf_pred);

        /* build a Tuple for all results of the method.
         * add Phi node if there was more than one Return. */
        turn_into_tuple(post_call, pn_Call_max);
        /* First the Memory-Phi */
        n_mem_phi = 0;
        for (i = 0; i < arity; i++) {
                ret = get_Block_cfgpred(end_bl, i);
                if (is_Return(ret)) {
                        cf_pred[n_mem_phi++] = get_Return_mem(ret);
                }
                /* memory output for some exceptions is directly connected to End */
                if (is_Call(ret)) {
                        cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 3);
                } else if (is_fragile_op(ret)) {
                        /* We rely that all cfops have the memory output at the same position. */
                        cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 0);
                } else if (is_Raise(ret)) {
                        cf_pred[n_mem_phi++] = new_r_Proj(ret, mode_M, 1);
                }
        }
        phi = new_r_Phi(post_bl, n_mem_phi, cf_pred, mode_M);
        set_Tuple_pred(call, pn_Call_M, phi);
        /* Conserve Phi-list for further inlinings -- but might be optimized */
        if (get_nodes_block(phi) == post_bl) {
                set_irn_link(phi, get_irn_link(post_bl));
                set_irn_link(post_bl, phi);
        }
        /* Now the real results */
        if (n_res > 0) {
                ir_node *result_tuple;
                for (j = 0; j < n_res; j++) {
                        ir_type *res_type = get_method_res_type(ctp, j);
                        ir_mode *res_mode = get_type_mode(res_type);
                        n_ret = 0;
                        for (i = 0; i < arity; i++) {
                                ret = get_Block_cfgpred(end_bl, i);
                                if (is_Return(ret)) {
                                        ir_node *res = get_Return_res(ret, j);
                                        if (get_irn_mode(res) != res_mode) {
                                                ir_node *block = get_nodes_block(res);
                                                res = new_r_Conv(block, res, res_mode);
                                        }
                                        cf_pred[n_ret] = res;
                                        n_ret++;
                                }
                        }
                        if (n_ret > 0) {
                                ir_mode *mode = get_irn_mode(cf_pred[0]);
                                phi = new_r_Phi(post_bl, n_ret, cf_pred, mode);
                        } else {
                                phi = new_r_Bad(irg);
                        }
                        res_pred[j] = phi;
                        /* Conserve Phi-list for further inlinings -- but might be optimized */
                        if (get_nodes_block(phi) == post_bl) {
                                set_Phi_next(phi, get_Block_phis(post_bl));
                                set_Block_phis(post_bl, phi);
                        }
                }
                result_tuple = new_r_Tuple(post_bl, n_res, res_pred);
                set_Tuple_pred(call, pn_Call_T_result, result_tuple);
        } else {
                set_Tuple_pred(call, pn_Call_T_result, new_r_Bad(irg));
        }
        /* handle the regular call */
        set_Tuple_pred(call, pn_Call_X_regular, new_r_Jmp(post_bl));

        /* For now, we cannot inline calls with value_base */
        set_Tuple_pred(call, pn_Call_P_value_res_base, new_r_Bad(irg));

        /* Finally the exception control flow.  We have two possible situations:
           First, the Call branches to an exception handler:
           We need to add a Phi node to collect the memory containing the
           exception objects.  Further we need to add another block to get a
           correct representation of this Phi.  To this block we add a Jmp that
           resolves into the X output of the Call when the Call is turned into
           a tuple.
           Second: There is no exception edge.  Just add all inlined exception
           branches to the End node.
         */
        if (exc_handling == exc_handler) {
                n_exc = 0;
                for (i = 0; i < arity; i++) {
                        ir_node *ret, *irn;
                        ret = get_Block_cfgpred(end_bl, i);
                        irn = skip_Proj(ret);
                        if (is_fragile_op(irn) || is_Raise(irn)) {
                                cf_pred[n_exc] = ret;
                                ++n_exc;
                        }
                }
                if (n_exc > 0) {
                        if (n_exc == 1) {
                                /* simple fix */
                                set_Tuple_pred(call, pn_Call_X_except, cf_pred[0]);
                        } else {
                                ir_node *block = new_r_Block(irg, n_exc, cf_pred);
                                set_Tuple_pred(call, pn_Call_X_except, new_r_Jmp(block));
                        }
                } else {
                        set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg));
                }
        } else {
                ir_node *main_end_bl;
                int main_end_bl_arity;
                ir_node **end_preds;

                /* assert: exc_handling == exc_no_handler, or there are no exceptions */
                n_exc = 0;
                for (i = 0; i < arity; i++) {
                        ir_node *ret = get_Block_cfgpred(end_bl, i);
                        ir_node *irn = skip_Proj(ret);

                        if (is_fragile_op(irn) || is_Raise(irn)) {
                                cf_pred[n_exc] = ret;
                                n_exc++;
                        }
                }
                main_end_bl       = get_irg_end_block(irg);
                main_end_bl_arity = get_irn_arity(main_end_bl);
                end_preds         = XMALLOCN(ir_node*, n_exc + main_end_bl_arity);

                for (i = 0; i < main_end_bl_arity; ++i)
                        end_preds[i] = get_irn_n(main_end_bl, i);
                for (i = 0; i < n_exc; ++i)
                        end_preds[main_end_bl_arity + i] = cf_pred[i];
                set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
                set_Tuple_pred(call, pn_Call_X_except, new_r_Bad(irg));
                free(end_preds);
        }
        free(res_pred);
        free(cf_pred);

        /* -- Turn the optimizations back on. -- */
        set_optimize(rem_opt);
        current_ir_graph = rem;

        return 1;
}
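
/* A minimal usage sketch (assuming the caller graph is fully built and
 * pinned, and that the resources below are not yet reserved):
 *
 *     ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_PHI_LIST);
 *     collect_phiprojs(irg);     /* inline_method() expects Phi/Proj lists *\/
 *     if (inline_method(call, callee_irg)) {
 *             /* the Call is now a Tuple; clean up with local optimizations *\/
 *     }
 *     ir_free_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_PHI_LIST);
 *
 * inline_small_irgs() below follows exactly this pattern. */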

/********************************************************************/
/* Apply inlining to small methods.                                 */
/********************************************************************/

static struct obstack  temp_obst;

/** Represents a possible inlinable call in a graph. */
typedef struct _call_entry {
        ir_node    *call;       /**< The Call node. */
        ir_graph   *callee;     /**< The callee IR-graph. */
        list_head  list;        /**< List head for linking the next one. */
        int        loop_depth;  /**< The loop depth of this call. */
        int        benefice;    /**< The calculated benefice of this call. */
        unsigned   local_adr:1; /**< Set if this call gets an address of a local variable. */
        unsigned   all_const:1; /**< Set if this call has only constant parameters. */
} call_entry;

/**
 * environment for inlining small irgs
 */
typedef struct _inline_env_t {
        struct obstack obst;  /**< An obstack on which call_entries are allocated. */
        list_head      calls; /**< The call entry list. */
} inline_env_t;

/**
 * Returns the irg called from a Call node. If the irg is not
 * known, NULL is returned.
 *
 * @param call  the call node
 */
static ir_graph *get_call_called_irg(ir_node *call)
{
        ir_node *addr;

        addr = get_Call_ptr(call);
        if (is_Global(addr)) {
                ir_entity *ent = get_Global_entity(addr);
                /* we don't know which function gets finally bound to a weak symbol */
                if (get_entity_linkage(ent) & IR_LINKAGE_WEAK)
                        return NULL;

                return get_entity_irg(ent);
        }

        return NULL;
}

/**
 * Walker: Collect all calls to known graphs inside a graph.
 */
static void collect_calls(ir_node *call, void *env)
{
        (void) env;
        if (is_Call(call)) {
                ir_graph *called_irg = get_call_called_irg(call);

                if (called_irg != NULL) {
                        /* The Call node calls a locally defined method.  Remember to inline. */
                        inline_env_t *ienv  = env;
                        call_entry   *entry = OALLOC(&ienv->obst, call_entry);
                        entry->call       = call;
                        entry->callee     = called_irg;
                        entry->loop_depth = 0;
                        entry->benefice   = 0;
                        entry->local_adr  = 0;
                        entry->all_const  = 0;

                        list_add_tail(&entry->list, &ienv->calls);
                }
        }
}

/**
 * Inlines all small methods at call sites where the called address comes
 * from a Const node that references the entity representing the called
 * method.
 * The size argument is a rough measure for the code size of the method:
 * Methods where the obstack containing the firm graph is smaller than
 * size are inlined.
 */
void inline_small_irgs(ir_graph *irg, int size)
{
        ir_graph *rem = current_ir_graph;
        inline_env_t env;
        call_entry *entry;

        current_ir_graph = irg;
        /* Handle graph state */
        assert(get_irg_phase_state(irg) != phase_building);
        free_callee_info(irg);

        /* Find Call nodes to inline.
           (We cannot inline during a walk of the graph, as inlining the same
           method several times changes the visited flag of the walked graph:
           after the first inlining, visited of the callee equals visited of
           the caller.  With the next inlining both are increased.) */
        obstack_init(&env.obst);
        INIT_LIST_HEAD(&env.calls);
        irg_walk_graph(irg, NULL, collect_calls, &env);

        if (! list_empty(&env.calls)) {
                /* There are calls to inline */
                ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
                collect_phiprojs(irg);

                list_for_each_entry(call_entry, entry, &env.calls, list) {
                        ir_graph            *callee = entry->callee;
                        irg_inline_property prop    = get_irg_inline_property(callee);

                        if (prop == irg_inline_forbidden) {
                                continue;
                        }

                        if (prop >= irg_inline_forced ||
                            _obstack_memory_used(callee->obst) - (int)obstack_room(callee->obst) < size) {
                                inline_method(entry->call, callee);
                        }
                }
                ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
        }
        obstack_free(&env.obst, NULL);
        current_ir_graph = rem;
}

struct inline_small_irgs_pass_t {
        ir_graph_pass_t pass;
        int            size;
};

/**
 * Wrapper to run inline_small_irgs() as a pass.
 */
static int inline_small_irgs_wrapper(ir_graph *irg, void *context)
{
        struct inline_small_irgs_pass_t *pass = context;

        inline_small_irgs(irg, pass->size);
        return 0;
}

/* create a pass for inline_small_irgs() */
ir_graph_pass_t *inline_small_irgs_pass(const char *name, int size)
{
        struct inline_small_irgs_pass_t *pass =
                XMALLOCZ(struct inline_small_irgs_pass_t);

        pass->size = size;
        return def_graph_pass_constructor(
                &pass->pass, name ? name : "inline_small_irgs", inline_small_irgs_wrapper);
}
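
/* The returned pass can be scheduled like any other ir_graph pass, e.g. via a
 * graph pass manager; passing NULL as name selects the default name
 * "inline_small_irgs". */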

/**
 * Environment for inlining irgs.
 */
typedef struct {
        list_head calls;             /**< List of all call nodes in this graph. */
        unsigned  *local_weights;    /**< Once allocated, the beneficial weight for transmitting local addresses. */
        unsigned  n_nodes;           /**< Number of nodes in graph except Id, Tuple, Proj, Start, End. */
        unsigned  n_blocks;          /**< Number of Blocks in graph without Start and End block. */
        unsigned  n_nodes_orig;      /**< for statistics */
        unsigned  n_call_nodes;      /**< Number of Call nodes in the graph. */
        unsigned  n_call_nodes_orig; /**< for statistics */
        unsigned  n_callers;         /**< Number of known graphs that call this graph. */
        unsigned  n_callers_orig;    /**< for statistics */
        unsigned  got_inline:1;      /**< Set, if at least one call inside this graph was inlined. */
        unsigned  recursive:1;       /**< Set, if this function is self recursive. */
} inline_irg_env;

/**
 * Allocate a new environment for inlining.
 */
static inline_irg_env *alloc_inline_irg_env(void)
{
        inline_irg_env *env    = OALLOC(&temp_obst, inline_irg_env);
        INIT_LIST_HEAD(&env->calls);
        env->local_weights     = NULL;
        env->n_nodes           = -2; /* do not count Start, End */
        env->n_blocks          = -2; /* do not count Start and End block */
        env->n_nodes_orig      = -2; /* do not count Start, End */
        env->n_call_nodes      = 0;
        env->n_call_nodes_orig = 0;
        env->n_callers         = 0;
        env->n_callers_orig    = 0;
        env->got_inline        = 0;
        env->recursive         = 0;
        return env;
}

typedef struct walker_env {
        inline_irg_env *x;     /**< the inline environment */
        char ignore_runtime;   /**< the ignore runtime flag */
        char ignore_callers;   /**< if set, do not update the callers data */
} wenv_t;

/**
 * post-walker: collect all calls in the inline-environment
 * of a graph and sum some statistics.
 */
static void collect_calls2(ir_node *call, void *ctx)
{
        wenv_t         *env = ctx;
        inline_irg_env *x = env->x;
        ir_opcode      code = get_irn_opcode(call);
        ir_graph       *callee;
        call_entry     *entry;

        /* count meaningful nodes in irg */
        if (code != iro_Proj && code != iro_Tuple && code != iro_Sync) {
                if (code != iro_Block) {
                        ++x->n_nodes;
                        ++x->n_nodes_orig;
                } else {
                        ++x->n_blocks;
                }
        }

        if (code != iro_Call) return;

        /* check, if it's a runtime call */
        if (env->ignore_runtime) {
                ir_node *symc = get_Call_ptr(call);

                if (is_Global(symc)) {
                        ir_entity *ent = get_Global_entity(symc);

                        if (get_entity_additional_properties(ent) & mtp_property_runtime)
                                return;
                }
        }

        /* collect all call nodes */
        ++x->n_call_nodes;
        ++x->n_call_nodes_orig;

        callee = get_call_called_irg(call);
        if (callee != NULL) {
                if (! env->ignore_callers) {
                        inline_irg_env *callee_env = get_irg_link(callee);
                        /* count all static callers */
                        ++callee_env->n_callers;
                        ++callee_env->n_callers_orig;
                }
                if (callee == current_ir_graph)
                        x->recursive = 1;

                /* link it in the list of possible inlinable entries */
                entry = OALLOC(&temp_obst, call_entry);
                entry->call       = call;
                entry->callee     = callee;
                entry->loop_depth = get_irn_loop(get_nodes_block(call))->depth;
                entry->benefice   = 0;
                entry->local_adr  = 0;
                entry->all_const  = 0;

                list_add_tail(&entry->list, &x->calls);
        }
}

/**
 * Returns TRUE if the number of Call nodes is 0 in the irg's environment,
 * hence this irg is a leaf (it calls no other graphs).
 */
inline static int is_leave(ir_graph *irg)
{
        inline_irg_env *env = get_irg_link(irg);
        return env->n_call_nodes == 0;
}

/**
 * Returns TRUE if the number of nodes in the callee is
 * smaller than size in the irg's environment.
 */
inline static int is_smaller(ir_graph *callee, unsigned size)
{
        inline_irg_env *env = get_irg_link(callee);
        return env->n_nodes < size;
}
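
/* Note: n_nodes is updated incrementally while inlining proceeds (see
 * append_call_list() and the bookkeeping in the drivers below), so
 * is_smaller() reflects the current, post-inlining size of the callee. */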

/**
 * Duplicate a call entry.
 *
 * @param entry     the original entry to duplicate
 * @param new_call  the new call node
 * @param loop_depth_delta
 *                  delta value for the loop depth
 */
static call_entry *duplicate_call_entry(const call_entry *entry,
                                        ir_node *new_call, int loop_depth_delta)
{
        call_entry *nentry = OALLOC(&temp_obst, call_entry);
        nentry->call       = new_call;
        nentry->callee     = entry->callee;
        nentry->benefice   = entry->benefice;
        nentry->loop_depth = entry->loop_depth + loop_depth_delta;
        nentry->local_adr  = entry->local_adr;
        nentry->all_const  = entry->all_const;

        return nentry;
}

/**
 * Append all call nodes of the source environment to the list in the
 * destination environment.
 *
 * @param dst         destination environment
 * @param src         source environment
 * @param loop_depth  the loop depth of the call that is replaced by the src list
 */
static void append_call_list(inline_irg_env *dst, inline_irg_env *src, int loop_depth)
{
        call_entry *entry, *nentry;

        /* Note that the src list points to Call nodes in the inlined graph, but
           we need Call nodes in our graph. Luckily the inliner leaves this information
           in the link field. */
        list_for_each_entry(call_entry, entry, &src->calls, list) {
                nentry = duplicate_call_entry(entry, get_irn_link(entry->call), loop_depth);
                list_add_tail(&nentry->list, &dst->calls);
        }
        dst->n_call_nodes += src->n_call_nodes;
        dst->n_nodes      += src->n_nodes;
}

/*
 * Inlines small leave methods at call sites where the called address comes
 * from a Const node that references the entity representing the called
 * method.
 * The size argument is a rough measure for the code size of the method:
 * Methods where the obstack containing the firm graph is smaller than
 * size are inlined.
 */
void inline_leave_functions(unsigned maxsize, unsigned leavesize,
                            unsigned size, int ignore_runtime)
{
        inline_irg_env   *env;
        ir_graph         *irg;
        int              i, n_irgs;
        ir_graph         *rem;
        int              did_inline;
        wenv_t           wenv;
        call_entry       *entry, *next;
        const call_entry *centry;
        pmap             *copied_graphs;
        pmap_entry       *pm_entry;

        rem = current_ir_graph;
        obstack_init(&temp_obst);

        /* a map for the copied graphs, used to inline recursive calls */
        copied_graphs = pmap_create();

        /* extend all irgs by a temporary data structure for inlining. */
        n_irgs = get_irp_n_irgs();
        for (i = 0; i < n_irgs; ++i)
                set_irg_link(get_irp_irg(i), alloc_inline_irg_env());

        /* Pre-compute information in temporary data structure. */
        wenv.ignore_runtime = ignore_runtime;
        wenv.ignore_callers = 0;
        for (i = 0; i < n_irgs; ++i) {
                ir_graph *irg = get_irp_irg(i);

                assert(get_irg_phase_state(irg) != phase_building);
                free_callee_info(irg);

                assure_cf_loop(irg);
                wenv.x = get_irg_link(irg);
                irg_walk_graph(irg, NULL, collect_calls2, &wenv);
        }

        /* -- and now inline. -- */

        /* Inline leaves recursively -- we might construct new leaves. */
        do {
                did_inline = 0;

                for (i = 0; i < n_irgs; ++i) {
                        ir_node *call;
                        int phiproj_computed = 0;

                        current_ir_graph = get_irp_irg(i);
                        env              = get_irg_link(current_ir_graph);

                        ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
                        list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
                                ir_graph            *callee;
                                irg_inline_property  prop;

                                if (env->n_nodes > maxsize)
                                        break;

                                call   = entry->call;
                                callee = entry->callee;

                                prop = get_irg_inline_property(callee);
                                if (prop == irg_inline_forbidden) {
                                        continue;
                                }

                                if (is_leave(callee) && (
                                    is_smaller(callee, leavesize) || prop >= irg_inline_forced)) {
                                        if (!phiproj_computed) {
                                                phiproj_computed = 1;
                                                collect_phiprojs(current_ir_graph);
                                        }
                                        did_inline = inline_method(call, callee);

                                        if (did_inline) {
                                                inline_irg_env *callee_env = get_irg_link(callee);

                                                /* call was inlined, Phi/Projs for current graph must be recomputed */
                                                phiproj_computed = 0;

                                                /* Do some statistics */
                                                env->got_inline = 1;
                                                --env->n_call_nodes;
                                                env->n_nodes += callee_env->n_nodes;
                                                --callee_env->n_callers;

                                                /* remove this call from the list */
                                                list_del(&entry->list);
                                                continue;
                                        }
                                }
                        }
                        ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
                }
        } while (did_inline);

        /* inline other small functions. */
        for (i = 0; i < n_irgs; ++i) {
                ir_node *call;
                int phiproj_computed = 0;

                current_ir_graph = get_irp_irg(i);
                env              = get_irg_link(current_ir_graph);

                ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

                /* note that the list of possible calls is updated during the process */
                list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
                        irg_inline_property prop;
                        ir_graph            *callee;
                        pmap_entry          *e;

                        call   = entry->call;
                        callee = entry->callee;

                        prop = get_irg_inline_property(callee);
                        if (prop == irg_inline_forbidden) {
                                continue;
                        }

                        e = pmap_find(copied_graphs, callee);
                        if (e != NULL) {
                                /*
                                 * Remap callee if we have a copy.
                                 * FIXME: Should we do this only for recursive Calls ?
                                 */
                                callee = e->value;
                        }

                        if (prop >= irg_inline_forced ||
                            (is_smaller(callee, size) && env->n_nodes < maxsize) /* small function */) {
                                if (current_ir_graph == callee) {
                                        /*
                                         * Recursive call: we cannot directly inline because we cannot walk
                                         * the graph and change it. So we have to make a copy of the graph
                                         * first.
                                         */

                                        inline_irg_env *callee_env;
                                        ir_graph       *copy;

                                        ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

                                        /*
                                         * No copy yet, create one.
                                         * Note that recursive methods are never leaves, so it is sufficient
                                         * to test this condition here.
                                         */
                                        copy = create_irg_copy(callee);

                                        /* create_irg_copy() destroys the Proj links, recompute them */
                                        phiproj_computed = 0;

                                        ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);

                                        /* allocate new environment */
                                        callee_env = alloc_inline_irg_env();
                                        set_irg_link(copy, callee_env);

                                        assure_cf_loop(copy);
                                        wenv.x              = callee_env;
                                        wenv.ignore_callers = 1;
                                        irg_walk_graph(copy, NULL, collect_calls2, &wenv);

                                        /*
                                         * Enter the entity of the original graph. This is needed
                                         * for inline_method(). However, note that ent->irg still points
                                         * to callee, NOT to copy.
                                         */
                                        set_irg_entity(copy, get_irg_entity(callee));

                                        pmap_insert(copied_graphs, callee, copy);
                                        callee = copy;

                                        /* we have only one caller: the original graph */
                                        callee_env->n_callers      = 1;
                                        callee_env->n_callers_orig = 1;
                                }
                                if (! phiproj_computed) {
                                        phiproj_computed = 1;
                                        collect_phiprojs(current_ir_graph);
                                }
                                did_inline = inline_method(call, callee);
                                if (did_inline) {
                                        inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);

                                        /* call was inlined, Phi/Projs for current graph must be recomputed */
                                        phiproj_computed = 0;

                                        /* callee was inlined; append its call list */
                                        env->got_inline = 1;
                                        --env->n_call_nodes;
                                        append_call_list(env, callee_env, entry->loop_depth);
                                        --callee_env->n_callers;

                                        /* after we have inlined callee, all called methods inside callee
                                           are now called once more */
                                        list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
                                                inline_irg_env *penv = get_irg_link(centry->callee);
                                                ++penv->n_callers;
                                        }

                                        /* remove this call from the list */
                                        list_del(&entry->list);
                                        continue;
                                }
                        }
                }
                ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
        }

        for (i = 0; i < n_irgs; ++i) {
                irg = get_irp_irg(i);
                env = get_irg_link(irg);

                if (env->got_inline) {
                        optimize_graph_df(irg);
                        optimize_cf(irg);
                }
                if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
                        DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
                        env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
                        env->n_callers_orig, env->n_callers,
                        get_entity_name(get_irg_entity(irg))));
                }
        }

        /* kill the copied graphs: we don't need them anymore */
        foreach_pmap(copied_graphs, pm_entry) {
                ir_graph *copy = pm_entry->value;

                /* reset the entity, otherwise it will be deleted in the next step ... */
                set_irg_entity(copy, NULL);
                free_ir_graph(copy);
        }
        pmap_destroy(copied_graphs);

        obstack_free(&temp_obst, NULL);
        current_ir_graph = rem;
}

struct inline_leave_functions_pass_t {
        ir_prog_pass_t pass;
        unsigned       maxsize;
        unsigned       leavesize;
        unsigned       size;
        int            ignore_runtime;
};

/**
 * Wrapper to run inline_leave_functions() as an ir_prog pass.
 */
static int inline_leave_functions_wrapper(ir_prog *irp, void *context)
{
        struct inline_leave_functions_pass_t *pass = context;

        (void)irp;
        inline_leave_functions(
                pass->maxsize, pass->leavesize,
                pass->size, pass->ignore_runtime);
        return 0;
}

/* create a pass for inline_leave_functions() */
ir_prog_pass_t *inline_leave_functions_pass(
        const char *name, unsigned maxsize, unsigned leavesize,
        unsigned size, int ignore_runtime)
{
        struct inline_leave_functions_pass_t *pass =
                XMALLOCZ(struct inline_leave_functions_pass_t);

        pass->maxsize        = maxsize;
        pass->leavesize      = leavesize;
        pass->size           = size;
        pass->ignore_runtime = ignore_runtime;

        return def_prog_pass_constructor(
                &pass->pass,
                name ? name : "inline_leave_functions",
                inline_leave_functions_wrapper);
}

/**
 * Calculate the parameter weights for transmitting the address of a local variable.
 */
static unsigned calc_method_local_weight(ir_node *arg)
{
        int      i, j, k;
        unsigned v, weight = 0;

        for (i = get_irn_n_outs(arg) - 1; i >= 0; --i) {
                ir_node *succ = get_irn_out(arg, i);

                switch (get_irn_opcode(succ)) {
                case iro_Load:
                case iro_Store:
                        /* Loads and Stores can be removed */
1266                         weight += 3;
1267                         break;
1268                 case iro_Sel:
1269                         /* check if all args are constant */
1270                         for (j = get_Sel_n_indexs(succ) - 1; j >= 0; --j) {
1271                                 ir_node *idx = get_Sel_index(succ, j);
1272                                 if (! is_Const(idx))
1273                                         return 0;
1274                         }
1275                         /* Check users on this Sel. Note: if a 0 is returned here, there was
1276                            some unsupported node. */
1277                         v = calc_method_local_weight(succ);
1278                         if (v == 0)
1279                                 return 0;
1280                         /* we can kill one Sel with constant indexes, this is cheap */
1281                         weight += v + 1;
1282                         break;
1283                 case iro_Id:
1284                         /* when looking backward we might find Id nodes */
1285                         weight += calc_method_local_weight(succ);
1286                         break;
1287                 case iro_Tuple:
1288                         /* unoptimized tuple */
1289                         for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
1290                                 ir_node *pred = get_Tuple_pred(succ, j);
1291                                 if (pred == arg) {
1292                                         /* look for Proj(j) */
1293                                         for (k = get_irn_n_outs(succ) - 1; k >= 0; --k) {
1294                                                 ir_node *succ_succ = get_irn_out(succ, k);
1295                                                 if (is_Proj(succ_succ)) {
1296                                                         if (get_Proj_proj(succ_succ) == j) {
1297                                                                 /* found */
1298                                                                 weight += calc_method_local_weight(succ_succ);
1299                                                         }
1300                                                 } else {
1301                                                         /* this should NOT happen */
1302                                                         return 0;
1303                                                 }
1304                                         }
1305                                 }
1306                         }
1307                         break;
1308                 default:
1309                         /* any other node: not yet supported or harmful. */
1310                         return 0;
1311                 }
1312         }
1313         return weight;
1314 }
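/*
 * Worked example for the weights above (illustrative): if arg feeds one
 * Load, one Store and one Sel with a single constant index whose only
 * user is another Load, the result is
 *   3 (Load) + 3 (Store) + (3 + 1) (Sel whose user is a Load) = 10.
 * A single non-constant Sel index would instead make the whole result 0.
 */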
1315
1316 /**
1317  * Calculate the parameter weights for transmitting the address of a local variable.
1318  */
1319 static void analyze_irg_local_weights(inline_irg_env *env, ir_graph *irg)
1320 {
1321         ir_entity *ent = get_irg_entity(irg);
1322         ir_type  *mtp;
1323         int      nparams, i, proj_nr;
1324         ir_node  *irg_args, *arg;
1325
1326         mtp      = get_entity_type(ent);
1327         nparams  = get_method_n_params(mtp);
1328
1329         /* Allocate a new array; its existence doubles as the 'analysed' flag. */
1330         env->local_weights = NEW_ARR_D(unsigned, &temp_obst, nparams);
1331
1332         /* If the method has no parameters, there is nothing to do. */
1333         if (nparams <= 0)
1334                 return;
1335
1336         assure_irg_outs(irg);
1337         irg_args = get_irg_args(irg);
1338         for (i = get_irn_n_outs(irg_args) - 1; i >= 0; --i) {
1339                 arg     = get_irn_out(irg_args, i);
1340                 proj_nr = get_Proj_proj(arg);
1341                 env->local_weights[proj_nr] = calc_method_local_weight(arg);
1342         }
1343 }
1344
1345 /**
1346  * Calculate the benefice for transmitting a local variable address.
1347  * After inlining, the local variable might be transformed into a
1348  * SSA variable by scalar_replacement().
1349  */
1350 static unsigned get_method_local_adress_weight(ir_graph *callee, int pos)
1351 {
1352         inline_irg_env *env = get_irg_link(callee);
1353
1354         if (env->local_weights != NULL) {
1355                 if (pos < ARR_LEN(env->local_weights))
1356                         return env->local_weights[pos];
1357                 return 0;
1358         }
1359
1360         analyze_irg_local_weights(env, callee);
1361
1362         if (pos < ARR_LEN(env->local_weights))
1363                 return env->local_weights[pos];
1364         return 0;
1365 }
1366
1367 /**
1368  * Calculate a benefice value for inlining the given call.
1369  *
1370  * @param call       the call node we have to inspect
1371  * @param callee     the called graph
1372  */
1373 static int calc_inline_benefice(call_entry *entry, ir_graph *callee)
1374 {
1375         ir_node   *call = entry->call;
1376         ir_entity *ent  = get_irg_entity(callee);
1377         ir_node   *frame_ptr;
1378         ir_type   *mtp;
1379         int       weight = 0;
1380         int       i, n_params, all_const;
1381         unsigned  cc, v;
1382         irg_inline_property prop;
1383
1384         inline_irg_env *callee_env;
1385
1386         prop = get_irg_inline_property(callee);
1387         if (prop == irg_inline_forbidden) {
1388                 DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden\n",
1389                     call, callee));
1390                 return entry->benefice = INT_MIN;
1391         }
1392
1393         if (get_irg_additional_properties(callee) & (mtp_property_noreturn | mtp_property_weak)) {
1394                 DB((dbg, LEVEL_2, "In %+F Call to %+F: not inlining noreturn or weak\n",
1395                     call, callee));
1396                 return entry->benefice = INT_MIN;
1397         }
1398
1399         /* costs for every passed parameter */
1400         n_params = get_Call_n_params(call);
1401         mtp      = get_entity_type(ent);
1402         cc       = get_method_calling_convention(mtp);
1403         if (cc & cc_reg_param) {
1404                 /* register calling convention: register parameters are cheaper */
1405                 int max_regs = cc & ~cc_bits;
1406
1407                 if (max_regs < n_params)
1408                         weight += max_regs * 2 + (n_params - max_regs) * 5;
1409                 else
1410                         weight += n_params * 2;
1411         } else {
1412                 /* parameters are passed on the stack */
1413                 weight += 5 * n_params;
1414         }
1415
1416         /* constant parameters improve the benefice */
1417         frame_ptr = get_irg_frame(current_ir_graph);
1418         all_const = 1;
1419         for (i = 0; i < n_params; ++i) {
1420                 ir_node *param = get_Call_param(call, i);
1421
1422                 if (is_Const(param)) {
1423                         weight += get_method_param_weight(ent, i);
1424                 } else {
1425                         all_const = 0;
1426                         if (is_SymConst(param))
1427                                 weight += get_method_param_weight(ent, i);
1428                         else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
1429                                 /*
1430                                  * An address of a local variable is transmitted. After
1431                                  * inlining, scalar_replacement might be able to remove the
1432                                  * local variable, so honor this.
1433                                  */
1434                                 v = get_method_local_adress_weight(callee, i);
1435                                 weight += v;
1436                                 if (v > 0)
1437                                         entry->local_adr = 1;
1438                         }
1439                 }
1440         }
1441         entry->all_const = all_const;
1442
1443         callee_env = get_irg_link(callee);
1444         if (callee_env->n_callers == 1 &&
1445             callee != current_ir_graph &&
1446             !entity_is_externally_visible(ent)) {
1447                 weight += 700;
1448         }
1449
1450         /* give a bonus for functions with one block */
1451         if (callee_env->n_blocks == 1)
1452                 weight = weight * 3 / 2;
1453
1454         /* and one for small non-recursive functions: we want them to be inlined in almost every case */
1455         if (callee_env->n_nodes < 30 && !callee_env->recursive)
1456                 weight += 2000;
1457
1458         /* and finally for leaves: they do not increase the register pressure
1459            because of callee-save registers */
1460         if (callee_env->n_call_nodes == 0)
1461                 weight += 400;
1462
1463         /* it is important to inline calls in inner loops first */
1464         if (entry->loop_depth > 30)
1465                 weight += 30 * 1024;
1466         else
1467                 weight += entry->loop_depth * 1024;
1468
1469         /*
1470          * All arguments constant is probably a good sign, give an extra bonus
1471          */
1472         if (all_const)
1473                 weight += 1024;
1474
1475         return entry->benefice = weight;
1476 }
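/*
 * Worked example (illustrative; the get_method_param_weight()
 * contributions are assumed to be 0): a call with two constant stack
 * parameters to a non-visible callee with one caller, one block,
 * 25 nodes, no calls and loop depth 1:
 *   2 * 5 = 10; + 700 (single caller) = 710; * 3 / 2 (one block) = 1065;
 *   + 2000 (small, non-recursive) = 3065; + 400 (leaf) = 3465;
 *   + 1024 (loop depth 1) = 4489; + 1024 (all_const) = 5513.
 */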
1477
1478 static ir_graph **irgs;
1479 static int      last_irg;
1480
1481 /**
1482  * Callgraph walker, collect all visited graphs.
1483  */
1484 static void callgraph_walker(ir_graph *irg, void *data)
1485 {
1486         (void) data;
1487         irgs[last_irg++] = irg;
1488 }
1489
1490 /**
1491  * Creates an inline order for all graphs.
1492  *
1493  * @return the list of graphs.
1494  */
1495 static ir_graph **create_irg_list(void)
1496 {
1497         ir_entity **free_methods;
1498         int       arr_len;
1499         int       n_irgs = get_irp_n_irgs();
1500
1501         cgana(&arr_len, &free_methods);
1502         xfree(free_methods);
1503
1504         compute_callgraph();
1505
1506         last_irg = 0;
1507         irgs     = XMALLOCNZ(ir_graph*, n_irgs);
1508
1509         callgraph_walk(NULL, callgraph_walker, NULL);
1510         assert(n_irgs == last_irg);
1511
1512         return irgs;
1513 }
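/*
 * Note (an assumption about the walk order of callgraph_walk()): since
 * only the post-walker is registered, a graph is appended after its
 * callees have been visited, i.e. irgs[] lists callees before callers,
 * so inline_functions() tends to process graphs bottom-up.
 */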
1514
1515 /**
1516  * Push a call onto the priority queue if its benefice is big enough.
1517  *
1518  * @param pqueue   the priority queue of calls
1519  * @param call     the call entry
1520  * @param inline_threshold
1521  *                 the threshold value
1522  */
1523 static void maybe_push_call(pqueue_t *pqueue, call_entry *call,
1524                             int inline_threshold)
1525 {
1526         ir_graph            *callee  = call->callee;
1527         irg_inline_property prop     = get_irg_inline_property(callee);
1528         int                 benefice = calc_inline_benefice(call, callee);
1529
1530         DB((dbg, LEVEL_2, "In %+F Call %+F to %+F has benefice %d\n",
1531             get_irn_irg(call->call), call->call, callee, benefice));
1532
1533         if (prop < irg_inline_forced && benefice < inline_threshold) {
1534                 return;
1535         }
1536
1537         pqueue_put(pqueue, call, benefice);
1538 }
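/*
 * Example of the threshold check above: with inline_threshold == 0, any
 * call with a non-negative benefice is queued, while calls to graphs
 * marked irg_inline_forced are queued regardless of their benefice.
 */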
1539
1540 /**
1541  * Try to inline calls into a graph.
1542  *
1543  * @param irg      the graph into which we inline
1544  * @param maxsize  do NOT inline if the size of irg gets
1545  *                 bigger than this amount
1546  * @param inline_threshold
1547  *                 threshold value for inline decision
1548  * @param copied_graphs
1549  *                 map containing copies of recursive graphs
1550  */
1551 static void inline_into(ir_graph *irg, unsigned maxsize,
1552                         int inline_threshold, pmap *copied_graphs)
1553 {
1554         int            phiproj_computed = 0;
1555         inline_irg_env *env = get_irg_link(irg);
1556         call_entry     *curr_call;
1557         wenv_t         wenv;
1558         pqueue_t       *pqueue;
1559
1560         if (env->n_call_nodes == 0)
1561                 return;
1562
1563         if (env->n_nodes > maxsize) {
1564                 DB((dbg, LEVEL_2, "%+F: too big (%d)\n", irg, env->n_nodes));
1565                 return;
1566         }
1567
1568         current_ir_graph = irg;
1569         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1570
1571         /* put all calls of this irg into the pqueue */
1572         pqueue = new_pqueue();
1573
1574         list_for_each_entry(call_entry, curr_call, &env->calls, list) {
1575                 assert(is_Call(curr_call->call));
1576                 maybe_push_call(pqueue, curr_call, inline_threshold);
1577         }
1578
1579         /* note that the list of possible calls is updated during the process */
1580         while (!pqueue_empty(pqueue)) {
1581                 int                 did_inline;
1582                 call_entry          *curr_call  = pqueue_pop_front(pqueue);
1583                 ir_graph            *callee     = curr_call->callee;
1584                 ir_node             *call_node  = curr_call->call;
1585                 inline_irg_env      *callee_env = get_irg_link(callee);
1586                 irg_inline_property prop        = get_irg_inline_property(callee);
1587                 int                 loop_depth;
1588                 const call_entry    *centry;
1589                 pmap_entry          *e;
1590
1591                 if ((prop < irg_inline_forced) && env->n_nodes + callee_env->n_nodes > maxsize) {
1592                         DB((dbg, LEVEL_2, "%+F: too big (%d) + %+F (%d)\n", irg,
1593                                                 env->n_nodes, callee, callee_env->n_nodes));
1594                         continue;
1595                 }
1596
1597                 e = pmap_find(copied_graphs, callee);
1598                 if (e != NULL) {
1599                         int benefice = curr_call->benefice;
1600                         /*
1601                          * Reduce the benefice for recursive functions if not all arguments are constant;
1602                          * inlining recursive functions is rarely profitable.
1603                          */
1604                         if (!curr_call->all_const)
1605                                 benefice -= 2000;
1606                         if (benefice < inline_threshold)
1607                                 continue;
1608
1609                         /*
1610                          * Remap callee if we have a copy.
1611                          */
1612                         callee     = e->value;
1613                         callee_env = get_irg_link(callee);
1614                 }
1615
1616                 if (current_ir_graph == callee) {
1617                         /*
1618                          * Recursive call: we cannot directly inline because we cannot
1619                          * walk the graph and change it. So we have to make a copy of
1620                          * the graph first.
1621                          */
1622                         int benefice = curr_call->benefice;
1623                         ir_graph *copy;
1624
1625                         /*
1626                          * Reduce the benefice for recursive functions if not all arguments are constant;
1627                          * inlining recursive functions is rarely profitable.
1628                          */
1629                         if (!curr_call->all_const)
1630                                 benefice -= 2000;
1631                         if (benefice < inline_threshold)
1632                                 continue;
1633
1634                         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1635
1636                         /*
1637                          * No copy yet, create one.
1638                          * Note that recursive methods are never leaves, so it is
1639                          * sufficient to test this condition here.
1640                          */
1641                         copy = create_irg_copy(callee);
1642
1643                         /* create_irg_copy() destroys the Proj links, recompute them */
1644                         phiproj_computed = 0;
1645
1646                         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1647
1648                         /* allocate a new environment */
1649                         callee_env = alloc_inline_irg_env();
1650                         set_irg_link(copy, callee_env);
1651
1652                         assure_cf_loop(copy);
1653                         wenv.x              = callee_env;
1654                         wenv.ignore_callers = 1;
1655                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
1656
1657                         /*
1658                          * Enter the entity of the original graph. This is needed
1659                          * for inline_method(). However, note that ent->irg still points
1660                          * to callee, NOT to copy.
1661                          */
1662                         set_irg_entity(copy, get_irg_entity(callee));
1663
1664                         pmap_insert(copied_graphs, callee, copy);
1665                         callee = copy;
1666
1667                         /* we have only one caller: the original graph */
1668                         callee_env->n_callers      = 1;
1669                         callee_env->n_callers_orig = 1;
1670                 }
1671                 if (! phiproj_computed) {
1672                         phiproj_computed = 1;
1673                         collect_phiprojs(current_ir_graph);
1674                 }
1675                 did_inline = inline_method(call_node, callee);
1676                 if (!did_inline)
1677                         continue;
1678
1679                 /* call was inlined, Phi/Projs for current graph must be recomputed */
1680                 phiproj_computed = 0;
1681
1682                 /* remove it from the caller list */
1683                 list_del(&curr_call->list);
1684
1685                 /* callee was inlined. Append its call list. */
1686                 env->got_inline = 1;
1687                 --env->n_call_nodes;
1688
1689                 /* inlining created copies of the callee's calls; queue them as well */
1690                 loop_depth = curr_call->loop_depth;
1691                 list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
1692                         inline_irg_env *penv = get_irg_link(centry->callee);
1693                         ir_node        *new_call;
1694                         call_entry     *new_entry;
1695
1696                         /* after we have inlined callee, all called methods inside
1697                          * callee are now called once more */
1698                         ++penv->n_callers;
1699
1700                         /* Note that the src list points to Call nodes in the inlined graph,
1701                          * but we need Call nodes in our graph. Luckily the inliner leaves
1702                          * this information in the link field. */
1703                         new_call = get_irn_link(centry->call);
1704                         assert(is_Call(new_call));
1705
1706                         new_entry = duplicate_call_entry(centry, new_call, loop_depth);
1707                         list_add_tail(&new_entry->list, &env->calls);
1708                         maybe_push_call(pqueue, new_entry, inline_threshold);
1709                 }
1710
1711                 env->n_call_nodes += callee_env->n_call_nodes;
1712                 env->n_nodes += callee_env->n_nodes;
1713                 --callee_env->n_callers;
1714         }
1715         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1716         del_pqueue(pqueue);
1717 }
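/*
 * Note on the work-list behaviour above: every successful inline appends
 * copies of the callee's call entries to the queue, so whole call chains
 * can collapse within one invocation; growth is bounded only by maxsize.
 */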
1718
1719 /*
1720  * Heuristic inliner. Calculates a benefice value for every call and inlines
1721  * those calls whose benefice reaches the threshold.
1722  */
1723 void inline_functions(unsigned maxsize, int inline_threshold,
1724                       opt_ptr after_inline_opt)
1725 {
1726         inline_irg_env   *env;
1727         int              i, n_irgs;
1728         ir_graph         *rem;
1729         wenv_t           wenv;
1730         pmap             *copied_graphs;
1731         pmap_entry       *pm_entry;
1732         ir_graph         **irgs;
1733
1734         rem = current_ir_graph;
1735         obstack_init(&temp_obst);
1736
1737         irgs = create_irg_list();
1738
1739         /* a map for the copied graphs, used to inline recursive calls */
1740         copied_graphs = pmap_create();
1741
1742         /* extend all irgs by a temporary data structure for inlining. */
1743         n_irgs = get_irp_n_irgs();
1744         for (i = 0; i < n_irgs; ++i)
1745                 set_irg_link(irgs[i], alloc_inline_irg_env());
1746
1747         /* Pre-compute information in temporary data structure. */
1748         wenv.ignore_runtime = 0;
1749         wenv.ignore_callers = 0;
1750         for (i = 0; i < n_irgs; ++i) {
1751                 ir_graph *irg = irgs[i];
1752
1753                 free_callee_info(irg);
1754
1755                 wenv.x = get_irg_link(irg);
1756                 assure_cf_loop(irg);
1757                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
1758         }
1759
1760         /* -- and now inline. -- */
1761         for (i = 0; i < n_irgs; ++i) {
1762                 ir_graph *irg = irgs[i];
1763
1764                 inline_into(irg, maxsize, inline_threshold, copied_graphs);
1765         }
1766
1767         for (i = 0; i < n_irgs; ++i) {
1768                 ir_graph *irg = irgs[i];
1769
1770                 env = get_irg_link(irg);
1771                 if (env->got_inline && after_inline_opt != NULL) {
1772                         /* this irg got calls inlined: optimize it */
1773                         after_inline_opt(irg);
1774                 }
1775                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
1776                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
1777                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
1778                         env->n_callers_orig, env->n_callers,
1779                         get_entity_name(get_irg_entity(irg))));
1780                 }
1781         }
1782
1783         /* kill the copied graphs: we don't need them anymore */
1784         foreach_pmap(copied_graphs, pm_entry) {
1785                 ir_graph *copy = pm_entry->value;
1786
1787                 /* reset the entity, otherwise it will be deleted in the next step ... */
1788                 set_irg_entity(copy, NULL);
1789                 free_ir_graph(copy);
1790         }
1791         pmap_destroy(copied_graphs);
1792
1793         xfree(irgs);
1794
1795         obstack_free(&temp_obst, NULL);
1796         current_ir_graph = rem;
1797 }
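/*
 * Illustrative call (the values are assumptions; optimize_graph_df() is
 * one possible after-inline optimization with a matching opt_ptr
 * signature):
 *
 *   inline_functions(750, 0, optimize_graph_df);
 *
 * This inlines every call whose benefice is at least 0 as long as no
 * graph grows beyond 750 nodes, then reoptimizes each changed graph.
 */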
1798
1799 struct inline_functions_pass_t {
1800         ir_prog_pass_t pass;
1801         unsigned       maxsize;
1802         int            inline_threshold;
1803         opt_ptr        after_inline_opt;
1804 };
1805
1806 /**
1807  * Wrapper to run inline_functions() as an ir_prog pass.
1808  */
1809 static int inline_functions_wrapper(ir_prog *irp, void *context)
1810 {
1811         struct inline_functions_pass_t *pass = context;
1812
1813         (void)irp;
1814         inline_functions(pass->maxsize, pass->inline_threshold,
1815                          pass->after_inline_opt);
1816         return 0;
1817 }
1818
1819 /* create an ir_prog pass for inline_functions() */
1820 ir_prog_pass_t *inline_functions_pass(
1821           const char *name, unsigned maxsize, int inline_threshold,
1822           opt_ptr after_inline_opt)
1823 {
1824         struct inline_functions_pass_t *pass =
1825                 XMALLOCZ(struct inline_functions_pass_t);
1826
1827         pass->maxsize          = maxsize;
1828         pass->inline_threshold = inline_threshold;
1829         pass->after_inline_opt = after_inline_opt;
1830
1831         return def_prog_pass_constructor(
1832                 &pass->pass, name ? name : "inline_functions",
1833                 inline_functions_wrapper);
1834 }
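/*
 * Illustrative usage (parameter values are assumptions): as with
 * inline_leave_functions_pass(), the returned pass is meant to be
 * scheduled by an ir_prog pass manager; a NULL name selects the
 * default name "inline_functions".
 *
 *   ir_prog_pass_t *pass =
 *       inline_functions_pass(NULL, 750, 0, optimize_graph_df);
 */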
1835
1836 void firm_init_inline(void)
1837 {
1838         FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
1839 }