Include limits.h for LONG_{MAX,MIN}.
[libfirm] / ir / opt / opt_inline.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief    Dead node elimination and Procedure Inlining.
23  * @author   Michael Beck, Goetz Lindenmaier
24  * @version  $Id$
25  */
26 #ifdef HAVE_CONFIG_H
27 # include "config.h"
28 #endif
29
30 #include <limits.h>
31 #include <assert.h>
32
33 #include "irnode_t.h"
34 #include "irgraph_t.h"
35 #include "irprog_t.h"
36
37 #include "iroptimize.h"
38 #include "ircons_t.h"
39 #include "iropt_t.h"
40 #include "irgopt.h"
41 #include "irgmod.h"
42 #include "irgwalk.h"
43
44 #include "adt/array.h"
45 #include "adt/pset.h"
46 #include "adt/pmap.h"
47 #include "adt/pdeq.h"
48 #include "adt/xmalloc.h"
49
50 #include "irouts.h"
51 #include "irloop_t.h"
52 #include "irbackedge_t.h"
53 #include "opt_inline_t.h"
54 #include "cgana.h"
55 #include "trouts.h"
56 #include "error.h"
57
58 #include "analyze_irg_args.h"
59 #include "iredges_t.h"
60 #include "irflag_t.h"
61 #include "irhooks.h"
62 #include "irtools.h"
63
64 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
65
66 /*------------------------------------------------------------------*/
67 /* Routines for dead node elimination / copying garbage collection  */
68 /* of the obstack.                                                  */
69 /*------------------------------------------------------------------*/
70
71 /**
72  * Remember the new node in the old node by using a field all nodes have.
73  */
74 #define set_new_node(oldn, newn)  set_irn_link(oldn, newn)
75
76 /**
77  * Get this new node, before the old node is forgotten.
78  */
79 #define get_new_node(oldn) get_irn_link(oldn)
80
81 /**
82  * Check if a new node was set.
83  */
84 #define has_new_node(n) (get_new_node(n) != NULL)
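/* Note: these three macros reuse the generic link field of ir_node, so any link
   values set up before dead node elimination (e.g. Phi lists) are overwritten
   while the graph is being copied. */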
85
86 /**
87  * We use the block_visited flag to mark that we have computed the
88  * number of useful predecessors for this block.
89  * Further we encode the new arity in this flag in the old blocks.
90  * Remembering the arity is useful, as it saves a lot of pointer
91  * accesses.  This function is called for all Phi and Block nodes
92  * in a Block.
93  */
94 static INLINE int
95 compute_new_arity(ir_node *b) {
96         int i, res, irn_arity;
97         int irg_v, block_v;
98
99         irg_v = get_irg_block_visited(current_ir_graph);
100         block_v = get_Block_block_visited(b);
101         if (block_v >= irg_v) {
102                 /* we computed the number of preds for this block and saved it in the
103                    block_v flag */
104                 return block_v - irg_v;
105         } else {
106                 /* compute the number of good predecessors */
107                 res = irn_arity = get_irn_arity(b);
108                 for (i = 0; i < irn_arity; i++)
109                         if (is_Bad(get_irn_n(b, i))) res--;
110                 /* save it in the flag. */
111                 set_Block_block_visited(b, irg_v + res);
112                 return res;
113         }
114 }
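/* A small worked example (a sketch): assume get_irg_block_visited() == 100 and a
   block b with predecessors { Jmp, Bad, Jmp }.  The first call stores 100 + 2 in
   b's block_visited flag and returns 2; every later call sees block_v >= irg_v
   and returns block_v - irg_v == 2 without rescanning the predecessors. */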
115
116 /**
117  * Copies the node to the new obstack. The Ins of the new node point to
118  * the predecessors on the old obstack.  For block/phi nodes not all
119  * predecessors might be copied.  n->link points to the new node.
120  * For Phi and Block nodes the function allocates in-arrays with an arity
121  * only for useful predecessors.  The arity is determined by counting
122  * the non-bad predecessors of the block.
123  *
124  * @param n    The node to be copied
125  * @param env  if non-NULL, the node number attribute will be copied to the new node
126  *
127  * Note: Also used for loop unrolling.
128  */
129 static void copy_node(ir_node *n, void *env) {
130         ir_node *nn, *block;
131         int new_arity;
132         ir_op *op = get_irn_op(n);
133         (void) env;
134
135         if (op == op_Bad) {
136                 /* node copied already */
137                 return;
138         } else if (op == op_Block) {
139                 block = NULL;
140                 new_arity = compute_new_arity(n);
141                 n->attr.block.graph_arr = NULL;
142         } else {
143                 block = get_nodes_block(n);
144                 if (op == op_Phi) {
145                         new_arity = compute_new_arity(block);
146                 } else {
147                         new_arity = get_irn_arity(n);
148                 }
149         }
150         nn = new_ir_node(get_irn_dbg_info(n),
151                 current_ir_graph,
152                 block,
153                 op,
154                 get_irn_mode(n),
155                 new_arity,
156                 get_irn_in(n) + 1);
157         /* Copy the attributes.  These might point to additional data.  If this
158            was allocated on the old obstack the pointers now are dangling.  This
159            frees e.g. the memory of the graph_arr allocated in new_immBlock. */
160         if (op == op_Block) {
161                 /* we cannot allow blocks WITHOUT macroblock input */
162                 set_Block_MacroBlock(nn, get_Block_MacroBlock(n));
163         }
164         copy_node_attr(n, nn);
165
166 #ifdef DEBUG_libfirm
167         {
168                 int copy_node_nr = env != NULL;
169                 if (copy_node_nr) {
170                         /* for easier debugging, we want to copy the node numbers too */
171                         nn->node_nr = n->node_nr;
172                 }
173         }
174 #endif
175
176         set_new_node(n, nn);
177         hook_dead_node_elim_subst(current_ir_graph, n, nn);
178 }
179
180 /**
181  * Copies new predecessors of old node to new node remembered in link.
182  * Spare the Bad predecessors of Phi and Block nodes.
183  */
184 static void copy_preds(ir_node *n, void *env) {
185         ir_node *nn, *block;
186         int i, j, irn_arity;
187         (void) env;
188
189         nn = get_new_node(n);
190
191         if (is_Block(n)) {
192                 /* copy the macro block header */
193                 ir_node *mbh = get_Block_MacroBlock(n);
194
195                 if (mbh == n) {
196                         /* this block is a macroblock header */
197                         set_Block_MacroBlock(nn, nn);
198                 } else {
199                         /* get the macro block header */
200                         ir_node *nmbh = get_new_node(mbh);
201                         assert(nmbh != NULL);
202                         set_Block_MacroBlock(nn, nmbh);
203                 }
204
205                 /* Don't copy Bad nodes. */
206                 j = 0;
207                 irn_arity = get_irn_arity(n);
208                 for (i = 0; i < irn_arity; i++) {
209                         if (! is_Bad(get_irn_n(n, i))) {
210                                 ir_node *pred = get_irn_n(n, i);
211                                 set_irn_n(nn, j, get_new_node(pred));
212                                 j++;
213                         }
214                 }
215                 /* repair the block visited flag from above misuse. Repair it in both
216                    graphs so that the old one can still be used. */
217                 set_Block_block_visited(nn, 0);
218                 set_Block_block_visited(n, 0);
219                 /* Local optimization could not merge two subsequent blocks while the
220                    in array contained Bads.  Now it is possible.
221                    We do not call optimize_in_place() as it requires
222                    that the fields in ir_graph are set properly. */
223                 if ((get_opt_control_flow_straightening()) &&
224                         (get_Block_n_cfgpreds(nn) == 1) &&
225                         is_Jmp(get_Block_cfgpred(nn, 0))) {
226                         ir_node *old = get_nodes_block(get_Block_cfgpred(nn, 0));
227                         if (nn == old) {
228                                 /* Jmp jumps into the block it is in -- a self cycle; replace the block by Bad. */
229                                 assert(is_Bad(get_new_node(get_irg_bad(current_ir_graph))));
230                                 exchange(nn, get_new_node(get_irg_bad(current_ir_graph)));
231                         } else {
232                                 exchange(nn, old);
233                         }
234                 }
235         } else if (is_Phi(n) && get_irn_arity(n) > 0) {
236                 /* Don't copy node if corresponding predecessor in block is Bad.
237                    The Block itself should not be Bad. */
238                 block = get_nodes_block(n);
239                 set_nodes_block(nn, get_new_node(block));
240                 j = 0;
241                 irn_arity = get_irn_arity(n);
242                 for (i = 0; i < irn_arity; i++) {
243                         if (! is_Bad(get_irn_n(block, i))) {
244                                 ir_node *pred = get_irn_n(n, i);
245                                 set_irn_n(nn, j, get_new_node(pred));
246                                 /*if (is_backedge(n, i)) set_backedge(nn, j);*/
247                                 j++;
248                         }
249                 }
250                 /* If the pre walker reached this Phi after the post walker visited the
251                    block, block_visited is > 0. */
252                 set_Block_block_visited(get_nodes_block(n), 0);
253                 /* Compacting the Phi's ins might generate Phis with only one
254                    predecessor. */
255                 if (get_irn_arity(nn) == 1)
256                         exchange(nn, get_irn_n(nn, 0));
257         } else {
258                 irn_arity = get_irn_arity(n);
259                 for (i = -1; i < irn_arity; i++)
260                         set_irn_n(nn, i, get_new_node(get_irn_n(n, i)));
261         }
262         /* Now the new node is complete.  We can add it to the hash table for CSE.
263            @@@ inlining aborts if we identify End. Why? */
264         if (!is_End(nn))
265                 add_identities(current_ir_graph->value_table, nn);
266 }
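/* copy_node() and copy_preds() are used as a pre/post walker pair (see copy_graph()
   below): the pre pass allocates the new nodes and records them in the link field,
   the post pass wires up their predecessors once all copies exist. */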
267
268 /**
269  * Copies the graph recursively, compacts the keep-alives of the end node.
270  *
271  * @param irg           the graph to be copied
272  * @param copy_node_nr  If non-zero, the node number will be copied
273  */
274 static void copy_graph(ir_graph *irg, int copy_node_nr) {
275         ir_node *oe, *ne, *ob, *nb, *om, *nm; /* old end, new end, old bad, new bad, old NoMem, new NoMem */
276         ir_node *ka;      /* keep alive */
277         int i, irn_arity;
278         unsigned long vfl;
279
280         /* Some nodes must be copied by hand, sigh */
281         vfl = get_irg_visited(irg);
282         set_irg_visited(irg, vfl + 1);
283
284         oe = get_irg_end(irg);
285         mark_irn_visited(oe);
286         /* copy the end node by hand, allocate dynamic in array! */
287         ne = new_ir_node(get_irn_dbg_info(oe),
288                 irg,
289                 NULL,
290                 op_End,
291                 mode_X,
292                 -1,
293                 NULL);
294         /* Copy the attributes.  Well, there might be some in the future... */
295         copy_node_attr(oe, ne);
296         set_new_node(oe, ne);
297
298         /* copy the Bad node */
299         ob = get_irg_bad(irg);
300         mark_irn_visited(ob);
301         nb = new_ir_node(get_irn_dbg_info(ob),
302                 irg,
303                 NULL,
304                 op_Bad,
305                 mode_T,
306                 0,
307                 NULL);
308         copy_node_attr(ob, nb);
309         set_new_node(ob, nb);
310
311         /* copy the NoMem node */
312         om = get_irg_no_mem(irg);
313         mark_irn_visited(om);
314         nm = new_ir_node(get_irn_dbg_info(om),
315                 irg,
316                 NULL,
317                 op_NoMem,
318                 mode_M,
319                 0,
320                 NULL);
321         copy_node_attr(om, nm);
322         set_new_node(om, nm);
323
324         /* copy the live nodes */
325         set_irg_visited(irg, vfl);
326         irg_walk(get_nodes_block(oe), copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
327
328         /* Note: from now on, the visited flag of the graph is equal to vfl + 1 */
329
330         /* visit the anchors as well */
331         for (i = get_irg_n_anchors(irg) - 1; i >= 0; --i) {
332                 ir_node *n = get_irg_anchor(irg, i);
333
334                 if (n && (get_irn_visited(n) <= vfl)) {
335                         set_irg_visited(irg, vfl);
336                         irg_walk(n, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
337                 }
338         }
339
340         /* copy_preds for the end node ... */
341         set_nodes_block(ne, get_new_node(get_nodes_block(oe)));
342
343         /*- ... and now the keep alives. -*/
344         /* First pick the not yet marked Block nodes and walk them.  We must pick these
345            first, otherwise we would overlook blocks only reachable from Phis. */
346         irn_arity = get_End_n_keepalives(oe);
347         for (i = 0; i < irn_arity; i++) {
348                 ka = get_End_keepalive(oe, i);
349                 if (is_Block(ka)) {
350                         if (get_irn_visited(ka) <= vfl) {
351                                 /* We must keep the block alive and copy everything reachable */
352                                 set_irg_visited(irg, vfl);
353                                 irg_walk(ka, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
354                         }
355                         add_End_keepalive(ne, get_new_node(ka));
356                 }
357         }
358
359         /* Now pick other nodes.  Here we will keep all! */
360         irn_arity = get_End_n_keepalives(oe);
361         for (i = 0; i < irn_arity; i++) {
362                 ka = get_End_keepalive(oe, i);
363                 if (!is_Block(ka)) {
364                         if (get_irn_visited(ka) <= vfl) {
365                                 /* We didn't copy the node yet.  */
366                                 set_irg_visited(irg, vfl);
367                                 irg_walk(ka, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
368                         }
369                         add_End_keepalive(ne, get_new_node(ka));
370                 }
371         }
372
373         /* start block sometimes only reached after keep alives */
374         set_nodes_block(nb, get_new_node(get_nodes_block(ob)));
375         set_nodes_block(nm, get_new_node(get_nodes_block(om)));
376 }
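/* The vfl / vfl + 1 protocol above: nodes copied by hand (End, Bad, NoMem) are
   marked with vfl + 1, and the graph's visited flag is reset to vfl before each
   walk.  The walker bumps the flag to vfl + 1 itself, so it skips exactly the
   hand-copied nodes and everything already copied by an earlier walk. */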
377
378 /**
379  * Copies the graph reachable from current_ir_graph->end to the obstack
380  * in current_ir_graph and fixes the environment.
381  * Then fixes the fields in current_ir_graph containing nodes of the
382  * graph.
383  *
384  * @param copy_node_nr  If non-zero, the node number will be copied
385  */
386 static void
387 copy_graph_env(int copy_node_nr) {
388         ir_graph *irg = current_ir_graph;
389         ir_node *old_end, *new_anchor;
390         int i;
391
392         /* remove end_except and end_reg nodes */
393         old_end = get_irg_end(irg);
394         set_irg_end_except (irg, old_end);
395         set_irg_end_reg    (irg, old_end);
396
397         /* Not all nodes remembered in irg might be reachable
398            from the end node.  Assure their link is set to NULL, so that
399            we can test whether new nodes have been computed. */
400         for (i = get_irg_n_anchors(irg) - 1; i >= 0; --i) {
401                 ir_node *n = get_irg_anchor(irg, i);
402                 if (n != NULL)
403                         set_new_node(n, NULL);
404         }
405         /* we use the block walk flag for removing Bads from Blocks ins. */
406         inc_irg_block_visited(irg);
407
408         /* copy the graph */
409         copy_graph(irg, copy_node_nr);
410
411         /* fix the anchor */
412         old_end    = get_irg_end(irg);
413         new_anchor = new_Anchor(irg);
414
415         for (i = get_irg_n_anchors(irg) - 1; i >= 0; --i) {
416                 ir_node *n = get_irg_anchor(irg, i);
417                 if (n)
418                         set_irn_n(new_anchor, i, get_new_node(n));
419         }
420         free_End(old_end);
421         irg->anchor = new_anchor;
422
423         /* ensure the new anchor is placed in the endblock */
424         set_nodes_block(new_anchor, get_irg_end_block(irg));
425 }
426
427 /**
428  * Copies all reachable nodes to a new obstack.  Removes bad inputs
429  * from block nodes and the corresponding inputs from Phi nodes.
430  * Merges single exit blocks with single entry blocks and removes
431  * 1-input Phis.
432  * Adds all new nodes to a new hash table for CSE.  Does not
433  * perform CSE, so the hash table might contain common subexpressions.
434  */
435 void dead_node_elimination(ir_graph *irg) {
436         ir_graph *rem;
437 #ifdef INTERPROCEDURAL_VIEW
438         int rem_ipview = get_interprocedural_view();
439 #endif
440         struct obstack *graveyard_obst = NULL;
441         struct obstack *rebirth_obst   = NULL;
442
443         edges_deactivate(irg);
444
445         /* inform statistics that we started a dead-node elimination run */
446         hook_dead_node_elim(irg, 1);
447
448         /* Remember external state of current_ir_graph. */
449         rem = current_ir_graph;
450         current_ir_graph = irg;
451 #ifdef INTERPROCEDURAL_VIEW
452         set_interprocedural_view(0);
453 #endif
454
455         assert(get_irg_phase_state(irg) != phase_building);
456
457         /* Handle graph state */
458         free_callee_info(irg);
459         free_irg_outs(irg);
460         free_trouts();
461
462         /* @@@ so far we lose loops when copying */
463         free_loop_information(irg);
464
465         set_irg_doms_inconsistent(irg);
466
467         /* A quiet place, where the old obstack can rest in peace,
468            until it will be cremated. */
469         graveyard_obst = irg->obst;
470
471         /* A new obstack, where the reachable nodes will be copied to. */
472         rebirth_obst = xmalloc(sizeof(*rebirth_obst));
473         irg->obst = rebirth_obst;
474         obstack_init(irg->obst);
475         irg->last_node_idx = 0;
476
477         /* We also need a new value table for CSE */
478         del_identities(irg->value_table);
479         irg->value_table = new_identities();
480
481         /* Copy the graph from the old to the new obstack */
482         copy_graph_env(/*copy_node_nr=*/1);
483
484         /* Free memory from old unoptimized obstack */
485         obstack_free(graveyard_obst, 0);  /* First empty the obstack ... */
486         xfree(graveyard_obst);            /* ... then free it.           */
487
488         /* inform statistics that the run is over */
489         hook_dead_node_elim(irg, 0);
490
491         current_ir_graph = rem;
492 #ifdef INTERPROCEDURAL_VIEW
493         set_interprocedural_view(rem_ipview);
494 #endif
495 }
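/* Typical use, as a sketch only: run the pass after a transformation that left
   many unreachable nodes behind, e.g.

       local_optimize_graph(irg);
       dead_node_elimination(irg);

   All ir_node pointers into irg that are held elsewhere become stale afterwards,
   unless they were registered with a survive_dce_t environment (see below). */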
496
497 /**
498  * Relink bad predecessors of a block and store the old in array in the
499  * link field. This function is called by relink_bad_predecessors().
500  * The array in the link field starts with the block operand at position 0.
501  * If the block has bad predecessors, create a new in array without the bad preds;
502  * otherwise leave the in array untouched.
503  */
504 static void relink_bad_block_predecessors(ir_node *n, void *env) {
505         ir_node **new_in, *irn;
506         int i, new_irn_n, old_irn_arity, new_irn_arity = 0;
507         (void) env;
508
509         /* if link field of block is NULL, look for bad predecessors otherwise
510            this is already done */
511         if (is_Block(n) && get_irn_link(n) == NULL) {
512                 /* save old predecessors in link field (position 0 is the block operand)*/
513                 set_irn_link(n, get_irn_in(n));
514
515                 /* count predecessors without bad nodes */
516                 old_irn_arity = get_irn_arity(n);
517                 for (i = 0; i < old_irn_arity; i++)
518                         if (!is_Bad(get_irn_n(n, i)))
519                                 ++new_irn_arity;
520
521                 /* arity changing: set new predecessors without bad nodes */
522                 if (new_irn_arity < old_irn_arity) {
523                         /* Get new predecessor array. We do not resize the array, as we must
524                            keep the old one to update Phis. */
525                         new_in = NEW_ARR_D(ir_node *, current_ir_graph->obst, (new_irn_arity+1));
526
527                         /* set new predecessors in array */
528                         new_in[0] = NULL;
529                         new_irn_n = 1;
530                         for (i = 0; i < old_irn_arity; i++) {
531                                 irn = get_irn_n(n, i);
532                                 if (!is_Bad(irn)) {
533                                         new_in[new_irn_n] = irn;
534                                         is_backedge(n, i) ? set_backedge(n, new_irn_n-1) : set_not_backedge(n, new_irn_n-1);
535                                         ++new_irn_n;
536                                 }
537                         }
538                         /* ARR_SETLEN(int, n->attr.block.backedge, new_irn_arity); */
539                         ARR_SHRINKLEN(n->attr.block.backedge, new_irn_arity);
540                         n->in = new_in;
541                 } /* ir node has bad predecessors */
542         } /* Block is not relinked */
543 }
544
545 /**
546  * Relinks Bad predecessors of Blocks and Phis; called by the walker in
547  * remove_bad_predecessors(). If n is a Block, call
548  * relink_bad_block_predecessors(). If n is a Phi node, also call the relinking
549  * function on the Phi's Block. If this block has bad predecessors, relink the
550  * preds of the Phi node.
551  */
552 static void relink_bad_predecessors(ir_node *n, void *env) {
553         ir_node *block, **old_in;
554         int i, old_irn_arity, new_irn_arity;
555
556         /* relink bad predecessors of a block */
557         if (is_Block(n))
558                 relink_bad_block_predecessors(n, env);
559
560         /* If n is a Phi node, relink its block and its predecessors */
561         if (is_Phi(n)) {
562                 /* Relink predecessors of phi's block */
563                 block = get_nodes_block(n);
564                 if (get_irn_link(block) == NULL)
565                         relink_bad_block_predecessors(block, env);
566
567                 old_in = (ir_node **)get_irn_link(block); /* Of Phi's Block */
568                 old_irn_arity = ARR_LEN(old_in);
569
570                 /* Relink Phi predecessors if count of predecessors changed */
571                 if (old_irn_arity != ARR_LEN(get_irn_in(block))) {
572                         /* set new predecessors in array
573                            n->in[0] remains the same block */
574                         new_irn_arity = 1;
575                         for(i = 1; i < old_irn_arity; i++)
576                                 if (!is_Bad(old_in[i])) {
577                                         n->in[new_irn_arity] = n->in[i];
578                                         is_backedge(n, i) ? set_backedge(n, new_irn_arity) : set_not_backedge(n, new_irn_arity);
579                                         ++new_irn_arity;
580                                 }
581
582                         ARR_SETLEN(ir_node *, n->in, new_irn_arity);
583                         ARR_SETLEN(int, n->attr.phi.u.backedge, new_irn_arity);
584                 }
585         } /* n is a Phi node */
586 }
587
588 /*
589  * Removes Bad predecessors from Blocks and the corresponding
590  * inputs to Phi nodes, as in dead_node_elimination but without
591  * copying the graph.
592  * The pre walker sets the link field to NULL, the post walker calls
593  * relink_bad_predecessors() (this function stores the old in array
594  * in the link field and sets a new in array if the arity of the
595  * predecessors changes).
596  */
597 void remove_bad_predecessors(ir_graph *irg) {
598         panic("Fix backedge handling first");
599         irg_walk_graph(irg, firm_clear_link, relink_bad_predecessors, NULL);
600 }
601
602
603 /*
604    __                      _  __ __
605   (_     __    o     _    | \/  |_
606   __)|_| | \_/ | \_/(/_   |_/\__|__
607
608   The following stuff implements a facility that automatically patches
609   registered ir_node pointers to the new node when a dead node elimination occurs.
610 */
611
612 struct _survive_dce_t {
613         struct obstack obst;
614         pmap *places;
615         pmap *new_places;
616         hook_entry_t dead_node_elim;
617         hook_entry_t dead_node_elim_subst;
618 };
619
620 typedef struct _survive_dce_list_t {
621         struct _survive_dce_list_t *next;
622         ir_node **place;
623 } survive_dce_list_t;
624
625 static void dead_node_hook(void *context, ir_graph *irg, int start) {
626         survive_dce_t *sd = context;
627         (void) irg;
628
629         /* Create a new map before the dead node elimination is performed. */
630         if (start) {
631                 sd->new_places = pmap_create_ex(pmap_count(sd->places));
632         } else {
633                 /* Dead node elimination is over: the surviving entries were moved to new_places by the subst hook, so drop the old map. */
634                 pmap_destroy(sd->places);
635                 sd->places     = sd->new_places;
636                 sd->new_places = NULL;
637         }
638 }
639
640 /**
641  * Hook called when dead node elimination replaces old by nw.
642  */
643 static void dead_node_subst_hook(void *context, ir_graph *irg, ir_node *old, ir_node *nw) {
644         survive_dce_t *sd = context;
645         survive_dce_list_t *list = pmap_get(sd->places, old);
646         (void) irg;
647
648         /* If the node is to be patched back, write the new address to all registered locations. */
649         if (list) {
650                 survive_dce_list_t *p;
651
652                 for (p = list; p; p = p->next)
653                         *(p->place) = nw;
654
655                 pmap_insert(sd->new_places, nw, list);
656         }
657 }
658
659 /**
660  * Make a new Survive DCE environment.
661  */
662 survive_dce_t *new_survive_dce(void) {
663         survive_dce_t *res = xmalloc(sizeof(res[0]));
664         obstack_init(&res->obst);
665         res->places     = pmap_create();
666         res->new_places = NULL;
667
668         res->dead_node_elim.hook._hook_dead_node_elim = dead_node_hook;
669         res->dead_node_elim.context                   = res;
670         res->dead_node_elim.next                      = NULL;
671
672         res->dead_node_elim_subst.hook._hook_dead_node_elim_subst = dead_node_subst_hook;
673         res->dead_node_elim_subst.context = res;
674         res->dead_node_elim_subst.next    = NULL;
675
676 #ifndef FIRM_ENABLE_HOOKS
677         assert(0 && "need hooks enabled");
678 #endif
679
680         register_hook(hook_dead_node_elim, &res->dead_node_elim);
681         register_hook(hook_dead_node_elim_subst, &res->dead_node_elim_subst);
682         return res;
683 }
684
685 /**
686  * Free a Survive DCE environment.
687  */
688 void free_survive_dce(survive_dce_t *sd) {
689         obstack_free(&sd->obst, NULL);
690         pmap_destroy(sd->places);
691         unregister_hook(hook_dead_node_elim, &sd->dead_node_elim);
692         unregister_hook(hook_dead_node_elim_subst, &sd->dead_node_elim_subst);
693         xfree(sd);
694 }
695
696 /**
697  * Register a node pointer to be patched upon DCE.
698  * When DCE occurs, the node pointer specified by @p place will be
699  * patched to the new address of the node it is pointing to.
700  *
701  * @param sd    The Survive DCE environment.
702  * @param place The address of the node pointer.
703  */
704 void survive_dce_register_irn(survive_dce_t *sd, ir_node **place) {
705         if (*place != NULL) {
706                 ir_node *irn      = *place;
707                 survive_dce_list_t *curr = pmap_get(sd->places, irn);
708                 survive_dce_list_t *nw   = obstack_alloc(&sd->obst, sizeof(nw[0]));
709
710                 nw->next  = curr;
711                 nw->place = place;
712
713                 pmap_insert(sd->places, irn, nw);
714         }
715 }
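/* Usage sketch for the Survive DCE facility (my_node is a hypothetical pointer
   that must stay valid across dead node elimination):

       survive_dce_t *sd = new_survive_dce();
       survive_dce_register_irn(sd, &my_node);
       dead_node_elimination(irg);      // my_node now points to the copied node
       free_survive_dce(sd);

   The hooks registered in new_survive_dce() do the patching automatically
   whenever dead node elimination runs. */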
716
717 /*--------------------------------------------------------------------*/
718 /*  Functionality for inlining                                         */
719 /*--------------------------------------------------------------------*/
720
721 /**
722  * Copy node for inlining.  Updates attributes that change when
723  * inlining but not for dead node elimination.
724  *
725  * Copies the node by calling copy_node() and then updates the entity if
726  * it's a local one.  env must be a pointer to the frame type of the
727  * inlined procedure. The new entities must be in the link fields of
728  * the old entities.
729  */
730 static void copy_node_inline(ir_node *n, void *env) {
731         ir_node *nn;
732         ir_type *frame_tp = (ir_type *)env;
733
734         copy_node(n, NULL);
735         if (is_Sel(n)) {
736                 nn = get_new_node (n);
737                 assert(is_Sel(nn));
738                 if (get_entity_owner(get_Sel_entity(n)) == frame_tp) {
739                         set_Sel_entity(nn, get_entity_link(get_Sel_entity(n)));
740                 }
741         } else if (is_Block(n)) {
742                 nn = get_new_node (n);
743                 nn->attr.block.irg = current_ir_graph;
744         }
745 }
746
747 /**
748  * Copies the new predecessors of the old node and moves constant-like
749  * nodes to the Start Block.
750  */
751 static void copy_preds_inline(ir_node *n, void *env) {
752         ir_node *nn;
753
754         copy_preds(n, env);
755         nn = skip_Id(get_new_node(n));
756         if (is_irn_constlike(nn)) {
757                 /* move Constants into the start block */
758                 set_nodes_block(nn, get_irg_start_block(current_ir_graph));
759
760                 n = identify_remember(current_ir_graph->value_table, nn);
761                 if (nn != n) {
762                         exchange(nn, n);
763                 }
764         }
765 }
766
767 /**
768  * Walker: checks if P_value_arg_base is used.
769  */
770 static void find_addr(ir_node *node, void *env) {
771         int *allow_inline = env;
772         if (is_Proj(node) &&
773                         is_Start(get_Proj_pred(node)) &&
774                         get_Proj_proj(node) == pn_Start_P_value_arg_base) {
775                 *allow_inline = 0;
776         } else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
777                 /* From GCC:
778                  * Refuse to inline an alloca call unless the user explicitly forced it, as this
779                  * may change the program's memory overhead drastically when the function
780                  * using alloca is called in a loop.  For the GCC present in SPEC2000, inlining
781                  * into schedule_block caused it to require 2GB of RAM instead of 256MB.
782                  *
783                  * Sadly this is true for our implementation as well.
784                  * Moreover, we cannot differentiate between alloca() and VLAs yet, so this
785                  * disables inlining of functions using VLAs (which are completely safe).
786                  *
787                  * 2 Solutions:
788                  * - add a flag to the Alloc node for "real" alloca() calls
789                  * - add a new Stack-Restore node at the end of a function using alloca()
790                  */
791                 *allow_inline = 0;
792         }
793 }
794
795 /**
796  * Check if we can inline a given call.
797  * Currently, we cannot inline two cases:
798  * - call with compound arguments
799  * - graphs that take the address of a parameter
800  *
801  * check these conditions here
802  */
803 static int can_inline(ir_node *call, ir_graph *called_graph) {
804         ir_type *call_type = get_Call_type(call);
805         int params, ress, i, res;
806         assert(is_Method_type(call_type));
807
808         params = get_method_n_params(call_type);
809         ress   = get_method_n_ress(call_type);
810
811         /* check parameters for compound arguments */
812         for (i = 0; i < params; ++i) {
813                 ir_type *p_type = get_method_param_type(call_type, i);
814
815                 if (is_compound_type(p_type))
816                         return 0;
817         }
818
819         /* check results for compound arguments */
820         for (i = 0; i < ress; ++i) {
821                 ir_type *r_type = get_method_res_type(call_type, i);
822
823                 if (is_compound_type(r_type))
824                         return 0;
825         }
826
827         res = 1;
828         irg_walk_graph(called_graph, find_addr, NULL, &res);
829
830         return res;
831 }
832
833 enum exc_mode {
834         exc_handler    = 0, /**< There is a handler. */
835         exc_to_end     = 1, /**< Branches to End. */
836         exc_no_handler = 2  /**< Exception handling not represented. */
837 };
838
839 /* Inlines a method at the given call site. */
840 int inline_method(ir_node *call, ir_graph *called_graph) {
841         ir_node             *pre_call;
842         ir_node             *post_call, *post_bl;
843         ir_node             *in[pn_Start_max];
844         ir_node             *end, *end_bl, *block;
845         ir_node             **res_pred;
846         ir_node             **cf_pred;
847         ir_node             **args_in;
848         ir_node             *ret, *phi;
849         int                 arity, n_ret, n_exc, n_res, i, n, j, rem_opt, irn_arity, n_params;
850         enum exc_mode       exc_handling;
851         ir_type             *called_frame, *curr_frame, *mtp, *ctp;
852         ir_entity           *ent;
853         ir_graph            *rem, *irg;
854         irg_inline_property prop = get_irg_inline_property(called_graph);
855
856         if (prop == irg_inline_forbidden)
857                 return 0;
858
859         ent = get_irg_entity(called_graph);
860
861         mtp = get_entity_type(ent);
862         ctp = get_Call_type(call);
863         if (get_method_n_params(mtp) > get_method_n_params(ctp)) {
864                 /* this is a bad feature of C: without a prototype, we can call a function with fewer
865                 parameters than declared. Currently we don't support this, although it would be
866                 easy to use Unknown for the missing arguments. */
867                 return 0;
868         }
869
870         /* Argh, compiling C has some bad consequences:
871            the call type AND the method type might be different.
872            It is implementation dependent what happens in that case.
873            We support inlining if the bit size of the types matches AND
874            the same arithmetic is used. */
875         n_params = get_method_n_params(mtp);
876         for (i = n_params - 1; i >= 0; --i) {
877                 ir_type *param_tp = get_method_param_type(mtp, i);
878                 ir_type *arg_tp   = get_method_param_type(ctp, i);
879
880                 if (param_tp != arg_tp) {
881                         ir_mode *pmode = get_type_mode(param_tp);
882                         ir_mode *amode = get_type_mode(arg_tp);
883
884                         if (pmode == NULL || amode == NULL)
885                                 return 0;
886                         if (get_mode_size_bits(pmode) != get_mode_size_bits(amode))
887                                 return 0;
888                         if (get_mode_arithmetic(pmode) != get_mode_arithmetic(amode))
889                                 return 0;
890                         /* otherwise we can simply "reinterpret" the bits */
891                 }
892         }
893
894         irg = get_irn_irg(call);
895
896         /*
897          * We cannot inline a recursive call. The graph must be copied before
898          * the call to inline_method() using create_irg_copy().
899          */
900         if (called_graph == irg)
901                 return 0;
902
903         /*
904          * currently, we cannot inline two cases:
905          * - call with compound arguments
906          * - graphs that take the address of a parameter
907          */
908         if (! can_inline(call, called_graph))
909                 return 0;
910
911         rem = current_ir_graph;
912         current_ir_graph = irg;
913
914         DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));
915
916         /* --  Turn off optimizations, this can cause problems when allocating new nodes. -- */
917         rem_opt = get_opt_optimize();
918         set_optimize(0);
919
920         /* Handle graph state */
921         assert(get_irg_phase_state(irg) != phase_building);
922         assert(get_irg_pinned(irg) == op_pin_state_pinned);
923         assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
924         set_irg_outs_inconsistent(irg);
925         set_irg_extblk_inconsistent(irg);
926         set_irg_doms_inconsistent(irg);
927         set_irg_loopinfo_inconsistent(irg);
928         set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
929
930         /* -- Check preconditions -- */
931         assert(is_Call(call));
932
933         /* here we know we WILL inline, so inform the statistics */
934         hook_inline(call, called_graph);
935
936         /* -- Decide how to handle exception control flow: Is there a handler
937            for the Call node, or do we branch directly to End on an exception?
938            exc_handling:
939            0 There is a handler.
940            1 Branches to End.
941            2 Exception handling not represented in Firm. -- */
942         {
943                 ir_node *proj, *Mproj = NULL, *Xproj = NULL;
944                 for (proj = get_irn_link(call); proj; proj = get_irn_link(proj)) {
945                         long proj_nr = get_Proj_proj(proj);
946                         if (proj_nr == pn_Call_X_except) Xproj = proj;
947                         if (proj_nr == pn_Call_M_except) Mproj = proj;
948                 }
949                 if      (Mproj) { assert(Xproj); exc_handling = exc_handler; } /*  Mproj           */
950                 else if (Xproj) {                exc_handling = exc_to_end; } /* !Mproj &&  Xproj   */
951                 else            {                exc_handling = exc_no_handler; } /* !Mproj && !Xproj   */
952         }
953
954         /* create the argument tuple */
955         NEW_ARR_A(ir_node *, args_in, n_params);
956
957         block = get_nodes_block(call);
958         for (i = n_params - 1; i >= 0; --i) {
959                 ir_node *arg      = get_Call_param(call, i);
960                 ir_type *param_tp = get_method_param_type(mtp, i);
961                 ir_mode *mode     = get_type_mode(param_tp);
962
963                 if (mode != get_irn_mode(arg)) {
964                         arg = new_r_Conv(irg, block, arg, mode);
965                 }
966                 args_in[i] = arg;
967         }
968
969         /* -- Pre_call is a Tuple collecting the initial control flow, memory, frame base,
970            TLS pointer and the arguments of the procedure; it later replaces the Start node of the called graph.
971            Post_call is the old Call node and collects the results of the called
972            graph. Both will end up being a tuple.  -- */
973         post_bl = get_nodes_block(call);
974         set_irg_current_block(irg, post_bl);
975         /* XxMxPxPxPxT of Start + parameter of Call */
976         in[pn_Start_X_initial_exec]   = new_Jmp();
977         in[pn_Start_M]                = get_Call_mem(call);
978         in[pn_Start_P_frame_base]     = get_irg_frame(irg);
979         in[pn_Start_P_tls]            = get_irg_tls(irg);
980         in[pn_Start_T_args]           = new_Tuple(n_params, args_in);
981         /* in[pn_Start_P_value_arg_base] = ??? */
982         assert(pn_Start_P_value_arg_base == pn_Start_max - 1 && "pn_Start_P_value_arg_base not supported, fix");
983         pre_call = new_Tuple(pn_Start_max - 1, in);
984         post_call = call;
985
986         /* --
987            The new block gets the ins of the old block, pre_call and all its
988            predecessors and all Phi nodes. -- */
989         part_block(pre_call);
990
991         /* -- Prepare state for dead node elimination -- */
992         /* Visited flags in calling irg must be >= flag in called irg.
993            Else walker and arity computation will not work. */
994         if (get_irg_visited(irg) <= get_irg_visited(called_graph))
995                 set_irg_visited(irg, get_irg_visited(called_graph) + 1);
996         if (get_irg_block_visited(irg) < get_irg_block_visited(called_graph))
997                 set_irg_block_visited(irg, get_irg_block_visited(called_graph));
998         /* Set pre_call as new Start node in the link field of the start node of
999            the called graph, and pre_call's block as new block for the start block
1000            of the called graph.
1001            Further mark these nodes so that they are not visited by the
1002            copying. */
1003         set_irn_link(get_irg_start(called_graph), pre_call);
1004         set_irn_visited(get_irg_start(called_graph), get_irg_visited(irg));
1005         set_irn_link(get_irg_start_block(called_graph), get_nodes_block(pre_call));
1006         set_irn_visited(get_irg_start_block(called_graph), get_irg_visited(irg));
1007         set_irn_link(get_irg_bad(called_graph), get_irg_bad(irg));
1008         set_irn_visited(get_irg_bad(called_graph), get_irg_visited(irg));
1009
1010         /* Initialize for compaction of in arrays */
1011         inc_irg_block_visited(irg);
1012
1013         /* -- Replicate local entities of the called_graph -- */
1014         /* copy the entities. */
1015         called_frame = get_irg_frame_type(called_graph);
1016         curr_frame   = get_irg_frame_type(irg);
1017         for (i = 0, n = get_class_n_members(called_frame); i < n; ++i) {
1018                 ir_entity *new_ent, *old_ent;
1019                 old_ent = get_class_member(called_frame, i);
1020                 new_ent = copy_entity_own(old_ent, curr_frame);
1021                 set_entity_link(old_ent, new_ent);
1022         }
1023
1024         /* visited is > than that of the called graph.  With this trick, visited will
1025            remain unchanged, so that an outer walker (e.g., one searching for Call nodes
1026            to inline) that triggered this inlining will not visit the inlined nodes. */
1027         set_irg_visited(irg, get_irg_visited(irg)-1);
1028
1029         /* -- Performing dead node elimination inlines the graph -- */
1030         /* Copies the nodes to the obstack of current_ir_graph. Updates links to new
1031            entities. */
1032         irg_walk(get_irg_end(called_graph), copy_node_inline, copy_preds_inline,
1033                  get_irg_frame_type(called_graph));
1034
1035         /* Repair called_graph */
1036         set_irg_visited(called_graph, get_irg_visited(irg));
1037         set_irg_block_visited(called_graph, get_irg_block_visited(irg));
1038         set_Block_block_visited(get_irg_start_block(called_graph), 0);
1039
1040         /* -- Merge the end of the inlined procedure with the call site -- */
1041         /* We will turn the old Call node into a Tuple with the following
1042            predecessors:
1043            -1:  Block of Tuple.
1044            0: Phi of all Memories of Return statements.
1045            1: Jmp from new Block that merges the control flow from all exception
1046            predecessors of the old end block.
1047            2: Tuple of all arguments.
1048            3: Phi of Exception memories.
1049            In case the old Call directly branches to End on an exception we don't
1050            need the block merging all exceptions nor the Phi of the exception
1051            memories.
1052         */
1053
1054         /* -- Precompute some values -- */
1055         end_bl = get_new_node(get_irg_end_block(called_graph));
1056         end = get_new_node(get_irg_end(called_graph));
1057         arity = get_irn_arity(end_bl);    /* arity = n_exc + n_ret  */
1058         n_res = get_method_n_ress(get_Call_type(call));
1059
1060         res_pred = xmalloc(n_res * sizeof(*res_pred));
1061         cf_pred  = xmalloc(arity * sizeof(*cf_pred));
1062
1063         set_irg_current_block(irg, post_bl); /* just to make sure */
1064
1065         /* -- archive keepalives -- */
1066         irn_arity = get_irn_arity(end);
1067         for (i = 0; i < irn_arity; i++) {
1068                 ir_node *ka = get_End_keepalive(end, i);
1069                 if (! is_Bad(ka))
1070                         add_End_keepalive(get_irg_end(irg), ka);
1071         }
1072
1073         /* The new end node will die.  We need not free it, as its in array is on the obstack:
1074            copy_node() only generated 'D' arrays. */
1075
1076         /* -- Replace Return nodes by Jump nodes. -- */
1077         n_ret = 0;
1078         for (i = 0; i < arity; i++) {
1079                 ir_node *ret;
1080                 ret = get_irn_n(end_bl, i);
1081                 if (is_Return(ret)) {
1082                         cf_pred[n_ret] = new_r_Jmp(irg, get_nodes_block(ret));
1083                         n_ret++;
1084                 }
1085         }
1086         set_irn_in(post_bl, n_ret, cf_pred);
1087
1088         /* -- Build a Tuple for all results of the method.
1089            Add Phi node if there was more than one Return.  -- */
1090         turn_into_tuple(post_call, pn_Call_max);
1091         /* First the Memory-Phi */
1092         n_ret = 0;
1093         for (i = 0; i < arity; i++) {
1094                 ret = get_irn_n(end_bl, i);
1095                 if (is_Return(ret)) {
1096                         cf_pred[n_ret] = get_Return_mem(ret);
1097                         n_ret++;
1098                 }
1099         }
1100         phi = new_Phi(n_ret, cf_pred, mode_M);
1101         set_Tuple_pred(call, pn_Call_M_regular, phi);
1102         /* Conserve Phi-list for further inlinings -- but might be optimized */
1103         if (get_nodes_block(phi) == post_bl) {
1104                 set_irn_link(phi, get_irn_link(post_bl));
1105                 set_irn_link(post_bl, phi);
1106         }
1107         /* Now the real results */
1108         if (n_res > 0) {
1109                 for (j = 0; j < n_res; j++) {
1110                         n_ret = 0;
1111                         for (i = 0; i < arity; i++) {
1112                                 ret = get_irn_n(end_bl, i);
1113                                 if (is_Return(ret)) {
1114                                         cf_pred[n_ret] = get_Return_res(ret, j);
1115                                         n_ret++;
1116                                 }
1117                         }
1118                         if (n_ret > 0)
1119                                 phi = new_Phi(n_ret, cf_pred, get_irn_mode(cf_pred[0]));
1120                         else
1121                                 phi = new_Bad();
1122                         res_pred[j] = phi;
1123                         /* Conserve Phi-list for further inlinings -- but might be optimized */
1124                         if (get_nodes_block(phi) == post_bl) {
1125                                 set_Phi_next(phi, get_Block_phis(post_bl));
1126                                 set_Block_phis(post_bl, phi);
1127                         }
1128                 }
1129                 set_Tuple_pred(call, pn_Call_T_result, new_Tuple(n_res, res_pred));
1130         } else {
1131                 set_Tuple_pred(call, pn_Call_T_result, new_Bad());
1132         }
1133         /* handle the regular call */
1134         set_Tuple_pred(call, pn_Call_X_regular, new_Jmp());
1135
1136         /* For now, we cannot inline calls with value_base */
1137         set_Tuple_pred(call, pn_Call_P_value_res_base, new_Bad());
1138
1139         /* Finally the exception control flow.
1140            We have two (three) possible situations:
1141            First if the Call branches to an exception handler: We need to add a Phi node to
1142            collect the memory containing the exception objects.  Further we need
1143            to add another block to get a correct representation of this Phi.  To
1144            this block we add a Jmp that resolves into the X output of the Call
1145            when the Call is turned into a tuple.
1146            Second the Call branches to End, the exception is not handled.  Just
1147            add all inlined exception branches to the End node.
1148            Third: there is no Exception edge at all. Handle as case two. */
1149         if (exc_handling == exc_handler) {
1150                 n_exc = 0;
1151                 for (i = 0; i < arity; i++) {
1152                         ir_node *ret, *irn;
1153                         ret = get_irn_n(end_bl, i);
1154                         irn = skip_Proj(ret);
1155                         if (is_fragile_op(irn) || is_Raise(irn)) {
1156                                 cf_pred[n_exc] = ret;
1157                                 ++n_exc;
1158                         }
1159                 }
1160                 if (n_exc > 0) {
1161                         new_Block(n_exc, cf_pred);      /* watch it: current_block is changed! */
1162                         set_Tuple_pred(call, pn_Call_X_except, new_Jmp());
1163                         /* The Phi for the memories with the exception objects */
1164                         n_exc = 0;
1165                         for (i = 0; i < arity; i++) {
1166                                 ir_node *ret;
1167                                 ret = skip_Proj(get_irn_n(end_bl, i));
1168                                 if (is_Call(ret)) {
1169                                         cf_pred[n_exc] = new_r_Proj(irg, get_nodes_block(ret), ret, mode_M, 3);
1170                                         n_exc++;
1171                                 } else if (is_fragile_op(ret)) {
1172                                         /* We rely that all cfops have the memory output at the same position. */
1173                                         cf_pred[n_exc] = new_r_Proj(irg, get_nodes_block(ret), ret, mode_M, 0);
1174                                         n_exc++;
1175                                 } else if (is_Raise(ret)) {
1176                                         cf_pred[n_exc] = new_r_Proj(irg, get_nodes_block(ret), ret, mode_M, 1);
1177                                         n_exc++;
1178                                 }
1179                         }
1180                         set_Tuple_pred(call, pn_Call_M_except, new_Phi(n_exc, cf_pred, mode_M));
1181                 } else {
1182                         set_Tuple_pred(call, pn_Call_X_except, new_Bad());
1183                         set_Tuple_pred(call, pn_Call_M_except, new_Bad());
1184                 }
1185         } else {
1186                 ir_node *main_end_bl;
1187                 int main_end_bl_arity;
1188                 ir_node **end_preds;
1189
1190                 /* assert(exc_handling == 1 || no exceptions. ) */
1191                 n_exc = 0;
1192                 for (i = 0; i < arity; i++) {
1193                         ir_node *ret = get_irn_n(end_bl, i);
1194                         ir_node *irn = skip_Proj(ret);
1195
1196                         if (is_fragile_op(irn) || is_Raise(irn)) {
1197                                 cf_pred[n_exc] = ret;
1198                                 n_exc++;
1199                         }
1200                 }
1201                 main_end_bl = get_irg_end_block(irg);
1202                 main_end_bl_arity = get_irn_arity(main_end_bl);
1203                 end_preds =  xmalloc((n_exc + main_end_bl_arity) * sizeof(*end_preds));
1204
1205                 for (i = 0; i < main_end_bl_arity; ++i)
1206                         end_preds[i] = get_irn_n(main_end_bl, i);
1207                 for (i = 0; i < n_exc; ++i)
1208                         end_preds[main_end_bl_arity + i] = cf_pred[i];
1209                 set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
1210                 set_Tuple_pred(call, pn_Call_X_except,  new_Bad());
1211                 set_Tuple_pred(call, pn_Call_M_except,  new_Bad());
1212                 free(end_preds);
1213         }
1214         free(res_pred);
1215         free(cf_pred);
1216
1217         /* --  Turn optimizations back on. -- */
1218         set_optimize(rem_opt);
1219         current_ir_graph = rem;
1220
1221         return 1;
1222 }
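/* Note, as a sketch: inline_method() refuses to inline a call into the graph that
   contains it (called_graph == irg).  Following the comment near the recursion
   check above, such a call can still be inlined by copying the callee first:

       ir_graph *copy = create_irg_copy(called_graph);
       inline_method(call, copy);
*/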
1223
1224 /********************************************************************/
1225 /* Apply inlining to small methods.                                 */
1226 /********************************************************************/
1227
1228 static struct obstack  temp_obst;
1229
1230 /** Represents a possible inlinable call in a graph. */
1231 typedef struct _call_entry call_entry;
1232 struct _call_entry {
1233         ir_node    *call;      /**< the Call node */
1234         ir_graph   *callee;    /**< the callee IR-graph called here */
1235         call_entry *next;      /**< for linking the next one */
1236         int        loop_depth; /**< the loop depth of this call */
1237 };
1238
1239 /**
1240  * environment for inlining small irgs
1241  */
1242 typedef struct _inline_env_t {
1243         struct obstack obst;  /**< an obstack where call_entries are allocated on. */
1244         call_entry *head;     /**< the head of the call entry list */
1245         call_entry *tail;     /**< the tail of the call entry list */
1246 } inline_env_t;
1247
1248 /**
1249  * Returns the irg called from a Call node. If the irg is not
1250  * known, NULL is returned.
1251  *
1252  * @param call  the call node
1253  */
1254 static ir_graph *get_call_called_irg(ir_node *call) {
1255         ir_node *addr;
1256
1257         addr = get_Call_ptr(call);
1258         if (is_Global(addr)) {
1259                 ir_entity *ent = get_Global_entity(addr);
1260                 return get_entity_irg(ent);
1261         }
1262
1263         return NULL;
1264 }
1265
1266 /**
1267  * Walker: Collect all calls to known graphs inside a graph.
1268  */
1269 static void collect_calls(ir_node *call, void *env) {
1270         if (is_Call(call)) {
1271                 ir_graph *called_irg = get_call_called_irg(call);
1272
1273                 if (called_irg != NULL) {
1274                         /* The Call node calls a locally defined method.  Remember to inline. */
1275                         inline_env_t *ienv  = env;
1276                         call_entry   *entry = obstack_alloc(&ienv->obst, sizeof(*entry));
1277                         entry->call       = call;
1278                         entry->callee     = called_irg;
1279                         entry->next       = NULL;
1280                         entry->loop_depth = 0;
1281
1282                         if (ienv->tail == NULL)
1283                                 ienv->head = entry;
1284                         else
1285                                 ienv->tail->next = entry;
1286                         ienv->tail = entry;
1287                 }
1288         }
1289 }
1290
1291 /**
1292  * Inlines all small methods at call sites where the called address comes
1293  * from a Const node that references the entity representing the called
1294  * method.
1295  * The size argument is a rough measure for the code size of the method:
1296  * Methods where the obstack containing the firm graph is smaller than
1297  * size are inlined.
1298  */
1299 void inline_small_irgs(ir_graph *irg, int size) {
1300         ir_graph *rem = current_ir_graph;
1301         inline_env_t env;
1302         call_entry *entry;
1303
1304         current_ir_graph = irg;
1305         /* Handle graph state */
1306         assert(get_irg_phase_state(irg) != phase_building);
1307         free_callee_info(irg);
1308
1309         /* Find Call nodes to inline.
1310            (We cannot inline during a walk of the graph, as inlining the same
1311            method several times changes the visited flag of the walked graph:
1312            after the first inlining, visited of the callee equals visited of
1313            the caller.  With the next inlining both are increased.) */
1314         obstack_init(&env.obst);
1315         env.head = env.tail = NULL;
1316         irg_walk_graph(irg, NULL, collect_calls, &env);
1317
1318         if (env.head != NULL) {
1319                 /* There are calls to inline */
1320                 collect_phiprojs(irg);
1321                 for (entry = env.head; entry != NULL; entry = entry->next) {
1322                         ir_graph *callee = entry->callee;
1323                         if (((_obstack_memory_used(callee->obst) - (int)obstack_room(callee->obst)) < size) ||
1324                             (get_irg_inline_property(callee) >= irg_inline_forced)) {
1325                                 inline_method(entry->call, callee);
1326                         }
1327                 }
1328         }
1329         obstack_free(&env.obst, NULL);
1330         current_ir_graph = rem;
1331 }
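
/*
 * Illustrative usage (the threshold 50 is an arbitrary example value, not a
 * tuned recommendation): a pass driver could run this over all graphs of
 * the program.
 *
 *     int i, n = get_irp_n_irgs();
 *     for (i = 0; i < n; ++i)
 *         inline_small_irgs(get_irp_irg(i), 50);
 */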
1332
1333 /**
1334  * Environment for inlining irgs.
1335  */
1336 typedef struct {
1337         int n_nodes;             /**< Number of nodes in graph, not counting Proj, Tuple and Sync nodes, Blocks, Start and End. */
1338         int n_blocks;            /**< Number of Blocks in graph without Start and End block. */
1339         int n_nodes_orig;        /**< for statistics */
1340         int n_call_nodes;        /**< Number of Call nodes in the graph. */
1341         int n_call_nodes_orig;   /**< for statistics */
1342         int n_callers;           /**< Number of known graphs that call this graph. */
1343         int n_callers_orig;      /**< for statistics */
1344         unsigned got_inline:1;   /**< Set, if at least one call inside this graph was inlined. */
1345         unsigned local_vars:1;   /**< Set, if the address of a local variable is passed to an inlined function. */
1346         unsigned recursive:1;    /**< Set, if this function is self recursive. */
1347         call_entry *call_head;   /**< The head of the list of all call nodes in this graph. */
1348         call_entry *call_tail;   /**< The tail of the list of all call nodes in this graph. */
1349         unsigned *local_weights; /**< Once allocated, the beneficial weight for transmitting local addresses. */
1350 } inline_irg_env;
1351
1352 /**
1353  * Allocate a new environment for inlining.
1354  */
1355 static inline_irg_env *alloc_inline_irg_env(void) {
1356         inline_irg_env *env    = obstack_alloc(&temp_obst, sizeof(*env));
1357         env->n_nodes           = -2; /* do not count Start, End */
1358         env->n_blocks          = -2; /* do not count Start, End Block */
1359         env->n_nodes_orig      = -2; /* do not count Start, End */
1360         env->call_head         = NULL;
1361         env->call_tail         = NULL;
1362         env->n_call_nodes      = 0;
1363         env->n_call_nodes_orig = 0;
1364         env->n_callers         = 0;
1365         env->n_callers_orig    = 0;
1366         env->got_inline        = 0;
1367         env->local_vars        = 0;
1368         env->recursive         = 0;
1369         env->local_weights     = NULL;
1370         return env;
1371 }
1372
1373 typedef struct walker_env {
1374         inline_irg_env *x;     /**< the inline environment */
1375         call_entry *last_call; /**< points to the last inserted call */
1376         char ignore_runtime;   /**< if set, calls to runtime functions are not collected */
1377         char ignore_callers;   /**< if set, do not update the callers' data */
1378 } wenv_t;
1379
1380 /**
1381  * post-walker: collect all calls in the inline-environment
1382  * of a graph and sum some statistics.
1383  */
1384 static void collect_calls2(ir_node *call, void *ctx) {
1385         wenv_t         *env = ctx;
1386         inline_irg_env *x = env->x;
1387         ir_opcode      code = get_irn_opcode(call);
1388         ir_graph       *callee;
1389         call_entry     *entry;
1390
1391         /* count meaningful nodes in irg */
1392         if (code != iro_Proj && code != iro_Tuple && code != iro_Sync) {
1393                 if (code != iro_Block) {
1394                         ++x->n_nodes;
1395                         ++x->n_nodes_orig;
1396                 } else {
1397                         ++x->n_blocks;
1398                 }
1399         }
1400
1401         if (code != iro_Call) return;
1402
1403         /* check if it's a runtime call */
1404         if (env->ignore_runtime) {
1405                 ir_node *symc = get_Call_ptr(call);
1406
1407                 if (is_Global(symc)) {
1408                         ir_entity *ent = get_Global_entity(symc);
1409
1410                         if (get_entity_additional_properties(ent) & mtp_property_runtime)
1411                                 return;
1412                 }
1413         }
1414
1415         /* collect all call nodes */
1416         ++x->n_call_nodes;
1417         ++x->n_call_nodes_orig;
1418
1419         callee = get_call_called_irg(call);
1420         if (callee != NULL) {
1421                 if (! env->ignore_callers) {
1422                         inline_irg_env *callee_env = get_irg_link(callee);
1423                         /* count all static callers */
1424                         ++callee_env->n_callers;
1425                         ++callee_env->n_callers_orig;
1426                 }
1427                 if (callee == current_ir_graph)
1428                         x->recursive = 1;
1429
1430                 /* link it in the list of possibly inlinable entries */
1431                 entry = obstack_alloc(&temp_obst, sizeof(*entry));
1432                 entry->call       = call;
1433                 entry->callee     = callee;
1434                 entry->next       = NULL;
1435                 entry->loop_depth = get_irn_loop(get_nodes_block(call))->depth;
1436
1437                 /* insert sorted by descending loop depth; last_call points to the last inserted entry */
1438                 if (x->call_head == NULL) {
1439                         x->call_head = entry;
1440                 } else {
1441                         if (entry->loop_depth == env->last_call->loop_depth) {
1442                                 /* same depth as the last one, enqueue after it */
1443                                 entry->next          = env->last_call->next;
1444                                 env->last_call->next = entry;
1445                         } else if (entry->loop_depth > x->call_head->loop_depth) {
1446                                 /* put first */
1447                                 entry->next  = x->call_head;
1448                                 x->call_head = entry;
1449                         } else {
1450                                 /* search the insertion point */
1451                                 call_entry *p;
1452
1453                                 for (p = x->call_head; p->next != NULL; p = p->next)
1454                                         if (entry->loop_depth > p->next->loop_depth)
1455                                                 break;
1456                                 entry->next = p->next;
1457                                 p->next     = entry;
1458                         }
1459                 }
1460                 env->last_call = entry;
1461                 if (entry->next == NULL) {
1462                         /* keep tail up to date */
1463                         x->call_tail = entry;
1464                 }
1465         }
1466 }
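
/*
 * Note on the ordering above: the list is kept sorted by descending loop
 * depth.  For example, collecting calls at loop depths 0, 2, 1, 2 (in walk
 * order) yields the list 2, 2, 1, 0, so calls in deeply nested loops are
 * considered for inlining first.
 */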
1467
1468 /**
1469  * Returns TRUE if the number of Call nodes is 0 in the irg's environment,
1470  * hence this irg is a leaf (it calls no other graph).
1471  */
1472 INLINE static int is_leave(ir_graph *irg) {
1473         inline_irg_env *env = get_irg_link(irg);
1474         return env->n_call_nodes == 0;
1475 }
1476
1477 /**
1478  * Returns TRUE if the number of nodes in the callee is
1479  * smaller than size in the irg's environment.
1480  */
1481 INLINE static int is_smaller(ir_graph *callee, int size) {
1482         inline_irg_env *env = get_irg_link(callee);
1483         return env->n_nodes < size;
1484 }
1485
1486 /**
1487  * Append the nodes of the list src to the nodes of the list in environment dst.
1488  */
1489 static void append_call_list(inline_irg_env *dst, call_entry *src) {
1490         call_entry *entry, *nentry;
1491
1492         /* Note that the src list points to Call nodes in the inlined graph, but
1493            we need Call nodes in our graph. Luckily the inliner leaves this information
1494            in the link field. */
1495         for (entry = src; entry != NULL; entry = entry->next) {
1496                 nentry = obstack_alloc(&temp_obst, sizeof(*nentry));
1497                 nentry->call         = get_irn_link(entry->call);
1498                 nentry->callee       = entry->callee;
1499                 nentry->next         = NULL;
1500                 nentry->loop_depth   = entry->loop_depth;
1501                 dst->call_tail->next = nentry;
1502                 dst->call_tail       = nentry;
1503         }
1504 }
1505
1506 /**
1507  * Replace the list entry dst by a copy of the list src: the copies are put in front of dst's successors and the new head is returned.
1508  */
1509 static call_entry *replace_entry_by_call_list(call_entry *dst, call_entry *src) {
1510         call_entry *entry, *nentry, *head, *tail;
1511
1512         /* Note that the src list points to Call nodes in the inlined graph, but
1513            we need Call nodes in our graph. Luckily the inliner leaves this information
1514            in the link field. */
1515         head = tail = NULL;
1516         for (entry = src; entry != NULL; entry = entry->next) {
1517                 nentry = obstack_alloc(&temp_obst, sizeof(*nentry));
1518                 nentry->call         = get_irn_link(entry->call);
1519                 nentry->callee       = entry->callee;
1520                 nentry->next         = NULL;
1521                 nentry->loop_depth   = entry->loop_depth + dst->loop_depth;
1522                 if (head == NULL)
1523                         head = nentry;
1524                 else
1525                         tail->next = nentry;
1526                 tail = nentry;
1527         }
1528         /* link past dst, the entry being replaced */
1529         if (head != NULL) {
1530                 tail->next = dst->next;
1531         } else {
1532                 head = dst->next;
1533         }
1534         return head;
1535 }
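
/*
 * Example of the replacement semantics: if dst is entry B in the caller's
 * list A -> B -> C and src is the callee's list X -> Y, the returned head
 * is X' -> Y' -> C, where X' and Y' are copies of X and Y whose loop depth
 * is increased by B's loop depth.  If src is empty, dst->next (here C) is
 * returned.
 */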
1536
1537 /*
1538  * Inlines small leaf methods at call sites where the called address comes
1539  * from a Const node that references the entity representing the called
1540  * method.
1541  * The size arguments are rough measures for the code size of a method:
1542  * leaf methods with fewer than leavesize nodes are inlined first, then other
1543  * methods with fewer than size nodes, while the caller stays below maxsize.
1544  */
1545 void inline_leave_functions(int maxsize, int leavesize, int size, int ignore_runtime) {
1546         inline_irg_env   *env;
1547         ir_graph         *irg;
1548         int              i, n_irgs;
1549         ir_graph         *rem;
1550         int              did_inline;
1551         wenv_t           wenv;
1552         call_entry       *entry, *tail;
1553         const call_entry *centry;
1554         pmap             *copied_graphs;
1555         pmap_entry       *pm_entry;
1556
1557         rem = current_ir_graph;
1558         obstack_init(&temp_obst);
1559
1560         /* a map for the copied graphs, used to inline recursive calls */
1561         copied_graphs = pmap_create();
1562
1563         /* extend all irgs by a temporary data structure for inlining. */
1564         n_irgs = get_irp_n_irgs();
1565         for (i = 0; i < n_irgs; ++i)
1566                 set_irg_link(get_irp_irg(i), alloc_inline_irg_env());
1567
1568         /* Precompute information in temporary data structure. */
1569         wenv.ignore_runtime = ignore_runtime;
1570         wenv.ignore_callers = 0;
1571         for (i = 0; i < n_irgs; ++i) {
1572                 ir_graph *irg = get_irp_irg(i);
1573
1574                 assert(get_irg_phase_state(irg) != phase_building);
1575                 free_callee_info(irg);
1576
1577                 assure_cf_loop(irg);
1578                 wenv.x = get_irg_link(irg);
1579                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
1580         }
1581
1582         /* -- and now inline. -- */
1583
1584         /* Inline leaves recursively -- we might construct new leaves. */
1585         do {
1586                 did_inline = 0;
1587
1588                 for (i = 0; i < n_irgs; ++i) {
1589                         ir_node *call;
1590                         int phiproj_computed = 0;
1591
1592                         current_ir_graph = get_irp_irg(i);
1593                         env = (inline_irg_env *)get_irg_link(current_ir_graph);
1594
1595                         tail = NULL;
1596                         for (entry = env->call_head; entry != NULL; entry = entry->next) {
1597                                 ir_graph *callee;
1598
1599                                 if (env->n_nodes > maxsize) break;
1600
1601                                 call   = entry->call;
1602                                 callee = entry->callee;
1603
1604                                 if (is_leave(callee) && (
1605                                     is_smaller(callee, leavesize) || (get_irg_inline_property(callee) >= irg_inline_forced))) {
1606                                         if (!phiproj_computed) {
1607                                                 phiproj_computed = 1;
1608                                                 collect_phiprojs(current_ir_graph);
1609                                         }
1610                                         did_inline = inline_method(call, callee);
1611
1612                                         if (did_inline) {
1613                                                 inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);
1614
1615                                                 /* was inlined, must be recomputed */
1616                                                 phiproj_computed = 0;
1617
1618                                                 /* Do some statistics */
1619                                                 env->got_inline = 1;
1620                                                 --env->n_call_nodes;
1621                                                 env->n_nodes += callee_env->n_nodes;
1622                                                 --callee_env->n_callers;
1623
1624                                                 /* remove this call from the list */
1625                                                 if (tail != NULL)
1626                                                         tail->next = entry->next;
1627                                                 else
1628                                                         env->call_head = entry->next;
1629                                                 continue;
1630                                         }
1631                                 }
1632                                 tail = entry;
1633                         }
1634                         env->call_tail = tail;
1635                 }
1636         } while (did_inline);
1637
1638         /* inline other small functions. */
1639         for (i = 0; i < n_irgs; ++i) {
1640                 ir_node *call;
1641                 int phiproj_computed = 0;
1642
1643                 current_ir_graph = get_irp_irg(i);
1644                 env = (inline_irg_env *)get_irg_link(current_ir_graph);
1645
1646                 /* note that the list of possible calls is updated during the process */
1647                 tail = NULL;
1648                 for (entry = env->call_head; entry != NULL; entry = entry->next) {
1649                         ir_graph   *callee;
1650                         pmap_entry *e;
1651
1652                         call   = entry->call;
1653                         callee = entry->callee;
1654
1655                         e = pmap_find(copied_graphs, callee);
1656                         if (e != NULL) {
1657                                 /*
1658                                  * Remap callee if we have a copy.
1659                                  * FIXME: Should we do this only for recursive Calls ?
1660                                  */
1661                                 callee = e->value;
1662                         }
1663
1664                         if (((is_smaller(callee, size) && (env->n_nodes < maxsize)) ||    /* small function */
1665                                 (get_irg_inline_property(callee) >= irg_inline_forced))) {
1666                                 if (current_ir_graph == callee) {
1667                                         /*
1668                                          * Recursive call: we cannot directly inline because we cannot walk
1669                                          * the graph and change it. So we have to make a copy of the graph
1670                                          * first.
1671                                          */
1672
1673                                         inline_irg_env *callee_env;
1674                                         ir_graph       *copy;
1675
1676                                         /*
1677                                          * No copy yet, create one.
1678                                          * Note that recursive methods are never leaves, so it is sufficient
1679                                          * to test this condition here.
1680                                          */
1681                                         copy = create_irg_copy(callee);
1682
1683                                         /* create_irg_copy() destroys the Proj links, recompute them */
1684                                         phiproj_computed = 0;
1685
1686                                         /* allocate new environment */
1687                                         callee_env = alloc_inline_irg_env();
1688                                         set_irg_link(copy, callee_env);
1689
1690                                         assure_cf_loop(copy);
1691                                         wenv.x              = callee_env;
1692                                         wenv.ignore_callers = 1;
1693                                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
1694
1695                                         /*
1696                                          * Enter the entity of the original graph. This is needed
1697                                          * for inline_method(). However, note that ent->irg still points
1698                                          * to callee, NOT to copy.
1699                                          */
1700                                         set_irg_entity(copy, get_irg_entity(callee));
1701
1702                                         pmap_insert(copied_graphs, callee, copy);
1703                                         callee = copy;
1704
1705                                         /* we have only one caller: the original graph */
1706                                         callee_env->n_callers      = 1;
1707                                         callee_env->n_callers_orig = 1;
1708                                 }
1709                                 if (! phiproj_computed) {
1710                                         phiproj_computed = 1;
1711                                         collect_phiprojs(current_ir_graph);
1712                                 }
1713                                 did_inline = inline_method(call, callee);
1714                                 if (did_inline) {
1715                                         inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);
1716
1717                                         /* was inlined, must be recomputed */
1718                                         phiproj_computed = 0;
1719
1720                                         /* callee was inlined. Append its call list. */
1721                                         env->got_inline = 1;
1722                                         --env->n_call_nodes;
1723                                         append_call_list(env, callee_env->call_head);
1724                                         env->n_call_nodes += callee_env->n_call_nodes;
1725                                         env->n_nodes += callee_env->n_nodes;
1726                                         --callee_env->n_callers;
1727
1728                                         /* after we have inlined callee, all called methods inside callee
1729                                            are now called once more */
1730                                         for (centry = callee_env->call_head; centry != NULL; centry = centry->next) {
1731                                                 inline_irg_env *penv = get_irg_link(centry->callee);
1732                                                 ++penv->n_callers;
1733                                         }
1734
1735                                         /* remove this call from the list */
1736                                         if (tail != NULL)
1737                                                 tail->next = entry->next;
1738                                         else
1739                                                 env->call_head = entry->next;
1740                                         continue;
1741                                 }
1742                         }
1743                         tail = entry;
1744                 }
1745                 env->call_tail = tail;
1746         }
1747
1748         for (i = 0; i < n_irgs; ++i) {
1749                 irg = get_irp_irg(i);
1750                 env = (inline_irg_env *)get_irg_link(irg);
1751
1752                 if (env->got_inline) {
1753                         optimize_graph_df(irg);
1754                         optimize_cf(irg);
1755                 }
1756                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
1757                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
1758                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
1759                         env->n_callers_orig, env->n_callers,
1760                         get_entity_name(get_irg_entity(irg))));
1761                 }
1762         }
1763
1764         /* kill the copied graphs: we don't need them anymore */
1765         foreach_pmap(copied_graphs, pm_entry) {
1766                 ir_graph *copy = pm_entry->value;
1767
1768                 /* reset the entity, otherwise it will be deleted in the next step ... */
1769                 set_irg_entity(copy, NULL);
1770                 free_ir_graph(copy);
1771         }
1772         pmap_destroy(copied_graphs);
1773
1774         obstack_free(&temp_obst, NULL);
1775         current_ir_graph = rem;
1776 }
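
/*
 * Illustrative usage (all limits below are arbitrary example values, not
 * tuned recommendations):
 *
 *     inline_leave_functions(750, 400, 100, 0);
 *
 * inlines leaf methods with fewer than 400 nodes and other methods with
 * fewer than 100 nodes, as long as the caller stays below 750 nodes; the
 * last argument 0 means calls to runtime routines are not excluded.
 */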
1777
1778 /**
1779  * Calculate the benefit weight for one argument through which the address of a local variable is transmitted. Returns 0 if an unsupported user is found.
1780  */
1781 static unsigned calc_method_local_weight(ir_node *arg) {
1782         int      i, j, k;
1783         unsigned v, weight = 0;
1784
1785         for (i = get_irn_n_outs(arg) - 1; i >= 0; --i) {
1786                 ir_node *succ = get_irn_out(arg, i);
1787
1788                 switch (get_irn_opcode(succ)) {
1789                 case iro_Load:
1790                 case iro_Store:
1791                         /* Loads and Stores can be removed */
1792                         weight += 3;
1793                         break;
1794                 case iro_Sel:
1795                         /* check if all args are constant */
1796                         for (j = get_Sel_n_indexs(succ) - 1; j >= 0; --j) {
1797                                 ir_node *idx = get_Sel_index(succ, j);
1798                                 if (! is_Const(idx))
1799                                         return 0;
1800                         }
1801                         /* Check users on this Sel. Note: if a 0 is returned here, there was
1802                            some unsupported node. */
1803                         v = calc_method_local_weight(succ);
1804                         if (v == 0)
1805                                 return 0;
1806                         /* we can kill one Sel with constant indexes, this is cheap */
1807                         weight += v + 1;
1808                         break;
1809                 case iro_Id:
1810                         /* Id nodes simply forward the value: follow their users */
1811                         weight += calc_method_local_weight(succ);
1812                         break;
1813                 case iro_Tuple:
1814                         /* unoptimized tuple */
1815                         for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
1816                                 ir_node *pred = get_Tuple_pred(succ, j);
1817                                 if (pred == arg) {
1818                                         /* look for Proj(j) */
1819                                         for (k = get_irn_n_outs(succ) - 1; k >= 0; --k) {
1820                                                 ir_node *succ_succ = get_irn_out(succ, k);
1821                                                 if (is_Proj(succ_succ)) {
1822                                                         if (get_Proj_proj(succ_succ) == j) {
1823                                                                 /* found */
1824                                                                 weight += calc_method_local_weight(succ_succ);
1825                                                         }
1826                                                 } else {
1827                                                         /* this should NOT happen */
1828                                                         return 0;
1829                                                 }
1830                                         }
1831                                 }
1832                         }
1833                         break;
1834                 default:
1835                         /* any other node: unsupported yet or bad. */
1836                         return 0;
1837                 }
1838         }
1839         return weight;
1840 }
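
/*
 * Worked example: for an argument that is used by one Load, one Store and
 * one Sel with constant indexes whose only user is a Load, the weight is
 * 3 (Load) + 3 (Store) + (3 + 1) (Sel) = 10.  Any unsupported user makes
 * the whole weight collapse to 0.
 */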
1841
1842 /**
1843  * Calculate the parameter weights for transmitting the address of a local variable.
1844  */
1845 static void analyze_irg_local_weights(inline_irg_env *env, ir_graph *irg) {
1846         ir_entity *ent = get_irg_entity(irg);
1847         ir_type  *mtp;
1848         int      nparams, i, proj_nr;
1849         ir_node  *irg_args, *arg;
1850
1851         mtp      = get_entity_type(ent);
1852         nparams  = get_method_n_params(mtp);
1853
1854         /* allocate a new array; its existence also serves as the 'already analysed' flag */
1855         env->local_weights = NEW_ARR_D(unsigned, &temp_obst, nparams);
1856
1857         /* If the method has no parameters we have nothing to do. */
1858         if (nparams <= 0)
1859                 return;
1860
1861         assure_irg_outs(irg);
1862         irg_args = get_irg_args(irg);
1863         for (i = get_irn_n_outs(irg_args) - 1; i >= 0; --i) {
1864                 arg     = get_irn_out(irg_args, i);
1865                 proj_nr = get_Proj_proj(arg);
1866                 env->local_weights[proj_nr] = calc_method_local_weight(arg);
1867         }
1868 }
1869
1870 /**
1871  * Calculate the benefice for transmitting a local variable address.
1872  * After inlining, the local variable might be transformed into an
1873  * SSA variable by scalar_replacement().
1874  */
1875 static unsigned get_method_local_adress_weight(ir_graph *callee, int pos) {
1876         inline_irg_env *env = get_irg_link(callee);
1877
1878         if (env->local_weights != NULL) {
1879                 if (pos < ARR_LEN(env->local_weights))
1880                         return env->local_weights[pos];
1881                 return 0;
1882         }
1883
1884         analyze_irg_local_weights(env, callee);
1885
1886         if (pos < ARR_LEN(env->local_weights))
1887                 return env->local_weights[pos];
1888         return 0;
1889 }
1890
1891 /**
1892  * Calculate a benefice value for inlining the given call.
1893  *
1894  * @param call       the call node we have to inspect
1895  * @param callee     the called graph
1896  * @param local_adr  set to 1 on return if the address of a local variable is
1897  *                   transmitted as a parameter
1898  */
1899 static int calc_inline_benefice(ir_node *call, ir_graph *callee, unsigned *local_adr) {
1900         ir_entity *ent = get_irg_entity(callee);
1901         ir_node   *frame_ptr;
1902         ir_type   *mtp;
1903         int       weight = 0;
1904         int       i, n_params, all_const;
1905         unsigned  cc, v;
1906
1907         inline_irg_env *curr_env, *callee_env;
1908
1909         if (get_entity_additional_properties(ent) & mtp_property_noreturn) {
1910                 /* do NOT inline noreturn calls */
1911                 return INT_MIN;
1912         }
1913
1914         /* costs for every passed parameter */
1915         n_params = get_Call_n_params(call);
1916         mtp      = get_entity_type(ent);
1917         cc       = get_method_calling_convention(mtp);
1918         if (cc & cc_reg_param) {
1919                 /* register parameter, smaller costs for register parameters */
1920                 int max_regs = cc & ~cc_bits;
1921
1922                 if (max_regs < n_params)
1923                         weight += max_regs * 2 + (n_params - max_regs) * 5;
1924                 else
1925                         weight += n_params * 2;
1926         } else {
1927                 /* parameters are passed on the stack */
1928                 weight += 5 * n_params;
1929         }
1930
1931         /* constant parameters improve the benefice */
1932         frame_ptr = get_irg_frame(current_ir_graph);
1933         all_const = 1;
1934         for (i = 0; i < n_params; ++i) {
1935                 ir_node *param = get_Call_param(call, i);
1936
1937                 if (is_Const(param)) {
1938                         weight += get_method_param_weight(ent, i);
1939                 } else {
1940                         all_const = 0;
1941                         if (is_SymConst(param))
1942                                 weight += get_method_param_weight(ent, i);
1943                         else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
1944                                 /*
1945                                  * An address of a local variable is transmitted. After inlining,
1946                                  * scalar_replacement might be able to remove the local variable,
1947                                  * so honor this.
1948                                  */
1949                                 v = get_method_local_adress_weight(callee, i);
1950                                 weight += v;
1951                                 if (v > 0)
1952                                         *local_adr = 1;
1953                         }
1954                 }
1955         }
1956
1957         callee_env = get_irg_link(callee);
1958         if (get_entity_visibility(ent) == visibility_local &&
1959             callee_env->n_callers_orig == 1 &&
1960             callee != current_ir_graph) {
1961                 /* we are the only caller, give big bonus */
1962                 weight += 5000;
1963         }
1964
1965         /* do not inline big functions */
1966         weight -= callee_env->n_nodes;
1967
1968         /* reduce the benefice if the current function is already big */
1969         curr_env = get_irg_link(current_ir_graph);
1970         weight -= curr_env->n_nodes / 50;
1971
1972         /* give a bonus for functions with one block */
1973         if (callee_env->n_blocks == 1)
1974                 weight = weight * 3 / 2;
1975
1976         /* and one for small non-recursive functions: we want them to be inlined in almost every case */
1977         else if (callee_env->n_nodes < 20 && !callee_env->recursive)
1978                 weight += 5000;
1979
1980         /* and finally for leaf functions: they do not increase the register pressure
1981            caused by callee-save registers */
1982         else if (callee_env->n_call_nodes == 0)
1983                 weight += 25;
1984
1985         /*
1986          * Reduce the weight for recursive functions IFF not all arguments are constant.
1987          * Inlining recursive functions is rarely good.
1988          */
1989         if (callee_env->recursive && !all_const)
1990                 weight -= 500;
1991
1992         /*
1993          * All arguments constant is probably a good sign, give an extra bonus
1994          */
1995         if (all_const)
1996                 weight += 100;
1997
1998         return weight;
1999 }
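
/*
 * Worked example (all numbers are illustrative): a non-recursive callee
 * whose environment records one block and 10 nodes, called with two
 * non-constant parameters passed on the stack from a caller with 200
 * nodes, gets
 *
 *     weight = 2 * 5 - 10 - 200 / 50 = -4,  then  -4 * 3 / 2 = -6
 *
 * for the single-block bonus.  With inline_threshold = 10 the call is
 * inlined, since -6 > -10.
 */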
2000
2001 /**
2002  * Heuristic inliner. Calculates a benefice value for every call and inlines
2003  * those calls whose benefice exceeds the negated threshold (or that are forced).
2004  */
2005 void inline_functions(int maxsize, int inline_threshold) {
2006         inline_irg_env   *env;
2007         int              i, n_irgs;
2008         ir_graph         *rem;
2009         int              did_inline;
2010         wenv_t           wenv;
2011         call_entry       *curr_call, **last_call;
2012         const call_entry *centry;
2013         pmap             *copied_graphs;
2014         pmap_entry       *pm_entry;
2015
2016         rem = current_ir_graph;
2017         obstack_init(&temp_obst);
2018
2019         /* a map for the copied graphs, used to inline recursive calls */
2020         copied_graphs = pmap_create();
2021
2022         /* extend all irgs by a temporary data structure for inlining. */
2023         n_irgs = get_irp_n_irgs();
2024         for (i = 0; i < n_irgs; ++i)
2025                 set_irg_link(get_irp_irg(i), alloc_inline_irg_env());
2026
2027         /* Precompute information in temporary data structure. */
2028         wenv.ignore_runtime = 0;
2029         wenv.ignore_callers = 0;
2030         for (i = 0; i < n_irgs; ++i) {
2031                 ir_graph *irg = get_irp_irg(i);
2032
2033                 assert(get_irg_phase_state(irg) != phase_building);
2034                 free_callee_info(irg);
2035
2036                 wenv.x         = get_irg_link(irg);
2037                 wenv.last_call = NULL;
2038                 assure_cf_loop(irg);
2039                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
2040         }
2041
2042         /* -- and now inline. -- */
2043         for (i = 0; i < n_irgs; ++i) {
2044                 int      phiproj_computed = 0;
2045                 ir_node  *call;
2046                 ir_graph *irg = get_irp_irg(i);
2047
2048                 current_ir_graph = irg;
2049                 env = get_irg_link(irg);
2050
2051                 /* note that the list of possible calls is updated during the process */
2052                 last_call = &env->call_head;
2053                 for (curr_call = env->call_head; curr_call != NULL;) {
2054                         ir_graph   *callee;
2055                         pmap_entry *e;
2056                         int        benefice;
2057                         unsigned   local_adr;
2058
2059                         if (env->n_nodes > maxsize) break;
2060
2061                         call   = curr_call->call;
2062                         callee = curr_call->callee;
2063
2064                         e = pmap_find(copied_graphs, callee);
2065                         if (e != NULL) {
2066                                 /*
2067                                  * Remap callee if we have a copy.
2068                                  * FIXME: Should we do this only for recursive Calls ?
2069                                  */
2070                                 callee = e->value;
2071                         }
2072
2073                         /* calculate the benefice on the original call to prevent excessive inlining */
2074                         local_adr = 0;
2075                         benefice = calc_inline_benefice(call, callee, &local_adr);
2076                         DB((dbg, LEVEL_2, "In %+F Call %+F has benefice %d\n", irg, callee, benefice));
2077
2078                         if (benefice > -inline_threshold ||
2079                                 (get_irg_inline_property(callee) >= irg_inline_forced)) {
2080                                 if (current_ir_graph == callee) {
2081                                         /*
2082                                          * Recursive call: we cannot directly inline because we cannot walk
2083                                          * the graph and change it. So we have to make a copy of the graph
2084                                          * first.
2085                                          */
2086
2087                                         inline_irg_env *callee_env;
2088                                         ir_graph       *copy;
2089
2090                                         /*
2091                                          * No copy yet, create one.
2092                                          * Note that recursive methods are never leaves, so it is sufficient
2093                                          * to test this condition here.
2094                                          */
2095                                         copy = create_irg_copy(callee);
2096
2097                                         /* create_irg_copy() destroys the Proj links, recompute them */
2098                                         phiproj_computed = 0;
2099
2100                                         /* allocate new environment */
2101                                         callee_env = alloc_inline_irg_env();
2102                                         set_irg_link(copy, callee_env);
2103
2104                                         assure_cf_loop(copy);
2105                                         wenv.x              = callee_env;
2106                                         wenv.ignore_callers = 1;
2107                                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
2108
2109                                         /*
2110                                          * Enter the entity of the original graph. This is needed
2111                                          * for inline_method(). However, note that ent->irg still points
2112                                          * to callee, NOT to copy.
2113                                          */
2114                                         set_irg_entity(copy, get_irg_entity(callee));
2115
2116                                         pmap_insert(copied_graphs, callee, copy);
2117                                         callee = copy;
2118
2119                                         /* we have only one caller: the original graph */
2120                                         callee_env->n_callers      = 1;
2121                                         callee_env->n_callers_orig = 1;
2122                                 }
2123                                 if (! phiproj_computed) {
2124                                         phiproj_computed = 1;
2125                                         collect_phiprojs(current_ir_graph);
2126                                 }
2127                                 did_inline = inline_method(call, callee);
2128                                 if (did_inline) {
2129                                         inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);
2130
2131                                         /* was inlined, must be recomputed */
2132                                         phiproj_computed = 0;
2133
2134                                         /* after we have inlined callee, all called methods inside callee
2135                                            are now called once more */
2136                                         for (centry = callee_env->call_head; centry != NULL; centry = centry->next) {
2137                                                 inline_irg_env *penv = get_irg_link(centry->callee);
2138                                                 ++penv->n_callers;
2139                                         }
2140
2141                                         /* callee was inlined. Append its call list. */
2142                                         env->got_inline = 1;
2143                                         if (local_adr)
2144                                                 env->local_vars = 1;
2145                                         --env->n_call_nodes;
2146                                         curr_call = replace_entry_by_call_list(curr_call, callee_env->call_head);
2147                                         env->n_call_nodes += callee_env->n_call_nodes;
2148                                         env->n_nodes += callee_env->n_nodes;
2149                                         --callee_env->n_callers;
2150
2151                                         /* remove the current call entry from the list */
2152                                         *last_call = curr_call;
2153                                         continue;
2154                                 }
2155                         }
2156                         last_call = &curr_call->next;
2157                         curr_call = curr_call->next;
2158                 }
2159
2160                 if (env->got_inline) {
2161                         /* this irg got calls inlined: optimize it */
2162
2163                         /* scalar replacement does not work well with Tuple nodes, so optimize them away */
2164                         optimize_graph_df(irg);
2165
2166                         if (env->local_vars) {
2167                                 if (scalar_replacement_opt(irg)) {
2168                                         optimize_graph_df(irg);
2169                                 }
2170                         }
2171                         optimize_cf(irg);
2172                 }
2173                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
2174                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
2175                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
2176                         env->n_callers_orig, env->n_callers,
2177                         get_entity_name(get_irg_entity(irg))));
2178                 }
2179         }
2180
2181         /* kill the copied graphs: we don't need them anymore */
2182         foreach_pmap(copied_graphs, pm_entry) {
2183                 ir_graph *copy = pm_entry->value;
2184
2185                 /* reset the entity, otherwise it will be deleted in the next step ... */
2186                 set_irg_entity(copy, NULL);
2187                 free_ir_graph(copy);
2188         }
2189         pmap_destroy(copied_graphs);
2190
2191         obstack_free(&temp_obst, NULL);
2192         current_ir_graph = rem;
2193 }
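
/*
 * Illustrative usage (both values are arbitrary examples):
 *
 *     inline_functions(750, 10);
 *
 * inlines calls whose benefice exceeds -10 (or that are forced), as long
 * as the caller stays below 750 nodes.
 */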
2194
2195 void firm_init_inline(void) {
2196         FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
2197 }