1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All rights reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief    Dead node elimination and Procedure Inlining.
23  * @author   Michael Beck, Goetz Lindenmaier
24  * @version  $Id$
25  */
26 #ifdef HAVE_CONFIG_H
27 # include "config.h"
28 #endif
29
30 #include <limits.h>
31 #include <assert.h>
32
33 #include "irnode_t.h"
34 #include "irgraph_t.h"
35 #include "irprog_t.h"
36
37 #include "iroptimize.h"
38 #include "ircons_t.h"
39 #include "iropt_t.h"
40 #include "irgopt.h"
41 #include "irgmod.h"
42 #include "irgwalk.h"
43
44 #include "adt/array.h"
45 #include "adt/pset.h"
46 #include "adt/pmap.h"
47 #include "adt/pdeq.h"
48 #include "adt/xmalloc.h"
49
50 #include "irouts.h"
51 #include "irloop_t.h"
52 #include "irbackedge_t.h"
53 #include "opt_inline_t.h"
54 #include "cgana.h"
55 #include "trouts.h"
56 #include "error.h"
57
58 #include "analyze_irg_args.h"
59 #include "iredges_t.h"
60 #include "irflag_t.h"
61 #include "irhooks.h"
62 #include "irtools.h"
63
64 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
65
66 /*------------------------------------------------------------------*/
67 /* Routines for dead node elimination / copying garbage collection  */
68 /* of the obstack.                                                  */
69 /*------------------------------------------------------------------*/
70
71 /**
72  * Remember the new node in the old node by using a field all nodes have.
73  */
74 #define set_new_node(oldn, newn)  set_irn_link(oldn, newn)
75
76 /**
77  * Get this new node, before the old node is forgotten.
78  */
79 #define get_new_node(oldn) get_irn_link(oldn)
80
81 /**
82  * Check if a new node was set.
83  */
84 #define has_new_node(n) (get_new_node(n) != NULL)
85
86 /**
87  * We use the block_visited flag to mark that we have computed the
88  * number of useful predecessors for this block.
89  * Further we encode the new arity in this flag in the old blocks.
90  * Remembering the arity is useful, as it saves a lot of pointer
91  * accesses.  This function is called for all Phi and Block nodes
92  * in a Block.
93  */
94 static INLINE int
95 compute_new_arity(ir_node *b) {
96         int i, res, irn_arity;
97         int irg_v, block_v;
98
99         irg_v = get_irg_block_visited(current_ir_graph);
100         block_v = get_Block_block_visited(b);
101         if (block_v >= irg_v) {
102                 /* we computed the number of preds for this block and saved it in the
103                    block_v flag */
104                 return block_v - irg_v;
105         } else {
106                 /* compute the number of good predecessors */
107                 res = irn_arity = get_irn_arity(b);
108                 for (i = 0; i < irn_arity; i++)
109                         if (is_Bad(get_irn_n(b, i))) res--;
110                 /* save it in the flag. */
111                 set_Block_block_visited(b, irg_v + res);
112                 return res;
113         }
114 }
115
116 /**
117  * Copies the node to the new obstack. The Ins of the new node point to
118  * the predecessors on the old obstack.  For block/phi nodes not all
119  * predecessors might be copied.  n->link points to the new node.
120  * For Phi and Block nodes the function allocates in-arrays with an arity
121  * only for useful predecessors.  The arity is determined by counting
122  * the non-bad predecessors of the block.
123  *
124  * @param n    The node to be copied
125  * @param env  if non-NULL, the node number attribute will be copied to the new node
126  *
127  * Note: Also used for loop unrolling.
128  */
129 static void copy_node(ir_node *n, void *env) {
130         ir_node *nn, *block;
131         int new_arity;
132         ir_op *op = get_irn_op(n);
133         (void) env;
134
135         if (op == op_Bad) {
136                 /* node copied already */
137                 return;
138         } else if (op == op_Block) {
139                 block = NULL;
140                 new_arity = compute_new_arity(n);
141                 n->attr.block.graph_arr = NULL;
142         } else {
143                 block = get_nodes_block(n);
144                 if (op == op_Phi) {
145                         new_arity = compute_new_arity(block);
146                 } else {
147                         new_arity = get_irn_arity(n);
148                 }
149         }
150         nn = new_ir_node(get_irn_dbg_info(n),
151                 current_ir_graph,
152                 block,
153                 op,
154                 get_irn_mode(n),
155                 new_arity,
156                 get_irn_in(n) + 1);
157         /* Copy the attributes.  These might point to additional data.  If this
158            was allocated on the old obstack the pointers now are dangling.  This
159            frees e.g. the memory of the graph_arr allocated in new_immBlock. */
160         if (op == op_Block) {
161                 /* we cannot allow blocks WITHOUT macroblock input */
162                 set_Block_MacroBlock(nn, get_Block_MacroBlock(n));
163         }
164         copy_node_attr(n, nn);
165
166 #ifdef DEBUG_libfirm
167         {
168                 int copy_node_nr = env != NULL;
169                 if (copy_node_nr) {
170                         /* for easier debugging, we want to copy the node numbers too */
171                         nn->node_nr = n->node_nr;
172                 }
173         }
174 #endif
175
176         set_new_node(n, nn);
177         hook_dead_node_elim_subst(current_ir_graph, n, nn);
178 }
179
180 /**
181  * Copies new predecessors of old node to new node remembered in link.
182  * Spare the Bad predecessors of Phi and Block nodes.
183  */
184 static void copy_preds(ir_node *n, void *env) {
185         ir_node *nn, *block;
186         int i, j, irn_arity;
187         (void) env;
188
189         nn = get_new_node(n);
190
191         if (is_Block(n)) {
192                 /* copy the macro block header */
193                 ir_node *mbh = get_Block_MacroBlock(n);
194
195                 if (mbh == n) {
196                         /* this block is a macroblock header */
197                         set_Block_MacroBlock(nn, nn);
198                 } else {
199                         /* get the macro block header */
200                         ir_node *nmbh = get_new_node(mbh);
201                         assert(nmbh != NULL);
202                         set_Block_MacroBlock(nn, nmbh);
203                 }
204
205                 /* Don't copy Bad nodes. */
206                 j = 0;
207                 irn_arity = get_irn_arity(n);
208                 for (i = 0; i < irn_arity; i++) {
209                         if (! is_Bad(get_irn_n(n, i))) {
210                                 ir_node *pred = get_irn_n(n, i);
211                                 set_irn_n(nn, j, get_new_node(pred));
212                                 j++;
213                         }
214                 }
215                 /* repair the block visited flag from above misuse. Repair it in both
216                    graphs so that the old one can still be used. */
217                 set_Block_block_visited(nn, 0);
218                 set_Block_block_visited(n, 0);
219                 /* Local optimization could not merge two subsequent blocks if
220                    the in array contained Bads.  Now it's possible.
221                    We don't call optimize_in_place as it requires
222                    that the fields in ir_graph are set properly. */
223                 if ((get_opt_control_flow_straightening()) &&
224                         (get_Block_n_cfgpreds(nn) == 1) &&
225                         is_Jmp(get_Block_cfgpred(nn, 0))) {
226                         ir_node *old = get_nodes_block(get_Block_cfgpred(nn, 0));
227                         if (nn == old) {
228                                 /* Jmp jumps into the block it is in -- handle this self-cycle. */
229                                 assert(is_Bad(get_new_node(get_irg_bad(current_ir_graph))));
230                                 exchange(nn, get_new_node(get_irg_bad(current_ir_graph)));
231                         } else {
232                                 exchange(nn, old);
233                         }
234                 }
235         } else if (is_Phi(n) && get_irn_arity(n) > 0) {
236                 /* Don't copy node if corresponding predecessor in block is Bad.
237                    The Block itself should not be Bad. */
238                 block = get_nodes_block(n);
239                 set_nodes_block(nn, get_new_node(block));
240                 j = 0;
241                 irn_arity = get_irn_arity(n);
242                 for (i = 0; i < irn_arity; i++) {
243                         if (! is_Bad(get_irn_n(block, i))) {
244                                 ir_node *pred = get_irn_n(n, i);
245                                 set_irn_n(nn, j, get_new_node(pred));
246                                 /*if (is_backedge(n, i)) set_backedge(nn, j);*/
247                                 j++;
248                         }
249                 }
250                 /* If the pre walker reached this Phi after the post walker visited the
251                    block, block_visited is > 0. */
252                 set_Block_block_visited(get_nodes_block(n), 0);
253                 /* Compacting the Phi's ins might generate Phis with only one
254                    predecessor. */
255                 if (get_irn_arity(nn) == 1)
256                         exchange(nn, get_irn_n(nn, 0));
257         } else {
258                 irn_arity = get_irn_arity(n);
259                 for (i = -1; i < irn_arity; i++)
260                         set_irn_n(nn, i, get_new_node(get_irn_n(n, i)));
261         }
262         /* Now the new node is complete.  We can add it to the hash table for CSE.
263            @@@ inlining aborts if we identify End. Why? */
264         if (!is_End(nn))
265                 add_identities(current_ir_graph->value_table, nn);
266 }
267
268 /**
269  * Copies the graph recursively, compacts the keep-alives of the end node.
270  *
271  * @param irg           the graph to be copied
272  * @param copy_node_nr  If non-zero, the node number will be copied
273  */
274 static void copy_graph(ir_graph *irg, int copy_node_nr) {
275         ir_node *oe, *ne, *ob, *nb, *om, *nm; /* old end, new end, old bad, new bad, old NoMem, new NoMem */
276         ir_node *ka;      /* keep alive */
277         int i, irn_arity;
278         unsigned long vfl;
279
280         /* Some nodes must be copied by hand, sigh */
281         vfl = get_irg_visited(irg);
282         set_irg_visited(irg, vfl + 1);
283
284         oe = get_irg_end(irg);
285         mark_irn_visited(oe);
286         /* copy the end node by hand, allocate dynamic in array! */
287         ne = new_ir_node(get_irn_dbg_info(oe),
288                 irg,
289                 NULL,
290                 op_End,
291                 mode_X,
292                 -1,
293                 NULL);
294         /* Copy the attributes.  Well, there might be some in the future... */
295         copy_node_attr(oe, ne);
296         set_new_node(oe, ne);
297
298         /* copy the Bad node */
299         ob = get_irg_bad(irg);
300         mark_irn_visited(ob);
301         nb = new_ir_node(get_irn_dbg_info(ob),
302                 irg,
303                 NULL,
304                 op_Bad,
305                 mode_T,
306                 0,
307                 NULL);
308         copy_node_attr(ob, nb);
309         set_new_node(ob, nb);
310
311         /* copy the NoMem node */
312         om = get_irg_no_mem(irg);
313         mark_irn_visited(om);
314         nm = new_ir_node(get_irn_dbg_info(om),
315                 irg,
316                 NULL,
317                 op_NoMem,
318                 mode_M,
319                 0,
320                 NULL);
321         copy_node_attr(om, nm);
322         set_new_node(om, nm);
323
324         /* copy the live nodes */
325         set_irg_visited(irg, vfl);
326         irg_walk(get_nodes_block(oe), copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
327
328         /* Note: from now on, the visited flag of the graph is equal to vfl + 1 */
329
330         /* visit the anchors as well */
331         for (i = get_irg_n_anchors(irg) - 1; i >= 0; --i) {
332                 ir_node *n = get_irg_anchor(irg, i);
333
334                 if (n && (get_irn_visited(n) <= vfl)) {
335                         set_irg_visited(irg, vfl);
336                         irg_walk(n, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
337                 }
338         }
339
340         /* copy_preds for the end node ... */
341         set_nodes_block(ne, get_new_node(get_nodes_block(oe)));
342
343         /*- ... and now the keep alives. -*/
344         /* First pick the unmarked Block nodes and walk them.  We must pick these
345            first, or else we will overlook blocks reachable from Phis. */
346         irn_arity = get_End_n_keepalives(oe);
347         for (i = 0; i < irn_arity; i++) {
348                 ka = get_End_keepalive(oe, i);
349                 if (is_Block(ka)) {
350                         if (get_irn_visited(ka) <= vfl) {
351                                 /* We must keep the block alive and copy everything reachable */
352                                 set_irg_visited(irg, vfl);
353                                 irg_walk(ka, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
354                         }
355                         add_End_keepalive(ne, get_new_node(ka));
356                 }
357         }
358
359         /* Now pick other nodes.  Here we will keep all! */
360         irn_arity = get_End_n_keepalives(oe);
361         for (i = 0; i < irn_arity; i++) {
362                 ka = get_End_keepalive(oe, i);
363                 if (!is_Block(ka)) {
364                         if (get_irn_visited(ka) <= vfl) {
365                                 /* We didn't copy the node yet.  */
366                                 set_irg_visited(irg, vfl);
367                                 irg_walk(ka, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
368                         }
369                         add_End_keepalive(ne, get_new_node(ka));
370                 }
371         }
372
373         /* start block sometimes only reached after keep alives */
374         set_nodes_block(nb, get_new_node(get_nodes_block(ob)));
375         set_nodes_block(nm, get_new_node(get_nodes_block(om)));
376 }
377
378 /**
379  * Copies the graph reachable from current_ir_graph->end to the obstack
380  * in current_ir_graph and fixes the environment.
381  * Then fixes the fields in current_ir_graph containing nodes of the
382  * graph.
383  *
384  * @param copy_node_nr  If non-zero, the node number will be copied
385  */
386 static void
387 copy_graph_env(int copy_node_nr) {
388         ir_graph *irg = current_ir_graph;
389         ir_node *old_end, *new_anchor;
390         int i;
391
392         /* remove end_except and end_reg nodes */
393         old_end = get_irg_end(irg);
394         set_irg_end_except (irg, old_end);
395         set_irg_end_reg    (irg, old_end);
396
397         /* Not all nodes remembered in irg might be reachable
398            from the end node.  Assure their link is set to NULL, so that
399            we can test whether new nodes have been computed. */
400         for (i = get_irg_n_anchors(irg) - 1; i >= 0; --i) {
401                 ir_node *n = get_irg_anchor(irg, i);
402                 if (n != NULL)
403                         set_new_node(n, NULL);
404         }
405         /* we use the block walk flag for removing Bads from Blocks ins. */
406         inc_irg_block_visited(irg);
407
408         /* copy the graph */
409         copy_graph(irg, copy_node_nr);
410
411         /* fix the anchor */
412         old_end    = get_irg_end(irg);
413         new_anchor = new_Anchor(irg);
414
415         for (i = get_irg_n_anchors(irg) - 1; i >= 0; --i) {
416                 ir_node *n = get_irg_anchor(irg, i);
417                 if (n)
418                         set_irn_n(new_anchor, i, get_new_node(n));
419         }
420         free_End(old_end);
421         irg->anchor = new_anchor;
422
423         /* ensure the new anchor is placed in the endblock */
424         set_nodes_block(new_anchor, get_irg_end_block(irg));
425 }
426
427 /**
428  * Copies all reachable nodes to a new obstack.  Removes bad inputs
429  * from block nodes and the corresponding inputs from Phi nodes.
430  * Merges single exit blocks with single entry blocks and removes
431  * 1-input Phis.
432  * Adds all new nodes to a new hash table for CSE.  Does not
433  * perform CSE, so the hash table might contain common subexpressions.
434  */
435 void dead_node_elimination(ir_graph *irg) {
436         ir_graph *rem;
437 #ifdef INTERPROCEDURAL_VIEW
438         int rem_ipview = get_interprocedural_view();
439 #endif
440         struct obstack *graveyard_obst = NULL;
441         struct obstack *rebirth_obst   = NULL;
442
443         edges_deactivate(irg);
444
445         /* inform statistics that we started a dead-node elimination run */
446         hook_dead_node_elim(irg, 1);
447
448         /* Remember external state of current_ir_graph. */
449         rem = current_ir_graph;
450         current_ir_graph = irg;
451 #ifdef INTERPROCEDURAL_VIEW
452         set_interprocedural_view(0);
453 #endif
454
455         assert(get_irg_phase_state(irg) != phase_building);
456
457         /* Handle graph state */
458         free_callee_info(irg);
459         free_irg_outs(irg);
460         free_trouts();
461
462         /* @@@ so far we lose loops when copying */
463         free_loop_information(irg);
464
465         set_irg_doms_inconsistent(irg);
466
467         /* A quiet place, where the old obstack can rest in peace,
468            until it will be cremated. */
469         graveyard_obst = irg->obst;
470
471         /* A new obstack, where the reachable nodes will be copied to. */
472         rebirth_obst = xmalloc(sizeof(*rebirth_obst));
473         irg->obst = rebirth_obst;
474         obstack_init(irg->obst);
475         irg->last_node_idx = 0;
476
477         /* We also need a new value table for CSE */
478         del_identities(irg->value_table);
479         irg->value_table = new_identities();
480
481         /* Copy the graph from the old to the new obstack */
482         copy_graph_env(/*copy_node_nr=*/1);
483
484         /* Free memory from old unoptimized obstack */
485         obstack_free(graveyard_obst, 0);  /* First empty the obstack ... */
486         xfree(graveyard_obst);            /* ... then free it.           */
487
488         /* inform statistics that the run is over */
489         hook_dead_node_elim(irg, 0);
490
491         current_ir_graph = rem;
492 #ifdef INTERPROCEDURAL_VIEW
493         set_interprocedural_view(rem_ipview);
494 #endif
495 }
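
/*
 * Usage sketch (illustrative addition, not part of the original code):
 * dead node elimination is typically run after other optimizations so that
 * the memory of nodes that became unreachable is reclaimed, e.g.:
 *
 *     local_optimize_graph(irg);
 *     dead_node_elimination(irg);
 */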
496
497 /**
498  * Relink bad predecessors of a block and store the old in array to the
499  * link field. This function is called by relink_bad_predecessors().
500  * The array in the link field starts with the block operand at position 0.
501  * If the block has bad predecessors, create a new in array without bad preds.
502  * Otherwise leave the in array untouched.
503  */
504 static void relink_bad_block_predecessors(ir_node *n, void *env) {
505         ir_node **new_in, *irn;
506         int i, new_irn_n, old_irn_arity, new_irn_arity = 0;
507         (void) env;
508
509         /* if link field of block is NULL, look for bad predecessors otherwise
510            this is already done */
511         if (is_Block(n) && get_irn_link(n) == NULL) {
512                 /* save old predecessors in link field (position 0 is the block operand)*/
513                 set_irn_link(n, get_irn_in(n));
514
515                 /* count predecessors without bad nodes */
516                 old_irn_arity = get_irn_arity(n);
517                 for (i = 0; i < old_irn_arity; i++)
518                         if (!is_Bad(get_irn_n(n, i)))
519                                 ++new_irn_arity;
520
521                 /* arity changing: set new predecessors without bad nodes */
522                 if (new_irn_arity < old_irn_arity) {
523                         /* Get new predecessor array. We do not resize the array, as we must
524                            keep the old one to update Phis. */
525                         new_in = NEW_ARR_D(ir_node *, current_ir_graph->obst, (new_irn_arity+1));
526
527                         /* set new predecessors in array */
528                         new_in[0] = NULL;
529                         new_irn_n = 1;
530                         for (i = 0; i < old_irn_arity; i++) {
531                                 irn = get_irn_n(n, i);
532                                 if (!is_Bad(irn)) {
533                                         new_in[new_irn_n] = irn;
534                                         is_backedge(n, i) ? set_backedge(n, new_irn_n-1) : set_not_backedge(n, new_irn_n-1);
535                                         ++new_irn_n;
536                                 }
537                         }
538                         /* ARR_SETLEN(int, n->attr.block.backedge, new_irn_arity); */
539                         ARR_SHRINKLEN(n->attr.block.backedge, new_irn_arity);
540                         n->in = new_in;
541                 } /* ir node has bad predecessors */
542         } /* Block is not relinked */
543 }
544
545 /**
546  * Relinks Bad predecessors of Blocks and Phis, called by the walker in
547  * remove_bad_predecessors(). If n is a Block, call
548  * relink_bad_block_predecessors(). If n is a Phi-node, also call the relinking
549  * function on the Phi's Block. If this block has bad predecessors, relink the
550  * preds of the Phi-node.
551  */
552 static void relink_bad_predecessors(ir_node *n, void *env) {
553         ir_node *block, **old_in;
554         int i, old_irn_arity, new_irn_arity;
555
556         /* relink bad predecessors of a block */
557         if (is_Block(n))
558                 relink_bad_block_predecessors(n, env);
559
560         /* If Phi node relink its block and its predecessors */
561         if (is_Phi(n)) {
562                 /* Relink predecessors of phi's block */
563                 block = get_nodes_block(n);
564                 if (get_irn_link(block) == NULL)
565                         relink_bad_block_predecessors(block, env);
566
567                 old_in = (ir_node **)get_irn_link(block); /* Of Phi's Block */
568                 old_irn_arity = ARR_LEN(old_in);
569
570                 /* Relink Phi predecessors if count of predecessors changed */
571                 if (old_irn_arity != ARR_LEN(get_irn_in(block))) {
572                         /* set new predecessors in array
573                            n->in[0] remains the same block */
574                         new_irn_arity = 1;
575                         for(i = 1; i < old_irn_arity; i++)
576                                 if (!is_Bad(old_in[i])) {
577                                         n->in[new_irn_arity] = n->in[i];
578                                         is_backedge(n, i) ? set_backedge(n, new_irn_arity) : set_not_backedge(n, new_irn_arity);
579                                         ++new_irn_arity;
580                                 }
581
582                         ARR_SETLEN(ir_node *, n->in, new_irn_arity);
583                         ARR_SETLEN(int, n->attr.phi.u.backedge, new_irn_arity);
584                 }
585         } /* n is a Phi node */
586 }
587
588 /*
589  * Removes Bad predecessors from Blocks and the corresponding
590  * inputs to Phi nodes as in dead_node_elimination but without
591  * copying the graph.
592  * On walking up set the link field to NULL, on walking down call
593  * relink_bad_predecessors() (This function stores the old in array
594  * to the link field and sets a new in array if arity of predecessors
595  * changes).
596  */
597 void remove_bad_predecessors(ir_graph *irg) {
598         panic("Fix backedge handling first");
599         irg_walk_graph(irg, firm_clear_link, relink_bad_predecessors, NULL);
600 }
601
602
603 /*
604    __                      _  __ __
605   (_     __    o     _    | \/  |_
606   __)|_| | \_/ | \_/(/_   |_/\__|__
607
608   The following stuff implements a facility that automatically patches
609   registered ir_node pointers to the new node when a dead node elimination occurs.
610 */
611
612 struct _survive_dce_t {
613         struct obstack obst;
614         pmap *places;
615         pmap *new_places;
616         hook_entry_t dead_node_elim;
617         hook_entry_t dead_node_elim_subst;
618 };
619
620 typedef struct _survive_dce_list_t {
621         struct _survive_dce_list_t *next;
622         ir_node **place;
623 } survive_dce_list_t;
624
625 static void dead_node_hook(void *context, ir_graph *irg, int start) {
626         survive_dce_t *sd = context;
627         (void) irg;
628
629         /* Create a new map before the dead node elimination is performed. */
630         if (start) {
631                 sd->new_places = pmap_create_ex(pmap_count(sd->places));
632         } else {
633                 /* Patch back all nodes if dead node elimination is over and something is to be done. */
634                 pmap_destroy(sd->places);
635                 sd->places     = sd->new_places;
636                 sd->new_places = NULL;
637         }
638 }
639
640 /**
641  * Hook called when dead node elimination replaces old by nw.
642  */
643 static void dead_node_subst_hook(void *context, ir_graph *irg, ir_node *old, ir_node *nw) {
644         survive_dce_t *sd = context;
645         survive_dce_list_t *list = pmap_get(sd->places, old);
646         (void) irg;
647
648         /* If the node is to be patched back, write the new address to all registered locations. */
649         if (list) {
650                 survive_dce_list_t *p;
651
652                 for (p = list; p; p = p->next)
653                         *(p->place) = nw;
654
655                 pmap_insert(sd->new_places, nw, list);
656         }
657 }
658
659 /**
660  * Make a new Survive DCE environment.
661  */
662 survive_dce_t *new_survive_dce(void) {
663         survive_dce_t *res = xmalloc(sizeof(res[0]));
664         obstack_init(&res->obst);
665         res->places     = pmap_create();
666         res->new_places = NULL;
667
668         res->dead_node_elim.hook._hook_dead_node_elim = dead_node_hook;
669         res->dead_node_elim.context                   = res;
670         res->dead_node_elim.next                      = NULL;
671
672         res->dead_node_elim_subst.hook._hook_dead_node_elim_subst = dead_node_subst_hook;
673         res->dead_node_elim_subst.context = res;
674         res->dead_node_elim_subst.next    = NULL;
675
676 #ifndef FIRM_ENABLE_HOOKS
677         assert(0 && "need hooks enabled");
678 #endif
679
680         register_hook(hook_dead_node_elim, &res->dead_node_elim);
681         register_hook(hook_dead_node_elim_subst, &res->dead_node_elim_subst);
682         return res;
683 }
684
685 /**
686  * Free a Survive DCE environment.
687  */
688 void free_survive_dce(survive_dce_t *sd) {
689         obstack_free(&sd->obst, NULL);
690         pmap_destroy(sd->places);
691         unregister_hook(hook_dead_node_elim, &sd->dead_node_elim);
692         unregister_hook(hook_dead_node_elim_subst, &sd->dead_node_elim_subst);
693         xfree(sd);
694 }
695
696 /**
697  * Register a node pointer to be patched upon DCE.
698  * When DCE occurs, the node pointer specified by @p place will be
699  * patched to the new address of the node it is pointing to.
700  *
701  * @param sd    The Survive DCE environment.
702  * @param place The address of the node pointer.
703  */
704 void survive_dce_register_irn(survive_dce_t *sd, ir_node **place) {
705         if (*place != NULL) {
706                 ir_node *irn      = *place;
707                 survive_dce_list_t *curr = pmap_get(sd->places, irn);
708                 survive_dce_list_t *nw   = obstack_alloc(&sd->obst, sizeof(nw[0]));
709
710                 nw->next  = curr;
711                 nw->place = place;
712
713                 pmap_insert(sd->places, irn, nw);
714         }
715 }
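
/*
 * Usage sketch (illustrative addition): a pass that needs an ir_node pointer
 * to stay valid across dead node elimination registers its address first.
 * The variable name "my_node" is hypothetical.
 *
 *     survive_dce_t *sd = new_survive_dce();
 *     survive_dce_register_irn(sd, &my_node);
 *     dead_node_elimination(irg);
 *     free_survive_dce(sd);
 *
 * After dead_node_elimination() returns, my_node points to the copy of the
 * node on the new obstack.
 */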
716
717 /*--------------------------------------------------------------------*/
718 /*  Functionality for inlining                                         */
719 /*--------------------------------------------------------------------*/
720
721 /**
722  * Copy node for inlining.  Updates attributes that change when
723  * inlining but not for dead node elimination.
724  *
725  * Copies the node by calling copy_node() and then updates the entity if
726  * it's a local one.  env must be a pointer to the frame type of the
727  * inlined procedure. The new entities must be in the link field of
728  * the entities.
729  */
730 static void copy_node_inline(ir_node *n, void *env) {
731         ir_node *nn;
732         ir_type *frame_tp = (ir_type *)env;
733
734         copy_node(n, NULL);
735         if (is_Sel(n)) {
736                 nn = get_new_node (n);
737                 assert(is_Sel(nn));
738                 if (get_entity_owner(get_Sel_entity(n)) == frame_tp) {
739                         set_Sel_entity(nn, get_entity_link(get_Sel_entity(n)));
740                 }
741         } else if (is_Block(n)) {
742                 nn = get_new_node (n);
743                 nn->attr.block.irg = current_ir_graph;
744         }
745 }
746
747 /**
748  * Copies new predecessors of old node and move constants to
749  * the Start Block.
750  */
751 static void copy_preds_inline(ir_node *n, void *env) {
752         ir_node *nn;
753
754         copy_preds(n, env);
755         nn = skip_Id(get_new_node(n));
756         if (is_irn_constlike(nn)) {
757                 /* move Constants into the start block */
758                 set_nodes_block(nn, get_irg_start_block(current_ir_graph));
759
760                 n = identify_remember(current_ir_graph->value_table, nn);
761                 if (nn != n) {
762                         exchange(nn, n);
763                 }
764         }
765 }
766
767 /**
768  * Walker: checks if P_value_arg_base is used.
769  */
770 static void find_addr(ir_node *node, void *env) {
771         int *allow_inline = env;
772         if (is_Proj(node) &&
773                         is_Start(get_Proj_pred(node)) &&
774                         get_Proj_proj(node) == pn_Start_P_value_arg_base) {
775                 *allow_inline = 0;
776         } else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
777                 /* From GCC:
778                  * Refuse to inline alloca call unless user explicitly forced so as this
779                  * may change program's memory overhead drastically when the function
780                  * using alloca is called in loop.  In GCC present in SPEC2000 inlining
781                  * into schedule_block cause it to require 2GB of ram instead of 256MB.
782                  *
783                  * Sadly this is true for our implementation as well.
784                  * Moreover, we cannot differentiate between alloca() and VLA yet, so this
785                  * disables inlining of functions using VLA (which are completely safe).
786                  *
787                  * 2 Solutions:
788                  * - add a flag to the Alloc node for "real" alloca() calls
789                  * - add a new Stack-Restore node at the end of a function using alloca()
790                  */
791                 *allow_inline = 0;
792         }
793 }
794
795 /**
796  * Check if we can inline a given call.
797  * Currently, we cannot inline two cases:
798  * - calls with compound arguments
799  * - graphs that take the address of a parameter
800  *
801  * check these conditions here
802  */
803 static int can_inline(ir_node *call, ir_graph *called_graph) {
804         ir_type *call_type = get_Call_type(call);
805         int params, ress, i, res;
806         assert(is_Method_type(call_type));
807
808         params = get_method_n_params(call_type);
809         ress   = get_method_n_ress(call_type);
810
811         /* check parameters for compound arguments */
812         for (i = 0; i < params; ++i) {
813                 ir_type *p_type = get_method_param_type(call_type, i);
814
815                 if (is_compound_type(p_type))
816                         return 0;
817         }
818
819         /* check results for compound arguments */
820         for (i = 0; i < ress; ++i) {
821                 ir_type *r_type = get_method_res_type(call_type, i);
822
823                 if (is_compound_type(r_type))
824                         return 0;
825         }
826
827         res = 1;
828         irg_walk_graph(called_graph, find_addr, NULL, &res);
829
830         return res;
831 }
832
833 enum exc_mode {
834         exc_handler    = 0, /**< There is a handler. */
835         exc_to_end     = 1, /**< Branches to End. */
836         exc_no_handler = 2  /**< Exception handling not represented. */
837 };
838
839 /* Inlines a method at the given call site. */
840 int inline_method(ir_node *call, ir_graph *called_graph) {
841         ir_node             *pre_call;
842         ir_node             *post_call, *post_bl;
843         ir_node             *in[pn_Start_max];
844         ir_node             *end, *end_bl, *block;
845         ir_node             **res_pred;
846         ir_node             **cf_pred;
847         ir_node             **args_in;
848         ir_node             *ret, *phi;
849         int                 arity, n_ret, n_exc, n_res, i, n, j, rem_opt, irn_arity, n_params;
850         enum exc_mode       exc_handling;
851         ir_type             *called_frame, *curr_frame, *mtp, *ctp;
852         ir_entity           *ent;
853         ir_graph            *rem, *irg;
854         irg_inline_property prop = get_irg_inline_property(called_graph);
855         unsigned long       visited;
856
857         if (prop == irg_inline_forbidden)
858                 return 0;
859
860         ent = get_irg_entity(called_graph);
861
862         mtp = get_entity_type(ent);
863         ctp = get_Call_type(call);
864         if (get_method_n_params(mtp) > get_method_n_params(ctp)) {
865                 /* this is a bad feature of C: without a prototype, we can call a function with fewer
866                 parameters than needed. Currently we don't support this, although it would be
867                 possible to use Unknown then. */
868                 return 0;
869         }
870
871         /* Argh, compiling C has some bad consequences:
872            the call type AND the method type might be different.
873            It is implementation dependent what happens in that case.
874            We support inlining if the bit sizes of the types match AND
875            the same arithmetic is used. */
876         n_params = get_method_n_params(mtp);
877         for (i = n_params - 1; i >= 0; --i) {
878                 ir_type *param_tp = get_method_param_type(mtp, i);
879                 ir_type *arg_tp   = get_method_param_type(ctp, i);
880
881                 if (param_tp != arg_tp) {
882                         ir_mode *pmode = get_type_mode(param_tp);
883                         ir_mode *amode = get_type_mode(arg_tp);
884
885                         if (pmode == NULL || amode == NULL)
886                                 return 0;
887                         if (get_mode_size_bits(pmode) != get_mode_size_bits(amode))
888                                 return 0;
889                         if (get_mode_arithmetic(pmode) != get_mode_arithmetic(amode))
890                                 return 0;
891                         /* otherwise we can simply "reinterpret" the bits */
892                 }
893         }
894
895         irg = get_irn_irg(call);
896
897         /*
898          * We cannot inline a recursive call. The graph must be copied before
899          * the call the inline_method() using create_irg_copy().
900          */
901         if (called_graph == irg)
902                 return 0;
903
904         /*
905          * currently, we cannot inline two cases:
906          * - calls with compound arguments
907          * - graphs that take the address of a parameter
908          */
909         if (! can_inline(call, called_graph))
910                 return 0;
911
912         rem = current_ir_graph;
913         current_ir_graph = irg;
914
915         DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));
916
917         /* -- Turn off optimizations: they can cause problems when allocating new nodes. -- */
918         rem_opt = get_opt_optimize();
919         set_optimize(0);
920
921         /* Handle graph state */
922         assert(get_irg_phase_state(irg) != phase_building);
923         assert(get_irg_pinned(irg) == op_pin_state_pinned);
924         assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
925         set_irg_outs_inconsistent(irg);
926         set_irg_extblk_inconsistent(irg);
927         set_irg_doms_inconsistent(irg);
928         set_irg_loopinfo_inconsistent(irg);
929         set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
930
931         /* -- Check preconditions -- */
932         assert(is_Call(call));
933
934         /* here we know we WILL inline, so inform the statistics */
935         hook_inline(call, called_graph);
936
937         /* -- Decide how to handle exception control flow: Is there a handler
938            for the Call node, or do we branch directly to End on an exception?
939            exc_handling:
940            0 There is a handler.
941            1 Branches to End.
942            2 Exception handling not represented in Firm. -- */
943         {
944                 ir_node *proj, *Mproj = NULL, *Xproj = NULL;
945                 for (proj = get_irn_link(call); proj; proj = get_irn_link(proj)) {
946                         long proj_nr = get_Proj_proj(proj);
947                         if (proj_nr == pn_Call_X_except) Xproj = proj;
948                         if (proj_nr == pn_Call_M_except) Mproj = proj;
949                 }
950                 if      (Mproj) { assert(Xproj); exc_handling = exc_handler; } /*  Mproj           */
951                 else if (Xproj) {                exc_handling = exc_to_end; } /* !Mproj &&  Xproj   */
952                 else            {                exc_handling = exc_no_handler; } /* !Mproj && !Xproj   */
953         }
954
955         /* create the argument tuple */
956         NEW_ARR_A(ir_node *, args_in, n_params);
957
958         block = get_nodes_block(call);
959         for (i = n_params - 1; i >= 0; --i) {
960                 ir_node *arg      = get_Call_param(call, i);
961                 ir_type *param_tp = get_method_param_type(mtp, i);
962                 ir_mode *mode     = get_type_mode(param_tp);
963
964                 if (mode != get_irn_mode(arg)) {
965                         arg = new_r_Conv(irg, block, arg, mode);
966                 }
967                 args_in[i] = arg;
968         }
969
970         /* --
971            Pre_call is a Tuple that collects the entry state (control, memory, frame, TLS, arguments)
972            Post_call is the old Call node and collects the results of the called
973            graph. Both will end up being a tuple.  -- */
974         post_bl = get_nodes_block(call);
975         set_irg_current_block(irg, post_bl);
976         /* XxMxPxPxPxT of Start + parameter of Call */
977         in[pn_Start_X_initial_exec]   = new_Jmp();
978         in[pn_Start_M]                = get_Call_mem(call);
979         in[pn_Start_P_frame_base]     = get_irg_frame(irg);
980         in[pn_Start_P_tls]            = get_irg_tls(irg);
981         in[pn_Start_T_args]           = new_Tuple(n_params, args_in);
982         /* in[pn_Start_P_value_arg_base] = ??? */
983         assert(pn_Start_P_value_arg_base == pn_Start_max - 1 && "pn_Start_P_value_arg_base not supported, fix");
984         pre_call = new_Tuple(pn_Start_max - 1, in);
985         post_call = call;
986
987         /* --
988            The new block gets the ins of the old block, pre_call and all its
989            predecessors and all Phi nodes. -- */
990         part_block(pre_call);
991
992         /* -- Prepare state for dead node elimination -- */
993         /* Visited flags in calling irg must be >= flag in called irg.
994            Else walker and arity computation will not work. */
995         if (get_irg_visited(irg) <= get_irg_visited(called_graph))
996                 set_irg_visited(irg, get_irg_visited(called_graph) + 1);
997         if (get_irg_block_visited(irg) < get_irg_block_visited(called_graph))
998                 set_irg_block_visited(irg, get_irg_block_visited(called_graph));
999         visited = get_irg_visited(irg);
1000
1001         /* Set pre_call as the new Start node in the link field of the start node
1002            of the called graph, and pre_call's block as the new block for the start
1003            block of the called graph.
1004            Further mark these nodes so that they are not visited by the
1005            copying. */
1006         set_irn_link(get_irg_start(called_graph), pre_call);
1007         set_irn_visited(get_irg_start(called_graph), visited);
1008         set_irn_link(get_irg_start_block(called_graph), get_nodes_block(pre_call));
1009         set_irn_visited(get_irg_start_block(called_graph), visited);
1010
1011         set_irn_link(get_irg_bad(called_graph), get_irg_bad(current_ir_graph));
1012         set_irn_visited(get_irg_bad(called_graph), visited);
1013
1014         set_irn_link(get_irg_no_mem(called_graph), get_irg_no_mem(current_ir_graph));
1015         set_irn_visited(get_irg_no_mem(called_graph), visited);
1016
1017         /* Initialize for compaction of in arrays */
1018         inc_irg_block_visited(irg);
1019
1020         /* -- Replicate local entities of the called_graph -- */
1021         /* copy the entities. */
1022         called_frame = get_irg_frame_type(called_graph);
1023         curr_frame   = get_irg_frame_type(irg);
1024         for (i = 0, n = get_class_n_members(called_frame); i < n; ++i) {
1025                 ir_entity *new_ent, *old_ent;
1026                 old_ent = get_class_member(called_frame, i);
1027                 new_ent = copy_entity_own(old_ent, curr_frame);
1028                 set_entity_link(old_ent, new_ent);
1029         }
1030
1031         /* visited is greater than that of the called graph.  With this trick visited will
1032            remain unchanged so that an outer walker, e.g., one searching for call nodes
1033            to inline that calls this inliner, will not visit the inlined nodes. */
1034         set_irg_visited(irg, get_irg_visited(irg)-1);
1035
1036         /* -- Performing dead node elimination inlines the graph -- */
1037         /* Copies the nodes to the obstack of current_ir_graph. Updates links to new
1038            entities. */
1039         irg_walk(get_irg_end(called_graph), copy_node_inline, copy_preds_inline,
1040                  get_irg_frame_type(called_graph));
1041
1042         /* Repair called_graph */
1043         set_irg_visited(called_graph, get_irg_visited(irg));
1044         set_irg_block_visited(called_graph, get_irg_block_visited(irg));
1045         set_Block_block_visited(get_irg_start_block(called_graph), 0);
1046
1047         /* -- Merge the end of the inlined procedure with the call site -- */
1048         /* We will turn the old Call node into a Tuple with the following
1049            predecessors:
1050            -1:  Block of Tuple.
1051            0: Phi of all Memories of Return statements.
1052            1: Jmp from new Block that merges the control flow from all exception
1053            predecessors of the old end block.
1054            2: Tuple of all arguments.
1055            3: Phi of Exception memories.
1056            In case the old Call directly branches to End on an exception we don't
1057            need the block merging all exceptions nor the Phi of the exception
1058            memories.
1059         */
1060
1061         /* -- Precompute some values -- */
1062         end_bl = get_new_node(get_irg_end_block(called_graph));
1063         end = get_new_node(get_irg_end(called_graph));
1064         arity = get_irn_arity(end_bl);    /* arity = n_exc + n_ret  */
1065         n_res = get_method_n_ress(get_Call_type(call));
1066
1067         res_pred = xmalloc(n_res * sizeof(*res_pred));
1068         cf_pred  = xmalloc(arity * sizeof(*cf_pred));
1069
1070         set_irg_current_block(irg, post_bl); /* just to make sure */
1071
1072         /* -- preserve the keepalives of the inlined graph -- */
1073         irn_arity = get_irn_arity(end);
1074         for (i = 0; i < irn_arity; i++) {
1075                 ir_node *ka = get_End_keepalive(end, i);
1076                 if (! is_Bad(ka))
1077                         add_End_keepalive(get_irg_end(irg), ka);
1078         }
1079
1080         /* The new end node will die.  We need not free as the in array is on the obstack:
1081            copy_node() only generated 'D' arrays. */
1082
1083         /* -- Replace Return nodes by Jump nodes. -- */
1084         n_ret = 0;
1085         for (i = 0; i < arity; i++) {
1086                 ir_node *ret;
1087                 ret = get_irn_n(end_bl, i);
1088                 if (is_Return(ret)) {
1089                         cf_pred[n_ret] = new_r_Jmp(irg, get_nodes_block(ret));
1090                         n_ret++;
1091                 }
1092         }
1093         set_irn_in(post_bl, n_ret, cf_pred);
1094
1095         /* -- Build a Tuple for all results of the method.
1096            Add Phi node if there was more than one Return.  -- */
1097         turn_into_tuple(post_call, pn_Call_max);
1098         /* First the Memory-Phi */
1099         n_ret = 0;
1100         for (i = 0; i < arity; i++) {
1101                 ret = get_irn_n(end_bl, i);
1102                 if (is_Return(ret)) {
1103                         cf_pred[n_ret] = get_Return_mem(ret);
1104                         n_ret++;
1105                 }
1106         }
1107         phi = new_Phi(n_ret, cf_pred, mode_M);
1108         set_Tuple_pred(call, pn_Call_M_regular, phi);
1109         /* Conserve Phi-list for further inlinings -- but might be optimized */
1110         if (get_nodes_block(phi) == post_bl) {
1111                 set_irn_link(phi, get_irn_link(post_bl));
1112                 set_irn_link(post_bl, phi);
1113         }
1114         /* Now the real results */
1115         if (n_res > 0) {
1116                 for (j = 0; j < n_res; j++) {
1117                         n_ret = 0;
1118                         for (i = 0; i < arity; i++) {
1119                                 ret = get_irn_n(end_bl, i);
1120                                 if (is_Return(ret)) {
1121                                         cf_pred[n_ret] = get_Return_res(ret, j);
1122                                         n_ret++;
1123                                 }
1124                         }
1125                         if (n_ret > 0)
1126                                 phi = new_Phi(n_ret, cf_pred, get_irn_mode(cf_pred[0]));
1127                         else
1128                                 phi = new_Bad();
1129                         res_pred[j] = phi;
1130                         /* Conserve Phi-list for further inlinings -- but might be optimized */
1131                         if (get_nodes_block(phi) == post_bl) {
1132                                 set_Phi_next(phi, get_Block_phis(post_bl));
1133                                 set_Block_phis(post_bl, phi);
1134                         }
1135                 }
1136                 set_Tuple_pred(call, pn_Call_T_result, new_Tuple(n_res, res_pred));
1137         } else {
1138                 set_Tuple_pred(call, pn_Call_T_result, new_Bad());
1139         }
1140         /* handle the regular call */
1141         set_Tuple_pred(call, pn_Call_X_regular, new_Jmp());
1142
1143         /* For now, we cannot inline calls with value_base */
1144         set_Tuple_pred(call, pn_Call_P_value_res_base, new_Bad());
1145
1146         /* Finally the exception control flow.
1147            We have two (three) possible situations:
1148            First if the Call branches to an exception handler: We need to add a Phi node to
1149            collect the memory containing the exception objects.  Further we need
1150            to add another block to get a correct representation of this Phi.  To
1151            this block we add a Jmp that resolves into the X output of the Call
1152            when the Call is turned into a tuple.
1153            Second the Call branches to End, the exception is not handled.  Just
1154            add all inlined exception branches to the End node.
1155            Third: there is no Exception edge at all. Handle as case two. */
1156         if (exc_handling == exc_handler) {
1157                 n_exc = 0;
1158                 for (i = 0; i < arity; i++) {
1159                         ir_node *ret, *irn;
1160                         ret = get_irn_n(end_bl, i);
1161                         irn = skip_Proj(ret);
1162                         if (is_fragile_op(irn) || is_Raise(irn)) {
1163                                 cf_pred[n_exc] = ret;
1164                                 ++n_exc;
1165                         }
1166                 }
1167                 if (n_exc > 0) {
1168                         new_Block(n_exc, cf_pred);      /* watch it: current_block is changed! */
1169                         set_Tuple_pred(call, pn_Call_X_except, new_Jmp());
1170                         /* The Phi for the memories with the exception objects */
1171                         n_exc = 0;
1172                         for (i = 0; i < arity; i++) {
1173                                 ir_node *ret;
1174                                 ret = skip_Proj(get_irn_n(end_bl, i));
1175                                 if (is_Call(ret)) {
1176                                         cf_pred[n_exc] = new_r_Proj(irg, get_nodes_block(ret), ret, mode_M, 3);
1177                                         n_exc++;
1178                                 } else if (is_fragile_op(ret)) {
1179                                         /* We rely on all cfops having the memory output at the same position. */
1180                                         cf_pred[n_exc] = new_r_Proj(irg, get_nodes_block(ret), ret, mode_M, 0);
1181                                         n_exc++;
1182                                 } else if (is_Raise(ret)) {
1183                                         cf_pred[n_exc] = new_r_Proj(irg, get_nodes_block(ret), ret, mode_M, 1);
1184                                         n_exc++;
1185                                 }
1186                         }
1187                         set_Tuple_pred(call, pn_Call_M_except, new_Phi(n_exc, cf_pred, mode_M));
1188                 } else {
1189                         set_Tuple_pred(call, pn_Call_X_except, new_Bad());
1190                         set_Tuple_pred(call, pn_Call_M_except, new_Bad());
1191                 }
1192         } else {
1193                 ir_node *main_end_bl;
1194                 int main_end_bl_arity;
1195                 ir_node **end_preds;
1196
1197                 /* assert(exc_handling == 1 || no exceptions. ) */
1198                 n_exc = 0;
1199                 for (i = 0; i < arity; i++) {
1200                         ir_node *ret = get_irn_n(end_bl, i);
1201                         ir_node *irn = skip_Proj(ret);
1202
1203                         if (is_fragile_op(irn) || is_Raise(irn)) {
1204                                 cf_pred[n_exc] = ret;
1205                                 n_exc++;
1206                         }
1207                 }
1208                 main_end_bl = get_irg_end_block(irg);
1209                 main_end_bl_arity = get_irn_arity(main_end_bl);
1210                 end_preds =  xmalloc((n_exc + main_end_bl_arity) * sizeof(*end_preds));
1211
1212                 for (i = 0; i < main_end_bl_arity; ++i)
1213                         end_preds[i] = get_irn_n(main_end_bl, i);
1214                 for (i = 0; i < n_exc; ++i)
1215                         end_preds[main_end_bl_arity + i] = cf_pred[i];
1216                 set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
1217                 set_Tuple_pred(call, pn_Call_X_except,  new_Bad());
1218                 set_Tuple_pred(call, pn_Call_M_except,  new_Bad());
1219                 free(end_preds);
1220         }
1221         free(res_pred);
1222         free(cf_pred);
1223
1224         /* -- Turn the optimizations back on. -- */
1225         set_optimize(rem_opt);
1226         current_ir_graph = rem;
1227
1228         return 1;
1229 }
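
/*
 * Usage sketch (illustrative addition): inlining one call site by hand.  The
 * variable names are hypothetical.  As noted above, collect_phiprojs() must
 * have been run on the calling graph, and a recursive callee has to be
 * duplicated with create_irg_copy() first, because inline_method() refuses
 * to inline a graph into itself.
 *
 *     ir_graph *caller = get_irn_irg(call);
 *     collect_phiprojs(caller);
 *     if (callee == caller)
 *             callee = create_irg_copy(callee);
 *     if (inline_method(call, callee)) {
 *             ... the Call node has been replaced by a Tuple ...
 *     }
 */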
1230
1231 /********************************************************************/
1232 /* Apply inlining to small methods.                                 */
1233 /********************************************************************/
1234
1235 static struct obstack  temp_obst;
1236
1237 /** Represents a possible inlinable call in a graph. */
1238 typedef struct _call_entry call_entry;
1239 struct _call_entry {
1240         ir_node    *call;      /**< the Call node */
1241         ir_graph   *callee;    /**< the callee IR-graph called here */
1242         call_entry *next;      /**< for linking the next one */
1243         int        loop_depth; /**< the loop depth of this call */
1244 };
1245
1246 /**
1247  * environment for inlining small irgs
1248  */
1249 typedef struct _inline_env_t {
1250         struct obstack obst;  /**< an obstack where call_entries are allocated on. */
1251         call_entry *head;     /**< the head of the call entry list */
1252         call_entry *tail;     /**< the tail of the call entry list */
1253 } inline_env_t;
1254
1255 /**
1256  * Returns the irg called from a Call node. If the irg is not
1257  * known, NULL is returned.
1258  *
1259  * @param call  the call node
1260  */
1261 static ir_graph *get_call_called_irg(ir_node *call) {
1262         ir_node *addr;
1263
1264         addr = get_Call_ptr(call);
1265         if (is_Global(addr)) {
1266                 ir_entity *ent = get_Global_entity(addr);
1267                 return get_entity_irg(ent);
1268         }
1269
1270         return NULL;
1271 }
1272
1273 /**
1274  * Walker: Collect all calls to known graphs inside a graph.
1275  */
1276 static void collect_calls(ir_node *call, void *env) {
1277         if (is_Call(call)) {
1278                 ir_graph *called_irg = get_call_called_irg(call);
1279
1280                 if (called_irg != NULL) {
1281                         /* The Call node calls a locally defined method.  Remember to inline. */
1282                         inline_env_t *ienv  = env;
1283                         call_entry   *entry = obstack_alloc(&ienv->obst, sizeof(*entry));
1284                         entry->call       = call;
1285                         entry->callee     = called_irg;
1286                         entry->next       = NULL;
1287                         entry->loop_depth = 0;
1288
1289                         if (ienv->tail == NULL)
1290                                 ienv->head = entry;
1291                         else
1292                                 ienv->tail->next = entry;
1293                         ienv->tail = entry;
1294                 }
1295         }
1296 }
1297
1298 /**
1299  * Inlines all small methods at call sites where the called address comes
1300  * from a Const node that references the entity representing the called
1301  * method.
1302  * The size argument is a rough measure for the code size of the method:
1303  * Methods where the obstack containing the firm graph is smaller than
1304  * size are inlined.
1305  */
1306 void inline_small_irgs(ir_graph *irg, int size) {
1307         ir_graph *rem = current_ir_graph;
1308         inline_env_t env;
1309         call_entry *entry;
1310
1311         current_ir_graph = irg;
1312         /* Handle graph state */
1313         assert(get_irg_phase_state(irg) != phase_building);
1314         free_callee_info(irg);
1315
1316         /* Find Call nodes to inline.
1317            (We cannot inline during a walk of the graph, as inlining the same
1318            method several times changes the visited flag of the walked graph:
1319            after the first inlining, visited of the callee equals visited of
1320            the caller.  With the next inlining both are increased.) */
1321         obstack_init(&env.obst);
1322         env.head = env.tail = NULL;
1323         irg_walk_graph(irg, NULL, collect_calls, &env);
1324
1325         if (env.head != NULL) {
1326                 /* There are calls to inline */
1327                 collect_phiprojs(irg);
1328                 for (entry = env.head; entry != NULL; entry = entry->next) {
1329                         ir_graph *callee = entry->callee;
1330                         if (((_obstack_memory_used(callee->obst) - (int)obstack_room(callee->obst)) < size) ||
1331                             (get_irg_inline_property(callee) >= irg_inline_forced)) {
1332                                 inline_method(entry->call, callee);
1333                         }
1334                 }
1335         }
1336         obstack_free(&env.obst, NULL);
1337         current_ir_graph = rem;
1338 }
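
/*
 * Illustrative usage sketch (the driver function and the size bound 5000 are
 * assumptions, not part of libFirm; only the calls into the irp are real API):
 *
 *     static void run_small_inlining(void) {
 *         int i, n_irgs = get_irp_n_irgs();
 *         for (i = 0; i < n_irgs; ++i)
 *             inline_small_irgs(get_irp_irg(i), 5000);
 *     }
 */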
1339
1340 /**
1341  * Environment for inlining irgs.
1342  */
1343 typedef struct {
1344         int n_nodes;             /**< Number of nodes in graph except Proj, Tuple, Sync, Block, Start, End. */
1345         int n_blocks;            /**< Number of Blocks in graph without Start and End block. */
1346         int n_nodes_orig;        /**< for statistics */
1347         int n_call_nodes;        /**< Number of Call nodes in the graph. */
1348         int n_call_nodes_orig;   /**< for statistics */
1349         int n_callers;           /**< Number of known graphs that call this graph. */
1350         int n_callers_orig;      /**< for statistics */
1351         unsigned got_inline:1;   /**< Set, if at least one call inside this graph was inlined. */
1352         unsigned local_vars:1;   /**< Set, if the address of a local variable is passed to an inlined function. */
1353         unsigned recursive:1;    /**< Set, if this function is self-recursive. */
1354         call_entry *call_head;   /**< The head of the list of all call nodes in this graph. */
1355         call_entry *call_tail;   /**< The tail of the list of all call nodes in this graph. */
1356         unsigned *local_weights; /**< Once allocated, the beneficial weight for transmitting local addresses. */
1357 } inline_irg_env;
1358
1359 /**
1360  * Allocate a new environment for inlining.
1361  */
1362 static inline_irg_env *alloc_inline_irg_env(void) {
1363         inline_irg_env *env    = obstack_alloc(&temp_obst, sizeof(*env));
1364         env->n_nodes           = -2; /* do not count Start, End */
1365         env->n_blocks          = -2; /* do not count Start, End block */
1366         env->n_nodes_orig      = -2; /* do not count Start, End */
1367         env->call_head         = NULL;
1368         env->call_tail         = NULL;
1369         env->n_call_nodes      = 0;
1370         env->n_call_nodes_orig = 0;
1371         env->n_callers         = 0;
1372         env->n_callers_orig    = 0;
1373         env->got_inline        = 0;
1374         env->local_vars        = 0;
1375         env->recursive         = 0;
1376         env->local_weights     = NULL;
1377         return env;
1378 }
1379
1380 typedef struct walker_env {
1381         inline_irg_env *x;     /**< the inline environment */
1382         call_entry *last_call; /**< points to the last inserted call */
1383         char ignore_runtime;   /**< the ignore runtime flag */
1384         char ignore_callers;   /**< if set, do not update the callers' data */
1385 } wenv_t;
1386
1387 /**
1388  * post-walker: collect all calls in the inline-environment
1389  * of a graph and accumulate some statistics.
1390  */
1391 static void collect_calls2(ir_node *call, void *ctx) {
1392         wenv_t         *env = ctx;
1393         inline_irg_env *x = env->x;
1394         ir_opcode      code = get_irn_opcode(call);
1395         ir_graph       *callee;
1396         call_entry     *entry;
1397
1398         /* count meaningful nodes in irg */
1399         if (code != iro_Proj && code != iro_Tuple && code != iro_Sync) {
1400                 if (code != iro_Block) {
1401                         ++x->n_nodes;
1402                         ++x->n_nodes_orig;
1403                 } else {
1404                         ++x->n_blocks;
1405                 }
1406         }
1407
1408         if (code != iro_Call) return;
1409
1410         /* check whether this is a runtime call */
1411         if (env->ignore_runtime) {
1412                 ir_node *symc = get_Call_ptr(call);
1413
1414                 if (is_Global(symc)) {
1415                         ir_entity *ent = get_Global_entity(symc);
1416
1417                         if (get_entity_additional_properties(ent) & mtp_property_runtime)
1418                                 return;
1419                 }
1420         }
1421
1422         /* collect all call nodes */
1423         ++x->n_call_nodes;
1424         ++x->n_call_nodes_orig;
1425
1426         callee = get_call_called_irg(call);
1427         if (callee != NULL) {
1428                 if (! env->ignore_callers) {
1429                         inline_irg_env *callee_env = get_irg_link(callee);
1430                         /* count all static callers */
1431                         ++callee_env->n_callers;
1432                         ++callee_env->n_callers_orig;
1433                 }
1434                 if (callee == current_ir_graph)
1435                         x->recursive = 1;
1436
1437                 /* link it into the list of possibly inlinable entries */
1438                 entry = obstack_alloc(&temp_obst, sizeof(*entry));
1439                 entry->call       = call;
1440                 entry->callee     = callee;
1441                 entry->next       = NULL;
1442                 entry->loop_depth = get_irn_loop(get_nodes_block(call))->depth;
1443
1444                 /* note: last_call points to the last entry inserted into this list */
1445                 if (x->call_head == NULL) {
1446                         x->call_head = entry;
1447                 } else {
1448                         if (entry->loop_depth == env->last_call->loop_depth) {
1449                                 /* same depth as the last one, enqueue after it */
1450                                 entry->next          = env->last_call->next;
1451                                 env->last_call->next = entry;
1452                         } else if (entry->loop_depth > x->call_head->loop_depth) {
1453                                 /* put first */
1454                                 entry->next  = x->call_head;
1455                                 x->call_head = entry;
1456                         } else {
1457                                 /* search the insertion point */
1458                                 call_entry *p;
1459
1460                                 for (p = x->call_head; p->next != NULL; p = p->next)
1461                                         if (entry->loop_depth > p->next->loop_depth)
1462                                                 break;
1463                                 entry->next = p->next;
1464                                 p->next     = entry;
1465                         }
1466                 }
1467                 env->last_call = entry;
1468                 if (entry->next == NULL) {
1469                         /* keep tail up to date */
1470                         x->call_tail = entry;
1471                 }
1472         }
1473 }
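
/*
 * The insertion logic above keeps each call list sorted by non-increasing
 * loop depth.  Illustration (the depths are made-up example values): visiting
 * calls with loop depths 0, 2, 1, 2 yields the list order 2, 2, 1, 0, so the
 * most deeply nested calls are considered for inlining first.
 */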
1474
1475 /**
1476  * Returns TRUE if the number of Call nodes is 0 in the irg's environment,
1477  * hence this irg is a leave (it calls no other graphs).
1478  */
1479 INLINE static int is_leave(ir_graph *irg) {
1480         inline_irg_env *env = get_irg_link(irg);
1481         return env->n_call_nodes == 0;
1482 }
1483
1484 /**
1485  * Returns TRUE if the number of nodes in the callee's environment is
1486  * smaller than size.
1487  */
1488 INLINE static int is_smaller(ir_graph *callee, int size) {
1489         inline_irg_env *env = get_irg_link(callee);
1490         return env->n_nodes < size;
1491 }
1492
1493 /**
1494  * Append copies of the entries of list src to the call list of environment dst.
1495  */
1496 static void append_call_list(inline_irg_env *dst, call_entry *src) {
1497         call_entry *entry, *nentry;
1498
1499         /* Note that the src list points to Call nodes in the inlined graph, but
1500            we need Call nodes in our graph. Luckily the inliner leaves this information
1501            in the link field. */
1502         for (entry = src; entry != NULL; entry = entry->next) {
1503                 nentry = obstack_alloc(&temp_obst, sizeof(*nentry));
1504                 nentry->call         = get_irn_link(entry->call);
1505                 nentry->callee       = entry->callee;
1506                 nentry->next         = NULL;
1507                 nentry->loop_depth   = entry->loop_depth;
1508                 dst->call_tail->next = nentry;
1509                 dst->call_tail       = nentry;
1510         }
1511 }
1512
1513 /**
1514  * Replace the list entry dst by copies of the entries of list src (with adjusted loop depth) and return the new head.
1515  */
1516 static call_entry *replace_entry_by_call_list(call_entry *dst, call_entry *src) {
1517         call_entry *entry, *nentry, *head, *tail;
1518
1519         /* Note that the src list points to Call nodes in the inlined graph, but
1520            we need Call nodes in our graph. Luckily the inliner leaves this information
1521            in the link field. */
1522         head = tail = NULL;
1523         for (entry = src; entry != NULL; entry = entry->next) {
1524                 nentry = obstack_alloc(&temp_obst, sizeof(*nentry));
1525                 nentry->call         = get_irn_link(entry->call);
1526                 nentry->callee       = entry->callee;
1527                 nentry->next         = NULL;
1528                 nentry->loop_depth   = entry->loop_depth + dst->loop_depth;
1529                 if (head == NULL)
1530                         head = nentry;
1531                 else
1532                         tail->next = nentry;
1533                 tail = nentry;
1534         }
1535         /* link the copies in front of dst's successor; dst itself is dropped */
1536         if (head != NULL) {
1537                 tail->next = dst->next;
1538         } else {
1539                 head = dst->next;
1540         }
1541         return head;
1542 }
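
/*
 * Example (illustrative numbers): if the replaced entry dst sits at loop
 * depth 2 and the callee's list contains entries of depth 0 and 1, their
 * copies are spliced in with depths 2 and 3, i.e. the callee's nesting is
 * offset by the nesting of the call that was just inlined.
 */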
1543
1544 /*
1545  * Inlines small leave methods at call sites where the called address comes
1546  * from a SymConst node that references the entity representing the called
1547  * method.
1548  * The size arguments are rough measures for the code size of a method:
1549  * leave methods with fewer than leavesize nodes and other methods with
1550  * fewer than size nodes are inlined, as long as the caller stays below maxsize nodes.
1551  */
1552 void inline_leave_functions(int maxsize, int leavesize, int size, int ignore_runtime) {
1553         inline_irg_env   *env;
1554         ir_graph         *irg;
1555         int              i, n_irgs;
1556         ir_graph         *rem;
1557         int              did_inline;
1558         wenv_t           wenv;
1559         call_entry       *entry, *tail;
1560         const call_entry *centry;
1561         pmap             *copied_graphs;
1562         pmap_entry       *pm_entry;
1563
1564         rem = current_ir_graph;
1565         obstack_init(&temp_obst);
1566
1567         /* a map for the copied graphs, used to inline recursive calls */
1568         copied_graphs = pmap_create();
1569
1570         /* extend all irgs by a temporary data structure for inlining. */
1571         n_irgs = get_irp_n_irgs();
1572         for (i = 0; i < n_irgs; ++i)
1573                 set_irg_link(get_irp_irg(i), alloc_inline_irg_env());
1574
1575         /* Precompute information in temporary data structure. */
1576         wenv.ignore_runtime = ignore_runtime;
1577         wenv.ignore_callers = 0;
1578         for (i = 0; i < n_irgs; ++i) {
1579                 ir_graph *irg = get_irp_irg(i);
1580
1581                 assert(get_irg_phase_state(irg) != phase_building);
1582                 free_callee_info(irg);
1583
1584                 assure_cf_loop(irg);
1585                 wenv.x = get_irg_link(irg);
1586                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
1587         }
1588
1589         /* -- and now inline. -- */
1590
1591         /* Inline leaves recursively -- we might construct new leaves. */
1592         do {
1593                 did_inline = 0;
1594
1595                 for (i = 0; i < n_irgs; ++i) {
1596                         ir_node *call;
1597                         int phiproj_computed = 0;
1598
1599                         current_ir_graph = get_irp_irg(i);
1600                         env = (inline_irg_env *)get_irg_link(current_ir_graph);
1601
1602                         tail = NULL;
1603                         for (entry = env->call_head; entry != NULL; entry = entry->next) {
1604                                 ir_graph *callee;
1605
1606                                 if (env->n_nodes > maxsize) break;
1607
1608                                 call   = entry->call;
1609                                 callee = entry->callee;
1610
1611                                 if (is_leave(callee) && (
1612                                     is_smaller(callee, leavesize) || (get_irg_inline_property(callee) >= irg_inline_forced))) {
1613                                         if (!phiproj_computed) {
1614                                                 phiproj_computed = 1;
1615                                                 collect_phiprojs(current_ir_graph);
1616                                         }
1617                                         did_inline = inline_method(call, callee);
1618
1619                                         if (did_inline) {
1620                                                 inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);
1621
1622                                                 /* was inlined, must be recomputed */
1623                                                 phiproj_computed = 0;
1624
1625                                                 /* Do some statistics */
1626                                                 env->got_inline = 1;
1627                                                 --env->n_call_nodes;
1628                                                 env->n_nodes += callee_env->n_nodes;
1629                                                 --callee_env->n_callers;
1630
1631                                                 /* remove this call from the list */
1632                                                 if (tail != NULL)
1633                                                         tail->next = entry->next;
1634                                                 else
1635                                                         env->call_head = entry->next;
1636                                                 continue;
1637                                         }
1638                                 }
1639                                 tail = entry;
1640                         }
1641                         env->call_tail = tail;
1642                 }
1643         } while (did_inline);
1644
1645         /* inline other small functions. */
1646         for (i = 0; i < n_irgs; ++i) {
1647                 ir_node *call;
1648                 int phiproj_computed = 0;
1649
1650                 current_ir_graph = get_irp_irg(i);
1651                 env = (inline_irg_env *)get_irg_link(current_ir_graph);
1652
1653                 /* note that the list of possible calls is updated during the process */
1654                 tail = NULL;
1655                 for (entry = env->call_head; entry != NULL; entry = entry->next) {
1656                         ir_graph   *callee;
1657                         pmap_entry *e;
1658
1659                         call   = entry->call;
1660                         callee = entry->callee;
1661
1662                         e = pmap_find(copied_graphs, callee);
1663                         if (e != NULL) {
1664                                 /*
1665                                  * Remap callee if we have a copy.
1666                                  * FIXME: Should we do this only for recursive Calls ?
1667                                  */
1668                                 callee = e->value;
1669                         }
1670
1671                         if (((is_smaller(callee, size) && (env->n_nodes < maxsize)) ||    /* small function */
1672                                 (get_irg_inline_property(callee) >= irg_inline_forced))) {
1673                                 if (current_ir_graph == callee) {
1674                                         /*
1675                                          * Recursive call: we cannot directly inline because we cannot walk
1676                                          * the graph and change it. So we have to make a copy of the graph
1677                                          * first.
1678                                          */
1679
1680                                         inline_irg_env *callee_env;
1681                                         ir_graph       *copy;
1682
1683                                         /*
1684                                          * No copy yet, create one.
1685                                          * Note that recursive methods are never leaves, so it is sufficient
1686                                          * to test this condition here.
1687                                          */
1688                                         copy = create_irg_copy(callee);
1689
1690                                         /* create_irg_copy() destroys the Proj links, recompute them */
1691                                         phiproj_computed = 0;
1692
1693                                         /* allocate new environment */
1694                                         callee_env = alloc_inline_irg_env();
1695                                         set_irg_link(copy, callee_env);
1696
1697                                         assure_cf_loop(copy);
1698                                         wenv.x              = callee_env;
1699                                         wenv.ignore_callers = 1;
1700                                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
1701
1702                                         /*
1703                                          * Enter the entity of the original graph. This is needed
1704                                          * for inline_method(). However, note that ent->irg still points
1705                                          * to callee, NOT to copy.
1706                                          */
1707                                         set_irg_entity(copy, get_irg_entity(callee));
1708
1709                                         pmap_insert(copied_graphs, callee, copy);
1710                                         callee = copy;
1711
1712                                         /* we have only one caller: the original graph */
1713                                         callee_env->n_callers      = 1;
1714                                         callee_env->n_callers_orig = 1;
1715                                 }
1716                                 if (! phiproj_computed) {
1717                                         phiproj_computed = 1;
1718                                         collect_phiprojs(current_ir_graph);
1719                                 }
1720                                 did_inline = inline_method(call, callee);
1721                                 if (did_inline) {
1722                                         inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);
1723
1724                                         /* was inlined, must be recomputed */
1725                                         phiproj_computed = 0;
1726
1727                                         /* callee was inlined. Append its call list. */
1728                                         env->got_inline = 1;
1729                                         --env->n_call_nodes;
1730                                         append_call_list(env, callee_env->call_head);
1731                                         env->n_call_nodes += callee_env->n_call_nodes;
1732                                         env->n_nodes += callee_env->n_nodes;
1733                                         --callee_env->n_callers;
1734
1735                                         /* after we have inlined callee, all called methods inside callee
1736                                            are now called once more */
1737                                         for (centry = callee_env->call_head; centry != NULL; centry = centry->next) {
1738                                                 inline_irg_env *penv = get_irg_link(centry->callee);
1739                                                 ++penv->n_callers;
1740                                         }
1741
1742                                         /* remove this call from the list */
1743                                         if (tail != NULL)
1744                                                 tail->next = entry->next;
1745                                         else
1746                                                 env->call_head = entry->next;
1747                                         continue;
1748                                 }
1749                         }
1750                         tail = entry;
1751                 }
1752                 env->call_tail = tail;
1753         }
1754
1755         for (i = 0; i < n_irgs; ++i) {
1756                 irg = get_irp_irg(i);
1757                 env = (inline_irg_env *)get_irg_link(irg);
1758
1759                 if (env->got_inline) {
1760                         optimize_graph_df(irg);
1761                         optimize_cf(irg);
1762                 }
1763                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
1764                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
1765                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
1766                         env->n_callers_orig, env->n_callers,
1767                         get_entity_name(get_irg_entity(irg))));
1768                 }
1769         }
1770
1771         /* kill the copied graphs: we don't need them anymore */
1772         foreach_pmap(copied_graphs, pm_entry) {
1773                 ir_graph *copy = pm_entry->value;
1774
1775                 /* reset the entity, otherwise it will be deleted in the next step ... */
1776                 set_irg_entity(copy, NULL);
1777                 free_ir_graph(copy);
1778         }
1779         pmap_destroy(copied_graphs);
1780
1781         obstack_free(&temp_obst, NULL);
1782         current_ir_graph = rem;
1783 }
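
/*
 * Illustrative usage sketch (the numeric limits are example values, not
 * libFirm defaults): inline leave methods below 500 nodes and other methods
 * below 80 nodes, let no caller grow beyond 4000 nodes, and do not skip
 * runtime calls:
 *
 *     inline_leave_functions(4000, 500, 80, 0);
 */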
1784
1785 /**
1786  * Calculate the benefice weight for one argument that receives the address of a local variable.
1787  */
1788 static unsigned calc_method_local_weight(ir_node *arg) {
1789         int      i, j, k;
1790         unsigned v, weight = 0;
1791
1792         for (i = get_irn_n_outs(arg) - 1; i >= 0; --i) {
1793                 ir_node *succ = get_irn_out(arg, i);
1794
1795                 switch (get_irn_opcode(succ)) {
1796                 case iro_Load:
1797                 case iro_Store:
1798                         /* Loads and Stores can be removed */
1799                         weight += 3;
1800                         break;
1801                 case iro_Sel:
1802                         /* check if all args are constant */
1803                         for (j = get_Sel_n_indexs(succ) - 1; j >= 0; --j) {
1804                                 ir_node *idx = get_Sel_index(succ, j);
1805                                 if (! is_Const(idx))
1806                                         return 0;
1807                         }
1808                         /* Check users on this Sel. Note: if a 0 is returned here, there was
1809                            some unsupported node. */
1810                         v = calc_method_local_weight(succ);
1811                         if (v == 0)
1812                                 return 0;
1813                         /* we can kill one Sel with constant indexes, this is cheap */
1814                         weight += v + 1;
1815                         break;
1816                 case iro_Id:
1817                         /* an Id node among the users: look through it and weight its users */
1818                         weight += calc_method_local_weight(succ);
1819                         break;
1820                 case iro_Tuple:
1821                         /* unoptimized tuple */
1822                         for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
1823                                 ir_node *pred = get_Tuple_pred(succ, j);
1824                                 if (pred == arg) {
1825                                         /* look for Proj(j) */
1826                                         for (k = get_irn_n_outs(succ) - 1; k >= 0; --k) {
1827                                                 ir_node *succ_succ = get_irn_out(succ, k);
1828                                                 if (is_Proj(succ_succ)) {
1829                                                         if (get_Proj_proj(succ_succ) == j) {
1830                                                                 /* found */
1831                                                                 weight += calc_method_local_weight(succ_succ);
1832                                                         }
1833                                                 } else {
1834                                                         /* this should NOT happen */
1835                                                         return 0;
1836                                                 }
1837                                         }
1838                                 }
1839                         }
1840                         break;
1841                 default:
1842                         /* any other node: unsupported yet or bad. */
1843                         return 0;
1844                 }
1845         }
1846         return weight;
1847 }
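
/*
 * Worked example (illustrative): if an argument carrying a local variable's
 * address is used by exactly one Load and one Store, its weight is 3 + 3 = 6.
 * A Sel with only constant indexes contributes the weight of its own users
 * plus 1; a Sel with a non-constant index, a user weight of 0, or any
 * unsupported user makes the whole computation return 0 ("no benefit known").
 */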
1848
1849 /**
1850  * Calculate the parameter weights for transmitting the address of a local variable.
1851  */
1852 static void analyze_irg_local_weights(inline_irg_env *env, ir_graph *irg) {
1853         ir_entity *ent = get_irg_entity(irg);
1854         ir_type  *mtp;
1855         int      nparams, i, proj_nr;
1856         ir_node  *irg_args, *arg;
1857
1858         mtp      = get_entity_type(ent);
1859         nparams  = get_method_n_params(mtp);
1860
1861         /* allocate a new array; its presence also serves as the 'already analysed' flag */
1862         env->local_weights = NEW_ARR_D(unsigned, &temp_obst, nparams);
1863
1864         /* If the method has no parameters, we have nothing to do. */
1865         if (nparams <= 0)
1866                 return;
1867
1868         assure_irg_outs(irg);
1869         irg_args = get_irg_args(irg);
1870         for (i = get_irn_n_outs(irg_args) - 1; i >= 0; --i) {
1871                 arg     = get_irn_out(irg_args, i);
1872                 proj_nr = get_Proj_proj(arg);
1873                 env->local_weights[proj_nr] = calc_method_local_weight(arg);
1874         }
1875 }
1876
1877 /**
1878  * Calculate the benefice for transmitting a local variable address.
1879  * After inlining, the local variable might be transformed into an
1880  * SSA variable by scalar_replacement().
1881  */
1882 static unsigned get_method_local_adress_weight(ir_graph *callee, int pos) {
1883         inline_irg_env *env = get_irg_link(callee);
1884
1885         if (env->local_weights != NULL) {
1886                 if (pos < ARR_LEN(env->local_weights))
1887                         return env->local_weights[pos];
1888                 return 0;
1889         }
1890
1891         analyze_irg_local_weights(env, callee);
1892
1893         if (pos < ARR_LEN(env->local_weights))
1894                 return env->local_weights[pos];
1895         return 0;
1896 }
1897
1898 /**
1899  * Calculate a benefice value for inlining the given call.
1900  *
1901  * @param call       the call node we have to inspect
1902  * @param callee     the called graph
1903  * @param local_adr  set to non-zero on return if the address of a local
1904  *                   variable is passed as a parameter
1905  */
1906 static int calc_inline_benefice(ir_node *call, ir_graph *callee, unsigned *local_adr) {
1907         ir_entity *ent = get_irg_entity(callee);
1908         ir_node   *frame_ptr;
1909         ir_type   *mtp;
1910         int       weight = 0;
1911         int       i, n_params, all_const;
1912         unsigned  cc, v;
1913
1914         inline_irg_env *curr_env, *callee_env;
1915
1916         if (get_entity_additional_properties(ent) & mtp_property_noreturn) {
1917                 /* do NOT inline noreturn calls */
1918                 return INT_MIN;
1919         }
1920
1921         /* costs for every passed parameter */
1922         n_params = get_Call_n_params(call);
1923         mtp      = get_entity_type(ent);
1924         cc       = get_method_calling_convention(mtp);
1925         if (cc & cc_reg_param) {
1926                 /* register calling convention: register parameters are cheaper */
1927                 int max_regs = cc & ~cc_bits;
1928
1929                 if (max_regs < n_params)
1930                         weight += max_regs * 2 + (n_params - max_regs) * 5;
1931                 else
1932                         weight += n_params * 2;
1933         } else {
1934                 /* parameters are passed on the stack */
1935                 weight += 5 * n_params;
1936         }
1937
1938         /* constant parameters improve the benefice */
1939         frame_ptr = get_irg_frame(current_ir_graph);
1940         all_const = 1;
1941         for (i = 0; i < n_params; ++i) {
1942                 ir_node *param = get_Call_param(call, i);
1943
1944                 if (is_Const(param)) {
1945                         weight += get_method_param_weight(ent, i);
1946                 } else {
1947                         all_const = 0;
1948                         if (is_SymConst(param))
1949                                 weight += get_method_param_weight(ent, i);
1950                         else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
1951                                 /*
1952                                  * An address of a local variable is transmitted. After inlining,
1953                                  * scalar_replacement might be able to remove the local variable,
1954                                  * so honor this.
1955                                  */
1956                                 v = get_method_local_adress_weight(callee, i);
1957                                 weight += v;
1958                                 if (v > 0)
1959                                         *local_adr = 1;
1960                         }
1961                 }
1962         }
1963
1964         callee_env = get_irg_link(callee);
1965         if (get_entity_visibility(ent) == visibility_local &&
1966             callee_env->n_callers_orig == 1 &&
1967             callee != current_ir_graph) {
1968                 /* we are the only caller, give big bonus */
1969                 weight += 5000;
1970         }
1971
1972         /* do not inline big functions */
1973         weight -= callee_env->n_nodes;
1974
1975         /* reduce the benefice if the current function is already big */
1976         curr_env = get_irg_link(current_ir_graph);
1977         weight -= curr_env->n_nodes / 50;
1978
1979         /* give a bonus for functions with one block */
1980         if (callee_env->n_blocks == 1)
1981                 weight = weight * 3 / 2;
1982
1983         /* and one for small non-recursive functions: we want them to be inlined in almost every case */
1984         else if (callee_env->n_nodes < 20 && !callee_env->recursive)
1985                 weight += 5000;
1986
1987         /* and finally for leaves: they do not increase the register pressure
1988            because of callee-save registers */
1989         else if (callee_env->n_call_nodes == 0)
1990                 weight += 25;
1991
1992         /*
1993          * Reduce the weight for recursive functions iff not all arguments are const.
1994          * Inlining recursive functions is rarely beneficial.
1995          */
1996         if (callee_env->recursive && !all_const)
1997                 weight -= 500;
1998
1999         /*
2000          * All arguments constant is probably a good sign, give an extra bonus
2001          */
2002         if (all_const)
2003                 weight += 100;
2004
2005         return weight;
2006 }
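
/*
 * Worked example (all numbers are illustrative assumptions): a call passing
 * 3 non-constant parameters on the stack (weight 5 * 3 = 15) to a callee of
 * 40 nodes in a single block, from a 1000-node caller that is not the only
 * caller, gives
 *
 *     15 - 40 - 1000/50 = -45,  then  -45 * 3 / 2 = -67  (single-block bonus).
 *
 * inline_functions() inlines this call only if -67 > -inline_threshold, i.e.
 * if the threshold is larger than 67, or if the callee is irg_inline_forced.
 */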
2007
2008 /**
2009  * Heuristic inliner. Calculates a benefice value for every call and inlines
2010  * those calls with a value higher than the threshold.
2011  */
2012 void inline_functions(int maxsize, int inline_threshold) {
2013         inline_irg_env   *env;
2014         int              i, n_irgs;
2015         ir_graph         *rem;
2016         int              did_inline;
2017         wenv_t           wenv;
2018         call_entry       *curr_call, **last_call;
2019         const call_entry *centry;
2020         pmap             *copied_graphs;
2021         pmap_entry       *pm_entry;
2022
2023         rem = current_ir_graph;
2024         obstack_init(&temp_obst);
2025
2026         /* a map for the copied graphs, used to inline recursive calls */
2027         copied_graphs = pmap_create();
2028
2029         /* extend all irgs by a temporary data structure for inlining. */
2030         n_irgs = get_irp_n_irgs();
2031         for (i = 0; i < n_irgs; ++i)
2032                 set_irg_link(get_irp_irg(i), alloc_inline_irg_env());
2033
2034         /* Precompute information in temporary data structure. */
2035         wenv.ignore_runtime = 0;
2036         wenv.ignore_callers = 0;
2037         for (i = 0; i < n_irgs; ++i) {
2038                 ir_graph *irg = get_irp_irg(i);
2039
2040                 assert(get_irg_phase_state(irg) != phase_building);
2041                 free_callee_info(irg);
2042
2043                 wenv.x         = get_irg_link(irg);
2044                 wenv.last_call = NULL;
2045                 assure_cf_loop(irg);
2046                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
2047         }
2048
2049         /* -- and now inline. -- */
2050         for (i = 0; i < n_irgs; ++i) {
2051                 int      phiproj_computed = 0;
2052                 ir_node  *call;
2053                 ir_graph *irg = get_irp_irg(i);
2054
2055                 current_ir_graph = irg;
2056                 env = get_irg_link(irg);
2057
2058                 /* note that the list of possible calls is updated during the process */
2059                 last_call = &env->call_head;
2060                 for (curr_call = env->call_head; curr_call != NULL;) {
2061                         ir_graph   *callee;
2062                         pmap_entry *e;
2063                         int        benefice;
2064                         unsigned   local_adr;
2065
2066                         if (env->n_nodes > maxsize) break;
2067
2068                         call   = curr_call->call;
2069                         callee = curr_call->callee;
2070
2071                         e = pmap_find(copied_graphs, callee);
2072                         if (e != NULL) {
2073                                 /*
2074                                  * Remap callee if we have a copy.
2075                                  * FIXME: Should we do this only for recursive Calls ?
2076                                  */
2077                                 callee = e->value;
2078                         }
2079
2080                         /* calculate the benefice on the original call to prevent excessive inlining */
2081                         local_adr = 0;
2082                         benefice = calc_inline_benefice(call, callee, &local_adr);
2083                         DB((dbg, LEVEL_2, "In %+F Call %+F has benefice %d\n", irg, callee, benefice));
2084
2085                         if (benefice > -inline_threshold ||
2086                                 (get_irg_inline_property(callee) >= irg_inline_forced)) {
2087                                 if (current_ir_graph == callee) {
2088                                         /*
2089                                          * Recursive call: we cannot directly inline because we cannot walk
2090                                          * the graph and change it. So we have to make a copy of the graph
2091                                          * first.
2092                                          */
2093
2094                                         inline_irg_env *callee_env;
2095                                         ir_graph       *copy;
2096
2097                                         /*
2098                                          * No copy yet, create one.
2099                                          * Note that recursive methods are never leaves, so it is sufficient
2100                                          * to test this condition here.
2101                                          */
2102                                         copy = create_irg_copy(callee);
2103
2104                                         /* create_irg_copy() destroys the Proj links, recompute them */
2105                                         phiproj_computed = 0;
2106
2107                                         /* allocate new environment */
2108                                         callee_env = alloc_inline_irg_env();
2109                                         set_irg_link(copy, callee_env);
2110
2111                                         assure_cf_loop(copy);
2112                                         wenv.x              = callee_env;
2113                                         wenv.ignore_callers = 1;
2114                                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
2115
2116                                         /*
2117                                          * Enter the entity of the original graph. This is needed
2118                                          * for inline_method(). However, note that ent->irg still points
2119                                          * to callee, NOT to copy.
2120                                          */
2121                                         set_irg_entity(copy, get_irg_entity(callee));
2122
2123                                         pmap_insert(copied_graphs, callee, copy);
2124                                         callee = copy;
2125
2126                                         /* we have only one caller: the original graph */
2127                                         callee_env->n_callers      = 1;
2128                                         callee_env->n_callers_orig = 1;
2129                                 }
2130                                 if (! phiproj_computed) {
2131                                         phiproj_computed = 1;
2132                                         collect_phiprojs(current_ir_graph);
2133                                 }
2134                                 did_inline = inline_method(call, callee);
2135                                 if (did_inline) {
2136                                         inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);
2137
2138                                         /* was inlined, must be recomputed */
2139                                         phiproj_computed = 0;
2140
2141                                         /* after we have inlined callee, all called methods inside callee
2142                                            are now called once more */
2143                                         for (centry = callee_env->call_head; centry != NULL; centry = centry->next) {
2144                                                 inline_irg_env *penv = get_irg_link(centry->callee);
2145                                                 ++penv->n_callers;
2146                                         }
2147
2148                                         /* callee was inlined. Append its call list. */
2149                                         env->got_inline = 1;
2150                                         if (local_adr)
2151                                                 env->local_vars = 1;
2152                                         --env->n_call_nodes;
2153                                         curr_call = replace_entry_by_call_list(curr_call, callee_env->call_head);
2154                                         env->n_call_nodes += callee_env->n_call_nodes;
2155                                         env->n_nodes += callee_env->n_nodes;
2156                                         --callee_env->n_callers;
2157
2158                                         /* the current entry was replaced by the callee's call list: relink it */
2159                                         *last_call = curr_call;
2160                                         continue;
2161                                 }
2162                         }
2163                         last_call = &curr_call->next;
2164                         curr_call = curr_call->next;
2165                 }
2166
2167         }
2168
2169         for (i = 0; i < n_irgs; ++i) {
2170                 ir_graph *irg = get_irp_irg(i);
2171
2172                 env = get_irg_link(irg);
2173                 if (env->got_inline) {
2174                         /* this irg got calls inlined: optimize it */
2175
2176                         /* scalar replacement does not work well with Tuple nodes, so optimize them away */
2177                         optimize_graph_df(irg);
2178
2179                         if (env->local_vars) {
2180                                 if (scalar_replacement_opt(irg)) {
2181                                         optimize_graph_df(irg);
2182                                 }
2183                         }
2184                         optimize_cf(irg);
2185                 }
2186                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
2187                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
2188                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
2189                         env->n_callers_orig, env->n_callers,
2190                         get_entity_name(get_irg_entity(irg))));
2191                 }
2192         }
2193
2194         /* kill the copied graphs: we don't need them anymore */
2195         foreach_pmap(copied_graphs, pm_entry) {
2196                 ir_graph *copy = pm_entry->value;
2197
2198                 /* reset the entity, otherwise it will be deleted in the next step ... */
2199                 set_irg_entity(copy, NULL);
2200                 free_ir_graph(copy);
2201         }
2202         pmap_destroy(copied_graphs);
2203
2204         obstack_free(&temp_obst, NULL);
2205         current_ir_graph = rem;
2206 }
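
/*
 * Illustrative usage sketch (the limits are example values, not libFirm
 * defaults): allow each caller to grow up to 750 nodes and inline every call
 * whose benefice exceeds -30:
 *
 *     firm_init_inline();
 *     inline_functions(750, 30);
 */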
2207
2208 void firm_init_inline(void) {
2209         FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
2210 }