hooks are always enabled now -> one less option
[libfirm] / ir / opt / opt_inline.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All rights reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief    Dead node elimination and Procedure Inlining.
23  * @author   Michael Beck, Goetz Lindenmaier
24  * @version  $Id$
25  */
26 #ifdef HAVE_CONFIG_H
27 # include "config.h"
28 #endif
29
30 #include <limits.h>
31 #include <assert.h>
32
33 #include "irnode_t.h"
34 #include "irgraph_t.h"
35 #include "irprog_t.h"
36
37 #include "iroptimize.h"
38 #include "ircons_t.h"
39 #include "iropt_t.h"
40 #include "irgopt.h"
41 #include "irgmod.h"
42 #include "irgwalk.h"
43
44 #include "array_t.h"
45 #include "list.h"
46 #include "pset.h"
47 #include "pmap.h"
48 #include "pdeq.h"
49 #include "xmalloc.h"
50 #include "pqueue.h"
51
52 #include "irouts.h"
53 #include "irloop_t.h"
54 #include "irbackedge_t.h"
55 #include "opt_inline_t.h"
56 #include "cgana.h"
57 #include "trouts.h"
58 #include "error.h"
59
60 #include "analyze_irg_args.h"
61 #include "iredges_t.h"
62 #include "irflag_t.h"
63 #include "irhooks.h"
64 #include "irtools.h"
65 #include "iropt_dbg.h"
66
67 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
68
69 /*------------------------------------------------------------------*/
70 /* Routines for dead node elimination / copying garbage collection  */
71 /* of the obstack.                                                  */
72 /*------------------------------------------------------------------*/
73
74 /**
75  * Remember the new node in the old node by using a field all nodes have.
76  */
77 #define set_new_node(oldn, newn)  set_irn_link(oldn, newn)
78
79 /**
80  * Get this new node, before the old node is forgotten.
81  */
82 #define get_new_node(oldn) get_irn_link(oldn)
83
84 /**
85  * Check if a new node was set.
86  */
87 #define has_new_node(n) (get_new_node(n) != NULL)
88
89 /**
90  * We use the block_visited flag to mark that we have computed the
91  * number of useful predecessors for this block.
92  * Further we encode the new arity in this flag in the old blocks.
93  * Remembering the arity is useful, as it saves a lot of pointer
94  * accesses.  This function is called for all Phi and Block nodes
95  * in a Block.
96  */
97 static INLINE int
98 compute_new_arity(ir_node *b) {
99         int i, res, irn_arity;
100         int irg_v, block_v;
101
102         irg_v = get_irg_block_visited(current_ir_graph);
103         block_v = get_Block_block_visited(b);
104         if (block_v >= irg_v) {
105                 /* we computed the number of preds for this block and saved it in the
106                    block_v flag */
107                 return block_v - irg_v;
108         } else {
109                 /* compute the number of good predecessors */
110                 res = irn_arity = get_irn_arity(b);
111                 for (i = 0; i < irn_arity; i++)
112                         if (is_Bad(get_irn_n(b, i))) res--;
113                 /* save it in the flag. */
114                 set_Block_block_visited(b, irg_v + res);
115                 return res;
116         }
117 }
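/*
 * Illustrative note (not part of the original sources): the encoding above
 * stores "irg_v + number of non-Bad predecessors" in the block_visited flag.
 * E.g. with irg_v == 40 and a block whose preds are { Jmp, Bad, Jmp }, the
 * first call computes res == 2 and stores 42; a subsequent call sees
 * block_v (42) >= irg_v (40) and simply returns 42 - 40 == 2.
 */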
118
119 /**
120  * Copies the node to the new obstack. The Ins of the new node point to
121  * the predecessors on the old obstack.  For block/phi nodes not all
122  * predecessors might be copied.  n->link points to the new node.
123  * For Phi and Block nodes the function allocates in-arrays with an arity
124  * only for useful predecessors.  The arity is determined by counting
125  * the non-bad predecessors of the block.
126  *
127  * @param n    The node to be copied
128  * @param env  if non-NULL, the node number attribute will be copied to the new node
129  *
130  * Note: Also used for loop unrolling.
131  */
132 static void copy_node(ir_node *n, void *env) {
133         ir_node *nn, *block;
134         int new_arity;
135         ir_op *op = get_irn_op(n);
136         (void) env;
137
138         if (op == op_Bad) {
139                 /* node copied already */
140                 return;
141         } else if (op == op_Block) {
142                 block = NULL;
143                 new_arity = compute_new_arity(n);
144                 n->attr.block.graph_arr = NULL;
145         } else {
146                 block = get_nodes_block(n);
147                 if (op == op_Phi) {
148                         new_arity = compute_new_arity(block);
149                 } else {
150                         new_arity = get_irn_arity(n);
151                 }
152         }
153         nn = new_ir_node(get_irn_dbg_info(n),
154                 current_ir_graph,
155                 block,
156                 op,
157                 get_irn_mode(n),
158                 new_arity,
159                 get_irn_in(n) + 1);
160         /* Copy the attributes.  These might point to additional data.  If this
161            was allocated on the old obstack the pointers now are dangling.  This
162            frees e.g. the memory of the graph_arr allocated in new_immBlock. */
163         if (op == op_Block) {
164                 /* we cannot allow blocks WITHOUT macroblock input */
165                 set_Block_MacroBlock(nn, get_Block_MacroBlock(n));
166         }
167         copy_node_attr(n, nn);
168
169 #ifdef DEBUG_libfirm
170         {
171                 int copy_node_nr = env != NULL;
172                 if (copy_node_nr) {
173                         /* for easier debugging, we want to copy the node numbers too */
174                         nn->node_nr = n->node_nr;
175                 }
176         }
177 #endif
178
179         set_new_node(n, nn);
180         hook_dead_node_elim_subst(current_ir_graph, n, nn);
181 }
182
183 /**
184  * Copies new predecessors of old node to new node remembered in link.
185  * Skips the Bad predecessors of Phi and Block nodes.
186  */
187 static void copy_preds(ir_node *n, void *env) {
188         ir_node *nn, *block;
189         int i, j, irn_arity;
190         (void) env;
191
192         nn = get_new_node(n);
193
194         if (is_Block(n)) {
195                 /* copy the macro block header */
196                 ir_node *mbh = get_Block_MacroBlock(n);
197
198                 if (mbh == n) {
199                         /* this block is a macroblock header */
200                         set_Block_MacroBlock(nn, nn);
201                 } else {
202                         /* get the macro block header */
203                         ir_node *nmbh = get_new_node(mbh);
204                         assert(nmbh != NULL);
205                         set_Block_MacroBlock(nn, nmbh);
206                 }
207
208                 /* Don't copy Bad nodes. */
209                 j = 0;
210                 irn_arity = get_irn_arity(n);
211                 for (i = 0; i < irn_arity; i++) {
212                         if (! is_Bad(get_irn_n(n, i))) {
213                                 ir_node *pred = get_irn_n(n, i);
214                                 set_irn_n(nn, j, get_new_node(pred));
215                                 j++;
216                         }
217                 }
218                 /* repair the block visited flag from above misuse. Repair it in both
219                    graphs so that the old one can still be used. */
220                 set_Block_block_visited(nn, 0);
221                 set_Block_block_visited(n, 0);
222                 /* Local optimization could not merge two subsequent blocks if
223                    the in array contained Bads.  Now it is possible.
224                    We don't call optimize_in_place as it requires
225                    that the fields in ir_graph are set properly. */
226                 if ((get_opt_control_flow_straightening()) &&
227                         (get_Block_n_cfgpreds(nn) == 1) &&
228                         is_Jmp(get_Block_cfgpred(nn, 0))) {
229                         ir_node *old = get_nodes_block(get_Block_cfgpred(nn, 0));
230                         if (nn == old) {
231                                 /* Jmp jumps into the block it is in -- remove this self cycle. */
232                                 assert(is_Bad(get_new_node(get_irg_bad(current_ir_graph))));
233                                 exchange(nn, get_new_node(get_irg_bad(current_ir_graph)));
234                         } else {
235                                 exchange(nn, old);
236                         }
237                 }
238         } else if (is_Phi(n) && get_irn_arity(n) > 0) {
239                 /* Don't copy node if corresponding predecessor in block is Bad.
240                    The Block itself should not be Bad. */
241                 block = get_nodes_block(n);
242                 set_nodes_block(nn, get_new_node(block));
243                 j = 0;
244                 irn_arity = get_irn_arity(n);
245                 for (i = 0; i < irn_arity; i++) {
246                         if (! is_Bad(get_irn_n(block, i))) {
247                                 ir_node *pred = get_irn_n(n, i);
248                                 set_irn_n(nn, j, get_new_node(pred));
249                                 /*if (is_backedge(n, i)) set_backedge(nn, j);*/
250                                 j++;
251                         }
252                 }
253                 /* If the pre walker reached this Phi after the post walker visited the
254                    block, block_visited is > 0. */
255                 set_Block_block_visited(get_nodes_block(n), 0);
256                 /* Compacting the Phi's ins might generate Phis with only one
257                    predecessor. */
258                 if (get_irn_arity(nn) == 1)
259                         exchange(nn, get_irn_n(nn, 0));
260         } else {
261                 irn_arity = get_irn_arity(n);
262                 for (i = -1; i < irn_arity; i++)
263                         set_irn_n(nn, i, get_new_node(get_irn_n(n, i)));
264         }
265         /* Now the new node is complete.  We can add it to the hash table for CSE.
266            @@@ inlining aborts if we identify End. Why? */
267         if (!is_End(nn))
268                 add_identities(current_ir_graph->value_table, nn);
269 }
270
271 /**
272  * Copies the graph recursively, compacts the keep-alives of the end node.
273  *
274  * @param irg           the graph to be copied
275  * @param copy_node_nr  If non-zero, the node number will be copied
276  */
277 static void copy_graph(ir_graph *irg, int copy_node_nr) {
278         ir_node *oe, *ne, *ob, *nb, *om, *nm; /* old end, new end, old bad, new bad, old NoMem, new NoMem */
279         ir_node *ka;      /* keep alive */
280         int i, irn_arity;
281         unsigned long vfl;
282
283         /* Some nodes must be copied by hand, sigh */
284         vfl = get_irg_visited(irg);
285         set_irg_visited(irg, vfl + 1);
286
287         oe = get_irg_end(irg);
288         mark_irn_visited(oe);
289         /* copy the end node by hand, allocate dynamic in array! */
290         ne = new_ir_node(get_irn_dbg_info(oe),
291                 irg,
292                 NULL,
293                 op_End,
294                 mode_X,
295                 -1,
296                 NULL);
297         /* Copy the attributes.  Well, there might be some in the future... */
298         copy_node_attr(oe, ne);
299         set_new_node(oe, ne);
300
301         /* copy the Bad node */
302         ob = get_irg_bad(irg);
303         mark_irn_visited(ob);
304         nb = new_ir_node(get_irn_dbg_info(ob),
305                 irg,
306                 NULL,
307                 op_Bad,
308                 mode_T,
309                 0,
310                 NULL);
311         copy_node_attr(ob, nb);
312         set_new_node(ob, nb);
313
314         /* copy the NoMem node */
315         om = get_irg_no_mem(irg);
316         mark_irn_visited(om);
317         nm = new_ir_node(get_irn_dbg_info(om),
318                 irg,
319                 NULL,
320                 op_NoMem,
321                 mode_M,
322                 0,
323                 NULL);
324         copy_node_attr(om, nm);
325         set_new_node(om, nm);
326
327         /* copy the live nodes */
328         set_irg_visited(irg, vfl);
329         irg_walk(get_nodes_block(oe), copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
330
331         /* Note: from now on, the visited flag of the graph is equal to vfl + 1 */
332
333         /* visit the anchors as well */
334         for (i = get_irg_n_anchors(irg) - 1; i >= 0; --i) {
335                 ir_node *n = get_irg_anchor(irg, i);
336
337                 if (n && (get_irn_visited(n) <= vfl)) {
338                         set_irg_visited(irg, vfl);
339                         irg_walk(n, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
340                 }
341         }
342
343         /* copy_preds for the end node ... */
344         set_nodes_block(ne, get_new_node(get_nodes_block(oe)));
345
346         /*- ... and now the keep alives. -*/
347         /* First pick the unmarked Block nodes and walk them.  We must pick these
348            first, as otherwise we would overlook blocks reachable from Phis. */
349         irn_arity = get_End_n_keepalives(oe);
350         for (i = 0; i < irn_arity; i++) {
351                 ka = get_End_keepalive(oe, i);
352                 if (is_Block(ka)) {
353                         if (get_irn_visited(ka) <= vfl) {
354                                 /* We must keep the block alive and copy everything reachable */
355                                 set_irg_visited(irg, vfl);
356                                 irg_walk(ka, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
357                         }
358                         add_End_keepalive(ne, get_new_node(ka));
359                 }
360         }
361
362         /* Now pick other nodes.  Here we will keep all! */
363         irn_arity = get_End_n_keepalives(oe);
364         for (i = 0; i < irn_arity; i++) {
365                 ka = get_End_keepalive(oe, i);
366                 if (!is_Block(ka)) {
367                         if (get_irn_visited(ka) <= vfl) {
368                                 /* We didn't copy the node yet.  */
369                                 set_irg_visited(irg, vfl);
370                                 irg_walk(ka, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
371                         }
372                         add_End_keepalive(ne, get_new_node(ka));
373                 }
374         }
375
376         /* start block sometimes only reached after keep alives */
377         set_nodes_block(nb, get_new_node(get_nodes_block(ob)));
378         set_nodes_block(nm, get_new_node(get_nodes_block(om)));
379 }
380
381 /**
382  * Copies the graph reachable from current_ir_graph->end to the obstack
383  * in current_ir_graph and fixes the environment.
384  * Then fixes the fields in current_ir_graph containing nodes of the
385  * graph.
386  *
387  * @param copy_node_nr  If non-zero, the node number will be copied
388  */
389 static void
390 copy_graph_env(int copy_node_nr) {
391         ir_graph *irg = current_ir_graph;
392         ir_node *old_end, *new_anchor;
393         int i;
394
395         /* remove end_except and end_reg nodes */
396         old_end = get_irg_end(irg);
397         set_irg_end_except (irg, old_end);
398         set_irg_end_reg    (irg, old_end);
399
400         /* Not all nodes remembered in irg might be reachable
401            from the end node.  Assure their link is set to NULL, so that
402            we can test whether new nodes have been computed. */
403         for (i = get_irg_n_anchors(irg) - 1; i >= 0; --i) {
404                 ir_node *n = get_irg_anchor(irg, i);
405                 if (n != NULL)
406                         set_new_node(n, NULL);
407         }
408         /* we use the block walk flag for removing Bads from Blocks ins. */
409         inc_irg_block_visited(irg);
410
411         /* copy the graph */
412         copy_graph(irg, copy_node_nr);
413
414         /* fix the anchor */
415         old_end    = get_irg_end(irg);
416         new_anchor = new_Anchor(irg);
417
418         for (i = get_irg_n_anchors(irg) - 1; i >= 0; --i) {
419                 ir_node *n = get_irg_anchor(irg, i);
420                 if (n)
421                         set_irn_n(new_anchor, i, get_new_node(n));
422         }
423         free_End(old_end);
424         irg->anchor = new_anchor;
425
426         /* ensure the new anchor is placed in the endblock */
427         set_nodes_block(new_anchor, get_irg_end_block(irg));
428 }
429
430 /**
431  * Copies all reachable nodes to a new obstack.  Removes bad inputs
432  * from block nodes and the corresponding inputs from Phi nodes.
433  * Merges single exit blocks with single entry blocks and removes
434  * 1-input Phis.
435  * Adds all new nodes to a new hash table for CSE.  Does not
436  * perform CSE, so the hash table might contain common subexpressions.
437  */
438 void dead_node_elimination(ir_graph *irg) {
439         ir_graph *rem;
440 #ifdef INTERPROCEDURAL_VIEW
441         int rem_ipview = get_interprocedural_view();
442 #endif
443         struct obstack *graveyard_obst = NULL;
444         struct obstack *rebirth_obst   = NULL;
445
446         edges_deactivate(irg);
447
448         /* inform statistics that we started a dead-node elimination run */
449         hook_dead_node_elim(irg, 1);
450
451         /* Remember external state of current_ir_graph. */
452         rem = current_ir_graph;
453         current_ir_graph = irg;
454 #ifdef INTERPROCEDURAL_VIEW
455         set_interprocedural_view(0);
456 #endif
457
458         assert(get_irg_phase_state(irg) != phase_building);
459
460         /* Handle graph state */
461         free_callee_info(irg);
462         free_irg_outs(irg);
463         free_trouts();
464
465         /* @@@ so far we lose loops when copying */
466         free_loop_information(irg);
467
468         set_irg_doms_inconsistent(irg);
469
470         /* A quiet place, where the old obstack can rest in peace,
471            until it will be cremated. */
472         graveyard_obst = irg->obst;
473
474         /* A new obstack, where the reachable nodes will be copied to. */
475         rebirth_obst = XMALLOC(struct obstack);
476         irg->obst = rebirth_obst;
477         obstack_init(irg->obst);
478         irg->last_node_idx = 0;
479
480         /* We also need a new value table for CSE */
481         del_identities(irg->value_table);
482         irg->value_table = new_identities();
483
484         /* Copy the graph from the old to the new obstack */
485         copy_graph_env(/*copy_node_nr=*/1);
486
487         /* Free memory from old unoptimized obstack */
488         obstack_free(graveyard_obst, 0);  /* First empty the obstack ... */
489         xfree(graveyard_obst);            /* ... then free it.           */
490
491         /* inform statistics that the run is over */
492         hook_dead_node_elim(irg, 0);
493
494         current_ir_graph = rem;
495 #ifdef INTERPROCEDURAL_VIEW
496         set_interprocedural_view(rem_ipview);
497 #endif
498 }
499
500 /**
501  * Relinks bad predecessors of a block and stores the old in array in the
502  * link field. This function is called by relink_bad_predecessors().
503  * The array in the link field starts with the block operand at position 0.
504  * If the block has bad predecessors, a new in array without Bad preds is created.
505  * Otherwise the in array is left untouched.
506  */
507 static void relink_bad_block_predecessors(ir_node *n, void *env) {
508         ir_node **new_in, *irn;
509         int i, new_irn_n, old_irn_arity, new_irn_arity = 0;
510         (void) env;
511
512         /* if the link field of the block is NULL, look for bad predecessors; otherwise
513            this is already done */
514         if (is_Block(n) && get_irn_link(n) == NULL) {
515                 /* save old predecessors in link field (position 0 is the block operand)*/
516                 set_irn_link(n, get_irn_in(n));
517
518                 /* count predecessors without bad nodes */
519                 old_irn_arity = get_irn_arity(n);
520                 for (i = 0; i < old_irn_arity; i++)
521                         if (!is_Bad(get_irn_n(n, i)))
522                                 ++new_irn_arity;
523
524                 /* arity changing: set new predecessors without bad nodes */
525                 if (new_irn_arity < old_irn_arity) {
526                         /* Get new predecessor array. We do not resize the array, as we must
527                            keep the old one to update Phis. */
528                         new_in = NEW_ARR_D(ir_node *, current_ir_graph->obst, (new_irn_arity+1));
529
530                         /* set new predecessors in array */
531                         new_in[0] = NULL;
532                         new_irn_n = 1;
533                         for (i = 0; i < old_irn_arity; i++) {
534                                 irn = get_irn_n(n, i);
535                                 if (!is_Bad(irn)) {
536                                         new_in[new_irn_n] = irn;
537                                         is_backedge(n, i) ? set_backedge(n, new_irn_n-1) : set_not_backedge(n, new_irn_n-1);
538                                         ++new_irn_n;
539                                 }
540                         }
541                         /* ARR_SETLEN(int, n->attr.block.backedge, new_irn_arity); */
542                         ARR_SHRINKLEN(n->attr.block.backedge, new_irn_arity);
543                         n->in = new_in;
544                 } /* ir node has bad predecessors */
545         } /* Block is not relinked */
546 }
547
548 /**
549  * Relinks Bad predecessors of Blocks and Phis; called by the walker in
550  * remove_bad_predecessors(). If n is a Block, calls
551  * relink_bad_block_predecessors(). If n is a Phi node, also calls the relinking
552  * function for the Phi's Block. If this block has bad predecessors, the preds
553  * of the Phi node are relinked as well.
554  */
555 static void relink_bad_predecessors(ir_node *n, void *env) {
556         ir_node *block, **old_in;
557         int i, old_irn_arity, new_irn_arity;
558
559         /* relink bad predecessors of a block */
560         if (is_Block(n))
561                 relink_bad_block_predecessors(n, env);
562
563         /* If Phi node relink its block and its predecessors */
564         if (is_Phi(n)) {
565                 /* Relink predecessors of phi's block */
566                 block = get_nodes_block(n);
567                 if (get_irn_link(block) == NULL)
568                         relink_bad_block_predecessors(block, env);
569
570                 old_in = (ir_node **)get_irn_link(block); /* Of Phi's Block */
571                 old_irn_arity = ARR_LEN(old_in);
572
573                 /* Relink Phi predecessors if count of predecessors changed */
574                 if (old_irn_arity != ARR_LEN(get_irn_in(block))) {
575                         /* set new predecessors in array
576                            n->in[0] remains the same block */
577                         new_irn_arity = 1;
578                         for(i = 1; i < old_irn_arity; i++)
579                                 if (!is_Bad(old_in[i])) {
580                                         n->in[new_irn_arity] = n->in[i];
581                                         is_backedge(n, i) ? set_backedge(n, new_irn_arity) : set_not_backedge(n, new_irn_arity);
582                                         ++new_irn_arity;
583                                 }
584
585                         ARR_SETLEN(ir_node *, n->in, new_irn_arity);
586                         ARR_SETLEN(int, n->attr.phi.u.backedge, new_irn_arity);
587                 }
588         } /* n is a Phi node */
589 }
590
591 /*
592  * Removes Bad predecessors from Blocks and the corresponding
593  * inputs to Phi nodes as in dead_node_elimination but without
594  * copying the graph.
595  * On walking up set the link field to NULL, on walking down call
596  * relink_bad_predecessors() (This function stores the old in array
597  * to the link field and sets a new in array if arity of predecessors
598  * changes).
599  */
600 void remove_bad_predecessors(ir_graph *irg) {
601         panic("Fix backedge handling first");
602         irg_walk_graph(irg, firm_clear_link, relink_bad_predecessors, NULL);
603 }
604
605
606 /*
607    __                      _  __ __
608   (_     __    o     _    | \/  |_
609   __)|_| | \_/ | \_/(/_   |_/\__|__
610
611   The following stuff implements a facility that automatically patches
612   registered ir_node pointers to the new node when a dead node elimination occurs.
613 */
614
615 struct _survive_dce_t {
616         struct obstack obst;
617         pmap *places;
618         pmap *new_places;
619         hook_entry_t dead_node_elim;
620         hook_entry_t dead_node_elim_subst;
621 };
622
623 typedef struct _survive_dce_list_t {
624         struct _survive_dce_list_t *next;
625         ir_node **place;
626 } survive_dce_list_t;
627
628 static void dead_node_hook(void *context, ir_graph *irg, int start) {
629         survive_dce_t *sd = context;
630         (void) irg;
631
632         /* Create a new map before the dead node elimination is performed. */
633         if (start) {
634                 sd->new_places = pmap_create_ex(pmap_count(sd->places));
635         } else {
636                 /* Patch back all nodes if dead node elimination is over and something is to be done. */
637                 pmap_destroy(sd->places);
638                 sd->places     = sd->new_places;
639                 sd->new_places = NULL;
640         }
641 }
642
643 /**
644  * Hook called when dead node elimination replaces old by nw.
645  */
646 static void dead_node_subst_hook(void *context, ir_graph *irg, ir_node *old, ir_node *nw) {
647         survive_dce_t *sd = context;
648         survive_dce_list_t *list = pmap_get(sd->places, old);
649         (void) irg;
650
651         /* If the node is to be patched back, write the new address to all registered locations. */
652         if (list) {
653                 survive_dce_list_t *p;
654
655                 for (p = list; p; p = p->next)
656                         *(p->place) = nw;
657
658                 pmap_insert(sd->new_places, nw, list);
659         }
660 }
661
662 /**
663  * Make a new Survive DCE environment.
664  */
665 survive_dce_t *new_survive_dce(void) {
666         survive_dce_t *res = XMALLOC(survive_dce_t);
667         obstack_init(&res->obst);
668         res->places     = pmap_create();
669         res->new_places = NULL;
670
671         res->dead_node_elim.hook._hook_dead_node_elim = dead_node_hook;
672         res->dead_node_elim.context                   = res;
673         res->dead_node_elim.next                      = NULL;
674
675         res->dead_node_elim_subst.hook._hook_dead_node_elim_subst = dead_node_subst_hook;
676         res->dead_node_elim_subst.context = res;
677         res->dead_node_elim_subst.next    = NULL;
678
679         register_hook(hook_dead_node_elim, &res->dead_node_elim);
680         register_hook(hook_dead_node_elim_subst, &res->dead_node_elim_subst);
681         return res;
682 }
683
684 /**
685  * Free a Survive DCE environment.
686  */
687 void free_survive_dce(survive_dce_t *sd) {
688         obstack_free(&sd->obst, NULL);
689         pmap_destroy(sd->places);
690         unregister_hook(hook_dead_node_elim, &sd->dead_node_elim);
691         unregister_hook(hook_dead_node_elim_subst, &sd->dead_node_elim_subst);
692         xfree(sd);
693 }
694
695 /**
696  * Register a node pointer to be patched upon DCE.
697  * When DCE occurs, the node pointer specified by @p place will be
698  * patched to the new address of the node it is pointing to.
699  *
700  * @param sd    The Survive DCE environment.
701  * @param place The address of the node pointer.
702  */
703 void survive_dce_register_irn(survive_dce_t *sd, ir_node **place) {
704         if (*place != NULL) {
705                 ir_node *irn      = *place;
706                 survive_dce_list_t *curr = pmap_get(sd->places, irn);
707                 survive_dce_list_t *nw   = obstack_alloc(&sd->obst, sizeof(nw[0]));
708
709                 nw->next  = curr;
710                 nw->place = place;
711
712                 pmap_insert(sd->places, irn, nw);
713         }
714 }
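/*
 * Usage sketch (illustrative only, the local variable names are made up):
 * keep an ir_node pointer valid across a dead node elimination by
 * registering its address beforehand.
 *
 *   survive_dce_t *sd   = new_survive_dce();
 *   ir_node       *node = ...;               (some node we want to track)
 *   survive_dce_register_irn(sd, &node);
 *   dead_node_elimination(irg);              (node now points to the copy)
 *   free_survive_dce(sd);
 */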
715
716 /*--------------------------------------------------------------------*/
717 /*  Functionality for inlining                                         */
718 /*--------------------------------------------------------------------*/
719
720 /**
721  * Copy node for inlining.  Updates attributes that change when
722  * inlining but not for dead node elimination.
723  *
724  * Copies the node by calling copy_node() and then updates the entity if
725  * it's a local one.  env must be a pointer to the frame type of the
726  * inlined procedure. The new entities must be in the link field of
727  * the entities.
728  */
729 static void copy_node_inline(ir_node *n, void *env) {
730         ir_node *nn;
731         ir_type *frame_tp = (ir_type *)env;
732
733         copy_node(n, NULL);
734         if (is_Sel(n)) {
735                 nn = get_new_node (n);
736                 assert(is_Sel(nn));
737                 if (get_entity_owner(get_Sel_entity(n)) == frame_tp) {
738                         set_Sel_entity(nn, get_entity_link(get_Sel_entity(n)));
739                 }
740         } else if (is_Block(n)) {
741                 nn = get_new_node (n);
742                 nn->attr.block.irg = current_ir_graph;
743         }
744 }
745
746 /**
747  * Copies the predecessors of the old node and moves constants to
748  * the Start Block.
749  */
750 static void copy_preds_inline(ir_node *n, void *env) {
751         ir_node *nn;
752
753         copy_preds(n, env);
754         nn = skip_Id(get_new_node(n));
755         if (is_irn_constlike(nn)) {
756                 /* move Constants into the start block */
757                 set_nodes_block(nn, get_irg_start_block(current_ir_graph));
758
759                 n = identify_remember(current_ir_graph->value_table, nn);
760                 if (nn != n) {
761                         DBG_OPT_CSE(nn, n);
762                         exchange(nn, n);
763                 }
764         }
765 }
766
767 /**
768  * Walker: checks if P_value_arg_base is used.
769  */
770 static void find_addr(ir_node *node, void *env) {
771         int *allow_inline = env;
772         if (is_Proj(node) &&
773                         is_Start(get_Proj_pred(node)) &&
774                         get_Proj_proj(node) == pn_Start_P_value_arg_base) {
775                 *allow_inline = 0;
776         } else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
777                 /* From GCC:
778                  * Refuse to inline alloca call unless user explicitly forced so as this
779                  * may change program's memory overhead drastically when the function
780                  * using alloca is called in loop.  In GCC present in SPEC2000 inlining
781                  * into schedule_block cause it to require 2GB of ram instead of 256MB.
782                  *
783                  * Sadly this is true for our implementation as well.
784                  * Moreover, we cannot differentiate between alloca() and VLA yet, so this
785                  * disables inlining of functions using VLA (which are completely safe).
786                  *
787                  * 2 Solutions:
788                  * - add a flag to the Alloc node for "real" alloca() calls
789                  * - add a new Stack-Restore node at the end of a function using alloca()
790                  */
791                 *allow_inline = 0;
792         }
793 }
794
795 /**
796  * Check if we can inline a given call.
797  * Currently, we cannot inline two cases:
798  * - call with compound arguments
799  * - graphs that take the address of a parameter
800  *
801  * check these conditions here
802  */
803 static int can_inline(ir_node *call, ir_graph *called_graph) {
804         ir_type *call_type = get_Call_type(call);
805         int params, ress, i, res;
806         assert(is_Method_type(call_type));
807
808         params = get_method_n_params(call_type);
809         ress   = get_method_n_ress(call_type);
810
811         /* check parameters for compound arguments */
812         for (i = 0; i < params; ++i) {
813                 ir_type *p_type = get_method_param_type(call_type, i);
814
815                 if (is_compound_type(p_type))
816                         return 0;
817         }
818
819         /* check results for compound types */
820         for (i = 0; i < ress; ++i) {
821                 ir_type *r_type = get_method_res_type(call_type, i);
822
823                 if (is_compound_type(r_type))
824                         return 0;
825         }
826
827         res = 1;
828         irg_walk_graph(called_graph, find_addr, NULL, &res);
829
830         return res;
831 }
832
833 enum exc_mode {
834         exc_handler    = 0, /**< There is a handler. */
835         exc_to_end     = 1, /**< Branches to End. */
836         exc_no_handler = 2  /**< Exception handling not represented. */
837 };
838
839 /* Inlines a method at the given call site. */
840 int inline_method(ir_node *call, ir_graph *called_graph) {
841         ir_node             *pre_call;
842         ir_node             *post_call, *post_bl;
843         ir_node             *in[pn_Start_max];
844         ir_node             *end, *end_bl, *block;
845         ir_node             **res_pred;
846         ir_node             **cf_pred;
847         ir_node             **args_in;
848         ir_node             *ret, *phi;
849         int                 arity, n_ret, n_exc, n_res, i, n, j, rem_opt, irn_arity, n_params;
850         enum exc_mode       exc_handling;
851         ir_type             *called_frame, *curr_frame, *mtp, *ctp;
852         ir_entity           *ent;
853         ir_graph            *rem, *irg;
854         irg_inline_property prop = get_irg_inline_property(called_graph);
855         unsigned long       visited;
856
857         if (prop == irg_inline_forbidden)
858                 return 0;
859
860         ent = get_irg_entity(called_graph);
861
862         mtp = get_entity_type(ent);
863         ctp = get_Call_type(call);
864         if (get_method_n_params(mtp) > get_method_n_params(ctp)) {
865                 /* this is a bad feature of C: without a prototype, we can call a function with fewer
866                 parameters than needed. Currently we don't support this, although it would be
867                 possible to use Unknown then. */
868                 return 0;
869         }
870
871         /* Argh, compiling C has some bad consequences:
872            the call type AND the method type might be different.
873            It is implementation dependent what happens in that case.
874            We support inlining if the bit size of the types matches AND
875            the same arithmetic is used. */
876         n_params = get_method_n_params(mtp);
877         for (i = n_params - 1; i >= 0; --i) {
878                 ir_type *param_tp = get_method_param_type(mtp, i);
879                 ir_type *arg_tp   = get_method_param_type(ctp, i);
880
881                 if (param_tp != arg_tp) {
882                         ir_mode *pmode = get_type_mode(param_tp);
883                         ir_mode *amode = get_type_mode(arg_tp);
884
885                         if (pmode == NULL || amode == NULL)
886                                 return 0;
887                         if (get_mode_size_bits(pmode) != get_mode_size_bits(amode))
888                                 return 0;
889                         if (get_mode_arithmetic(pmode) != get_mode_arithmetic(amode))
890                                 return 0;
891                         /* otherwise we can simply "reinterpret" the bits */
892                 }
893         }
894
895         irg = get_irn_irg(call);
896
897         /*
898          * We cannot inline a recursive call. The graph must be copied before
899          * the call to inline_method() using create_irg_copy().
900          */
901         if (called_graph == irg)
902                 return 0;
903
904         /*
905          * currently, we cannot inline two cases:
906          * - call with compound arguments
907          * - graphs that take the address of a parameter
908          */
909         if (! can_inline(call, called_graph))
910                 return 0;
911
912         rem = current_ir_graph;
913         current_ir_graph = irg;
914
915         DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));
916
917         /* --  Turn off optimizations, this can cause problems when allocating new nodes. -- */
918         rem_opt = get_opt_optimize();
919         set_optimize(0);
920
921         /* Handle graph state */
922         assert(get_irg_phase_state(irg) != phase_building);
923         assert(get_irg_pinned(irg) == op_pin_state_pinned);
924         assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
925         set_irg_outs_inconsistent(irg);
926         set_irg_extblk_inconsistent(irg);
927         set_irg_doms_inconsistent(irg);
928         set_irg_loopinfo_inconsistent(irg);
929         set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
930
931         /* -- Check preconditions -- */
932         assert(is_Call(call));
933
934         /* here we know we WILL inline, so inform the statistics */
935         hook_inline(call, called_graph);
936
937         /* -- Decide how to handle exception control flow: Is there a handler
938            for the Call node, or do we branch directly to End on an exception?
939            exc_handling:
940            0 There is a handler.
941            1 Branches to End.
942            2 Exception handling not represented in Firm. -- */
943         {
944                 ir_node *proj, *Mproj = NULL, *Xproj = NULL;
945                 for (proj = get_irn_link(call); proj; proj = get_irn_link(proj)) {
946                         long proj_nr = get_Proj_proj(proj);
947                         if (proj_nr == pn_Call_X_except) Xproj = proj;
948                         if (proj_nr == pn_Call_M_except) Mproj = proj;
949                 }
950                 if      (Mproj) { assert(Xproj); exc_handling = exc_handler; } /*  Mproj           */
951                 else if (Xproj) {                exc_handling = exc_to_end; } /* !Mproj &&  Xproj   */
952                 else            {                exc_handling = exc_no_handler; } /* !Mproj && !Xproj   */
953         }
954
955         /* create the argument tuple */
956         NEW_ARR_A(ir_node *, args_in, n_params);
957
958         block = get_nodes_block(call);
959         for (i = n_params - 1; i >= 0; --i) {
960                 ir_node *arg      = get_Call_param(call, i);
961                 ir_type *param_tp = get_method_param_type(mtp, i);
962                 ir_mode *mode     = get_type_mode(param_tp);
963
964                 if (mode != get_irn_mode(arg)) {
965                         arg = new_r_Conv(irg, block, arg, mode);
966                 }
967                 args_in[i] = arg;
968         }
969
970         /* -- Pre_call is a Tuple providing the memory, frame and arguments of
971            the procedure and later replaces the Start node of the called graph.
972            Post_call is the old Call node and collects the results of the called
973            graph. Both will end up being a tuple.  -- */
974         post_bl = get_nodes_block(call);
975         set_irg_current_block(irg, post_bl);
976         /* XxMxPxPxPxT of Start + parameter of Call */
977         in[pn_Start_X_initial_exec]   = new_Jmp();
978         in[pn_Start_M]                = get_Call_mem(call);
979         in[pn_Start_P_frame_base]     = get_irg_frame(irg);
980         in[pn_Start_P_tls]            = get_irg_tls(irg);
981         in[pn_Start_T_args]           = new_Tuple(n_params, args_in);
982         /* in[pn_Start_P_value_arg_base] = ??? */
983         assert(pn_Start_P_value_arg_base == pn_Start_max - 1 && "pn_Start_P_value_arg_base not supported, fix");
984         pre_call = new_Tuple(pn_Start_max - 1, in);
985         post_call = call;
986
987         /* --
988            The new block gets the ins of the old block, pre_call and all its
989            predecessors and all Phi nodes. -- */
990         part_block(pre_call);
991
992         /* -- Prepare state for dead node elimination -- */
993         /* Visited flags in calling irg must be >= flag in called irg.
994            Else walker and arity computation will not work. */
995         if (get_irg_visited(irg) <= get_irg_visited(called_graph))
996                 set_irg_visited(irg, get_irg_visited(called_graph) + 1);
997         if (get_irg_block_visited(irg) < get_irg_block_visited(called_graph))
998                 set_irg_block_visited(irg, get_irg_block_visited(called_graph));
999         visited = get_irg_visited(irg);
1000
1001         /* Set pre_call as new Start node in link field of the start node of
1002            calling graph and pre_calls block as new block for the start block
1003            of calling graph.
1004            Further mark these nodes so that they are not visited by the
1005            copying. */
1006         set_irn_link(get_irg_start(called_graph), pre_call);
1007         set_irn_visited(get_irg_start(called_graph), visited);
1008         set_irn_link(get_irg_start_block(called_graph), get_nodes_block(pre_call));
1009         set_irn_visited(get_irg_start_block(called_graph), visited);
1010
1011         set_irn_link(get_irg_bad(called_graph), get_irg_bad(current_ir_graph));
1012         set_irn_visited(get_irg_bad(called_graph), visited);
1013
1014         set_irn_link(get_irg_no_mem(called_graph), get_irg_no_mem(current_ir_graph));
1015         set_irn_visited(get_irg_no_mem(called_graph), visited);
1016
1017         /* Initialize for compaction of in arrays */
1018         inc_irg_block_visited(irg);
1019
1020         /* -- Replicate local entities of the called_graph -- */
1021         /* copy the entities. */
1022         called_frame = get_irg_frame_type(called_graph);
1023         curr_frame   = get_irg_frame_type(irg);
1024         for (i = 0, n = get_class_n_members(called_frame); i < n; ++i) {
1025                 ir_entity *new_ent, *old_ent;
1026                 old_ent = get_class_member(called_frame, i);
1027                 new_ent = copy_entity_own(old_ent, curr_frame);
1028                 set_entity_link(old_ent, new_ent);
1029         }
1030
1031         /* visited is > than that of the called graph.  With this trick visited will
1032            remain unchanged, so that an outer walker (e.g. one searching for call
1033            nodes to inline) that triggered this inlining will not visit the inlined nodes. */
1034         set_irg_visited(irg, get_irg_visited(irg)-1);
1035
1036         /* -- Performing dead node elimination inlines the graph -- */
1037         /* Copies the nodes to the obstack of current_ir_graph. Updates links to new
1038            entities. */
1039         irg_walk(get_irg_end(called_graph), copy_node_inline, copy_preds_inline,
1040                  get_irg_frame_type(called_graph));
1041
1042         /* Repair called_graph */
1043         set_irg_visited(called_graph, get_irg_visited(irg));
1044         set_irg_block_visited(called_graph, get_irg_block_visited(irg));
1045         set_Block_block_visited(get_irg_start_block(called_graph), 0);
1046
1047         /* -- Merge the end of the inlined procedure with the call site -- */
1048         /* We will turn the old Call node into a Tuple with the following
1049            predecessors:
1050            -1:  Block of Tuple.
1051            0: Phi of all Memories of Return statements.
1052            1: Jmp from new Block that merges the control flow from all exception
1053            predecessors of the old end block.
1054            2: Tuple of all arguments.
1055            3: Phi of Exception memories.
1056            In case the old Call directly branches to End on an exception we don't
1057            need the block merging all exceptions nor the Phi of the exception
1058            memories.
1059         */
1060
1061         /* -- Precompute some values -- */
1062         end_bl = get_new_node(get_irg_end_block(called_graph));
1063         end = get_new_node(get_irg_end(called_graph));
1064         arity = get_Block_n_cfgpreds(end_bl);    /* arity = n_exc + n_ret  */
1065         n_res = get_method_n_ress(get_Call_type(call));
1066
1067         res_pred = XMALLOCN(ir_node*, n_res);
1068         cf_pred  = XMALLOCN(ir_node*, arity);
1069
1070         set_irg_current_block(irg, post_bl); /* just to make sure */
1071
1072         /* -- archive keepalives -- */
1073         irn_arity = get_irn_arity(end);
1074         for (i = 0; i < irn_arity; i++) {
1075                 ir_node *ka = get_End_keepalive(end, i);
1076                 if (! is_Bad(ka))
1077                         add_End_keepalive(get_irg_end(irg), ka);
1078         }
1079
1080         /* The new end node will die.  We need not free as the in array is on the obstack:
1081            copy_node() only generated 'D' arrays. */
1082
1083         /* -- Replace Return nodes by Jump nodes. -- */
1084         n_ret = 0;
1085         for (i = 0; i < arity; i++) {
1086                 ir_node *ret;
1087                 ret = get_Block_cfgpred(end_bl, i);
1088                 if (is_Return(ret)) {
1089                         cf_pred[n_ret] = new_r_Jmp(irg, get_nodes_block(ret));
1090                         n_ret++;
1091                 }
1092         }
1093         set_irn_in(post_bl, n_ret, cf_pred);
1094
1095         /* -- Build a Tuple for all results of the method.
1096            Add Phi node if there was more than one Return.  -- */
1097         turn_into_tuple(post_call, pn_Call_max);
1098         /* First the Memory-Phi */
1099         n_ret = 0;
1100         for (i = 0; i < arity; i++) {
1101                 ret = get_Block_cfgpred(end_bl, i);
1102                 if (is_Return(ret)) {
1103                         cf_pred[n_ret] = get_Return_mem(ret);
1104                         n_ret++;
1105                 }
1106         }
1107         phi = new_Phi(n_ret, cf_pred, mode_M);
1108         set_Tuple_pred(call, pn_Call_M_regular, phi);
1109         /* Conserve Phi-list for further inlinings -- but might be optimized */
1110         if (get_nodes_block(phi) == post_bl) {
1111                 set_irn_link(phi, get_irn_link(post_bl));
1112                 set_irn_link(post_bl, phi);
1113         }
1114         /* Now the real results */
1115         if (n_res > 0) {
1116                 for (j = 0; j < n_res; j++) {
1117                         n_ret = 0;
1118                         for (i = 0; i < arity; i++) {
1119                                 ret = get_Block_cfgpred(end_bl, i);
1120                                 if (is_Return(ret)) {
1121                                         cf_pred[n_ret] = get_Return_res(ret, j);
1122                                         n_ret++;
1123                                 }
1124                         }
1125                         if (n_ret > 0)
1126                                 phi = new_Phi(n_ret, cf_pred, get_irn_mode(cf_pred[0]));
1127                         else
1128                                 phi = new_Bad();
1129                         res_pred[j] = phi;
1130                         /* Conserve Phi-list for further inlinings -- but might be optimized */
1131                         if (get_nodes_block(phi) == post_bl) {
1132                                 set_Phi_next(phi, get_Block_phis(post_bl));
1133                                 set_Block_phis(post_bl, phi);
1134                         }
1135                 }
1136                 set_Tuple_pred(call, pn_Call_T_result, new_Tuple(n_res, res_pred));
1137         } else {
1138                 set_Tuple_pred(call, pn_Call_T_result, new_Bad());
1139         }
1140         /* handle the regular call */
1141         set_Tuple_pred(call, pn_Call_X_regular, new_Jmp());
1142
1143         /* For now, we cannot inline calls with value_base */
1144         set_Tuple_pred(call, pn_Call_P_value_res_base, new_Bad());
1145
1146         /* Finally the exception control flow.
1147            We have two (three) possible situations:
1148            First if the Call branches to an exception handler: We need to add a Phi node to
1149            collect the memory containing the exception objects.  Further we need
1150            to add another block to get a correct representation of this Phi.  To
1151            this block we add a Jmp that resolves into the X output of the Call
1152            when the Call is turned into a tuple.
1153            Second the Call branches to End, the exception is not handled.  Just
1154            add all inlined exception branches to the End node.
1155            Third: there is no Exception edge at all. Handle as case two. */
1156         if (exc_handling == exc_handler) {
1157                 n_exc = 0;
1158                 for (i = 0; i < arity; i++) {
1159                         ir_node *ret, *irn;
1160                         ret = get_Block_cfgpred(end_bl, i);
1161                         irn = skip_Proj(ret);
1162                         if (is_fragile_op(irn) || is_Raise(irn)) {
1163                                 cf_pred[n_exc] = ret;
1164                                 ++n_exc;
1165                         }
1166                 }
1167                 if (n_exc > 0) {
1168                         new_Block(n_exc, cf_pred);      /* watch it: current_block is changed! */
1169                         set_Tuple_pred(call, pn_Call_X_except, new_Jmp());
1170                         /* The Phi for the memories with the exception objects */
1171                         n_exc = 0;
1172                         for (i = 0; i < arity; i++) {
1173                                 ir_node *ret;
1174                                 ret = skip_Proj(get_Block_cfgpred(end_bl, i));
1175                                 if (is_Call(ret)) {
1176                                         cf_pred[n_exc] = new_r_Proj(irg, get_nodes_block(ret), ret, mode_M, 3);
1177                                         n_exc++;
1178                                 } else if (is_fragile_op(ret)) {
1179                                         /* We rely on all cfops having the memory output at the same position. */
1180                                         cf_pred[n_exc] = new_r_Proj(irg, get_nodes_block(ret), ret, mode_M, 0);
1181                                         n_exc++;
1182                                 } else if (is_Raise(ret)) {
1183                                         cf_pred[n_exc] = new_r_Proj(irg, get_nodes_block(ret), ret, mode_M, 1);
1184                                         n_exc++;
1185                                 }
1186                         }
1187                         set_Tuple_pred(call, pn_Call_M_except, new_Phi(n_exc, cf_pred, mode_M));
1188                 } else {
1189                         set_Tuple_pred(call, pn_Call_X_except, new_Bad());
1190                         set_Tuple_pred(call, pn_Call_M_except, new_Bad());
1191                 }
1192         } else {
1193                 ir_node *main_end_bl;
1194                 int main_end_bl_arity;
1195                 ir_node **end_preds;
1196
1197                 /* assert(exc_handling == 1 || no exceptions. ) */
1198                 n_exc = 0;
1199                 for (i = 0; i < arity; i++) {
1200                         ir_node *ret = get_Block_cfgpred(end_bl, i);
1201                         ir_node *irn = skip_Proj(ret);
1202
1203                         if (is_fragile_op(irn) || is_Raise(irn)) {
1204                                 cf_pred[n_exc] = ret;
1205                                 n_exc++;
1206                         }
1207                 }
1208                 main_end_bl       = get_irg_end_block(irg);
1209                 main_end_bl_arity = get_irn_arity(main_end_bl);
1210                 end_preds         = XMALLOCN(ir_node*, n_exc + main_end_bl_arity);
1211
1212                 for (i = 0; i < main_end_bl_arity; ++i)
1213                         end_preds[i] = get_irn_n(main_end_bl, i);
1214                 for (i = 0; i < n_exc; ++i)
1215                         end_preds[main_end_bl_arity + i] = cf_pred[i];
1216                 set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
1217                 set_Tuple_pred(call, pn_Call_X_except,  new_Bad());
1218                 set_Tuple_pred(call, pn_Call_M_except,  new_Bad());
1219                 free(end_preds);
1220         }
1221         free(res_pred);
1222         free(cf_pred);
1223
1224         /* --  Turn CSE back on. -- */
1225         set_optimize(rem_opt);
1226         current_ir_graph = rem;
1227
1228         return 1;
1229 }
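/*
 * Usage note (illustrative, not part of the original sources): callers are
 * expected to run collect_phiprojs() on the calling graph first, so that the
 * Proj nodes of the Call can be found via the link field, and to pass a copy
 * of the callee (made e.g. with create_irg_copy()) when the call is recursive:
 *
 *   collect_phiprojs(irg);
 *   if (inline_method(call, callee)) {
 *       ...   the Call node is now a Tuple and the callee body is part of irg
 *   }
 */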
1230
1231 /********************************************************************/
1232 /* Apply inlining to small methods.                                 */
1233 /********************************************************************/
1234
1235 static struct obstack  temp_obst;
1236
1237 /** Represents a possible inlinable call in a graph. */
1238 typedef struct _call_entry {
1239         ir_node    *call;       /**< The Call node. */
1240         ir_graph   *callee;     /**< The callee IR-graph. */
1241         list_head  list;        /**< List head for linking the next one. */
1242         int        loop_depth;  /**< The loop depth of this call. */
1243         int        benefice;    /**< The calculated benefice of this call. */
1244         unsigned   local_adr:1; /**< Set if this call gets an address of a local variable. */
1245         unsigned   all_const:1; /**< Set if this call has only constant parameters. */
1246 } call_entry;
1247
1248 /**
1249  * environment for inlining small irgs
1250  */
1251 typedef struct _inline_env_t {
1252         struct obstack obst;  /**< An obstack where call_entries are allocated on. */
1253         list_head      calls; /**< The call entry list. */
1254 } inline_env_t;
1255
1256 /**
1257  * Returns the irg called from a Call node. If the irg is not
1258  * known, NULL is returned.
1259  *
1260  * @param call  the call node
1261  */
1262 static ir_graph *get_call_called_irg(ir_node *call) {
1263         ir_node *addr;
1264
1265         addr = get_Call_ptr(call);
1266         if (is_Global(addr)) {
1267                 ir_entity *ent = get_Global_entity(addr);
1268                 return get_entity_irg(ent);
1269         }
1270
1271         return NULL;
1272 }
1273
1274 /**
1275  * Walker: Collect all calls to known graphs inside a graph.
1276  */
1277 static void collect_calls(ir_node *call, void *env) {
1278         (void) env;
1279         if (is_Call(call)) {
1280                 ir_graph *called_irg = get_call_called_irg(call);
1281
1282                 if (called_irg != NULL) {
1283                         /* The Call node calls a locally defined method.  Remember to inline. */
1284                         inline_env_t *ienv  = env;
1285                         call_entry   *entry = obstack_alloc(&ienv->obst, sizeof(*entry));
1286                         entry->call       = call;
1287                         entry->callee     = called_irg;
1288                         entry->loop_depth = 0;
1289                         entry->benefice   = 0;
1290                         entry->local_adr  = 0;
1291                         entry->all_const  = 0;
1292
1293                         list_add_tail(&entry->list, &ienv->calls);
1294                 }
1295         }
1296 }
1297
1298 /**
1299  * Inlines all small methods at call sites where the called address comes
1300  * from a SymConst node that references the entity representing the called
1301  * method.
1302  * The size argument is a rough measure for the code size of the method:
1303  * Methods where the obstack containing the firm graph is smaller than
1304  * size are inlined.
1305  */
1306 void inline_small_irgs(ir_graph *irg, int size) {
1307         ir_graph *rem = current_ir_graph;
1308         inline_env_t env;
1309         call_entry *entry;
1310
1311         current_ir_graph = irg;
1312         /* Handle graph state */
1313         assert(get_irg_phase_state(irg) != phase_building);
1314         free_callee_info(irg);
1315
1316         /* Find Call nodes to inline.
1317            (We cannot inline during a walk of the graph, as inlining the same
1318            method several times changes the visited flag of the walked graph:
1319            after the first inlining the visited counter of the callee equals
1320            that of the caller.  With the next inlining both are increased.) */
1321         obstack_init(&env.obst);
1322         INIT_LIST_HEAD(&env.calls);
1323         irg_walk_graph(irg, NULL, collect_calls, &env);
1324
1325         if (! list_empty(&env.calls)) {
1326                 /* There are calls to inline */
1327                 collect_phiprojs(irg);
1328
1329                 list_for_each_entry(call_entry, entry, &env.calls, list) {
1330                         ir_graph            *callee = entry->callee;
1331                         irg_inline_property prop    = get_irg_inline_property(callee);
1332
1333                         if (prop == irg_inline_forbidden || (get_irg_additional_properties(callee) & mtp_property_weak)) {
1334                                 /* do not inline forbidden / weak graphs */
1335                                 continue;
1336                         }
1337
1338                         if (prop >= irg_inline_forced ||
1339                             _obstack_memory_used(callee->obst) - (int)obstack_room(callee->obst) < size) {
1340                                 inline_method(entry->call, callee);
1341                         }
1342                 }
1343         }
1344         obstack_free(&env.obst, NULL);
1345         current_ir_graph = rem;
1346 }
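
/*
 * Minimal usage sketch (hypothetical driver code, not part of libFirm; the
 * byte threshold is an arbitrary example value): run the obstack-size based
 * inliner over every graph of the current program.
 *
 *     int i, n = get_irp_n_irgs();
 *     for (i = 0; i < n; ++i)
 *         inline_small_irgs(get_irp_irg(i), 5000);
 */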
1347
1348 /**
1349  * Environment for inlining irgs.
1350  */
1351 typedef struct {
1352         list_head calls;             /**< List of all call nodes in this graph. */
1353         unsigned  *local_weights;    /**< Once allocated, the beneficial weight for transmitting local addresses. */
1354         unsigned  n_nodes;           /**< Number of nodes in graph, not counting Proj, Tuple, Sync, Block, Start and End. */
1355         unsigned  n_blocks;          /**< Number of Blocks in graph without Start and End block. */
1356         unsigned  n_nodes_orig;      /**< for statistics */
1357         unsigned  n_call_nodes;      /**< Number of Call nodes in the graph. */
1358         unsigned  n_call_nodes_orig; /**< for statistics */
1359         unsigned  n_callers;         /**< Number of known graphs that call this graph. */
1360         unsigned  n_callers_orig;    /**< for statistics */
1361         unsigned  got_inline:1;      /**< Set, if at least one call inside this graph was inlined. */
1362         unsigned  local_vars:1;      /**< Set, if an inlined function got the address of a local variable. */
1363         unsigned  recursive:1;       /**< Set, if this function is self recursive. */
1364 } inline_irg_env;
1365
1366 /**
1367  * Allocate a new environment for inlining.
1368  */
1369 static inline_irg_env *alloc_inline_irg_env(void) {
1370         inline_irg_env *env    = obstack_alloc(&temp_obst, sizeof(*env));
1371         INIT_LIST_HEAD(&env->calls);
1372         env->local_weights     = NULL;
1373         env->n_nodes           = -2; /* do not count Start, End */
1374         env->n_blocks          = -2; /* do not count Start, End Block */
1375         env->n_nodes_orig      = -2; /* do not count Start, End */
1376         env->n_call_nodes      = 0;
1377         env->n_call_nodes_orig = 0;
1378         env->n_callers         = 0;
1379         env->n_callers_orig    = 0;
1380         env->got_inline        = 0;
1381         env->local_vars        = 0;
1382         env->recursive         = 0;
1383         return env;
1384 }
1385
1386 typedef struct walker_env {
1387         inline_irg_env *x;     /**< the inline environment */
1388         char ignore_runtime;   /**< if set, ignore calls to runtime functions */
1389         char ignore_callers;   /**< if set, do not update the callers' data */
1390 } wenv_t;
1391
1392 /**
1393  * post-walker: collect all calls in the inline-environment
1394  * of a graph and sum some statistics.
1395  */
1396 static void collect_calls2(ir_node *call, void *ctx) {
1397         wenv_t         *env = ctx;
1398         inline_irg_env *x = env->x;
1399         ir_opcode      code = get_irn_opcode(call);
1400         ir_graph       *callee;
1401         call_entry     *entry;
1402
1403         /* count meaningful nodes in irg */
1404         if (code != iro_Proj && code != iro_Tuple && code != iro_Sync) {
1405                 if (code != iro_Block) {
1406                         ++x->n_nodes;
1407                         ++x->n_nodes_orig;
1408                 } else {
1409                         ++x->n_blocks;
1410                 }
1411         }
1412
1413         if (code != iro_Call) return;
1414
1415         /* check if it's a runtime call */
1416         if (env->ignore_runtime) {
1417                 ir_node *symc = get_Call_ptr(call);
1418
1419                 if (is_Global(symc)) {
1420                         ir_entity *ent = get_Global_entity(symc);
1421
1422                         if (get_entity_additional_properties(ent) & mtp_property_runtime)
1423                                 return;
1424                 }
1425         }
1426
1427         /* collect all call nodes */
1428         ++x->n_call_nodes;
1429         ++x->n_call_nodes_orig;
1430
1431         callee = get_call_called_irg(call);
1432         if (callee != NULL) {
1433                 if (! env->ignore_callers) {
1434                         inline_irg_env *callee_env = get_irg_link(callee);
1435                         /* count all static callers */
1436                         ++callee_env->n_callers;
1437                         ++callee_env->n_callers_orig;
1438                 }
1439                 if (callee == current_ir_graph)
1440                         x->recursive = 1;
1441
1442                 /* link it into the list of possibly inlinable entries */
1443                 entry = obstack_alloc(&temp_obst, sizeof(*entry));
1444                 entry->call       = call;
1445                 entry->callee     = callee;
1446                 entry->loop_depth = get_irn_loop(get_nodes_block(call))->depth;
1447                 entry->benefice   = 0;
1448                 entry->local_adr  = 0;
1449                 entry->all_const  = 0;
1450
1451                 list_add_tail(&entry->list, &x->calls);
1452         }
1453 }
1454
1455 /**
1456  * Returns TRUE if the number of Call nodes is 0 in the irg's environment,
1457  * hence this irg is a leave (a leaf of the call graph).
1458  */
1459 INLINE static int is_leave(ir_graph *irg) {
1460         inline_irg_env *env = get_irg_link(irg);
1461         return env->n_call_nodes == 0;
1462 }
1463
1464 /**
1465  * Returns TRUE if the number of nodes in the callee is
1466  * smaller than size in the irg's environment.
1467  */
1468 INLINE static int is_smaller(ir_graph *callee, unsigned size) {
1469         inline_irg_env *env = get_irg_link(callee);
1470         return env->n_nodes < size;
1471 }
1472
1473 /**
1474  * Duplicate a call entry.
1475  *
1476  * @param entry     the original entry to duplicate
1477  * @param new_call  the new call node
1478  * @param loop_depth_delta
1479  *                  delta value for the loop depth
1480  */
1481 static call_entry *duplicate_call_entry(const call_entry *entry,
1482                                         ir_node *new_call, int loop_depth_delta) {
1483         call_entry *nentry = obstack_alloc(&temp_obst, sizeof(*nentry));
1484         nentry->call       = new_call;
1485         nentry->callee     = entry->callee;
1486         nentry->benefice   = entry->benefice;
1487         nentry->loop_depth = entry->loop_depth + loop_depth_delta;
1488         nentry->local_adr  = entry->local_adr;
1489         nentry->all_const  = entry->all_const;
1490
1491         return nentry;
1492 }
1493
1494 /**
1495  * Append all call entries of the source environment to the call list of the
1496  * destination environment.
1497  *
1498  * @param dst         destination environment
1499  * @param src         source environment
1500  * @param loop_depth  the loop depth of the call that is replaced by the src list
1501  */
1502 static void append_call_list(inline_irg_env *dst, inline_irg_env *src, int loop_depth) {
1503         call_entry *entry, *nentry;
1504
1505         /* Note that the src list points to Call nodes in the inlined graph, but
1506            we need Call nodes in our graph. Luckily the inliner leaves this information
1507            in the link field. */
1508         list_for_each_entry(call_entry, entry, &src->calls, list) {
1509                 nentry = duplicate_call_entry(entry, get_irn_link(entry->call), loop_depth);
1510                 list_add_tail(&nentry->list, &dst->calls);
1511         }
1512         dst->n_call_nodes += src->n_call_nodes;
1513         dst->n_nodes      += src->n_nodes;
1514 }
1515
1516 /*
1517  * Inlines small leave methods at call sites where the called address comes
1518  * from a Const node that references the entity representing the called
1519  * method.
1520  * Leave methods are inlined if their node count is below leavesize, other
1521  * methods if their node count is below size; inlining into a caller stops
1522  * once the caller reaches maxsize nodes.
1523  */
1524 void inline_leave_functions(unsigned maxsize, unsigned leavesize,
1525                             unsigned size, int ignore_runtime)
1526 {
1527         inline_irg_env   *env;
1528         ir_graph         *irg;
1529         int              i, n_irgs;
1530         ir_graph         *rem;
1531         int              did_inline;
1532         wenv_t           wenv;
1533         call_entry       *entry, *next;
1534         const call_entry *centry;
1535         pmap             *copied_graphs;
1536         pmap_entry       *pm_entry;
1537
1538         rem = current_ir_graph;
1539         obstack_init(&temp_obst);
1540
1541         /* a map for the copied graphs, used to inline recursive calls */
1542         copied_graphs = pmap_create();
1543
1544         /* extend all irgs by a temporary data structure for inlining. */
1545         n_irgs = get_irp_n_irgs();
1546         for (i = 0; i < n_irgs; ++i)
1547                 set_irg_link(get_irp_irg(i), alloc_inline_irg_env());
1548
1549         /* Pre-compute information in temporary data structure. */
1550         wenv.ignore_runtime = ignore_runtime;
1551         wenv.ignore_callers = 0;
1552         for (i = 0; i < n_irgs; ++i) {
1553                 ir_graph *irg = get_irp_irg(i);
1554
1555                 assert(get_irg_phase_state(irg) != phase_building);
1556                 free_callee_info(irg);
1557
1558                 assure_cf_loop(irg);
1559                 wenv.x = get_irg_link(irg);
1560                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
1561         }
1562
1563         /* -- and now inline. -- */
1564
1565         /* Inline leaves recursively -- we might construct new leaves. */
1566         do {
1567                 did_inline = 0;
1568
1569                 for (i = 0; i < n_irgs; ++i) {
1570                         ir_node *call;
1571                         int phiproj_computed = 0;
1572
1573                         current_ir_graph = get_irp_irg(i);
1574                         env              = get_irg_link(current_ir_graph);
1575
1576                         list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
1577                                 ir_graph            *callee;
1578                                 irg_inline_property  prop;
1579
1580                                 if (env->n_nodes > maxsize)
1581                                         break;
1582
1583                                 call   = entry->call;
1584                                 callee = entry->callee;
1585
1586                                 prop = get_irg_inline_property(callee);
1587                                 if (prop == irg_inline_forbidden || (get_irg_additional_properties(callee) & mtp_property_weak)) {
1588                                         /* do not inline forbidden / weak graphs */
1589                                         continue;
1590                                 }
1591
1592                                 if (is_leave(callee) && (
1593                                     is_smaller(callee, leavesize) || prop >= irg_inline_forced)) {
1594                                         if (!phiproj_computed) {
1595                                                 phiproj_computed = 1;
1596                                                 collect_phiprojs(current_ir_graph);
1597                                         }
1598                                         did_inline = inline_method(call, callee);
1599
1600                                         if (did_inline) {
1601                                                 inline_irg_env *callee_env = get_irg_link(callee);
1602
1603                                                 /* call was inlined, Phi/Projs for current graph must be recomputed */
1604                                                 phiproj_computed = 0;
1605
1606                                                 /* Do some statistics */
1607                                                 env->got_inline = 1;
1608                                                 --env->n_call_nodes;
1609                                                 env->n_nodes += callee_env->n_nodes;
1610                                                 --callee_env->n_callers;
1611
1612                                                 /* remove this call from the list */
1613                                                 list_del(&entry->list);
1614                                                 continue;
1615                                         }
1616                                 }
1617                         }
1618                 }
1619         } while (did_inline);
1620
1621         /* inline other small functions. */
1622         for (i = 0; i < n_irgs; ++i) {
1623                 ir_node *call;
1624                 int phiproj_computed = 0;
1625
1626                 current_ir_graph = get_irp_irg(i);
1627                 env              = get_irg_link(current_ir_graph);
1628
1629                 /* note that the list of possible calls is updated during the process */
1630                 list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
1631                         irg_inline_property prop;
1632                         ir_graph            *callee;
1633                         pmap_entry          *e;
1634
1635                         call   = entry->call;
1636                         callee = entry->callee;
1637
1638                         prop = get_irg_inline_property(callee);
1639                         if (prop == irg_inline_forbidden || (get_irg_additional_properties(callee) & mtp_property_weak)) {
1640                                 /* do not inline forbidden / weak graphs */
1641                                 continue;
1642                         }
1643
1644                         e = pmap_find(copied_graphs, callee);
1645                         if (e != NULL) {
1646                                 /*
1647                                  * Remap callee if we have a copy.
1648                                  * FIXME: Should we do this only for recursive Calls ?
1649                                  */
1650                                 callee = e->value;
1651                         }
1652
1653                         if (prop >= irg_inline_forced ||
1654                             (is_smaller(callee, size) && env->n_nodes < maxsize) /* small function */) {
1655                                 if (current_ir_graph == callee) {
1656                                         /*
1657                                          * Recursive call: we cannot directly inline because we cannot walk
1658                                          * the graph and change it. So we have to make a copy of the graph
1659                                          * first.
1660                                          */
1661
1662                                         inline_irg_env *callee_env;
1663                                         ir_graph       *copy;
1664
1665                                         /*
1666                                          * No copy yet, create one.
1667                                          * Note that recursive methods are never leaves, so it is sufficient
1668                                          * to test this condition here.
1669                                          */
1670                                         copy = create_irg_copy(callee);
1671
1672                                         /* create_irg_copy() destroys the Proj links, recompute them */
1673                                         phiproj_computed = 0;
1674
1675                                         /* allocate new environment */
1676                                         callee_env = alloc_inline_irg_env();
1677                                         set_irg_link(copy, callee_env);
1678
1679                                         assure_cf_loop(copy);
1680                                         wenv.x              = callee_env;
1681                                         wenv.ignore_callers = 1;
1682                                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
1683
1684                                         /*
1685                                          * Enter the entity of the original graph. This is needed
1686                                          * for inline_method(). However, note that ent->irg still points
1687                                          * to callee, NOT to copy.
1688                                          */
1689                                         set_irg_entity(copy, get_irg_entity(callee));
1690
1691                                         pmap_insert(copied_graphs, callee, copy);
1692                                         callee = copy;
1693
1694                                         /* we have only one caller: the original graph */
1695                                         callee_env->n_callers      = 1;
1696                                         callee_env->n_callers_orig = 1;
1697                                 }
1698                                 if (! phiproj_computed) {
1699                                         phiproj_computed = 1;
1700                                         collect_phiprojs(current_ir_graph);
1701                                 }
1702                                 did_inline = inline_method(call, callee);
1703                                 if (did_inline) {
1704                                         inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);
1705
1706                                         /* call was inlined, Phi/Projs for current graph must be recomputed */
1707                                         phiproj_computed = 0;
1708
1709                                         /* callee was inlined. Append its call list. */
1710                                         env->got_inline = 1;
1711                                         --env->n_call_nodes;
1712                                         append_call_list(env, callee_env, entry->loop_depth);
1713                                         --callee_env->n_callers;
1714
1715                                         /* after we have inlined callee, all called methods inside callee
1716                                            are now called once more */
1717                                         list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
1718                                                 inline_irg_env *penv = get_irg_link(centry->callee);
1719                                                 ++penv->n_callers;
1720                                         }
1721
1722                                         /* remove this call from the list */
1723                                         list_del(&entry->list);
1724                                         continue;
1725                                 }
1726                         }
1727                 }
1728         }
1729
1730         for (i = 0; i < n_irgs; ++i) {
1731                 irg = get_irp_irg(i);
1732                 env = get_irg_link(irg);
1733
1734                 if (env->got_inline) {
1735                         optimize_graph_df(irg);
1736                         optimize_cf(irg);
1737                 }
1738                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
1739                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
1740                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
1741                         env->n_callers_orig, env->n_callers,
1742                         get_entity_name(get_irg_entity(irg))));
1743                 }
1744         }
1745
1746         /* kill the copied graphs: we don't need them anymore */
1747         foreach_pmap(copied_graphs, pm_entry) {
1748                 ir_graph *copy = pm_entry->value;
1749
1750                 /* reset the entity, otherwise it will be deleted in the next step ... */
1751                 set_irg_entity(copy, NULL);
1752                 free_ir_graph(copy);
1753         }
1754         pmap_destroy(copied_graphs);
1755
1756         obstack_free(&temp_obst, NULL);
1757         current_ir_graph = rem;
1758 }
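
/*
 * Hypothetical usage sketch (example values, not libFirm defaults): inline
 * leave methods of up to roughly 500 nodes and other methods of up to roughly
 * 250 nodes, stop growing a caller once it reaches 8000 nodes, and keep calls
 * to runtime routines as ordinary inline candidates (ignore_runtime = 0).
 *
 *     inline_leave_functions(8000, 500, 250, 0);
 */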
1759
1760 /**
1761  * Calculate the weight of a single argument that transmits the address of a local variable, by examining its uses.
1762  */
1763 static unsigned calc_method_local_weight(ir_node *arg) {
1764         int      i, j, k;
1765         unsigned v, weight = 0;
1766
1767         for (i = get_irn_n_outs(arg) - 1; i >= 0; --i) {
1768                 ir_node *succ = get_irn_out(arg, i);
1769
1770                 switch (get_irn_opcode(succ)) {
1771                 case iro_Load:
1772                 case iro_Store:
1773                         /* Loads and Stores can be removed */
1774                         weight += 3;
1775                         break;
1776                 case iro_Sel:
1777                         /* check if all args are constant */
1778                         for (j = get_Sel_n_indexs(succ) - 1; j >= 0; --j) {
1779                                 ir_node *idx = get_Sel_index(succ, j);
1780                                 if (! is_Const(idx))
1781                                         return 0;
1782                         }
1783                         /* Check users on this Sel. Note: if a 0 is returned here, there was
1784                            some unsupported node. */
1785                         v = calc_method_local_weight(succ);
1786                         if (v == 0)
1787                                 return 0;
1788                         /* we can kill one Sel with constant indexes, this is cheap */
1789                         weight += v + 1;
1790                         break;
1791                 case iro_Id:
1792                         /* when looking backward we might find Id nodes */
1793                         weight += calc_method_local_weight(succ);
1794                         break;
1795                 case iro_Tuple:
1796                         /* unoptimized tuple */
1797                         for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
1798                                 ir_node *pred = get_Tuple_pred(succ, j);
1799                                 if (pred == arg) {
1800                                         /* look for Proj(j) */
1801                                         for (k = get_irn_n_outs(succ) - 1; k >= 0; --k) {
1802                                                 ir_node *succ_succ = get_irn_out(succ, k);
1803                                                 if (is_Proj(succ_succ)) {
1804                                                         if (get_Proj_proj(succ_succ) == j) {
1805                                                                 /* found */
1806                                                                 weight += calc_method_local_weight(succ_succ);
1807                                                         }
1808                                                 } else {
1809                                                         /* this should NOT happen */
1810                                                         return 0;
1811                                                 }
1812                                         }
1813                                 }
1814                         }
1815                         break;
1816                 default:
1817                         /* any other node: unsupported yet or bad. */
1818                         return 0;
1819                 }
1820         }
1821         return weight;
1822 }
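
/*
 * Worked example (illustrative only): an argument used by two Loads and one
 * Store gets weight 3 + 3 + 3 = 9; if its only use is a Sel with constant
 * indexes whose result feeds one Load and one Store, the Sel contributes
 * (3 + 3) + 1 = 7.  Any unsupported use makes the whole weight 0, i.e. the
 * address probably escapes and scalar replacement will not help.
 */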
1823
1824 /**
1825  * Calculate the parameter weights for transmitting the address of a local variable.
1826  */
1827 static void analyze_irg_local_weights(inline_irg_env *env, ir_graph *irg) {
1828         ir_entity *ent = get_irg_entity(irg);
1829         ir_type  *mtp;
1830         int      nparams, i, proj_nr;
1831         ir_node  *irg_args, *arg;
1832
1833         mtp      = get_entity_type(ent);
1834         nparams  = get_method_n_params(mtp);
1835
1836         /* allocate a new array; a non-NULL pointer also serves as the 'analysed' flag */
1837         env->local_weights = NEW_ARR_D(unsigned, &temp_obst, nparams);
1838
1839         /* If the method has no parameters, we have nothing to do. */
1840         if (nparams <= 0)
1841                 return;
1842
1843         assure_irg_outs(irg);
1844         irg_args = get_irg_args(irg);
1845         for (i = get_irn_n_outs(irg_args) - 1; i >= 0; --i) {
1846                 arg     = get_irn_out(irg_args, i);
1847                 proj_nr = get_Proj_proj(arg);
1848                 env->local_weights[proj_nr] = calc_method_local_weight(arg);
1849         }
1850 }
1851
1852 /**
1853  * Calculate the benefice for transmitting a local variable address.
1854  * After inlining, the local variable might be transformed into an
1855  * SSA variable by scalar_replacement().
1856  */
1857 static unsigned get_method_local_adress_weight(ir_graph *callee, int pos) {
1858         inline_irg_env *env = get_irg_link(callee);
1859
1860         if (env->local_weights != NULL) {
1861                 if (pos < ARR_LEN(env->local_weights))
1862                         return env->local_weights[pos];
1863                 return 0;
1864         }
1865
1866         analyze_irg_local_weights(env, callee);
1867
1868         if (pos < ARR_LEN(env->local_weights))
1869                 return env->local_weights[pos];
1870         return 0;
1871 }
1872
1873 /**
1874  * Calculate a benefice value for inlining the given call.
1875  *
1876  * @param entry      the call entry containing the Call node to inspect
1877  * @param callee     the called graph
1878  */
1879 static int calc_inline_benefice(call_entry *entry, ir_graph *callee)
1880 {
1881         ir_node   *call = entry->call;
1882         ir_entity *ent  = get_irg_entity(callee);
1883         ir_node   *frame_ptr;
1884         ir_type   *mtp;
1885         int       weight = 0;
1886         int       i, n_params, all_const;
1887         unsigned  cc, v;
1888         irg_inline_property prop;
1889
1890         inline_irg_env *callee_env;
1891
1892         prop = get_irg_inline_property(callee);
1893         if (prop == irg_inline_forbidden) {
1894                 DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden\n",
1895                     call, callee));
1896                 return entry->benefice = INT_MIN;
1897         }
1898
1899         if (get_irg_additional_properties(callee) & (mtp_property_noreturn | mtp_property_weak)) {
1900                 DB((dbg, LEVEL_2, "In %+F Call to %+F: not inlining noreturn or weak\n",
1901                     call, callee));
1902                 return entry->benefice = INT_MIN;
1903         }
1904
1905         /* costs for every passed parameter */
1906         n_params = get_Call_n_params(call);
1907         mtp      = get_entity_type(ent);
1908         cc       = get_method_calling_convention(mtp);
1909         if (cc & cc_reg_param) {
1910                 /* register parameter, smaller costs for register parameters */
1911                 int max_regs = cc & ~cc_bits;
1912
1913                 if (max_regs < n_params)
1914                         weight += max_regs * 2 + (n_params - max_regs) * 5;
1915                 else
1916                         weight += n_params * 2;
1917         } else {
1918                 /* parameters are passed on the stack */
1919                 weight += 5 * n_params;
1920         }
1921
1922         /* constant parameters improve the benefice */
1923         frame_ptr = get_irg_frame(current_ir_graph);
1924         all_const = 1;
1925         for (i = 0; i < n_params; ++i) {
1926                 ir_node *param = get_Call_param(call, i);
1927
1928                 if (is_Const(param)) {
1929                         weight += get_method_param_weight(ent, i);
1930                 } else {
1931                         all_const = 0;
1932                         if (is_SymConst(param))
1933                                 weight += get_method_param_weight(ent, i);
1934                         else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
1935                                 /*
1936                                  * An address of a local variable is transmitted. After
1937                                  * inlining, scalar_replacement might be able to remove the
1938                                  * local variable, so honor this.
1939                                  */
1940                                 v = get_method_local_adress_weight(callee, i);
1941                                 weight += v;
1942                                 if (v > 0)
1943                                         entry->local_adr = 1;
1944                         }
1945                 }
1946         }
1947         entry->all_const = all_const;
1948
1949         callee_env = get_irg_link(callee);
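        /* Presumably the only call to a file-local function: inlining it may make
           the out-of-line copy unnecessary, so give a large bonus. */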
1950         if (callee_env->n_callers == 1 &&
1951             callee != current_ir_graph &&
1952             get_entity_visibility(ent) == visibility_local) {
1953                 weight += 700;
1954         }
1955
1956         /* give a bonus for functions with one block */
1957         if (callee_env->n_blocks == 1)
1958                 weight = weight * 3 / 2;
1959
1960         /* and one for small non-recursive functions: we want them to be inlined in almost every case */
1961         if (callee_env->n_nodes < 30 && !callee_env->recursive)
1962                 weight += 2000;
1963
1964         /* and finally for leaves: they do not increase the register pressure
1965            because of callee-save registers */
1966         if (callee_env->n_call_nodes == 0)
1967                 weight += 400;
1968
1969         /* it is important to inline calls in inner loops first */
1970         if (entry->loop_depth > 30)
1971                 weight += 30 * 1024;
1972         else
1973                 weight += entry->loop_depth * 1024;
1974
1975         /*
1976          * All arguments constant is probably a good sign, give an extra bonus
1977          */
1978         if (all_const)
1979                 weight += 1024;
1980
1981         return entry->benefice = weight;
1982 }
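
/*
 * Worked example (hypothetical numbers, following the order of the checks
 * above): a call with 2 non-constant stack parameters to a leave callee with
 * one block, 20 nodes, several callers, at loop depth 1:
 * parameters 2 * 5 = 10, one-block bonus 10 * 3 / 2 = 15, small non-recursive
 * bonus +2000 = 2015, leave bonus +400 = 2415, loop depth +1 * 1024 = 3439.
 * Not all arguments are constant, so the extra 1024 is not added.
 */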
1983
1984 static ir_graph **irgs;
1985 static int      last_irg;
1986
1987 /**
1988  * Callgraph walker, collect all visited graphs.
1989  */
1990 static void callgraph_walker(ir_graph *irg, void *data) {
1991         (void) data;
1992         irgs[last_irg++] = irg;
1993 }
1994
1995 /**
1996  * Creates an inline order for all graphs.
1997  *
1998  * @return the list of graphs.
1999  */
2000 static ir_graph **create_irg_list(void) {
2001         ir_entity **free_methods;
2002         int       arr_len;
2003         int       n_irgs = get_irp_n_irgs();
2004
2005         cgana(&arr_len, &free_methods);
2006         xfree(free_methods);
2007
2008         compute_callgraph();
2009
2010         last_irg = 0;
2011         irgs     = XMALLOCNZ(ir_graph*, n_irgs);
2012
2013         callgraph_walk(NULL, callgraph_walker, NULL);
2014         assert(n_irgs == last_irg);
2015
2016         return irgs;
2017 }
2018
2019 /**
2020  * Push a call onto the priority list if its benefice is big enough.
2021  *
2022  * @param pqueue   the priority queue of calls
2023  * @param call     the call entry
2024  * @param inline_threshold
2025  *                 the threshold value
2026  */
2027 static void maybe_push_call(pqueue_t *pqueue, call_entry *call,
2028                             int inline_threshold)
2029 {
2030         ir_graph            *callee  = call->callee;
2031         irg_inline_property prop     = get_irg_inline_property(callee);
2032         int                 benefice = calc_inline_benefice(call, callee);
2033
2034         DB((dbg, LEVEL_2, "In %+F Call %+F to %+F has benefice %d\n",
2035             get_irn_irg(call->call), call->call, callee, benefice));
2036
2037         if (prop < irg_inline_forced && benefice < inline_threshold) {
2038                 return;
2039         }
2040
2041         pqueue_put(pqueue, call, benefice);
2042 }
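
/*
 * Example (reusing the hypothetical numbers sketched above): with
 * inline_threshold = 0 a call of benefice 3439 is queued, a call to a
 * forbidden, noreturn or weak callee (benefice INT_MIN) is dropped, and a
 * callee marked irg_inline_forced is queued regardless of its benefice.
 */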
2043
2044 /**
2045  * Try to inline calls into a graph.
2046  *
2047  * @param irg      the graph into which we inline
2048  * @param maxsize  do NOT inline if the size of irg gets
2049  *                 bigger than this amount
2050  * @param inline_threshold
2051  *                 threshold value for inline decision
2052  * @param copied_graphs
2053  *                 map containing copies of recursive graphs
2054  */
2055 static void inline_into(ir_graph *irg, unsigned maxsize,
2056                         int inline_threshold, pmap *copied_graphs)
2057 {
2058         int            phiproj_computed = 0;
2059         inline_irg_env *env = get_irg_link(irg);
2060         call_entry     *curr_call;
2061         wenv_t         wenv;
2062         pqueue_t       *pqueue;
2063
2064         if (env->n_call_nodes == 0)
2065                 return;
2066
2067         if (env->n_nodes > maxsize) {
2068                 DB((dbg, LEVEL_2, "%+F: too big (%d)\n", irg, env->n_nodes));
2069                 return;
2070         }
2071
2072         current_ir_graph = irg;
2073
2074         /* put the calls of this graph into the priority queue */
2075         pqueue = new_pqueue();
2076
2077         list_for_each_entry(call_entry, curr_call, &env->calls, list) {
2078                 assert(is_Call(curr_call->call));
2079                 maybe_push_call(pqueue, curr_call, inline_threshold);
2080         }
2081
2082         /* note that the list of possible calls is updated during the process */
2083         while (!pqueue_empty(pqueue)) {
2084                 int                 did_inline;
2085                 call_entry          *curr_call  = pqueue_pop_front(pqueue);
2086                 ir_graph            *callee     = curr_call->callee;
2087                 ir_node             *call_node  = curr_call->call;
2088                 inline_irg_env      *callee_env = get_irg_link(callee);
2089                 irg_inline_property prop        = get_irg_inline_property(callee);
2090                 int                 loop_depth;
2091                 const call_entry    *centry;
2092                 pmap_entry          *e;
2093
2094                 if ((prop < irg_inline_forced) && env->n_nodes + callee_env->n_nodes > maxsize) {
2095                         DB((dbg, LEVEL_2, "%+F: too big (%d) + %+F (%d)\n", irg,
2096                                                 env->n_nodes, callee, callee_env->n_nodes));
2097                         continue;
2098                 }
2099
2100                 e = pmap_find(copied_graphs, callee);
2101                 if (e != NULL) {
2102                         int benefice = curr_call->benefice;
2103                         /*
2104                          * Reduce the weight for recursive functions IFF not all arguments are const;
2105                          * inlining recursive functions is rarely good.
2106                          */
2107                         if (!curr_call->all_const)
2108                                 benefice -= 2000;
2109                         if (benefice < inline_threshold)
2110                                 continue;
2111
2112                         /*
2113                          * Remap callee if we have a copy.
2114                          */
2115                         callee     = e->value;
2116                         callee_env = get_irg_link(callee);
2117                 }
2118
2119                 if (current_ir_graph == callee) {
2120                         /*
2121                          * Recursive call: we cannot directly inline because we cannot
2122                          * walk the graph and change it. So we have to make a copy of
2123                          * the graph first.
2124                          */
2125                         int benefice = curr_call->benefice;
2126                         ir_graph *copy;
2127
2128                         /*
2129                          * Reduce the weight for recursive functions IFF not all arguments are const;
2130                          * inlining recursive functions is rarely good.
2131                          */
2132                         if (!curr_call->all_const)
2133                                 benefice -= 2000;
2134                         if (benefice < inline_threshold)
2135                                 continue;
2136
2137                         /*
2138                          * No copy yet, create one.
2139                          * Note that recursive methods are never leaves, so it is
2140                          * sufficient to test this condition here.
2141                          */
2142                         copy = create_irg_copy(callee);
2143
2144                         /* create_irg_copy() destroys the Proj links, recompute them */
2145                         phiproj_computed = 0;
2146
2147                         /* allocate a new environment */
2148                         callee_env = alloc_inline_irg_env();
2149                         set_irg_link(copy, callee_env);
2150
2151                         assure_cf_loop(copy);
2152                         wenv.x              = callee_env;
2153                         wenv.ignore_callers = 1;
2154                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
2155
2156                         /*
2157                          * Enter the entity of the original graph. This is needed
2158                          * for inline_method(). However, note that ent->irg still points
2159                          * to callee, NOT to copy.
2160                          */
2161                         set_irg_entity(copy, get_irg_entity(callee));
2162
2163                         pmap_insert(copied_graphs, callee, copy);
2164                         callee = copy;
2165
2166                         /* we have only one caller: the original graph */
2167                         callee_env->n_callers      = 1;
2168                         callee_env->n_callers_orig = 1;
2169                 }
2170                 if (! phiproj_computed) {
2171                         phiproj_computed = 1;
2172                         collect_phiprojs(current_ir_graph);
2173                 }
2174                 did_inline = inline_method(call_node, callee);
2175                 if (!did_inline)
2176                         continue;
2177
2178                 /* call was inlined, Phi/Projs for current graph must be recomputed */
2179                 phiproj_computed = 0;
2180
2181                 /* remove it from the caller list */
2182                 list_del(&curr_call->list);
2183
2184                 /* callee was inlined. Append its call list. */
2185                 env->got_inline = 1;
2186                 if (curr_call->local_adr)
2187                         env->local_vars = 1;
2188                 --env->n_call_nodes;
2189
2190                 /* we just generated a bunch of new calls */
2191                 loop_depth = curr_call->loop_depth;
2192                 list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
2193                         inline_irg_env *penv = get_irg_link(centry->callee);
2194                         ir_node        *new_call;
2195                         call_entry     *new_entry;
2196
2197                         /* after we have inlined callee, all called methods inside
2198                          * callee are now called once more */
2199                         ++penv->n_callers;
2200
2201                         /* Note that the src list points to Call nodes in the inlined graph,
2202                          * but we need Call nodes in our graph. Luckily the inliner leaves
2203                          * this information in the link field. */
2204                         new_call = get_irn_link(centry->call);
2205                         assert(is_Call(new_call));
2206
2207                         new_entry = duplicate_call_entry(centry, new_call, loop_depth);
2208                         list_add_tail(&new_entry->list, &env->calls);
2209                         maybe_push_call(pqueue, new_entry, inline_threshold);
2210                 }
2211
2212                 env->n_call_nodes += callee_env->n_call_nodes;
2213                 env->n_nodes += callee_env->n_nodes;
2214                 --callee_env->n_callers;
2215         }
2216
2217         del_pqueue(pqueue);
2218 }
2219
2220 /*
2221  * Heuristic inliner. Calculates a benefice value for every call and inlines
2222  * those calls with a value higher than the threshold.
2223  */
2224 void inline_functions(unsigned maxsize, int inline_threshold) {
2225         inline_irg_env   *env;
2226         int              i, n_irgs;
2227         ir_graph         *rem;
2228         wenv_t           wenv;
2229         pmap             *copied_graphs;
2230         pmap_entry       *pm_entry;
2231         ir_graph         **irgs;
2232
2233         rem = current_ir_graph;
2234         obstack_init(&temp_obst);
2235
2236         irgs = create_irg_list();
2237
2238         /* a map for the copied graphs, used to inline recursive calls */
2239         copied_graphs = pmap_create();
2240
2241         /* extend all irgs by a temporary data structure for inlining. */
2242         n_irgs = get_irp_n_irgs();
2243         for (i = 0; i < n_irgs; ++i)
2244                 set_irg_link(irgs[i], alloc_inline_irg_env());
2245
2246         /* Pre-compute information in temporary data structure. */
2247         wenv.ignore_runtime = 0;
2248         wenv.ignore_callers = 0;
2249         for (i = 0; i < n_irgs; ++i) {
2250                 ir_graph *irg = irgs[i];
2251
2252                 free_callee_info(irg);
2253
2254                 wenv.x = get_irg_link(irg);
2255                 assure_cf_loop(irg);
2256                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
2257         }
2258
2259         /* -- and now inline. -- */
2260         for (i = 0; i < n_irgs; ++i) {
2261                 ir_graph *irg = irgs[i];
2262
2263                 inline_into(irg, maxsize, inline_threshold, copied_graphs);
2264         }
2265
2266         for (i = 0; i < n_irgs; ++i) {
2267                 ir_graph *irg = irgs[i];
2268
2269                 env = get_irg_link(irg);
2270                 if (env->got_inline) {
2271                         /* this irg got calls inlined: optimize it */
2272                         if (get_opt_combo()) {
2273                                 if (env->local_vars) {
2274                                         scalar_replacement_opt(irg);
2275                                 }
2276                                 combo(irg);
2277                         } else {
2278                                 if (env->local_vars) {
2279                                         if (scalar_replacement_opt(irg)) {
2280                                                 optimize_graph_df(irg);
2281                                         }
2282                                 }
2283                                 optimize_cf(irg);
2284                         }
2285                 }
2286                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
2287                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
2288                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
2289                         env->n_callers_orig, env->n_callers,
2290                         get_entity_name(get_irg_entity(irg))));
2291                 }
2292         }
2293
2294         /* kill the copied graphs: we don't need them anymore */
2295         foreach_pmap(copied_graphs, pm_entry) {
2296                 ir_graph *copy = pm_entry->value;
2297
2298                 /* reset the entity, otherwise it will be deleted in the next step ... */
2299                 set_irg_entity(copy, NULL);
2300                 free_ir_graph(copy);
2301         }
2302         pmap_destroy(copied_graphs);
2303
2304         xfree(irgs);
2305
2306         obstack_free(&temp_obst, NULL);
2307         current_ir_graph = rem;
2308 }
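
/*
 * Hypothetical usage sketch (example values, not libFirm defaults): let no
 * caller grow beyond 750 nodes and inline only calls whose computed benefice
 * is non-negative.
 *
 *     inline_functions(750, 0);
 */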
2309
2310 void firm_init_inline(void) {
2311         FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
2312 }