Add passes for inlining, private methods.
[libfirm] / ir / opt / opt_inline.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All rights reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief    Dead node elimination and Procedure Inlining.
23  * @author   Michael Beck, Goetz Lindenmaier
24  * @version  $Id$
25  */
26 #include "config.h"
27
28 #include <limits.h>
29 #include <assert.h>
30
31 #include "irnode_t.h"
32 #include "irgraph_t.h"
33 #include "irprog_t.h"
34
35 #include "iroptimize.h"
36 #include "ircons_t.h"
37 #include "iropt_t.h"
38 #include "irgopt.h"
39 #include "irgmod.h"
40 #include "irgwalk.h"
41
42 #include "array_t.h"
43 #include "list.h"
44 #include "pset.h"
45 #include "pmap.h"
46 #include "pdeq.h"
47 #include "xmalloc.h"
48 #include "pqueue.h"
49
50 #include "irouts.h"
51 #include "irloop_t.h"
52 #include "irbackedge_t.h"
53 #include "opt_inline_t.h"
54 #include "cgana.h"
55 #include "trouts.h"
56 #include "error.h"
57
58 #include "analyze_irg_args.h"
59 #include "iredges_t.h"
60 #include "irflag_t.h"
61 #include "irhooks.h"
62 #include "irtools.h"
63 #include "iropt_dbg.h"
64 #include "irpass_t.h"
65
66 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
67
68 /*------------------------------------------------------------------*/
69 /* Routines for dead node elimination / copying garbage collection  */
70 /* of the obstack.                                                  */
71 /*------------------------------------------------------------------*/
72
73 /**
74  * Remember the new node in the old node by using a field all nodes have.
75  */
76 #define set_new_node(oldn, newn)  set_irn_link(oldn, newn)
77
78 /**
79  * Get this new node, before the old node is forgotten.
80  */
81 #define get_new_node(oldn) get_irn_link(oldn)
82
83 /**
84  * Check if a new node was set.
85  */
86 #define has_new_node(n) (get_new_node(n) != NULL)
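/*
 * Illustrative sketch (not part of the original source; oldn/newn are
 * placeholder names): a copy pass records the copy right after allocating it
 * and reads it back when rewiring predecessors:
 *
 *   set_new_node(oldn, newn);
 *   ...
 *   if (has_new_node(oldn))
 *           pred = get_new_node(oldn);
 */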
87
88 /**
89  * We use the block_visited flag to mark that we have computed the
90  * number of useful predecessors for this block.
91  * Furthermore, we encode the new arity in this flag for the old blocks.
92  * Remembering the arity is useful, as it saves a lot of pointer
93  * accesses.  This function is called for all Phi and Block nodes
94  * in a Block.
95  */
96 static inline int
97 compute_new_arity(ir_node *b) {
98         int i, res, irn_arity;
99         int irg_v, block_v;
100
101         irg_v = get_irg_block_visited(current_ir_graph);
102         block_v = get_Block_block_visited(b);
103         if (block_v >= irg_v) {
104                 /* we computed the number of preds for this block and saved it in the
105                    block_v flag */
106                 return block_v - irg_v;
107         } else {
108                 /* compute the number of good predecessors */
109                 res = irn_arity = get_irn_arity(b);
110                 for (i = 0; i < irn_arity; i++)
111                         if (is_Bad(get_irn_n(b, i))) res--;
112                 /* save it in the flag. */
113                 set_Block_block_visited(b, irg_v + res);
114                 return res;
115         }
116 }
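/*
 * Worked example (illustrative only): assume the graph's block_visited value
 * is 10 and a block has 3 predecessors, one of them Bad.  The first call
 * computes res = 2, stores block_visited = 12 and returns 2; any later call
 * takes the first branch and returns 12 - 10 = 2 without rescanning the
 * predecessors.
 */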
117
118 /**
119  * Copies the node to the new obstack. The Ins of the new node point to
120  * the predecessors on the old obstack.  For block/phi nodes not all
121  * predecessors might be copied.  n->link points to the new node.
122  * For Phi and Block nodes the function allocates in-arrays with an arity
123  * only for useful predecessors.  The arity is determined by counting
124  * the non-bad predecessors of the block.
125  *
126  * @param n    The node to be copied
127  * @param env  if non-NULL, the node number attribute will be copied to the new node
128  *
129  * Note: Also used for loop unrolling.
130  */
131 static void copy_node(ir_node *n, void *env) {
132         ir_node *nn, *block;
133         int new_arity;
134         ir_op *op = get_irn_op(n);
135         (void) env;
136
137         if (op == op_Bad) {
138                 /* node copied already */
139                 return;
140         } else if (op == op_Block) {
141                 block = NULL;
142                 new_arity = compute_new_arity(n);
143                 n->attr.block.graph_arr = NULL;
144         } else {
145                 block = get_nodes_block(n);
146                 if (op == op_Phi) {
147                         new_arity = compute_new_arity(block);
148                 } else {
149                         new_arity = get_irn_arity(n);
150                 }
151         }
152         nn = new_ir_node(get_irn_dbg_info(n),
153                 current_ir_graph,
154                 block,
155                 op,
156                 get_irn_mode(n),
157                 new_arity,
158                 get_irn_in(n) + 1);
159         /* Copy the attributes.  These might point to additional data.  If this
160            was allocated on the old obstack the pointers now are dangling.  This
161            frees e.g. the memory of the graph_arr allocated in new_immBlock. */
162         if (op == op_Block) {
163                 /* we cannot allow blocks WITHOUT macroblock input */
164                 set_Block_MacroBlock(nn, get_Block_MacroBlock(n));
165         }
166         copy_node_attr(n, nn);
167
168         if (env != NULL) {
169                 /* for easier debugging, we want to copy the node numbers too */
170                 nn->node_nr = n->node_nr;
171         }
172
173         set_new_node(n, nn);
174         hook_dead_node_elim_subst(current_ir_graph, n, nn);
175 }
176
177 /**
178  * Copies new predecessors of old node to new node remembered in link.
179  * Spare the Bad predecessors of Phi and Block nodes.
180  */
181 static void copy_preds(ir_node *n, void *env) {
182         ir_node *nn, *block;
183         int i, j, irn_arity;
184         (void) env;
185
186         nn = get_new_node(n);
187
188         if (is_Block(n)) {
189                 /* copy the macro block header */
190                 ir_node *mbh = get_Block_MacroBlock(n);
191
192                 if (mbh == n) {
193                         /* this block is a macroblock header */
194                         set_Block_MacroBlock(nn, nn);
195                 } else {
196                         /* get the macro block header */
197                         ir_node *nmbh = get_new_node(mbh);
198                         assert(nmbh != NULL);
199                         set_Block_MacroBlock(nn, nmbh);
200                 }
201
202                 /* Don't copy Bad nodes. */
203                 j = 0;
204                 irn_arity = get_irn_arity(n);
205                 for (i = 0; i < irn_arity; i++) {
206                         if (! is_Bad(get_irn_n(n, i))) {
207                                 ir_node *pred = get_irn_n(n, i);
208                                 set_irn_n(nn, j, get_new_node(pred));
209                                 j++;
210                         }
211                 }
212                 /* repair the block visited flag from above misuse. Repair it in both
213                    graphs so that the old one can still be used. */
214                 set_Block_block_visited(nn, 0);
215                 set_Block_block_visited(n, 0);
216                 /* Local optimization could not merge two subsequent blocks if
217                    the in array contained Bads.  Now it's possible.
218                    We don't call optimize_in_place as it requires
219                    that the fields in ir_graph are set properly. */
220                 if (!has_Block_entity(nn) &&
221                     get_opt_control_flow_straightening() &&
222                     get_Block_n_cfgpreds(nn) == 1 &&
223                     is_Jmp(get_Block_cfgpred(nn, 0))) {
224                         ir_node *old = get_nodes_block(get_Block_cfgpred(nn, 0));
225                         if (nn == old) {
226                                 /* Jmp jumps into the block it is in -- handle the self cycle. */
227                                 assert(is_Bad(get_new_node(get_irg_bad(current_ir_graph))));
228                                 exchange(nn, get_new_node(get_irg_bad(current_ir_graph)));
229                         } else {
230                                 exchange(nn, old);
231                         }
232                 }
233         } else if (is_Phi(n) && get_irn_arity(n) > 0) {
234                 /* Don't copy node if corresponding predecessor in block is Bad.
235                    The Block itself should not be Bad. */
236                 block = get_nodes_block(n);
237                 set_nodes_block(nn, get_new_node(block));
238                 j = 0;
239                 irn_arity = get_irn_arity(n);
240                 for (i = 0; i < irn_arity; i++) {
241                         if (! is_Bad(get_irn_n(block, i))) {
242                                 ir_node *pred = get_irn_n(n, i);
243                                 set_irn_n(nn, j, get_new_node(pred));
244                                 /*if (is_backedge(n, i)) set_backedge(nn, j);*/
245                                 j++;
246                         }
247                 }
248                 /* If the pre walker reached this Phi after the post walker visited the
249                    block, block_visited is > 0. */
250                 set_Block_block_visited(get_nodes_block(n), 0);
251                 /* Compacting the Phi's ins might generate Phis with only one
252                    predecessor. */
253                 if (get_irn_arity(nn) == 1)
254                         exchange(nn, get_irn_n(nn, 0));
255         } else {
256                 irn_arity = get_irn_arity(n);
257                 for (i = -1; i < irn_arity; i++)
258                         set_irn_n(nn, i, get_new_node(get_irn_n(n, i)));
259         }
260         /* Now the new node is complete.  We can add it to the hash table for CSE.
261            @@@ inlining aborts if we identify End. Why? */
262         if (!is_End(nn))
263                 add_identities(current_ir_graph->value_table, nn);
264 }
265
266 /**
267  * Copies the graph recursively, compacts the keep-alives of the end node.
268  *
269  * @param irg           the graph to be copied
270  * @param copy_node_nr  If non-zero, the node number will be copied
271  */
272 static void copy_graph(ir_graph *irg, int copy_node_nr) {
273         ir_node *oe, *ne, *ob, *nb, *om, *nm; /* old end, new end, old bad, new bad, old NoMem, new NoMem */
274         ir_node *ka;      /* keep alive */
275         int i, irn_arity;
276         unsigned long vfl;
277
278         /* Some nodes must be copied by hand, sigh */
279         vfl = get_irg_visited(irg);
280         set_irg_visited(irg, vfl + 1);
281
282         oe = get_irg_end(irg);
283         mark_irn_visited(oe);
284         /* copy the end node by hand, allocate dynamic in array! */
285         ne = new_ir_node(get_irn_dbg_info(oe),
286                 irg,
287                 NULL,
288                 op_End,
289                 mode_X,
290                 -1,
291                 NULL);
292         /* Copy the attributes.  Well, there might be some in the future... */
293         copy_node_attr(oe, ne);
294         set_new_node(oe, ne);
295
296         /* copy the Bad node */
297         ob = get_irg_bad(irg);
298         mark_irn_visited(ob);
299         nb = new_ir_node(get_irn_dbg_info(ob),
300                 irg,
301                 NULL,
302                 op_Bad,
303                 mode_T,
304                 0,
305                 NULL);
306         copy_node_attr(ob, nb);
307         set_new_node(ob, nb);
308
309         /* copy the NoMem node */
310         om = get_irg_no_mem(irg);
311         mark_irn_visited(om);
312         nm = new_ir_node(get_irn_dbg_info(om),
313                 irg,
314                 NULL,
315                 op_NoMem,
316                 mode_M,
317                 0,
318                 NULL);
319         copy_node_attr(om, nm);
320         set_new_node(om, nm);
321
322         /* copy the live nodes */
323         set_irg_visited(irg, vfl);
324         irg_walk(get_nodes_block(oe), copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
325
326         /* Note: from now on, the visited flag of the graph is equal to vfl + 1 */
327
328         /* visit the anchors as well */
329         for (i = get_irg_n_anchors(irg) - 1; i >= 0; --i) {
330                 ir_node *n = get_irg_anchor(irg, i);
331
332                 if (n && (get_irn_visited(n) <= vfl)) {
333                         set_irg_visited(irg, vfl);
334                         irg_walk(n, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
335                 }
336         }
337
338         /* copy_preds for the end node ... */
339         set_nodes_block(ne, get_new_node(get_nodes_block(oe)));
340
341         /*- ... and now the keep alives. -*/
342         /* First pick the unmarked block nodes and walk them.  We must pick these
343            first, else we would overlook blocks reachable from Phis. */
344         irn_arity = get_End_n_keepalives(oe);
345         for (i = 0; i < irn_arity; i++) {
346                 ka = get_End_keepalive(oe, i);
347                 if (is_Block(ka)) {
348                         if (get_irn_visited(ka) <= vfl) {
349                                 /* We must keep the block alive and copy everything reachable */
350                                 set_irg_visited(irg, vfl);
351                                 irg_walk(ka, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
352                         }
353                         add_End_keepalive(ne, get_new_node(ka));
354                 }
355         }
356
357         /* Now pick other nodes.  Here we will keep all! */
358         irn_arity = get_End_n_keepalives(oe);
359         for (i = 0; i < irn_arity; i++) {
360                 ka = get_End_keepalive(oe, i);
361                 if (!is_Block(ka)) {
362                         if (get_irn_visited(ka) <= vfl) {
363                                 /* We didn't copy the node yet.  */
364                                 set_irg_visited(irg, vfl);
365                                 irg_walk(ka, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
366                         }
367                         add_End_keepalive(ne, get_new_node(ka));
368                 }
369         }
370
371         /* start block sometimes only reached after keep alives */
372         set_nodes_block(nb, get_new_node(get_nodes_block(ob)));
373         set_nodes_block(nm, get_new_node(get_nodes_block(om)));
374 }
375
376 /**
377  * Copies the graph reachable from current_ir_graph->end to the obstack
378  * in current_ir_graph and fixes the environment.
379  * Then fixes the fields in current_ir_graph containing nodes of the
380  * graph.
381  *
382  * @param copy_node_nr  If non-zero, the node number will be copied
383  */
384 static void
385 copy_graph_env(int copy_node_nr) {
386         ir_graph *irg = current_ir_graph;
387         ir_node *old_end, *new_anchor;
388         int i;
389
390         /* remove end_except and end_reg nodes */
391         old_end = get_irg_end(irg);
392         set_irg_end_except (irg, old_end);
393         set_irg_end_reg    (irg, old_end);
394
395         /* Not all nodes remembered in irg might be reachable
396            from the end node.  Assure their link is set to NULL, so that
397            we can test whether new nodes have been computed. */
398         for (i = get_irg_n_anchors(irg) - 1; i >= 0; --i) {
399                 ir_node *n = get_irg_anchor(irg, i);
400                 if (n != NULL)
401                         set_new_node(n, NULL);
402         }
403         /* we use the block walk flag for removing Bads from Blocks ins. */
404         inc_irg_block_visited(irg);
405
406         /* copy the graph */
407         copy_graph(irg, copy_node_nr);
408
409         /* fix the anchor */
410         old_end    = get_irg_end(irg);
411         new_anchor = new_Anchor(irg);
412
413         for (i = get_irg_n_anchors(irg) - 1; i >= 0; --i) {
414                 ir_node *n = get_irg_anchor(irg, i);
415                 if (n)
416                         set_irn_n(new_anchor, i, get_new_node(n));
417         }
418         free_End(old_end);
419         irg->anchor = new_anchor;
420
421         /* ensure the new anchor is placed in the endblock */
422         set_nodes_block(new_anchor, get_irg_end_block(irg));
423 }
424
425 /**
426  * Copies all reachable nodes to a new obstack.  Removes bad inputs
427  * from block nodes and the corresponding inputs from Phi nodes.
428  * Merges single exit blocks with single entry blocks and removes
429  * 1-input Phis.
430  * Adds all new nodes to a new hash table for CSE.  Does not
431  * perform CSE, so the hash table might contain common subexpressions.
432  */
433 void dead_node_elimination(ir_graph *irg) {
434         ir_graph *rem;
435 #ifdef INTERPROCEDURAL_VIEW
436         int rem_ipview = get_interprocedural_view();
437 #endif
438         struct obstack *graveyard_obst = NULL;
439         struct obstack *rebirth_obst   = NULL;
440
441         edges_deactivate(irg);
442
443         /* inform statistics that we started a dead-node elimination run */
444         hook_dead_node_elim(irg, 1);
445
446         /* Remember external state of current_ir_graph. */
447         rem = current_ir_graph;
448         current_ir_graph = irg;
449 #ifdef INTERPROCEDURAL_VIEW
450         set_interprocedural_view(0);
451 #endif
452
453         assert(get_irg_phase_state(irg) != phase_building);
454
455         /* Handle graph state */
456         free_callee_info(irg);
457         free_irg_outs(irg);
458         free_trouts();
459
460         /* @@@ so far we lose loops when copying */
461         free_loop_information(irg);
462
463         set_irg_doms_inconsistent(irg);
464
465         /* A quiet place, where the old obstack can rest in peace,
466            until it will be cremated. */
467         graveyard_obst = irg->obst;
468
469         /* A new obstack, where the reachable nodes will be copied to. */
470         rebirth_obst = XMALLOC(struct obstack);
471         irg->obst = rebirth_obst;
472         obstack_init(irg->obst);
473         irg->last_node_idx = 0;
474
475         /* We also need a new value table for CSE */
476         del_identities(irg->value_table);
477         irg->value_table = new_identities();
478
479         /* Copy the graph from the old to the new obstack */
480         copy_graph_env(/*copy_node_nr=*/1);
481
482         /* Free memory from old unoptimized obstack */
483         obstack_free(graveyard_obst, 0);  /* First empty the obstack ... */
484         xfree(graveyard_obst);            /* ... then free it.           */
485
486         /* inform statistics that the run is over */
487         hook_dead_node_elim(irg, 0);
488
489         current_ir_graph = rem;
490 #ifdef INTERPROCEDURAL_VIEW
491         set_interprocedural_view(rem_ipview);
492 #endif
493 }
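/*
 * Usage sketch (illustrative, not part of the original file): the pass is run
 * on a finished graph to reclaim obstack memory, e.g.
 *
 *   assert(get_irg_phase_state(irg) != phase_building);
 *   dead_node_elimination(irg);
 *
 * Afterwards outs, dominance and loop information must be recomputed, since
 * they are freed or marked inconsistent above.
 */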
494
495 /**
496  * Relink bad predecessors of a block and store the old in array in the
497  * link field. This function is called by relink_bad_predecessors().
498  * The array in the link field starts with the block operand at position 0.
499  * If the block has bad predecessors, create a new in array without bad preds.
500  * Otherwise leave the in array untouched.
501  */
502 static void relink_bad_block_predecessors(ir_node *n, void *env) {
503         ir_node **new_in, *irn;
504         int i, new_irn_n, old_irn_arity, new_irn_arity = 0;
505         (void) env;
506
507         /* if link field of block is NULL, look for bad predecessors otherwise
508            this is already done */
509         if (is_Block(n) && get_irn_link(n) == NULL) {
510                 /* save old predecessors in link field (position 0 is the block operand)*/
511                 set_irn_link(n, get_irn_in(n));
512
513                 /* count predecessors without bad nodes */
514                 old_irn_arity = get_irn_arity(n);
515                 for (i = 0; i < old_irn_arity; i++)
516                         if (!is_Bad(get_irn_n(n, i)))
517                                 ++new_irn_arity;
518
519                 /* arity changing: set new predecessors without bad nodes */
520                 if (new_irn_arity < old_irn_arity) {
521                         /* Get new predecessor array. We do not resize the array, as we must
522                            keep the old one to update Phis. */
523                         new_in = NEW_ARR_D(ir_node *, current_ir_graph->obst, (new_irn_arity+1));
524
525                         /* set new predecessors in array */
526                         new_in[0] = NULL;
527                         new_irn_n = 1;
528                         for (i = 0; i < old_irn_arity; i++) {
529                                 irn = get_irn_n(n, i);
530                                 if (!is_Bad(irn)) {
531                                         new_in[new_irn_n] = irn;
532                                         is_backedge(n, i) ? set_backedge(n, new_irn_n-1) : set_not_backedge(n, new_irn_n-1);
533                                         ++new_irn_n;
534                                 }
535                         }
536                         /* ARR_SETLEN(int, n->attr.block.backedge, new_irn_arity); */
537                         ARR_SHRINKLEN(n->attr.block.backedge, new_irn_arity);
538                         n->in = new_in;
539                 } /* ir node has bad predecessors */
540         } /* Block is not relinked */
541 }
542
543 /**
544  * Relinks Bad predecessors from Blocks and Phis, called by the walker in
545  * remove_bad_predecessors(). If n is a Block, call
546  * relink_bad_block_predecessors(). If n is a Phi node, also call the relinking
547  * function for the Phi's Block. If this block has bad predecessors, relink the
548  * preds of the Phi node.
549  */
550 static void relink_bad_predecessors(ir_node *n, void *env) {
551         ir_node *block, **old_in;
552         int i, old_irn_arity, new_irn_arity;
553
554         /* relink bad predecessors of a block */
555         if (is_Block(n))
556                 relink_bad_block_predecessors(n, env);
557
558         /* If Phi node relink its block and its predecessors */
559         if (is_Phi(n)) {
560                 /* Relink predecessors of phi's block */
561                 block = get_nodes_block(n);
562                 if (get_irn_link(block) == NULL)
563                         relink_bad_block_predecessors(block, env);
564
565                 old_in = (ir_node **)get_irn_link(block); /* Of Phi's Block */
566                 old_irn_arity = ARR_LEN(old_in);
567
568                 /* Relink Phi predecessors if count of predecessors changed */
569                 if (old_irn_arity != ARR_LEN(get_irn_in(block))) {
570                         /* set new predecessors in array
571                            n->in[0] remains the same block */
572                         new_irn_arity = 1;
573                         for(i = 1; i < old_irn_arity; i++)
574                                 if (!is_Bad(old_in[i])) {
575                                         n->in[new_irn_arity] = n->in[i];
576                                         is_backedge(n, i) ? set_backedge(n, new_irn_arity) : set_not_backedge(n, new_irn_arity);
577                                         ++new_irn_arity;
578                                 }
579
580                         ARR_SETLEN(ir_node *, n->in, new_irn_arity);
581                         ARR_SETLEN(int, n->attr.phi.u.backedge, new_irn_arity);
582                 }
583         } /* n is a Phi node */
584 }
585
586 /*
587  * Removes Bad predecessors from Blocks and the corresponding
588  * inputs to Phi nodes as in dead_node_elimination but without
589  * copying the graph.
590  * On walking up set the link field to NULL, on walking down call
591  * relink_bad_predecessors() (This function stores the old in array
592  * to the link field and sets a new in array if arity of predecessors
593  * changes).
594  */
595 void remove_bad_predecessors(ir_graph *irg) {
596         panic("Fix backedge handling first");
597         irg_walk_graph(irg, firm_clear_link, relink_bad_predecessors, NULL);
598 }
599
600
601 /*
602    __                      _  __ __
603   (_     __    o     _    | \/  |_
604   __)|_| | \_/ | \_/(/_   |_/\__|__
605
606   The following stuff implements a facility that automatically patches
607   registered ir_node pointers to the new node when a dead node elimination occurs.
608 */
609
610 struct _survive_dce_t {
611         struct obstack obst;
612         pmap *places;
613         pmap *new_places;
614         hook_entry_t dead_node_elim;
615         hook_entry_t dead_node_elim_subst;
616 };
617
618 typedef struct _survive_dce_list_t {
619         struct _survive_dce_list_t *next;
620         ir_node **place;
621 } survive_dce_list_t;
622
623 static void dead_node_hook(void *context, ir_graph *irg, int start) {
624         survive_dce_t *sd = context;
625         (void) irg;
626
627         /* Create a new map before the dead node elimination is performed. */
628         if (start) {
629                 sd->new_places = pmap_create_ex(pmap_count(sd->places));
630         } else {
631                 /* Patch back all nodes if dead node elimination is over and something is to be done. */
632                 pmap_destroy(sd->places);
633                 sd->places     = sd->new_places;
634                 sd->new_places = NULL;
635         }
636 }
637
638 /**
639  * Hook called when dead node elimination replaces old by nw.
640  */
641 static void dead_node_subst_hook(void *context, ir_graph *irg, ir_node *old, ir_node *nw) {
642         survive_dce_t *sd = context;
643         survive_dce_list_t *list = pmap_get(sd->places, old);
644         (void) irg;
645
646         /* If the node is to be patched back, write the new address to all registered locations. */
647         if (list) {
648                 survive_dce_list_t *p;
649
650                 for (p = list; p; p = p->next)
651                         *(p->place) = nw;
652
653                 pmap_insert(sd->new_places, nw, list);
654         }
655 }
656
657 /**
658  * Make a new Survive DCE environment.
659  */
660 survive_dce_t *new_survive_dce(void) {
661         survive_dce_t *res = XMALLOC(survive_dce_t);
662         obstack_init(&res->obst);
663         res->places     = pmap_create();
664         res->new_places = NULL;
665
666         res->dead_node_elim.hook._hook_dead_node_elim = dead_node_hook;
667         res->dead_node_elim.context                   = res;
668         res->dead_node_elim.next                      = NULL;
669
670         res->dead_node_elim_subst.hook._hook_dead_node_elim_subst = dead_node_subst_hook;
671         res->dead_node_elim_subst.context = res;
672         res->dead_node_elim_subst.next    = NULL;
673
674         register_hook(hook_dead_node_elim, &res->dead_node_elim);
675         register_hook(hook_dead_node_elim_subst, &res->dead_node_elim_subst);
676         return res;
677 }
678
679 /**
680  * Free a Survive DCE environment.
681  */
682 void free_survive_dce(survive_dce_t *sd) {
683         obstack_free(&sd->obst, NULL);
684         pmap_destroy(sd->places);
685         unregister_hook(hook_dead_node_elim, &sd->dead_node_elim);
686         unregister_hook(hook_dead_node_elim_subst, &sd->dead_node_elim_subst);
687         xfree(sd);
688 }
689
690 /**
691  * Register a node pointer to be patched upon DCE.
692  * When DCE occurs, the node pointer specified by @p place will be
693  * patched to the new address of the node it is pointing to.
694  *
695  * @param sd    The Survive DCE environment.
696  * @param place The address of the node pointer.
697  */
698 void survive_dce_register_irn(survive_dce_t *sd, ir_node **place) {
699         if (*place != NULL) {
700                 ir_node *irn      = *place;
701                 survive_dce_list_t *curr = pmap_get(sd->places, irn);
702                 survive_dce_list_t *nw   = obstack_alloc(&sd->obst, sizeof(nw[0]));
703
704                 nw->next  = curr;
705                 nw->place = place;
706
707                 pmap_insert(sd->places, irn, nw);
708         }
709 }
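/*
 * Usage sketch (illustrative only; my_node and irg are placeholder names):
 * keep a node pointer valid across a dead node elimination run:
 *
 *   survive_dce_t *sd = new_survive_dce();
 *   survive_dce_register_irn(sd, &my_node);
 *   dead_node_elimination(irg);        now my_node points to the copied node
 *   free_survive_dce(sd);
 */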
710
711 /*--------------------------------------------------------------------*/
712 /*  Functionality for inlining                                         */
713 /*--------------------------------------------------------------------*/
714
715 /**
716  * Copy node for inlining.  Updates attributes that change when
717  * inlining but not for dead node elimination.
718  *
719  * Copies the node by calling copy_node() and then updates the entity if
720  * it's a local one.  env must be a pointer to the frame type of the
721  * inlined procedure. The new entities must be in the link field of
722  * the entities.
723  */
724 static void copy_node_inline(ir_node *n, void *env) {
725         ir_node *nn;
726         ir_type *frame_tp = (ir_type *)env;
727
728         copy_node(n, NULL);
729         if (is_Sel(n)) {
730                 nn = get_new_node(n);
731                 assert(is_Sel(nn));
732                 /* use copied entities from the new frame */
733                 if (get_entity_owner(get_Sel_entity(n)) == frame_tp) {
734                         set_Sel_entity(nn, get_entity_link(get_Sel_entity(n)));
735                 }
736         } else if (is_Block(n)) {
737                 nn = get_new_node(n);
738                 nn->attr.block.irg.irg = current_ir_graph;
739         }
740 }
741
742 /**
743  * Copies new predecessors of old node and moves constants to
744  * the Start Block.
745  */
746 static void copy_preds_inline(ir_node *n, void *env) {
747         ir_node *nn;
748
749         copy_preds(n, env);
750         nn = skip_Id(get_new_node(n));
751         if (is_irn_constlike(nn)) {
752                 /* move Constants into the start block */
753                 set_nodes_block(nn, get_irg_start_block(current_ir_graph));
754
755                 n = identify_remember(current_ir_graph->value_table, nn);
756                 if (nn != n) {
757                         DBG_OPT_CSE(nn, n);
758                         exchange(nn, n);
759                 }
760         }
761 }
762
763 /**
764  * Walker: checks if P_value_arg_base is used.
765  */
766 static void find_addr(ir_node *node, void *env) {
767         int *allow_inline = env;
768         if (is_Sel(node)) {
769                 ir_graph *irg = current_ir_graph;
770                 if (get_Sel_ptr(node) == get_irg_frame(irg)) {
771                         /* access to frame */
772                         ir_entity *ent = get_Sel_entity(node);
773                         if (get_entity_owner(ent) != get_irg_frame_type(irg)) {
774                                 /* access to value_type */
775                                 *allow_inline = 0;
776                         }
777                 }
778         } else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
779                 /* From GCC:
780                  * Refuse to inline alloca call unless user explicitly forced so as this
781                  * may change program's memory overhead drastically when the function
782                  * using alloca is called in loop.  In GCC present in SPEC2000 inlining
783                  * into schedule_block cause it to require 2GB of ram instead of 256MB.
784                  *
785                  * Sadly this is true for our implementation as well.
786                  * Moreover, we cannot differentiate between alloca() and VLA yet, so this
787                  * disables inlining of functions using VLA (which are completely safe).
788                  *
789                  * 2 Solutions:
790                  * - add a flag to the Alloc node for "real" alloca() calls
791                  * - add a new Stack-Restore node at the end of a function using alloca()
792                  */
793                 *allow_inline = 0;
794         }
795 }
796
797 /**
798  * Check if we can inline a given call.
799  * Currently, we cannot inline two cases:
800  * - call with compound arguments
801  * - graphs that take the address of a parameter
802  *
803  * check these conditions here
804  */
805 static int can_inline(ir_node *call, ir_graph *called_graph) {
806         ir_type *call_type = get_Call_type(call);
807         int params, ress, i, res;
808         assert(is_Method_type(call_type));
809
810         params = get_method_n_params(call_type);
811         ress   = get_method_n_ress(call_type);
812
813         /* check parameters for compound arguments */
814         for (i = 0; i < params; ++i) {
815                 ir_type *p_type = get_method_param_type(call_type, i);
816
817                 if (is_compound_type(p_type))
818                         return 0;
819         }
820
821         /* check results for compound arguments */
822         for (i = 0; i < ress; ++i) {
823                 ir_type *r_type = get_method_res_type(call_type, i);
824
825                 if (is_compound_type(r_type))
826                         return 0;
827         }
828
829         res = 1;
830         irg_walk_graph(called_graph, find_addr, NULL, &res);
831
832         return res;
833 }
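/*
 * Illustrative examples (not from the original source): a call whose type
 * passes or returns a struct by value, e.g. a callee declared as
 *
 *   struct point make_point(void);
 *
 * is rejected because of the compound result type, and a callee that takes
 * the address of one of its parameters is rejected by the find_addr() walk.
 */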
834
835 enum exc_mode {
836         exc_handler    = 0, /**< There is a handler. */
837         exc_to_end     = 1, /**< Branches to End. */
838         exc_no_handler = 2  /**< Exception handling not represented. */
839 };
840
841 /* Inlines a method at the given call site. */
842 int inline_method(ir_node *call, ir_graph *called_graph) {
843         ir_node             *pre_call;
844         ir_node             *post_call, *post_bl;
845         ir_node             *in[pn_Start_max];
846         ir_node             *end, *end_bl, *block;
847         ir_node             **res_pred;
848         ir_node             **cf_pred;
849         ir_node             **args_in;
850         ir_node             *ret, *phi;
851         int                 arity, n_ret, n_exc, n_res, i, n, j, rem_opt, irn_arity, n_params;
852         enum exc_mode       exc_handling;
853         ir_type             *called_frame, *curr_frame, *mtp, *ctp;
854         ir_entity           *ent;
855         ir_graph            *rem, *irg;
856         irg_inline_property prop = get_irg_inline_property(called_graph);
857         unsigned long       visited;
858
859         if (prop == irg_inline_forbidden)
860                 return 0;
861
862         ent = get_irg_entity(called_graph);
863
864         mtp = get_entity_type(ent);
865         ctp = get_Call_type(call);
866         n_params = get_method_n_params(mtp);
867         n_res    = get_method_n_ress(mtp);
868         if (n_params > get_method_n_params(ctp)) {
869                 /* this is a bad feature of C: without a prototype, we can
870                  * call a function with fewer parameters than needed. Currently
871                  * we don't support this, although we could use Unknown then. */
872                 return 0;
873         }
874         if (n_res != get_method_n_ress(ctp)) {
875                 return 0;
876         }
877
878         /* Argh, compiling C has some bad consequences:
879          * If argument and parameter types differ, it is implementation
880          * dependent what happens. We support inlining if the bit size of
881          * the types matches AND the same arithmetic is used. */
882         for (i = n_params - 1; i >= 0; --i) {
883                 ir_type *param_tp = get_method_param_type(mtp, i);
884                 ir_type *arg_tp   = get_method_param_type(ctp, i);
885
886                 if (param_tp != arg_tp) {
887                         ir_mode *pmode = get_type_mode(param_tp);
888                         ir_mode *amode = get_type_mode(arg_tp);
889
890                         if (pmode == NULL || amode == NULL)
891                                 return 0;
892                         if (get_mode_size_bits(pmode) != get_mode_size_bits(amode))
893                                 return 0;
894                         if (get_mode_arithmetic(pmode) != get_mode_arithmetic(amode))
895                                 return 0;
896                         /* otherwise we can simply "reinterpret" the bits */
897                 }
898         }
899         for (i = n_res - 1; i >= 0; --i) {
900                 ir_type *decl_res_tp = get_method_res_type(mtp, i);
901                 ir_type *used_res_tp = get_method_res_type(ctp, i);
902
903                 if (decl_res_tp != used_res_tp) {
904                         ir_mode *decl_mode = get_type_mode(decl_res_tp);
905                         ir_mode *used_mode = get_type_mode(used_res_tp);
906                         if (decl_mode == NULL || used_mode == NULL)
907                                 return 0;
908                         if (get_mode_size_bits(decl_mode) != get_mode_size_bits(used_mode))
909                                 return 0;
910                         if (get_mode_arithmetic(decl_mode) != get_mode_arithmetic(used_mode))
911                                 return 0;
912                         /* otherwise we can "reinterpret" the bits */
913                 }
914         }
915
916         irg = get_irn_irg(call);
917
918         /*
919          * We cannot inline a recursive call. The graph must be copied before
920          * the call to inline_method() using create_irg_copy().
921          */
922         if (called_graph == irg)
923                 return 0;
924
925         /*
926          * currently, we cannot inline two cases:
927          * - call with compound arguments
928          * - graphs that take the address of a parameter
929          */
930         if (! can_inline(call, called_graph))
931                 return 0;
932
933         rem = current_ir_graph;
934         current_ir_graph = irg;
935
936         DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));
937
938         /* --  Turn off optimizations, this can cause problems when allocating new nodes. -- */
939         rem_opt = get_opt_optimize();
940         set_optimize(0);
941
942         /* Handle graph state */
943         assert(get_irg_phase_state(irg) != phase_building);
944         assert(get_irg_pinned(irg) == op_pin_state_pinned);
945         assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
946         set_irg_outs_inconsistent(irg);
947         set_irg_extblk_inconsistent(irg);
948         set_irg_doms_inconsistent(irg);
949         set_irg_loopinfo_inconsistent(irg);
950         set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
951         set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);
952
953         /* -- Check preconditions -- */
954         assert(is_Call(call));
955
956         /* here we know we WILL inline, so inform the statistics */
957         hook_inline(call, called_graph);
958
959         /* -- Decide how to handle exception control flow: Is there a handler
960            for the Call node, or do we branch directly to End on an exception?
961            exc_handling:
962            0 There is a handler.
963            1 Branches to End.
964            2 Exception handling not represented in Firm. -- */
965         {
966                 ir_node *proj, *Mproj = NULL, *Xproj = NULL;
967                 for (proj = get_irn_link(call); proj; proj = get_irn_link(proj)) {
968                         long proj_nr = get_Proj_proj(proj);
969                         if (proj_nr == pn_Call_X_except) Xproj = proj;
970                         if (proj_nr == pn_Call_M_except) Mproj = proj;
971                 }
972                 if      (Mproj) { assert(Xproj); exc_handling = exc_handler; } /*  Mproj           */
973                 else if (Xproj) {                exc_handling = exc_to_end; } /* !Mproj &&  Xproj   */
974                 else            {                exc_handling = exc_no_handler; } /* !Mproj && !Xproj   */
975         }
976
977         /* create the argument tuple */
978         NEW_ARR_A(ir_node *, args_in, n_params);
979
980         block = get_nodes_block(call);
981         for (i = n_params - 1; i >= 0; --i) {
982                 ir_node *arg      = get_Call_param(call, i);
983                 ir_type *param_tp = get_method_param_type(mtp, i);
984                 ir_mode *mode     = get_type_mode(param_tp);
985
986                 if (mode != get_irn_mode(arg)) {
987                         arg = new_r_Conv(block, arg, mode);
988                 }
989                 args_in[i] = arg;
990         }
991
992         /* --
993            Pre_call is a Tuple mimicking the Start node outputs of the called procedure and later replaces the Start node of the called graph.
994            Post_call is the old Call node and collects the results of the called
995            graph. Both will end up being a tuple.  -- */
996         post_bl = get_nodes_block(call);
997         set_irg_current_block(irg, post_bl);
998         /* XxMxPxPxPxT of Start + parameter of Call */
999         in[pn_Start_X_initial_exec]   = new_Jmp();
1000         in[pn_Start_M]                = get_Call_mem(call);
1001         in[pn_Start_P_frame_base]     = get_irg_frame(irg);
1002         in[pn_Start_P_tls]            = get_irg_tls(irg);
1003         in[pn_Start_T_args]           = new_Tuple(n_params, args_in);
1004         pre_call = new_Tuple(pn_Start_max, in);
1005         post_call = call;
1006
1007         /* --
1008            The new block gets the ins of the old block, pre_call and all its
1009            predecessors and all Phi nodes. -- */
1010         part_block(pre_call);
1011
1012         /* -- Prepare state for dead node elimination -- */
1013         /* Visited flags in calling irg must be >= flag in called irg.
1014            Else walker and arity computation will not work. */
1015         if (get_irg_visited(irg) <= get_irg_visited(called_graph))
1016                 set_irg_visited(irg, get_irg_visited(called_graph) + 1);
1017         if (get_irg_block_visited(irg) < get_irg_block_visited(called_graph))
1018                 set_irg_block_visited(irg, get_irg_block_visited(called_graph));
1019         visited = get_irg_visited(irg);
1020
1021         /* Set pre_call as new Start node in link field of the start node of
1022            calling graph and pre_call's block as new block for the start block
1023            of calling graph.
1024            Further mark these nodes so that they are not visited by the
1025            copying. */
1026         set_irn_link(get_irg_start(called_graph), pre_call);
1027         set_irn_visited(get_irg_start(called_graph), visited);
1028         set_irn_link(get_irg_start_block(called_graph), get_nodes_block(pre_call));
1029         set_irn_visited(get_irg_start_block(called_graph), visited);
1030
1031         set_irn_link(get_irg_bad(called_graph), get_irg_bad(current_ir_graph));
1032         set_irn_visited(get_irg_bad(called_graph), visited);
1033
1034         set_irn_link(get_irg_no_mem(called_graph), get_irg_no_mem(current_ir_graph));
1035         set_irn_visited(get_irg_no_mem(called_graph), visited);
1036
1037         /* Initialize for compaction of in arrays */
1038         inc_irg_block_visited(irg);
1039
1040         /* -- Replicate local entities of the called_graph -- */
1041         /* copy the entities. */
1042         irp_reserve_resources(irp, IR_RESOURCE_ENTITY_LINK);
1043         called_frame = get_irg_frame_type(called_graph);
1044         curr_frame   = get_irg_frame_type(irg);
1045         for (i = 0, n = get_class_n_members(called_frame); i < n; ++i) {
1046                 ir_entity *new_ent, *old_ent;
1047                 old_ent = get_class_member(called_frame, i);
1048                 new_ent = copy_entity_own(old_ent, curr_frame);
1049                 set_entity_link(old_ent, new_ent);
1050         }
1051
1052         /* visited is > than that of the called graph.  With this trick visited will
1053            remain unchanged, so an outer walker, e.g. one searching for Call nodes
1054            to inline, that calls this inliner will not visit the inlined nodes. */
1055         set_irg_visited(irg, get_irg_visited(irg)-1);
1056
1057         /* -- Performing dead node elimination inlines the graph -- */
1058         /* Copies the nodes to the obstack of current_ir_graph. Updates links to new
1059            entities. */
1060         irg_walk(get_irg_end(called_graph), copy_node_inline, copy_preds_inline,
1061                  get_irg_frame_type(called_graph));
1062
1063         irp_free_resources(irp, IR_RESOURCE_ENTITY_LINK);
1064
1065         /* Repair called_graph */
1066         set_irg_visited(called_graph, get_irg_visited(irg));
1067         set_irg_block_visited(called_graph, get_irg_block_visited(irg));
1068         set_Block_block_visited(get_irg_start_block(called_graph), 0);
1069
1070         /* -- Merge the end of the inlined procedure with the call site -- */
1071         /* We will turn the old Call node into a Tuple with the following
1072            predecessors:
1073            -1:  Block of Tuple.
1074            0: Phi of all Memories of Return statements.
1075            1: Jmp from new Block that merges the control flow from all exception
1076            predecessors of the old end block.
1077            2: Tuple of all arguments.
1078            3: Phi of Exception memories.
1079            In case the old Call directly branches to End on an exception we don't
1080            need the block merging all exceptions nor the Phi of the exception
1081            memories.
1082         */
1083
1084         /* -- Precompute some values -- */
1085         end_bl = get_new_node(get_irg_end_block(called_graph));
1086         end = get_new_node(get_irg_end(called_graph));
1087         arity = get_Block_n_cfgpreds(end_bl);    /* arity = n_exc + n_ret  */
1088         n_res = get_method_n_ress(get_Call_type(call));
1089
1090         res_pred = XMALLOCN(ir_node*, n_res);
1091         cf_pred  = XMALLOCN(ir_node*, arity);
1092
1093         set_irg_current_block(irg, post_bl); /* just to make sure */
1094
1095         /* -- archive keepalives -- */
1096         irn_arity = get_irn_arity(end);
1097         for (i = 0; i < irn_arity; i++) {
1098                 ir_node *ka = get_End_keepalive(end, i);
1099                 if (! is_Bad(ka))
1100                         add_End_keepalive(get_irg_end(irg), ka);
1101         }
1102
1103         /* The new end node will die.  We need not free as the in array is on the obstack:
1104            copy_node() only generated 'D' arrays. */
1105
1106         /* -- Replace Return nodes by Jump nodes. -- */
1107         n_ret = 0;
1108         for (i = 0; i < arity; i++) {
1109                 ir_node *ret;
1110                 ret = get_Block_cfgpred(end_bl, i);
1111                 if (is_Return(ret)) {
1112                         cf_pred[n_ret] = new_r_Jmp(get_nodes_block(ret));
1113                         n_ret++;
1114                 }
1115         }
1116         set_irn_in(post_bl, n_ret, cf_pred);
1117
1118         /* -- Build a Tuple for all results of the method.
1119            Add Phi node if there was more than one Return.  -- */
1120         turn_into_tuple(post_call, pn_Call_max);
1121         /* First the Memory-Phi */
1122         n_ret = 0;
1123         for (i = 0; i < arity; i++) {
1124                 ret = get_Block_cfgpred(end_bl, i);
1125                 if (is_Return(ret)) {
1126                         cf_pred[n_ret] = get_Return_mem(ret);
1127                         n_ret++;
1128                 }
1129         }
1130         phi = new_Phi(n_ret, cf_pred, mode_M);
1131         set_Tuple_pred(call, pn_Call_M_regular, phi);
1132         /* Conserve Phi-list for further inlinings -- but might be optimized */
1133         if (get_nodes_block(phi) == post_bl) {
1134                 set_irn_link(phi, get_irn_link(post_bl));
1135                 set_irn_link(post_bl, phi);
1136         }
1137         /* Now the real results */
1138         if (n_res > 0) {
1139                 for (j = 0; j < n_res; j++) {
1140                         ir_type *res_type = get_method_res_type(ctp, j);
1141                         ir_mode *res_mode = get_type_mode(res_type);
1142                         n_ret = 0;
1143                         for (i = 0; i < arity; i++) {
1144                                 ret = get_Block_cfgpred(end_bl, i);
1145                                 if (is_Return(ret)) {
1146                                         ir_node *res = get_Return_res(ret, j);
1147                                         if (get_irn_mode(res) != res_mode) {
1148                                                 ir_node *block = get_nodes_block(res);
1149                                                 res = new_r_Conv(block, res, res_mode);
1150                                         }
1151                                         cf_pred[n_ret] = res;
1152                                         n_ret++;
1153                                 }
1154                         }
1155                         if (n_ret > 0)
1156                                 phi = new_Phi(n_ret, cf_pred, get_irn_mode(cf_pred[0]));
1157                         else
1158                                 phi = new_Bad();
1159                         res_pred[j] = phi;
1160                         /* Conserve Phi-list for further inlinings -- but might be optimized */
1161                         if (get_nodes_block(phi) == post_bl) {
1162                                 set_Phi_next(phi, get_Block_phis(post_bl));
1163                                 set_Block_phis(post_bl, phi);
1164                         }
1165                 }
1166                 set_Tuple_pred(call, pn_Call_T_result, new_Tuple(n_res, res_pred));
1167         } else {
1168                 set_Tuple_pred(call, pn_Call_T_result, new_Bad());
1169         }
1170         /* handle the regular call */
1171         set_Tuple_pred(call, pn_Call_X_regular, new_Jmp());
1172
1173         /* For now, we cannot inline calls with value_base */
1174         set_Tuple_pred(call, pn_Call_P_value_res_base, new_Bad());
1175
1176         /* Finally the exception control flow.
1177            We have two (three) possible situations:
1178            First if the Call branches to an exception handler: We need to add a Phi node to
1179            collect the memory containing the exception objects.  Further we need
1180            to add another block to get a correct representation of this Phi.  To
1181            this block we add a Jmp that resolves into the X output of the Call
1182            when the Call is turned into a tuple.
1183            Second the Call branches to End, the exception is not handled.  Just
1184            add all inlined exception branches to the End node.
1185            Third: there is no Exception edge at all. Handle as case two. */
1186         if (exc_handling == exc_handler) {
1187                 n_exc = 0;
1188                 for (i = 0; i < arity; i++) {
1189                         ir_node *ret, *irn;
1190                         ret = get_Block_cfgpred(end_bl, i);
1191                         irn = skip_Proj(ret);
1192                         if (is_fragile_op(irn) || is_Raise(irn)) {
1193                                 cf_pred[n_exc] = ret;
1194                                 ++n_exc;
1195                         }
1196                 }
1197                 if (n_exc > 0) {
1198                         ir_node *block = new_Block(n_exc, cf_pred);
1199                         set_cur_block(block);
1200
1201                         set_Tuple_pred(call, pn_Call_X_except, new_Jmp());
1202                         /* The Phi for the memories with the exception objects */
1203                         n_exc = 0;
1204                         for (i = 0; i < arity; i++) {
1205                                 ir_node *ret;
1206                                 ret = skip_Proj(get_Block_cfgpred(end_bl, i));
1207                                 if (is_Call(ret)) {
1208                                         cf_pred[n_exc] = new_r_Proj(get_nodes_block(ret), ret, mode_M, 3);
1209                                         n_exc++;
1210                                 } else if (is_fragile_op(ret)) {
1211                                         /* We rely on all cfops having the memory output at the same position. */
1212                                         cf_pred[n_exc] = new_r_Proj(get_nodes_block(ret), ret, mode_M, 0);
1213                                         n_exc++;
1214                                 } else if (is_Raise(ret)) {
1215                                         cf_pred[n_exc] = new_r_Proj(get_nodes_block(ret), ret, mode_M, 1);
1216                                         n_exc++;
1217                                 }
1218                         }
1219                         set_Tuple_pred(call, pn_Call_M_except, new_Phi(n_exc, cf_pred, mode_M));
1220                 } else {
1221                         set_Tuple_pred(call, pn_Call_X_except, new_Bad());
1222                         set_Tuple_pred(call, pn_Call_M_except, new_Bad());
1223                 }
1224         } else {
1225                 ir_node *main_end_bl;
1226                 int main_end_bl_arity;
1227                 ir_node **end_preds;
1228
1229                 /* assert(exc_handling == 1 || no exceptions. ) */
1230                 n_exc = 0;
1231                 for (i = 0; i < arity; i++) {
1232                         ir_node *ret = get_Block_cfgpred(end_bl, i);
1233                         ir_node *irn = skip_Proj(ret);
1234
1235                         if (is_fragile_op(irn) || is_Raise(irn)) {
1236                                 cf_pred[n_exc] = ret;
1237                                 n_exc++;
1238                         }
1239                 }
1240                 main_end_bl       = get_irg_end_block(irg);
1241                 main_end_bl_arity = get_irn_arity(main_end_bl);
1242                 end_preds         = XMALLOCN(ir_node*, n_exc + main_end_bl_arity);
1243
1244                 for (i = 0; i < main_end_bl_arity; ++i)
1245                         end_preds[i] = get_irn_n(main_end_bl, i);
1246                 for (i = 0; i < n_exc; ++i)
1247                         end_preds[main_end_bl_arity + i] = cf_pred[i];
1248                 set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
1249                 set_Tuple_pred(call, pn_Call_X_except, new_Bad());
1250                 set_Tuple_pred(call, pn_Call_M_except, new_Bad());
1251                 free(end_preds);
1252         }
1253         free(res_pred);
1254         free(cf_pred);
1255
1256         /* --  Turn CSE back on. -- */
1257         set_optimize(rem_opt);
1258         current_ir_graph = rem;
1259
1260         return 1;
1261 }
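/*
 * Usage sketch (illustrative; call and callee are placeholder names, and the
 * caller is assumed to have collected the Proj/Phi lists, e.g. via
 * collect_phiprojs(), as the inlining drivers below do):
 *
 *   ir_graph *callee = get_call_called_irg(call);
 *   if (callee != NULL && inline_method(call, callee)) {
 *           the Call node has now been turned into a Tuple
 *   }
 */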
1262
1263 /********************************************************************/
1264 /* Apply inlining to small methods.                                 */
1265 /********************************************************************/
1266
1267 static struct obstack  temp_obst;
1268
1269 /** Represents a possible inlinable call in a graph. */
1270 typedef struct _call_entry {
1271         ir_node    *call;       /**< The Call node. */
1272         ir_graph   *callee;     /**< The callee IR-graph. */
1273         list_head  list;        /**< List head for linking the next one. */
1274         int        loop_depth;  /**< The loop depth of this call. */
1275         int        benefice;    /**< The calculated benefice of this call. */
1276         unsigned   local_adr:1; /**< Set if this call gets an address of a local variable. */
1277         unsigned   all_const:1; /**< Set if this call has only constant parameters. */
1278 } call_entry;
1279
1280 /**
1281  * environment for inlining small irgs
1282  */
1283 typedef struct _inline_env_t {
1284         struct obstack obst;  /**< An obstack where call_entries are allocated on. */
1285         list_head      calls; /**< The call entry list. */
1286 } inline_env_t;
1287
1288 /**
1289  * Returns the irg called from a Call node. If the irg is not
1290  * known, NULL is returned.
1291  *
1292  * @param call  the call node
1293  */
1294 static ir_graph *get_call_called_irg(ir_node *call) {
1295         ir_node *addr;
1296
1297         addr = get_Call_ptr(call);
1298         if (is_Global(addr)) {
1299                 ir_entity *ent = get_Global_entity(addr);
1300                 return get_entity_irg(ent);
1301         }
1302
1303         return NULL;
1304 }
1305
1306 /**
1307  * Walker: Collect all calls to known graphs inside a graph.
1308  */
1309 static void collect_calls(ir_node *call, void *env) {
1310         (void) env;
1311         if (is_Call(call)) {
1312                 ir_graph *called_irg = get_call_called_irg(call);
1313
1314                 if (called_irg != NULL) {
1315                         /* The Call node calls a locally defined method.  Remember to inline. */
1316                         inline_env_t *ienv  = env;
1317                         call_entry   *entry = obstack_alloc(&ienv->obst, sizeof(*entry));
1318                         entry->call       = call;
1319                         entry->callee     = called_irg;
1320                         entry->loop_depth = 0;
1321                         entry->benefice   = 0;
1322                         entry->local_adr  = 0;
1323                         entry->all_const  = 0;
1324
1325                         list_add_tail(&entry->list, &ienv->calls);
1326                 }
1327         }
1328 }
1329
1330 /**
1331  * Inlines all small methods at call sites where the called address comes
 * from a SymConst node that references the entity representing the called
1333  * method.
1334  * The size argument is a rough measure for the code size of the method:
1335  * Methods where the obstack containing the firm graph is smaller than
1336  * size are inlined.
1337  */
1338 void inline_small_irgs(ir_graph *irg, int size) {
1339         ir_graph *rem = current_ir_graph;
1340         inline_env_t env;
1341         call_entry *entry;
1342
1343         current_ir_graph = irg;
1344         /* Handle graph state */
1345         assert(get_irg_phase_state(irg) != phase_building);
1346         free_callee_info(irg);
1347
        /* Find Call nodes to inline.
           (We cannot inline during a walk of the graph, as inlining the same
           method several times changes the visited flag of the walked graph:
           after the first inlining the visited flag of the callee equals that
           of the caller.  With the next inlining both are increased.) */
1353         obstack_init(&env.obst);
1354         INIT_LIST_HEAD(&env.calls);
1355         irg_walk_graph(irg, NULL, collect_calls, &env);
1356
1357         if (! list_empty(&env.calls)) {
1358                 /* There are calls to inline */
1359                 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1360                 collect_phiprojs(irg);
1361
1362                 list_for_each_entry(call_entry, entry, &env.calls, list) {
1363                         ir_graph            *callee = entry->callee;
1364                         irg_inline_property prop    = get_irg_inline_property(callee);
1365
1366                         if (prop == irg_inline_forbidden || get_irg_additional_properties(callee) & mtp_property_weak) {
1367                                 /* do not inline forbidden / weak graphs */
1368                                 continue;
1369                         }
1370
1371                         if (prop >= irg_inline_forced ||
1372                             _obstack_memory_used(callee->obst) - (int)obstack_room(callee->obst) < size) {
1373                                 inline_method(entry->call, callee);
1374                         }
1375                 }
1376                 ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1377         }
1378         obstack_free(&env.obst, NULL);
1379         current_ir_graph = rem;
1380 }
1381
1382 struct inline_small_irgs_pass_t {
1383         ir_graph_pass_t pass;
1384         int            size;
1385 };
1386
1387 /**
1388  * Wrapper to run inline_small_irgs() as a pass.
1389  */
1390 static int inline_small_irgs_wrapper(ir_graph *irg, void *context) {
1391         struct inline_small_irgs_pass_t *pass = context;
1392
1393         inline_small_irgs(irg, pass->size);
1394         return 0;
1395 }
1396
1397 /* create a pass for inline_small_irgs() */
1398 ir_graph_pass_t *inline_small_irgs_pass(const char *name, int size) {
1399         struct inline_small_irgs_pass_t *pass =
1400                 XMALLOCZ(struct inline_small_irgs_pass_t);
1401
1402         pass->size = size;
1403         return def_graph_pass_constructor(
1404                 &pass->pass, name ? name : "inline_small_irgs", inline_small_irgs_wrapper);
1405 }
1406
1407 /**
1408  * Environment for inlining irgs.
1409  */
1410 typedef struct {
        list_head calls;             /**< List of all call nodes in this graph. */
1412         unsigned  *local_weights;    /**< Once allocated, the beneficial weight for transmitting local addresses. */
1413         unsigned  n_nodes;           /**< Number of nodes in graph except Id, Tuple, Proj, Start, End. */
1414         unsigned  n_blocks;          /**< Number of Blocks in graph without Start and End block. */
1415         unsigned  n_nodes_orig;      /**< for statistics */
1416         unsigned  n_call_nodes;      /**< Number of Call nodes in the graph. */
1417         unsigned  n_call_nodes_orig; /**< for statistics */
        unsigned  n_callers;         /**< Number of known graphs that call this graph. */
1419         unsigned  n_callers_orig;    /**< for statistics */
1420         unsigned  got_inline:1;      /**< Set, if at least one call inside this graph was inlined. */
1421         unsigned  local_vars:1;      /**< Set, if an inlined function got the address of a local variable. */
1422         unsigned  recursive:1;       /**< Set, if this function is self recursive. */
1423 } inline_irg_env;
1424
1425 /**
1426  * Allocate a new environment for inlining.
1427  */
1428 static inline_irg_env *alloc_inline_irg_env(void) {
1429         inline_irg_env *env    = obstack_alloc(&temp_obst, sizeof(*env));
1430         INIT_LIST_HEAD(&env->calls);
1431         env->local_weights     = NULL;
        env->n_nodes           = -2; /* do not count Start, End */
        env->n_blocks          = -2; /* do not count Start, End Block */
1434         env->n_nodes_orig      = -2; /* do not count Start, End */
1435         env->n_call_nodes      = 0;
1436         env->n_call_nodes_orig = 0;
1437         env->n_callers         = 0;
1438         env->n_callers_orig    = 0;
1439         env->got_inline        = 0;
1440         env->local_vars        = 0;
1441         env->recursive         = 0;
1442         return env;
1443 }
1444
1445 typedef struct walker_env {
1446         inline_irg_env *x;     /**< the inline environment */
1447         char ignore_runtime;   /**< the ignore runtime flag */
        char ignore_callers;   /**< if set, do not update the callers' data */
1449 } wenv_t;
1450
1451 /**
1452  * post-walker: collect all calls in the inline-environment
1453  * of a graph and sum some statistics.
1454  */
1455 static void collect_calls2(ir_node *call, void *ctx) {
1456         wenv_t         *env = ctx;
1457         inline_irg_env *x = env->x;
1458         ir_opcode      code = get_irn_opcode(call);
1459         ir_graph       *callee;
1460         call_entry     *entry;
1461
1462         /* count meaningful nodes in irg */
1463         if (code != iro_Proj && code != iro_Tuple && code != iro_Sync) {
1464                 if (code != iro_Block) {
1465                         ++x->n_nodes;
1466                         ++x->n_nodes_orig;
1467                 } else {
1468                         ++x->n_blocks;
1469                 }
1470         }
1471
1472         if (code != iro_Call) return;
1473
        /* check whether it's a runtime call */
1475         if (env->ignore_runtime) {
1476                 ir_node *symc = get_Call_ptr(call);
1477
1478                 if (is_Global(symc)) {
1479                         ir_entity *ent = get_Global_entity(symc);
1480
1481                         if (get_entity_additional_properties(ent) & mtp_property_runtime)
1482                                 return;
1483                 }
1484         }
1485
1486         /* collect all call nodes */
1487         ++x->n_call_nodes;
1488         ++x->n_call_nodes_orig;
1489
1490         callee = get_call_called_irg(call);
1491         if (callee != NULL) {
1492                 if (! env->ignore_callers) {
1493                         inline_irg_env *callee_env = get_irg_link(callee);
1494                         /* count all static callers */
1495                         ++callee_env->n_callers;
1496                         ++callee_env->n_callers_orig;
1497                 }
1498                 if (callee == current_ir_graph)
1499                         x->recursive = 1;
1500
                /* link it into the list of possibly inlinable entries */
1502                 entry = obstack_alloc(&temp_obst, sizeof(*entry));
1503                 entry->call       = call;
1504                 entry->callee     = callee;
1505                 entry->loop_depth = get_irn_loop(get_nodes_block(call))->depth;
1506                 entry->benefice   = 0;
1507                 entry->local_adr  = 0;
1508                 entry->all_const  = 0;
1509
1510                 list_add_tail(&entry->list, &x->calls);
1511         }
1512 }
1513
1514 /**
 * Returns TRUE if the number of Call nodes is 0 in the irg's environment,
 * hence this irg is a leave (it calls no other methods).
1517  */
1518 inline static int is_leave(ir_graph *irg) {
1519         inline_irg_env *env = get_irg_link(irg);
1520         return env->n_call_nodes == 0;
1521 }
1522
1523 /**
1524  * Returns TRUE if the number of nodes in the callee is
 * smaller than size in the irg's environment.
1526  */
1527 inline static int is_smaller(ir_graph *callee, unsigned size) {
1528         inline_irg_env *env = get_irg_link(callee);
1529         return env->n_nodes < size;
1530 }
1531
1532 /**
1533  * Duplicate a call entry.
1534  *
1535  * @param entry     the original entry to duplicate
1536  * @param new_call  the new call node
1537  * @param loop_depth_delta
1538  *                  delta value for the loop depth
1539  */
1540 static call_entry *duplicate_call_entry(const call_entry *entry,
1541                                         ir_node *new_call, int loop_depth_delta) {
1542         call_entry *nentry = obstack_alloc(&temp_obst, sizeof(*nentry));
1543         nentry->call       = new_call;
1544         nentry->callee     = entry->callee;
1545         nentry->benefice   = entry->benefice;
1546         nentry->loop_depth = entry->loop_depth + loop_depth_delta;
1547         nentry->local_adr  = entry->local_adr;
1548         nentry->all_const  = entry->all_const;
1549
1550         return nentry;
1551 }
1552
1553 /**
 * Append all call nodes of the source environment to the call list of the destination
1555  * environment.
1556  *
1557  * @param dst         destination environment
1558  * @param src         source environment
1559  * @param loop_depth  the loop depth of the call that is replaced by the src list
1560  */
1561 static void append_call_list(inline_irg_env *dst, inline_irg_env *src, int loop_depth) {
1562         call_entry *entry, *nentry;
1563
1564         /* Note that the src list points to Call nodes in the inlined graph, but
1565            we need Call nodes in our graph. Luckily the inliner leaves this information
1566            in the link field. */
1567         list_for_each_entry(call_entry, entry, &src->calls, list) {
1568                 nentry = duplicate_call_entry(entry, get_irn_link(entry->call), loop_depth);
1569                 list_add_tail(&nentry->list, &dst->calls);
1570         }
1571         dst->n_call_nodes += src->n_call_nodes;
1572         dst->n_nodes      += src->n_nodes;
1573 }
1574
1575 /*
1576  * Inlines small leave methods at call sites where the called address comes
 * from a SymConst node that references the entity representing the called
1578  * method.
1579  * The size argument is a rough measure for the code size of the method:
1580  * Methods where the obstack containing the firm graph is smaller than
1581  * size are inlined.
1582  */
1583 void inline_leave_functions(unsigned maxsize, unsigned leavesize,
1584                             unsigned size, int ignore_runtime)
1585 {
1586         inline_irg_env   *env;
1587         ir_graph         *irg;
1588         int              i, n_irgs;
1589         ir_graph         *rem;
1590         int              did_inline;
1591         wenv_t           wenv;
1592         call_entry       *entry, *next;
1593         const call_entry *centry;
1594         pmap             *copied_graphs;
1595         pmap_entry       *pm_entry;
1596
1597         rem = current_ir_graph;
1598         obstack_init(&temp_obst);
1599
1600         /* a map for the copied graphs, used to inline recursive calls */
1601         copied_graphs = pmap_create();
1602
1603         /* extend all irgs by a temporary data structure for inlining. */
1604         n_irgs = get_irp_n_irgs();
1605         for (i = 0; i < n_irgs; ++i)
1606                 set_irg_link(get_irp_irg(i), alloc_inline_irg_env());
1607
1608         /* Pre-compute information in temporary data structure. */
1609         wenv.ignore_runtime = ignore_runtime;
1610         wenv.ignore_callers = 0;
1611         for (i = 0; i < n_irgs; ++i) {
1612                 ir_graph *irg = get_irp_irg(i);
1613
1614                 assert(get_irg_phase_state(irg) != phase_building);
1615                 free_callee_info(irg);
1616
1617                 assure_cf_loop(irg);
1618                 wenv.x = get_irg_link(irg);
1619                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
1620         }
1621
1622         /* -- and now inline. -- */
1623
1624         /* Inline leaves recursively -- we might construct new leaves. */
1625         do {
1626                 did_inline = 0;
1627
1628                 for (i = 0; i < n_irgs; ++i) {
1629                         ir_node *call;
1630                         int phiproj_computed = 0;
1631
1632                         current_ir_graph = get_irp_irg(i);
1633                         env              = get_irg_link(current_ir_graph);
1634
1635                         ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1636                         list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
1637                                 ir_graph            *callee;
1638                                 irg_inline_property  prop;
1639
1640                                 if (env->n_nodes > maxsize)
1641                                         break;
1642
1643                                 call   = entry->call;
1644                                 callee = entry->callee;
1645
1646                                 prop = get_irg_inline_property(callee);
1647                                 if (prop == irg_inline_forbidden || get_irg_additional_properties(callee) & mtp_property_weak) {
1648                                         /* do not inline forbidden / weak graphs */
1649                                         continue;
1650                                 }
1651
1652                                 if (is_leave(callee) && (
1653                                     is_smaller(callee, leavesize) || prop >= irg_inline_forced)) {
1654                                         if (!phiproj_computed) {
1655                                                 phiproj_computed = 1;
1656                                                 collect_phiprojs(current_ir_graph);
1657                                         }
1658                                         did_inline = inline_method(call, callee);
1659
1660                                         if (did_inline) {
1661                                                 inline_irg_env *callee_env = get_irg_link(callee);
1662
1663                                                 /* call was inlined, Phi/Projs for current graph must be recomputed */
1664                                                 phiproj_computed = 0;
1665
1666                                                 /* Do some statistics */
1667                                                 env->got_inline = 1;
1668                                                 --env->n_call_nodes;
1669                                                 env->n_nodes += callee_env->n_nodes;
1670                                                 --callee_env->n_callers;
1671
1672                                                 /* remove this call from the list */
1673                                                 list_del(&entry->list);
1674                                                 continue;
1675                                         }
1676                                 }
1677                         }
1678                         ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1679                 }
1680         } while (did_inline);
1681
1682         /* inline other small functions. */
1683         for (i = 0; i < n_irgs; ++i) {
1684                 ir_node *call;
1685                 int phiproj_computed = 0;
1686
1687                 current_ir_graph = get_irp_irg(i);
1688                 env              = get_irg_link(current_ir_graph);
1689
1690                 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1691
1692                 /* note that the list of possible calls is updated during the process */
1693                 list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
1694                         irg_inline_property prop;
1695                         ir_graph            *callee;
1696                         pmap_entry          *e;
1697
1698                         call   = entry->call;
1699                         callee = entry->callee;
1700
1701                         prop = get_irg_inline_property(callee);
1702                         if (prop == irg_inline_forbidden || get_irg_additional_properties(callee) & mtp_property_weak) {
1703                                 /* do not inline forbidden / weak graphs */
1704                                 continue;
1705                         }
1706
1707                         e = pmap_find(copied_graphs, callee);
1708                         if (e != NULL) {
1709                                 /*
1710                                  * Remap callee if we have a copy.
1711                                  * FIXME: Should we do this only for recursive Calls ?
1712                                  */
1713                                 callee = e->value;
1714                         }
1715
1716                         if (prop >= irg_inline_forced ||
1717                             (is_smaller(callee, size) && env->n_nodes < maxsize) /* small function */) {
1718                                 if (current_ir_graph == callee) {
1719                                         /*
1720                                          * Recursive call: we cannot directly inline because we cannot walk
1721                                          * the graph and change it. So we have to make a copy of the graph
1722                                          * first.
1723                                          */
1724
1725                                         inline_irg_env *callee_env;
1726                                         ir_graph       *copy;
1727
1728                                         ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1729
1730                                         /*
1731                                          * No copy yet, create one.
1732                                          * Note that recursive methods are never leaves, so it is sufficient
1733                                          * to test this condition here.
1734                                          */
1735                                         copy = create_irg_copy(callee);
1736
1737                                         /* create_irg_copy() destroys the Proj links, recompute them */
1738                                         phiproj_computed = 0;
1739
1740                                         ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1741
1742                                         /* allocate new environment */
1743                                         callee_env = alloc_inline_irg_env();
1744                                         set_irg_link(copy, callee_env);
1745
1746                                         assure_cf_loop(copy);
1747                                         wenv.x              = callee_env;
1748                                         wenv.ignore_callers = 1;
1749                                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
1750
1751                                         /*
1752                                          * Enter the entity of the original graph. This is needed
1753                                          * for inline_method(). However, note that ent->irg still points
1754                                          * to callee, NOT to copy.
1755                                          */
1756                                         set_irg_entity(copy, get_irg_entity(callee));
1757
1758                                         pmap_insert(copied_graphs, callee, copy);
1759                                         callee = copy;
1760
1761                                         /* we have only one caller: the original graph */
1762                                         callee_env->n_callers      = 1;
1763                                         callee_env->n_callers_orig = 1;
1764                                 }
1765                                 if (! phiproj_computed) {
1766                                         phiproj_computed = 1;
1767                                         collect_phiprojs(current_ir_graph);
1768                                 }
1769                                 did_inline = inline_method(call, callee);
1770                                 if (did_inline) {
1771                                         inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);
1772
1773                                         /* call was inlined, Phi/Projs for current graph must be recomputed */
1774                                         phiproj_computed = 0;
1775
                                                /* callee was inlined. Append its call list. */
1777                                         env->got_inline = 1;
1778                                         --env->n_call_nodes;
1779                                         append_call_list(env, callee_env, entry->loop_depth);
1780                                         --callee_env->n_callers;
1781
1782                                         /* after we have inlined callee, all called methods inside callee
1783                                            are now called once more */
1784                                         list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
1785                                                 inline_irg_env *penv = get_irg_link(centry->callee);
1786                                                 ++penv->n_callers;
1787                                         }
1788
1789                                         /* remove this call from the list */
1790                                         list_del(&entry->list);
1791                                         continue;
1792                                 }
1793                         }
1794                 }
1795                 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1796         }
1797
1798         for (i = 0; i < n_irgs; ++i) {
1799                 irg = get_irp_irg(i);
1800                 env = get_irg_link(irg);
1801
1802                 if (env->got_inline) {
1803                         optimize_graph_df(irg);
1804                         optimize_cf(irg);
1805                 }
1806                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
1807                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
1808                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
1809                         env->n_callers_orig, env->n_callers,
1810                         get_entity_name(get_irg_entity(irg))));
1811                 }
1812         }
1813
1814         /* kill the copied graphs: we don't need them anymore */
1815         foreach_pmap(copied_graphs, pm_entry) {
1816                 ir_graph *copy = pm_entry->value;
1817
1818                 /* reset the entity, otherwise it will be deleted in the next step ... */
1819                 set_irg_entity(copy, NULL);
1820                 free_ir_graph(copy);
1821         }
1822         pmap_destroy(copied_graphs);
1823
1824         obstack_free(&temp_obst, NULL);
1825         current_ir_graph = rem;
1826 }
1827
1828 struct inline_leave_functions_pass_t {
1829         ir_prog_pass_t pass;
1830         unsigned       maxsize;
1831         unsigned       leavesize;
1832         unsigned       size;
1833         int            ignore_runtime;
1834 };
1835
1836 /**
 * Wrapper to run inline_leave_functions() as an ir_prog pass.
1838  */
1839 static int inline_leave_functions_wrapper(ir_prog *irp, void *context) {
1840         struct inline_leave_functions_pass_t *pass = context;
1841
1842         (void)irp;
1843         inline_leave_functions(
1844                 pass->maxsize, pass->leavesize,
1845                 pass->size, pass->ignore_runtime);
1846         return 0;
1847 }
1848
1849 /* create a pass for inline_leave_functions() */
1850 ir_prog_pass_t *inline_leave_functions_pass(
1851         const char *name, unsigned maxsize, unsigned leavesize,
1852         unsigned size, int ignore_runtime) {
1853         struct inline_leave_functions_pass_t *pass =
1854                 XMALLOCZ(struct inline_leave_functions_pass_t);
1855
1856         pass->maxsize        = maxsize;
1857         pass->leavesize      = leavesize;
1858         pass->size           = size;
1859         pass->ignore_runtime = ignore_runtime;
1860
1861         return def_prog_pass_constructor(
1862                 &pass->pass,
1863                 name ? name : "inline_leave_functions",
1864                 inline_leave_functions_wrapper);
1865 }
1866
1867 /**
1868  * Calculate the parameter weights for transmitting the address of a local variable.
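 * As an illustrative example of the weights computed below: an argument
 * address that feeds exactly one Load and one Store would be weighted
 * 3 + 3 = 6.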
1869  */
1870 static unsigned calc_method_local_weight(ir_node *arg) {
1871         int      i, j, k;
1872         unsigned v, weight = 0;
1873
1874         for (i = get_irn_n_outs(arg) - 1; i >= 0; --i) {
1875                 ir_node *succ = get_irn_out(arg, i);
1876
1877                 switch (get_irn_opcode(succ)) {
1878                 case iro_Load:
1879                 case iro_Store:
                        /* Loads and Stores can be removed */
1881                         weight += 3;
1882                         break;
1883                 case iro_Sel:
1884                         /* check if all args are constant */
1885                         for (j = get_Sel_n_indexs(succ) - 1; j >= 0; --j) {
1886                                 ir_node *idx = get_Sel_index(succ, j);
1887                                 if (! is_Const(idx))
1888                                         return 0;
1889                         }
1890                         /* Check users on this Sel. Note: if a 0 is returned here, there was
1891                            some unsupported node. */
1892                         v = calc_method_local_weight(succ);
1893                         if (v == 0)
1894                                 return 0;
1895                         /* we can kill one Sel with constant indexes, this is cheap */
1896                         weight += v + 1;
1897                         break;
1898                 case iro_Id:
1899                         /* when looking backward we might find Id nodes */
1900                         weight += calc_method_local_weight(succ);
1901                         break;
1902                 case iro_Tuple:
1903                         /* unoptimized tuple */
1904                         for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
1905                                 ir_node *pred = get_Tuple_pred(succ, j);
1906                                 if (pred == arg) {
1907                                         /* look for Proj(j) */
1908                                         for (k = get_irn_n_outs(succ) - 1; k >= 0; --k) {
1909                                                 ir_node *succ_succ = get_irn_out(succ, k);
1910                                                 if (is_Proj(succ_succ)) {
1911                                                         if (get_Proj_proj(succ_succ) == j) {
1912                                                                 /* found */
1913                                                                 weight += calc_method_local_weight(succ_succ);
1914                                                         }
1915                                                 } else {
1916                                                         /* this should NOT happen */
1917                                                         return 0;
1918                                                 }
1919                                         }
1920                                 }
1921                         }
1922                         break;
1923                 default:
1924                         /* any other node: unsupported yet or bad. */
1925                         return 0;
1926                 }
1927         }
1928         return weight;
1929 }
1930
1931 /**
1932  * Calculate the parameter weights for transmitting the address of a local variable.
1933  */
1934 static void analyze_irg_local_weights(inline_irg_env *env, ir_graph *irg) {
1935         ir_entity *ent = get_irg_entity(irg);
1936         ir_type  *mtp;
1937         int      nparams, i, proj_nr;
1938         ir_node  *irg_args, *arg;
1939
1940         mtp      = get_entity_type(ent);
1941         nparams  = get_method_n_params(mtp);
1942
        /* allocate a new array; its presence also serves as the 'analysed' flag */
1944         env->local_weights = NEW_ARR_D(unsigned, &temp_obst, nparams);
1945
        /* If the method has no parameters we have nothing to do. */
1947         if (nparams <= 0)
1948                 return;
1949
1950         assure_irg_outs(irg);
1951         irg_args = get_irg_args(irg);
1952         for (i = get_irn_n_outs(irg_args) - 1; i >= 0; --i) {
1953                 arg     = get_irn_out(irg_args, i);
1954                 proj_nr = get_Proj_proj(arg);
1955                 env->local_weights[proj_nr] = calc_method_local_weight(arg);
1956         }
1957 }
1958
1959 /**
 * Calculate the benefice for transmitting a local variable address.
1961  * After inlining, the local variable might be transformed into a
1962  * SSA variable by scalar_replacement().
1963  */
1964 static unsigned get_method_local_adress_weight(ir_graph *callee, int pos) {
1965         inline_irg_env *env = get_irg_link(callee);
1966
1967         if (env->local_weights != NULL) {
1968                 if (pos < ARR_LEN(env->local_weights))
1969                         return env->local_weights[pos];
1970                 return 0;
1971         }
1972
1973         analyze_irg_local_weights(env, callee);
1974
1975         if (pos < ARR_LEN(env->local_weights))
1976                 return env->local_weights[pos];
1977         return 0;
1978 }
1979
1980 /**
1981  * Calculate a benefice value for inlining the given call.
1982  *
1983  * @param call       the call node we have to inspect
1984  * @param callee     the called graph
1985  */
1986 static int calc_inline_benefice(call_entry *entry, ir_graph *callee)
1987 {
1988         ir_node   *call = entry->call;
1989         ir_entity *ent  = get_irg_entity(callee);
1990         ir_node   *frame_ptr;
1991         ir_type   *mtp;
1992         int       weight = 0;
1993         int       i, n_params, all_const;
1994         unsigned  cc, v;
1995         irg_inline_property prop;
1996
1997         inline_irg_env *callee_env;
1998
1999         prop = get_irg_inline_property(callee);
2000         if (prop == irg_inline_forbidden) {
2001                 DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden\n",
2002                     call, callee));
2003                 return entry->benefice = INT_MIN;
2004         }
2005
2006         if (get_irg_additional_properties(callee) & (mtp_property_noreturn | mtp_property_weak)) {
2007                 DB((dbg, LEVEL_2, "In %+F Call to %+F: not inlining noreturn or weak\n",
2008                     call, callee));
2009                 return entry->benefice = INT_MIN;
2010         }
2011
2012         /* costs for every passed parameter */
2013         n_params = get_Call_n_params(call);
2014         mtp      = get_entity_type(ent);
2015         cc       = get_method_calling_convention(mtp);
2016         if (cc & cc_reg_param) {
2017                 /* register parameter, smaller costs for register parameters */
2018                 int max_regs = cc & ~cc_bits;
2019
2020                 if (max_regs < n_params)
2021                         weight += max_regs * 2 + (n_params - max_regs) * 5;
2022                 else
2023                         weight += n_params * 2;
2024         } else {
                /* parameters are passed on the stack */
2026                 weight += 5 * n_params;
2027         }
2028
2029         /* constant parameters improve the benefice */
2030         frame_ptr = get_irg_frame(current_ir_graph);
2031         all_const = 1;
2032         for (i = 0; i < n_params; ++i) {
2033                 ir_node *param = get_Call_param(call, i);
2034
2035                 if (is_Const(param)) {
2036                         weight += get_method_param_weight(ent, i);
2037                 } else {
2038                         all_const = 0;
2039                         if (is_SymConst(param))
2040                                 weight += get_method_param_weight(ent, i);
2041                         else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
2042                                 /*
2043                                  * An address of a local variable is transmitted. After
2044                                  * inlining, scalar_replacement might be able to remove the
2045                                  * local variable, so honor this.
2046                                  */
2047                                 v = get_method_local_adress_weight(callee, i);
2048                                 weight += v;
2049                                 if (v > 0)
2050                                         entry->local_adr = 1;
2051                         }
2052                 }
2053         }
2054         entry->all_const = all_const;
2055
2056         callee_env = get_irg_link(callee);
2057         if (callee_env->n_callers == 1 &&
2058             callee != current_ir_graph &&
2059                 get_entity_visibility(ent) == visibility_local) {
2060                 weight += 700;
2061         }
2062
2063         /* give a bonus for functions with one block */
2064         if (callee_env->n_blocks == 1)
2065                 weight = weight * 3 / 2;
2066
        /* and one for small non-recursive functions: we want them to be inlined in almost every case */
2068         if (callee_env->n_nodes < 30 && !callee_env->recursive)
2069                 weight += 2000;
2070
2071         /* and finally for leaves: they do not increase the register pressure
           because of callee-save registers */
2073         if (callee_env->n_call_nodes == 0)
2074                 weight += 400;
2075
        /* it is important to inline inner loops first */
2077         if (entry->loop_depth > 30)
2078                 weight += 30 * 1024;
2079         else
2080                 weight += entry->loop_depth * 1024;
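        /* Illustrative arithmetic: a call at loop depth 2 gets 2 * 1024 = 2048
           added here; the bonus saturates at 30 * 1024 for very deep nests. */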
2081
2082         /*
         * If all arguments are constant, that is probably a good sign; give an extra bonus
2084          */
2085         if (all_const)
2086                 weight += 1024;
2087
2088         return entry->benefice = weight;
2089 }
2090
2091 static ir_graph **irgs;
2092 static int      last_irg;
2093
2094 /**
2095  * Callgraph walker, collect all visited graphs.
2096  */
2097 static void callgraph_walker(ir_graph *irg, void *data) {
2098         (void) data;
2099         irgs[last_irg++] = irg;
2100 }
2101
2102 /**
2103  * Creates an inline order for all graphs.
2104  *
2105  * @return the list of graphs.
2106  */
2107 static ir_graph **create_irg_list(void) {
2108         ir_entity **free_methods;
2109         int       arr_len;
2110         int       n_irgs = get_irp_n_irgs();
2111
2112         cgana(&arr_len, &free_methods);
2113         xfree(free_methods);
2114
2115         compute_callgraph();
2116
2117         last_irg = 0;
2118         irgs     = XMALLOCNZ(ir_graph*, n_irgs);
2119
2120         callgraph_walk(NULL, callgraph_walker, NULL);
2121         assert(n_irgs == last_irg);
2122
2123         return irgs;
2124 }
2125
2126 /**
 * Push a call onto the priority queue if its benefice is big enough.
2128  *
2129  * @param pqueue   the priority queue of calls
2130  * @param call     the call entry
 * @param inline_threshold
2132  *                 the threshold value
2133  */
2134 static void maybe_push_call(pqueue_t *pqueue, call_entry *call,
2135                             int inline_threshold)
2136 {
2137         ir_graph            *callee  = call->callee;
2138         irg_inline_property prop     = get_irg_inline_property(callee);
2139         int                 benefice = calc_inline_benefice(call, callee);
2140
2141         DB((dbg, LEVEL_2, "In %+F Call %+F to %+F has benefice %d\n",
2142             get_irn_irg(call->call), call->call, callee, benefice));
2143
2144         if (prop < irg_inline_forced && benefice < inline_threshold) {
2145                 return;
2146         }
2147
2148         pqueue_put(pqueue, call, benefice);
2149 }
2150
2151 /**
2152  * Try to inline calls into a graph.
2153  *
2154  * @param irg      the graph into which we inline
2155  * @param maxsize  do NOT inline if the size of irg gets
2156  *                 bigger than this amount
2157  * @param inline_threshold
2158  *                 threshold value for inline decision
2159  * @param copied_graphs
 *                 map containing copies of recursive graphs
2161  */
2162 static void inline_into(ir_graph *irg, unsigned maxsize,
2163                         int inline_threshold, pmap *copied_graphs)
2164 {
2165         int            phiproj_computed = 0;
2166         inline_irg_env *env = get_irg_link(irg);
2167         call_entry     *curr_call;
2168         wenv_t         wenv;
2169         pqueue_t       *pqueue;
2170
2171         if (env->n_call_nodes == 0)
2172                 return;
2173
2174         if (env->n_nodes > maxsize) {
2175                 DB((dbg, LEVEL_2, "%+F: too big (%d)\n", irg, env->n_nodes));
2176                 return;
2177         }
2178
2179         current_ir_graph = irg;
2180         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
2181
        /* put all calls of this graph into the priority queue */
2183         pqueue = new_pqueue();
2184
2185         list_for_each_entry(call_entry, curr_call, &env->calls, list) {
2186                 assert(is_Call(curr_call->call));
2187                 maybe_push_call(pqueue, curr_call, inline_threshold);
2188         }
2189
2190         /* note that the list of possible calls is updated during the process */
2191         while (!pqueue_empty(pqueue)) {
2192                 int                 did_inline;
2193                 call_entry          *curr_call  = pqueue_pop_front(pqueue);
2194                 ir_graph            *callee     = curr_call->callee;
2195                 ir_node             *call_node  = curr_call->call;
2196                 inline_irg_env      *callee_env = get_irg_link(callee);
2197                 irg_inline_property prop        = get_irg_inline_property(callee);
2198                 int                 loop_depth;
2199                 const call_entry    *centry;
2200                 pmap_entry          *e;
2201
2202                 if ((prop < irg_inline_forced) && env->n_nodes + callee_env->n_nodes > maxsize) {
2203                         DB((dbg, LEVEL_2, "%+F: too big (%d) + %+F (%d)\n", irg,
2204                                                 env->n_nodes, callee, callee_env->n_nodes));
2205                         continue;
2206                 }
2207
2208                 e = pmap_find(copied_graphs, callee);
2209                 if (e != NULL) {
2210                         int benefice = curr_call->benefice;
2211                         /*
                         * Reduce the weight for recursive functions IFF not all arguments are const;
                         * inlining recursive functions is rarely good.
2214                          */
2215                         if (!curr_call->all_const)
2216                                 benefice -= 2000;
2217                         if (benefice < inline_threshold)
2218                                 continue;
2219
2220                         /*
2221                          * Remap callee if we have a copy.
2222                          */
2223                         callee     = e->value;
2224                         callee_env = get_irg_link(callee);
2225                 }
2226
2227                 if (current_ir_graph == callee) {
2228                         /*
2229                          * Recursive call: we cannot directly inline because we cannot
2230                          * walk the graph and change it. So we have to make a copy of
2231                          * the graph first.
2232                          */
2233                         int benefice = curr_call->benefice;
2234                         ir_graph *copy;
2235
2236                         /*
                         * Reduce the weight for recursive functions IFF not all arguments are const;
                         * inlining recursive functions is rarely good.
2239                          */
2240                         if (!curr_call->all_const)
2241                                 benefice -= 2000;
2242                         if (benefice < inline_threshold)
2243                                 continue;
2244
2245                         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
2246
2247                         /*
2248                          * No copy yet, create one.
2249                          * Note that recursive methods are never leaves, so it is
2250                          * sufficient to test this condition here.
2251                          */
2252                         copy = create_irg_copy(callee);
2253
2254                         /* create_irg_copy() destroys the Proj links, recompute them */
2255                         phiproj_computed = 0;
2256
2257                         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
2258
2259                         /* allocate a new environment */
2260                         callee_env = alloc_inline_irg_env();
2261                         set_irg_link(copy, callee_env);
2262
2263                         assure_cf_loop(copy);
2264                         wenv.x              = callee_env;
2265                         wenv.ignore_callers = 1;
2266                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
2267
2268                         /*
2269                          * Enter the entity of the original graph. This is needed
2270                          * for inline_method(). However, note that ent->irg still points
2271                          * to callee, NOT to copy.
2272                          */
2273                         set_irg_entity(copy, get_irg_entity(callee));
2274
2275                         pmap_insert(copied_graphs, callee, copy);
2276                         callee = copy;
2277
2278                         /* we have only one caller: the original graph */
2279                         callee_env->n_callers      = 1;
2280                         callee_env->n_callers_orig = 1;
2281                 }
2282                 if (! phiproj_computed) {
2283                         phiproj_computed = 1;
2284                         collect_phiprojs(current_ir_graph);
2285                 }
2286                 did_inline = inline_method(call_node, callee);
2287                 if (!did_inline)
2288                         continue;
2289
2290                 /* call was inlined, Phi/Projs for current graph must be recomputed */
2291                 phiproj_computed = 0;
2292
2293                 /* remove it from the caller list */
2294                 list_del(&curr_call->list);
2295
                /* callee was inlined. Append its call list. */
2297                 env->got_inline = 1;
2298                 if (curr_call->local_adr)
2299                         env->local_vars = 1;
2300                 --env->n_call_nodes;
2301
2302                 /* we just generate a bunch of new calls */
2303                 loop_depth = curr_call->loop_depth;
2304                 list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
2305                         inline_irg_env *penv = get_irg_link(centry->callee);
2306                         ir_node        *new_call;
2307                         call_entry     *new_entry;
2308
2309                         /* after we have inlined callee, all called methods inside
2310                          * callee are now called once more */
2311                         ++penv->n_callers;
2312
2313                         /* Note that the src list points to Call nodes in the inlined graph,
2314                          * but we need Call nodes in our graph. Luckily the inliner leaves
2315                          * this information in the link field. */
2316                         new_call = get_irn_link(centry->call);
2317                         assert(is_Call(new_call));
2318
2319                         new_entry = duplicate_call_entry(centry, new_call, loop_depth);
2320                         list_add_tail(&new_entry->list, &env->calls);
2321                         maybe_push_call(pqueue, new_entry, inline_threshold);
2322                 }
2323
2324                 env->n_call_nodes += callee_env->n_call_nodes;
2325                 env->n_nodes += callee_env->n_nodes;
2326                 --callee_env->n_callers;
2327         }
2328         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
2329         del_pqueue(pqueue);
2330 }
2331
2332 /*
2333  * Heuristic inliner. Calculates a benefice value for every call and inlines
2334  * those calls with a value higher than the threshold.
2335  */
2336 void inline_functions(unsigned maxsize, int inline_threshold) {
2337         inline_irg_env   *env;
2338         int              i, n_irgs;
2339         ir_graph         *rem;
2340         wenv_t           wenv;
2341         pmap             *copied_graphs;
2342         pmap_entry       *pm_entry;
2343         ir_graph         **irgs;
2344
2345         rem = current_ir_graph;
2346         obstack_init(&temp_obst);
2347
2348         irgs = create_irg_list();
2349
2350         /* a map for the copied graphs, used to inline recursive calls */
2351         copied_graphs = pmap_create();
2352
2353         /* extend all irgs by a temporary data structure for inlining. */
2354         n_irgs = get_irp_n_irgs();
2355         for (i = 0; i < n_irgs; ++i)
2356                 set_irg_link(irgs[i], alloc_inline_irg_env());
2357
2358         /* Pre-compute information in temporary data structure. */
2359         wenv.ignore_runtime = 0;
2360         wenv.ignore_callers = 0;
2361         for (i = 0; i < n_irgs; ++i) {
2362                 ir_graph *irg = irgs[i];
2363
2364                 free_callee_info(irg);
2365
2366                 wenv.x = get_irg_link(irg);
2367                 assure_cf_loop(irg);
2368                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
2369         }
2370
2371         /* -- and now inline. -- */
2372         for (i = 0; i < n_irgs; ++i) {
2373                 ir_graph *irg = irgs[i];
2374
2375                 inline_into(irg, maxsize, inline_threshold, copied_graphs);
2376         }
2377
2378         for (i = 0; i < n_irgs; ++i) {
2379                 ir_graph *irg = irgs[i];
2380
2381                 env = get_irg_link(irg);
2382                 if (env->got_inline) {
2383                         /* this irg got calls inlined: optimize it */
2384                         if (get_opt_combo()) {
2385                                 if (env->local_vars) {
2386                                         scalar_replacement_opt(irg);
2387                                 }
2388                                 combo(irg);
2389                         } else {
2390                                 if (env->local_vars) {
2391                                         if (scalar_replacement_opt(irg)) {
2392                                                 optimize_graph_df(irg);
2393                                         }
2394                                 }
2395                                 optimize_cf(irg);
2396                         }
2397                 }
2398                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
2399                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
2400                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
2401                         env->n_callers_orig, env->n_callers,
2402                         get_entity_name(get_irg_entity(irg))));
2403                 }
2404         }
2405
2406         /* kill the copied graphs: we don't need them anymore */
2407         foreach_pmap(copied_graphs, pm_entry) {
2408                 ir_graph *copy = pm_entry->value;
2409
2410                 /* reset the entity, otherwise it will be deleted in the next step ... */
2411                 set_irg_entity(copy, NULL);
2412                 free_ir_graph(copy);
2413         }
2414         pmap_destroy(copied_graphs);
2415
2416         xfree(irgs);
2417
2418         obstack_free(&temp_obst, NULL);
2419         current_ir_graph = rem;
2420 }
2421
2422 struct inline_functions_pass_t {
2423         ir_prog_pass_t pass;
2424         unsigned       maxsize;
2425         int            inline_threshold;
2426 };
2427
2428 /**
 * Wrapper to run inline_functions() as an ir_prog pass.
2430  */
2431 static int inline_functions_wrapper(ir_prog *irp, void *context) {
2432         struct inline_functions_pass_t *pass = context;
2433
2434         (void)irp;
2435         inline_functions(pass->maxsize, pass->inline_threshold);
2436         return 0;
2437 }
2438
/* create an ir_prog pass for inline_functions */
2440 ir_prog_pass_t *inline_functions_pass(
2441           const char *name, unsigned maxsize, int inline_threshold) {
2442         struct inline_functions_pass_t *pass =
2443                 XMALLOCZ(struct inline_functions_pass_t);
2444
2445         pass->maxsize          = maxsize;
2446         pass->inline_threshold = inline_threshold;
2447
2448         return def_prog_pass_constructor(
2449                 &pass->pass, name ? name : "inline_functions",
2450                 inline_functions_wrapper);
2451 }
2452
2453 void firm_init_inline(void) {
2454         FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
2455 }