1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief    Dead node elimination and Procedure Inlining.
23  * @author   Michael Beck, Goetz Lindenmaier
24  * @version  $Id$
25  */
26 #include "config.h"
27
28 #include <limits.h>
29 #include <assert.h>
30
31 #include "irnode_t.h"
32 #include "irgraph_t.h"
33 #include "irprog_t.h"
34
35 #include "iroptimize.h"
36 #include "ircons_t.h"
37 #include "iropt_t.h"
38 #include "irgopt.h"
39 #include "irgmod.h"
40 #include "irgwalk.h"
41
42 #include "array_t.h"
43 #include "list.h"
44 #include "pset.h"
45 #include "pmap.h"
46 #include "pdeq.h"
47 #include "xmalloc.h"
48 #include "pqueue.h"
49
50 #include "irouts.h"
51 #include "irloop_t.h"
52 #include "irbackedge_t.h"
53 #include "opt_inline_t.h"
54 #include "cgana.h"
55 #include "trouts.h"
56 #include "error.h"
57
58 #include "analyze_irg_args.h"
59 #include "iredges_t.h"
60 #include "irflag_t.h"
61 #include "irhooks.h"
62 #include "irtools.h"
63 #include "iropt_dbg.h"
64
65 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
66
67 /*------------------------------------------------------------------*/
68 /* Routines for dead node elimination / copying garbage collection  */
69 /* of the obstack.                                                  */
70 /*------------------------------------------------------------------*/
71
72 /**
73  * Remember the new node in the old node by using a field all nodes have.
74  */
75 #define set_new_node(oldn, newn)  set_irn_link(oldn, newn)
76
77 /**
78  * Get this new node, before the old node is forgotten.
79  */
80 #define get_new_node(oldn) get_irn_link(oldn)
81
82 /**
83  * Check if a new node was set.
84  */
85 #define has_new_node(n) (get_new_node(n) != NULL)
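
/* Example: copy_node() below records each copy via set_new_node(old, copy);
 * copy_preds() then retrieves it with get_new_node(old) when rewiring the
 * predecessor arrays of the copies. */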
86
87 /**
88  * We use the block_visited flag to mark that we have computed the
89  * number of useful predecessors for this block.
90  * Further we encode the new arity in this flag in the old blocks.
91  * Remembering the arity is useful, as it saves a lot of pointer
92  * accesses.  This function is called for all Phi and Block nodes
93  * in a Block.
94  */
95 static inline int
96 compute_new_arity(ir_node *b) {
97         int i, res, irn_arity;
98         int irg_v, block_v;
99
100         irg_v = get_irg_block_visited(current_ir_graph);
101         block_v = get_Block_block_visited(b);
102         if (block_v >= irg_v) {
103                 /* we computed the number of preds for this block and saved it in the
104                    block_v flag */
105                 return block_v - irg_v;
106         } else {
107                 /* compute the number of good predecessors */
108                 res = irn_arity = get_irn_arity(b);
109                 for (i = 0; i < irn_arity; i++)
110                         if (is_Bad(get_irn_n(b, i))) res--;
111                 /* save it in the flag. */
112                 set_Block_block_visited(b, irg_v + res);
113                 return res;
114         }
115 }
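
/* Worked example of the encoding above: if the graph's block_visited counter
 * is 10 and a block has 3 non-Bad predecessors, its block_visited flag is set
 * to 13; a later call for the same block simply returns 13 - 10 = 3 without
 * rescanning the predecessors. */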
116
117 /**
118  * Copies the node to the new obstack. The Ins of the new node point to
119  * the predecessors on the old obstack.  For block/phi nodes not all
120  * predecessors might be copied.  n->link points to the new node.
121  * For Phi and Block nodes the function allocates in-arrays with an arity
122  * only for useful predecessors.  The arity is determined by counting
123  * the non-bad predecessors of the block.
124  *
125  * @param n    The node to be copied
126  * @param env  if non-NULL, the node number attribute will be copied to the new node
127  *
128  * Note: Also used for loop unrolling.
129  */
130 static void copy_node(ir_node *n, void *env) {
131         ir_node *nn, *block;
132         int new_arity;
133         ir_op *op = get_irn_op(n);
134         (void) env;
135
136         if (op == op_Bad) {
137                 /* node copied already */
138                 return;
139         } else if (op == op_Block) {
140                 block = NULL;
141                 new_arity = compute_new_arity(n);
142                 n->attr.block.graph_arr = NULL;
143         } else {
144                 block = get_nodes_block(n);
145                 if (op == op_Phi) {
146                         new_arity = compute_new_arity(block);
147                 } else {
148                         new_arity = get_irn_arity(n);
149                 }
150         }
151         nn = new_ir_node(get_irn_dbg_info(n),
152                 current_ir_graph,
153                 block,
154                 op,
155                 get_irn_mode(n),
156                 new_arity,
157                 get_irn_in(n) + 1);
158         /* Copy the attributes.  These might point to additional data.  If this
159            was allocated on the old obstack the pointers now are dangling.  This
160            frees e.g. the memory of the graph_arr allocated in new_immBlock. */
161         if (op == op_Block) {
162                 /* we cannot allow blocks WITHOUT macroblock input */
163                 set_Block_MacroBlock(nn, get_Block_MacroBlock(n));
164         }
165         copy_node_attr(n, nn);
166
167         if (env != NULL) {
168                 /* for easier debugging, we want to copy the node numbers too */
169                 nn->node_nr = n->node_nr;
170         }
171
172         set_new_node(n, nn);
173         hook_dead_node_elim_subst(current_ir_graph, n, nn);
174 }
175
176 /**
177  * Copies new predecessors of old node to new node remembered in link.
178  * Spare the Bad predecessors of Phi and Block nodes.
179  */
180 static void copy_preds(ir_node *n, void *env) {
181         ir_node *nn, *block;
182         int i, j, irn_arity;
183         (void) env;
184
185         nn = get_new_node(n);
186
187         if (is_Block(n)) {
188                 /* copy the macro block header */
189                 ir_node *mbh = get_Block_MacroBlock(n);
190
191                 if (mbh == n) {
192                         /* this block is a macroblock header */
193                         set_Block_MacroBlock(nn, nn);
194                 } else {
195                         /* get the macro block header */
196                         ir_node *nmbh = get_new_node(mbh);
197                         assert(nmbh != NULL);
198                         set_Block_MacroBlock(nn, nmbh);
199                 }
200
201                 /* Don't copy Bad nodes. */
202                 j = 0;
203                 irn_arity = get_irn_arity(n);
204                 for (i = 0; i < irn_arity; i++) {
205                         if (! is_Bad(get_irn_n(n, i))) {
206                                 ir_node *pred = get_irn_n(n, i);
207                                 set_irn_n(nn, j, get_new_node(pred));
208                                 j++;
209                         }
210                 }
211                 /* repair the block visited flag from above misuse. Repair it in both
212                    graphs so that the old one can still be used. */
213                 set_Block_block_visited(nn, 0);
214                 set_Block_block_visited(n, 0);
215                 /* Local optimization could not merge two subsequent blocks if
216                    the in array contained Bads.  Now it's possible.
217                    We don't call optimize_in_place as it requires
218                    that the fields in ir_graph are set properly. */
219                 if (!has_Block_entity(nn) &&
220                     get_opt_control_flow_straightening() &&
221                     get_Block_n_cfgpreds(nn) == 1 &&
222                     is_Jmp(get_Block_cfgpred(nn, 0))) {
223                         ir_node *old = get_nodes_block(get_Block_cfgpred(nn, 0));
224                         if (nn == old) {
225                                 /* Jmp jumps into the block it is in -- a self cycle, replace it by Bad. */
226                                 assert(is_Bad(get_new_node(get_irg_bad(current_ir_graph))));
227                                 exchange(nn, get_new_node(get_irg_bad(current_ir_graph)));
228                         } else {
229                                 exchange(nn, old);
230                         }
231                 }
232         } else if (is_Phi(n) && get_irn_arity(n) > 0) {
233                 /* Don't copy node if corresponding predecessor in block is Bad.
234                    The Block itself should not be Bad. */
235                 block = get_nodes_block(n);
236                 set_nodes_block(nn, get_new_node(block));
237                 j = 0;
238                 irn_arity = get_irn_arity(n);
239                 for (i = 0; i < irn_arity; i++) {
240                         if (! is_Bad(get_irn_n(block, i))) {
241                                 ir_node *pred = get_irn_n(n, i);
242                                 set_irn_n(nn, j, get_new_node(pred));
243                                 /*if (is_backedge(n, i)) set_backedge(nn, j);*/
244                                 j++;
245                         }
246                 }
247                 /* If the pre walker reached this Phi after the post walker visited the
248                    block, block_visited is > 0. */
249                 set_Block_block_visited(get_nodes_block(n), 0);
250                 /* Compacting the Phi's ins might generate Phis with only one
251                    predecessor. */
252                 if (get_irn_arity(nn) == 1)
253                         exchange(nn, get_irn_n(nn, 0));
254         } else {
255                 irn_arity = get_irn_arity(n);
256                 for (i = -1; i < irn_arity; i++)
257                         set_irn_n(nn, i, get_new_node(get_irn_n(n, i)));
258         }
259         /* Now the new node is complete.  We can add it to the hash table for CSE.
260            @@@ inlining aborts if we identify End. Why? */
261         if (!is_End(nn))
262                 add_identities(current_ir_graph->value_table, nn);
263 }
264
265 /**
266  * Copies the graph recursively, compacts the keep-alives of the end node.
267  *
268  * @param irg           the graph to be copied
269  * @param copy_node_nr  If non-zero, the node number will be copied
270  */
271 static void copy_graph(ir_graph *irg, int copy_node_nr) {
272         ir_node *oe, *ne, *ob, *nb, *om, *nm; /* old end, new end, old bad, new bad, old NoMem, new NoMem */
273         ir_node *ka;      /* keep alive */
274         int i, irn_arity;
275         unsigned long vfl;
276
277         /* Some nodes must be copied by hand, sigh */
278         vfl = get_irg_visited(irg);
279         set_irg_visited(irg, vfl + 1);
280
281         oe = get_irg_end(irg);
282         mark_irn_visited(oe);
283         /* copy the end node by hand, allocate dynamic in array! */
284         ne = new_ir_node(get_irn_dbg_info(oe),
285                 irg,
286                 NULL,
287                 op_End,
288                 mode_X,
289                 -1,
290                 NULL);
291         /* Copy the attributes.  Well, there might be some in the future... */
292         copy_node_attr(oe, ne);
293         set_new_node(oe, ne);
294
295         /* copy the Bad node */
296         ob = get_irg_bad(irg);
297         mark_irn_visited(ob);
298         nb = new_ir_node(get_irn_dbg_info(ob),
299                 irg,
300                 NULL,
301                 op_Bad,
302                 mode_T,
303                 0,
304                 NULL);
305         copy_node_attr(ob, nb);
306         set_new_node(ob, nb);
307
308         /* copy the NoMem node */
309         om = get_irg_no_mem(irg);
310         mark_irn_visited(om);
311         nm = new_ir_node(get_irn_dbg_info(om),
312                 irg,
313                 NULL,
314                 op_NoMem,
315                 mode_M,
316                 0,
317                 NULL);
318         copy_node_attr(om, nm);
319         set_new_node(om, nm);
320
321         /* copy the live nodes */
322         set_irg_visited(irg, vfl);
323         irg_walk(get_nodes_block(oe), copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
324
325         /* Note: from now on, the visited flag of the graph is equal to vfl + 1 */
326
327         /* visit the anchors as well */
328         for (i = get_irg_n_anchors(irg) - 1; i >= 0; --i) {
329                 ir_node *n = get_irg_anchor(irg, i);
330
331                 if (n && (get_irn_visited(n) <= vfl)) {
332                         set_irg_visited(irg, vfl);
333                         irg_walk(n, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
334                 }
335         }
336
337         /* copy_preds for the end node ... */
338         set_nodes_block(ne, get_new_node(get_nodes_block(oe)));
339
340         /*- ... and now the keep alives. -*/
341         /* First pick the unmarked block nodes and walk them.  We must pick these
342            first, as otherwise we would overlook blocks reachable from Phis. */
343         irn_arity = get_End_n_keepalives(oe);
344         for (i = 0; i < irn_arity; i++) {
345                 ka = get_End_keepalive(oe, i);
346                 if (is_Block(ka)) {
347                         if (get_irn_visited(ka) <= vfl) {
348                                 /* We must keep the block alive and copy everything reachable */
349                                 set_irg_visited(irg, vfl);
350                                 irg_walk(ka, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
351                         }
352                         add_End_keepalive(ne, get_new_node(ka));
353                 }
354         }
355
356         /* Now pick other nodes.  Here we will keep all! */
357         irn_arity = get_End_n_keepalives(oe);
358         for (i = 0; i < irn_arity; i++) {
359                 ka = get_End_keepalive(oe, i);
360                 if (!is_Block(ka)) {
361                         if (get_irn_visited(ka) <= vfl) {
362                                 /* We didn't copy the node yet.  */
363                                 set_irg_visited(irg, vfl);
364                                 irg_walk(ka, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
365                         }
366                         add_End_keepalive(ne, get_new_node(ka));
367                 }
368         }
369
370         /* start block sometimes only reached after keep alives */
371         set_nodes_block(nb, get_new_node(get_nodes_block(ob)));
372         set_nodes_block(nm, get_new_node(get_nodes_block(om)));
373 }
374
375 /**
376  * Copies the graph reachable from current_ir_graph->end to the obstack
377  * in current_ir_graph and fixes the environment.
378  * Then fixes the fields in current_ir_graph containing nodes of the
379  * graph.
380  *
381  * @param copy_node_nr  If non-zero, the node number will be copied
382  */
383 static void
384 copy_graph_env(int copy_node_nr) {
385         ir_graph *irg = current_ir_graph;
386         ir_node *old_end, *new_anchor;
387         int i;
388
389         /* remove end_except and end_reg nodes */
390         old_end = get_irg_end(irg);
391         set_irg_end_except (irg, old_end);
392         set_irg_end_reg    (irg, old_end);
393
394         /* Not all nodes remembered in irg might be reachable
395            from the end node.  Assure their link is set to NULL, so that
396            we can test whether new nodes have been computed. */
397         for (i = get_irg_n_anchors(irg) - 1; i >= 0; --i) {
398                 ir_node *n = get_irg_anchor(irg, i);
399                 if (n != NULL)
400                         set_new_node(n, NULL);
401         }
402         /* we use the block walk flag for removing Bads from Blocks ins. */
403         inc_irg_block_visited(irg);
404
405         /* copy the graph */
406         copy_graph(irg, copy_node_nr);
407
408         /* fix the anchor */
409         old_end    = get_irg_end(irg);
410         new_anchor = new_Anchor(irg);
411
412         for (i = get_irg_n_anchors(irg) - 1; i >= 0; --i) {
413                 ir_node *n = get_irg_anchor(irg, i);
414                 if (n)
415                         set_irn_n(new_anchor, i, get_new_node(n));
416         }
417         free_End(old_end);
418         irg->anchor = new_anchor;
419
420         /* ensure the new anchor is placed in the endblock */
421         set_nodes_block(new_anchor, get_irg_end_block(irg));
422 }
423
424 /**
425  * Copies all reachable nodes to a new obstack.  Removes bad inputs
426  * from block nodes and the corresponding inputs from Phi nodes.
427  * Merges single exit blocks with single entry blocks and removes
428  * 1-input Phis.
429  * Adds all new nodes to a new hash table for CSE.  Does not
430  * perform CSE, so the hash table might contain common subexpressions.
431  */
432 void dead_node_elimination(ir_graph *irg) {
433         ir_graph *rem;
434 #ifdef INTERPROCEDURAL_VIEW
435         int rem_ipview = get_interprocedural_view();
436 #endif
437         struct obstack *graveyard_obst = NULL;
438         struct obstack *rebirth_obst   = NULL;
439
440         edges_deactivate(irg);
441
442         /* inform statistics that we started a dead-node elimination run */
443         hook_dead_node_elim(irg, 1);
444
445         /* Remember external state of current_ir_graph. */
446         rem = current_ir_graph;
447         current_ir_graph = irg;
448 #ifdef INTERPROCEDURAL_VIEW
449         set_interprocedural_view(0);
450 #endif
451
452         assert(get_irg_phase_state(irg) != phase_building);
453
454         /* Handle graph state */
455         free_callee_info(irg);
456         free_irg_outs(irg);
457         free_trouts();
458
459         /* @@@ so far we lose loops when copying */
460         free_loop_information(irg);
461
462         set_irg_doms_inconsistent(irg);
463
464         /* A quiet place, where the old obstack can rest in peace,
465            until it will be cremated. */
466         graveyard_obst = irg->obst;
467
468         /* A new obstack, where the reachable nodes will be copied to. */
469         rebirth_obst = XMALLOC(struct obstack);
470         irg->obst = rebirth_obst;
471         obstack_init(irg->obst);
472         irg->last_node_idx = 0;
473
474         /* We also need a new value table for CSE */
475         del_identities(irg->value_table);
476         irg->value_table = new_identities();
477
478         /* Copy the graph from the old to the new obstack */
479         copy_graph_env(/*copy_node_nr=*/1);
480
481         /* Free memory from old unoptimized obstack */
482         obstack_free(graveyard_obst, 0);  /* First empty the obstack ... */
483         xfree(graveyard_obst);            /* ... then free it.           */
484
485         /* inform statistics that the run is over */
486         hook_dead_node_elim(irg, 0);
487
488         current_ir_graph = rem;
489 #ifdef INTERPROCEDURAL_VIEW
490         set_interprocedural_view(rem_ipview);
491 #endif
492 }
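
/* A minimal usage sketch (hypothetical caller): after a pass has produced many
 * dead nodes, the graph can be compacted like this:
 *
 *     ir_graph *irg = get_irp_irg(0);   // some graph of the program
 *     dead_node_elimination(irg);       // copies all live nodes to a fresh obstack
 *
 * Note that ir_node pointers held across this call become stale unless they
 * are registered with the survive_dce facility below. */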
493
494 /**
495  * Relink bad predecessors of a block and store the old in array in the
496  * link field. This function is called by relink_bad_predecessors().
497  * The array in the link field starts with the block operand at position 0.
498  * If the block has bad predecessors, create a new in array without bad preds.
499  * Otherwise leave the in array untouched.
500  */
501 static void relink_bad_block_predecessors(ir_node *n, void *env) {
502         ir_node **new_in, *irn;
503         int i, new_irn_n, old_irn_arity, new_irn_arity = 0;
504         (void) env;
505
506         /* if link field of block is NULL, look for bad predecessors otherwise
507            this is already done */
508         if (is_Block(n) && get_irn_link(n) == NULL) {
509                 /* save old predecessors in link field (position 0 is the block operand)*/
510                 set_irn_link(n, get_irn_in(n));
511
512                 /* count predecessors without bad nodes */
513                 old_irn_arity = get_irn_arity(n);
514                 for (i = 0; i < old_irn_arity; i++)
515                         if (!is_Bad(get_irn_n(n, i)))
516                                 ++new_irn_arity;
517
518                 /* arity changing: set new predecessors without bad nodes */
519                 if (new_irn_arity < old_irn_arity) {
520                         /* Get new predecessor array. We do not resize the array, as we must
521                            keep the old one to update Phis. */
522                         new_in = NEW_ARR_D(ir_node *, current_ir_graph->obst, (new_irn_arity+1));
523
524                         /* set new predecessors in array */
525                         new_in[0] = NULL;
526                         new_irn_n = 1;
527                         for (i = 0; i < old_irn_arity; i++) {
528                                 irn = get_irn_n(n, i);
529                                 if (!is_Bad(irn)) {
530                                         new_in[new_irn_n] = irn;
531                                         is_backedge(n, i) ? set_backedge(n, new_irn_n-1) : set_not_backedge(n, new_irn_n-1);
532                                         ++new_irn_n;
533                                 }
534                         }
535                         /* ARR_SETLEN(int, n->attr.block.backedge, new_irn_arity); */
536                         ARR_SHRINKLEN(n->attr.block.backedge, new_irn_arity);
537                         n->in = new_in;
538                 } /* ir node has bad predecessors */
539         } /* Block is not relinked */
540 }
541
542 /**
543  * Relinks Bad predecessors from Blocks and Phis, called by the walker in
544  * remove_bad_predecessors(). If n is a Block, call
545  * relink_bad_block_predecessors(). If n is a Phi node, also call the relinking
546  * function on the Phi's Block. If this block has bad predecessors, relink the
547  * preds of the Phi node.
548  */
549 static void relink_bad_predecessors(ir_node *n, void *env) {
550         ir_node *block, **old_in;
551         int i, old_irn_arity, new_irn_arity;
552
553         /* relink bad predecessors of a block */
554         if (is_Block(n))
555                 relink_bad_block_predecessors(n, env);
556
557         /* If Phi node relink its block and its predecessors */
558         if (is_Phi(n)) {
559                 /* Relink predecessors of phi's block */
560                 block = get_nodes_block(n);
561                 if (get_irn_link(block) == NULL)
562                         relink_bad_block_predecessors(block, env);
563
564                 old_in = (ir_node **)get_irn_link(block); /* Of Phi's Block */
565                 old_irn_arity = ARR_LEN(old_in);
566
567                 /* Relink Phi predecessors if count of predecessors changed */
568                 if (old_irn_arity != ARR_LEN(get_irn_in(block))) {
569                         /* set new predecessors in array
570                            n->in[0] remains the same block */
571                         new_irn_arity = 1;
572                         for(i = 1; i < old_irn_arity; i++)
573                                 if (!is_Bad(old_in[i])) {
574                                         n->in[new_irn_arity] = n->in[i];
575                                         is_backedge(n, i) ? set_backedge(n, new_irn_arity) : set_not_backedge(n, new_irn_arity);
576                                         ++new_irn_arity;
577                                 }
578
579                         ARR_SETLEN(ir_node *, n->in, new_irn_arity);
580                         ARR_SETLEN(int, n->attr.phi.u.backedge, new_irn_arity);
581                 }
582         } /* n is a Phi node */
583 }
584
585 /*
586  * Removes Bad predecessors from Blocks and the corresponding
587  * inputs to Phi nodes as in dead_node_elimination but without
588  * copying the graph.
589  * On walking up set the link field to NULL, on walking down call
590  * relink_bad_predecessors() (This function stores the old in array
591  * to the link field and sets a new in array if arity of predecessors
592  * changes).
593  */
594 void remove_bad_predecessors(ir_graph *irg) {
595         panic("Fix backedge handling first");
596         irg_walk_graph(irg, firm_clear_link, relink_bad_predecessors, NULL);
597 }
598
599
600 /*
601    __                      _  __ __
602   (_     __    o     _    | \/  |_
603   __)|_| | \_/ | \_/(/_   |_/\__|__
604
605   The following stuff implements a facility that automatically patches
606   registered ir_node pointers to the new node when a dead node elimination occurs.
607 */
608
609 struct _survive_dce_t {
610         struct obstack obst;
611         pmap *places;
612         pmap *new_places;
613         hook_entry_t dead_node_elim;
614         hook_entry_t dead_node_elim_subst;
615 };
616
617 typedef struct _survive_dce_list_t {
618         struct _survive_dce_list_t *next;
619         ir_node **place;
620 } survive_dce_list_t;
621
622 static void dead_node_hook(void *context, ir_graph *irg, int start) {
623         survive_dce_t *sd = context;
624         (void) irg;
625
626         /* Create a new map before the dead node elimination is performed. */
627         if (start) {
628                 sd->new_places = pmap_create_ex(pmap_count(sd->places));
629         } else {
630                 /* Patch back all nodes if dead node elimination is over and something is to be done. */
631                 pmap_destroy(sd->places);
632                 sd->places     = sd->new_places;
633                 sd->new_places = NULL;
634         }
635 }
636
637 /**
638  * Hook called when dead node elimination replaces old by nw.
639  */
640 static void dead_node_subst_hook(void *context, ir_graph *irg, ir_node *old, ir_node *nw) {
641         survive_dce_t *sd = context;
642         survive_dce_list_t *list = pmap_get(sd->places, old);
643         (void) irg;
644
645         /* If the node is to be patched back, write the new address to all registered locations. */
646         if (list) {
647                 survive_dce_list_t *p;
648
649                 for (p = list; p; p = p->next)
650                         *(p->place) = nw;
651
652                 pmap_insert(sd->new_places, nw, list);
653         }
654 }
655
656 /**
657  * Make a new Survive DCE environment.
658  */
659 survive_dce_t *new_survive_dce(void) {
660         survive_dce_t *res = XMALLOC(survive_dce_t);
661         obstack_init(&res->obst);
662         res->places     = pmap_create();
663         res->new_places = NULL;
664
665         res->dead_node_elim.hook._hook_dead_node_elim = dead_node_hook;
666         res->dead_node_elim.context                   = res;
667         res->dead_node_elim.next                      = NULL;
668
669         res->dead_node_elim_subst.hook._hook_dead_node_elim_subst = dead_node_subst_hook;
670         res->dead_node_elim_subst.context = res;
671         res->dead_node_elim_subst.next    = NULL;
672
673         register_hook(hook_dead_node_elim, &res->dead_node_elim);
674         register_hook(hook_dead_node_elim_subst, &res->dead_node_elim_subst);
675         return res;
676 }
677
678 /**
679  * Free a Survive DCE environment.
680  */
681 void free_survive_dce(survive_dce_t *sd) {
682         obstack_free(&sd->obst, NULL);
683         pmap_destroy(sd->places);
684         unregister_hook(hook_dead_node_elim, &sd->dead_node_elim);
685         unregister_hook(hook_dead_node_elim_subst, &sd->dead_node_elim_subst);
686         xfree(sd);
687 }
688
689 /**
690  * Register a node pointer to be patched upon DCE.
691  * When DCE occurs, the node pointer specified by @p place will be
692  * patched to the new address of the node it is pointing to.
693  *
694  * @param sd    The Survive DCE environment.
695  * @param place The address of the node pointer.
696  */
697 void survive_dce_register_irn(survive_dce_t *sd, ir_node **place) {
698         if (*place != NULL) {
699                 ir_node *irn      = *place;
700                 survive_dce_list_t *curr = pmap_get(sd->places, irn);
701                 survive_dce_list_t *nw   = obstack_alloc(&sd->obst, sizeof(nw[0]));
702
703                 nw->next  = curr;
704                 nw->place = place;
705
706                 pmap_insert(sd->places, irn, nw);
707         }
708 }
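
/* Usage sketch for the survive-DCE facility (hypothetical caller; my_node is a
 * placeholder for any ir_node pointer the caller wants to keep valid):
 *
 *     survive_dce_t *sd = new_survive_dce();
 *     survive_dce_register_irn(sd, &my_node);
 *     dead_node_elimination(irg);       // my_node now points to the copied node
 *     free_survive_dce(sd);
 */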
709
710 /*--------------------------------------------------------------------*/
711 /*  Functionality for inlining                                         */
712 /*--------------------------------------------------------------------*/
713
714 /**
715  * Copy node for inlining.  Updates attributes that change when
716  * inlining but not for dead node elimination.
717  *
718  * Copies the node by calling copy_node() and then updates the entity if
719  * it's a local one.  env must be a pointer to the frame type of the
720  * inlined procedure. The new entities must be in the link field of
721  * the entities.
722  */
723 static void copy_node_inline(ir_node *n, void *env) {
724         ir_node *nn;
725         ir_type *frame_tp = (ir_type *)env;
726
727         copy_node(n, NULL);
728         if (is_Sel(n)) {
729                 nn = get_new_node(n);
730                 assert(is_Sel(nn));
731                 /* use copied entities from the new frame */
732                 if (get_entity_owner(get_Sel_entity(n)) == frame_tp) {
733                         set_Sel_entity(nn, get_entity_link(get_Sel_entity(n)));
734                 }
735         } else if (is_Block(n)) {
736                 nn = get_new_node(n);
737                 nn->attr.block.irg.irg = current_ir_graph;
738         }
739 }
740
741 /**
742  * Copies new predecessors of the old node and moves constants to
743  * the Start Block.
744  */
745 static void copy_preds_inline(ir_node *n, void *env) {
746         ir_node *nn;
747
748         copy_preds(n, env);
749         nn = skip_Id(get_new_node(n));
750         if (is_irn_constlike(nn)) {
751                 /* move Constants into the start block */
752                 set_nodes_block(nn, get_irg_start_block(current_ir_graph));
753
754                 n = identify_remember(current_ir_graph->value_table, nn);
755                 if (nn != n) {
756                         DBG_OPT_CSE(nn, n);
757                         exchange(nn, n);
758                 }
759         }
760 }
761
762 /**
763  * Walker: checks if P_value_arg_base is used.
764  */
765 static void find_addr(ir_node *node, void *env) {
766         int *allow_inline = env;
767         if (is_Sel(node)) {
768                 ir_graph *irg = current_ir_graph;
769                 if (get_Sel_ptr(node) == get_irg_frame(irg)) {
770                         /* access to frame */
771                         ir_entity *ent = get_Sel_entity(node);
772                         if (get_entity_owner(ent) != get_irg_frame_type(irg)) {
773                                 /* access to value_type */
774                                 *allow_inline = 0;
775                         }
776                 }
777         } else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
778                 /* From GCC:
779                  * Refuse to inline alloca call unless user explicitly forced so as this
780                  * may change program's memory overhead drastically when the function
781                  * using alloca is called in a loop.  In the GCC present in SPEC2000,
782                  * inlining into schedule_block caused it to require 2GB of RAM instead of 256MB.
783                  *
784                  * Sadly this is true for our implementation as well.
785                  * Moreover, we cannot differentiate between alloca() and VLA yet, so this
786                  * disables inlining of functions using VLA (which are completely safe).
787                  *
788                  * 2 Solutions:
789                  * - add a flag to the Alloc node for "real" alloca() calls
790                  * - add a new Stack-Restore node at the end of a function using alloca()
791                  */
792                 *allow_inline = 0;
793         }
794 }
795
796 /**
797  * Check if we can inline a given call.
798  * Currently, we cannot inline two cases:
799  * - call with compound arguments
800  * - graphs that take the address of a parameter
801  *
802  * check these conditions here
803  */
804 static int can_inline(ir_node *call, ir_graph *called_graph) {
805         ir_type *call_type = get_Call_type(call);
806         int params, ress, i, res;
807         assert(is_Method_type(call_type));
808
809         params = get_method_n_params(call_type);
810         ress   = get_method_n_ress(call_type);
811
812         /* check parameters for compound arguments */
813         for (i = 0; i < params; ++i) {
814                 ir_type *p_type = get_method_param_type(call_type, i);
815
816                 if (is_compound_type(p_type))
817                         return 0;
818         }
819
820         /* check results for compound arguments */
821         for (i = 0; i < ress; ++i) {
822                 ir_type *r_type = get_method_res_type(call_type, i);
823
824                 if (is_compound_type(r_type))
825                         return 0;
826         }
827
828         res = 1;
829         irg_walk_graph(called_graph, find_addr, NULL, &res);
830
831         return res;
832 }
833
834 enum exc_mode {
835         exc_handler    = 0, /**< There is a handler. */
836         exc_to_end     = 1, /**< Branches to End. */
837         exc_no_handler = 2  /**< Exception handling not represented. */
838 };
839
840 /* Inlines a method at the given call site. */
841 int inline_method(ir_node *call, ir_graph *called_graph) {
842         ir_node             *pre_call;
843         ir_node             *post_call, *post_bl;
844         ir_node             *in[pn_Start_max];
845         ir_node             *end, *end_bl, *block;
846         ir_node             **res_pred;
847         ir_node             **cf_pred;
848         ir_node             **args_in;
849         ir_node             *ret, *phi;
850         int                 arity, n_ret, n_exc, n_res, i, n, j, rem_opt, irn_arity, n_params;
851         enum exc_mode       exc_handling;
852         ir_type             *called_frame, *curr_frame, *mtp, *ctp;
853         ir_entity           *ent;
854         ir_graph            *rem, *irg;
855         irg_inline_property prop = get_irg_inline_property(called_graph);
856         unsigned long       visited;
857
858         if (prop == irg_inline_forbidden)
859                 return 0;
860
861         ent = get_irg_entity(called_graph);
862
863         mtp = get_entity_type(ent);
864         ctp = get_Call_type(call);
865         n_params = get_method_n_params(mtp);
866         n_res    = get_method_n_ress(mtp);
867         if (n_params > get_method_n_params(ctp)) {
868                 /* this is a bad feature of C: without a prototype, we can
869                  * call a function with fewer parameters than needed. Currently
870                  * we don't support this, although we could use Unknown then. */
871                 return 0;
872         }
873         if (n_res != get_method_n_ress(ctp)) {
874                 return 0;
875         }
876
877         /* Argh, compiling C has some bad consequences:
878          * if argument and parameter types differ, it is implementation
879          * dependent what happens. We support inlining if the bit size of
880          * the types matches AND the same arithmetic is used. */
881         for (i = n_params - 1; i >= 0; --i) {
882                 ir_type *param_tp = get_method_param_type(mtp, i);
883                 ir_type *arg_tp   = get_method_param_type(ctp, i);
884
885                 if (param_tp != arg_tp) {
886                         ir_mode *pmode = get_type_mode(param_tp);
887                         ir_mode *amode = get_type_mode(arg_tp);
888
889                         if (pmode == NULL || amode == NULL)
890                                 return 0;
891                         if (get_mode_size_bits(pmode) != get_mode_size_bits(amode))
892                                 return 0;
893                         if (get_mode_arithmetic(pmode) != get_mode_arithmetic(amode))
894                                 return 0;
895                         /* otherwise we can simply "reinterpret" the bits */
896                 }
897         }
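        /* Illustration of the rule above: an unsigned argument passed to a signed
           parameter of the same width and integer arithmetic is accepted and simply
           reinterpreted; an integer argument for a same-sized float parameter is
           rejected because the arithmetic differs. */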
898         for (i = n_res - 1; i >= 0; --i) {
899                 ir_type *decl_res_tp = get_method_res_type(mtp, i);
900                 ir_type *used_res_tp = get_method_res_type(ctp, i);
901
902                 if (decl_res_tp != used_res_tp) {
903                         ir_mode *decl_mode = get_type_mode(decl_res_tp);
904                         ir_mode *used_mode = get_type_mode(used_res_tp);
905                         if (decl_mode == NULL || used_mode == NULL)
906                                 return 0;
907                         if (get_mode_size_bits(decl_mode) != get_mode_size_bits(used_mode))
908                                 return 0;
909                         if (get_mode_arithmetic(decl_mode) != get_mode_arithmetic(used_mode))
910                                 return 0;
911                         /* otherwise we can "reinterpret" the bits */
912                 }
913         }
914
915         irg = get_irn_irg(call);
916
917         /*
918          * We cannot inline a recursive call. The graph must be copied before
919          * the call to inline_method() using create_irg_copy().
920          */
921         if (called_graph == irg)
922                 return 0;
923
924         /*
925          * currently, we cannot inline two cases:
926          * - call with compound arguments
927          * - graphs that take the address of a parameter
928          */
929         if (! can_inline(call, called_graph))
930                 return 0;
931
932         rem = current_ir_graph;
933         current_ir_graph = irg;
934
935         DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));
936
937         /* --  Turn off optimizations, this can cause problems when allocating new nodes. -- */
938         rem_opt = get_opt_optimize();
939         set_optimize(0);
940
941         /* Handle graph state */
942         assert(get_irg_phase_state(irg) != phase_building);
943         assert(get_irg_pinned(irg) == op_pin_state_pinned);
944         assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
945         set_irg_outs_inconsistent(irg);
946         set_irg_extblk_inconsistent(irg);
947         set_irg_doms_inconsistent(irg);
948         set_irg_loopinfo_inconsistent(irg);
949         set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
950         set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);
951
952         /* -- Check preconditions -- */
953         assert(is_Call(call));
954
955         /* here we know we WILL inline, so inform the statistics */
956         hook_inline(call, called_graph);
957
958         /* -- Decide how to handle exception control flow: Is there a handler
959            for the Call node, or do we branch directly to End on an exception?
960            exc_handling:
961            0 There is a handler.
962            1 Branches to End.
963            2 Exception handling not represented in Firm. -- */
964         {
965                 ir_node *proj, *Mproj = NULL, *Xproj = NULL;
966                 for (proj = get_irn_link(call); proj; proj = get_irn_link(proj)) {
967                         long proj_nr = get_Proj_proj(proj);
968                         if (proj_nr == pn_Call_X_except) Xproj = proj;
969                         if (proj_nr == pn_Call_M_except) Mproj = proj;
970                 }
971                 if      (Mproj) { assert(Xproj); exc_handling = exc_handler; } /*  Mproj           */
972                 else if (Xproj) {                exc_handling = exc_to_end; } /* !Mproj &&  Xproj   */
973                 else            {                exc_handling = exc_no_handler; } /* !Mproj && !Xproj   */
974         }
975
976         /* create the argument tuple */
977         NEW_ARR_A(ir_node *, args_in, n_params);
978
979         block = get_nodes_block(call);
980         for (i = n_params - 1; i >= 0; --i) {
981                 ir_node *arg      = get_Call_param(call, i);
982                 ir_type *param_tp = get_method_param_type(mtp, i);
983                 ir_mode *mode     = get_type_mode(param_tp);
984
985                 if (mode != get_irn_mode(arg)) {
986                         arg = new_r_Conv(block, arg, mode);
987                 }
988                 args_in[i] = arg;
989         }
990
991         /* -- Pre_call is a Tuple that gathers the memory, frame and arguments of
992            the procedure and later replaces the Start node of the called graph.
993            Post_call is the old Call node and collects the results of the called
994            graph. Both will end up being a tuple.  -- */
995         post_bl = get_nodes_block(call);
996         set_irg_current_block(irg, post_bl);
997         /* XxMxPxPxPxT of Start + parameter of Call */
998         in[pn_Start_X_initial_exec]   = new_Jmp();
999         in[pn_Start_M]                = get_Call_mem(call);
1000         in[pn_Start_P_frame_base]     = get_irg_frame(irg);
1001         in[pn_Start_P_tls]            = get_irg_tls(irg);
1002         in[pn_Start_T_args]           = new_Tuple(n_params, args_in);
1003         pre_call = new_Tuple(pn_Start_max, in);
1004         post_call = call;
1005
1006         /* --
1007            The new block gets the ins of the old block, pre_call and all its
1008            predecessors and all Phi nodes. -- */
1009         part_block(pre_call);
1010
1011         /* -- Prepare state for dead node elimination -- */
1012         /* Visited flags in calling irg must be >= flag in called irg.
1013            Else walker and arity computation will not work. */
1014         if (get_irg_visited(irg) <= get_irg_visited(called_graph))
1015                 set_irg_visited(irg, get_irg_visited(called_graph) + 1);
1016         if (get_irg_block_visited(irg) < get_irg_block_visited(called_graph))
1017                 set_irg_block_visited(irg, get_irg_block_visited(called_graph));
1018         visited = get_irg_visited(irg);
1019
1020         /* Set pre_call as new Start node in link field of the start node of
1021            calling graph and pre_calls block as new block for the start block
1022            of calling graph.
1023            Further mark these nodes so that they are not visited by the
1024            copying. */
1025         set_irn_link(get_irg_start(called_graph), pre_call);
1026         set_irn_visited(get_irg_start(called_graph), visited);
1027         set_irn_link(get_irg_start_block(called_graph), get_nodes_block(pre_call));
1028         set_irn_visited(get_irg_start_block(called_graph), visited);
1029
1030         set_irn_link(get_irg_bad(called_graph), get_irg_bad(current_ir_graph));
1031         set_irn_visited(get_irg_bad(called_graph), visited);
1032
1033         set_irn_link(get_irg_no_mem(called_graph), get_irg_no_mem(current_ir_graph));
1034         set_irn_visited(get_irg_no_mem(called_graph), visited);
1035
1036         /* Initialize for compaction of in arrays */
1037         inc_irg_block_visited(irg);
1038
1039         /* -- Replicate local entities of the called_graph -- */
1040         /* copy the entities. */
1041         irp_reserve_resources(irp, IR_RESOURCE_ENTITY_LINK);
1042         called_frame = get_irg_frame_type(called_graph);
1043         curr_frame   = get_irg_frame_type(irg);
1044         for (i = 0, n = get_class_n_members(called_frame); i < n; ++i) {
1045                 ir_entity *new_ent, *old_ent;
1046                 old_ent = get_class_member(called_frame, i);
1047                 new_ent = copy_entity_own(old_ent, curr_frame);
1048                 set_entity_link(old_ent, new_ent);
1049         }
1050
1051         /* visited is > than that of the called graph.  With this trick visited will
1052            remain unchanged, so that an outer walker (e.g. one searching for the call
1053            nodes to inline) that calls this inliner will not visit the inlined nodes. */
1054         set_irg_visited(irg, get_irg_visited(irg)-1);
1055
1056         /* -- Performing dead node elimination inlines the graph -- */
1057         /* Copies the nodes to the obstack of current_ir_graph. Updates links to new
1058            entities. */
1059         irg_walk(get_irg_end(called_graph), copy_node_inline, copy_preds_inline,
1060                  get_irg_frame_type(called_graph));
1061
1062         irp_free_resources(irp, IR_RESOURCE_ENTITY_LINK);
1063
1064         /* Repair called_graph */
1065         set_irg_visited(called_graph, get_irg_visited(irg));
1066         set_irg_block_visited(called_graph, get_irg_block_visited(irg));
1067         set_Block_block_visited(get_irg_start_block(called_graph), 0);
1068
1069         /* -- Merge the end of the inlined procedure with the call site -- */
1070         /* We will turn the old Call node into a Tuple with the following
1071            predecessors:
1072            -1:  Block of Tuple.
1073            0: Phi of all Memories of Return statements.
1074            1: Jmp from new Block that merges the control flow from all exception
1075            predecessors of the old end block.
1076            2: Tuple of all arguments.
1077            3: Phi of Exception memories.
1078            In case the old Call directly branches to End on an exception we don't
1079            need the block merging all exceptions nor the Phi of the exception
1080            memories.
1081         */
1082
1083         /* -- Precompute some values -- */
1084         end_bl = get_new_node(get_irg_end_block(called_graph));
1085         end = get_new_node(get_irg_end(called_graph));
1086         arity = get_Block_n_cfgpreds(end_bl);    /* arity = n_exc + n_ret  */
1087         n_res = get_method_n_ress(get_Call_type(call));
1088
1089         res_pred = XMALLOCN(ir_node*, n_res);
1090         cf_pred  = XMALLOCN(ir_node*, arity);
1091
1092         set_irg_current_block(irg, post_bl); /* just to make sure */
1093
1094         /* -- archive keepalives -- */
1095         irn_arity = get_irn_arity(end);
1096         for (i = 0; i < irn_arity; i++) {
1097                 ir_node *ka = get_End_keepalive(end, i);
1098                 if (! is_Bad(ka))
1099                         add_End_keepalive(get_irg_end(irg), ka);
1100         }
1101
1102         /* The new end node will die.  We need not free as the in array is on the obstack:
1103            copy_node() only generated 'D' arrays. */
1104
1105         /* -- Replace Return nodes by Jump nodes. -- */
1106         n_ret = 0;
1107         for (i = 0; i < arity; i++) {
1108                 ir_node *ret;
1109                 ret = get_Block_cfgpred(end_bl, i);
1110                 if (is_Return(ret)) {
1111                         cf_pred[n_ret] = new_r_Jmp(get_nodes_block(ret));
1112                         n_ret++;
1113                 }
1114         }
1115         set_irn_in(post_bl, n_ret, cf_pred);
1116
1117         /* -- Build a Tuple for all results of the method.
1118            Add Phi node if there was more than one Return.  -- */
1119         turn_into_tuple(post_call, pn_Call_max);
1120         /* First the Memory-Phi */
1121         n_ret = 0;
1122         for (i = 0; i < arity; i++) {
1123                 ret = get_Block_cfgpred(end_bl, i);
1124                 if (is_Return(ret)) {
1125                         cf_pred[n_ret] = get_Return_mem(ret);
1126                         n_ret++;
1127                 }
1128         }
1129         phi = new_Phi(n_ret, cf_pred, mode_M);
1130         set_Tuple_pred(call, pn_Call_M_regular, phi);
1131         /* Conserve Phi-list for further inlinings -- but might be optimized */
1132         if (get_nodes_block(phi) == post_bl) {
1133                 set_irn_link(phi, get_irn_link(post_bl));
1134                 set_irn_link(post_bl, phi);
1135         }
1136         /* Now the real results */
1137         if (n_res > 0) {
1138                 for (j = 0; j < n_res; j++) {
1139                         ir_type *res_type = get_method_res_type(ctp, j);
1140                         ir_mode *res_mode = get_type_mode(res_type);
1141                         n_ret = 0;
1142                         for (i = 0; i < arity; i++) {
1143                                 ret = get_Block_cfgpred(end_bl, i);
1144                                 if (is_Return(ret)) {
1145                                         ir_node *res = get_Return_res(ret, j);
1146                                         if (get_irn_mode(res) != res_mode) {
1147                                                 ir_node *block = get_nodes_block(res);
1148                                                 res = new_r_Conv(block, res, res_mode);
1149                                         }
1150                                         cf_pred[n_ret] = res;
1151                                         n_ret++;
1152                                 }
1153                         }
1154                         if (n_ret > 0)
1155                                 phi = new_Phi(n_ret, cf_pred, get_irn_mode(cf_pred[0]));
1156                         else
1157                                 phi = new_Bad();
1158                         res_pred[j] = phi;
1159                         /* Conserve Phi-list for further inlinings -- but might be optimized */
1160                         if (get_nodes_block(phi) == post_bl) {
1161                                 set_Phi_next(phi, get_Block_phis(post_bl));
1162                                 set_Block_phis(post_bl, phi);
1163                         }
1164                 }
1165                 set_Tuple_pred(call, pn_Call_T_result, new_Tuple(n_res, res_pred));
1166         } else {
1167                 set_Tuple_pred(call, pn_Call_T_result, new_Bad());
1168         }
1169         /* handle the regular call */
1170         set_Tuple_pred(call, pn_Call_X_regular, new_Jmp());
1171
1172         /* For now, we cannot inline calls with value_base */
1173         set_Tuple_pred(call, pn_Call_P_value_res_base, new_Bad());
1174
1175         /* Finally the exception control flow.
1176            We have two (three) possible situations:
1177            First if the Call branches to an exception handler: We need to add a Phi node to
1178            collect the memory containing the exception objects.  Further we need
1179            to add another block to get a correct representation of this Phi.  To
1180            this block we add a Jmp that resolves into the X output of the Call
1181            when the Call is turned into a tuple.
1182            Second the Call branches to End, the exception is not handled.  Just
1183            add all inlined exception branches to the End node.
1184            Third: there is no Exception edge at all. Handle as case two. */
1185         if (exc_handling == exc_handler) {
1186                 n_exc = 0;
1187                 for (i = 0; i < arity; i++) {
1188                         ir_node *ret, *irn;
1189                         ret = get_Block_cfgpred(end_bl, i);
1190                         irn = skip_Proj(ret);
1191                         if (is_fragile_op(irn) || is_Raise(irn)) {
1192                                 cf_pred[n_exc] = ret;
1193                                 ++n_exc;
1194                         }
1195                 }
1196                 if (n_exc > 0) {
1197                         ir_node *block = new_Block(n_exc, cf_pred);
1198                         set_cur_block(block);
1199
1200                         set_Tuple_pred(call, pn_Call_X_except, new_Jmp());
1201                         /* The Phi for the memories with the exception objects */
1202                         n_exc = 0;
1203                         for (i = 0; i < arity; i++) {
1204                                 ir_node *ret;
1205                                 ret = skip_Proj(get_Block_cfgpred(end_bl, i));
1206                                 if (is_Call(ret)) {
1207                                         cf_pred[n_exc] = new_r_Proj(get_nodes_block(ret), ret, mode_M, 3);
1208                                         n_exc++;
1209                                 } else if (is_fragile_op(ret)) {
1210                                         /* We rely on all cfops having the memory output at the same position. */
1211                                         cf_pred[n_exc] = new_r_Proj(get_nodes_block(ret), ret, mode_M, 0);
1212                                         n_exc++;
1213                                 } else if (is_Raise(ret)) {
1214                                         cf_pred[n_exc] = new_r_Proj(get_nodes_block(ret), ret, mode_M, 1);
1215                                         n_exc++;
1216                                 }
1217                         }
1218                         set_Tuple_pred(call, pn_Call_M_except, new_Phi(n_exc, cf_pred, mode_M));
1219                 } else {
1220                         set_Tuple_pred(call, pn_Call_X_except, new_Bad());
1221                         set_Tuple_pred(call, pn_Call_M_except, new_Bad());
1222                 }
1223         } else {
1224                 ir_node *main_end_bl;
1225                 int main_end_bl_arity;
1226                 ir_node **end_preds;
1227
1228                 /* assert(exc_handling == 1 || no exceptions. ) */
1229                 n_exc = 0;
1230                 for (i = 0; i < arity; i++) {
1231                         ir_node *ret = get_Block_cfgpred(end_bl, i);
1232                         ir_node *irn = skip_Proj(ret);
1233
1234                         if (is_fragile_op(irn) || is_Raise(irn)) {
1235                                 cf_pred[n_exc] = ret;
1236                                 n_exc++;
1237                         }
1238                 }
1239                 main_end_bl       = get_irg_end_block(irg);
1240                 main_end_bl_arity = get_irn_arity(main_end_bl);
1241                 end_preds         = XMALLOCN(ir_node*, n_exc + main_end_bl_arity);
1242
1243                 for (i = 0; i < main_end_bl_arity; ++i)
1244                         end_preds[i] = get_irn_n(main_end_bl, i);
1245                 for (i = 0; i < n_exc; ++i)
1246                         end_preds[main_end_bl_arity + i] = cf_pred[i];
1247                 set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
1248                 set_Tuple_pred(call, pn_Call_X_except, new_Bad());
1249                 set_Tuple_pred(call, pn_Call_M_except, new_Bad());
1250                 free(end_preds);
1251         }
1252         free(res_pred);
1253         free(cf_pred);
1254
1255         /* --  Turn CSE back on. -- */
1256         set_optimize(rem_opt);
1257         current_ir_graph = rem;
1258
1259         return 1;
1260 }
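
/* A minimal usage sketch for inline_method() (hypothetical caller; call and
 * callee are placeholders).  The drivers below, e.g. inline_small_irgs(), do
 * essentially this before and after inlining:
 *
 *     ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_PHI_LIST);
 *     collect_phiprojs(irg);            // inline_method() reads the Proj links of the Call
 *     if (inline_method(call, callee)) {
 *             // the Call node is now a Tuple feeding its former Projs
 *     }
 *     ir_free_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_PHI_LIST);
 */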
1261
1262 /********************************************************************/
1263 /* Apply inlining to small methods.                                 */
1264 /********************************************************************/
1265
1266 static struct obstack  temp_obst;
1267
1268 /** Represents a possible inlinable call in a graph. */
1269 typedef struct _call_entry {
1270         ir_node    *call;       /**< The Call node. */
1271         ir_graph   *callee;     /**< The callee IR-graph. */
1272         list_head  list;        /**< List head for linking the next one. */
1273         int        loop_depth;  /**< The loop depth of this call. */
1274         int        benefice;    /**< The calculated benefice of this call. */
1275         unsigned   local_adr:1; /**< Set if this call gets an address of a local variable. */
1276         unsigned   all_const:1; /**< Set if this call has only constant parameters. */
1277 } call_entry;
1278
1279 /**
1280  * environment for inlining small irgs
1281  */
1282 typedef struct _inline_env_t {
1283         struct obstack obst;  /**< An obstack where call_entries are allocated on. */
1284         list_head      calls; /**< The call entry list. */
1285 } inline_env_t;
1286
1287 /**
1288  * Returns the irg called from a Call node. If the irg is not
1289  * known, NULL is returned.
1290  *
1291  * @param call  the call node
1292  */
1293 static ir_graph *get_call_called_irg(ir_node *call) {
1294         ir_node *addr;
1295
1296         addr = get_Call_ptr(call);
1297         if (is_Global(addr)) {
1298                 ir_entity *ent = get_Global_entity(addr);
1299                 return get_entity_irg(ent);
1300         }
1301
1302         return NULL;
1303 }
1304
1305 /**
1306  * Walker: Collect all calls to known graphs inside a graph.
1307  */
1308 static void collect_calls(ir_node *call, void *env) {
1309         (void) env;
1310         if (is_Call(call)) {
1311                 ir_graph *called_irg = get_call_called_irg(call);
1312
1313                 if (called_irg != NULL) {
1314                         /* The Call node calls a locally defined method.  Remember to inline. */
1315                         inline_env_t *ienv  = env;
1316                         call_entry   *entry = obstack_alloc(&ienv->obst, sizeof(*entry));
1317                         entry->call       = call;
1318                         entry->callee     = called_irg;
1319                         entry->loop_depth = 0;
1320                         entry->benefice   = 0;
1321                         entry->local_adr  = 0;
1322                         entry->all_const  = 0;
1323
1324                         list_add_tail(&entry->list, &ienv->calls);
1325                 }
1326         }
1327 }
1328
1329 /**
1330  * Inlines all small methods at call sites where the called address comes
1331  * from a SymConst node that references the entity representing the called
1332  * method.
1333  * The size argument is a rough measure for the code size of the method:
1334  * methods whose graph obstack occupies fewer bytes than size are
1335  * inlined.
1336  */
1337 void inline_small_irgs(ir_graph *irg, int size) {
1338         ir_graph *rem = current_ir_graph;
1339         inline_env_t env;
1340         call_entry *entry;
1341
1342         current_ir_graph = irg;
1343         /* Handle graph state */
1344         assert(get_irg_phase_state(irg) != phase_building);
1345         free_callee_info(irg);
1346
1347         /* Find Call nodes to inline.
1348            (We cannot inline during a walk of the graph, as inlining the same
1349            method several times changes the visited flag of the walked graph:
1350            after the first inlining the visited value of the callee equals that of
1351            the caller.  With the next inlining both are increased.) */
1352         obstack_init(&env.obst);
1353         INIT_LIST_HEAD(&env.calls);
1354         irg_walk_graph(irg, NULL, collect_calls, &env);
1355
1356         if (! list_empty(&env.calls)) {
1357                 /* There are calls to inline */
1358                 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1359                 collect_phiprojs(irg);
1360
1361                 list_for_each_entry(call_entry, entry, &env.calls, list) {
1362                         ir_graph            *callee = entry->callee;
1363                         irg_inline_property prop    = get_irg_inline_property(callee);
1364
1365                         if (prop == irg_inline_forbidden || get_irg_additional_properties(callee) & mtp_property_weak) {
1366                                 /* do not inline forbidden / weak graphs */
1367                                 continue;
1368                         }
1369
1370                         if (prop >= irg_inline_forced ||
1371                             _obstack_memory_used(callee->obst) - (int)obstack_room(callee->obst) < size) {
1372                                 inline_method(entry->call, callee);
1373                         }
1374                 }
1375                 ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1376         }
1377         obstack_free(&env.obst, NULL);
1378         current_ir_graph = rem;
1379 }
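/*
 * Usage sketch (the size value is purely illustrative): a driver pass could
 * call
 *
 *     inline_small_irgs(irg, 80);
 *
 * to inline, at their call sites in irg, all known callees whose graph
 * obstack occupies fewer than 80 bytes.
 */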
1380
1381 /**
1382  * Environment for inlining irgs.
1383  */
1384 typedef struct {
1385         list_head calls;             /**< List of all call nodes in this graph. */
1386         unsigned  *local_weights;    /**< Once allocated, the beneficial weight for transmitting local addresses. */
1387         unsigned  n_nodes;           /**< Number of nodes in graph except Id, Tuple, Proj, Start, End. */
1388         unsigned  n_blocks;          /**< Number of Blocks in graph without Start and End block. */
1389         unsigned  n_nodes_orig;      /**< for statistics */
1390         unsigned  n_call_nodes;      /**< Number of Call nodes in the graph. */
1391         unsigned  n_call_nodes_orig; /**< for statistics */
1392         unsigned  n_callers;         /**< Number of known graphs that call this graph. */
1393         unsigned  n_callers_orig;    /**< for statistics */
1394         unsigned  got_inline:1;      /**< Set, if at least one call inside this graph was inlined. */
1395         unsigned  local_vars:1;      /**< Set, if an inlined function got the address of a local variable. */
1396         unsigned  recursive:1;       /**< Set, if this function is self recursive. */
1397 } inline_irg_env;
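/* An inline_irg_env is attached to each graph via set_irg_link() before the
 * walkers below run and is retrieved again with get_irg_link(). */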
1398
1399 /**
1400  * Allocate a new environment for inlining.
1401  */
1402 static inline_irg_env *alloc_inline_irg_env(void) {
1403         inline_irg_env *env    = obstack_alloc(&temp_obst, sizeof(*env));
1404         INIT_LIST_HEAD(&env->calls);
1405         env->local_weights     = NULL;
1406         env->n_nodes           = -2; /* do not count Start, End */
1407         env->n_blocks          = -2; /* do not count Start, End Block */
1408         env->n_nodes_orig      = -2; /* do not count Start, End */
1409         env->n_call_nodes      = 0;
1410         env->n_call_nodes_orig = 0;
1411         env->n_callers         = 0;
1412         env->n_callers_orig    = 0;
1413         env->got_inline        = 0;
1414         env->local_vars        = 0;
1415         env->recursive         = 0;
1416         return env;
1417 }
1418
1419 typedef struct walker_env {
1420         inline_irg_env *x;     /**< the inline environment */
1421         char ignore_runtime;   /**< the ignore runtime flag */
1422         char ignore_callers;   /**< if set, do not update caller statistics */
1423 } wenv_t;
1424
1425 /**
1426  * post-walker: collect all calls in the inline-environment
1427  * of a graph and accumulate some statistics.
1428  */
1429 static void collect_calls2(ir_node *call, void *ctx) {
1430         wenv_t         *env = ctx;
1431         inline_irg_env *x = env->x;
1432         ir_opcode      code = get_irn_opcode(call);
1433         ir_graph       *callee;
1434         call_entry     *entry;
1435
1436         /* count meaningful nodes in irg */
1437         if (code != iro_Proj && code != iro_Tuple && code != iro_Sync) {
1438                 if (code != iro_Block) {
1439                         ++x->n_nodes;
1440                         ++x->n_nodes_orig;
1441                 } else {
1442                         ++x->n_blocks;
1443                 }
1444         }
1445
1446         if (code != iro_Call) return;
1447
1448         /* check whether this is a runtime call */
1449         if (env->ignore_runtime) {
1450                 ir_node *symc = get_Call_ptr(call);
1451
1452                 if (is_Global(symc)) {
1453                         ir_entity *ent = get_Global_entity(symc);
1454
1455                         if (get_entity_additional_properties(ent) & mtp_property_runtime)
1456                                 return;
1457                 }
1458         }
1459
1460         /* collect all call nodes */
1461         ++x->n_call_nodes;
1462         ++x->n_call_nodes_orig;
1463
1464         callee = get_call_called_irg(call);
1465         if (callee != NULL) {
1466                 if (! env->ignore_callers) {
1467                         inline_irg_env *callee_env = get_irg_link(callee);
1468                         /* count all static callers */
1469                         ++callee_env->n_callers;
1470                         ++callee_env->n_callers_orig;
1471                 }
1472                 if (callee == current_ir_graph)
1473                         x->recursive = 1;
1474
1475                 /* link it in the list of possible inlinable entries */
1476                 entry = obstack_alloc(&temp_obst, sizeof(*entry));
1477                 entry->call       = call;
1478                 entry->callee     = callee;
1479                 entry->loop_depth = get_irn_loop(get_nodes_block(call))->depth;
1480                 entry->benefice   = 0;
1481                 entry->local_adr  = 0;
1482                 entry->all_const  = 0;
1483
1484                 list_add_tail(&entry->list, &x->calls);
1485         }
1486 }
1487
1488 /**
1489  * Returns TRUE if the number of Call nodes is 0 in the irg's environment,
1490  * hence this irg is a leaf (it calls no other methods).
1491  */
1492 inline static int is_leave(ir_graph *irg) {
1493         inline_irg_env *env = get_irg_link(irg);
1494         return env->n_call_nodes == 0;
1495 }
1496
1497 /**
1498  * Returns TRUE if the number of nodes in the callee is
1499  * smaller than size in the irg's environment.
1500  */
1501 inline static int is_smaller(ir_graph *callee, unsigned size) {
1502         inline_irg_env *env = get_irg_link(callee);
1503         return env->n_nodes < size;
1504 }
1505
1506 /**
1507  * Duplicate a call entry.
1508  *
1509  * @param entry     the original entry to duplicate
1510  * @param new_call  the new call node
1511  * @param loop_depth_delta
1512  *                  delta value for the loop depth
1513  */
1514 static call_entry *duplicate_call_entry(const call_entry *entry,
1515                                         ir_node *new_call, int loop_depth_delta) {
1516         call_entry *nentry = obstack_alloc(&temp_obst, sizeof(*nentry));
1517         nentry->call       = new_call;
1518         nentry->callee     = entry->callee;
1519         nentry->benefice   = entry->benefice;
1520         nentry->loop_depth = entry->loop_depth + loop_depth_delta;
1521         nentry->local_adr  = entry->local_adr;
1522         nentry->all_const  = entry->all_const;
1523
1524         return nentry;
1525 }
1526
1527 /**
1528  * Append all call nodes of the source environment to the call list of the
1529  * destination environment.
1530  *
1531  * @param dst         destination environment
1532  * @param src         source environment
1533  * @param loop_depth  the loop depth of the call that is replaced by the src list
1534  */
1535 static void append_call_list(inline_irg_env *dst, inline_irg_env *src, int loop_depth) {
1536         call_entry *entry, *nentry;
1537
1538         /* Note that the src list points to Call nodes in the inlined graph, but
1539            we need Call nodes in our graph. Luckily the inliner leaves this information
1540            in the link field. */
1541         list_for_each_entry(call_entry, entry, &src->calls, list) {
1542                 nentry = duplicate_call_entry(entry, get_irn_link(entry->call), loop_depth);
1543                 list_add_tail(&nentry->list, &dst->calls);
1544         }
1545         dst->n_call_nodes += src->n_call_nodes;
1546         dst->n_nodes      += src->n_nodes;
1547 }
1548
1549 /*
1550  * Inlines small leaf methods at call sites where the called address comes
1551  * from a SymConst node that references the entity representing the called
1552  * method.
1553  * Leaf graphs with fewer than leavesize nodes and other graphs with fewer
1554  * than size nodes are inlined; a caller graph is never grown beyond
1555  * maxsize nodes.
1556  */
1557 void inline_leave_functions(unsigned maxsize, unsigned leavesize,
1558                             unsigned size, int ignore_runtime)
1559 {
1560         inline_irg_env   *env;
1561         ir_graph         *irg;
1562         int              i, n_irgs;
1563         ir_graph         *rem;
1564         int              did_inline;
1565         wenv_t           wenv;
1566         call_entry       *entry, *next;
1567         const call_entry *centry;
1568         pmap             *copied_graphs;
1569         pmap_entry       *pm_entry;
1570
1571         rem = current_ir_graph;
1572         obstack_init(&temp_obst);
1573
1574         /* a map for the copied graphs, used to inline recursive calls */
1575         copied_graphs = pmap_create();
1576
1577         /* extend all irgs by a temporary data structure for inlining. */
1578         n_irgs = get_irp_n_irgs();
1579         for (i = 0; i < n_irgs; ++i)
1580                 set_irg_link(get_irp_irg(i), alloc_inline_irg_env());
1581
1582         /* Pre-compute information in temporary data structure. */
1583         wenv.ignore_runtime = ignore_runtime;
1584         wenv.ignore_callers = 0;
1585         for (i = 0; i < n_irgs; ++i) {
1586                 ir_graph *irg = get_irp_irg(i);
1587
1588                 assert(get_irg_phase_state(irg) != phase_building);
1589                 free_callee_info(irg);
1590
1591                 assure_cf_loop(irg);
1592                 wenv.x = get_irg_link(irg);
1593                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
1594         }
1595
1596         /* -- and now inline. -- */
1597
1598         /* Inline leaf functions repeatedly -- inlining may create new leaf functions. */
1599         do {
1600                 did_inline = 0;
1601
1602                 for (i = 0; i < n_irgs; ++i) {
1603                         ir_node *call;
1604                         int phiproj_computed = 0;
1605
1606                         current_ir_graph = get_irp_irg(i);
1607                         env              = get_irg_link(current_ir_graph);
1608
1609                         ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1610                         list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
1611                                 ir_graph            *callee;
1612                                 irg_inline_property  prop;
1613
1614                                 if (env->n_nodes > maxsize)
1615                                         break;
1616
1617                                 call   = entry->call;
1618                                 callee = entry->callee;
1619
1620                                 prop = get_irg_inline_property(callee);
1621                                 if (prop == irg_inline_forbidden || get_irg_additional_properties(callee) & mtp_property_weak) {
1622                                         /* do not inline forbidden / weak graphs */
1623                                         continue;
1624                                 }
1625
1626                                 if (is_leave(callee) && (
1627                                     is_smaller(callee, leavesize) || prop >= irg_inline_forced)) {
1628                                         if (!phiproj_computed) {
1629                                                 phiproj_computed = 1;
1630                                                 collect_phiprojs(current_ir_graph);
1631                                         }
1632                                         did_inline = inline_method(call, callee);
1633
1634                                         if (did_inline) {
1635                                                 inline_irg_env *callee_env = get_irg_link(callee);
1636
1637                                                 /* call was inlined, Phi/Projs for current graph must be recomputed */
1638                                                 phiproj_computed = 0;
1639
1640                                                 /* Do some statistics */
1641                                                 env->got_inline = 1;
1642                                                 --env->n_call_nodes;
1643                                                 env->n_nodes += callee_env->n_nodes;
1644                                                 --callee_env->n_callers;
1645
1646                                                 /* remove this call from the list */
1647                                                 list_del(&entry->list);
1648                                                 continue;
1649                                         }
1650                                 }
1651                         }
1652                         ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1653                 }
1654         } while (did_inline);
1655
1656         /* inline other small functions. */
1657         for (i = 0; i < n_irgs; ++i) {
1658                 ir_node *call;
1659                 int phiproj_computed = 0;
1660
1661                 current_ir_graph = get_irp_irg(i);
1662                 env              = get_irg_link(current_ir_graph);
1663
1664                 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1665
1666                 /* note that the list of possible calls is updated during the process */
1667                 list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
1668                         irg_inline_property prop;
1669                         ir_graph            *callee;
1670                         pmap_entry          *e;
1671
1672                         call   = entry->call;
1673                         callee = entry->callee;
1674
1675                         prop = get_irg_inline_property(callee);
1676                         if (prop == irg_inline_forbidden || get_irg_additional_properties(callee) & mtp_property_weak) {
1677                                 /* do not inline forbidden / weak graphs */
1678                                 continue;
1679                         }
1680
1681                         e = pmap_find(copied_graphs, callee);
1682                         if (e != NULL) {
1683                                 /*
1684                                  * Remap callee if we have a copy.
1685                                  * FIXME: Should we do this only for recursive Calls ?
1686                                  */
1687                                 callee = e->value;
1688                         }
1689
1690                         if (prop >= irg_inline_forced ||
1691                             (is_smaller(callee, size) && env->n_nodes < maxsize) /* small function */) {
1692                                 if (current_ir_graph == callee) {
1693                                         /*
1694                                          * Recursive call: we cannot directly inline because we cannot walk
1695                                          * the graph and change it. So we have to make a copy of the graph
1696                                          * first.
1697                                          */
1698
1699                                         inline_irg_env *callee_env;
1700                                         ir_graph       *copy;
1701
1702                                         ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1703
1704                                         /*
1705                                          * No copy yet, create one.
1706                                          * Note that recursive methods are never leaves, so it is sufficient
1707                                          * to test this condition here.
1708                                          */
1709                                         copy = create_irg_copy(callee);
1710
1711                                         /* create_irg_copy() destroys the Proj links, recompute them */
1712                                         phiproj_computed = 0;
1713
1714                                         ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1715
1716                                         /* allocate new environment */
1717                                         callee_env = alloc_inline_irg_env();
1718                                         set_irg_link(copy, callee_env);
1719
1720                                         assure_cf_loop(copy);
1721                                         wenv.x              = callee_env;
1722                                         wenv.ignore_callers = 1;
1723                                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
1724
1725                                         /*
1726                                          * Enter the entity of the original graph. This is needed
1727                                          * for inline_method(). However, note that ent->irg still points
1728                                          * to callee, NOT to copy.
1729                                          */
1730                                         set_irg_entity(copy, get_irg_entity(callee));
1731
1732                                         pmap_insert(copied_graphs, callee, copy);
1733                                         callee = copy;
1734
1735                                         /* we have only one caller: the original graph */
1736                                         callee_env->n_callers      = 1;
1737                                         callee_env->n_callers_orig = 1;
1738                                 }
1739                                 if (! phiproj_computed) {
1740                                         phiproj_computed = 1;
1741                                         collect_phiprojs(current_ir_graph);
1742                                 }
1743                                 did_inline = inline_method(call, callee);
1744                                 if (did_inline) {
1745                                         inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);
1746
1747                                         /* call was inlined, Phi/Projs for current graph must be recomputed */
1748                                         phiproj_computed = 0;
1749
1750                                         /* callee was inlined.  Append its call list. */
1751                                         env->got_inline = 1;
1752                                         --env->n_call_nodes;
1753                                         append_call_list(env, callee_env, entry->loop_depth);
1754                                         --callee_env->n_callers;
1755
1756                                         /* after we have inlined callee, all called methods inside callee
1757                                            are now called once more */
1758                                         list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
1759                                                 inline_irg_env *penv = get_irg_link(centry->callee);
1760                                                 ++penv->n_callers;
1761                                         }
1762
1763                                         /* remove this call from the list */
1764                                         list_del(&entry->list);
1765                                         continue;
1766                                 }
1767                         }
1768                 }
1769                 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1770         }
1771
1772         for (i = 0; i < n_irgs; ++i) {
1773                 irg = get_irp_irg(i);
1774                 env = get_irg_link(irg);
1775
1776                 if (env->got_inline) {
1777                         optimize_graph_df(irg);
1778                         optimize_cf(irg);
1779                 }
1780                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
1781                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
1782                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
1783                         env->n_callers_orig, env->n_callers,
1784                         get_entity_name(get_irg_entity(irg))));
1785                 }
1786         }
1787
1788         /* kill the copied graphs: we don't need them anymore */
1789         foreach_pmap(copied_graphs, pm_entry) {
1790                 ir_graph *copy = pm_entry->value;
1791
1792                 /* reset the entity, otherwise it will be deleted in the next step ... */
1793                 set_irg_entity(copy, NULL);
1794                 free_ir_graph(copy);
1795         }
1796         pmap_destroy(copied_graphs);
1797
1798         obstack_free(&temp_obst, NULL);
1799         current_ir_graph = rem;
1800 }
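/*
 * Usage sketch (all numbers purely illustrative):
 *
 *     inline_leave_functions(1000, 300, 60, 1);
 *
 * inlines leaf graphs with fewer than 300 nodes and other graphs with fewer
 * than 60 nodes, never grows a caller beyond 1000 nodes, and ignores calls
 * to runtime routines.
 */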
1801
1802 /**
1803  * Calculate the weight gained for one address argument by inspecting its users; returns 0 on any unsupported user.
1804  */
1805 static unsigned calc_method_local_weight(ir_node *arg) {
1806         int      i, j, k;
1807         unsigned v, weight = 0;
1808
1809         for (i = get_irn_n_outs(arg) - 1; i >= 0; --i) {
1810                 ir_node *succ = get_irn_out(arg, i);
1811
1812                 switch (get_irn_opcode(succ)) {
1813                 case iro_Load:
1814                 case iro_Store:
1815                         /* Loads and Stores can be removed */
1816                         weight += 3;
1817                         break;
1818                 case iro_Sel:
1819                         /* check if all args are constant */
1820                         for (j = get_Sel_n_indexs(succ) - 1; j >= 0; --j) {
1821                                 ir_node *idx = get_Sel_index(succ, j);
1822                                 if (! is_Const(idx))
1823                                         return 0;
1824                         }
1825                         /* Check users on this Sel. Note: if a 0 is returned here, there was
1826                            some unsupported node. */
1827                         v = calc_method_local_weight(succ);
1828                         if (v == 0)
1829                                 return 0;
1830                         /* we can kill one Sel with constant indexes, this is cheap */
1831                         weight += v + 1;
1832                         break;
1833                 case iro_Id:
1834                         /* Id nodes may still appear among the users */
1835                         weight += calc_method_local_weight(succ);
1836                         break;
1837                 case iro_Tuple:
1838                         /* unoptimized tuple */
1839                         for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
1840                                 ir_node *pred = get_Tuple_pred(succ, j);
1841                                 if (pred == arg) {
1842                                         /* look for Proj(j) */
1843                                         for (k = get_irn_n_outs(succ) - 1; k >= 0; --k) {
1844                                                 ir_node *succ_succ = get_irn_out(succ, k);
1845                                                 if (is_Proj(succ_succ)) {
1846                                                         if (get_Proj_proj(succ_succ) == j) {
1847                                                                 /* found */
1848                                                                 weight += calc_method_local_weight(succ_succ);
1849                                                         }
1850                                                 } else {
1851                                                         /* this should NOT happen */
1852                                                         return 0;
1853                                                 }
1854                                         }
1855                                 }
1856                         }
1857                         break;
1858                 default:
1859                         /* any other node: unsupported yet or bad. */
1860                         return 0;
1861                 }
1862         }
1863         return weight;
1864 }
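/*
 * Worked example of the weighting above: an address that is used by one Load
 * and one Store scores 3 + 3 = 6; if it is only reachable through a Sel with
 * constant indexes, that Sel contributes the weight of its own users plus 1.
 */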
1865
1866 /**
1867  * Calculate the parameter weights for transmitting the address of a local variable.
1868  */
1869 static void analyze_irg_local_weights(inline_irg_env *env, ir_graph *irg) {
1870         ir_entity *ent = get_irg_entity(irg);
1871         ir_type  *mtp;
1872         int      nparams, i, proj_nr;
1873         ir_node  *irg_args, *arg;
1874
1875         mtp      = get_entity_type(ent);
1876         nparams  = get_method_n_params(mtp);
1877
1878         /* Allocate the weight array; its existence also serves as the 'analysed' flag. */
1879         env->local_weights = NEW_ARR_D(unsigned, &temp_obst, nparams);
1880
1881         /* If the method has no parameters, there is nothing to do. */
1882         if (nparams <= 0)
1883                 return;
1884
1885         assure_irg_outs(irg);
1886         irg_args = get_irg_args(irg);
1887         for (i = get_irn_n_outs(irg_args) - 1; i >= 0; --i) {
1888                 arg     = get_irn_out(irg_args, i);
1889                 proj_nr = get_Proj_proj(arg);
1890                 env->local_weights[proj_nr] = calc_method_local_weight(arg);
1891         }
1892 }
1893
1894 /**
1895  * Calculate the benefice for transmitting a local variable address.
1896  * After inlining, the local variable might be transformed into a
1897  * SSA variable by scalar_replacement().
1898  */
1899 static unsigned get_method_local_adress_weight(ir_graph *callee, int pos) {
1900         inline_irg_env *env = get_irg_link(callee);
1901
1902         if (env->local_weights != NULL) {
1903                 if (pos < ARR_LEN(env->local_weights))
1904                         return env->local_weights[pos];
1905                 return 0;
1906         }
1907
1908         analyze_irg_local_weights(env, callee);
1909
1910         if (pos < ARR_LEN(env->local_weights))
1911                 return env->local_weights[pos];
1912         return 0;
1913 }
1914
1915 /**
1916  * Calculate a benefice value for inlining the given call.
1917  *
1918  * @param call       the call node we have to inspect
1919  * @param callee     the called graph
1920  */
1921 static int calc_inline_benefice(call_entry *entry, ir_graph *callee)
1922 {
1923         ir_node   *call = entry->call;
1924         ir_entity *ent  = get_irg_entity(callee);
1925         ir_node   *frame_ptr;
1926         ir_type   *mtp;
1927         int       weight = 0;
1928         int       i, n_params, all_const;
1929         unsigned  cc, v;
1930         irg_inline_property prop;
1931
1932         inline_irg_env *callee_env;
1933
1934         prop = get_irg_inline_property(callee);
1935         if (prop == irg_inline_forbidden) {
1936                 DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden\n",
1937                     call, callee));
1938                 return entry->benefice = INT_MIN;
1939         }
1940
1941         if (get_irg_additional_properties(callee) & (mtp_property_noreturn | mtp_property_weak)) {
1942                 DB((dbg, LEVEL_2, "In %+F Call to %+F: not inlining noreturn or weak\n",
1943                     call, callee));
1944                 return entry->benefice = INT_MIN;
1945         }
1946
1947         /* costs for every passed parameter */
1948         n_params = get_Call_n_params(call);
1949         mtp      = get_entity_type(ent);
1950         cc       = get_method_calling_convention(mtp);
1951         if (cc & cc_reg_param) {
1952                 /* register parameter, smaller costs for register parameters */
1953                 int max_regs = cc & ~cc_bits;
1954
1955                 if (max_regs < n_params)
1956                         weight += max_regs * 2 + (n_params - max_regs) * 5;
1957                 else
1958                         weight += n_params * 2;
1959         } else {
1960                 /* parameters are passed on the stack */
1961                 weight += 5 * n_params;
1962         }
1963
1964         /* constant parameters improve the benefice */
1965         frame_ptr = get_irg_frame(current_ir_graph);
1966         all_const = 1;
1967         for (i = 0; i < n_params; ++i) {
1968                 ir_node *param = get_Call_param(call, i);
1969
1970                 if (is_Const(param)) {
1971                         weight += get_method_param_weight(ent, i);
1972                 } else {
1973                         all_const = 0;
1974                         if (is_SymConst(param))
1975                                 weight += get_method_param_weight(ent, i);
1976                         else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
1977                                 /*
1978                                  * An address of a local variable is transmitted. After
1979                                  * inlining, scalar_replacement might be able to remove the
1980                                  * local variable, so honor this.
1981                                  */
1982                                 v = get_method_local_adress_weight(callee, i);
1983                                 weight += v;
1984                                 if (v > 0)
1985                                         entry->local_adr = 1;
1986                         }
1987                 }
1988         }
1989         entry->all_const = all_const;
1990
1991         callee_env = get_irg_link(callee);
1992         if (callee_env->n_callers == 1 &&
1993             callee != current_ir_graph &&
1994             get_entity_visibility(ent) == visibility_local) {
1995                 weight += 700;
1996         }
1997
1998         /* give a bonus for functions with one block */
1999         if (callee_env->n_blocks == 1)
2000                 weight = weight * 3 / 2;
2001
2002         /* and one for small non-recursive functions: we want them to be inlined in almost every case */
2003         if (callee_env->n_nodes < 30 && !callee_env->recursive)
2004                 weight += 2000;
2005
2006         /* and finally for leaves: they do not increase the register pressure
2007            because of callee-save registers */
2008         if (callee_env->n_call_nodes == 0)
2009                 weight += 400;
2010
2011         /* it's important to inline inner loops first */
2012         if (entry->loop_depth > 30)
2013                 weight += 30 * 1024;
2014         else
2015                 weight += entry->loop_depth * 1024;
2016
2017         /*
2018          * All arguments constant is probably a good sign, give an extra bonus
2019          */
2020         if (all_const)
2021                 weight += 1024;
2022
2023         return entry->benefice = weight;
2024 }
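/*
 * Rough worked example of the weights above (the scenario is made up): a call
 * with two register parameters (+4) to a local callee with a single caller
 * (+700) consisting of one block (*3/2), fewer than 30 nodes and no recursion
 * (+2000), making no calls itself (+400), at loop depth 1 (+1024) and with all
 * arguments constant (+1024) ends up well above any typical positive
 * inline_threshold.
 */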
2025
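/* All graphs in the order visited by callgraph_walker(), plus the fill count. */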
2026 static ir_graph **irgs;
2027 static int      last_irg;
2028
2029 /**
2030  * Callgraph walker, collect all visited graphs.
2031  */
2032 static void callgraph_walker(ir_graph *irg, void *data) {
2033         (void) data;
2034         irgs[last_irg++] = irg;
2035 }
2036
2037 /**
2038  * Creates an inline order for all graphs.
2039  *
2040  * @return the list of graphs.
2041  */
2042 static ir_graph **create_irg_list(void) {
2043         ir_entity **free_methods;
2044         int       arr_len;
2045         int       n_irgs = get_irp_n_irgs();
2046
2047         cgana(&arr_len, &free_methods);
2048         xfree(free_methods);
2049
2050         compute_callgraph();
2051
2052         last_irg = 0;
2053         irgs     = XMALLOCNZ(ir_graph*, n_irgs);
2054
2055         callgraph_walk(NULL, callgraph_walker, NULL);
2056         assert(n_irgs == last_irg);
2057
2058         return irgs;
2059 }
2060
2061 /**
2062  * Push a call onto the priority queue if its benefice is big enough.
2063  *
2064  * @param pqueue   the priority queue of calls
2065  * @param call     the call entry
2066  * @param inline_threshold
2067  *                 the threshold value
2068  */
2069 static void maybe_push_call(pqueue_t *pqueue, call_entry *call,
2070                             int inline_threshold)
2071 {
2072         ir_graph            *callee  = call->callee;
2073         irg_inline_property prop     = get_irg_inline_property(callee);
2074         int                 benefice = calc_inline_benefice(call, callee);
2075
2076         DB((dbg, LEVEL_2, "In %+F Call %+F to %+F has benefice %d\n",
2077             get_irn_irg(call->call), call->call, callee, benefice));
2078
2079         if (prop < irg_inline_forced && benefice < inline_threshold) {
2080                 return;
2081         }
2082
2083         pqueue_put(pqueue, call, benefice);
2084 }
2085
2086 /**
2087  * Try to inline calls into a graph.
2088  *
2089  * @param irg      the graph into which we inline
2090  * @param maxsize  do NOT inline if the size of irg gets
2091  *                 bigger than this amount
2092  * @param inline_threshold
2093  *                 threshold value for inline decision
2094  * @param copied_graphs
2095  *                 map containing copies of recursive graphs
2096  */
2097 static void inline_into(ir_graph *irg, unsigned maxsize,
2098                         int inline_threshold, pmap *copied_graphs)
2099 {
2100         int            phiproj_computed = 0;
2101         inline_irg_env *env = get_irg_link(irg);
2102         call_entry     *curr_call;
2103         wenv_t         wenv;
2104         pqueue_t       *pqueue;
2105
2106         if (env->n_call_nodes == 0)
2107                 return;
2108
2109         if (env->n_nodes > maxsize) {
2110                 DB((dbg, LEVEL_2, "%+F: too big (%d)\n", irg, env->n_nodes));
2111                 return;
2112         }
2113
2114         current_ir_graph = irg;
2115         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
2116
2117         /* put all calls of this graph into the priority queue */
2118         pqueue = new_pqueue();
2119
2120         list_for_each_entry(call_entry, curr_call, &env->calls, list) {
2121                 assert(is_Call(curr_call->call));
2122                 maybe_push_call(pqueue, curr_call, inline_threshold);
2123         }
2124
2125         /* note that the list of possible calls is updated during the process */
2126         while (!pqueue_empty(pqueue)) {
2127                 int                 did_inline;
2128                 call_entry          *curr_call  = pqueue_pop_front(pqueue);
2129                 ir_graph            *callee     = curr_call->callee;
2130                 ir_node             *call_node  = curr_call->call;
2131                 inline_irg_env      *callee_env = get_irg_link(callee);
2132                 irg_inline_property prop        = get_irg_inline_property(callee);
2133                 int                 loop_depth;
2134                 const call_entry    *centry;
2135                 pmap_entry          *e;
2136
2137                 if ((prop < irg_inline_forced) && env->n_nodes + callee_env->n_nodes > maxsize) {
2138                         DB((dbg, LEVEL_2, "%+F: too big (%d) + %+F (%d)\n", irg,
2139                                                 env->n_nodes, callee, callee_env->n_nodes));
2140                         continue;
2141                 }
2142
2143                 e = pmap_find(copied_graphs, callee);
2144                 if (e != NULL) {
2145                         int benefice = curr_call->benefice;
2146                         /*
2147                          * Reduce the benefice for recursive functions if not all arguments are
2148                          * constant: inlining recursive functions is rarely profitable.
2149                          */
2150                         if (!curr_call->all_const)
2151                                 benefice -= 2000;
2152                         if (benefice < inline_threshold)
2153                                 continue;
2154
2155                         /*
2156                          * Remap callee if we have a copy.
2157                          */
2158                         callee     = e->value;
2159                         callee_env = get_irg_link(callee);
2160                 }
2161
2162                 if (current_ir_graph == callee) {
2163                         /*
2164                          * Recursive call: we cannot directly inline because we cannot
2165                          * walk the graph and change it. So we have to make a copy of
2166                          * the graph first.
2167                          */
2168                         int benefice = curr_call->benefice;
2169                         ir_graph *copy;
2170
2171                         /*
2172                          * Reduce the benefice for recursive functions if not all arguments are
2173                          * constant: inlining recursive functions is rarely profitable.
2174                          */
2175                         if (!curr_call->all_const)
2176                                 benefice -= 2000;
2177                         if (benefice < inline_threshold)
2178                                 continue;
2179
2180                         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
2181
2182                         /*
2183                          * No copy yet, create one.
2184                          * Note that recursive methods are never leaves, so it is
2185                          * sufficient to test this condition here.
2186                          */
2187                         copy = create_irg_copy(callee);
2188
2189                         /* create_irg_copy() destroys the Proj links, recompute them */
2190                         phiproj_computed = 0;
2191
2192                         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
2193
2194                         /* allocate a new environment */
2195                         callee_env = alloc_inline_irg_env();
2196                         set_irg_link(copy, callee_env);
2197
2198                         assure_cf_loop(copy);
2199                         wenv.x              = callee_env;
2200                         wenv.ignore_callers = 1;
2201                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
2202
2203                         /*
2204                          * Enter the entity of the original graph. This is needed
2205                          * for inline_method(). However, note that ent->irg still points
2206                          * to callee, NOT to copy.
2207                          */
2208                         set_irg_entity(copy, get_irg_entity(callee));
2209
2210                         pmap_insert(copied_graphs, callee, copy);
2211                         callee = copy;
2212
2213                         /* we have only one caller: the original graph */
2214                         callee_env->n_callers      = 1;
2215                         callee_env->n_callers_orig = 1;
2216                 }
2217                 if (! phiproj_computed) {
2218                         phiproj_computed = 1;
2219                         collect_phiprojs(current_ir_graph);
2220                 }
2221                 did_inline = inline_method(call_node, callee);
2222                 if (!did_inline)
2223                         continue;
2224
2225                 /* call was inlined, Phi/Projs for current graph must be recomputed */
2226                 phiproj_computed = 0;
2227
2228                 /* remove this entry from the call list */
2229                 list_del(&curr_call->list);
2230
2231                 /* callee was inlined.  Append its call list. */
2232                 env->got_inline = 1;
2233                 if (curr_call->local_adr)
2234                         env->local_vars = 1;
2235                 --env->n_call_nodes;
2236
2237                 /* we just generate a bunch of new calls */
2238                 loop_depth = curr_call->loop_depth;
2239                 list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
2240                         inline_irg_env *penv = get_irg_link(centry->callee);
2241                         ir_node        *new_call;
2242                         call_entry     *new_entry;
2243
2244                         /* after we have inlined callee, all called methods inside
2245                          * callee are now called once more */
2246                         ++penv->n_callers;
2247
2248                         /* Note that the src list points to Call nodes in the inlined graph,
2249                          * but we need Call nodes in our graph. Luckily the inliner leaves
2250                          * this information in the link field. */
2251                         new_call = get_irn_link(centry->call);
2252                         assert(is_Call(new_call));
2253
2254                         new_entry = duplicate_call_entry(centry, new_call, loop_depth);
2255                         list_add_tail(&new_entry->list, &env->calls);
2256                         maybe_push_call(pqueue, new_entry, inline_threshold);
2257                 }
2258
2259                 env->n_call_nodes += callee_env->n_call_nodes;
2260                 env->n_nodes += callee_env->n_nodes;
2261                 --callee_env->n_callers;
2262         }
2263         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
2264         del_pqueue(pqueue);
2265 }
2266
2267 /*
2268  * Heuristic inliner. Calculates a benefice value for every call and inlines
2269  * those calls with a value higher than the threshold.
2270  */
2271 void inline_functions(unsigned maxsize, int inline_threshold) {
2272         inline_irg_env   *env;
2273         int              i, n_irgs;
2274         ir_graph         *rem;
2275         wenv_t           wenv;
2276         pmap             *copied_graphs;
2277         pmap_entry       *pm_entry;
2278         ir_graph         **irgs;
2279
2280         rem = current_ir_graph;
2281         obstack_init(&temp_obst);
2282
2283         irgs = create_irg_list();
2284
2285         /* a map for the copied graphs, used to inline recursive calls */
2286         copied_graphs = pmap_create();
2287
2288         /* extend all irgs by a temporary data structure for inlining. */
2289         n_irgs = get_irp_n_irgs();
2290         for (i = 0; i < n_irgs; ++i)
2291                 set_irg_link(irgs[i], alloc_inline_irg_env());
2292
2293         /* Pre-compute information in temporary data structure. */
2294         wenv.ignore_runtime = 0;
2295         wenv.ignore_callers = 0;
2296         for (i = 0; i < n_irgs; ++i) {
2297                 ir_graph *irg = irgs[i];
2298
2299                 free_callee_info(irg);
2300
2301                 wenv.x = get_irg_link(irg);
2302                 assure_cf_loop(irg);
2303                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
2304         }
2305
2306         /* -- and now inline. -- */
2307         for (i = 0; i < n_irgs; ++i) {
2308                 ir_graph *irg = irgs[i];
2309
2310                 inline_into(irg, maxsize, inline_threshold, copied_graphs);
2311         }
2312
2313         for (i = 0; i < n_irgs; ++i) {
2314                 ir_graph *irg = irgs[i];
2315
2316                 env = get_irg_link(irg);
2317                 if (env->got_inline) {
2318                         /* this irg got calls inlined: optimize it */
2319                         if (get_opt_combo()) {
2320                                 if (env->local_vars) {
2321                                         scalar_replacement_opt(irg);
2322                                 }
2323                                 combo(irg);
2324                         } else {
2325                                 if (env->local_vars) {
2326                                         if (scalar_replacement_opt(irg)) {
2327                                                 optimize_graph_df(irg);
2328                                         }
2329                                 }
2330                                 optimize_cf(irg);
2331                         }
2332                 }
2333                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
2334                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
2335                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
2336                         env->n_callers_orig, env->n_callers,
2337                         get_entity_name(get_irg_entity(irg))));
2338                 }
2339         }
2340
2341         /* kill the copied graphs: we don't need them anymore */
2342         foreach_pmap(copied_graphs, pm_entry) {
2343                 ir_graph *copy = pm_entry->value;
2344
2345                 /* reset the entity, otherwise it will be deleted in the next step ... */
2346                 set_irg_entity(copy, NULL);
2347                 free_ir_graph(copy);
2348         }
2349         pmap_destroy(copied_graphs);
2350
2351         xfree(irgs);
2352
2353         obstack_free(&temp_obst, NULL);
2354         current_ir_graph = rem;
2355 }
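/*
 * Usage sketch (numbers purely illustrative): a whole-program driver might run
 *
 *     inline_functions(750, 0);
 *
 * so that no graph grows beyond roughly 750 counted nodes and every call with
 * a non-negative benefice is considered for inlining.
 */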
2356
2357 void firm_init_inline(void) {
2358         FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
2359 }