1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All rights reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief    Dead node elimination and Procedure Inlining.
23  * @author   Michael Beck, Goetz Lindenmaier
24  * @version  $Id$
25  */
26 #include "config.h"
27
28 #include <limits.h>
29 #include <assert.h>
30
31 #include "irnode_t.h"
32 #include "irgraph_t.h"
33 #include "irprog_t.h"
34
35 #include "iroptimize.h"
36 #include "ircons_t.h"
37 #include "iropt_t.h"
38 #include "irgopt.h"
39 #include "irgmod.h"
40 #include "irgwalk.h"
41
42 #include "array_t.h"
43 #include "list.h"
44 #include "pset.h"
45 #include "pmap.h"
46 #include "pdeq.h"
47 #include "xmalloc.h"
48 #include "pqueue.h"
49
50 #include "irouts.h"
51 #include "irloop_t.h"
52 #include "irbackedge_t.h"
53 #include "opt_init.h"
54 #include "cgana.h"
55 #include "trouts.h"
56 #include "error.h"
57
58 #include "analyze_irg_args.h"
59 #include "iredges_t.h"
60 #include "irflag_t.h"
61 #include "irhooks.h"
62 #include "irtools.h"
63 #include "iropt_dbg.h"
64 #include "irpass_t.h"
65
66 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
67
68 /*------------------------------------------------------------------*/
69 /* Routines for dead node elimination / copying garbage collection  */
70 /* of the obstack.                                                  */
71 /*------------------------------------------------------------------*/
72
73 /**
74  * Remember the new node in the old node by using a field all nodes have.
75  */
76 #define set_new_node(oldn, newn)  set_irn_link(oldn, newn)
77
78 /**
79  * Get this new node, before the old node is forgotten.
80  */
81 #define get_new_node(oldn) get_irn_link(oldn)
82
83 /**
84  * Check if a new node was set.
85  */
86 #define has_new_node(n) (get_new_node(n) != NULL)
87
88 /**
89  * We use the block_visited flag to mark that we have computed the
90  * number of useful predecessors for this block.
91  * Further we encode the new arity in this flag in the old blocks.
92  * Remembering the arity is useful, as it saves a lot of pointer
93  * accesses.  This function is called for all Phi and Block nodes
94  * in a graph.
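 *
 * A worked example of the encoding (illustrative numbers): with
 * irg_block_visited == 10 and a block that has 3 predecessors of which one
 * is Bad, the first call stores 10 + 2 = 12 in block_visited and returns 2;
 * any later call sees 12 >= 10 and simply returns 12 - 10 = 2 without
 * rescanning the predecessors.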
95  */
96 static inline int compute_new_arity(ir_node *b)
97 {
98         int i, res, irn_arity;
99         int irg_v, block_v;
100
101         irg_v = get_irg_block_visited(current_ir_graph);
102         block_v = get_Block_block_visited(b);
103         if (block_v >= irg_v) {
104                 /* we computed the number of preds for this block and saved it in the
105                    block_v flag */
106                 return block_v - irg_v;
107         } else {
108                 /* compute the number of good predecessors */
109                 res = irn_arity = get_irn_arity(b);
110                 for (i = 0; i < irn_arity; i++)
111                         if (is_Bad(get_irn_n(b, i))) res--;
112                /* save it in the flag. */
113                set_Block_block_visited(b, irg_v + res);
114                return res;
115         }
116 }
117
118 /**
119  * Copies the node to the new obstack. The Ins of the new node point to
120  * the predecessors on the old obstack.  For block/phi nodes not all
121  * predecessors might be copied.  n->link points to the new node.
122  * For Phi and Block nodes the function allocates in-arrays with an arity
123  * only for useful predecessors.  The arity is determined by counting
124  * the non-bad predecessors of the block.
125  *
126  * @param n    The node to be copied
127  * @param env  if non-NULL, the node number attribute will be copied to the new node
128  *
129  * Note: Also used for loop unrolling.
130  */
131 static void copy_node(ir_node *n, void *env)
132 {
133         ir_node *nn, *block;
134         int new_arity;
135         ir_op *op = get_irn_op(n);
136         (void) env;
137
138         if (op == op_Bad) {
139                 /* node copied already */
140                 return;
141         } else if (op == op_Block) {
142                 block = NULL;
143                 new_arity = compute_new_arity(n);
144                 n->attr.block.graph_arr = NULL;
145         } else {
146                 block = get_nodes_block(n);
147                 if (op == op_Phi) {
148                         new_arity = compute_new_arity(block);
149                 } else {
150                         new_arity = get_irn_arity(n);
151                 }
152         }
153         nn = new_ir_node(get_irn_dbg_info(n),
154                 current_ir_graph,
155                 block,
156                 op,
157                 get_irn_mode(n),
158                 new_arity,
159                 get_irn_in(n) + 1);
160         /* Copy the attributes.  These might point to additional data.  If this
161            was allocated on the old obstack the pointers now are dangling.  This
162            frees e.g. the memory of the graph_arr allocated in new_immBlock. */
163         if (op == op_Block) {
164                 /* we cannot allow blocks WITHOUT macroblock input */
165                 set_Block_MacroBlock(nn, get_Block_MacroBlock(n));
166         }
167         copy_node_attr(n, nn);
168
169         if (env != NULL) {
170                 /* for easier debugging, we want to copy the node numbers too */
171                 nn->node_nr = n->node_nr;
172         }
173
174         set_new_node(n, nn);
175         hook_dead_node_elim_subst(current_ir_graph, n, nn);
176 }
177
178 /**
179  * Copies the predecessors of the old node to the new node remembered in the link field.
180  * Skips the Bad predecessors of Phi and Block nodes.
181  */
182 static void copy_preds(ir_node *n, void *env)
183 {
184         ir_node *nn, *block;
185         int i, j, irn_arity;
186         (void) env;
187
188         nn = get_new_node(n);
189
190         if (is_Block(n)) {
191                 /* copy the macro block header */
192                 ir_node *mbh = get_Block_MacroBlock(n);
193
194                 if (mbh == n) {
195                         /* this block is a macroblock header */
196                         set_Block_MacroBlock(nn, nn);
197                 } else {
198                         /* get the macro block header */
199                         ir_node *nmbh = get_new_node(mbh);
200                         assert(nmbh != NULL);
201                         set_Block_MacroBlock(nn, nmbh);
202                 }
203
204                 /* Don't copy Bad nodes. */
205                 j = 0;
206                 irn_arity = get_irn_arity(n);
207                 for (i = 0; i < irn_arity; i++) {
208                         if (! is_Bad(get_irn_n(n, i))) {
209                                 ir_node *pred = get_irn_n(n, i);
210                                 set_irn_n(nn, j, get_new_node(pred));
211                                 j++;
212                         }
213                 }
214                 /* repair the block visited flag from above misuse. Repair it in both
215                    graphs so that the old one can still be used. */
216                 set_Block_block_visited(nn, 0);
217                 set_Block_block_visited(n, 0);
218                 /* Local optimization could not merge two subsequent blocks if
219                    the in array contained Bads.  Now it is possible.
220                    We don't call optimize_in_place as it requires
221                    that the fields in ir_graph are set properly. */
222                 if (!has_Block_entity(nn) &&
223                     get_opt_control_flow_straightening() &&
224                     get_Block_n_cfgpreds(nn) == 1 &&
225                     is_Jmp(get_Block_cfgpred(nn, 0))) {
226                         ir_node *old = get_nodes_block(get_Block_cfgpred(nn, 0));
227                         if (nn == old) {
228                                 /* Jmp jumps into the block it is in -- a self cycle, replace it by Bad. */
229                                 assert(is_Bad(get_new_node(get_irg_bad(current_ir_graph))));
230                                 exchange(nn, get_new_node(get_irg_bad(current_ir_graph)));
231                         } else {
232                                 exchange(nn, old);
233                         }
234                 }
235         } else if (is_Phi(n) && get_irn_arity(n) > 0) {
236                 /* Don't copy a Phi input if the corresponding predecessor of the
237                    block is Bad.  The Block itself should not be Bad. */
238                 block = get_nodes_block(n);
239                 set_nodes_block(nn, get_new_node(block));
240                 j = 0;
241                 irn_arity = get_irn_arity(n);
242                 for (i = 0; i < irn_arity; i++) {
243                         if (! is_Bad(get_irn_n(block, i))) {
244                                 ir_node *pred = get_irn_n(n, i);
245                                 set_irn_n(nn, j, get_new_node(pred));
246                                 /*if (is_backedge(n, i)) set_backedge(nn, j);*/
247                                 j++;
248                         }
249                 }
250                 /* If the pre walker reached this Phi after the post walker visited the
251                    block, block_visited is > 0. */
252                 set_Block_block_visited(get_nodes_block(n), 0);
253                 /* Compacting the Phi's ins might generate Phis with only one
254                    predecessor. */
255                 if (get_irn_arity(nn) == 1)
256                         exchange(nn, get_irn_n(nn, 0));
257         } else {
258                 irn_arity = get_irn_arity(n);
259                 for (i = -1; i < irn_arity; i++)
260                         set_irn_n(nn, i, get_new_node(get_irn_n(n, i)));
261         }
262         /* Now the new node is complete.  We can add it to the hash table for CSE.
263            @@@ inlining aborts if we identify End. Why? */
264         if (!is_End(nn))
265                 add_identities(current_ir_graph->value_table, nn);
266 }
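
/*
 * Taken together, copy_node() and copy_preds() implement a two-pass copy:
 * the pre-order walker copy_node() allocates a copy of every reachable node
 * on the new obstack and records it in the link field of the original,
 * while the post-order walker copy_preds() rewires each copy's predecessors
 * to the copies of the original predecessors, skipping the Bad inputs of
 * Blocks and Phis.  copy_graph() below drives both walkers.
 */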
267
268 /**
269  * Copies the graph recursively, compacts the keep-alives of the end node.
270  *
271  * @param irg           the graph to be copied
272  * @param copy_node_nr  If non-zero, the node number will be copied
273  */
274 static void copy_graph(ir_graph *irg, int copy_node_nr)
275 {
276         ir_node *oe, *ne, *ob, *nb, *om, *nm; /* old end, new end, old bad, new bad, old NoMem, new NoMem */
277         ir_node *ka;      /* keep alive */
278         int i, irn_arity;
279         unsigned long vfl;
280
281         /* Some nodes must be copied by hand, sigh */
282         vfl = get_irg_visited(irg);
283         set_irg_visited(irg, vfl + 1);
284
285         oe = get_irg_end(irg);
286         mark_irn_visited(oe);
287         /* copy the end node by hand, allocate dynamic in array! */
288         ne = new_ir_node(get_irn_dbg_info(oe),
289                 irg,
290                 NULL,
291                 op_End,
292                 mode_X,
293                 -1,
294                 NULL);
295         /* Copy the attributes.  Well, there might be some in the future... */
296         copy_node_attr(oe, ne);
297         set_new_node(oe, ne);
298
299         /* copy the Bad node */
300         ob = get_irg_bad(irg);
301         mark_irn_visited(ob);
302         nb = new_ir_node(get_irn_dbg_info(ob),
303                 irg,
304                 NULL,
305                 op_Bad,
306                 mode_T,
307                 0,
308                 NULL);
309         copy_node_attr(ob, nb);
310         set_new_node(ob, nb);
311
312         /* copy the NoMem node */
313         om = get_irg_no_mem(irg);
314         mark_irn_visited(om);
315         nm = new_ir_node(get_irn_dbg_info(om),
316                 irg,
317                 NULL,
318                 op_NoMem,
319                 mode_M,
320                 0,
321                 NULL);
322         copy_node_attr(om, nm);
323         set_new_node(om, nm);
324
325         /* copy the live nodes */
326         set_irg_visited(irg, vfl);
327         irg_walk(get_nodes_block(oe), copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
328
329         /* Note: from now on, the visited flag of the graph is equal to vfl + 1 */
330
331         /* visit the anchors as well */
332         for (i = get_irg_n_anchors(irg) - 1; i >= 0; --i) {
333                 ir_node *n = get_irg_anchor(irg, i);
334
335                 if (n && (get_irn_visited(n) <= vfl)) {
336                         set_irg_visited(irg, vfl);
337                         irg_walk(n, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
338                 }
339         }
340
341         /* copy_preds for the end node ... */
342         set_nodes_block(ne, get_new_node(get_nodes_block(oe)));
343
344         /*- ... and now the keep alives. -*/
345         /* First pick the unmarked Block nodes and walk them.  We must pick these
346            first, as otherwise we would overlook blocks reachable from Phis. */
347         irn_arity = get_End_n_keepalives(oe);
348         for (i = 0; i < irn_arity; i++) {
349                 ka = get_End_keepalive(oe, i);
350                 if (is_Block(ka)) {
351                         if (get_irn_visited(ka) <= vfl) {
352                                 /* We must keep the block alive and copy everything reachable */
353                                 set_irg_visited(irg, vfl);
354                                 irg_walk(ka, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
355                         }
356                         add_End_keepalive(ne, get_new_node(ka));
357                 }
358         }
359
360         /* Now pick other nodes.  Here we will keep all! */
361         irn_arity = get_End_n_keepalives(oe);
362         for (i = 0; i < irn_arity; i++) {
363                 ka = get_End_keepalive(oe, i);
364                 if (!is_Block(ka)) {
365                         if (get_irn_visited(ka) <= vfl) {
366                                 /* We didn't copy the node yet.  */
367                                 set_irg_visited(irg, vfl);
368                                 irg_walk(ka, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
369                         }
370                         add_End_keepalive(ne, get_new_node(ka));
371                 }
372         }
373
374         /* start block sometimes only reached after keep alives */
375         set_nodes_block(nb, get_new_node(get_nodes_block(ob)));
376         set_nodes_block(nm, get_new_node(get_nodes_block(om)));
377 }
378
379 /**
380  * Copies the graph reachable from current_ir_graph->end to the obstack
381  * in current_ir_graph and fixes the environment.
382  * Then fixes the fields in current_ir_graph containing nodes of the
383  * graph.
384  *
385  * @param copy_node_nr  If non-zero, the node number will be copied
386  */
387 static void copy_graph_env(int copy_node_nr)
388 {
389         ir_graph *irg = current_ir_graph;
390         ir_node *old_end, *new_anchor;
391         int i;
392
393         /* remove end_except and end_reg nodes */
394         old_end = get_irg_end(irg);
395         set_irg_end_except (irg, old_end);
396         set_irg_end_reg    (irg, old_end);
397
398         /* Not all nodes remembered in irg might be reachable
399            from the end node.  Ensure their link is set to NULL, so that
400            we can test whether new nodes have been computed. */
401         for (i = get_irg_n_anchors(irg) - 1; i >= 0; --i) {
402                 ir_node *n = get_irg_anchor(irg, i);
403                 if (n != NULL)
404                         set_new_node(n, NULL);
405         }
406         /* we use the block walk flag for removing Bads from Blocks ins. */
407         inc_irg_block_visited(irg);
408
409         /* copy the graph */
410         copy_graph(irg, copy_node_nr);
411
412         /* fix the anchor */
413         old_end    = get_irg_end(irg);
414         new_anchor = new_Anchor(irg);
415
416         for (i = get_irg_n_anchors(irg) - 1; i >= 0; --i) {
417                 ir_node *n = get_irg_anchor(irg, i);
418                 if (n)
419                         set_irn_n(new_anchor, i, get_new_node(n));
420         }
421         free_End(old_end);
422         irg->anchor = new_anchor;
423
424         /* ensure the new anchor is placed in the endblock */
425         set_nodes_block(new_anchor, get_irg_end_block(irg));
426 }
427
428 /**
429  * Copies all reachable nodes to a new obstack.  Removes bad inputs
430  * from block nodes and the corresponding inputs from Phi nodes.
431  * Merges single exit blocks with single entry blocks and removes
432  * 1-input Phis.
433  * Adds all new nodes to a new hash table for CSE.  Does not
434  * perform CSE, so the hash table might contain common subexpressions.
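 *
 * A minimal usage sketch (hypothetical caller code): the optimization can be
 * run directly on a graph or scheduled via the pass wrapper defined below:
 *
 *     dead_node_elimination(irg);
 *     ir_graph_pass_t *dce = dead_node_elimination_pass("dce");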
435  */
436 void dead_node_elimination(ir_graph *irg)
437 {
438         ir_graph *rem;
439 #ifdef INTERPROCEDURAL_VIEW
440         int rem_ipview = get_interprocedural_view();
441 #endif
442         struct obstack *graveyard_obst = NULL;
443         struct obstack *rebirth_obst   = NULL;
444
445         edges_deactivate(irg);
446
447         /* inform statistics that we started a dead-node elimination run */
448         hook_dead_node_elim(irg, 1);
449
450         /* Remember external state of current_ir_graph. */
451         rem = current_ir_graph;
452         current_ir_graph = irg;
453 #ifdef INTERPROCEDURAL_VIEW
454         set_interprocedural_view(0);
455 #endif
456
457         assert(get_irg_phase_state(irg) != phase_building);
458
459         /* Handle graph state */
460         free_callee_info(irg);
461         free_irg_outs(irg);
462         free_trouts();
463
464         /* @@@ so far we lose loops when copying */
465         free_loop_information(irg);
466
467         set_irg_doms_inconsistent(irg);
468
469         /* A quiet place, where the old obstack can rest in peace,
470            until it is cremated. */
471         graveyard_obst = irg->obst;
472
473         /* A new obstack, where the reachable nodes will be copied to. */
474         rebirth_obst = XMALLOC(struct obstack);
475         irg->obst = rebirth_obst;
476         obstack_init(irg->obst);
477         irg->last_node_idx = 0;
478
479         /* We also need a new value table for CSE */
480         del_identities(irg->value_table);
481         irg->value_table = new_identities();
482
483         /* Copy the graph from the old to the new obstack */
484         copy_graph_env(/*copy_node_nr=*/1);
485
486         /* Free memory from old unoptimized obstack */
487         obstack_free(graveyard_obst, 0);  /* First empty the obstack ... */
488         xfree(graveyard_obst);            /* ... then free it.           */
489
490         /* inform statistics that the run is over */
491         hook_dead_node_elim(irg, 0);
492
493         current_ir_graph = rem;
494 #ifdef INTERPROCEDURAL_VIEW
495         set_interprocedural_view(rem_ipview);
496 #endif
497 }
498
499 ir_graph_pass_t *dead_node_elimination_pass(const char *name)
500 {
501         return def_graph_pass(name ? name : "dce", dead_node_elimination);
502 }
503
504 /**
505  * Relink bad predecessors of a block and store the old in array in the
506  * link field. This function is called by relink_bad_predecessors().
507  * The array in the link field starts with the block operand at position 0.
508  * If the block has bad predecessors, create a new in array without bad preds.
509  * Otherwise leave the in array untouched.
510  */
511 static void relink_bad_block_predecessors(ir_node *n, void *env)
512 {
513         ir_node **new_in, *irn;
514         int i, new_irn_n, old_irn_arity, new_irn_arity = 0;
515         (void) env;
516
517         /* if the link field of the block is NULL, look for bad predecessors;
518            otherwise this has already been done */
519         if (is_Block(n) && get_irn_link(n) == NULL) {
520                 /* save old predecessors in link field (position 0 is the block operand)*/
521                 set_irn_link(n, get_irn_in(n));
522
523                 /* count predecessors without bad nodes */
524                 old_irn_arity = get_irn_arity(n);
525                 for (i = 0; i < old_irn_arity; i++)
526                         if (!is_Bad(get_irn_n(n, i)))
527                                 ++new_irn_arity;
528
529                 /* arity changing: set new predecessors without bad nodes */
530                 if (new_irn_arity < old_irn_arity) {
531                         /* Get new predecessor array. We do not resize the array, as we must
532                            keep the old one to update Phis. */
533                         new_in = NEW_ARR_D(ir_node *, current_ir_graph->obst, (new_irn_arity+1));
534
535                         /* set new predecessors in array */
536                         new_in[0] = NULL;
537                         new_irn_n = 1;
538                         for (i = 0; i < old_irn_arity; i++) {
539                                 irn = get_irn_n(n, i);
540                                 if (!is_Bad(irn)) {
541                                         new_in[new_irn_n] = irn;
542                                         is_backedge(n, i) ? set_backedge(n, new_irn_n-1) : set_not_backedge(n, new_irn_n-1);
543                                         ++new_irn_n;
544                                 }
545                         }
546                         /* ARR_SETLEN(int, n->attr.block.backedge, new_irn_arity); */
547                         ARR_SHRINKLEN(n->attr.block.backedge, new_irn_arity);
548                         n->in = new_in;
549                 } /* ir node has bad predecessors */
550         } /* Block is not relinked */
551 }
552
553 /**
554  * Relinks Bad predecessors of Blocks and Phis, called by the walker in
555  * remove_bad_predecessors(). If n is a Block, call
556  * relink_bad_block_predecessors(). If n is a Phi node, also call the relinking
557  * function on the Phi's Block. If this block has bad predecessors, relink the
558  * predecessors of the Phi node.
559  */
560 static void relink_bad_predecessors(ir_node *n, void *env)
561 {
562         ir_node *block, **old_in;
563         int i, old_irn_arity, new_irn_arity;
564
565         /* relink bad predecessors of a block */
566         if (is_Block(n))
567                 relink_bad_block_predecessors(n, env);
568
569         /* If n is a Phi node, relink its block and its predecessors */
570         if (is_Phi(n)) {
571                 /* Relink predecessors of phi's block */
572                 block = get_nodes_block(n);
573                 if (get_irn_link(block) == NULL)
574                         relink_bad_block_predecessors(block, env);
575
576                 old_in = (ir_node **)get_irn_link(block); /* Of Phi's Block */
577                 old_irn_arity = ARR_LEN(old_in);
578
579                 /* Relink Phi predecessors if count of predecessors changed */
580                 if (old_irn_arity != ARR_LEN(get_irn_in(block))) {
581                         /* set new predecessors in array
582                            n->in[0] remains the same block */
583                         new_irn_arity = 1;
584                         for (i = 1; i < old_irn_arity; i++)
585                                 if (!is_Bad(old_in[i])) {
586                                         n->in[new_irn_arity] = n->in[i];
587                                         is_backedge(n, i) ? set_backedge(n, new_irn_arity) : set_not_backedge(n, new_irn_arity);
588                                         ++new_irn_arity;
589                                 }
590
591                        ARR_SETLEN(ir_node *, n->in, new_irn_arity);
592                        ARR_SETLEN(int, n->attr.phi.u.backedge, new_irn_arity);
593                 }
594         } /* n is a Phi node */
595 }
596
597 /*
598  * Removes Bad predecessors from Blocks and the corresponding
599  * inputs to Phi nodes as in dead_node_elimination but without
600  * copying the graph.
601  * On walking up set the link field to NULL, on walking down call
602  * relink_bad_predecessors() (This function stores the old in array
603  * to the link field and sets a new in array if arity of predecessors
604  * changes).
605  */
606 void remove_bad_predecessors(ir_graph *irg)
607 {
608         panic("Fix backedge handling first");
609         irg_walk_graph(irg, firm_clear_link, relink_bad_predecessors, NULL);
610 }
611
612
613 /*
614    __                      _  __ __
615   (_     __    o     _    | \/  |_
616   __)|_| | \_/ | \_/(/_   |_/\__|__
617
618   The following stuff implements a facility that automatically patches
619   registered ir_node pointers to the new node when a dead node elimination occurs.
620 */
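
/*
 * A minimal usage sketch (hypothetical caller code, not part of libFirm):
 * keep an ir_node pointer valid across a dead node elimination run.
 *
 *     survive_dce_t *sd = new_survive_dce();
 *     survive_dce_register_irn(sd, &my_node);   <-- my_node is some ir_node pointer
 *     dead_node_elimination(irg);               <-- my_node now points to the copy
 *     free_survive_dce(sd);
 */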
621
622 struct _survive_dce_t {
623         struct obstack obst;
624         pmap *places;
625         pmap *new_places;
626         hook_entry_t dead_node_elim;
627         hook_entry_t dead_node_elim_subst;
628 };
629
630 typedef struct _survive_dce_list_t {
631         struct _survive_dce_list_t *next;
632         ir_node **place;
633 } survive_dce_list_t;
634
635 static void dead_node_hook(void *context, ir_graph *irg, int start)
636 {
637         survive_dce_t *sd = context;
638         (void) irg;
639
640         /* Create a new map before the dead node elimination is performed. */
641         if (start) {
642                 sd->new_places = pmap_create_ex(pmap_count(sd->places));
643         } else {
644                 /* Patch back all nodes if dead node elimination is over and something is to be done. */
645                 pmap_destroy(sd->places);
646                 sd->places     = sd->new_places;
647                 sd->new_places = NULL;
648         }
649 }
650
651 /**
652  * Hook called when dead node elimination replaces old by nw.
653  */
654 static void dead_node_subst_hook(void *context, ir_graph *irg, ir_node *old, ir_node *nw)
655 {
656         survive_dce_t *sd = context;
657         survive_dce_list_t *list = pmap_get(sd->places, old);
658         (void) irg;
659
660         /* If the node is to be patched back, write the new address to all registered locations. */
661         if (list) {
662                 survive_dce_list_t *p;
663
664                 for (p = list; p; p = p->next)
665                         *(p->place) = nw;
666
667                 pmap_insert(sd->new_places, nw, list);
668         }
669 }
670
671 /**
672  * Make a new Survive DCE environment.
673  */
674 survive_dce_t *new_survive_dce(void)
675 {
676         survive_dce_t *res = XMALLOC(survive_dce_t);
677         obstack_init(&res->obst);
678         res->places     = pmap_create();
679         res->new_places = NULL;
680
681         res->dead_node_elim.hook._hook_dead_node_elim = dead_node_hook;
682         res->dead_node_elim.context                   = res;
683         res->dead_node_elim.next                      = NULL;
684
685         res->dead_node_elim_subst.hook._hook_dead_node_elim_subst = dead_node_subst_hook;
686         res->dead_node_elim_subst.context = res;
687         res->dead_node_elim_subst.next    = NULL;
688
689         register_hook(hook_dead_node_elim, &res->dead_node_elim);
690         register_hook(hook_dead_node_elim_subst, &res->dead_node_elim_subst);
691         return res;
692 }
693
694 /**
695  * Free a Survive DCE environment.
696  */
697 void free_survive_dce(survive_dce_t *sd)
698 {
699         obstack_free(&sd->obst, NULL);
700         pmap_destroy(sd->places);
701         unregister_hook(hook_dead_node_elim, &sd->dead_node_elim);
702         unregister_hook(hook_dead_node_elim_subst, &sd->dead_node_elim_subst);
703         xfree(sd);
704 }
705
706 /**
707  * Register a node pointer to be patched upon DCE.
708  * When DCE occurs, the node pointer specified by @p place will be
709  * patched to the new address of the node it is pointing to.
710  *
711  * @param sd    The Survive DCE environment.
712  * @param place The address of the node pointer.
713  */
714 void survive_dce_register_irn(survive_dce_t *sd, ir_node **place)
715 {
716         if (*place != NULL) {
717                 ir_node *irn      = *place;
718                 survive_dce_list_t *curr = pmap_get(sd->places, irn);
719                 survive_dce_list_t *nw   = OALLOC(&sd->obst, survive_dce_list_t);
720
721                 nw->next  = curr;
722                 nw->place = place;
723
724                 pmap_insert(sd->places, irn, nw);
725         }
726 }
727
728 /*--------------------------------------------------------------------*/
729 /*  Functionality for inlining                                         */
730 /*--------------------------------------------------------------------*/
731
732 /**
733  * Copy node for inlining.  Updates attributes that change when
734  * inlining but not for dead node elimination.
735  *
736  * Copies the node by calling copy_node() and then updates the entity if
737  * it's a local one.  env must be a pointer to the frame type of the
738  * inlined procedure. The new entities must be in the link field of
739  * the entities.
740  */
741 static void copy_node_inline(ir_node *n, void *env)
742 {
743         ir_node *nn;
744         ir_type *frame_tp = (ir_type *)env;
745
746         copy_node(n, NULL);
747         if (is_Sel(n)) {
748                 nn = get_new_node(n);
749                 assert(is_Sel(nn));
750                 /* use copied entities from the new frame */
751                 if (get_entity_owner(get_Sel_entity(n)) == frame_tp) {
752                         set_Sel_entity(nn, get_entity_link(get_Sel_entity(n)));
753                 }
754         } else if (is_Block(n)) {
755                 nn = get_new_node(n);
756                 nn->attr.block.irg.irg = current_ir_graph;
757         }
758 }
759
760 /**
761  * Copies the predecessors of the old node and moves constants to
762  * the Start Block.
763  */
764 static void copy_preds_inline(ir_node *n, void *env)
765 {
766         ir_node *nn;
767
768         copy_preds(n, env);
769         nn = skip_Id(get_new_node(n));
770         if (is_irn_constlike(nn)) {
771                 /* move Constants into the start block */
772                 set_nodes_block(nn, get_irg_start_block(current_ir_graph));
773
774                 n = identify_remember(current_ir_graph->value_table, nn);
775                 if (nn != n) {
776                         DBG_OPT_CSE(nn, n);
777                         exchange(nn, n);
778                 }
779         }
780 }
781
782 /**
783  * Walker: checks if P_value_arg_base is used.
784  */
785 static void find_addr(ir_node *node, void *env)
786 {
787         int *allow_inline = env;
788         if (is_Sel(node)) {
789                 ir_graph *irg = current_ir_graph;
790                 if (get_Sel_ptr(node) == get_irg_frame(irg)) {
791                         /* access to frame */
792                         ir_entity *ent = get_Sel_entity(node);
793                         if (get_entity_owner(ent) != get_irg_frame_type(irg)) {
794                                 /* access to value_type */
795                                 *allow_inline = 0;
796                         }
797                 }
798         } else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
799                 /* From GCC:
800                  * Refuse to inline alloca call unless user explicitly forced so as this
801                  * may change program's memory overhead drastically when the function
802                  * using alloca is called in a loop.  In GCC as present in SPEC2000, inlining
803                  * into schedule_block caused it to require 2GB of RAM instead of 256MB.
804                  *
805                  * Sadly, this is true for our implementation as well.
806                  * Moreover, we cannot differentiate between alloca() and VLA yet, so this
807                  * disables inlining of functions using VLA (which are completely safe).
808                  *
809                  * 2 Solutions:
810                  * - add a flag to the Alloc node for "real" alloca() calls
811                  * - add a new Stack-Restore node at the end of a function using alloca()
812                  */
813                 *allow_inline = 0;
814         }
815 }
816
817 /**
818  * Check if we can inline a given call.
819  * Currently, we cannot inline two cases:
820  * - calls with compound arguments or results (e.g. a struct passed by value)
821  * - graphs that take the address of a parameter
822  *
823  * These conditions are checked here.
824  */
825 static int can_inline(ir_node *call, ir_graph *called_graph)
826 {
827         ir_type *call_type = get_Call_type(call);
828         int params, ress, i, res;
829         assert(is_Method_type(call_type));
830
831         params = get_method_n_params(call_type);
832         ress   = get_method_n_ress(call_type);
833
834         /* check parameters for compound arguments */
835         for (i = 0; i < params; ++i) {
836                 ir_type *p_type = get_method_param_type(call_type, i);
837
838                 if (is_compound_type(p_type))
839                         return 0;
840         }
841
842         /* check results for compound return types */
843         for (i = 0; i < ress; ++i) {
844                 ir_type *r_type = get_method_res_type(call_type, i);
845
846                 if (is_compound_type(r_type))
847                         return 0;
848         }
849
850         res = 1;
851         irg_walk_graph(called_graph, find_addr, NULL, &res);
852
853         return res;
854 }
855
856 enum exc_mode {
857         exc_handler,    /**< There is a handler. */
858         exc_no_handler  /**< Exception handling not represented. */
859 };
860
861 /* Inlines a method at the given call site. */
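/* Note: the exception-handling decision below walks the Projs of the Call via
   the link field; callers such as inline_small_irgs() below therefore run
   collect_phiprojs() before inlining. */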
862 int inline_method(ir_node *call, ir_graph *called_graph)
863 {
864         ir_node             *pre_call;
865         ir_node             *post_call, *post_bl;
866         ir_node             *in[pn_Start_max];
867         ir_node             *end, *end_bl, *block;
868         ir_node             **res_pred;
869         ir_node             **cf_pred;
870         ir_node             **args_in;
871         ir_node             *ret, *phi;
872         int                 arity, n_ret, n_exc, n_res, i, n, j, rem_opt, irn_arity, n_params;
873         int                 n_mem_phi;
874         enum exc_mode       exc_handling;
875         ir_type             *called_frame, *curr_frame, *mtp, *ctp;
876         ir_entity           *ent;
877         ir_graph            *rem, *irg;
878         irg_inline_property prop = get_irg_inline_property(called_graph);
879         unsigned long       visited;
880
881         if (prop == irg_inline_forbidden)
882                 return 0;
883
884         ent = get_irg_entity(called_graph);
885
886         mtp = get_entity_type(ent);
887         ctp = get_Call_type(call);
888         n_params = get_method_n_params(mtp);
889         n_res    = get_method_n_ress(mtp);
890         if (n_params > get_method_n_params(ctp)) {
891                 /* this is a bad feature of C: without a prototype, we can
892                  * call a function with fewer parameters than needed. Currently
893                  * we don't support this, although we could use Unknown then. */
894                 return 0;
895         }
896         if (n_res != get_method_n_ress(ctp)) {
897                 return 0;
898         }
899
900         /* Argh, compiling C has some bad consequences:
901          * it is implementation defined what happens when argument and parameter
902          * types differ.  We support inlining if the bit sizes of the types match AND
903          * the same arithmetic is used. */
904         for (i = n_params - 1; i >= 0; --i) {
905                 ir_type *param_tp = get_method_param_type(mtp, i);
906                 ir_type *arg_tp   = get_method_param_type(ctp, i);
907
908                 if (param_tp != arg_tp) {
909                         ir_mode *pmode = get_type_mode(param_tp);
910                         ir_mode *amode = get_type_mode(arg_tp);
911
912                         if (pmode == NULL || amode == NULL)
913                                 return 0;
914                         if (get_mode_size_bits(pmode) != get_mode_size_bits(amode))
915                                 return 0;
916                         if (get_mode_arithmetic(pmode) != get_mode_arithmetic(amode))
917                                 return 0;
918                         /* otherwise we can simply "reinterpret" the bits */
919                 }
920         }
921         for (i = n_res - 1; i >= 0; --i) {
922                 ir_type *decl_res_tp = get_method_res_type(mtp, i);
923                 ir_type *used_res_tp = get_method_res_type(ctp, i);
924
925                 if (decl_res_tp != used_res_tp) {
926                         ir_mode *decl_mode = get_type_mode(decl_res_tp);
927                         ir_mode *used_mode = get_type_mode(used_res_tp);
928                         if (decl_mode == NULL || used_mode == NULL)
929                                 return 0;
930                         if (get_mode_size_bits(decl_mode) != get_mode_size_bits(used_mode))
931                                 return 0;
932                         if (get_mode_arithmetic(decl_mode) != get_mode_arithmetic(used_mode))
933                                 return 0;
934                         /* otherwise we can "reinterpret" the bits */
935                 }
936         }
937
938         irg = get_irn_irg(call);
939
940         /*
941          * We cannot inline a recursive call. The graph must be copied before
942          * the call to inline_method() using create_irg_copy().
943          */
944         if (called_graph == irg)
945                 return 0;
946
947         /*
948          * currently, we cannot inline two cases:
949          * - call with compound arguments
950          * - graphs that take the address of a parameter
951          */
952         if (! can_inline(call, called_graph))
953                 return 0;
954
955         rem = current_ir_graph;
956         current_ir_graph = irg;
957
958         DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));
959
960         /* --  Turn off optimizations, this can cause problems when allocating new nodes. -- */
961         rem_opt = get_opt_optimize();
962         set_optimize(0);
963
964         /* Handle graph state */
965         assert(get_irg_phase_state(irg) != phase_building);
966         assert(get_irg_pinned(irg) == op_pin_state_pinned);
967         assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
968         set_irg_outs_inconsistent(irg);
969         set_irg_extblk_inconsistent(irg);
970         set_irg_doms_inconsistent(irg);
971         set_irg_loopinfo_inconsistent(irg);
972         set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
973         set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);
974
975         /* -- Check preconditions -- */
976         assert(is_Call(call));
977
978         /* here we know we WILL inline, so inform the statistics */
979         hook_inline(call, called_graph);
980
981         /* -- Decide how to handle exception control flow: Is there a handler
982            for the Call node, or do we branch directly to End on an exception?
983            exc_handling:
984            exc_handler     There is a handler.
985            exc_no_handler  Exception handling not represented in Firm. -- */
986         {
987                 ir_node *Xproj = NULL;
988                 ir_node *proj;
989                 for (proj = get_irn_link(call); proj; proj = get_irn_link(proj)) {
990                         long proj_nr = get_Proj_proj(proj);
991                         if (proj_nr == pn_Call_X_except) Xproj = proj;
992                 }
993                 exc_handling = Xproj != NULL ? exc_handler : exc_no_handler;
994         }
995
996         /* create the argument tuple */
997         NEW_ARR_A(ir_node *, args_in, n_params);
998
999         block = get_nodes_block(call);
1000         for (i = n_params - 1; i >= 0; --i) {
1001                 ir_node *arg      = get_Call_param(call, i);
1002                 ir_type *param_tp = get_method_param_type(mtp, i);
1003                 ir_mode *mode     = get_type_mode(param_tp);
1004
1005                 if (mode != get_irn_mode(arg)) {
1006                         arg = new_r_Conv(block, arg, mode);
1007                 }
1008                 args_in[i] = arg;
1009         }
1010
1011         /* -- pre_call models the values produced by the Start node of the called
1012            graph (memory, frame, arguments) and later replaces that Start node.
1013            Post_call is the old Call node and collects the results of the called
1014            graph. Both will end up being a tuple.  -- */
1015         post_bl = get_nodes_block(call);
1016         set_irg_current_block(irg, post_bl);
1017         /* XxMxPxPxPxT of Start + parameter of Call */
1018         in[pn_Start_X_initial_exec]   = new_Jmp();
1019         in[pn_Start_M]                = get_Call_mem(call);
1020         in[pn_Start_P_frame_base]     = get_irg_frame(irg);
1021         in[pn_Start_P_tls]            = get_irg_tls(irg);
1022         in[pn_Start_T_args]           = new_Tuple(n_params, args_in);
1023         pre_call = new_Tuple(pn_Start_max, in);
1024         post_call = call;
1025
1026         /* --
1027            The new block gets the ins of the old block, pre_call and all its
1028            predecessors and all Phi nodes. -- */
1029         part_block(pre_call);
1030
1031         /* -- Prepare state for dead node elimination -- */
1032         /* Visited flags in calling irg must be >= flag in called irg.
1033            Otherwise the walker and arity computation will not work. */
1034         if (get_irg_visited(irg) <= get_irg_visited(called_graph))
1035                 set_irg_visited(irg, get_irg_visited(called_graph) + 1);
1036         if (get_irg_block_visited(irg) < get_irg_block_visited(called_graph))
1037                 set_irg_block_visited(irg, get_irg_block_visited(called_graph));
1038         visited = get_irg_visited(irg);
1039
1040         /* Set pre_call as the new Start node in the link field of the Start node
1041            of the called graph, and pre_call's block as the new block for the start
1042            block of the called graph.
1043            Further mark these nodes so that they are not visited by the
1044            copying. */
1045         set_irn_link(get_irg_start(called_graph), pre_call);
1046         set_irn_visited(get_irg_start(called_graph), visited);
1047         set_irn_link(get_irg_start_block(called_graph), get_nodes_block(pre_call));
1048         set_irn_visited(get_irg_start_block(called_graph), visited);
1049
1050         set_irn_link(get_irg_bad(called_graph), get_irg_bad(current_ir_graph));
1051         set_irn_visited(get_irg_bad(called_graph), visited);
1052
1053         set_irn_link(get_irg_no_mem(called_graph), get_irg_no_mem(current_ir_graph));
1054         set_irn_visited(get_irg_no_mem(called_graph), visited);
1055
1056         /* Initialize for compaction of in arrays */
1057         inc_irg_block_visited(irg);
1058
1059         /* -- Replicate local entities of the called_graph -- */
1060         /* copy the entities. */
1061         irp_reserve_resources(irp, IR_RESOURCE_ENTITY_LINK);
1062         called_frame = get_irg_frame_type(called_graph);
1063         curr_frame   = get_irg_frame_type(irg);
1064         for (i = 0, n = get_class_n_members(called_frame); i < n; ++i) {
1065                 ir_entity *new_ent, *old_ent;
1066                 old_ent = get_class_member(called_frame, i);
1067                 new_ent = copy_entity_own(old_ent, curr_frame);
1068                 set_entity_link(old_ent, new_ent);
1069         }
1070
1071         /* visited is now greater than that of the called graph.  With this trick,
1072            visited remains unchanged, so an outer walker (e.g. one searching for the
1073            Call nodes to inline) will not visit the freshly inlined nodes. */
1074         set_irg_visited(irg, get_irg_visited(irg)-1);
1075
1076         /* -- Performing dead node elimination inlines the graph -- */
1077         /* Copies the nodes to the obstack of current_ir_graph. Updates links to new
1078            entities. */
1079         irg_walk(get_irg_end(called_graph), copy_node_inline, copy_preds_inline,
1080                  get_irg_frame_type(called_graph));
1081
1082         irp_free_resources(irp, IR_RESOURCE_ENTITY_LINK);
1083
1084         /* Repair called_graph */
1085         set_irg_visited(called_graph, get_irg_visited(irg));
1086         set_irg_block_visited(called_graph, get_irg_block_visited(irg));
1087         set_Block_block_visited(get_irg_start_block(called_graph), 0);
1088
1089         /* -- Merge the end of the inlined procedure with the call site -- */
1090         /* We will turn the old Call node into a Tuple with the following
1091            predecessors:
1092            -1:  Block of Tuple.
1093            0: Phi of all Memories of Return statements.
1094            1: Jmp from new Block that merges the control flow from all exception
1095            predecessors of the old end block.
1096            2: Tuple of all results.
1097            3: Phi of Exception memories.
1098            In case the old Call directly branches to End on an exception we don't
1099            need the block merging all exceptions nor the Phi of the exception
1100            memories.
1101         */
1102
1103         /* -- Precompute some values -- */
1104         end_bl = get_new_node(get_irg_end_block(called_graph));
1105         end = get_new_node(get_irg_end(called_graph));
1106         arity = get_Block_n_cfgpreds(end_bl);    /* arity = n_exc + n_ret  */
1107         n_res = get_method_n_ress(get_Call_type(call));
1108
1109         res_pred = XMALLOCN(ir_node*, n_res);
1110         cf_pred  = XMALLOCN(ir_node*, arity);
1111
1112         set_irg_current_block(irg, post_bl); /* just to make sure */
1113
1114         /* -- archive keepalives -- */
1115         irn_arity = get_irn_arity(end);
1116         for (i = 0; i < irn_arity; i++) {
1117                 ir_node *ka = get_End_keepalive(end, i);
1118                 if (! is_Bad(ka))
1119                         add_End_keepalive(get_irg_end(irg), ka);
1120         }
1121
1122         /* The new end node will die.  We need not free as the in array is on the obstack:
1123            copy_node() only generated 'D' arrays. */
1124
1125         /* -- Replace Return nodes by Jump nodes. -- */
1126         n_ret = 0;
1127         for (i = 0; i < arity; i++) {
1128                 ir_node *ret;
1129                 ret = get_Block_cfgpred(end_bl, i);
1130                 if (is_Return(ret)) {
1131                         cf_pred[n_ret] = new_r_Jmp(get_nodes_block(ret));
1132                         n_ret++;
1133                 }
1134         }
1135         set_irn_in(post_bl, n_ret, cf_pred);
1136
1137         /* -- Build a Tuple for all results of the method.
1138            Add Phi node if there was more than one Return.  -- */
1139         turn_into_tuple(post_call, pn_Call_max);
1140         /* First the Memory-Phi */
1141         n_mem_phi = 0;
1142         for (i = 0; i < arity; i++) {
1143                 ret = get_Block_cfgpred(end_bl, i);
1144                 if (is_Return(ret)) {
1145                         cf_pred[n_mem_phi++] = get_Return_mem(ret);
1146                 }
1147                 /* memory output for some exceptions is directly connected to End */
1148                 if (is_Call(ret)) {
1149                         cf_pred[n_mem_phi++] = new_r_Proj(get_nodes_block(ret), ret, mode_M, 3);
1150                 } else if (is_fragile_op(ret)) {
1151                         /* We rely that all cfops have the memory output at the same position. */
1152                         cf_pred[n_mem_phi++] = new_r_Proj(get_nodes_block(ret), ret, mode_M, 0);
1153                 } else if (is_Raise(ret)) {
1154                         cf_pred[n_mem_phi++] = new_r_Proj(get_nodes_block(ret), ret, mode_M, 1);
1155                 }
1156         }
1157         phi = new_Phi(n_mem_phi, cf_pred, mode_M);
1158         set_Tuple_pred(call, pn_Call_M, phi);
1159         /* Conserve Phi-list for further inlinings -- but might be optimized */
1160         if (get_nodes_block(phi) == post_bl) {
1161                 set_irn_link(phi, get_irn_link(post_bl));
1162                 set_irn_link(post_bl, phi);
1163         }
1164         /* Now the real results */
1165         if (n_res > 0) {
1166                 for (j = 0; j < n_res; j++) {
1167                         ir_type *res_type = get_method_res_type(ctp, j);
1168                         ir_mode *res_mode = get_type_mode(res_type);
1169                         n_ret = 0;
1170                         for (i = 0; i < arity; i++) {
1171                                 ret = get_Block_cfgpred(end_bl, i);
1172                                 if (is_Return(ret)) {
1173                                         ir_node *res = get_Return_res(ret, j);
1174                                         if (get_irn_mode(res) != res_mode) {
1175                                                 ir_node *block = get_nodes_block(res);
1176                                                 res = new_r_Conv(block, res, res_mode);
1177                                         }
1178                                         cf_pred[n_ret] = res;
1179                                         n_ret++;
1180                                 }
1181                         }
1182                         if (n_ret > 0)
1183                                 phi = new_Phi(n_ret, cf_pred, get_irn_mode(cf_pred[0]));
1184                         else
1185                                 phi = new_Bad();
1186                         res_pred[j] = phi;
1187                         /* Conserve Phi-list for further inlinings -- but might be optimized */
1188                         if (get_nodes_block(phi) == post_bl) {
1189                                 set_Phi_next(phi, get_Block_phis(post_bl));
1190                                 set_Block_phis(post_bl, phi);
1191                         }
1192                 }
1193                 set_Tuple_pred(call, pn_Call_T_result, new_Tuple(n_res, res_pred));
1194         } else {
1195                 set_Tuple_pred(call, pn_Call_T_result, new_Bad());
1196         }
1197         /* handle the regular call */
1198         set_Tuple_pred(call, pn_Call_X_regular, new_Jmp());
1199
1200         /* For now, we cannot inline calls with value_base */
1201         set_Tuple_pred(call, pn_Call_P_value_res_base, new_Bad());
1202
1203         /* Finally the exception control flow.
1204            We have two possible situations:
1205            First if the Call branches to an exception handler:
1206            We need to add a Phi node to
1207            collect the memory containing the exception objects.  Further we need
1208            to add another block to get a correct representation of this Phi.  To
1209            this block we add a Jmp that resolves into the X output of the Call
1210            when the Call is turned into a tuple.
1211            Second: There is no exception edge. Just add all inlined exception
1212            branches to the End node.
1213          */
1214         if (exc_handling == exc_handler) {
1215                 n_exc = 0;
1216                 for (i = 0; i < arity; i++) {
1217                         ir_node *ret, *irn;
1218                         ret = get_Block_cfgpred(end_bl, i);
1219                         irn = skip_Proj(ret);
1220                         if (is_fragile_op(irn) || is_Raise(irn)) {
1221                                 cf_pred[n_exc] = ret;
1222                                 ++n_exc;
1223                         }
1224                 }
1225                 if (n_exc > 0) {
1226                         if (n_exc == 1) {
1227                                 /* simple fix */
1228                                 set_Tuple_pred(call, pn_Call_X_except, cf_pred[0]);
1229                         } else {
1230                                 ir_node *block = new_Block(n_exc, cf_pred);
1231                                 set_cur_block(block);
1232                                 set_Tuple_pred(call, pn_Call_X_except, new_Jmp());
1233                         }
1234                 } else {
1235                         set_Tuple_pred(call, pn_Call_X_except, new_Bad());
1236                 }
1237         } else {
1238                 ir_node *main_end_bl;
1239                 int main_end_bl_arity;
1240                 ir_node **end_preds;
1241
1242                 /* assert(exc_handling == exc_no_handler || there are no exceptions) */
1243                 n_exc = 0;
1244                 for (i = 0; i < arity; i++) {
1245                         ir_node *ret = get_Block_cfgpred(end_bl, i);
1246                         ir_node *irn = skip_Proj(ret);
1247
1248                         if (is_fragile_op(irn) || is_Raise(irn)) {
1249                                 cf_pred[n_exc] = ret;
1250                                 n_exc++;
1251                         }
1252                 }
1253                 main_end_bl       = get_irg_end_block(irg);
1254                 main_end_bl_arity = get_irn_arity(main_end_bl);
1255                 end_preds         = XMALLOCN(ir_node*, n_exc + main_end_bl_arity);
1256
1257                 for (i = 0; i < main_end_bl_arity; ++i)
1258                         end_preds[i] = get_irn_n(main_end_bl, i);
1259                 for (i = 0; i < n_exc; ++i)
1260                         end_preds[main_end_bl_arity + i] = cf_pred[i];
1261                 set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
1262                 set_Tuple_pred(call, pn_Call_X_except, new_Bad());
1263                 free(end_preds);
1264         }
1265         free(res_pred);
1266         free(cf_pred);
1267
1268         /* --  Turn optimizations back on. -- */
1269         set_optimize(rem_opt);
1270         current_ir_graph = rem;
1271
1272         return 1;
1273 }
1274
1275 /********************************************************************/
1276 /* Apply inlining to small methods.                                 */
1277 /********************************************************************/
1278
1279 static struct obstack  temp_obst;
1280
1281 /** Represents a possible inlinable call in a graph. */
1282 typedef struct _call_entry {
1283         ir_node    *call;       /**< The Call node. */
1284         ir_graph   *callee;     /**< The callee IR-graph. */
1285         list_head  list;        /**< List head for linking the next one. */
1286         int        loop_depth;  /**< The loop depth of this call. */
1287         int        benefice;    /**< The calculated benefice of this call. */
1288         unsigned   local_adr:1; /**< Set if this call gets an address of a local variable. */
1289         unsigned   all_const:1; /**< Set if this call has only constant parameters. */
1290 } call_entry;
1291
1292 /**
1293  * environment for inlining small irgs
1294  */
1295 typedef struct _inline_env_t {
1296         struct obstack obst;  /**< An obstack where call_entries are allocated on. */
1297         list_head      calls; /**< The call entry list. */
1298 } inline_env_t;
1299
1300 /**
1301  * Returns the irg called from a Call node. If the irg is not
1302  * known, NULL is returned.
1303  *
1304  * @param call  the call node
1305  */
1306 static ir_graph *get_call_called_irg(ir_node *call)
1307 {
1308         ir_node *addr;
1309
1310         addr = get_Call_ptr(call);
1311         if (is_Global(addr)) {
1312                 ir_entity *ent = get_Global_entity(addr);
1313                 /* we don't know which function gets finally bound to a weak symbol */
1314                 if (get_entity_linkage(ent) & IR_LINKAGE_WEAK)
1315                         return NULL;
1316
1317                 return get_entity_irg(ent);
1318         }
1319
1320         return NULL;
1321 }
1322
1323 /**
1324  * Walker: Collect all calls to known graphs inside a graph.
1325  */
1326 static void collect_calls(ir_node *call, void *env)
1327 {
1328         (void) env;
1329         if (is_Call(call)) {
1330                 ir_graph *called_irg = get_call_called_irg(call);
1331
1332                 if (called_irg != NULL) {
1333                         /* The Call node calls a locally defined method.  Remember to inline. */
1334                         inline_env_t *ienv  = env;
1335                         call_entry   *entry = OALLOC(&ienv->obst, call_entry);
1336                         entry->call       = call;
1337                         entry->callee     = called_irg;
1338                         entry->loop_depth = 0;
1339                         entry->benefice   = 0;
1340                         entry->local_adr  = 0;
1341                         entry->all_const  = 0;
1342
1343                         list_add_tail(&entry->list, &ienv->calls);
1344                 }
1345         }
1346 }
1347
1348 /**
1349  * Inlines all small methods at call sites where the called address comes
1350  * from a Const node that references the entity representing the called
1351  * method.
1352  * The size argument is a rough measure for the code size of the method:
1353  * Methods where the obstack containing the firm graph is smaller than
1354  * size are inlined.
1355  */
1356 void inline_small_irgs(ir_graph *irg, int size)
1357 {
1358         ir_graph *rem = current_ir_graph;
1359         inline_env_t env;
1360         call_entry *entry;
1361
1362         current_ir_graph = irg;
1363         /* Handle graph state */
1364         assert(get_irg_phase_state(irg) != phase_building);
1365         free_callee_info(irg);
1366
1367         /* Find Call nodes to inline.
1368            (We cannot inline during a walk of the graph, as inlining the same
1369            method several times changes the visited flag of the walked graph:
1370            after the first inlining the visited flag of the callee equals that of
1371            the caller.  With the next inlining both are increased.) */
1372         obstack_init(&env.obst);
1373         INIT_LIST_HEAD(&env.calls);
1374         irg_walk_graph(irg, NULL, collect_calls, &env);
1375
1376         if (! list_empty(&env.calls)) {
1377                 /* There are calls to inline */
1378                 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1379                 collect_phiprojs(irg);
1380
1381                 list_for_each_entry(call_entry, entry, &env.calls, list) {
1382                         ir_graph            *callee = entry->callee;
1383                         irg_inline_property prop    = get_irg_inline_property(callee);
1384
1385                         if (prop == irg_inline_forbidden) {
1386                                 continue;
1387                         }
1388
1389                         if (prop >= irg_inline_forced ||
1390                             _obstack_memory_used(callee->obst) - (int)obstack_room(callee->obst) < size) {
1391                                 inline_method(entry->call, callee);
1392                         }
1393                 }
1394                 ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1395         }
1396         obstack_free(&env.obst, NULL);
1397         current_ir_graph = rem;
1398 }
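
/*
 * A minimal usage sketch for inline_small_irgs(), assuming the caller simply
 * wants to process every graph in the program.  The threshold value 5000 is
 * only an illustrative assumption, not a tuned default:
 *
 *     void inline_all_small_irgs(void)
 *     {
 *         int i, n = get_irp_n_irgs();
 *         for (i = 0; i < n; ++i)
 *             inline_small_irgs(get_irp_irg(i), 5000);
 *     }
 */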
1399
1400 struct inline_small_irgs_pass_t {
1401         ir_graph_pass_t pass;
1402         int            size;
1403 };
1404
1405 /**
1406  * Wrapper to run inline_small_irgs() as a pass.
1407  */
1408 static int inline_small_irgs_wrapper(ir_graph *irg, void *context)
1409 {
1410         struct inline_small_irgs_pass_t *pass = context;
1411
1412         inline_small_irgs(irg, pass->size);
1413         return 0;
1414 }
1415
1416 /* create a pass for inline_small_irgs() */
1417 ir_graph_pass_t *inline_small_irgs_pass(const char *name, int size)
1418 {
1419         struct inline_small_irgs_pass_t *pass =
1420                 XMALLOCZ(struct inline_small_irgs_pass_t);
1421
1422         pass->size = size;
1423         return def_graph_pass_constructor(
1424                 &pass->pass, name ? name : "inline_small_irgs", inline_small_irgs_wrapper);
1425 }
1426
1427 /**
1428  * Environment for inlining irgs.
1429  */
1430 typedef struct {
1431         list_head calls;             /**< List of all call nodes in this graph. */
1432         unsigned  *local_weights;    /**< Once allocated, the beneficial weight for transmitting local addresses. */
1433         unsigned  n_nodes;           /**< Number of nodes in graph except Id, Tuple, Proj, Start, End. */
1434         unsigned  n_blocks;          /**< Number of Blocks in graph without Start and End block. */
1435         unsigned  n_nodes_orig;      /**< for statistics */
1436         unsigned  n_call_nodes;      /**< Number of Call nodes in the graph. */
1437         unsigned  n_call_nodes_orig; /**< for statistics */
1438         unsigned  n_callers;         /**< Number of known graphs that call this graph. */
1439         unsigned  n_callers_orig;    /**< for statistics */
1440         unsigned  got_inline:1;      /**< Set, if at least one call inside this graph was inlined. */
1441         unsigned  recursive:1;       /**< Set, if this function is self recursive. */
1442 } inline_irg_env;
1443
1444 /**
1445  * Allocate a new environment for inlining.
1446  */
1447 static inline_irg_env *alloc_inline_irg_env(void)
1448 {
1449         inline_irg_env *env    = OALLOC(&temp_obst, inline_irg_env);
1450         INIT_LIST_HEAD(&env->calls);
1451         env->local_weights     = NULL;
1452         env->n_nodes           = -2; /* do not count Start, End */
1453         env->n_blocks          = -2; /* do not count Start, End Block */
1454         env->n_nodes_orig      = -2; /* do not count Start, End */
1455         env->n_call_nodes      = 0;
1456         env->n_call_nodes_orig = 0;
1457         env->n_callers         = 0;
1458         env->n_callers_orig    = 0;
1459         env->got_inline        = 0;
1460         env->recursive         = 0;
1461         return env;
1462 }
1463
1464 typedef struct walker_env {
1465         inline_irg_env *x;     /**< the inline environment */
1466         char ignore_runtime;   /**< the ignore runtime flag */
1467         char ignore_callers;   /**< if set, do not update caller data */
1468 } wenv_t;
1469
1470 /**
1471  * post-walker: collect all calls in the inline-environment
1472  * of a graph and accumulate some statistics.
1473  */
1474 static void collect_calls2(ir_node *call, void *ctx)
1475 {
1476         wenv_t         *env = ctx;
1477         inline_irg_env *x = env->x;
1478         ir_opcode      code = get_irn_opcode(call);
1479         ir_graph       *callee;
1480         call_entry     *entry;
1481
1482         /* count meaningful nodes in irg */
1483         if (code != iro_Proj && code != iro_Tuple && code != iro_Sync) {
1484                 if (code != iro_Block) {
1485                         ++x->n_nodes;
1486                         ++x->n_nodes_orig;
1487                 } else {
1488                         ++x->n_blocks;
1489                 }
1490         }
1491
1492         if (code != iro_Call) return;
1493
1494         /* check, if it's a runtime call */
1495         if (env->ignore_runtime) {
1496                 ir_node *symc = get_Call_ptr(call);
1497
1498                 if (is_Global(symc)) {
1499                         ir_entity *ent = get_Global_entity(symc);
1500
1501                         if (get_entity_additional_properties(ent) & mtp_property_runtime)
1502                                 return;
1503                 }
1504         }
1505
1506         /* collect all call nodes */
1507         ++x->n_call_nodes;
1508         ++x->n_call_nodes_orig;
1509
1510         callee = get_call_called_irg(call);
1511         if (callee != NULL) {
1512                 if (! env->ignore_callers) {
1513                         inline_irg_env *callee_env = get_irg_link(callee);
1514                         /* count all static callers */
1515                         ++callee_env->n_callers;
1516                         ++callee_env->n_callers_orig;
1517                 }
1518                 if (callee == current_ir_graph)
1519                         x->recursive = 1;
1520
1521                 /* link it into the list of possibly inlinable entries */
1522                 entry = OALLOC(&temp_obst, call_entry);
1523                 entry->call       = call;
1524                 entry->callee     = callee;
1525                 entry->loop_depth = get_irn_loop(get_nodes_block(call))->depth;
1526                 entry->benefice   = 0;
1527                 entry->local_adr  = 0;
1528                 entry->all_const  = 0;
1529
1530                 list_add_tail(&entry->list, &x->calls);
1531         }
1532 }
1533
1534 /**
1535  * Returns TRUE if the number of Call nodes is 0 in the irg's environment,
1536  * hence this irg is a leaf.
1537  */
1538 inline static int is_leave(ir_graph *irg)
1539 {
1540         inline_irg_env *env = get_irg_link(irg);
1541         return env->n_call_nodes == 0;
1542 }
1543
1544 /**
1545  * Returns TRUE if the number of nodes in the callee is
1546  * smaller than size in the irg's environment.
1547  */
1548 inline static int is_smaller(ir_graph *callee, unsigned size)
1549 {
1550         inline_irg_env *env = get_irg_link(callee);
1551         return env->n_nodes < size;
1552 }
1553
1554 /**
1555  * Duplicate a call entry.
1556  *
1557  * @param entry     the original entry to duplicate
1558  * @param new_call  the new call node
1559  * @param loop_depth_delta
1560  *                  delta value for the loop depth
1561  */
1562 static call_entry *duplicate_call_entry(const call_entry *entry,
1563                                         ir_node *new_call, int loop_depth_delta) {
1564         call_entry *nentry = OALLOC(&temp_obst, call_entry);
1565         nentry->call       = new_call;
1566         nentry->callee     = entry->callee;
1567         nentry->benefice   = entry->benefice;
1568         nentry->loop_depth = entry->loop_depth + loop_depth_delta;
1569         nentry->local_adr  = entry->local_adr;
1570         nentry->all_const  = entry->all_const;
1571
1572         return nentry;
1573 }
1574
1575 /**
1576  * Append all call nodes of the source environment to the call list of the destination
1577  * environment.
1578  *
1579  * @param dst         destination environment
1580  * @param src         source environment
1581  * @param loop_depth  the loop depth of the call that is replaced by the src list
1582  */
1583 static void append_call_list(inline_irg_env *dst, inline_irg_env *src, int loop_depth)
1584 {
1585         call_entry *entry, *nentry;
1586
1587         /* Note that the src list points to Call nodes in the inlined graph, but
1588            we need Call nodes in our graph. Luckily the inliner leaves this information
1589            in the link field. */
1590         list_for_each_entry(call_entry, entry, &src->calls, list) {
1591                 nentry = duplicate_call_entry(entry, get_irn_link(entry->call), loop_depth);
1592                 list_add_tail(&nentry->list, &dst->calls);
1593         }
1594         dst->n_call_nodes += src->n_call_nodes;
1595         dst->n_nodes      += src->n_nodes;
1596 }
1597
1598 /*
1599  * Inlines small leaf methods at call sites where the called address comes
1600  * from a Const node that references the entity representing the called
1601  * method.
1602  * The size arguments are rough measures for the code size of a method:
1603  * leaf methods with fewer than leavesize nodes and other methods with fewer
1604  * than size nodes are inlined, as long as the caller stays below maxsize nodes.
1605  */
1606 void inline_leave_functions(unsigned maxsize, unsigned leavesize,
1607                             unsigned size, int ignore_runtime)
1608 {
1609         inline_irg_env   *env;
1610         ir_graph         *irg;
1611         int              i, n_irgs;
1612         ir_graph         *rem;
1613         int              did_inline;
1614         wenv_t           wenv;
1615         call_entry       *entry, *next;
1616         const call_entry *centry;
1617         pmap             *copied_graphs;
1618         pmap_entry       *pm_entry;
1619
1620         rem = current_ir_graph;
1621         obstack_init(&temp_obst);
1622
1623         /* a map for the copied graphs, used to inline recursive calls */
1624         copied_graphs = pmap_create();
1625
1626         /* extend all irgs by a temporary data structure for inlining. */
1627         n_irgs = get_irp_n_irgs();
1628         for (i = 0; i < n_irgs; ++i)
1629                 set_irg_link(get_irp_irg(i), alloc_inline_irg_env());
1630
1631         /* Pre-compute information in temporary data structure. */
1632         wenv.ignore_runtime = ignore_runtime;
1633         wenv.ignore_callers = 0;
1634         for (i = 0; i < n_irgs; ++i) {
1635                 ir_graph *irg = get_irp_irg(i);
1636
1637                 assert(get_irg_phase_state(irg) != phase_building);
1638                 free_callee_info(irg);
1639
1640                 assure_cf_loop(irg);
1641                 wenv.x = get_irg_link(irg);
1642                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
1643         }
1644
1645         /* -- and now inline. -- */
1646
1647         /* Inline leaves recursively -- we might construct new leaves. */
1648         do {
1649                 did_inline = 0;
1650
1651                 for (i = 0; i < n_irgs; ++i) {
1652                         ir_node *call;
1653                         int phiproj_computed = 0;
1654
1655                         current_ir_graph = get_irp_irg(i);
1656                         env              = get_irg_link(current_ir_graph);
1657
1658                         ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1659                         list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
1660                                 ir_graph            *callee;
1661                                 irg_inline_property  prop;
1662
1663                                 if (env->n_nodes > maxsize)
1664                                         break;
1665
1666                                 call   = entry->call;
1667                                 callee = entry->callee;
1668
1669                                 prop = get_irg_inline_property(callee);
1670                                 if (prop == irg_inline_forbidden) {
1671                                         continue;
1672                                 }
1673
1674                                 if (is_leave(callee) && (
1675                                     is_smaller(callee, leavesize) || prop >= irg_inline_forced)) {
1676                                         if (!phiproj_computed) {
1677                                                 phiproj_computed = 1;
1678                                                 collect_phiprojs(current_ir_graph);
1679                                         }
1680                                         did_inline = inline_method(call, callee);
1681
1682                                         if (did_inline) {
1683                                                 inline_irg_env *callee_env = get_irg_link(callee);
1684
1685                                                 /* call was inlined, Phi/Projs for current graph must be recomputed */
1686                                                 phiproj_computed = 0;
1687
1688                                                 /* Do some statistics */
1689                                                 env->got_inline = 1;
1690                                                 --env->n_call_nodes;
1691                                                 env->n_nodes += callee_env->n_nodes;
1692                                                 --callee_env->n_callers;
1693
1694                                                 /* remove this call from the list */
1695                                                 list_del(&entry->list);
1696                                                 continue;
1697                                         }
1698                                 }
1699                         }
1700                         ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1701                 }
1702         } while (did_inline);
1703
1704         /* inline other small functions. */
1705         for (i = 0; i < n_irgs; ++i) {
1706                 ir_node *call;
1707                 int phiproj_computed = 0;
1708
1709                 current_ir_graph = get_irp_irg(i);
1710                 env              = get_irg_link(current_ir_graph);
1711
1712                 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1713
1714                 /* note that the list of possible calls is updated during the process */
1715                 list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
1716                         irg_inline_property prop;
1717                         ir_graph            *callee;
1718                         pmap_entry          *e;
1719
1720                         call   = entry->call;
1721                         callee = entry->callee;
1722
1723                         prop = get_irg_inline_property(callee);
1724                         if (prop == irg_inline_forbidden) {
1725                                 continue;
1726                         }
1727
1728                         e = pmap_find(copied_graphs, callee);
1729                         if (e != NULL) {
1730                                 /*
1731                                  * Remap callee if we have a copy.
1732                                  * FIXME: Should we do this only for recursive Calls ?
1733                                  */
1734                                 callee = e->value;
1735                         }
1736
1737                         if (prop >= irg_inline_forced ||
1738                             (is_smaller(callee, size) && env->n_nodes < maxsize) /* small function */) {
1739                                 if (current_ir_graph == callee) {
1740                                         /*
1741                                          * Recursive call: we cannot directly inline because we cannot walk
1742                                          * the graph and change it. So we have to make a copy of the graph
1743                                          * first.
1744                                          */
1745
1746                                         inline_irg_env *callee_env;
1747                                         ir_graph       *copy;
1748
1749                                         ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1750
1751                                         /*
1752                                          * No copy yet, create one.
1753                                          * Note that recursive methods are never leaves, so it is sufficient
1754                                          * to test this condition here.
1755                                          */
1756                                         copy = create_irg_copy(callee);
1757
1758                                         /* create_irg_copy() destroys the Proj links, recompute them */
1759                                         phiproj_computed = 0;
1760
1761                                         ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1762
1763                                         /* allocate new environment */
1764                                         callee_env = alloc_inline_irg_env();
1765                                         set_irg_link(copy, callee_env);
1766
1767                                         assure_cf_loop(copy);
1768                                         wenv.x              = callee_env;
1769                                         wenv.ignore_callers = 1;
1770                                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
1771
1772                                         /*
1773                                          * Enter the entity of the original graph. This is needed
1774                                          * for inline_method(). However, note that ent->irg still points
1775                                          * to callee, NOT to copy.
1776                                          */
1777                                         set_irg_entity(copy, get_irg_entity(callee));
1778
1779                                         pmap_insert(copied_graphs, callee, copy);
1780                                         callee = copy;
1781
1782                                         /* we have only one caller: the original graph */
1783                                         callee_env->n_callers      = 1;
1784                                         callee_env->n_callers_orig = 1;
1785                                 }
1786                                 if (! phiproj_computed) {
1787                                         phiproj_computed = 1;
1788                                         collect_phiprojs(current_ir_graph);
1789                                 }
1790                                 did_inline = inline_method(call, callee);
1791                                 if (did_inline) {
1792                                         inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);
1793
1794                                         /* call was inlined, Phi/Projs for current graph must be recomputed */
1795                                         phiproj_computed = 0;
1796
1797                                         /* callee was inlined. Append its call list. */
1798                                         env->got_inline = 1;
1799                                         --env->n_call_nodes;
1800                                         append_call_list(env, callee_env, entry->loop_depth);
1801                                         --callee_env->n_callers;
1802
1803                                         /* after we have inlined callee, all called methods inside callee
1804                                            are now called once more */
1805                                         list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
1806                                                 inline_irg_env *penv = get_irg_link(centry->callee);
1807                                                 ++penv->n_callers;
1808                                         }
1809
1810                                         /* remove this call from the list */
1811                                         list_del(&entry->list);
1812                                         continue;
1813                                 }
1814                         }
1815                 }
1816                 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1817         }
1818
1819         for (i = 0; i < n_irgs; ++i) {
1820                 irg = get_irp_irg(i);
1821                 env = get_irg_link(irg);
1822
1823                 if (env->got_inline) {
1824                         optimize_graph_df(irg);
1825                         optimize_cf(irg);
1826                 }
1827                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
1828                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
1829                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
1830                         env->n_callers_orig, env->n_callers,
1831                         get_entity_name(get_irg_entity(irg))));
1832                 }
1833         }
1834
1835         /* kill the copied graphs: we don't need them anymore */
1836         foreach_pmap(copied_graphs, pm_entry) {
1837                 ir_graph *copy = pm_entry->value;
1838
1839                 /* reset the entity, otherwise it will be deleted in the next step ... */
1840                 set_irg_entity(copy, NULL);
1841                 free_ir_graph(copy);
1842         }
1843         pmap_destroy(copied_graphs);
1844
1845         obstack_free(&temp_obst, NULL);
1846         current_ir_graph = rem;
1847 }
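
/*
 * Usage sketch with illustrative (not tuned) parameter values: inline leaf
 * methods with fewer than 128 nodes and other methods with fewer than 64
 * nodes, never grow a caller beyond 2000 nodes, and skip runtime calls:
 *
 *     inline_leave_functions(2000, 128, 64, 1);
 */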
1848
1849 struct inline_leave_functions_pass_t {
1850         ir_prog_pass_t pass;
1851         unsigned       maxsize;
1852         unsigned       leavesize;
1853         unsigned       size;
1854         int            ignore_runtime;
1855 };
1856
1857 /**
1858  * Wrapper to run inline_leave_functions() as an ir_prog pass.
1859  */
1860 static int inline_leave_functions_wrapper(ir_prog *irp, void *context)
1861 {
1862         struct inline_leave_functions_pass_t *pass = context;
1863
1864         (void)irp;
1865         inline_leave_functions(
1866                 pass->maxsize, pass->leavesize,
1867                 pass->size, pass->ignore_runtime);
1868         return 0;
1869 }
1870
1871 /* create a pass for inline_leave_functions() */
1872 ir_prog_pass_t *inline_leave_functions_pass(
1873         const char *name, unsigned maxsize, unsigned leavesize,
1874         unsigned size, int ignore_runtime) {
1875         struct inline_leave_functions_pass_t *pass =
1876                 XMALLOCZ(struct inline_leave_functions_pass_t);
1877
1878         pass->maxsize        = maxsize;
1879         pass->leavesize      = leavesize;
1880         pass->size           = size;
1881         pass->ignore_runtime = ignore_runtime;
1882
1883         return def_prog_pass_constructor(
1884                 &pass->pass,
1885                 name ? name : "inline_leave_functions",
1886                 inline_leave_functions_wrapper);
1887 }
1888
1889 /**
1890  * Calculate the benefit weight for an argument that carries the address of a local variable.
1891  */
1892 static unsigned calc_method_local_weight(ir_node *arg)
1893 {
1894         int      i, j, k;
1895         unsigned v, weight = 0;
1896
1897         for (i = get_irn_n_outs(arg) - 1; i >= 0; --i) {
1898                 ir_node *succ = get_irn_out(arg, i);
1899
1900                 switch (get_irn_opcode(succ)) {
1901                 case iro_Load:
1902                 case iro_Store:
1903                         /* Loads and Stores can be removed */
1904                         weight += 3;
1905                         break;
1906                 case iro_Sel:
1907                         /* check if all args are constant */
1908                         for (j = get_Sel_n_indexs(succ) - 1; j >= 0; --j) {
1909                                 ir_node *idx = get_Sel_index(succ, j);
1910                                 if (! is_Const(idx))
1911                                         return 0;
1912                         }
1913                         /* Check users on this Sel. Note: if a 0 is returned here, there was
1914                            some unsupported node. */
1915                         v = calc_method_local_weight(succ);
1916                         if (v == 0)
1917                                 return 0;
1918                         /* we can kill one Sel with constant indexes, this is cheap */
1919                         weight += v + 1;
1920                         break;
1921                 case iro_Id:
1922                         /* when looking backward we might find Id nodes */
1923                         weight += calc_method_local_weight(succ);
1924                         break;
1925                 case iro_Tuple:
1926                         /* unoptimized tuple */
1927                         for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
1928                                 ir_node *pred = get_Tuple_pred(succ, j);
1929                                 if (pred == arg) {
1930                                         /* look for Proj(j) */
1931                                         for (k = get_irn_n_outs(succ) - 1; k >= 0; --k) {
1932                                                 ir_node *succ_succ = get_irn_out(succ, k);
1933                                                 if (is_Proj(succ_succ)) {
1934                                                         if (get_Proj_proj(succ_succ) == j) {
1935                                                                 /* found */
1936                                                                 weight += calc_method_local_weight(succ_succ);
1937                                                         }
1938                                                 } else {
1939                                                         /* this should NOT happen */
1940                                                         return 0;
1941                                                 }
1942                                         }
1943                                 }
1944                         }
1945                         break;
1946                 default:
1947                         /* any other node: unsupported yet or bad. */
1948                         return 0;
1949                 }
1950         }
1951         return weight;
1952 }
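
/*
 * Worked example with a hypothetical callee: if a parameter only receives the
 * address of a local variable and the callee uses it as
 *
 *     v  = *p;        one Load   -> weight += 3
 *     *p = v + 1;     one Store  -> weight += 3
 *
 * calc_method_local_weight() yields 6 for that parameter.  As soon as any use
 * is an unsupported node (e.g. the pointer escapes into another Call), the
 * function returns 0, meaning "no benefit known".
 */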
1953
1954 /**
1955  * Calculate the parameter weights for transmitting the address of a local variable.
1956  */
1957 static void analyze_irg_local_weights(inline_irg_env *env, ir_graph *irg)
1958 {
1959         ir_entity *ent = get_irg_entity(irg);
1960         ir_type  *mtp;
1961         int      nparams, i, proj_nr;
1962         ir_node  *irg_args, *arg;
1963
1964         mtp      = get_entity_type(ent);
1965         nparams  = get_method_n_params(mtp);
1966
1967         /* allocate a new array. currently used as 'analysed' flag */
1968         env->local_weights = NEW_ARR_D(unsigned, &temp_obst, nparams);
1969
1970         /* If the method has no parameters, we have nothing to do. */
1971         if (nparams <= 0)
1972                 return;
1973
1974         assure_irg_outs(irg);
1975         irg_args = get_irg_args(irg);
1976         for (i = get_irn_n_outs(irg_args) - 1; i >= 0; --i) {
1977                 arg     = get_irn_out(irg_args, i);
1978                 proj_nr = get_Proj_proj(arg);
1979                 env->local_weights[proj_nr] = calc_method_local_weight(arg);
1980         }
1981 }
1982
1983 /**
1984  * Calculate the benefice for transmitting a local variable address.
1985  * After inlining, the local variable might be transformed into a
1986  * SSA variable by scalar_replacement().
1987  */
1988 static unsigned get_method_local_adress_weight(ir_graph *callee, int pos)
1989 {
1990         inline_irg_env *env = get_irg_link(callee);
1991
1992         if (env->local_weights != NULL) {
1993                 if (pos < ARR_LEN(env->local_weights))
1994                         return env->local_weights[pos];
1995                 return 0;
1996         }
1997
1998         analyze_irg_local_weights(env, callee);
1999
2000         if (pos < ARR_LEN(env->local_weights))
2001                 return env->local_weights[pos];
2002         return 0;
2003 }
2004
2005 /**
2006  * Calculate a benefice value for inlining the given call.
2007  *
2008  * @param entry      the call entry we have to inspect
2009  * @param callee     the called graph
2010  */
2011 static int calc_inline_benefice(call_entry *entry, ir_graph *callee)
2012 {
2013         ir_node   *call = entry->call;
2014         ir_entity *ent  = get_irg_entity(callee);
2015         ir_node   *frame_ptr;
2016         ir_type   *mtp;
2017         int       weight = 0;
2018         int       i, n_params, all_const;
2019         unsigned  cc, v;
2020         irg_inline_property prop;
2021
2022         inline_irg_env *callee_env;
2023
2024         prop = get_irg_inline_property(callee);
2025         if (prop == irg_inline_forbidden) {
2026                 DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden\n",
2027                     call, callee));
2028                 return entry->benefice = INT_MIN;
2029         }
2030
2031         if (get_irg_additional_properties(callee) & mtp_property_noreturn) {
2032                 DB((dbg, LEVEL_2, "In %+F Call to %+F: not inlining noreturn function\n",
2033                     call, callee));
2034                 return entry->benefice = INT_MIN;
2035         }
2036
2037         /* costs for every passed parameter */
2038         n_params = get_Call_n_params(call);
2039         mtp      = get_entity_type(ent);
2040         cc       = get_method_calling_convention(mtp);
2041         if (cc & cc_reg_param) {
2042                 /* register parameter, smaller costs for register parameters */
2043                 int max_regs = cc & ~cc_bits;
2044
2045                 if (max_regs < n_params)
2046                         weight += max_regs * 2 + (n_params - max_regs) * 5;
2047                 else
2048                         weight += n_params * 2;
2049         } else {
2050                 /* parameters are passed on the stack */
2051                 weight += 5 * n_params;
2052         }
2053
2054         /* constant parameters improve the benefice */
2055         frame_ptr = get_irg_frame(current_ir_graph);
2056         all_const = 1;
2057         for (i = 0; i < n_params; ++i) {
2058                 ir_node *param = get_Call_param(call, i);
2059
2060                 if (is_Const(param)) {
2061                         weight += get_method_param_weight(ent, i);
2062                 } else {
2063                         all_const = 0;
2064                         if (is_SymConst(param))
2065                                 weight += get_method_param_weight(ent, i);
2066                         else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
2067                                 /*
2068                                  * An address of a local variable is transmitted. After
2069                                  * inlining, scalar_replacement might be able to remove the
2070                                  * local variable, so honor this.
2071                                  */
2072                                 v = get_method_local_adress_weight(callee, i);
2073                                 weight += v;
2074                                 if (v > 0)
2075                                         entry->local_adr = 1;
2076                         }
2077                 }
2078         }
2079         entry->all_const = all_const;
2080
2081         callee_env = get_irg_link(callee);
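        /* probably the only call site of a non-visible callee: after inlining the
         * callee body can likely become dead, so give an extra bonus */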
2082         if (callee_env->n_callers == 1 &&
2083             callee != current_ir_graph &&
2084             !entity_is_externally_visible(ent)) {
2085                 weight += 700;
2086         }
2087
2088         /* give a bonus for functions with one block */
2089         if (callee_env->n_blocks == 1)
2090                 weight = weight * 3 / 2;
2091
2092         /* and one for small non-recursive functions: we want them to be inlined in almost every case */
2093         if (callee_env->n_nodes < 30 && !callee_env->recursive)
2094                 weight += 2000;
2095
2096         /* and finally for leaves: they do not increase the register pressure
2097            because of callee-save registers */
2098         if (callee_env->n_call_nodes == 0)
2099                 weight += 400;
2100
2101         /* it is important to inline inner loops first */
2102         if (entry->loop_depth > 30)
2103                 weight += 30 * 1024;
2104         else
2105                 weight += entry->loop_depth * 1024;
2106
2107         /*
2108          * All arguments constant is probably a good sign, give an extra bonus
2109          */
2110         if (all_const)
2111                 weight += 1024;
2112
2113         return entry->benefice = weight;
2114 }
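
/*
 * Back-of-the-envelope example for a hypothetical call, following the weights
 * above: two non-constant parameters passed on the stack (2 * 5 = 10) to a
 * single-block, non-recursive callee of fewer than 30 nodes that makes no
 * calls itself, lies outside any loop, and gets no only-caller bonus:
 *
 *     weight  = 10                    parameters
 *     weight  = weight * 3 / 2 = 15   single-block bonus
 *     weight += 2000                  small non-recursive callee
 *     weight += 400                   leaf (no Call nodes in the callee)
 *     weight += 0 * 1024              loop depth 0
 *
 * giving a benefice of 2415, well above an illustrative inline_threshold of 0.
 */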
2115
2116 static ir_graph **irgs;
2117 static int      last_irg;
2118
2119 /**
2120  * Callgraph walker, collect all visited graphs.
2121  */
2122 static void callgraph_walker(ir_graph *irg, void *data)
2123 {
2124         (void) data;
2125         irgs[last_irg++] = irg;
2126 }
2127
2128 /**
2129  * Creates an inline order for all graphs.
2130  *
2131  * @return the list of graphs.
2132  */
2133 static ir_graph **create_irg_list(void)
2134 {
2135         ir_entity **free_methods;
2136         int       arr_len;
2137         int       n_irgs = get_irp_n_irgs();
2138
2139         cgana(&arr_len, &free_methods);
2140         xfree(free_methods);
2141
2142         compute_callgraph();
2143
2144         last_irg = 0;
2145         irgs     = XMALLOCNZ(ir_graph*, n_irgs);
2146
2147         callgraph_walk(NULL, callgraph_walker, NULL);
2148         assert(n_irgs == last_irg);
2149
2150         return irgs;
2151 }
2152
2153 /**
2154  * Push a call onto the priority queue if its benefice is big enough.
2155  *
2156  * @param pqueue   the priority queue of calls
2157  * @param call     the call entry
2158  * @param inline_threshold
2159  *                 the threshold value
2160  */
2161 static void maybe_push_call(pqueue_t *pqueue, call_entry *call,
2162                             int inline_threshold)
2163 {
2164         ir_graph            *callee  = call->callee;
2165         irg_inline_property prop     = get_irg_inline_property(callee);
2166         int                 benefice = calc_inline_benefice(call, callee);
2167
2168         DB((dbg, LEVEL_2, "In %+F Call %+F to %+F has benefice %d\n",
2169             get_irn_irg(call->call), call->call, callee, benefice));
2170
2171         if (prop < irg_inline_forced && benefice < inline_threshold) {
2172                 return;
2173         }
2174
2175         pqueue_put(pqueue, call, benefice);
2176 }
2177
2178 /**
2179  * Try to inline calls into a graph.
2180  *
2181  * @param irg      the graph into which we inline
2182  * @param maxsize  do NOT inline if the size of irg gets
2183  *                 bigger than this amount
2184  * @param inline_threshold
2185  *                 threshold value for inline decision
2186  * @param copied_graphs
2187  *                 map containing copies of recursive graphs
2188  */
2189 static void inline_into(ir_graph *irg, unsigned maxsize,
2190                         int inline_threshold, pmap *copied_graphs)
2191 {
2192         int            phiproj_computed = 0;
2193         inline_irg_env *env = get_irg_link(irg);
2194         call_entry     *curr_call;
2195         wenv_t         wenv;
2196         pqueue_t       *pqueue;
2197
2198         if (env->n_call_nodes == 0)
2199                 return;
2200
2201         if (env->n_nodes > maxsize) {
2202                 DB((dbg, LEVEL_2, "%+F: too big (%d)\n", irg, env->n_nodes));
2203                 return;
2204         }
2205
2206         current_ir_graph = irg;
2207         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
2208
2209         /* put the collected calls into the pqueue */
2210         pqueue = new_pqueue();
2211
2212         list_for_each_entry(call_entry, curr_call, &env->calls, list) {
2213                 assert(is_Call(curr_call->call));
2214                 maybe_push_call(pqueue, curr_call, inline_threshold);
2215         }
2216
2217         /* note that the list of possible calls is updated during the process */
2218         while (!pqueue_empty(pqueue)) {
2219                 int                 did_inline;
2220                 call_entry          *curr_call  = pqueue_pop_front(pqueue);
2221                 ir_graph            *callee     = curr_call->callee;
2222                 ir_node             *call_node  = curr_call->call;
2223                 inline_irg_env      *callee_env = get_irg_link(callee);
2224                 irg_inline_property prop        = get_irg_inline_property(callee);
2225                 int                 loop_depth;
2226                 const call_entry    *centry;
2227                 pmap_entry          *e;
2228
2229                 if ((prop < irg_inline_forced) && env->n_nodes + callee_env->n_nodes > maxsize) {
2230                         DB((dbg, LEVEL_2, "%+F: too big (%d) + %+F (%d)\n", irg,
2231                                                 env->n_nodes, callee, callee_env->n_nodes));
2232                         continue;
2233                 }
2234
2235                 e = pmap_find(copied_graphs, callee);
2236                 if (e != NULL) {
2237                         int benefice = curr_call->benefice;
2238                         /*
2239                          * Reduce the weight for a recursive function iff not all arguments are const:
2240                          * inlining recursive functions is rarely good.
2241                          */
2242                         if (!curr_call->all_const)
2243                                 benefice -= 2000;
2244                         if (benefice < inline_threshold)
2245                                 continue;
2246
2247                         /*
2248                          * Remap callee if we have a copy.
2249                          */
2250                         callee     = e->value;
2251                         callee_env = get_irg_link(callee);
2252                 }
2253
2254                 if (current_ir_graph == callee) {
2255                         /*
2256                          * Recursive call: we cannot directly inline because we cannot
2257                          * walk the graph and change it. So we have to make a copy of
2258                          * the graph first.
2259                          */
2260                         int benefice = curr_call->benefice;
2261                         ir_graph *copy;
2262
2263                         /*
2264                          * Reduce the weight for a recursive function iff not all arguments are const:
2265                          * inlining recursive functions is rarely good.
2266                          */
2267                         if (!curr_call->all_const)
2268                                 benefice -= 2000;
2269                         if (benefice < inline_threshold)
2270                                 continue;
2271
2272                         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
2273
2274                         /*
2275                          * No copy yet, create one.
2276                          * Note that recursive methods are never leaves, so it is
2277                          * sufficient to test this condition here.
2278                          */
2279                         copy = create_irg_copy(callee);
2280
2281                         /* create_irg_copy() destroys the Proj links, recompute them */
2282                         phiproj_computed = 0;
2283
2284                         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
2285
2286                         /* allocate a new environment */
2287                         callee_env = alloc_inline_irg_env();
2288                         set_irg_link(copy, callee_env);
2289
2290                         assure_cf_loop(copy);
2291                         wenv.x              = callee_env;
2292                         wenv.ignore_callers = 1;
2293                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
2294
2295                         /*
2296                          * Enter the entity of the original graph. This is needed
2297                          * for inline_method(). However, note that ent->irg still points
2298                          * to callee, NOT to copy.
2299                          */
2300                         set_irg_entity(copy, get_irg_entity(callee));
2301
2302                         pmap_insert(copied_graphs, callee, copy);
2303                         callee = copy;
2304
2305                         /* we have only one caller: the original graph */
2306                         callee_env->n_callers      = 1;
2307                         callee_env->n_callers_orig = 1;
2308                 }
2309                 if (! phiproj_computed) {
2310                         phiproj_computed = 1;
2311                         collect_phiprojs(current_ir_graph);
2312                 }
2313                 did_inline = inline_method(call_node, callee);
2314                 if (!did_inline)
2315                         continue;
2316
2317                 /* call was inlined, Phi/Projs for current graph must be recomputed */
2318                 phiproj_computed = 0;
2319
2320                 /* remove it from the caller list */
2321                 list_del(&curr_call->list);
2322
2323                 /* callee was inlined. Append its call list. */
2324                 env->got_inline = 1;
2325                 --env->n_call_nodes;
2326
2327                 /* we just generate a bunch of new calls */
2328                 loop_depth = curr_call->loop_depth;
2329                 list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
2330                         inline_irg_env *penv = get_irg_link(centry->callee);
2331                         ir_node        *new_call;
2332                         call_entry     *new_entry;
2333
2334                         /* after we have inlined callee, all called methods inside
2335                          * callee are now called once more */
2336                         ++penv->n_callers;
2337
2338                         /* Note that the src list points to Call nodes in the inlined graph,
2339                          * but we need Call nodes in our graph. Luckily the inliner leaves
2340                          * this information in the link field. */
2341                         new_call = get_irn_link(centry->call);
2342                         assert(is_Call(new_call));
2343
2344                         new_entry = duplicate_call_entry(centry, new_call, loop_depth);
2345                         list_add_tail(&new_entry->list, &env->calls);
2346                         maybe_push_call(pqueue, new_entry, inline_threshold);
2347                 }
2348
2349                 env->n_call_nodes += callee_env->n_call_nodes;
2350                 env->n_nodes += callee_env->n_nodes;
2351                 --callee_env->n_callers;
2352         }
2353         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
2354         del_pqueue(pqueue);
2355 }
2356
2357 /*
2358  * Heuristic inliner. Calculates a benefice value for every call and inlines
2359  * those calls with a value higher than the threshold.
2360  */
2361 void inline_functions(unsigned maxsize, int inline_threshold,
2362                       opt_ptr after_inline_opt)
2363 {
2364         inline_irg_env   *env;
2365         int              i, n_irgs;
2366         ir_graph         *rem;
2367         wenv_t           wenv;
2368         pmap             *copied_graphs;
2369         pmap_entry       *pm_entry;
2370         ir_graph         **irgs;
2371
2372         rem = current_ir_graph;
2373         obstack_init(&temp_obst);
2374
2375         irgs = create_irg_list();
2376
2377         /* a map for the copied graphs, used to inline recursive calls */
2378         copied_graphs = pmap_create();
2379
2380         /* extend all irgs by a temporary data structure for inlining. */
2381         n_irgs = get_irp_n_irgs();
2382         for (i = 0; i < n_irgs; ++i)
2383                 set_irg_link(irgs[i], alloc_inline_irg_env());
2384
2385         /* Pre-compute information in temporary data structure. */
2386         wenv.ignore_runtime = 0;
2387         wenv.ignore_callers = 0;
2388         for (i = 0; i < n_irgs; ++i) {
2389                 ir_graph *irg = irgs[i];
2390
2391                 free_callee_info(irg);
2392
2393                 wenv.x = get_irg_link(irg);
2394                 assure_cf_loop(irg);
2395                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
2396         }
2397
2398         /* -- and now inline. -- */
2399         for (i = 0; i < n_irgs; ++i) {
2400                 ir_graph *irg = irgs[i];
2401
2402                 inline_into(irg, maxsize, inline_threshold, copied_graphs);
2403         }
2404
2405         for (i = 0; i < n_irgs; ++i) {
2406                 ir_graph *irg = irgs[i];
2407
2408                 env = get_irg_link(irg);
2409                 if (env->got_inline && after_inline_opt != NULL) {
2410                         /* this irg got calls inlined: optimize it */
2411                         after_inline_opt(irg);
2412                 }
2413                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
2414                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
2415                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
2416                         env->n_callers_orig, env->n_callers,
2417                         get_entity_name(get_irg_entity(irg))));
2418                 }
2419         }
2420
2421         /* kill the copied graphs: we don't need them anymore */
2422         foreach_pmap(copied_graphs, pm_entry) {
2423                 ir_graph *copy = pm_entry->value;
2424
2425                 /* reset the entity, otherwise it will be deleted in the next step ... */
2426                 set_irg_entity(copy, NULL);
2427                 free_ir_graph(copy);
2428         }
2429         pmap_destroy(copied_graphs);
2430
2431         xfree(irgs);
2432
2433         obstack_free(&temp_obst, NULL);
2434         current_ir_graph = rem;
2435 }
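
/*
 * A minimal usage sketch, assuming opt_ptr is a void (*)(ir_graph *) callback
 * and reusing the local optimizations already invoked elsewhere in this file
 * as cleanup.  The parameter values are illustrative, not tuned defaults, and
 * my_after_inline is a hypothetical helper:
 *
 *     static void my_after_inline(ir_graph *irg)
 *     {
 *         optimize_graph_df(irg);
 *         optimize_cf(irg);
 *     }
 *
 *     ...
 *     inline_functions(750, 0, my_after_inline);
 */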
2436
2437 struct inline_functions_pass_t {
2438         ir_prog_pass_t pass;
2439         unsigned       maxsize;
2440         int            inline_threshold;
2441         opt_ptr        after_inline_opt;
2442 };
2443
2444 /**
2445  * Wrapper to run inline_functions() as an ir_prog pass.
2446  */
2447 static int inline_functions_wrapper(ir_prog *irp, void *context)
2448 {
2449         struct inline_functions_pass_t *pass = context;
2450
2451         (void)irp;
2452         inline_functions(pass->maxsize, pass->inline_threshold,
2453                          pass->after_inline_opt);
2454         return 0;
2455 }
2456
2457 /* create a ir_prog pass for inline_functions */
2458 ir_prog_pass_t *inline_functions_pass(
2459           const char *name, unsigned maxsize, int inline_threshold,
2460           opt_ptr after_inline_opt) {
2461         struct inline_functions_pass_t *pass =
2462                 XMALLOCZ(struct inline_functions_pass_t);
2463
2464         pass->maxsize          = maxsize;
2465         pass->inline_threshold = inline_threshold;
2466         pass->after_inline_opt = after_inline_opt;
2467
2468         return def_prog_pass_constructor(
2469                 &pass->pass, name ? name : "inline_functions",
2470                 inline_functions_wrapper);
2471 }
2472
2473 void firm_init_inline(void)
2474 {
2475         FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
2476 }