1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief    Dead node elimination and Procedure Inlining.
23  * @author   Michael Beck, Goetz Lindenmaier
24  * @version  $Id$
25  */
26 #include "config.h"
27
28 #include <limits.h>
29 #include <assert.h>
30
31 #include "irnode_t.h"
32 #include "irgraph_t.h"
33 #include "irprog_t.h"
34
35 #include "iroptimize.h"
36 #include "ircons_t.h"
37 #include "iropt_t.h"
38 #include "irgopt.h"
39 #include "irgmod.h"
40 #include "irgwalk.h"
41
42 #include "array_t.h"
43 #include "list.h"
44 #include "pset.h"
45 #include "pmap.h"
46 #include "pdeq.h"
47 #include "xmalloc.h"
48 #include "pqueue.h"
49
50 #include "irouts.h"
51 #include "irloop_t.h"
52 #include "irbackedge_t.h"
53 #include "opt_init.h"
54 #include "cgana.h"
55 #include "trouts.h"
56 #include "error.h"
57
58 #include "analyze_irg_args.h"
59 #include "iredges_t.h"
60 #include "irflag_t.h"
61 #include "irhooks.h"
62 #include "irtools.h"
63 #include "iropt_dbg.h"
64 #include "irpass_t.h"
65
66 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
67
68 /*------------------------------------------------------------------*/
69 /* Routines for dead node elimination / copying garbage collection  */
70 /* of the obstack.                                                  */
71 /*------------------------------------------------------------------*/
72
73 /**
74  * Remember the new node in the old node by using a field all nodes have.
75  */
76 #define set_new_node(oldn, newn)  set_irn_link(oldn, newn)
77
78 /**
79  * Get this new node, before the old node is forgotten.
80  */
81 #define get_new_node(oldn) get_irn_link(oldn)
82
83 /**
84  * Check if a new node was set.
85  */
86 #define has_new_node(n) (get_new_node(n) != NULL)
87
88 /**
89  * We use the block_visited flag to mark that we have computed the
90  * number of useful predecessors for this block.
91  * Further we encode the new arity in this flag in the old blocks.
92  * Remembering the arity is useful, as it saves a lot of pointer
93  * accesses.  This function is called for all Phi and Block nodes
94  * in a Block.
95  */
96 static inline int
97 compute_new_arity(ir_node *b)
98 {
99         int i, res, irn_arity;
100         int irg_v, block_v;
101
102         irg_v = get_irg_block_visited(current_ir_graph);
103         block_v = get_Block_block_visited(b);
104         if (block_v >= irg_v) {
105                 /* we computed the number of preds for this block and saved it in the
106                    block_v flag */
107                 return block_v - irg_v;
108         } else {
109                 /* compute the number of good predecessors */
110                 res = irn_arity = get_irn_arity(b);
111                 for (i = 0; i < irn_arity; i++)
112                         if (is_Bad(get_irn_n(b, i))) res--;
113         /* save it in the flag. */
114         set_Block_block_visited(b, irg_v + res);
115         return res;
116         }
117 }
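
/*
 * A worked example of the encoding above (hypothetical numbers): if
 * get_irg_block_visited(irg) is 100 and a block has 3 predecessors, one of
 * them Bad, the first call stores 100 + 2 = 102 in the block-visited flag
 * and returns 2; every later call simply returns 102 - 100 = 2.
 */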
118
119 /**
120  * Copies the node to the new obstack. The Ins of the new node point to
121  * the predecessors on the old obstack.  For block/phi nodes not all
122  * predecessors might be copied.  n->link points to the new node.
123  * For Phi and Block nodes the function allocates in-arrays with an arity
124  * only for useful predecessors.  The arity is determined by counting
125  * the non-bad predecessors of the block.
126  *
127  * @param n    The node to be copied
128  * @param env  if non-NULL, the node number attribute will be copied to the new node
129  *
130  * Note: Also used for loop unrolling.
131  */
132 static void copy_node(ir_node *n, void *env)
133 {
134         ir_node *nn, *block;
135         int new_arity;
136         ir_op *op = get_irn_op(n);
137         (void) env;
138
139         if (op == op_Bad) {
140                 /* node copied already */
141                 return;
142         } else if (op == op_Block) {
143                 block = NULL;
144                 new_arity = compute_new_arity(n);
145                 n->attr.block.graph_arr = NULL;
146         } else {
147                 block = get_nodes_block(n);
148                 if (op == op_Phi) {
149                         new_arity = compute_new_arity(block);
150                 } else {
151                         new_arity = get_irn_arity(n);
152                 }
153         }
154         nn = new_ir_node(get_irn_dbg_info(n),
155                 current_ir_graph,
156                 block,
157                 op,
158                 get_irn_mode(n),
159                 new_arity,
160                 get_irn_in(n) + 1);
161         /* Copy the attributes.  These might point to additional data.  If this
162            was allocated on the old obstack the pointers now are dangling.  This
163            frees e.g. the memory of the graph_arr allocated in new_immBlock. */
164         if (op == op_Block) {
165                 /* we cannot allow blocks WITHOUT macroblock input */
166                 set_Block_MacroBlock(nn, get_Block_MacroBlock(n));
167         }
168         copy_node_attr(n, nn);
169
170         if (env != NULL) {
171                 /* for easier debugging, we want to copy the node numbers too */
172                 nn->node_nr = n->node_nr;
173         }
174
175         set_new_node(n, nn);
176         hook_dead_node_elim_subst(current_ir_graph, n, nn);
177 }
178
179 /**
180  * Copies new predecessors of old node to new node remembered in link.
181  * Spare the Bad predecessors of Phi and Block nodes.
182  */
183 static void copy_preds(ir_node *n, void *env)
184 {
185         ir_node *nn, *block;
186         int i, j, irn_arity;
187         (void) env;
188
189         nn = get_new_node(n);
190
191         if (is_Block(n)) {
192                 /* copy the macro block header */
193                 ir_node *mbh = get_Block_MacroBlock(n);
194
195                 if (mbh == n) {
196                         /* this block is a macroblock header */
197                         set_Block_MacroBlock(nn, nn);
198                 } else {
199                         /* get the macro block header */
200                         ir_node *nmbh = get_new_node(mbh);
201                         assert(nmbh != NULL);
202                         set_Block_MacroBlock(nn, nmbh);
203                 }
204
205                 /* Don't copy Bad nodes. */
206                 j = 0;
207                 irn_arity = get_irn_arity(n);
208                 for (i = 0; i < irn_arity; i++) {
209                         if (! is_Bad(get_irn_n(n, i))) {
210                                 ir_node *pred = get_irn_n(n, i);
211                                 set_irn_n(nn, j, get_new_node(pred));
212                                 j++;
213                         }
214                 }
215                 /* repair the block visited flag from above misuse. Repair it in both
216                    graphs so that the old one can still be used. */
217                 set_Block_block_visited(nn, 0);
218                 set_Block_block_visited(n, 0);
219                 /* Local optimization could not merge two subsequent blocks if
220                    the in array contained Bads.  Now it's possible.
221                    We don't call optimize_in_place as it requires
222                    that the fields in ir_graph are set properly. */
223                 if (!has_Block_entity(nn) &&
224                     get_opt_control_flow_straightening() &&
225                     get_Block_n_cfgpreds(nn) == 1 &&
226                     is_Jmp(get_Block_cfgpred(nn, 0))) {
227                         ir_node *old = get_nodes_block(get_Block_cfgpred(nn, 0));
228                         if (nn == old) {
229                                 /* Jmp jumps into the block it is in -- handle the self cycle. */
230                                 assert(is_Bad(get_new_node(get_irg_bad(current_ir_graph))));
231                                 exchange(nn, get_new_node(get_irg_bad(current_ir_graph)));
232                         } else {
233                                 exchange(nn, old);
234                         }
235                 }
236         } else if (is_Phi(n) && get_irn_arity(n) > 0) {
237                 /* Don't copy node if corresponding predecessor in block is Bad.
238                    The Block itself should not be Bad. */
239                 block = get_nodes_block(n);
240                 set_nodes_block(nn, get_new_node(block));
241                 j = 0;
242                 irn_arity = get_irn_arity(n);
243                 for (i = 0; i < irn_arity; i++) {
244                         if (! is_Bad(get_irn_n(block, i))) {
245                                 ir_node *pred = get_irn_n(n, i);
246                                 set_irn_n(nn, j, get_new_node(pred));
247                                 /*if (is_backedge(n, i)) set_backedge(nn, j);*/
248                                 j++;
249                         }
250                 }
251                 /* If the pre walker reached this Phi after the post walker visited the
252                    block, block_visited is > 0. */
253                 set_Block_block_visited(get_nodes_block(n), 0);
254                 /* Compacting the Phi's ins might generate Phis with only one
255                    predecessor. */
256                 if (get_irn_arity(nn) == 1)
257                         exchange(nn, get_irn_n(nn, 0));
258         } else {
259                 irn_arity = get_irn_arity(n);
260                 for (i = -1; i < irn_arity; i++)
261                         set_irn_n(nn, i, get_new_node(get_irn_n(n, i)));
262         }
263         /* Now the new node is complete.  We can add it to the hash table for CSE.
264            @@@ inlining aborts if we identify End. Why? */
265         if (!is_End(nn))
266                 add_identities(current_ir_graph->value_table, nn);
267 }
268
269 /**
270  * Copies the graph recursively, compacts the keep-alives of the end node.
271  *
272  * @param irg           the graph to be copied
273  * @param copy_node_nr  If non-zero, the node number will be copied
274  */
275 static void copy_graph(ir_graph *irg, int copy_node_nr)
276 {
277         ir_node *oe, *ne, *ob, *nb, *om, *nm; /* old end, new end, old bad, new bad, old NoMem, new NoMem */
278         ir_node *ka;      /* keep alive */
279         int i, irn_arity;
280         unsigned long vfl;
281
282         /* Some nodes must be copied by hand, sigh */
283         vfl = get_irg_visited(irg);
284         set_irg_visited(irg, vfl + 1);
285
286         oe = get_irg_end(irg);
287         mark_irn_visited(oe);
288         /* copy the end node by hand, allocate dynamic in array! */
289         ne = new_ir_node(get_irn_dbg_info(oe),
290                 irg,
291                 NULL,
292                 op_End,
293                 mode_X,
294                 -1,
295                 NULL);
296         /* Copy the attributes.  Well, there might be some in the future... */
297         copy_node_attr(oe, ne);
298         set_new_node(oe, ne);
299
300         /* copy the Bad node */
301         ob = get_irg_bad(irg);
302         mark_irn_visited(ob);
303         nb = new_ir_node(get_irn_dbg_info(ob),
304                 irg,
305                 NULL,
306                 op_Bad,
307                 mode_T,
308                 0,
309                 NULL);
310         copy_node_attr(ob, nb);
311         set_new_node(ob, nb);
312
313         /* copy the NoMem node */
314         om = get_irg_no_mem(irg);
315         mark_irn_visited(om);
316         nm = new_ir_node(get_irn_dbg_info(om),
317                 irg,
318                 NULL,
319                 op_NoMem,
320                 mode_M,
321                 0,
322                 NULL);
323         copy_node_attr(om, nm);
324         set_new_node(om, nm);
325
326         /* copy the live nodes */
327         set_irg_visited(irg, vfl);
328         irg_walk(get_nodes_block(oe), copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
329
330         /* Note: from now on, the visited flag of the graph is equal to vfl + 1 */
331
332         /* visit the anchors as well */
333         for (i = get_irg_n_anchors(irg) - 1; i >= 0; --i) {
334                 ir_node *n = get_irg_anchor(irg, i);
335
336                 if (n && (get_irn_visited(n) <= vfl)) {
337                         set_irg_visited(irg, vfl);
338                         irg_walk(n, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
339                 }
340         }
341
342         /* copy_preds for the end node ... */
343         set_nodes_block(ne, get_new_node(get_nodes_block(oe)));
344
345         /*- ... and now the keep alives. -*/
346         /* First pick the block nodes that are not marked yet and walk them.  We must
347            pick these first, as otherwise we would overlook blocks reachable from Phis. */
348         irn_arity = get_End_n_keepalives(oe);
349         for (i = 0; i < irn_arity; i++) {
350                 ka = get_End_keepalive(oe, i);
351                 if (is_Block(ka)) {
352                         if (get_irn_visited(ka) <= vfl) {
353                                 /* We must keep the block alive and copy everything reachable */
354                                 set_irg_visited(irg, vfl);
355                                 irg_walk(ka, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
356                         }
357                         add_End_keepalive(ne, get_new_node(ka));
358                 }
359         }
360
361         /* Now pick other nodes.  Here we will keep all! */
362         irn_arity = get_End_n_keepalives(oe);
363         for (i = 0; i < irn_arity; i++) {
364                 ka = get_End_keepalive(oe, i);
365                 if (!is_Block(ka)) {
366                         if (get_irn_visited(ka) <= vfl) {
367                                 /* We didn't copy the node yet.  */
368                                 set_irg_visited(irg, vfl);
369                                 irg_walk(ka, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
370                         }
371                         add_End_keepalive(ne, get_new_node(ka));
372                 }
373         }
374
375         /* start block sometimes only reached after keep alives */
376         set_nodes_block(nb, get_new_node(get_nodes_block(ob)));
377         set_nodes_block(nm, get_new_node(get_nodes_block(om)));
378 }
379
380 /**
381  * Copies the graph reachable from current_ir_graph->end to the obstack
382  * in current_ir_graph and fixes the environment.
383  * Then fixes the fields in current_ir_graph containing nodes of the
384  * graph.
385  *
386  * @param copy_node_nr  If non-zero, the node number will be copied
387  */
388 static void
389 copy_graph_env(int copy_node_nr)
390 {
391         ir_graph *irg = current_ir_graph;
392         ir_node *old_end, *new_anchor;
393         int i;
394
395         /* remove end_except and end_reg nodes */
396         old_end = get_irg_end(irg);
397         set_irg_end_except (irg, old_end);
398         set_irg_end_reg    (irg, old_end);
399
400         /* Not all nodes remembered in irg might be reachable
401            from the end node.  Assure their link is set to NULL, so that
402            we can test whether new nodes have been computed. */
403         for (i = get_irg_n_anchors(irg) - 1; i >= 0; --i) {
404                 ir_node *n = get_irg_anchor(irg, i);
405                 if (n != NULL)
406                         set_new_node(n, NULL);
407         }
408         /* we use the block walk flag for removing Bads from Blocks ins. */
409         inc_irg_block_visited(irg);
410
411         /* copy the graph */
412         copy_graph(irg, copy_node_nr);
413
414         /* fix the anchor */
415         old_end    = get_irg_end(irg);
416         new_anchor = new_Anchor(irg);
417
418         for (i = get_irg_n_anchors(irg) - 1; i >= 0; --i) {
419                 ir_node *n = get_irg_anchor(irg, i);
420                 if (n)
421                         set_irn_n(new_anchor, i, get_new_node(n));
422         }
423         free_End(old_end);
424         irg->anchor = new_anchor;
425
426         /* ensure the new anchor is placed in the endblock */
427         set_nodes_block(new_anchor, get_irg_end_block(irg));
428 }
429
430 /**
431  * Copies all reachable nodes to a new obstack.  Removes bad inputs
432  * from block nodes and the corresponding inputs from Phi nodes.
433  * Merges single exit blocks with single entry blocks and removes
434  * 1-input Phis.
435  * Adds all new nodes to a new hash table for CSE.  Does not
436  * perform CSE, so the hash table might contain common subexpressions.
437  */
438 void dead_node_elimination(ir_graph *irg)
439 {
440         ir_graph *rem;
441 #ifdef INTERPROCEDURAL_VIEW
442         int rem_ipview = get_interprocedural_view();
443 #endif
444         struct obstack *graveyard_obst = NULL;
445         struct obstack *rebirth_obst   = NULL;
446
447         edges_deactivate(irg);
448
449         /* inform statistics that we started a dead-node elimination run */
450         hook_dead_node_elim(irg, 1);
451
452         /* Remember external state of current_ir_graph. */
453         rem = current_ir_graph;
454         current_ir_graph = irg;
455 #ifdef INTERPROCEDURAL_VIEW
456         set_interprocedural_view(0);
457 #endif
458
459         assert(get_irg_phase_state(irg) != phase_building);
460
461         /* Handle graph state */
462         free_callee_info(irg);
463         free_irg_outs(irg);
464         free_trouts();
465
466         /* @@@ so far we lose loops when copying */
467         free_loop_information(irg);
468
469         set_irg_doms_inconsistent(irg);
470
471         /* A quiet place, where the old obstack can rest in peace,
472            until it will be cremated. */
473         graveyard_obst = irg->obst;
474
475         /* A new obstack, where the reachable nodes will be copied to. */
476         rebirth_obst = XMALLOC(struct obstack);
477         irg->obst = rebirth_obst;
478         obstack_init(irg->obst);
479         irg->last_node_idx = 0;
480
481         /* We also need a new value table for CSE */
482         del_identities(irg->value_table);
483         irg->value_table = new_identities();
484
485         /* Copy the graph from the old to the new obstack */
486         copy_graph_env(/*copy_node_nr=*/1);
487
488         /* Free memory from old unoptimized obstack */
489         obstack_free(graveyard_obst, 0);  /* First empty the obstack ... */
490         xfree(graveyard_obst);            /* ... then free it.           */
491
492         /* inform statistics that the run is over */
493         hook_dead_node_elim(irg, 0);
494
495         current_ir_graph = rem;
496 #ifdef INTERPROCEDURAL_VIEW
497         set_interprocedural_view(rem_ipview);
498 #endif
499 }
500
501 ir_graph_pass_t *dead_node_elimination_pass(const char *name)
502 {
503         return def_graph_pass(name ? name : "dce", dead_node_elimination);
504 }
505
506 /**
507  * Relink bad predecessors of a block and store the old in array in the
508  * link field. This function is called by relink_bad_predecessors().
509  * The array in the link field starts with the block operand at position 0.
510  * If the block has bad predecessors, create a new in array without bad preds.
511  * Otherwise leave the in array untouched.
512  */
513 static void relink_bad_block_predecessors(ir_node *n, void *env)
514 {
515         ir_node **new_in, *irn;
516         int i, new_irn_n, old_irn_arity, new_irn_arity = 0;
517         (void) env;
518
519         /* if link field of block is NULL, look for bad predecessors otherwise
520            this is already done */
521         if (is_Block(n) && get_irn_link(n) == NULL) {
522                 /* save old predecessors in link field (position 0 is the block operand)*/
523                 set_irn_link(n, get_irn_in(n));
524
525                 /* count predecessors without bad nodes */
526                 old_irn_arity = get_irn_arity(n);
527                 for (i = 0; i < old_irn_arity; i++)
528                         if (!is_Bad(get_irn_n(n, i)))
529                                 ++new_irn_arity;
530
531                 /* arity changing: set new predecessors without bad nodes */
532                 if (new_irn_arity < old_irn_arity) {
533                         /* Get new predecessor array. We do not resize the array, as we must
534                            keep the old one to update Phis. */
535                         new_in = NEW_ARR_D(ir_node *, current_ir_graph->obst, (new_irn_arity+1));
536
537                         /* set new predecessors in array */
538                         new_in[0] = NULL;
539                         new_irn_n = 1;
540                         for (i = 0; i < old_irn_arity; i++) {
541                                 irn = get_irn_n(n, i);
542                                 if (!is_Bad(irn)) {
543                                         new_in[new_irn_n] = irn;
544                                         is_backedge(n, i) ? set_backedge(n, new_irn_n-1) : set_not_backedge(n, new_irn_n-1);
545                                         ++new_irn_n;
546                                 }
547                         }
548                         /* ARR_SETLEN(int, n->attr.block.backedge, new_irn_arity); */
549                         ARR_SHRINKLEN(n->attr.block.backedge, new_irn_arity);
550                         n->in = new_in;
551                 } /* ir node has bad predecessors */
552         } /* Block is not relinked */
553 }
554
555 /**
556  * Relinks Bad predecessors of Blocks and Phis; called as walker by
557  * remove_bad_predecessors(). If n is a Block, call
558  * relink_bad_block_predecessors(). If n is a Phi node, also call the relinking
559  * function for the Phi's Block. If this block has bad predecessors, relink the
560  * predecessors of the Phi node.
561  */
562 static void relink_bad_predecessors(ir_node *n, void *env)
563 {
564         ir_node *block, **old_in;
565         int i, old_irn_arity, new_irn_arity;
566
567         /* relink bad predecessors of a block */
568         if (is_Block(n))
569                 relink_bad_block_predecessors(n, env);
570
571         /* If Phi node relink its block and its predecessors */
572         if (is_Phi(n)) {
573                 /* Relink predecessors of phi's block */
574                 block = get_nodes_block(n);
575                 if (get_irn_link(block) == NULL)
576                         relink_bad_block_predecessors(block, env);
577
578                 old_in = (ir_node **)get_irn_link(block); /* Of Phi's Block */
579                 old_irn_arity = ARR_LEN(old_in);
580
581                 /* Relink Phi predecessors if count of predecessors changed */
582                 if (old_irn_arity != ARR_LEN(get_irn_in(block))) {
583                         /* set new predecessors in array
584                            n->in[0] remains the same block */
585                         new_irn_arity = 1;
586                         for (i = 1; i < old_irn_arity; i++)
587                                 if (!is_Bad(old_in[i])) {
588                                         n->in[new_irn_arity] = n->in[i];
589                                         is_backedge(n, i) ? set_backedge(n, new_irn_arity) : set_not_backedge(n, new_irn_arity);
590                                         ++new_irn_arity;
591                                 }
592
593                         ARR_SETLEN(ir_node *, n->in, new_irn_arity);
594                         ARR_SETLEN(int, n->attr.phi.u.backedge, new_irn_arity);
595                 }
596         } /* n is a Phi node */
597 }
598
599 /*
600  * Removes Bad predecessors from Blocks and the corresponding
601  * inputs to Phi nodes as in dead_node_elimination but without
602  * copying the graph.
603  * On walking up set the link field to NULL, on walking down call
604  * relink_bad_predecessors() (This function stores the old in array
605  * to the link field and sets a new in array if arity of predecessors
606  * changes).
607  */
608 void remove_bad_predecessors(ir_graph *irg)
609 {
610         panic("Fix backedge handling first");
611         irg_walk_graph(irg, firm_clear_link, relink_bad_predecessors, NULL);
612 }
613
614
615 /*
616    __                      _  __ __
617   (_     __    o     _    | \/  |_
618   __)|_| | \_/ | \_/(/_   |_/\__|__
619
620   The following stuff implements a facility that automatically patches
621   registered ir_node pointers to the new node when a dead node elimination occurs.
622 */
623
624 struct _survive_dce_t {
625         struct obstack obst;
626         pmap *places;
627         pmap *new_places;
628         hook_entry_t dead_node_elim;
629         hook_entry_t dead_node_elim_subst;
630 };
631
632 typedef struct _survive_dce_list_t {
633         struct _survive_dce_list_t *next;
634         ir_node **place;
635 } survive_dce_list_t;
636
637 static void dead_node_hook(void *context, ir_graph *irg, int start)
638 {
639         survive_dce_t *sd = context;
640         (void) irg;
641
642         /* Create a new map before the dead node elimination is performed. */
643         if (start) {
644                 sd->new_places = pmap_create_ex(pmap_count(sd->places));
645         } else {
646                 /* Patch back all nodes if dead node elimination is over and something is to be done. */
647                 pmap_destroy(sd->places);
648                 sd->places     = sd->new_places;
649                 sd->new_places = NULL;
650         }
651 }
652
653 /**
654  * Hook called when dead node elimination replaces old by nw.
655  */
656 static void dead_node_subst_hook(void *context, ir_graph *irg, ir_node *old, ir_node *nw)
657 {
658         survive_dce_t *sd = context;
659         survive_dce_list_t *list = pmap_get(sd->places, old);
660         (void) irg;
661
662         /* If the node is to be patched back, write the new address to all registered locations. */
663         if (list) {
664                 survive_dce_list_t *p;
665
666                 for (p = list; p; p = p->next)
667                         *(p->place) = nw;
668
669                 pmap_insert(sd->new_places, nw, list);
670         }
671 }
672
673 /**
674  * Make a new Survive DCE environment.
675  */
676 survive_dce_t *new_survive_dce(void)
677 {
678         survive_dce_t *res = XMALLOC(survive_dce_t);
679         obstack_init(&res->obst);
680         res->places     = pmap_create();
681         res->new_places = NULL;
682
683         res->dead_node_elim.hook._hook_dead_node_elim = dead_node_hook;
684         res->dead_node_elim.context                   = res;
685         res->dead_node_elim.next                      = NULL;
686
687         res->dead_node_elim_subst.hook._hook_dead_node_elim_subst = dead_node_subst_hook;
688         res->dead_node_elim_subst.context = res;
689         res->dead_node_elim_subst.next    = NULL;
690
691         register_hook(hook_dead_node_elim, &res->dead_node_elim);
692         register_hook(hook_dead_node_elim_subst, &res->dead_node_elim_subst);
693         return res;
694 }
695
696 /**
697  * Free a Survive DCE environment.
698  */
699 void free_survive_dce(survive_dce_t *sd)
700 {
701         obstack_free(&sd->obst, NULL);
702         pmap_destroy(sd->places);
703         unregister_hook(hook_dead_node_elim, &sd->dead_node_elim);
704         unregister_hook(hook_dead_node_elim_subst, &sd->dead_node_elim_subst);
705         xfree(sd);
706 }
707
708 /**
709  * Register a node pointer to be patched upon DCE.
710  * When DCE occurs, the node pointer specified by @p place will be
711  * patched to the new address of the node it is pointing to.
712  *
713  * @param sd    The Survive DCE environment.
714  * @param place The address of the node pointer.
715  */
716 void survive_dce_register_irn(survive_dce_t *sd, ir_node **place)
717 {
718         if (*place != NULL) {
719                 ir_node *irn      = *place;
720                 survive_dce_list_t *curr = pmap_get(sd->places, irn);
721                 survive_dce_list_t *nw   = OALLOC(&sd->obst, survive_dce_list_t);
722
723                 nw->next  = curr;
724                 nw->place = place;
725
726                 pmap_insert(sd->places, irn, nw);
727         }
728 }
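
/*
 * A minimal usage sketch of the Survive DCE facility (illustrative only;
 * 'irg' and the node pointer 'my_node' are assumed to exist in the caller):
 *
 *   survive_dce_t *sd = new_survive_dce();
 *   survive_dce_register_irn(sd, &my_node);  // register the pointer to be patched
 *   dead_node_elimination(irg);              // my_node now points to the copied node
 *   free_survive_dce(sd);
 */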
729
730 /*--------------------------------------------------------------------*/
731 /*  Functionality for inlining                                         */
732 /*--------------------------------------------------------------------*/
733
734 /**
735  * Copy node for inlining.  Updates attributes that change when
736  * inlining but not for dead node elimination.
737  *
738  * Copies the node by calling copy_node() and then updates the entity if
739  * it's a local one.  env must be a pointer to the frame type of the
740  * inlined procedure. The new entities must be in the link fields of
741  * the old entities.
742  */
743 static void copy_node_inline(ir_node *n, void *env)
744 {
745         ir_node *nn;
746         ir_type *frame_tp = (ir_type *)env;
747
748         copy_node(n, NULL);
749         if (is_Sel(n)) {
750                 nn = get_new_node(n);
751                 assert(is_Sel(nn));
752                 /* use copied entities from the new frame */
753                 if (get_entity_owner(get_Sel_entity(n)) == frame_tp) {
754                         set_Sel_entity(nn, get_entity_link(get_Sel_entity(n)));
755                 }
756         } else if (is_Block(n)) {
757                 nn = get_new_node(n);
758                 nn->attr.block.irg.irg = current_ir_graph;
759         }
760 }
761
762 /**
763  * Copies new predecessors of the old node and moves constants to
764  * the Start Block.
765  */
766 static void copy_preds_inline(ir_node *n, void *env)
767 {
768         ir_node *nn;
769
770         copy_preds(n, env);
771         nn = skip_Id(get_new_node(n));
772         if (is_irn_constlike(nn)) {
773                 /* move Constants into the start block */
774                 set_nodes_block(nn, get_irg_start_block(current_ir_graph));
775
776                 n = identify_remember(current_ir_graph->value_table, nn);
777                 if (nn != n) {
778                         DBG_OPT_CSE(nn, n);
779                         exchange(nn, n);
780                 }
781         }
782 }
783
784 /**
785  * Walker: checks if P_value_arg_base is used.
786  */
787 static void find_addr(ir_node *node, void *env)
788 {
789         int *allow_inline = env;
790         if (is_Sel(node)) {
791                 ir_graph *irg = current_ir_graph;
792                 if (get_Sel_ptr(node) == get_irg_frame(irg)) {
793                         /* access to frame */
794                         ir_entity *ent = get_Sel_entity(node);
795                         if (get_entity_owner(ent) != get_irg_frame_type(irg)) {
796                                 /* access to value_type */
797                                 *allow_inline = 0;
798                         }
799                 }
800         } else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
801                 /* From GCC:
802                  * Refuse to inline an alloca call unless the user explicitly forced it, as
803                  * this may change the program's memory overhead drastically when the function
804                  * using alloca is called in a loop.  In the GCC present in SPEC2000, inlining
805                  * into schedule_block caused it to require 2GB of RAM instead of 256MB.
806                  *
807                  * Sadly, this is true for our implementation as well.
808                  * Moreover, we cannot differentiate between alloca() and VLAs yet, so this
809                  * disables inlining of functions using VLAs (which are completely safe).
810                  *
811                  * 2 Solutions:
812                  * - add a flag to the Alloc node for "real" alloca() calls
813                  * - add a new Stack-Restore node at the end of a function using alloca()
814                  */
815                 *allow_inline = 0;
816         }
817 }
818
819 /**
820  * Check if we can inline a given call.
821  * Currently, we cannot inline two cases:
822  * - calls with compound arguments
823  * - graphs that take the address of a parameter
824  *
825  * Check these conditions here.
826  */
827 static int can_inline(ir_node *call, ir_graph *called_graph)
828 {
829         ir_type *call_type = get_Call_type(call);
830         int params, ress, i, res;
831         assert(is_Method_type(call_type));
832
833         params = get_method_n_params(call_type);
834         ress   = get_method_n_ress(call_type);
835
836         /* check parameters for compound arguments */
837         for (i = 0; i < params; ++i) {
838                 ir_type *p_type = get_method_param_type(call_type, i);
839
840                 if (is_compound_type(p_type))
841                         return 0;
842         }
843
844         /* check results for compound types */
845         for (i = 0; i < ress; ++i) {
846                 ir_type *r_type = get_method_res_type(call_type, i);
847
848                 if (is_compound_type(r_type))
849                         return 0;
850         }
851
852         res = 1;
853         irg_walk_graph(called_graph, find_addr, NULL, &res);
854
855         return res;
856 }
857
858 enum exc_mode {
859         exc_handler,    /**< There is a handler. */
860         exc_no_handler  /**< Exception handling not represented. */
861 };
862
863 /* Inlines a method at the given call site. */
864 int inline_method(ir_node *call, ir_graph *called_graph)
865 {
866         ir_node             *pre_call;
867         ir_node             *post_call, *post_bl;
868         ir_node             *in[pn_Start_max];
869         ir_node             *end, *end_bl, *block;
870         ir_node             **res_pred;
871         ir_node             **cf_pred;
872         ir_node             **args_in;
873         ir_node             *ret, *phi;
874         int                 arity, n_ret, n_exc, n_res, i, n, j, rem_opt, irn_arity, n_params;
875         int                 n_mem_phi;
876         enum exc_mode       exc_handling;
877         ir_type             *called_frame, *curr_frame, *mtp, *ctp;
878         ir_entity           *ent;
879         ir_graph            *rem, *irg;
880         irg_inline_property prop = get_irg_inline_property(called_graph);
881         unsigned long       visited;
882
883         if (prop == irg_inline_forbidden)
884                 return 0;
885
886         ent = get_irg_entity(called_graph);
887
888         mtp = get_entity_type(ent);
889         ctp = get_Call_type(call);
890         n_params = get_method_n_params(mtp);
891         n_res    = get_method_n_ress(mtp);
892         if (n_params > get_method_n_params(ctp)) {
893                 /* this is a bad feature of C: without a prototype, we can
894                  * call a function with fewer parameters than declared. Currently
895                  * we don't support this, although we could use Unknown then. */
896                 return 0;
897         }
898         if (n_res != get_method_n_ress(ctp)) {
899                 return 0;
900         }
901
902         /* Argh, compiling C has some bad consequences:
903          * if argument and parameter types differ, it is implementation-defined
904          * what happens.  We support inlining if the bit size of the types
905          * matches AND the same arithmetic is used. */
906         for (i = n_params - 1; i >= 0; --i) {
907                 ir_type *param_tp = get_method_param_type(mtp, i);
908                 ir_type *arg_tp   = get_method_param_type(ctp, i);
909
910                 if (param_tp != arg_tp) {
911                         ir_mode *pmode = get_type_mode(param_tp);
912                         ir_mode *amode = get_type_mode(arg_tp);
913
914                         if (pmode == NULL || amode == NULL)
915                                 return 0;
916                         if (get_mode_size_bits(pmode) != get_mode_size_bits(amode))
917                                 return 0;
918                         if (get_mode_arithmetic(pmode) != get_mode_arithmetic(amode))
919                                 return 0;
920                         /* otherwise we can simply "reinterpret" the bits */
921                 }
922         }
923         for (i = n_res - 1; i >= 0; --i) {
924                 ir_type *decl_res_tp = get_method_res_type(mtp, i);
925                 ir_type *used_res_tp = get_method_res_type(ctp, i);
926
927                 if (decl_res_tp != used_res_tp) {
928                         ir_mode *decl_mode = get_type_mode(decl_res_tp);
929                         ir_mode *used_mode = get_type_mode(used_res_tp);
930                         if (decl_mode == NULL || used_mode == NULL)
931                                 return 0;
932                         if (get_mode_size_bits(decl_mode) != get_mode_size_bits(used_mode))
933                                 return 0;
934                         if (get_mode_arithmetic(decl_mode) != get_mode_arithmetic(used_mode))
935                                 return 0;
936                         /* otherwise we can "reinterpret" the bits */
937                 }
938         }
939
940         irg = get_irn_irg(call);
941
942         /*
943          * We cannot inline a recursive call. The graph must be copied before
944          * the call to inline_method() using create_irg_copy().
945          */
946         if (called_graph == irg)
947                 return 0;
948
949         /*
950          * currently, we cannot inline two cases:
951          * - call with compound arguments
952          * - graphs that take the address of a parameter
953          */
954         if (! can_inline(call, called_graph))
955                 return 0;
956
957         rem = current_ir_graph;
958         current_ir_graph = irg;
959
960         DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));
961
962         /* --  Turn off optimizations, as they can cause problems when allocating new nodes. -- */
963         rem_opt = get_opt_optimize();
964         set_optimize(0);
965
966         /* Handle graph state */
967         assert(get_irg_phase_state(irg) != phase_building);
968         assert(get_irg_pinned(irg) == op_pin_state_pinned);
969         assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
970         set_irg_outs_inconsistent(irg);
971         set_irg_extblk_inconsistent(irg);
972         set_irg_doms_inconsistent(irg);
973         set_irg_loopinfo_inconsistent(irg);
974         set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
975         set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);
976
977         /* -- Check preconditions -- */
978         assert(is_Call(call));
979
980         /* here we know we WILL inline, so inform the statistics */
981         hook_inline(call, called_graph);
982
983         /* -- Decide how to handle exception control flow: Is there a handler
984            for the Call node, or do we branch directly to End on an exception?
985            exc_handling:
986            exc_handler:    There is a handler.
987            exc_no_handler: Exception handling not represented in Firm. -- */
988         {
989                 ir_node *Xproj = NULL;
990                 ir_node *proj;
991                 for (proj = get_irn_link(call); proj; proj = get_irn_link(proj)) {
992                         long proj_nr = get_Proj_proj(proj);
993                         if (proj_nr == pn_Call_X_except) Xproj = proj;
994                 }
995                 exc_handling = Xproj != NULL ? exc_handler : exc_no_handler;
996         }
997
998         /* create the argument tuple */
999         NEW_ARR_A(ir_node *, args_in, n_params);
1000
1001         block = get_nodes_block(call);
1002         for (i = n_params - 1; i >= 0; --i) {
1003                 ir_node *arg      = get_Call_param(call, i);
1004                 ir_type *param_tp = get_method_param_type(mtp, i);
1005                 ir_mode *mode     = get_type_mode(param_tp);
1006
1007                 if (mode != get_irn_mode(arg)) {
1008                         arg = new_r_Conv(block, arg, mode);
1009                 }
1010                 args_in[i] = arg;
1011         }
1012
1013         /* -- Pre_call collects the inputs handed to the procedure and later
1014            replaces the Start node of the called graph.  Post_call is the old
1015            Call node and collects the results of the called graph.  Both will
1016            end up being a tuple.  -- */
1017         post_bl = get_nodes_block(call);
1018         set_irg_current_block(irg, post_bl);
1019         /* XxMxPxPxPxT of Start + parameter of Call */
1020         in[pn_Start_X_initial_exec]   = new_Jmp();
1021         in[pn_Start_M]                = get_Call_mem(call);
1022         in[pn_Start_P_frame_base]     = get_irg_frame(irg);
1023         in[pn_Start_P_tls]            = get_irg_tls(irg);
1024         in[pn_Start_T_args]           = new_Tuple(n_params, args_in);
1025         pre_call = new_Tuple(pn_Start_max, in);
1026         post_call = call;
1027
1028         /* --
1029            The new block gets the ins of the old block, pre_call and all its
1030            predecessors and all Phi nodes. -- */
1031         part_block(pre_call);
1032
1033         /* -- Prepare state for dead node elimination -- */
1034         /* Visited flags in calling irg must be >= flag in called irg.
1035            Else walker and arity computation will not work. */
1036         if (get_irg_visited(irg) <= get_irg_visited(called_graph))
1037                 set_irg_visited(irg, get_irg_visited(called_graph) + 1);
1038         if (get_irg_block_visited(irg) < get_irg_block_visited(called_graph))
1039                 set_irg_block_visited(irg, get_irg_block_visited(called_graph));
1040         visited = get_irg_visited(irg);
1041
1042         /* Set pre_call as new Start node in link field of the start node of
1043            calling graph and pre_calls block as new block for the start block
1044            of calling graph.
1045            Further mark these nodes so that they are not visited by the
1046            copying. */
1047         set_irn_link(get_irg_start(called_graph), pre_call);
1048         set_irn_visited(get_irg_start(called_graph), visited);
1049         set_irn_link(get_irg_start_block(called_graph), get_nodes_block(pre_call));
1050         set_irn_visited(get_irg_start_block(called_graph), visited);
1051
1052         set_irn_link(get_irg_bad(called_graph), get_irg_bad(current_ir_graph));
1053         set_irn_visited(get_irg_bad(called_graph), visited);
1054
1055         set_irn_link(get_irg_no_mem(called_graph), get_irg_no_mem(current_ir_graph));
1056         set_irn_visited(get_irg_no_mem(called_graph), visited);
1057
1058         /* Initialize for compaction of in arrays */
1059         inc_irg_block_visited(irg);
1060
1061         /* -- Replicate local entities of the called_graph -- */
1062         /* copy the entities. */
1063         irp_reserve_resources(irp, IR_RESOURCE_ENTITY_LINK);
1064         called_frame = get_irg_frame_type(called_graph);
1065         curr_frame   = get_irg_frame_type(irg);
1066         for (i = 0, n = get_class_n_members(called_frame); i < n; ++i) {
1067                 ir_entity *new_ent, *old_ent;
1068                 old_ent = get_class_member(called_frame, i);
1069                 new_ent = copy_entity_own(old_ent, curr_frame);
1070                 set_entity_link(old_ent, new_ent);
1071         }
1072
1073         /* visited is > than that of the called graph.  With this trick visited will
1074            remain unchanged, so an outer walker, e.g., one searching for Call nodes
1075            to inline and calling this inliner, will not visit the inlined nodes. */
1076         set_irg_visited(irg, get_irg_visited(irg)-1);
1077
1078         /* -- Performing dead node elimination inlines the graph -- */
1079         /* Copies the nodes to the obstack of current_ir_graph. Updates links to new
1080            entities. */
1081         irg_walk(get_irg_end(called_graph), copy_node_inline, copy_preds_inline,
1082                  get_irg_frame_type(called_graph));
1083
1084         irp_free_resources(irp, IR_RESOURCE_ENTITY_LINK);
1085
1086         /* Repair called_graph */
1087         set_irg_visited(called_graph, get_irg_visited(irg));
1088         set_irg_block_visited(called_graph, get_irg_block_visited(irg));
1089         set_Block_block_visited(get_irg_start_block(called_graph), 0);
1090
1091         /* -- Merge the end of the inlined procedure with the call site -- */
1092         /* We will turn the old Call node into a Tuple with the following
1093            predecessors:
1094            -1:  Block of Tuple.
1095            0: Phi of all Memories of Return statements.
1096            1: Jmp from new Block that merges the control flow from all exception
1097            predecessors of the old end block.
1098            2: Tuple of all results.
1099            3: Phi of Exception memories.
1100            In case the old Call directly branches to End on an exception we don't
1101            need the block merging all exceptions nor the Phi of the exception
1102            memories.
1103         */
1104
1105         /* -- Precompute some values -- */
1106         end_bl = get_new_node(get_irg_end_block(called_graph));
1107         end = get_new_node(get_irg_end(called_graph));
1108         arity = get_Block_n_cfgpreds(end_bl);    /* arity = n_exc + n_ret  */
1109         n_res = get_method_n_ress(get_Call_type(call));
1110
1111         res_pred = XMALLOCN(ir_node*, n_res);
1112         cf_pred  = XMALLOCN(ir_node*, arity);
1113
1114         set_irg_current_block(irg, post_bl); /* just to make sure */
1115
1116         /* -- archive keepalives -- */
1117         irn_arity = get_irn_arity(end);
1118         for (i = 0; i < irn_arity; i++) {
1119                 ir_node *ka = get_End_keepalive(end, i);
1120                 if (! is_Bad(ka))
1121                         add_End_keepalive(get_irg_end(irg), ka);
1122         }
1123
1124         /* The new end node will die.  We need not free it, as the in array is on the
1125            obstack: copy_node() only generated 'D' arrays. */
1126
1127         /* -- Replace Return nodes by Jump nodes. -- */
1128         n_ret = 0;
1129         for (i = 0; i < arity; i++) {
1130                 ir_node *ret;
1131                 ret = get_Block_cfgpred(end_bl, i);
1132                 if (is_Return(ret)) {
1133                         cf_pred[n_ret] = new_r_Jmp(get_nodes_block(ret));
1134                         n_ret++;
1135                 }
1136         }
1137         set_irn_in(post_bl, n_ret, cf_pred);
1138
1139         /* -- Build a Tuple for all results of the method.
1140            Add Phi node if there was more than one Return.  -- */
1141         turn_into_tuple(post_call, pn_Call_max);
1142         /* First the Memory-Phi */
1143         n_mem_phi = 0;
1144         for (i = 0; i < arity; i++) {
1145                 ret = get_Block_cfgpred(end_bl, i);
1146                 if (is_Return(ret)) {
1147                         cf_pred[n_mem_phi++] = get_Return_mem(ret);
1148                 }
1149                 /* memory output for some exceptions is directly connected to End */
1150                 if (is_Call(ret)) {
1151                         cf_pred[n_mem_phi++] = new_r_Proj(get_nodes_block(ret), ret, mode_M, 3);
1152                 } else if (is_fragile_op(ret)) {
1153                         /* We rely on all cfops having the memory output at the same position. */
1154                         cf_pred[n_mem_phi++] = new_r_Proj(get_nodes_block(ret), ret, mode_M, 0);
1155                 } else if (is_Raise(ret)) {
1156                         cf_pred[n_mem_phi++] = new_r_Proj(get_nodes_block(ret), ret, mode_M, 1);
1157                 }
1158         }
1159         phi = new_Phi(n_mem_phi, cf_pred, mode_M);
1160         set_Tuple_pred(call, pn_Call_M, phi);
1161         /* Conserve Phi-list for further inlinings -- but might be optimized */
1162         if (get_nodes_block(phi) == post_bl) {
1163                 set_irn_link(phi, get_irn_link(post_bl));
1164                 set_irn_link(post_bl, phi);
1165         }
1166         /* Now the real results */
1167         if (n_res > 0) {
1168                 for (j = 0; j < n_res; j++) {
1169                         ir_type *res_type = get_method_res_type(ctp, j);
1170                         ir_mode *res_mode = get_type_mode(res_type);
1171                         n_ret = 0;
1172                         for (i = 0; i < arity; i++) {
1173                                 ret = get_Block_cfgpred(end_bl, i);
1174                                 if (is_Return(ret)) {
1175                                         ir_node *res = get_Return_res(ret, j);
1176                                         if (get_irn_mode(res) != res_mode) {
1177                                                 ir_node *block = get_nodes_block(res);
1178                                                 res = new_r_Conv(block, res, res_mode);
1179                                         }
1180                                         cf_pred[n_ret] = res;
1181                                         n_ret++;
1182                                 }
1183                         }
1184                         if (n_ret > 0)
1185                                 phi = new_Phi(n_ret, cf_pred, get_irn_mode(cf_pred[0]));
1186                         else
1187                                 phi = new_Bad();
1188                         res_pred[j] = phi;
1189                         /* Conserve Phi-list for further inlinings -- but might be optimized */
1190                         if (get_nodes_block(phi) == post_bl) {
1191                                 set_Phi_next(phi, get_Block_phis(post_bl));
1192                                 set_Block_phis(post_bl, phi);
1193                         }
1194                 }
1195                 set_Tuple_pred(call, pn_Call_T_result, new_Tuple(n_res, res_pred));
1196         } else {
1197                 set_Tuple_pred(call, pn_Call_T_result, new_Bad());
1198         }
1199         /* handle the regular call */
1200         set_Tuple_pred(call, pn_Call_X_regular, new_Jmp());
1201
1202         /* For now, we cannot inline calls with value_base */
1203         set_Tuple_pred(call, pn_Call_P_value_res_base, new_Bad());
1204
1205         /* Finally the exception control flow.
1206            We have two possible situations:
1207            First if the Call branches to an exception handler:
1208            We need to add a Phi node to
1209            collect the memory containing the exception objects.  Further we need
1210            to add another block to get a correct representation of this Phi.  To
1211            this block we add a Jmp that resolves into the X output of the Call
1212            when the Call is turned into a tuple.
1213            Second: There is no exception edge. Just add all inlined exception
1214            branches to the End node.
1215          */
1216         if (exc_handling == exc_handler) {
1217                 n_exc = 0;
1218                 for (i = 0; i < arity; i++) {
1219                         ir_node *ret, *irn;
1220                         ret = get_Block_cfgpred(end_bl, i);
1221                         irn = skip_Proj(ret);
1222                         if (is_fragile_op(irn) || is_Raise(irn)) {
1223                                 cf_pred[n_exc] = ret;
1224                                 ++n_exc;
1225                         }
1226                 }
1227                 if (n_exc > 0) {
1228                         if (n_exc == 1) {
1229                                 /* simple fix */
1230                                 set_Tuple_pred(call, pn_Call_X_except, cf_pred[0]);
1231                         } else {
1232                                 ir_node *block = new_Block(n_exc, cf_pred);
1233                                 set_cur_block(block);
1234                                 set_Tuple_pred(call, pn_Call_X_except, new_Jmp());
1235                         }
1236                 } else {
1237                         set_Tuple_pred(call, pn_Call_X_except, new_Bad());
1238                 }
1239         } else {
1240                 ir_node *main_end_bl;
1241                 int main_end_bl_arity;
1242                 ir_node **end_preds;
1243
1244                 /* assert(exc_handling == exc_no_handler || no exceptions) */
1245                 n_exc = 0;
1246                 for (i = 0; i < arity; i++) {
1247                         ir_node *ret = get_Block_cfgpred(end_bl, i);
1248                         ir_node *irn = skip_Proj(ret);
1249
1250                         if (is_fragile_op(irn) || is_Raise(irn)) {
1251                                 cf_pred[n_exc] = ret;
1252                                 n_exc++;
1253                         }
1254                 }
1255                 main_end_bl       = get_irg_end_block(irg);
1256                 main_end_bl_arity = get_irn_arity(main_end_bl);
1257                 end_preds         = XMALLOCN(ir_node*, n_exc + main_end_bl_arity);
1258
1259                 for (i = 0; i < main_end_bl_arity; ++i)
1260                         end_preds[i] = get_irn_n(main_end_bl, i);
1261                 for (i = 0; i < n_exc; ++i)
1262                         end_preds[main_end_bl_arity + i] = cf_pred[i];
1263                 set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
1264                 set_Tuple_pred(call, pn_Call_X_except, new_Bad());
1265                 free(end_preds);
1266         }
1267         free(res_pred);
1268         free(cf_pred);
1269
1270         /* --  Turn CSE back on. -- */
1271         set_optimize(rem_opt);
1272         current_ir_graph = rem;
1273
1274         return 1;
1275 }
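
/*
 * A minimal usage sketch for inline_method() (illustrative only; 'irg' and
 * 'call' are assumed to be a graph and one of its Call nodes).  As in
 * inline_small_irgs() below, the caller has to provide the Phi/Proj lists
 * and the link resource before inlining:
 *
 *   ir_graph *callee = get_call_called_irg(call);
 *   if (callee != NULL && callee != irg) {
 *           ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_PHI_LIST);
 *           collect_phiprojs(irg);
 *           inline_method(call, callee);   // returns 1 on success, 0 if inlining was refused
 *           ir_free_resources(irg, IR_RESOURCE_IRN_LINK | IR_RESOURCE_PHI_LIST);
 *   }
 */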
1276
1277 /********************************************************************/
1278 /* Apply inlining to small methods.                                 */
1279 /********************************************************************/
1280
1281 static struct obstack  temp_obst;
1282
1283 /** Represents a possible inlinable call in a graph. */
1284 typedef struct _call_entry {
1285         ir_node    *call;       /**< The Call node. */
1286         ir_graph   *callee;     /**< The callee IR-graph. */
1287         list_head  list;        /**< List head for linking the next one. */
1288         int        loop_depth;  /**< The loop depth of this call. */
1289         int        benefice;    /**< The calculated benefice of this call. */
1290         unsigned   local_adr:1; /**< Set if this call gets an address of a local variable. */
1291         unsigned   all_const:1; /**< Set if this call has only constant parameters. */
1292 } call_entry;
1293
1294 /**
1295  * environment for inlining small irgs
1296  */
1297 typedef struct _inline_env_t {
1298         struct obstack obst;  /**< An obstack where call_entries are allocated on. */
1299         list_head      calls; /**< The call entry list. */
1300 } inline_env_t;
1301
1302 /**
1303  * Returns the irg called from a Call node. If the irg is not
1304  * known, NULL is returned.
1305  *
1306  * @param call  the call node
1307  */
1308 static ir_graph *get_call_called_irg(ir_node *call)
1309 {
1310         ir_node *addr;
1311
1312         addr = get_Call_ptr(call);
1313         if (is_Global(addr)) {
1314                 ir_entity *ent = get_Global_entity(addr);
1315                 /* we don't know which function gets finally bound to a weak symbol */
1316                 if (get_entity_linkage(ent) & IR_LINKAGE_WEAK)
1317                         return NULL;
1318
1319                 return get_entity_irg(ent);
1320         }
1321
1322         return NULL;
1323 }
1324
1325 /**
1326  * Walker: Collect all calls to known graphs inside a graph.
1327  */
1328 static void collect_calls(ir_node *call, void *env)
1329 {
1330         (void) env;
1331         if (is_Call(call)) {
1332                 ir_graph *called_irg = get_call_called_irg(call);
1333
1334                 if (called_irg != NULL) {
1335                         /* The Call node calls a locally defined method.  Remember to inline. */
1336                         inline_env_t *ienv  = env;
1337                         call_entry   *entry = OALLOC(&ienv->obst, call_entry);
1338                         entry->call       = call;
1339                         entry->callee     = called_irg;
1340                         entry->loop_depth = 0;
1341                         entry->benefice   = 0;
1342                         entry->local_adr  = 0;
1343                         entry->all_const  = 0;
1344
1345                         list_add_tail(&entry->list, &ienv->calls);
1346                 }
1347         }
1348 }
1349
1350 /**
1351  * Inlines all small methods at call sites where the called address comes
1352  * from a SymConst node that references the entity of the called
1353  * method.
1354  * The size argument is a rough measure for the code size of the method:
1355  * Methods where the obstack containing the firm graph is smaller than
1356  * size are inlined.
1357  */
1358 void inline_small_irgs(ir_graph *irg, int size)
1359 {
1360         ir_graph *rem = current_ir_graph;
1361         inline_env_t env;
1362         call_entry *entry;
1363
1364         current_ir_graph = irg;
1365         /* Handle graph state */
1366         assert(get_irg_phase_state(irg) != phase_building);
1367         free_callee_info(irg);
1368
1369         /* Find Call nodes to inline.
1370            (We cannot inline during a walk of the graph, as inlining the same
1371            method several times changes the visited flag of the walked graph:
1372            after the first inlining the visited counter of the callee equals
1373            that of the caller.  With the next inlining both are increased.) */
1374         obstack_init(&env.obst);
1375         INIT_LIST_HEAD(&env.calls);
1376         irg_walk_graph(irg, NULL, collect_calls, &env);
1377
1378         if (! list_empty(&env.calls)) {
1379                 /* There are calls to inline */
1380                 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1381                 collect_phiprojs(irg);
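                     /* inline_method() relies on the per-block Phi and Proj lists
                      * (IR_RESOURCE_PHI_LIST was reserved above), so collect them
                      * once before inlining starts */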
1382
1383                 list_for_each_entry(call_entry, entry, &env.calls, list) {
1384                         ir_graph            *callee = entry->callee;
1385                         irg_inline_property prop    = get_irg_inline_property(callee);
1386
1387                         if (prop == irg_inline_forbidden) {
1388                                 continue;
1389                         }
1390
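                             /* rough code-size check: the memory used on the callee's
                              * obstack (allocated minus remaining room) serves as the
                              * size estimate described in the function comment above */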
1391                         if (prop >= irg_inline_forced ||
1392                             _obstack_memory_used(callee->obst) - (int)obstack_room(callee->obst) < size) {
1393                                 inline_method(entry->call, callee);
1394                         }
1395                 }
1396                 ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1397         }
1398         obstack_free(&env.obst, NULL);
1399         current_ir_graph = rem;
1400 }
1401
1402 struct inline_small_irgs_pass_t {
1403         ir_graph_pass_t pass;
1404         int            size;
1405 };
1406
1407 /**
1408  * Wrapper to run inline_small_irgs() as a pass.
1409  */
1410 static int inline_small_irgs_wrapper(ir_graph *irg, void *context)
1411 {
1412         struct inline_small_irgs_pass_t *pass = context;
1413
1414         inline_small_irgs(irg, pass->size);
1415         return 0;
1416 }
1417
1418 /* create a pass for inline_small_irgs() */
1419 ir_graph_pass_t *inline_small_irgs_pass(const char *name, int size)
1420 {
1421         struct inline_small_irgs_pass_t *pass =
1422                 XMALLOCZ(struct inline_small_irgs_pass_t);
1423
1424         pass->size = size;
1425         return def_graph_pass_constructor(
1426                 &pass->pass, name ? name : "inline_small_irgs", inline_small_irgs_wrapper);
1427 }
1428
1429 /**
1430  * Environment for inlining irgs.
1431  */
1432 typedef struct {
1433         list_head calls;             /**< List of all call nodes in this graph. */
1434         unsigned  *local_weights;    /**< Once allocated, the beneficial weight for transmitting local addresses. */
1435         unsigned  n_nodes;           /**< Number of nodes in graph except Id, Tuple, Proj, Start, End. */
1436         unsigned  n_blocks;          /**< Number of Blocks in graph without Start and End block. */
1437         unsigned  n_nodes_orig;      /**< for statistics */
1438         unsigned  n_call_nodes;      /**< Number of Call nodes in the graph. */
1439         unsigned  n_call_nodes_orig; /**< for statistics */
1440         unsigned  n_callers;         /**< Number of known graphs that call this graph. */
1441         unsigned  n_callers_orig;    /**< for statistics */
1442         unsigned  got_inline:1;      /**< Set, if at least one call inside this graph was inlined. */
1443         unsigned  recursive:1;       /**< Set, if this function is self recursive. */
1444 } inline_irg_env;
1445
1446 /**
1447  * Allocate a new environment for inlining.
1448  */
1449 static inline_irg_env *alloc_inline_irg_env(void)
1450 {
1451         inline_irg_env *env    = OALLOC(&temp_obst, inline_irg_env);
1452         INIT_LIST_HEAD(&env->calls);
1453         env->local_weights     = NULL;
1454         env->n_nodes           = -2; /* do not count Start, End */
1455         env->n_blocks          = -2; /* do not count Start and End block */
1456         env->n_nodes_orig      = -2; /* do not count Start, End */
1457         env->n_call_nodes      = 0;
1458         env->n_call_nodes_orig = 0;
1459         env->n_callers         = 0;
1460         env->n_callers_orig    = 0;
1461         env->got_inline        = 0;
1462         env->recursive         = 0;
1463         return env;
1464 }
1465
1466 typedef struct walker_env {
1467         inline_irg_env *x;     /**< the inline environment */
1468         char ignore_runtime;   /**< the ignore runtime flag */
1469         char ignore_callers;   /**< if set, do not update the callers' data */
1470 } wenv_t;
1471
1472 /**
1473  * post-walker: collect all calls in the inline-environment
1474  * of a graph and sum some statistics.
1475  */
1476 static void collect_calls2(ir_node *call, void *ctx)
1477 {
1478         wenv_t         *env = ctx;
1479         inline_irg_env *x = env->x;
1480         ir_opcode      code = get_irn_opcode(call);
1481         ir_graph       *callee;
1482         call_entry     *entry;
1483
1484         /* count meaningful nodes in irg */
1485         if (code != iro_Proj && code != iro_Tuple && code != iro_Sync) {
1486                 if (code != iro_Block) {
1487                         ++x->n_nodes;
1488                         ++x->n_nodes_orig;
1489                 } else {
1490                         ++x->n_blocks;
1491                 }
1492         }
1493
1494         if (code != iro_Call) return;
1495
1496         /* check, if it's a runtime call */
1497         if (env->ignore_runtime) {
1498                 ir_node *symc = get_Call_ptr(call);
1499
1500                 if (is_Global(symc)) {
1501                         ir_entity *ent = get_Global_entity(symc);
1502
1503                         if (get_entity_additional_properties(ent) & mtp_property_runtime)
1504                                 return;
1505                 }
1506         }
1507
1508         /* collect all call nodes */
1509         ++x->n_call_nodes;
1510         ++x->n_call_nodes_orig;
1511
1512         callee = get_call_called_irg(call);
1513         if (callee != NULL) {
1514                 if (! env->ignore_callers) {
1515                         inline_irg_env *callee_env = get_irg_link(callee);
1516                         /* count all static callers */
1517                         ++callee_env->n_callers;
1518                         ++callee_env->n_callers_orig;
1519                 }
1520                 if (callee == current_ir_graph)
1521                         x->recursive = 1;
1522
1523                 /* link it into the list of possibly inlinable entries */
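                     /* the loop depth recorded below later raises the benefice of
                      * calls inside inner loops, see calc_inline_benefice() */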
1524                 entry = OALLOC(&temp_obst, call_entry);
1525                 entry->call       = call;
1526                 entry->callee     = callee;
1527                 entry->loop_depth = get_irn_loop(get_nodes_block(call))->depth;
1528                 entry->benefice   = 0;
1529                 entry->local_adr  = 0;
1530                 entry->all_const  = 0;
1531
1532                 list_add_tail(&entry->list, &x->calls);
1533         }
1534 }
1535
1536 /**
1537  * Returns TRUE if the number of call nodes is 0 in the irg's environment,
1538  * hence this irg is a leave (it calls no other graphs).
1539  */
1540 inline static int is_leave(ir_graph *irg)
1541 {
1542         inline_irg_env *env = get_irg_link(irg);
1543         return env->n_call_nodes == 0;
1544 }
1545
1546 /**
1547  * Returns TRUE if the number of nodes in the callee is
1548  * smaller than size in the irg's environment.
1549  */
1550 inline static int is_smaller(ir_graph *callee, unsigned size)
1551 {
1552         inline_irg_env *env = get_irg_link(callee);
1553         return env->n_nodes < size;
1554 }
1555
1556 /**
1557  * Duplicate a call entry.
1558  *
1559  * @param entry     the original entry to duplicate
1560  * @param new_call  the new call node
1561  * @param loop_depth_delta
1562  *                  delta value for the loop depth
1563  */
1564 static call_entry *duplicate_call_entry(const call_entry *entry,
1565                                         ir_node *new_call, int loop_depth_delta) {
1566         call_entry *nentry = OALLOC(&temp_obst, call_entry);
1567         nentry->call       = new_call;
1568         nentry->callee     = entry->callee;
1569         nentry->benefice   = entry->benefice;
1570         nentry->loop_depth = entry->loop_depth + loop_depth_delta;
1571         nentry->local_adr  = entry->local_adr;
1572         nentry->all_const  = entry->all_const;
1573
1574         return nentry;
1575 }
1576
1577 /**
1578  * Append all call nodes of the source environment to the call list of the
1579  * destination environment.
1580  *
1581  * @param dst         destination environment
1582  * @param src         source environment
1583  * @param loop_depth  the loop depth of the call that is replaced by the src list
1584  */
1585 static void append_call_list(inline_irg_env *dst, inline_irg_env *src, int loop_depth)
1586 {
1587         call_entry *entry, *nentry;
1588
1589         /* Note that the src list points to Call nodes in the inlined graph, but
1590            we need Call nodes in our graph. Luckily the inliner leaves this information
1591            in the link field. */
1592         list_for_each_entry(call_entry, entry, &src->calls, list) {
1593                 nentry = duplicate_call_entry(entry, get_irn_link(entry->call), loop_depth);
1594                 list_add_tail(&nentry->list, &dst->calls);
1595         }
1596         dst->n_call_nodes += src->n_call_nodes;
1597         dst->n_nodes      += src->n_nodes;
1598 }
1599
1600 /*
1601  * Inlines small leave methods at call sites where the called address comes
1602  * from a SymConst node that references the entity of the called method.
1603  * The size arguments are rough measures for the code size of a method:
1604  * leave methods with fewer nodes than leavesize and other methods with
1605  * fewer nodes than size are inlined, as long as the caller does not grow
1606  * beyond maxsize nodes.
1607  */
1608 void inline_leave_functions(unsigned maxsize, unsigned leavesize,
1609                             unsigned size, int ignore_runtime)
1610 {
1611         inline_irg_env   *env;
1612         ir_graph         *irg;
1613         int              i, n_irgs;
1614         ir_graph         *rem;
1615         int              did_inline;
1616         wenv_t           wenv;
1617         call_entry       *entry, *next;
1618         const call_entry *centry;
1619         pmap             *copied_graphs;
1620         pmap_entry       *pm_entry;
1621
1622         rem = current_ir_graph;
1623         obstack_init(&temp_obst);
1624
1625         /* a map for the copied graphs, used to inline recursive calls */
1626         copied_graphs = pmap_create();
1627
1628         /* extend all irgs by a temporary data structure for inlining. */
1629         n_irgs = get_irp_n_irgs();
1630         for (i = 0; i < n_irgs; ++i)
1631                 set_irg_link(get_irp_irg(i), alloc_inline_irg_env());
1632
1633         /* Pre-compute information in temporary data structure. */
1634         wenv.ignore_runtime = ignore_runtime;
1635         wenv.ignore_callers = 0;
1636         for (i = 0; i < n_irgs; ++i) {
1637                 ir_graph *irg = get_irp_irg(i);
1638
1639                 assert(get_irg_phase_state(irg) != phase_building);
1640                 free_callee_info(irg);
1641
1642                 assure_cf_loop(irg);
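                     /* the control flow loop tree is needed so collect_calls2() can
                      * record the loop depth of every call */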
1643                 wenv.x = get_irg_link(irg);
1644                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
1645         }
1646
1647         /* -- and now inline. -- */
1648
1649         /* Inline leaves recursively -- we might construct new leaves. */
1650         do {
1651                 did_inline = 0;
1652
1653                 for (i = 0; i < n_irgs; ++i) {
1654                         ir_node *call;
1655                         int phiproj_computed = 0;
1656
1657                         current_ir_graph = get_irp_irg(i);
1658                         env              = get_irg_link(current_ir_graph);
1659
1660                         ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1661                         list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
1662                                 ir_graph            *callee;
1663                                 irg_inline_property  prop;
1664
1665                                 if (env->n_nodes > maxsize)
1666                                         break;
1667
1668                                 call   = entry->call;
1669                                 callee = entry->callee;
1670
1671                                 prop = get_irg_inline_property(callee);
1672                                 if (prop == irg_inline_forbidden) {
1673                                         continue;
1674                                 }
1675
1676                                 if (is_leave(callee) && (
1677                                     is_smaller(callee, leavesize) || prop >= irg_inline_forced)) {
1678                                         if (!phiproj_computed) {
1679                                                 phiproj_computed = 1;
1680                                                 collect_phiprojs(current_ir_graph);
1681                                         }
1682                                         did_inline = inline_method(call, callee);
1683
1684                                         if (did_inline) {
1685                                                 inline_irg_env *callee_env = get_irg_link(callee);
1686
1687                                                 /* call was inlined, Phi/Projs for current graph must be recomputed */
1688                                                 phiproj_computed = 0;
1689
1690                                                 /* Do some statistics */
1691                                                 env->got_inline = 1;
1692                                                 --env->n_call_nodes;
1693                                                 env->n_nodes += callee_env->n_nodes;
1694                                                 --callee_env->n_callers;
1695
1696                                                 /* remove this call from the list */
1697                                                 list_del(&entry->list);
1698                                                 continue;
1699                                         }
1700                                 }
1701                         }
1702                         ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1703                 }
1704         } while (did_inline);
1705
1706         /* inline other small functions. */
1707         for (i = 0; i < n_irgs; ++i) {
1708                 ir_node *call;
1709                 int phiproj_computed = 0;
1710
1711                 current_ir_graph = get_irp_irg(i);
1712                 env              = get_irg_link(current_ir_graph);
1713
1714                 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1715
1716                 /* note that the list of possible calls is updated during the process */
1717                 list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
1718                         irg_inline_property prop;
1719                         ir_graph            *callee;
1720                         pmap_entry          *e;
1721
1722                         call   = entry->call;
1723                         callee = entry->callee;
1724
1725                         prop = get_irg_inline_property(callee);
1726                         if (prop == irg_inline_forbidden) {
1727                                 continue;
1728                         }
1729
1730                         e = pmap_find(copied_graphs, callee);
1731                         if (e != NULL) {
1732                                 /*
1733                                  * Remap callee if we have a copy.
1734                                  * FIXME: Should we do this only for recursive Calls ?
1735                                  */
1736                                 callee = e->value;
1737                         }
1738
1739                         if (prop >= irg_inline_forced ||
1740                             (is_smaller(callee, size) && env->n_nodes < maxsize) /* small function */) {
1741                                 if (current_ir_graph == callee) {
1742                                         /*
1743                                          * Recursive call: we cannot directly inline because we cannot walk
1744                                          * the graph and change it. So we have to make a copy of the graph
1745                                          * first.
1746                                          */
1747
1748                                         inline_irg_env *callee_env;
1749                                         ir_graph       *copy;
1750
1751                                         ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1752
1753                                         /*
1754                                          * No copy yet, create one.
1755                                          * Note that recursive methods are never leaves, so it is sufficient
1756                                          * to test this condition here.
1757                                          */
1758                                         copy = create_irg_copy(callee);
1759
1760                                         /* create_irg_copy() destroys the Proj links, recompute them */
1761                                         phiproj_computed = 0;
1762
1763                                         ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1764
1765                                         /* allocate new environment */
1766                                         callee_env = alloc_inline_irg_env();
1767                                         set_irg_link(copy, callee_env);
1768
1769                                         assure_cf_loop(copy);
1770                                         wenv.x              = callee_env;
1771                                         wenv.ignore_callers = 1;
1772                                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
1773
1774                                         /*
1775                                          * Enter the entity of the original graph. This is needed
1776                                          * for inline_method(). However, note that ent->irg still points
1777                                          * to callee, NOT to copy.
1778                                          */
1779                                         set_irg_entity(copy, get_irg_entity(callee));
1780
1781                                         pmap_insert(copied_graphs, callee, copy);
1782                                         callee = copy;
1783
1784                                         /* we have only one caller: the original graph */
1785                                         callee_env->n_callers      = 1;
1786                                         callee_env->n_callers_orig = 1;
1787                                 }
1788                                 if (! phiproj_computed) {
1789                                         phiproj_computed = 1;
1790                                         collect_phiprojs(current_ir_graph);
1791                                 }
1792                                 did_inline = inline_method(call, callee);
1793                                 if (did_inline) {
1794                                         inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);
1795
1796                                         /* call was inlined, Phi/Projs for current graph must be recomputed */
1797                                         phiproj_computed = 0;
1798
1799                                         /* callee was inlined. Append its call list. */
1800                                         env->got_inline = 1;
1801                                         --env->n_call_nodes;
1802                                         append_call_list(env, callee_env, entry->loop_depth);
1803                                         --callee_env->n_callers;
1804
1805                                         /* after we have inlined callee, all called methods inside callee
1806                                            are now called once more */
1807                                         list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
1808                                                 inline_irg_env *penv = get_irg_link(centry->callee);
1809                                                 ++penv->n_callers;
1810                                         }
1811
1812                                         /* remove this call from the list */
1813                                         list_del(&entry->list);
1814                                         continue;
1815                                 }
1816                         }
1817                 }
1818                 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1819         }
1820
1821         for (i = 0; i < n_irgs; ++i) {
1822                 irg = get_irp_irg(i);
1823                 env = get_irg_link(irg);
1824
1825                 if (env->got_inline) {
1826                         optimize_graph_df(irg);
1827                         optimize_cf(irg);
1828                 }
1829                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
1830                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
1831                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
1832                         env->n_callers_orig, env->n_callers,
1833                         get_entity_name(get_irg_entity(irg))));
1834                 }
1835         }
1836
1837         /* kill the copied graphs: we don't need them anymore */
1838         foreach_pmap(copied_graphs, pm_entry) {
1839                 ir_graph *copy = pm_entry->value;
1840
1841                 /* reset the entity, otherwise it will be deleted in the next step ... */
1842                 set_irg_entity(copy, NULL);
1843                 free_ir_graph(copy);
1844         }
1845         pmap_destroy(copied_graphs);
1846
1847         obstack_free(&temp_obst, NULL);
1848         current_ir_graph = rem;
1849 }
1850
1851 struct inline_leave_functions_pass_t {
1852         ir_prog_pass_t pass;
1853         unsigned       maxsize;
1854         unsigned       leavesize;
1855         unsigned       size;
1856         int            ignore_runtime;
1857 };
1858
1859 /**
1860  * Wrapper to run inline_leave_functions() as an ir_prog pass.
1861  */
1862 static int inline_leave_functions_wrapper(ir_prog *irp, void *context)
1863 {
1864         struct inline_leave_functions_pass_t *pass = context;
1865
1866         (void)irp;
1867         inline_leave_functions(
1868                 pass->maxsize, pass->leavesize,
1869                 pass->size, pass->ignore_runtime);
1870         return 0;
1871 }
1872
1873 /* create a pass for inline_leave_functions() */
1874 ir_prog_pass_t *inline_leave_functions_pass(
1875         const char *name, unsigned maxsize, unsigned leavesize,
1876         unsigned size, int ignore_runtime) {
1877         struct inline_leave_functions_pass_t *pass =
1878                 XMALLOCZ(struct inline_leave_functions_pass_t);
1879
1880         pass->maxsize        = maxsize;
1881         pass->leavesize      = leavesize;
1882         pass->size           = size;
1883         pass->ignore_runtime = ignore_runtime;
1884
1885         return def_prog_pass_constructor(
1886                 &pass->pass,
1887                 name ? name : "inline_leave_functions",
1888                 inline_leave_functions_wrapper);
1889 }
1890
1891 /**
1892  * Calculate the benefice weight for one argument that receives the address of a local variable, based on its uses.
1893  */
1894 static unsigned calc_method_local_weight(ir_node *arg)
1895 {
1896         int      i, j, k;
1897         unsigned v, weight = 0;
1898
1899         for (i = get_irn_n_outs(arg) - 1; i >= 0; --i) {
1900                 ir_node *succ = get_irn_out(arg, i);
1901
1902                 switch (get_irn_opcode(succ)) {
1903                 case iro_Load:
1904                 case iro_Store:
1905                         /* Loads and Stores can be removed */
1906                         weight += 3;
1907                         break;
1908                 case iro_Sel:
1909                         /* check if all args are constant */
1910                         for (j = get_Sel_n_indexs(succ) - 1; j >= 0; --j) {
1911                                 ir_node *idx = get_Sel_index(succ, j);
1912                                 if (! is_Const(idx))
1913                                         return 0;
1914                         }
1915                         /* Check users on this Sel. Note: if a 0 is returned here, there was
1916                            some unsupported node. */
1917                         v = calc_method_local_weight(succ);
1918                         if (v == 0)
1919                                 return 0;
1920                         /* we can kill one Sel with constant indexes, this is cheap */
1921                         weight += v + 1;
1922                         break;
1923                 case iro_Id:
1924                         /* when looking backward we might find Id nodes */
1925                         weight += calc_method_local_weight(succ);
1926                         break;
1927                 case iro_Tuple:
1928                         /* unoptimized tuple */
1929                         for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
1930                                 ir_node *pred = get_Tuple_pred(succ, j);
1931                                 if (pred == arg) {
1932                                         /* look for Proj(j) */
1933                                         for (k = get_irn_n_outs(succ) - 1; k >= 0; --k) {
1934                                                 ir_node *succ_succ = get_irn_out(succ, k);
1935                                                 if (is_Proj(succ_succ)) {
1936                                                         if (get_Proj_proj(succ_succ) == j) {
1937                                                                 /* found */
1938                                                                 weight += calc_method_local_weight(succ_succ);
1939                                                         }
1940                                                 } else {
1941                                                         /* this should NOT happen */
1942                                                         return 0;
1943                                                 }
1944                                         }
1945                                 }
1946                         }
1947                         break;
1948                 default:
1949                         /* any other node: unsupported yet or bad. */
1950                         return 0;
1951                 }
1952         }
1953         return weight;
1954 }
1955
1956 /**
1957  * Calculate the parameter weights for transmitting the address of a local variable.
1958  */
1959 static void analyze_irg_local_weights(inline_irg_env *env, ir_graph *irg)
1960 {
1961         ir_entity *ent = get_irg_entity(irg);
1962         ir_type  *mtp;
1963         int      nparams, i, proj_nr;
1964         ir_node  *irg_args, *arg;
1965
1966         mtp      = get_entity_type(ent);
1967         nparams  = get_method_n_params(mtp);
1968
1969         /* allocate a new array; its existence also serves as the 'analysed' flag */
1970         env->local_weights = NEW_ARR_D(unsigned, &temp_obst, nparams);
1971
1972         /* If the method has no parameters, we have nothing to do. */
1973         if (nparams <= 0)
1974                 return;
1975
1976         assure_irg_outs(irg);
1977         irg_args = get_irg_args(irg);
1978         for (i = get_irn_n_outs(irg_args) - 1; i >= 0; --i) {
1979                 arg     = get_irn_out(irg_args, i);
1980                 proj_nr = get_Proj_proj(arg);
1981                 env->local_weights[proj_nr] = calc_method_local_weight(arg);
1982         }
1983 }
1984
1985 /**
1986  * Calculate the benefice for transmitting a local variable address.
1987  * After inlining, the local variable might be transformed into a
1988  * SSA variable by scalar_replacement().
1989  */
1990 static unsigned get_method_local_adress_weight(ir_graph *callee, int pos)
1991 {
1992         inline_irg_env *env = get_irg_link(callee);
1993
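             /* the weight table is computed lazily; a non-NULL local_weights
              * array doubles as the 'already analysed' flag */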
1994         if (env->local_weights != NULL) {
1995                 if (pos < ARR_LEN(env->local_weights))
1996                         return env->local_weights[pos];
1997                 return 0;
1998         }
1999
2000         analyze_irg_local_weights(env, callee);
2001
2002         if (pos < ARR_LEN(env->local_weights))
2003                 return env->local_weights[pos];
2004         return 0;
2005 }
2006
2007 /**
2008  * Calculate a benefice value for inlining the given call.
2009  *
2010  * @param call       the call node we have to inspect
2011  * @param callee     the called graph
2012  */
2013 static int calc_inline_benefice(call_entry *entry, ir_graph *callee)
2014 {
2015         ir_node   *call = entry->call;
2016         ir_entity *ent  = get_irg_entity(callee);
2017         ir_node   *frame_ptr;
2018         ir_type   *mtp;
2019         int       weight = 0;
2020         int       i, n_params, all_const;
2021         unsigned  cc, v;
2022         irg_inline_property prop;
2023
2024         inline_irg_env *callee_env;
2025
2026         prop = get_irg_inline_property(callee);
2027         if (prop == irg_inline_forbidden) {
2028                 DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden\n",
2029                     call, callee));
2030                 return entry->benefice = INT_MIN;
2031         }
2032
2033         if (get_irg_additional_properties(callee) & mtp_property_noreturn) {
2034                 DB((dbg, LEVEL_2, "In %+F Call to %+F: not inlining noreturn\n",
2035                     call, callee));
2036                 return entry->benefice = INT_MIN;
2037         }
2038
2039         /* costs for every passed parameter */
2040         n_params = get_Call_n_params(call);
2041         mtp      = get_entity_type(ent);
2042         cc       = get_method_calling_convention(mtp);
2043         if (cc & cc_reg_param) {
2044                 /* register parameter, smaller costs for register parameters */
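                     /* the bits of the calling convention outside the flag mask
                      * encode how many parameters are passed in registers */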
2045                 int max_regs = cc & ~cc_bits;
2046
2047                 if (max_regs < n_params)
2048                         weight += max_regs * 2 + (n_params - max_regs) * 5;
2049                 else
2050                         weight += n_params * 2;
2051         } else {
2052                 /* parameters are passed on the stack */
2053                 weight += 5 * n_params;
2054         }
2055
2056         /* constant parameters improve the benefice */
2057         frame_ptr = get_irg_frame(current_ir_graph);
2058         all_const = 1;
2059         for (i = 0; i < n_params; ++i) {
2060                 ir_node *param = get_Call_param(call, i);
2061
2062                 if (is_Const(param)) {
2063                         weight += get_method_param_weight(ent, i);
2064                 } else {
2065                         all_const = 0;
2066                         if (is_SymConst(param))
2067                                 weight += get_method_param_weight(ent, i);
2068                         else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
2069                                 /*
2070                                  * An address of a local variable is transmitted. After
2071                                  * inlining, scalar_replacement might be able to remove the
2072                                  * local variable, so honor this.
2073                                  */
2074                                 v = get_method_local_adress_weight(callee, i);
2075                                 weight += v;
2076                                 if (v > 0)
2077                                         entry->local_adr = 1;
2078                         }
2079                 }
2080         }
2081         entry->all_const = all_const;
2082
2083         callee_env = get_irg_link(callee);
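             /* a callee with a single static caller that is not externally
              * visible can probably be removed entirely after inlining, so
              * reward this case with a big bonus */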
2084         if (callee_env->n_callers == 1 &&
2085             callee != current_ir_graph &&
2086             !entity_is_externally_visible(ent)) {
2087                 weight += 700;
2088         }
2089
2090         /* give a bonus for functions with one block */
2091         if (callee_env->n_blocks == 1)
2092                 weight = weight * 3 / 2;
2093
2094         /* and one for small non-recursive functions: we want them to be inlined in almost every case */
2095         if (callee_env->n_nodes < 30 && !callee_env->recursive)
2096                 weight += 2000;
2097
2098         /* and finally for leaves: they do not increase the register pressure
2099            because of callee-saved registers */
2100         if (callee_env->n_call_nodes == 0)
2101                 weight += 400;
2102
2103         /* it is important to inline calls in inner loops first */
2104         if (entry->loop_depth > 30)
2105                 weight += 30 * 1024;
2106         else
2107                 weight += entry->loop_depth * 1024;
2108
2109         /*
2110          * All arguments constant is probably a good sign, give an extra bonus
2111          */
2112         if (all_const)
2113                 weight += 1024;
2114
2115         return entry->benefice = weight;
2116 }
2117
2118 static ir_graph **irgs;
2119 static int      last_irg;
2120
2121 /**
2122  * Callgraph walker, collect all visited graphs.
2123  */
2124 static void callgraph_walker(ir_graph *irg, void *data)
2125 {
2126         (void) data;
2127         irgs[last_irg++] = irg;
2128 }
2129
2130 /**
2131  * Creates an inline order for all graphs.
2132  *
2133  * @return the list of graphs.
2134  */
2135 static ir_graph **create_irg_list(void)
2136 {
2137         ir_entity **free_methods;
2138         int       arr_len;
2139         int       n_irgs = get_irp_n_irgs();
2140
2141         cgana(&arr_len, &free_methods);
2142         xfree(free_methods);
2143
2144         compute_callgraph();
2145
2146         last_irg = 0;
2147         irgs     = XMALLOCNZ(ir_graph*, n_irgs);
2148
2149         callgraph_walk(NULL, callgraph_walker, NULL);
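             /* using only a post-walker should append each graph after its
              * callees have been visited, so callees tend to appear before
              * their callers in the resulting inline order */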
2150         assert(n_irgs == last_irg);
2151
2152         return irgs;
2153 }
2154
2155 /**
2156  * Push a call onto the priority queue if its benefice is big enough.
2157  *
2158  * @param pqueue   the priority queue of calls
2159  * @param call     the call entry
2160  * @param inline_threshold
2161  *                 the threshold value
2162  */
2163 static void maybe_push_call(pqueue_t *pqueue, call_entry *call,
2164                             int inline_threshold)
2165 {
2166         ir_graph            *callee  = call->callee;
2167         irg_inline_property prop     = get_irg_inline_property(callee);
2168         int                 benefice = calc_inline_benefice(call, callee);
2169
2170         DB((dbg, LEVEL_2, "In %+F Call %+F to %+F has benefice %d\n",
2171             get_irn_irg(call->call), call->call, callee, benefice));
2172
2173         if (prop < irg_inline_forced && benefice < inline_threshold) {
2174                 return;
2175         }
2176
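             /* the queue is keyed by the benefice, so the most promising
              * calls are popped and inlined first */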
2177         pqueue_put(pqueue, call, benefice);
2178 }
2179
2180 /**
2181  * Try to inline calls into a graph.
2182  *
2183  * @param irg      the graph into which we inline
2184  * @param maxsize  do NOT inline if the size of irg gets
2185  *                 bigger than this amount
2186  * @param inline_threshold
2187  *                 threshold value for inline decision
2188  * @param copied_graphs
2189  *                 map containing copies of recursive graphs
2190  */
2191 static void inline_into(ir_graph *irg, unsigned maxsize,
2192                         int inline_threshold, pmap *copied_graphs)
2193 {
2194         int            phiproj_computed = 0;
2195         inline_irg_env *env = get_irg_link(irg);
2196         call_entry     *curr_call;
2197         wenv_t         wenv;
2198         pqueue_t       *pqueue;
2199
2200         if (env->n_call_nodes == 0)
2201                 return;
2202
2203         if (env->n_nodes > maxsize) {
2204                 DB((dbg, LEVEL_2, "%+F: too big (%d)\n", irg, env->n_nodes));
2205                 return;
2206         }
2207
2208         current_ir_graph = irg;
2209         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
2210
2211         /* put all calls of this graph into the priority queue */
2212         pqueue = new_pqueue();
2213
2214         list_for_each_entry(call_entry, curr_call, &env->calls, list) {
2215                 assert(is_Call(curr_call->call));
2216                 maybe_push_call(pqueue, curr_call, inline_threshold);
2217         }
2218
2219         /* note that the list of possible calls is updated during the process */
2220         while (!pqueue_empty(pqueue)) {
2221                 int                 did_inline;
2222                 call_entry          *curr_call  = pqueue_pop_front(pqueue);
2223                 ir_graph            *callee     = curr_call->callee;
2224                 ir_node             *call_node  = curr_call->call;
2225                 inline_irg_env      *callee_env = get_irg_link(callee);
2226                 irg_inline_property prop        = get_irg_inline_property(callee);
2227                 int                 loop_depth;
2228                 const call_entry    *centry;
2229                 pmap_entry          *e;
2230
2231                 if ((prop < irg_inline_forced) && env->n_nodes + callee_env->n_nodes > maxsize) {
2232                         DB((dbg, LEVEL_2, "%+F: too big (%d) + %+F (%d)\n", irg,
2233                                                 env->n_nodes, callee, callee_env->n_nodes));
2234                         continue;
2235                 }
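                     /* note: continue rather than break, since smaller candidates
                      * later in the queue may still fit below maxsize */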
2236
2237                 e = pmap_find(copied_graphs, callee);
2238                 if (e != NULL) {
2239                         int benefice = curr_call->benefice;
2240                         /*
2241                          * Reduce the weight for recursive functions if not all arguments are constant;
2242                          * inlining recursive functions is rarely beneficial.
2243                          */
2244                         if (!curr_call->all_const)
2245                                 benefice -= 2000;
2246                         if (benefice < inline_threshold)
2247                                 continue;
2248
2249                         /*
2250                          * Remap callee if we have a copy.
2251                          */
2252                         callee     = e->value;
2253                         callee_env = get_irg_link(callee);
2254                 }
2255
2256                 if (current_ir_graph == callee) {
2257                         /*
2258                          * Recursive call: we cannot directly inline because we cannot
2259                          * walk the graph and change it. So we have to make a copy of
2260                          * the graph first.
2261                          */
2262                         int benefice = curr_call->benefice;
2263                         ir_graph *copy;
2264
2265                         /*
2266                          * Reduce the weight for recursive functions if not all arguments are constant;
2267                          * inlining recursive functions is rarely beneficial.
2268                          */
2269                         if (!curr_call->all_const)
2270                                 benefice -= 2000;
2271                         if (benefice < inline_threshold)
2272                                 continue;
2273
2274                         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
2275
2276                         /*
2277                          * No copy yet, create one.
2278                          * Note that recursive methods are never leaves, so it is
2279                          * sufficient to test this condition here.
2280                          */
2281                         copy = create_irg_copy(callee);
2282
2283                         /* create_irg_copy() destroys the Proj links, recompute them */
2284                         phiproj_computed = 0;
2285
2286                         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
2287
2288                         /* allocate a new environment */
2289                         callee_env = alloc_inline_irg_env();
2290                         set_irg_link(copy, callee_env);
2291
2292                         assure_cf_loop(copy);
2293                         wenv.x              = callee_env;
2294                         wenv.ignore_callers = 1;
2295                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
2296
2297                         /*
2298                          * Enter the entity of the original graph. This is needed
2299                          * for inline_method(). However, note that ent->irg still points
2300                          * to callee, NOT to copy.
2301                          */
2302                         set_irg_entity(copy, get_irg_entity(callee));
2303
2304                         pmap_insert(copied_graphs, callee, copy);
2305                         callee = copy;
2306
2307                         /* we have only one caller: the original graph */
2308                         callee_env->n_callers      = 1;
2309                         callee_env->n_callers_orig = 1;
2310                 }
2311                 if (! phiproj_computed) {
2312                         phiproj_computed = 1;
2313                         collect_phiprojs(current_ir_graph);
2314                 }
2315                 did_inline = inline_method(call_node, callee);
2316                 if (!did_inline)
2317                         continue;
2318
2319                 /* call was inlined, Phi/Projs for current graph must be recomputed */
2320                 phiproj_computed = 0;
2321
2322                 /* remove it from the caller list */
2323                 list_del(&curr_call->list);
2324
2325                 /* callee was inlined. Append its call list. */
2326                 env->got_inline = 1;
2327                 --env->n_call_nodes;
2328
2329                 /* we just generate a bunch of new calls */
2330                 loop_depth = curr_call->loop_depth;
2331                 list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
2332                         inline_irg_env *penv = get_irg_link(centry->callee);
2333                         ir_node        *new_call;
2334                         call_entry     *new_entry;
2335
2336                         /* after we have inlined callee, all called methods inside
2337                          * callee are now called once more */
2338                         ++penv->n_callers;
2339
2340                         /* Note that the src list points to Call nodes in the inlined graph,
2341                          * but we need Call nodes in our graph. Luckily the inliner leaves
2342                          * this information in the link field. */
2343                         new_call = get_irn_link(centry->call);
2344                         assert(is_Call(new_call));
2345
2346                         new_entry = duplicate_call_entry(centry, new_call, loop_depth);
2347                         list_add_tail(&new_entry->list, &env->calls);
2348                         maybe_push_call(pqueue, new_entry, inline_threshold);
2349                 }
2350
2351                 env->n_call_nodes += callee_env->n_call_nodes;
2352                 env->n_nodes += callee_env->n_nodes;
2353                 --callee_env->n_callers;
2354         }
2355         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
2356         del_pqueue(pqueue);
2357 }
2358
2359 /*
2360  * Heuristic inliner. Calculates a benefice value for every call and inlines
2361  * those calls with a value higher than the threshold.
2362  */
2363 void inline_functions(unsigned maxsize, int inline_threshold,
2364                       opt_ptr after_inline_opt)
2365 {
2366         inline_irg_env   *env;
2367         int              i, n_irgs;
2368         ir_graph         *rem;
2369         wenv_t           wenv;
2370         pmap             *copied_graphs;
2371         pmap_entry       *pm_entry;
2372         ir_graph         **irgs;
2373
2374         rem = current_ir_graph;
2375         obstack_init(&temp_obst);
2376
2377         irgs = create_irg_list();
2378
2379         /* a map for the copied graphs, used to inline recursive calls */
2380         copied_graphs = pmap_create();
2381
2382         /* extend all irgs by a temporary data structure for inlining. */
2383         n_irgs = get_irp_n_irgs();
2384         for (i = 0; i < n_irgs; ++i)
2385                 set_irg_link(irgs[i], alloc_inline_irg_env());
2386
2387         /* Pre-compute information in temporary data structure. */
2388         wenv.ignore_runtime = 0;
2389         wenv.ignore_callers = 0;
2390         for (i = 0; i < n_irgs; ++i) {
2391                 ir_graph *irg = irgs[i];
2392
2393                 free_callee_info(irg);
2394
2395                 wenv.x = get_irg_link(irg);
2396                 assure_cf_loop(irg);
2397                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
2398         }
2399
2400         /* -- and now inline. -- */
2401         for (i = 0; i < n_irgs; ++i) {
2402                 ir_graph *irg = irgs[i];
2403
2404                 inline_into(irg, maxsize, inline_threshold, copied_graphs);
2405         }
2406
2407         for (i = 0; i < n_irgs; ++i) {
2408                 ir_graph *irg = irgs[i];
2409
2410                 env = get_irg_link(irg);
2411                 if (env->got_inline && after_inline_opt != NULL) {
2412                         /* this irg got calls inlined: optimize it */
2413                         after_inline_opt(irg);
2414                 }
2415                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
2416                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
2417                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
2418                         env->n_callers_orig, env->n_callers,
2419                         get_entity_name(get_irg_entity(irg))));
2420                 }
2421         }
2422
2423         /* kill the copied graphs: we don't need them anymore */
2424         foreach_pmap(copied_graphs, pm_entry) {
2425                 ir_graph *copy = pm_entry->value;
2426
2427                 /* reset the entity, otherwise it will be deleted in the next step ... */
2428                 set_irg_entity(copy, NULL);
2429                 free_ir_graph(copy);
2430         }
2431         pmap_destroy(copied_graphs);
2432
2433         xfree(irgs);
2434
2435         obstack_free(&temp_obst, NULL);
2436         current_ir_graph = rem;
2437 }
2438
2439 struct inline_functions_pass_t {
2440         ir_prog_pass_t pass;
2441         unsigned       maxsize;
2442         int            inline_threshold;
2443         opt_ptr        after_inline_opt;
2444 };
2445
2446 /**
2447  * Wrapper to run inline_functions() as an ir_prog pass.
2448  */
2449 static int inline_functions_wrapper(ir_prog *irp, void *context)
2450 {
2451         struct inline_functions_pass_t *pass = context;
2452
2453         (void)irp;
2454         inline_functions(pass->maxsize, pass->inline_threshold,
2455                          pass->after_inline_opt);
2456         return 0;
2457 }
2458
2459 /* create a ir_prog pass for inline_functions */
2460 ir_prog_pass_t *inline_functions_pass(
2461           const char *name, unsigned maxsize, int inline_threshold,
2462           opt_ptr after_inline_opt) {
2463         struct inline_functions_pass_t *pass =
2464                 XMALLOCZ(struct inline_functions_pass_t);
2465
2466         pass->maxsize          = maxsize;
2467         pass->inline_threshold = inline_threshold;
2468         pass->after_inline_opt = after_inline_opt;
2469
2470         return def_prog_pass_constructor(
2471                 &pass->pass, name ? name : "inline_functions",
2472                 inline_functions_wrapper);
2473 }
2474
2475 void firm_init_inline(void)
2476 {
2477         FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
2478 }