86751d1603f88e2a060c39c5603ac0d1002910e7
[libfirm] / ir / opt / opt_inline.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All right reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License.
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief    Dead node elimination and Procedure Inlining.
23  * @author   Michael Beck, Goetz Lindenmaier
24  * @version  $Id$
25  */
26 #include "config.h"
27
28 #include <limits.h>
29 #include <assert.h>
30
31 #include "irnode_t.h"
32 #include "irgraph_t.h"
33 #include "irprog_t.h"
34
35 #include "iroptimize.h"
36 #include "ircons_t.h"
37 #include "iropt_t.h"
38 #include "irgopt.h"
39 #include "irgmod.h"
40 #include "irgwalk.h"
41
42 #include "array_t.h"
43 #include "list.h"
44 #include "pset.h"
45 #include "pmap.h"
46 #include "pdeq.h"
47 #include "xmalloc.h"
48 #include "pqueue.h"
49
50 #include "irouts.h"
51 #include "irloop_t.h"
52 #include "irbackedge_t.h"
53 #include "opt_init.h"
54 #include "cgana.h"
55 #include "trouts.h"
56 #include "error.h"
57
58 #include "analyze_irg_args.h"
59 #include "iredges_t.h"
60 #include "irflag_t.h"
61 #include "irhooks.h"
62 #include "irtools.h"
63 #include "iropt_dbg.h"
64 #include "irpass_t.h"
65
66 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
67
68 /*------------------------------------------------------------------*/
69 /* Routines for dead node elimination / copying garbage collection  */
70 /* of the obstack.                                                  */
71 /*------------------------------------------------------------------*/
72
/**
 * Record @p copy as the replacement of @p orig.  The link field, which
 * every node owns, serves as storage.
 */
#define set_new_node(orig, copy)  set_irn_link(orig, copy)

/**
 * Fetch the replacement recorded for @p orig.  Must be read before the
 * old node is forgotten.
 */
#define get_new_node(orig) get_irn_link(orig)

/**
 * Evaluates to non-zero iff a replacement was recorded for @p node.
 */
#define has_new_node(node) (get_irn_link(node) != NULL)
87
88 /**
89  * We use the block_visited flag to mark that we have computed the
90  * number of useful predecessors for this block.
91  * Further we encode the new arity in this flag in the old blocks.
92  * Remembering the arity is useful, as it saves a lot of pointer
93  * accesses.  This function is called for all Phi and Block nodes
94  * in a Block.
95  */
96 static inline int
97 compute_new_arity(ir_node *b) {
98         int i, res, irn_arity;
99         int irg_v, block_v;
100
101         irg_v = get_irg_block_visited(current_ir_graph);
102         block_v = get_Block_block_visited(b);
103         if (block_v >= irg_v) {
104                 /* we computed the number of preds for this block and saved it in the
105                    block_v flag */
106                 return block_v - irg_v;
107         } else {
108                 /* compute the number of good predecessors */
109                 res = irn_arity = get_irn_arity(b);
110                 for (i = 0; i < irn_arity; i++)
111                         if (is_Bad(get_irn_n(b, i))) res--;
112                         /* save it in the flag. */
113                         set_Block_block_visited(b, irg_v + res);
114                         return res;
115         }
116 }
117
118 /**
119  * Copies the node to the new obstack. The Ins of the new node point to
120  * the predecessors on the old obstack.  For block/phi nodes not all
121  * predecessors might be copied.  n->link points to the new node.
122  * For Phi and Block nodes the function allocates in-arrays with an arity
123  * only for useful predecessors.  The arity is determined by counting
124  * the non-bad predecessors of the block.
125  *
126  * @param n    The node to be copied
127  * @param env  if non-NULL, the node number attribute will be copied to the new node
128  *
129  * Note: Also used for loop unrolling.
130  */
131 static void copy_node(ir_node *n, void *env) {
132         ir_node *nn, *block;
133         int new_arity;
134         ir_op *op = get_irn_op(n);
135         (void) env;
136
137         if (op == op_Bad) {
138                 /* node copied already */
139                 return;
140         } else if (op == op_Block) {
141                 block = NULL;
142                 new_arity = compute_new_arity(n);
143                 n->attr.block.graph_arr = NULL;
144         } else {
145                 block = get_nodes_block(n);
146                 if (op == op_Phi) {
147                         new_arity = compute_new_arity(block);
148                 } else {
149                         new_arity = get_irn_arity(n);
150                 }
151         }
152         nn = new_ir_node(get_irn_dbg_info(n),
153                 current_ir_graph,
154                 block,
155                 op,
156                 get_irn_mode(n),
157                 new_arity,
158                 get_irn_in(n) + 1);
159         /* Copy the attributes.  These might point to additional data.  If this
160            was allocated on the old obstack the pointers now are dangling.  This
161            frees e.g. the memory of the graph_arr allocated in new_immBlock. */
162         if (op == op_Block) {
163                 /* we cannot allow blocks WITHOUT macroblock input */
164                 set_Block_MacroBlock(nn, get_Block_MacroBlock(n));
165         }
166         copy_node_attr(n, nn);
167
168         if (env != NULL) {
169                 /* for easier debugging, we want to copy the node numbers too */
170                 nn->node_nr = n->node_nr;
171         }
172
173         set_new_node(n, nn);
174         hook_dead_node_elim_subst(current_ir_graph, n, nn);
175 }
176
177 /**
178  * Copies new predecessors of old node to new node remembered in link.
179  * Spare the Bad predecessors of Phi and Block nodes.
180  */
181 static void copy_preds(ir_node *n, void *env) {
182         ir_node *nn, *block;
183         int i, j, irn_arity;
184         (void) env;
185
186         nn = get_new_node(n);
187
188         if (is_Block(n)) {
189                 /* copy the macro block header */
190                 ir_node *mbh = get_Block_MacroBlock(n);
191
192                 if (mbh == n) {
193                         /* this block is a macroblock header */
194                         set_Block_MacroBlock(nn, nn);
195                 } else {
196                         /* get the macro block header */
197                         ir_node *nmbh = get_new_node(mbh);
198                         assert(nmbh != NULL);
199                         set_Block_MacroBlock(nn, nmbh);
200                 }
201
202                 /* Don't copy Bad nodes. */
203                 j = 0;
204                 irn_arity = get_irn_arity(n);
205                 for (i = 0; i < irn_arity; i++) {
206                         if (! is_Bad(get_irn_n(n, i))) {
207                                 ir_node *pred = get_irn_n(n, i);
208                                 set_irn_n(nn, j, get_new_node(pred));
209                                 j++;
210                         }
211                 }
212                 /* repair the block visited flag from above misuse. Repair it in both
213                    graphs so that the old one can still be used. */
214                 set_Block_block_visited(nn, 0);
215                 set_Block_block_visited(n, 0);
216                 /* Local optimization could not merge two subsequent blocks if
217                    in array contained Bads.  Now it's possible.
218                    We don't call optimize_in_place as it requires
219                    that the fields in ir_graph are set properly. */
220                 if (!has_Block_entity(nn) &&
221                     get_opt_control_flow_straightening() &&
222                     get_Block_n_cfgpreds(nn) == 1 &&
223                     is_Jmp(get_Block_cfgpred(nn, 0))) {
224                         ir_node *old = get_nodes_block(get_Block_cfgpred(nn, 0));
225                         if (nn == old) {
226                                 /* Jmp jumps into the block it is in -- deal self cycle. */
227                                 assert(is_Bad(get_new_node(get_irg_bad(current_ir_graph))));
228                                 exchange(nn, get_new_node(get_irg_bad(current_ir_graph)));
229                         } else {
230                                 exchange(nn, old);
231                         }
232                 }
233         } else if (is_Phi(n) && get_irn_arity(n) > 0) {
234                 /* Don't copy node if corresponding predecessor in block is Bad.
235                    The Block itself should not be Bad. */
236                 block = get_nodes_block(n);
237                 set_nodes_block(nn, get_new_node(block));
238                 j = 0;
239                 irn_arity = get_irn_arity(n);
240                 for (i = 0; i < irn_arity; i++) {
241                         if (! is_Bad(get_irn_n(block, i))) {
242                                 ir_node *pred = get_irn_n(n, i);
243                                 set_irn_n(nn, j, get_new_node(pred));
244                                 /*if (is_backedge(n, i)) set_backedge(nn, j);*/
245                                 j++;
246                         }
247                 }
248                 /* If the pre walker reached this Phi after the post walker visited the
249                    block block_visited is > 0. */
250                 set_Block_block_visited(get_nodes_block(n), 0);
251                 /* Compacting the Phi's ins might generate Phis with only one
252                    predecessor. */
253                 if (get_irn_arity(nn) == 1)
254                         exchange(nn, get_irn_n(nn, 0));
255         } else {
256                 irn_arity = get_irn_arity(n);
257                 for (i = -1; i < irn_arity; i++)
258                         set_irn_n(nn, i, get_new_node(get_irn_n(n, i)));
259         }
260         /* Now the new node is complete.  We can add it to the hash table for CSE.
261            @@@ inlining aborts if we identify End. Why? */
262         if (!is_End(nn))
263                 add_identities(current_ir_graph->value_table, nn);
264 }
265
266 /**
267  * Copies the graph recursively, compacts the keep-alives of the end node.
268  *
269  * @param irg           the graph to be copied
270  * @param copy_node_nr  If non-zero, the node number will be copied
271  */
272 static void copy_graph(ir_graph *irg, int copy_node_nr) {
273         ir_node *oe, *ne, *ob, *nb, *om, *nm; /* old end, new end, old bad, new bad, old NoMem, new NoMem */
274         ir_node *ka;      /* keep alive */
275         int i, irn_arity;
276         unsigned long vfl;
277
278         /* Some nodes must be copied by hand, sigh */
279         vfl = get_irg_visited(irg);
280         set_irg_visited(irg, vfl + 1);
281
282         oe = get_irg_end(irg);
283         mark_irn_visited(oe);
284         /* copy the end node by hand, allocate dynamic in array! */
285         ne = new_ir_node(get_irn_dbg_info(oe),
286                 irg,
287                 NULL,
288                 op_End,
289                 mode_X,
290                 -1,
291                 NULL);
292         /* Copy the attributes.  Well, there might be some in the future... */
293         copy_node_attr(oe, ne);
294         set_new_node(oe, ne);
295
296         /* copy the Bad node */
297         ob = get_irg_bad(irg);
298         mark_irn_visited(ob);
299         nb = new_ir_node(get_irn_dbg_info(ob),
300                 irg,
301                 NULL,
302                 op_Bad,
303                 mode_T,
304                 0,
305                 NULL);
306         copy_node_attr(ob, nb);
307         set_new_node(ob, nb);
308
309         /* copy the NoMem node */
310         om = get_irg_no_mem(irg);
311         mark_irn_visited(om);
312         nm = new_ir_node(get_irn_dbg_info(om),
313                 irg,
314                 NULL,
315                 op_NoMem,
316                 mode_M,
317                 0,
318                 NULL);
319         copy_node_attr(om, nm);
320         set_new_node(om, nm);
321
322         /* copy the live nodes */
323         set_irg_visited(irg, vfl);
324         irg_walk(get_nodes_block(oe), copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
325
326         /* Note: from yet, the visited flag of the graph is equal to vfl + 1 */
327
328         /* visit the anchors as well */
329         for (i = get_irg_n_anchors(irg) - 1; i >= 0; --i) {
330                 ir_node *n = get_irg_anchor(irg, i);
331
332                 if (n && (get_irn_visited(n) <= vfl)) {
333                         set_irg_visited(irg, vfl);
334                         irg_walk(n, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
335                 }
336         }
337
338         /* copy_preds for the end node ... */
339         set_nodes_block(ne, get_new_node(get_nodes_block(oe)));
340
341         /*- ... and now the keep alives. -*/
342         /* First pick the not marked block nodes and walk them.  We must pick these
343            first as else we will oversee blocks reachable from Phis. */
344         irn_arity = get_End_n_keepalives(oe);
345         for (i = 0; i < irn_arity; i++) {
346                 ka = get_End_keepalive(oe, i);
347                 if (is_Block(ka)) {
348                         if (get_irn_visited(ka) <= vfl) {
349                                 /* We must keep the block alive and copy everything reachable */
350                                 set_irg_visited(irg, vfl);
351                                 irg_walk(ka, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
352                         }
353                         add_End_keepalive(ne, get_new_node(ka));
354                 }
355         }
356
357         /* Now pick other nodes.  Here we will keep all! */
358         irn_arity = get_End_n_keepalives(oe);
359         for (i = 0; i < irn_arity; i++) {
360                 ka = get_End_keepalive(oe, i);
361                 if (!is_Block(ka)) {
362                         if (get_irn_visited(ka) <= vfl) {
363                                 /* We didn't copy the node yet.  */
364                                 set_irg_visited(irg, vfl);
365                                 irg_walk(ka, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
366                         }
367                         add_End_keepalive(ne, get_new_node(ka));
368                 }
369         }
370
371         /* start block sometimes only reached after keep alives */
372         set_nodes_block(nb, get_new_node(get_nodes_block(ob)));
373         set_nodes_block(nm, get_new_node(get_nodes_block(om)));
374 }
375
376 /**
377  * Copies the graph reachable from current_ir_graph->end to the obstack
378  * in current_ir_graph and fixes the environment.
379  * Then fixes the fields in current_ir_graph containing nodes of the
380  * graph.
381  *
382  * @param copy_node_nr  If non-zero, the node number will be copied
383  */
384 static void
385 copy_graph_env(int copy_node_nr) {
386         ir_graph *irg = current_ir_graph;
387         ir_node *old_end, *new_anchor;
388         int i;
389
390         /* remove end_except and end_reg nodes */
391         old_end = get_irg_end(irg);
392         set_irg_end_except (irg, old_end);
393         set_irg_end_reg    (irg, old_end);
394
395         /* Not all nodes remembered in irg might be reachable
396            from the end node.  Assure their link is set to NULL, so that
397            we can test whether new nodes have been computed. */
398         for (i = get_irg_n_anchors(irg) - 1; i >= 0; --i) {
399                 ir_node *n = get_irg_anchor(irg, i);
400                 if (n != NULL)
401                         set_new_node(n, NULL);
402         }
403         /* we use the block walk flag for removing Bads from Blocks ins. */
404         inc_irg_block_visited(irg);
405
406         /* copy the graph */
407         copy_graph(irg, copy_node_nr);
408
409         /* fix the anchor */
410         old_end    = get_irg_end(irg);
411         new_anchor = new_Anchor(irg);
412
413         for (i = get_irg_n_anchors(irg) - 1; i >= 0; --i) {
414                 ir_node *n = get_irg_anchor(irg, i);
415                 if (n)
416                         set_irn_n(new_anchor, i, get_new_node(n));
417         }
418         free_End(old_end);
419         irg->anchor = new_anchor;
420
421         /* ensure the new anchor is placed in the endblock */
422         set_nodes_block(new_anchor, get_irg_end_block(irg));
423 }
424
425 /**
426  * Copies all reachable nodes to a new obstack.  Removes bad inputs
427  * from block nodes and the corresponding inputs from Phi nodes.
428  * Merges single exit blocks with single entry blocks and removes
429  * 1-input Phis.
430  * Adds all new nodes to a new hash table for CSE.  Does not
431  * perform CSE, so the hash table might contain common subexpressions.
432  */
433 void dead_node_elimination(ir_graph *irg) {
434         ir_graph *rem;
435 #ifdef INTERPROCEDURAL_VIEW
436         int rem_ipview = get_interprocedural_view();
437 #endif
438         struct obstack *graveyard_obst = NULL;
439         struct obstack *rebirth_obst   = NULL;
440
441         edges_deactivate(irg);
442
443         /* inform statistics that we started a dead-node elimination run */
444         hook_dead_node_elim(irg, 1);
445
446         /* Remember external state of current_ir_graph. */
447         rem = current_ir_graph;
448         current_ir_graph = irg;
449 #ifdef INTERPROCEDURAL_VIEW
450         set_interprocedural_view(0);
451 #endif
452
453         assert(get_irg_phase_state(irg) != phase_building);
454
455         /* Handle graph state */
456         free_callee_info(irg);
457         free_irg_outs(irg);
458         free_trouts();
459
460         /* @@@ so far we loose loops when copying */
461         free_loop_information(irg);
462
463         set_irg_doms_inconsistent(irg);
464
465         /* A quiet place, where the old obstack can rest in peace,
466            until it will be cremated. */
467         graveyard_obst = irg->obst;
468
469         /* A new obstack, where the reachable nodes will be copied to. */
470         rebirth_obst = XMALLOC(struct obstack);
471         irg->obst = rebirth_obst;
472         obstack_init(irg->obst);
473         irg->last_node_idx = 0;
474
475         /* We also need a new value table for CSE */
476         del_identities(irg->value_table);
477         irg->value_table = new_identities();
478
479         /* Copy the graph from the old to the new obstack */
480         copy_graph_env(/*copy_node_nr=*/1);
481
482         /* Free memory from old unoptimized obstack */
483         obstack_free(graveyard_obst, 0);  /* First empty the obstack ... */
484         xfree(graveyard_obst);            /* ... then free it.           */
485
486         /* inform statistics that the run is over */
487         hook_dead_node_elim(irg, 0);
488
489         current_ir_graph = rem;
490 #ifdef INTERPROCEDURAL_VIEW
491         set_interprocedural_view(rem_ipview);
492 #endif
493 }
494
495 ir_graph_pass_t *dead_node_elimination_pass(const char *name) {
496         return def_graph_pass(name ? name : "dce", dead_node_elimination);
497 }
498
499 /**
500  * Relink bad predecessors of a block and store the old in array to the
501  * link field. This function is called by relink_bad_predecessors().
502  * The array of link field starts with the block operand at position 0.
503  * If block has bad predecessors, create a new in array without bad preds.
504  * Otherwise let in array untouched.
505  */
506 static void relink_bad_block_predecessors(ir_node *n, void *env) {
507         ir_node **new_in, *irn;
508         int i, new_irn_n, old_irn_arity, new_irn_arity = 0;
509         (void) env;
510
511         /* if link field of block is NULL, look for bad predecessors otherwise
512            this is already done */
513         if (is_Block(n) && get_irn_link(n) == NULL) {
514                 /* save old predecessors in link field (position 0 is the block operand)*/
515                 set_irn_link(n, get_irn_in(n));
516
517                 /* count predecessors without bad nodes */
518                 old_irn_arity = get_irn_arity(n);
519                 for (i = 0; i < old_irn_arity; i++)
520                         if (!is_Bad(get_irn_n(n, i)))
521                                 ++new_irn_arity;
522
523                 /* arity changing: set new predecessors without bad nodes */
524                 if (new_irn_arity < old_irn_arity) {
525                         /* Get new predecessor array. We do not resize the array, as we must
526                            keep the old one to update Phis. */
527                         new_in = NEW_ARR_D(ir_node *, current_ir_graph->obst, (new_irn_arity+1));
528
529                         /* set new predecessors in array */
530                         new_in[0] = NULL;
531                         new_irn_n = 1;
532                         for (i = 0; i < old_irn_arity; i++) {
533                                 irn = get_irn_n(n, i);
534                                 if (!is_Bad(irn)) {
535                                         new_in[new_irn_n] = irn;
536                                         is_backedge(n, i) ? set_backedge(n, new_irn_n-1) : set_not_backedge(n, new_irn_n-1);
537                                         ++new_irn_n;
538                                 }
539                         }
540                         /* ARR_SETLEN(int, n->attr.block.backedge, new_irn_arity); */
541                         ARR_SHRINKLEN(n->attr.block.backedge, new_irn_arity);
542                         n->in = new_in;
543                 } /* ir node has bad predecessors */
544         } /* Block is not relinked */
545 }
546
547 /**
548  * Relinks Bad predecessors from Blocks and Phis called by walker
549  * remove_bad_predecesors(). If n is a Block, call
550  * relink_bad_block_redecessors(). If n is a Phi-node, call also the relinking
551  * function of Phi's Block. If this block has bad predecessors, relink preds
552  * of the Phi-node.
553  */
554 static void relink_bad_predecessors(ir_node *n, void *env) {
555         ir_node *block, **old_in;
556         int i, old_irn_arity, new_irn_arity;
557
558         /* relink bad predecessors of a block */
559         if (is_Block(n))
560                 relink_bad_block_predecessors(n, env);
561
562         /* If Phi node relink its block and its predecessors */
563         if (is_Phi(n)) {
564                 /* Relink predecessors of phi's block */
565                 block = get_nodes_block(n);
566                 if (get_irn_link(block) == NULL)
567                         relink_bad_block_predecessors(block, env);
568
569                 old_in = (ir_node **)get_irn_link(block); /* Of Phi's Block */
570                 old_irn_arity = ARR_LEN(old_in);
571
572                 /* Relink Phi predecessors if count of predecessors changed */
573                 if (old_irn_arity != ARR_LEN(get_irn_in(block))) {
574                         /* set new predecessors in array
575                            n->in[0] remains the same block */
576                         new_irn_arity = 1;
577                         for(i = 1; i < old_irn_arity; i++)
578                                 if (!is_Bad(old_in[i])) {
579                                         n->in[new_irn_arity] = n->in[i];
580                                         is_backedge(n, i) ? set_backedge(n, new_irn_arity) : set_not_backedge(n, new_irn_arity);
581                                         ++new_irn_arity;
582                                 }
583
584                                 ARR_SETLEN(ir_node *, n->in, new_irn_arity);
585                                 ARR_SETLEN(int, n->attr.phi.u.backedge, new_irn_arity);
586                 }
587         } /* n is a Phi node */
588 }
589
590 /*
591  * Removes Bad Bad predecessors from Blocks and the corresponding
592  * inputs to Phi nodes as in dead_node_elimination but without
593  * copying the graph.
594  * On walking up set the link field to NULL, on walking down call
595  * relink_bad_predecessors() (This function stores the old in array
596  * to the link field and sets a new in array if arity of predecessors
597  * changes).
598  */
599 void remove_bad_predecessors(ir_graph *irg) {
600         panic("Fix backedge handling first");
601         irg_walk_graph(irg, firm_clear_link, relink_bad_predecessors, NULL);
602 }
603
604
605 /*
606    __                      _  __ __
607   (_     __    o     _    | \/  |_
608   __)|_| | \_/ | \_/(/_   |_/\__|__
609
610   The following stuff implements a facility that automatically patches
611   registered ir_node pointers to the new node when a dead node elimination occurs.
612 */
613
614 struct _survive_dce_t {
615         struct obstack obst;
616         pmap *places;
617         pmap *new_places;
618         hook_entry_t dead_node_elim;
619         hook_entry_t dead_node_elim_subst;
620 };
621
622 typedef struct _survive_dce_list_t {
623         struct _survive_dce_list_t *next;
624         ir_node **place;
625 } survive_dce_list_t;
626
627 static void dead_node_hook(void *context, ir_graph *irg, int start) {
628         survive_dce_t *sd = context;
629         (void) irg;
630
631         /* Create a new map before the dead node elimination is performed. */
632         if (start) {
633                 sd->new_places = pmap_create_ex(pmap_count(sd->places));
634         } else {
635                 /* Patch back all nodes if dead node elimination is over and something is to be done. */
636                 pmap_destroy(sd->places);
637                 sd->places     = sd->new_places;
638                 sd->new_places = NULL;
639         }
640 }
641
642 /**
643  * Hook called when dead node elimination replaces old by nw.
644  */
645 static void dead_node_subst_hook(void *context, ir_graph *irg, ir_node *old, ir_node *nw) {
646         survive_dce_t *sd = context;
647         survive_dce_list_t *list = pmap_get(sd->places, old);
648         (void) irg;
649
650         /* If the node is to be patched back, write the new address to all registered locations. */
651         if (list) {
652                 survive_dce_list_t *p;
653
654                 for (p = list; p; p = p->next)
655                         *(p->place) = nw;
656
657                 pmap_insert(sd->new_places, nw, list);
658         }
659 }
660
661 /**
662  * Make a new Survive DCE environment.
663  */
664 survive_dce_t *new_survive_dce(void) {
665         survive_dce_t *res = XMALLOC(survive_dce_t);
666         obstack_init(&res->obst);
667         res->places     = pmap_create();
668         res->new_places = NULL;
669
670         res->dead_node_elim.hook._hook_dead_node_elim = dead_node_hook;
671         res->dead_node_elim.context                   = res;
672         res->dead_node_elim.next                      = NULL;
673
674         res->dead_node_elim_subst.hook._hook_dead_node_elim_subst = dead_node_subst_hook;
675         res->dead_node_elim_subst.context = res;
676         res->dead_node_elim_subst.next    = NULL;
677
678         register_hook(hook_dead_node_elim, &res->dead_node_elim);
679         register_hook(hook_dead_node_elim_subst, &res->dead_node_elim_subst);
680         return res;
681 }
682
683 /**
684  * Free a Survive DCE environment.
685  */
686 void free_survive_dce(survive_dce_t *sd) {
687         obstack_free(&sd->obst, NULL);
688         pmap_destroy(sd->places);
689         unregister_hook(hook_dead_node_elim, &sd->dead_node_elim);
690         unregister_hook(hook_dead_node_elim_subst, &sd->dead_node_elim_subst);
691         xfree(sd);
692 }
693
694 /**
695  * Register a node pointer to be patched upon DCE.
696  * When DCE occurs, the node pointer specified by @p place will be
697  * patched to the new address of the node it is pointing to.
698  *
699  * @param sd    The Survive DCE environment.
700  * @param place The address of the node pointer.
701  */
702 void survive_dce_register_irn(survive_dce_t *sd, ir_node **place) {
703         if (*place != NULL) {
704                 ir_node *irn      = *place;
705                 survive_dce_list_t *curr = pmap_get(sd->places, irn);
706                 survive_dce_list_t *nw   = OALLOC(&sd->obst, survive_dce_list_t);
707
708                 nw->next  = curr;
709                 nw->place = place;
710
711                 pmap_insert(sd->places, irn, nw);
712         }
713 }
714
715 /*--------------------------------------------------------------------*/
716 /*  Functionality for inlining                                         */
717 /*--------------------------------------------------------------------*/
718
719 /**
720  * Copy node for inlineing.  Updates attributes that change when
721  * inlineing but not for dead node elimination.
722  *
723  * Copies the node by calling copy_node() and then updates the entity if
724  * it's a local one.  env must be a pointer of the frame type of the
725  * inlined procedure. The new entities must be in the link field of
726  * the entities.
727  */
728 static void copy_node_inline(ir_node *n, void *env) {
729         ir_node *nn;
730         ir_type *frame_tp = (ir_type *)env;
731
732         copy_node(n, NULL);
733         if (is_Sel(n)) {
734                 nn = get_new_node(n);
735                 assert(is_Sel(nn));
736                 /* use copied entities from the new frame */
737                 if (get_entity_owner(get_Sel_entity(n)) == frame_tp) {
738                         set_Sel_entity(nn, get_entity_link(get_Sel_entity(n)));
739                 }
740         } else if (is_Block(n)) {
741                 nn = get_new_node(n);
742                 nn->attr.block.irg.irg = current_ir_graph;
743         }
744 }
745
746 /**
747  * Copies new predecessors of old node and move constants to
748  * the Start Block.
749  */
750 static void copy_preds_inline(ir_node *n, void *env) {
751         ir_node *nn;
752
753         copy_preds(n, env);
754         nn = skip_Id(get_new_node(n));
755         if (is_irn_constlike(nn)) {
756                 /* move Constants into the start block */
757                 set_nodes_block(nn, get_irg_start_block(current_ir_graph));
758
759                 n = identify_remember(current_ir_graph->value_table, nn);
760                 if (nn != n) {
761                         DBG_OPT_CSE(nn, n);
762                         exchange(nn, n);
763                 }
764         }
765 }
766
767 /**
768  * Walker: checks if P_value_arg_base is used.
769  */
770 static void find_addr(ir_node *node, void *env) {
771         int *allow_inline = env;
772         if (is_Sel(node)) {
773                 ir_graph *irg = current_ir_graph;
774                 if (get_Sel_ptr(node) == get_irg_frame(irg)) {
775                         /* access to frame */
776                         ir_entity *ent = get_Sel_entity(node);
777                         if (get_entity_owner(ent) != get_irg_frame_type(irg)) {
778                                 /* access to value_type */
779                                 *allow_inline = 0;
780                         }
781                 }
782         } else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
783                 /* From GCC:
784                  * Refuse to inline alloca call unless user explicitly forced so as this
785                  * may change program's memory overhead drastically when the function
786                  * using alloca is called in loop.  In GCC present in SPEC2000 inlining
787                  * into schedule_block cause it to require 2GB of ram instead of 256MB.
788                  *
789                  * Sorrily this is true with our implementation also.
790                  * Moreover, we cannot differentiate between alloca() and VLA yet, so this
791                  * disables inlining of functions using VLA (with are completely save).
792                  *
793                  * 2 Solutions:
794                  * - add a flag to the Alloc node for "real" alloca() calls
795                  * - add a new Stack-Restore node at the end of a function using alloca()
796                  */
797                 *allow_inline = 0;
798         }
799 }
800
801 /**
802  * Check if we can inline a given call.
803  * Currently, we cannot inline two cases:
804  * - call with compound arguments
805  * - graphs that take the address of a parameter
806  *
807  * check these conditions here
808  */
809 static int can_inline(ir_node *call, ir_graph *called_graph) {
810         ir_type *call_type = get_Call_type(call);
811         int params, ress, i, res;
812         assert(is_Method_type(call_type));
813
814         params = get_method_n_params(call_type);
815         ress   = get_method_n_ress(call_type);
816
817         /* check parameters for compound arguments */
818         for (i = 0; i < params; ++i) {
819                 ir_type *p_type = get_method_param_type(call_type, i);
820
821                 if (is_compound_type(p_type))
822                         return 0;
823         }
824
825         /* check results for compound arguments */
826         for (i = 0; i < ress; ++i) {
827                 ir_type *r_type = get_method_res_type(call_type, i);
828
829                 if (is_compound_type(r_type))
830                         return 0;
831         }
832
833         res = 1;
834         irg_walk_graph(called_graph, find_addr, NULL, &res);
835
836         return res;
837 }
838
/** How the exception control flow of an inlined call is handled. */
enum exc_mode {
	exc_handler    = 0, /**< The Call has an exception handler. */
	exc_no_handler = 1  /**< Exception handling is not represented. */
};
843
844 /* Inlines a method at the given call site. */
845 int inline_method(ir_node *call, ir_graph *called_graph) {
846         ir_node             *pre_call;
847         ir_node             *post_call, *post_bl;
848         ir_node             *in[pn_Start_max];
849         ir_node             *end, *end_bl, *block;
850         ir_node             **res_pred;
851         ir_node             **cf_pred;
852         ir_node             **args_in;
853         ir_node             *ret, *phi;
854         int                 arity, n_ret, n_exc, n_res, i, n, j, rem_opt, irn_arity, n_params;
855         int                 n_mem_phi;
856         enum exc_mode       exc_handling;
857         ir_type             *called_frame, *curr_frame, *mtp, *ctp;
858         ir_entity           *ent;
859         ir_graph            *rem, *irg;
860         irg_inline_property prop = get_irg_inline_property(called_graph);
861         unsigned long       visited;
862
863         if (prop == irg_inline_forbidden)
864                 return 0;
865
866         ent = get_irg_entity(called_graph);
867
868         mtp = get_entity_type(ent);
869         ctp = get_Call_type(call);
870         n_params = get_method_n_params(mtp);
871         n_res    = get_method_n_ress(mtp);
872         if (n_params > get_method_n_params(ctp)) {
873                 /* this is a bad feature of C: without a prototype, we can
874                  * call a function with less parameters than needed. Currently
875                  * we don't support this, although we could use Unknown than. */
876                 return 0;
877         }
878         if (n_res != get_method_n_ress(ctp)) {
879                 return 0;
880         }
881
882         /* Argh, compiling C has some bad consequences:
883          * It is implementation dependent what happens in that case.
884          * We support inlining, if the bitsize of the types matches AND
885          * the same arithmetic is used. */
886         for (i = n_params - 1; i >= 0; --i) {
887                 ir_type *param_tp = get_method_param_type(mtp, i);
888                 ir_type *arg_tp   = get_method_param_type(ctp, i);
889
890                 if (param_tp != arg_tp) {
891                         ir_mode *pmode = get_type_mode(param_tp);
892                         ir_mode *amode = get_type_mode(arg_tp);
893
894                         if (pmode == NULL || amode == NULL)
895                                 return 0;
896                         if (get_mode_size_bits(pmode) != get_mode_size_bits(amode))
897                                 return 0;
898                         if (get_mode_arithmetic(pmode) != get_mode_arithmetic(amode))
899                                 return 0;
900                         /* otherwise we can simply "reinterpret" the bits */
901                 }
902         }
903         for (i = n_res - 1; i >= 0; --i) {
904                 ir_type *decl_res_tp = get_method_res_type(mtp, i);
905                 ir_type *used_res_tp = get_method_res_type(ctp, i);
906
907                 if (decl_res_tp != used_res_tp) {
908                         ir_mode *decl_mode = get_type_mode(decl_res_tp);
909                         ir_mode *used_mode = get_type_mode(used_res_tp);
910                         if (decl_mode == NULL || used_mode == NULL)
911                                 return 0;
912                         if (get_mode_size_bits(decl_mode) != get_mode_size_bits(used_mode))
913                                 return 0;
914                         if (get_mode_arithmetic(decl_mode) != get_mode_arithmetic(used_mode))
915                                 return 0;
916                         /* otherwise we can "reinterpret" the bits */
917                 }
918         }
919
920         irg = get_irn_irg(call);
921
922         /*
923          * We cannot inline a recursive call. The graph must be copied before
924          * the call the inline_method() using create_irg_copy().
925          */
926         if (called_graph == irg)
927                 return 0;
928
929         /*
930          * currently, we cannot inline two cases:
931          * - call with compound arguments
932          * - graphs that take the address of a parameter
933          */
934         if (! can_inline(call, called_graph))
935                 return 0;
936
937         rem = current_ir_graph;
938         current_ir_graph = irg;
939
940         DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));
941
942         /* --  Turn off optimizations, this can cause problems when allocating new nodes. -- */
943         rem_opt = get_opt_optimize();
944         set_optimize(0);
945
946         /* Handle graph state */
947         assert(get_irg_phase_state(irg) != phase_building);
948         assert(get_irg_pinned(irg) == op_pin_state_pinned);
949         assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
950         set_irg_outs_inconsistent(irg);
951         set_irg_extblk_inconsistent(irg);
952         set_irg_doms_inconsistent(irg);
953         set_irg_loopinfo_inconsistent(irg);
954         set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
955         set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);
956
957         /* -- Check preconditions -- */
958         assert(is_Call(call));
959
960         /* here we know we WILL inline, so inform the statistics */
961         hook_inline(call, called_graph);
962
963         /* -- Decide how to handle exception control flow: Is there a handler
964            for the Call node, or do we branch directly to End on an exception?
965            exc_handling:
966            0 There is a handler.
967            2 Exception handling not represented in Firm. -- */
968         {
969                 ir_node *Xproj = NULL;
970                 ir_node *proj;
971                 for (proj = get_irn_link(call); proj; proj = get_irn_link(proj)) {
972                         long proj_nr = get_Proj_proj(proj);
973                         if (proj_nr == pn_Call_X_except) Xproj = proj;
974                 }
975                 exc_handling = Xproj != NULL ? exc_handler : exc_no_handler;
976         }
977
978         /* create the argument tuple */
979         NEW_ARR_A(ir_type *, args_in, n_params);
980
981         block = get_nodes_block(call);
982         for (i = n_params - 1; i >= 0; --i) {
983                 ir_node *arg      = get_Call_param(call, i);
984                 ir_type *param_tp = get_method_param_type(mtp, i);
985                 ir_mode *mode     = get_type_mode(param_tp);
986
987                 if (mode != get_irn_mode(arg)) {
988                         arg = new_r_Conv(block, arg, mode);
989                 }
990                 args_in[i] = arg;
991         }
992
993         /* --
994            the procedure and later replaces the Start node of the called graph.
995            Post_call is the old Call node and collects the results of the called
996            graph. Both will end up being a tuple.  -- */
997         post_bl = get_nodes_block(call);
998         set_irg_current_block(irg, post_bl);
999         /* XxMxPxPxPxT of Start + parameter of Call */
1000         in[pn_Start_X_initial_exec]   = new_Jmp();
1001         in[pn_Start_M]                = get_Call_mem(call);
1002         in[pn_Start_P_frame_base]     = get_irg_frame(irg);
1003         in[pn_Start_P_tls]            = get_irg_tls(irg);
1004         in[pn_Start_T_args]           = new_Tuple(n_params, args_in);
1005         pre_call = new_Tuple(pn_Start_max, in);
1006         post_call = call;
1007
1008         /* --
1009            The new block gets the ins of the old block, pre_call and all its
1010            predecessors and all Phi nodes. -- */
1011         part_block(pre_call);
1012
1013         /* -- Prepare state for dead node elimination -- */
1014         /* Visited flags in calling irg must be >= flag in called irg.
1015            Else walker and arity computation will not work. */
1016         if (get_irg_visited(irg) <= get_irg_visited(called_graph))
1017                 set_irg_visited(irg, get_irg_visited(called_graph) + 1);
1018         if (get_irg_block_visited(irg) < get_irg_block_visited(called_graph))
1019                 set_irg_block_visited(irg, get_irg_block_visited(called_graph));
1020         visited = get_irg_visited(irg);
1021
1022         /* Set pre_call as new Start node in link field of the start node of
1023            calling graph and pre_calls block as new block for the start block
1024            of calling graph.
1025            Further mark these nodes so that they are not visited by the
1026            copying. */
1027         set_irn_link(get_irg_start(called_graph), pre_call);
1028         set_irn_visited(get_irg_start(called_graph), visited);
1029         set_irn_link(get_irg_start_block(called_graph), get_nodes_block(pre_call));
1030         set_irn_visited(get_irg_start_block(called_graph), visited);
1031
1032         set_irn_link(get_irg_bad(called_graph), get_irg_bad(current_ir_graph));
1033         set_irn_visited(get_irg_bad(called_graph), visited);
1034
1035         set_irn_link(get_irg_no_mem(called_graph), get_irg_no_mem(current_ir_graph));
1036         set_irn_visited(get_irg_no_mem(called_graph), visited);
1037
1038         /* Initialize for compaction of in arrays */
1039         inc_irg_block_visited(irg);
1040
1041         /* -- Replicate local entities of the called_graph -- */
1042         /* copy the entities. */
1043         irp_reserve_resources(irp, IR_RESOURCE_ENTITY_LINK);
1044         called_frame = get_irg_frame_type(called_graph);
1045         curr_frame   = get_irg_frame_type(irg);
1046         for (i = 0, n = get_class_n_members(called_frame); i < n; ++i) {
1047                 ir_entity *new_ent, *old_ent;
1048                 old_ent = get_class_member(called_frame, i);
1049                 new_ent = copy_entity_own(old_ent, curr_frame);
1050                 set_entity_link(old_ent, new_ent);
1051         }
1052
1053         /* visited is > than that of called graph.  With this trick visited will
1054            remain unchanged so that an outer walker, e.g., searching the call nodes
1055             to inline, calling this inline will not visit the inlined nodes. */
1056         set_irg_visited(irg, get_irg_visited(irg)-1);
1057
1058         /* -- Performing dead node elimination inlines the graph -- */
1059         /* Copies the nodes to the obstack of current_ir_graph. Updates links to new
1060            entities. */
1061         irg_walk(get_irg_end(called_graph), copy_node_inline, copy_preds_inline,
1062                  get_irg_frame_type(called_graph));
1063
1064         irp_free_resources(irp, IR_RESOURCE_ENTITY_LINK);
1065
1066         /* Repair called_graph */
1067         set_irg_visited(called_graph, get_irg_visited(irg));
1068         set_irg_block_visited(called_graph, get_irg_block_visited(irg));
1069         set_Block_block_visited(get_irg_start_block(called_graph), 0);
1070
1071         /* -- Merge the end of the inlined procedure with the call site -- */
1072         /* We will turn the old Call node into a Tuple with the following
1073            predecessors:
1074            -1:  Block of Tuple.
1075            0: Phi of all Memories of Return statements.
1076            1: Jmp from new Block that merges the control flow from all exception
1077            predecessors of the old end block.
1078            2: Tuple of all arguments.
1079            3: Phi of Exception memories.
1080            In case the old Call directly branches to End on an exception we don't
1081            need the block merging all exceptions nor the Phi of the exception
1082            memories.
1083         */
1084
1085         /* -- Precompute some values -- */
1086         end_bl = get_new_node(get_irg_end_block(called_graph));
1087         end = get_new_node(get_irg_end(called_graph));
1088         arity = get_Block_n_cfgpreds(end_bl);    /* arity = n_exc + n_ret  */
1089         n_res = get_method_n_ress(get_Call_type(call));
1090
1091         res_pred = XMALLOCN(ir_node*, n_res);
1092         cf_pred  = XMALLOCN(ir_node*, arity);
1093
1094         set_irg_current_block(irg, post_bl); /* just to make sure */
1095
1096         /* -- archive keepalives -- */
1097         irn_arity = get_irn_arity(end);
1098         for (i = 0; i < irn_arity; i++) {
1099                 ir_node *ka = get_End_keepalive(end, i);
1100                 if (! is_Bad(ka))
1101                         add_End_keepalive(get_irg_end(irg), ka);
1102         }
1103
1104         /* The new end node will die.  We need not free as the in array is on the obstack:
1105            copy_node() only generated 'D' arrays. */
1106
1107         /* -- Replace Return nodes by Jump nodes. -- */
1108         n_ret = 0;
1109         for (i = 0; i < arity; i++) {
1110                 ir_node *ret;
1111                 ret = get_Block_cfgpred(end_bl, i);
1112                 if (is_Return(ret)) {
1113                         cf_pred[n_ret] = new_r_Jmp(get_nodes_block(ret));
1114                         n_ret++;
1115                 }
1116         }
1117         set_irn_in(post_bl, n_ret, cf_pred);
1118
1119         /* -- Build a Tuple for all results of the method.
1120            Add Phi node if there was more than one Return.  -- */
1121         turn_into_tuple(post_call, pn_Call_max);
1122         /* First the Memory-Phi */
1123         n_mem_phi = 0;
1124         for (i = 0; i < arity; i++) {
1125                 ret = get_Block_cfgpred(end_bl, i);
1126                 if (is_Return(ret)) {
1127                         cf_pred[n_mem_phi++] = get_Return_mem(ret);
1128                 }
1129                 /* memory output for some exceptions is directly connected to End */
1130                 if (is_Call(ret)) {
1131                         cf_pred[n_mem_phi++] = new_r_Proj(get_nodes_block(ret), ret, mode_M, 3);
1132                 } else if (is_fragile_op(ret)) {
1133                         /* We rely that all cfops have the memory output at the same position. */
1134                         cf_pred[n_mem_phi++] = new_r_Proj(get_nodes_block(ret), ret, mode_M, 0);
1135                 } else if (is_Raise(ret)) {
1136                         cf_pred[n_mem_phi++] = new_r_Proj(get_nodes_block(ret), ret, mode_M, 1);
1137                 }
1138         }
1139         phi = new_Phi(n_mem_phi, cf_pred, mode_M);
1140         set_Tuple_pred(call, pn_Call_M, phi);
1141         /* Conserve Phi-list for further inlinings -- but might be optimized */
1142         if (get_nodes_block(phi) == post_bl) {
1143                 set_irn_link(phi, get_irn_link(post_bl));
1144                 set_irn_link(post_bl, phi);
1145         }
1146         /* Now the real results */
1147         if (n_res > 0) {
1148                 for (j = 0; j < n_res; j++) {
1149                         ir_type *res_type = get_method_res_type(ctp, j);
1150                         ir_mode *res_mode = get_type_mode(res_type);
1151                         n_ret = 0;
1152                         for (i = 0; i < arity; i++) {
1153                                 ret = get_Block_cfgpred(end_bl, i);
1154                                 if (is_Return(ret)) {
1155                                         ir_node *res = get_Return_res(ret, j);
1156                                         if (get_irn_mode(res) != res_mode) {
1157                                                 ir_node *block = get_nodes_block(res);
1158                                                 res = new_r_Conv(block, res, res_mode);
1159                                         }
1160                                         cf_pred[n_ret] = res;
1161                                         n_ret++;
1162                                 }
1163                         }
1164                         if (n_ret > 0)
1165                                 phi = new_Phi(n_ret, cf_pred, get_irn_mode(cf_pred[0]));
1166                         else
1167                                 phi = new_Bad();
1168                         res_pred[j] = phi;
1169                         /* Conserve Phi-list for further inlinings -- but might be optimized */
1170                         if (get_nodes_block(phi) == post_bl) {
1171                                 set_Phi_next(phi, get_Block_phis(post_bl));
1172                                 set_Block_phis(post_bl, phi);
1173                         }
1174                 }
1175                 set_Tuple_pred(call, pn_Call_T_result, new_Tuple(n_res, res_pred));
1176         } else {
1177                 set_Tuple_pred(call, pn_Call_T_result, new_Bad());
1178         }
1179         /* handle the regular call */
1180         set_Tuple_pred(call, pn_Call_X_regular, new_Jmp());
1181
1182         /* For now, we cannot inline calls with value_base */
1183         set_Tuple_pred(call, pn_Call_P_value_res_base, new_Bad());
1184
1185         /* Finally the exception control flow.
1186            We have two possible situations:
1187            First if the Call branches to an exception handler:
1188            We need to add a Phi node to
1189            collect the memory containing the exception objects.  Further we need
1190            to add another block to get a correct representation of this Phi.  To
1191            this block we add a Jmp that resolves into the X output of the Call
1192            when the Call is turned into a tuple.
1193            Second: There is no exception edge. Just add all inlined exception
1194            branches to the End node.
1195          */
1196         if (exc_handling == exc_handler) {
1197                 n_exc = 0;
1198                 for (i = 0; i < arity; i++) {
1199                         ir_node *ret, *irn;
1200                         ret = get_Block_cfgpred(end_bl, i);
1201                         irn = skip_Proj(ret);
1202                         if (is_fragile_op(irn) || is_Raise(irn)) {
1203                                 cf_pred[n_exc] = ret;
1204                                 ++n_exc;
1205                         }
1206                 }
1207                 if (n_exc > 0) {
1208                         ir_node *block = new_Block(n_exc, cf_pred);
1209                         set_cur_block(block);
1210                         set_Tuple_pred(call, pn_Call_X_except, new_Jmp());
1211                 } else {
1212                         set_Tuple_pred(call, pn_Call_X_except, new_Bad());
1213                 }
1214         } else {
1215                 ir_node *main_end_bl;
1216                 int main_end_bl_arity;
1217                 ir_node **end_preds;
1218
1219                 /* assert(exc_handling == 1 || no exceptions. ) */
1220                 n_exc = 0;
1221                 for (i = 0; i < arity; i++) {
1222                         ir_node *ret = get_Block_cfgpred(end_bl, i);
1223                         ir_node *irn = skip_Proj(ret);
1224
1225                         if (is_fragile_op(irn) || is_Raise(irn)) {
1226                                 cf_pred[n_exc] = ret;
1227                                 n_exc++;
1228                         }
1229                 }
1230                 main_end_bl       = get_irg_end_block(irg);
1231                 main_end_bl_arity = get_irn_arity(main_end_bl);
1232                 end_preds         = XMALLOCN(ir_node*, n_exc + main_end_bl_arity);
1233
1234                 for (i = 0; i < main_end_bl_arity; ++i)
1235                         end_preds[i] = get_irn_n(main_end_bl, i);
1236                 for (i = 0; i < n_exc; ++i)
1237                         end_preds[main_end_bl_arity + i] = cf_pred[i];
1238                 set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
1239                 set_Tuple_pred(call, pn_Call_X_except, new_Bad());
1240                 free(end_preds);
1241         }
1242         free(res_pred);
1243         free(cf_pred);
1244
1245         /* --  Turn CSE back on. -- */
1246         set_optimize(rem_opt);
1247         current_ir_graph = rem;
1248
1249         return 1;
1250 }
1251
1252 /********************************************************************/
1253 /* Apply inlining to small methods.                                 */
1254 /********************************************************************/
1255
1256 static struct obstack  temp_obst;
1257
1258 /** Represents a possible inlinable call in a graph. */
1259 typedef struct _call_entry {
1260         ir_node    *call;       /**< The Call node. */
1261         ir_graph   *callee;     /**< The callee IR-graph. */
1262         list_head  list;        /**< List head for linking the next one. */
1263         int        loop_depth;  /**< The loop depth of this call. */
1264         int        benefice;    /**< The calculated benefice of this call. */
1265         unsigned   local_adr:1; /**< Set if this call gets an address of a local variable. */
1266         unsigned   all_const:1; /**< Set if this call has only constant parameters. */
1267 } call_entry;
1268
1269 /**
1270  * environment for inlining small irgs
1271  */
1272 typedef struct _inline_env_t {
1273         struct obstack obst;  /**< An obstack where call_entries are allocated on. */
1274         list_head      calls; /**< The call entry list. */
1275 } inline_env_t;
1276
1277 /**
1278  * Returns the irg called from a Call node. If the irg is not
1279  * known, NULL is returned.
1280  *
1281  * @param call  the call node
1282  */
1283 static ir_graph *get_call_called_irg(ir_node *call) {
1284         ir_node *addr;
1285
1286         addr = get_Call_ptr(call);
1287         if (is_Global(addr)) {
1288                 ir_entity *ent = get_Global_entity(addr);
1289                 return get_entity_irg(ent);
1290         }
1291
1292         return NULL;
1293 }
1294
1295 /**
1296  * Walker: Collect all calls to known graphs inside a graph.
1297  */
1298 static void collect_calls(ir_node *call, void *env) {
1299         (void) env;
1300         if (is_Call(call)) {
1301                 ir_graph *called_irg = get_call_called_irg(call);
1302
1303                 if (called_irg != NULL) {
1304                         /* The Call node calls a locally defined method.  Remember to inline. */
1305                         inline_env_t *ienv  = env;
1306                         call_entry   *entry = OALLOC(&ienv->obst, call_entry);
1307                         entry->call       = call;
1308                         entry->callee     = called_irg;
1309                         entry->loop_depth = 0;
1310                         entry->benefice   = 0;
1311                         entry->local_adr  = 0;
1312                         entry->all_const  = 0;
1313
1314                         list_add_tail(&entry->list, &ienv->calls);
1315                 }
1316         }
1317 }
1318
1319 /**
1320  * Inlines all small methods at call sites where the called address comes
1321  * from a Const node that references the entity representing the called
1322  * method.
1323  * The size argument is a rough measure for the code size of the method:
1324  * Methods where the obstack containing the firm graph is smaller than
1325  * size are inlined.
1326  */
1327 void inline_small_irgs(ir_graph *irg, int size) {
1328         ir_graph *rem = current_ir_graph;
1329         inline_env_t env;
1330         call_entry *entry;
1331
1332         current_ir_graph = irg;
1333         /* Handle graph state */
1334         assert(get_irg_phase_state(irg) != phase_building);
1335         free_callee_info(irg);
1336
1337         /* Find Call nodes to inline.
1338            (We can not inline during a walk of the graph, as inlining the same
1339            method several times changes the visited flag of the walked graph:
1340            after the first inlining visited of the callee equals visited of
1341            the caller.  With the next inlining both are increased.) */
1342         obstack_init(&env.obst);
1343         INIT_LIST_HEAD(&env.calls);
1344         irg_walk_graph(irg, NULL, collect_calls, &env);
1345
1346         if (! list_empty(&env.calls)) {
1347                 /* There are calls to inline */
1348                 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1349                 collect_phiprojs(irg);
1350
1351                 list_for_each_entry(call_entry, entry, &env.calls, list) {
1352                         ir_graph            *callee = entry->callee;
1353                         irg_inline_property prop    = get_irg_inline_property(callee);
1354
1355                         if (prop == irg_inline_forbidden || get_irg_additional_properties(callee) & mtp_property_weak) {
1356                                 /* do not inline forbidden / weak graphs */
1357                                 continue;
1358                         }
1359
1360                         if (prop >= irg_inline_forced ||
1361                             _obstack_memory_used(callee->obst) - (int)obstack_room(callee->obst) < size) {
1362                                 inline_method(entry->call, callee);
1363                         }
1364                 }
1365                 ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1366         }
1367         obstack_free(&env.obst, NULL);
1368         current_ir_graph = rem;
1369 }
1370
1371 struct inline_small_irgs_pass_t {
1372         ir_graph_pass_t pass;
1373         int            size;
1374 };
1375
1376 /**
1377  * Wrapper to run inline_small_irgs() as a pass.
1378  */
1379 static int inline_small_irgs_wrapper(ir_graph *irg, void *context) {
1380         struct inline_small_irgs_pass_t *pass = context;
1381
1382         inline_small_irgs(irg, pass->size);
1383         return 0;
1384 }
1385
1386 /* create a pass for inline_small_irgs() */
1387 ir_graph_pass_t *inline_small_irgs_pass(const char *name, int size) {
1388         struct inline_small_irgs_pass_t *pass =
1389                 XMALLOCZ(struct inline_small_irgs_pass_t);
1390
1391         pass->size = size;
1392         return def_graph_pass_constructor(
1393                 &pass->pass, name ? name : "inline_small_irgs", inline_small_irgs_wrapper);
1394 }
1395
1396 /**
1397  * Environment for inlining irgs.
1398  */
1399 typedef struct {
1400         list_head calls;             /**< List of of all call nodes in this graph. */
1401         unsigned  *local_weights;    /**< Once allocated, the beneficial weight for transmitting local addresses. */
1402         unsigned  n_nodes;           /**< Number of nodes in graph except Id, Tuple, Proj, Start, End. */
1403         unsigned  n_blocks;          /**< Number of Blocks in graph without Start and End block. */
1404         unsigned  n_nodes_orig;      /**< for statistics */
1405         unsigned  n_call_nodes;      /**< Number of Call nodes in the graph. */
1406         unsigned  n_call_nodes_orig; /**< for statistics */
1407         unsigned  n_callers;         /**< Number of known graphs that call this graphs. */
1408         unsigned  n_callers_orig;    /**< for statistics */
1409         unsigned  got_inline:1;      /**< Set, if at least one call inside this graph was inlined. */
1410         unsigned  local_vars:1;      /**< Set, if an inlined function got the address of a local variable. */
1411         unsigned  recursive:1;       /**< Set, if this function is self recursive. */
1412 } inline_irg_env;
1413
1414 /**
1415  * Allocate a new environment for inlining.
1416  */
1417 static inline_irg_env *alloc_inline_irg_env(void) {
1418         inline_irg_env *env    = OALLOC(&temp_obst, inline_irg_env);
1419         INIT_LIST_HEAD(&env->calls);
1420         env->local_weights     = NULL;
1421         env->n_nodes           = -2; /* do not count count Start, End */
1422         env->n_blocks          = -2; /* do not count count Start, End Block */
1423         env->n_nodes_orig      = -2; /* do not count Start, End */
1424         env->n_call_nodes      = 0;
1425         env->n_call_nodes_orig = 0;
1426         env->n_callers         = 0;
1427         env->n_callers_orig    = 0;
1428         env->got_inline        = 0;
1429         env->local_vars        = 0;
1430         env->recursive         = 0;
1431         return env;
1432 }
1433
1434 typedef struct walker_env {
1435         inline_irg_env *x;     /**< the inline environment */
1436         char ignore_runtime;   /**< the ignore runtime flag */
1437         char ignore_callers;   /**< if set, do change callers data */
1438 } wenv_t;
1439
1440 /**
1441  * post-walker: collect all calls in the inline-environment
1442  * of a graph and sum some statistics.
1443  */
1444 static void collect_calls2(ir_node *call, void *ctx) {
1445         wenv_t         *env = ctx;
1446         inline_irg_env *x = env->x;
1447         ir_opcode      code = get_irn_opcode(call);
1448         ir_graph       *callee;
1449         call_entry     *entry;
1450
1451         /* count meaningful nodes in irg */
1452         if (code != iro_Proj && code != iro_Tuple && code != iro_Sync) {
1453                 if (code != iro_Block) {
1454                         ++x->n_nodes;
1455                         ++x->n_nodes_orig;
1456                 } else {
1457                         ++x->n_blocks;
1458                 }
1459         }
1460
1461         if (code != iro_Call) return;
1462
1463         /* check, if it's a runtime call */
1464         if (env->ignore_runtime) {
1465                 ir_node *symc = get_Call_ptr(call);
1466
1467                 if (is_Global(symc)) {
1468                         ir_entity *ent = get_Global_entity(symc);
1469
1470                         if (get_entity_additional_properties(ent) & mtp_property_runtime)
1471                                 return;
1472                 }
1473         }
1474
1475         /* collect all call nodes */
1476         ++x->n_call_nodes;
1477         ++x->n_call_nodes_orig;
1478
1479         callee = get_call_called_irg(call);
1480         if (callee != NULL) {
1481                 if (! env->ignore_callers) {
1482                         inline_irg_env *callee_env = get_irg_link(callee);
1483                         /* count all static callers */
1484                         ++callee_env->n_callers;
1485                         ++callee_env->n_callers_orig;
1486                 }
1487                 if (callee == current_ir_graph)
1488                         x->recursive = 1;
1489
1490                 /* link it in the list of possible inlinable entries */
1491                 entry = OALLOC(&temp_obst, call_entry);
1492                 entry->call       = call;
1493                 entry->callee     = callee;
1494                 entry->loop_depth = get_irn_loop(get_nodes_block(call))->depth;
1495                 entry->benefice   = 0;
1496                 entry->local_adr  = 0;
1497                 entry->all_const  = 0;
1498
1499                 list_add_tail(&entry->list, &x->calls);
1500         }
1501 }
1502
1503 /**
1504  * Returns TRUE if the number of callers is 0 in the irg's environment,
1505  * hence this irg is a leave.
1506  */
1507 inline static int is_leave(ir_graph *irg) {
1508         inline_irg_env *env = get_irg_link(irg);
1509         return env->n_call_nodes == 0;
1510 }
1511
1512 /**
1513  * Returns TRUE if the number of nodes in the callee is
1514  * smaller then size in the irg's environment.
1515  */
1516 inline static int is_smaller(ir_graph *callee, unsigned size) {
1517         inline_irg_env *env = get_irg_link(callee);
1518         return env->n_nodes < size;
1519 }
1520
1521 /**
1522  * Duplicate a call entry.
1523  *
1524  * @param entry     the original entry to duplicate
1525  * @param new_call  the new call node
1526  * @param loop_depth_delta
1527  *                  delta value for the loop depth
1528  */
1529 static call_entry *duplicate_call_entry(const call_entry *entry,
1530                                         ir_node *new_call, int loop_depth_delta) {
1531         call_entry *nentry = OALLOC(&temp_obst, call_entry);
1532         nentry->call       = new_call;
1533         nentry->callee     = entry->callee;
1534         nentry->benefice   = entry->benefice;
1535         nentry->loop_depth = entry->loop_depth + loop_depth_delta;
1536         nentry->local_adr  = entry->local_adr;
1537         nentry->all_const  = entry->all_const;
1538
1539         return nentry;
1540 }
1541
1542 /**
1543  * Append all call nodes of the source environment to the nodes of in the destination
1544  * environment.
1545  *
1546  * @param dst         destination environment
1547  * @param src         source environment
1548  * @param loop_depth  the loop depth of the call that is replaced by the src list
1549  */
1550 static void append_call_list(inline_irg_env *dst, inline_irg_env *src, int loop_depth) {
1551         call_entry *entry, *nentry;
1552
1553         /* Note that the src list points to Call nodes in the inlined graph, but
1554            we need Call nodes in our graph. Luckily the inliner leaves this information
1555            in the link field. */
1556         list_for_each_entry(call_entry, entry, &src->calls, list) {
1557                 nentry = duplicate_call_entry(entry, get_irn_link(entry->call), loop_depth);
1558                 list_add_tail(&nentry->list, &dst->calls);
1559         }
1560         dst->n_call_nodes += src->n_call_nodes;
1561         dst->n_nodes      += src->n_nodes;
1562 }
1563
1564 /*
1565  * Inlines small leave methods at call sites where the called address comes
1566  * from a Const node that references the entity representing the called
1567  * method.
1568  * The size argument is a rough measure for the code size of the method:
1569  * Methods where the obstack containing the firm graph is smaller than
1570  * size are inlined.
1571  */
1572 void inline_leave_functions(unsigned maxsize, unsigned leavesize,
1573                             unsigned size, int ignore_runtime)
1574 {
1575         inline_irg_env   *env;
1576         ir_graph         *irg;
1577         int              i, n_irgs;
1578         ir_graph         *rem;
1579         int              did_inline;
1580         wenv_t           wenv;
1581         call_entry       *entry, *next;
1582         const call_entry *centry;
1583         pmap             *copied_graphs;
1584         pmap_entry       *pm_entry;
1585
1586         rem = current_ir_graph;
1587         obstack_init(&temp_obst);
1588
1589         /* a map for the copied graphs, used to inline recursive calls */
1590         copied_graphs = pmap_create();
1591
1592         /* extend all irgs by a temporary data structure for inlining. */
1593         n_irgs = get_irp_n_irgs();
1594         for (i = 0; i < n_irgs; ++i)
1595                 set_irg_link(get_irp_irg(i), alloc_inline_irg_env());
1596
1597         /* Pre-compute information in temporary data structure. */
1598         wenv.ignore_runtime = ignore_runtime;
1599         wenv.ignore_callers = 0;
1600         for (i = 0; i < n_irgs; ++i) {
1601                 ir_graph *irg = get_irp_irg(i);
1602
1603                 assert(get_irg_phase_state(irg) != phase_building);
1604                 free_callee_info(irg);
1605
1606                 assure_cf_loop(irg);
1607                 wenv.x = get_irg_link(irg);
1608                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
1609         }
1610
1611         /* -- and now inline. -- */
1612
1613         /* Inline leaves recursively -- we might construct new leaves. */
1614         do {
1615                 did_inline = 0;
1616
1617                 for (i = 0; i < n_irgs; ++i) {
1618                         ir_node *call;
1619                         int phiproj_computed = 0;
1620
1621                         current_ir_graph = get_irp_irg(i);
1622                         env              = get_irg_link(current_ir_graph);
1623
1624                         ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1625                         list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
1626                                 ir_graph            *callee;
1627                                 irg_inline_property  prop;
1628
1629                                 if (env->n_nodes > maxsize)
1630                                         break;
1631
1632                                 call   = entry->call;
1633                                 callee = entry->callee;
1634
1635                                 prop = get_irg_inline_property(callee);
1636                                 if (prop == irg_inline_forbidden || get_irg_additional_properties(callee) & mtp_property_weak) {
1637                                         /* do not inline forbidden / weak graphs */
1638                                         continue;
1639                                 }
1640
1641                                 if (is_leave(callee) && (
1642                                     is_smaller(callee, leavesize) || prop >= irg_inline_forced)) {
1643                                         if (!phiproj_computed) {
1644                                                 phiproj_computed = 1;
1645                                                 collect_phiprojs(current_ir_graph);
1646                                         }
1647                                         did_inline = inline_method(call, callee);
1648
1649                                         if (did_inline) {
1650                                                 inline_irg_env *callee_env = get_irg_link(callee);
1651
1652                                                 /* call was inlined, Phi/Projs for current graph must be recomputed */
1653                                                 phiproj_computed = 0;
1654
1655                                                 /* Do some statistics */
1656                                                 env->got_inline = 1;
1657                                                 --env->n_call_nodes;
1658                                                 env->n_nodes += callee_env->n_nodes;
1659                                                 --callee_env->n_callers;
1660
1661                                                 /* remove this call from the list */
1662                                                 list_del(&entry->list);
1663                                                 continue;
1664                                         }
1665                                 }
1666                         }
1667                         ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1668                 }
1669         } while (did_inline);
1670
1671         /* inline other small functions. */
1672         for (i = 0; i < n_irgs; ++i) {
1673                 ir_node *call;
1674                 int phiproj_computed = 0;
1675
1676                 current_ir_graph = get_irp_irg(i);
1677                 env              = get_irg_link(current_ir_graph);
1678
1679                 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1680
1681                 /* note that the list of possible calls is updated during the process */
1682                 list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
1683                         irg_inline_property prop;
1684                         ir_graph            *callee;
1685                         pmap_entry          *e;
1686
1687                         call   = entry->call;
1688                         callee = entry->callee;
1689
1690                         prop = get_irg_inline_property(callee);
1691                         if (prop == irg_inline_forbidden || get_irg_additional_properties(callee) & mtp_property_weak) {
1692                                 /* do not inline forbidden / weak graphs */
1693                                 continue;
1694                         }
1695
1696                         e = pmap_find(copied_graphs, callee);
1697                         if (e != NULL) {
1698                                 /*
1699                                  * Remap callee if we have a copy.
1700                                  * FIXME: Should we do this only for recursive Calls ?
1701                                  */
1702                                 callee = e->value;
1703                         }
1704
1705                         if (prop >= irg_inline_forced ||
1706                             (is_smaller(callee, size) && env->n_nodes < maxsize) /* small function */) {
1707                                 if (current_ir_graph == callee) {
1708                                         /*
1709                                          * Recursive call: we cannot directly inline because we cannot walk
1710                                          * the graph and change it. So we have to make a copy of the graph
1711                                          * first.
1712                                          */
1713
1714                                         inline_irg_env *callee_env;
1715                                         ir_graph       *copy;
1716
1717                                         ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1718
1719                                         /*
1720                                          * No copy yet, create one.
1721                                          * Note that recursive methods are never leaves, so it is sufficient
1722                                          * to test this condition here.
1723                                          */
1724                                         copy = create_irg_copy(callee);
1725
1726                                         /* create_irg_copy() destroys the Proj links, recompute them */
1727                                         phiproj_computed = 0;
1728
1729                                         ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1730
1731                                         /* allocate new environment */
1732                                         callee_env = alloc_inline_irg_env();
1733                                         set_irg_link(copy, callee_env);
1734
1735                                         assure_cf_loop(copy);
1736                                         wenv.x              = callee_env;
1737                                         wenv.ignore_callers = 1;
1738                                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
1739
1740                                         /*
1741                                          * Enter the entity of the original graph. This is needed
1742                                          * for inline_method(). However, note that ent->irg still points
1743                                          * to callee, NOT to copy.
1744                                          */
1745                                         set_irg_entity(copy, get_irg_entity(callee));
1746
1747                                         pmap_insert(copied_graphs, callee, copy);
1748                                         callee = copy;
1749
1750                                         /* we have only one caller: the original graph */
1751                                         callee_env->n_callers      = 1;
1752                                         callee_env->n_callers_orig = 1;
1753                                 }
1754                                 if (! phiproj_computed) {
1755                                         phiproj_computed = 1;
1756                                         collect_phiprojs(current_ir_graph);
1757                                 }
1758                                 did_inline = inline_method(call, callee);
1759                                 if (did_inline) {
1760                                         inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);
1761
1762                                         /* call was inlined, Phi/Projs for current graph must be recomputed */
1763                                         phiproj_computed = 0;
1764
1765                                         /* callee was inline. Append it's call list. */
1766                                         env->got_inline = 1;
1767                                         --env->n_call_nodes;
1768                                         append_call_list(env, callee_env, entry->loop_depth);
1769                                         --callee_env->n_callers;
1770
1771                                         /* after we have inlined callee, all called methods inside callee
1772                                            are now called once more */
1773                                         list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
1774                                                 inline_irg_env *penv = get_irg_link(centry->callee);
1775                                                 ++penv->n_callers;
1776                                         }
1777
1778                                         /* remove this call from the list */
1779                                         list_del(&entry->list);
1780                                         continue;
1781                                 }
1782                         }
1783                 }
1784                 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1785         }
1786
1787         for (i = 0; i < n_irgs; ++i) {
1788                 irg = get_irp_irg(i);
1789                 env = get_irg_link(irg);
1790
1791                 if (env->got_inline) {
1792                         optimize_graph_df(irg);
1793                         optimize_cf(irg);
1794                 }
1795                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
1796                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
1797                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
1798                         env->n_callers_orig, env->n_callers,
1799                         get_entity_name(get_irg_entity(irg))));
1800                 }
1801         }
1802
1803         /* kill the copied graphs: we don't need them anymore */
1804         foreach_pmap(copied_graphs, pm_entry) {
1805                 ir_graph *copy = pm_entry->value;
1806
1807                 /* reset the entity, otherwise it will be deleted in the next step ... */
1808                 set_irg_entity(copy, NULL);
1809                 free_ir_graph(copy);
1810         }
1811         pmap_destroy(copied_graphs);
1812
1813         obstack_free(&temp_obst, NULL);
1814         current_ir_graph = rem;
1815 }
1816
1817 struct inline_leave_functions_pass_t {
1818         ir_prog_pass_t pass;
1819         unsigned       maxsize;
1820         unsigned       leavesize;
1821         unsigned       size;
1822         int            ignore_runtime;
1823 };
1824
1825 /**
1826  * Wrapper to run inline_leave_functions() as a ir_prog pass.
1827  */
1828 static int inline_leave_functions_wrapper(ir_prog *irp, void *context) {
1829         struct inline_leave_functions_pass_t *pass = context;
1830
1831         (void)irp;
1832         inline_leave_functions(
1833                 pass->maxsize, pass->leavesize,
1834                 pass->size, pass->ignore_runtime);
1835         return 0;
1836 }
1837
1838 /* create a pass for inline_leave_functions() */
1839 ir_prog_pass_t *inline_leave_functions_pass(
1840         const char *name, unsigned maxsize, unsigned leavesize,
1841         unsigned size, int ignore_runtime) {
1842         struct inline_leave_functions_pass_t *pass =
1843                 XMALLOCZ(struct inline_leave_functions_pass_t);
1844
1845         pass->maxsize        = maxsize;
1846         pass->leavesize      = leavesize;
1847         pass->size           = size;
1848         pass->ignore_runtime = ignore_runtime;
1849
1850         return def_prog_pass_constructor(
1851                 &pass->pass,
1852                 name ? name : "inline_leave_functions",
1853                 inline_leave_functions_wrapper);
1854 }
1855
1856 /**
1857  * Calculate the parameter weights for transmitting the address of a local variable.
1858  */
1859 static unsigned calc_method_local_weight(ir_node *arg) {
1860         int      i, j, k;
1861         unsigned v, weight = 0;
1862
1863         for (i = get_irn_n_outs(arg) - 1; i >= 0; --i) {
1864                 ir_node *succ = get_irn_out(arg, i);
1865
1866                 switch (get_irn_opcode(succ)) {
1867                 case iro_Load:
1868                 case iro_Store:
1869                         /* Loads and Store can be removed */
1870                         weight += 3;
1871                         break;
1872                 case iro_Sel:
1873                         /* check if all args are constant */
1874                         for (j = get_Sel_n_indexs(succ) - 1; j >= 0; --j) {
1875                                 ir_node *idx = get_Sel_index(succ, j);
1876                                 if (! is_Const(idx))
1877                                         return 0;
1878                         }
1879                         /* Check users on this Sel. Note: if a 0 is returned here, there was
1880                            some unsupported node. */
1881                         v = calc_method_local_weight(succ);
1882                         if (v == 0)
1883                                 return 0;
1884                         /* we can kill one Sel with constant indexes, this is cheap */
1885                         weight += v + 1;
1886                         break;
1887                 case iro_Id:
1888                         /* when looking backward we might find Id nodes */
1889                         weight += calc_method_local_weight(succ);
1890                         break;
1891                 case iro_Tuple:
1892                         /* unoptimized tuple */
1893                         for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
1894                                 ir_node *pred = get_Tuple_pred(succ, j);
1895                                 if (pred == arg) {
1896                                         /* look for Proj(j) */
1897                                         for (k = get_irn_n_outs(succ) - 1; k >= 0; --k) {
1898                                                 ir_node *succ_succ = get_irn_out(succ, k);
1899                                                 if (is_Proj(succ_succ)) {
1900                                                         if (get_Proj_proj(succ_succ) == j) {
1901                                                                 /* found */
1902                                                                 weight += calc_method_local_weight(succ_succ);
1903                                                         }
1904                                                 } else {
1905                                                         /* this should NOT happen */
1906                                                         return 0;
1907                                                 }
1908                                         }
1909                                 }
1910                         }
1911                         break;
1912                 default:
1913                         /* any other node: unsupported yet or bad. */
1914                         return 0;
1915                 }
1916         }
1917         return weight;
1918 }
1919
1920 /**
1921  * Calculate the parameter weights for transmitting the address of a local variable.
1922  */
1923 static void analyze_irg_local_weights(inline_irg_env *env, ir_graph *irg) {
1924         ir_entity *ent = get_irg_entity(irg);
1925         ir_type  *mtp;
1926         int      nparams, i, proj_nr;
1927         ir_node  *irg_args, *arg;
1928
1929         mtp      = get_entity_type(ent);
1930         nparams  = get_method_n_params(mtp);
1931
1932         /* allocate a new array. currently used as 'analysed' flag */
1933         env->local_weights = NEW_ARR_D(unsigned, &temp_obst, nparams);
1934
1935         /* If the method haven't parameters we have nothing to do. */
1936         if (nparams <= 0)
1937                 return;
1938
1939         assure_irg_outs(irg);
1940         irg_args = get_irg_args(irg);
1941         for (i = get_irn_n_outs(irg_args) - 1; i >= 0; --i) {
1942                 arg     = get_irn_out(irg_args, i);
1943                 proj_nr = get_Proj_proj(arg);
1944                 env->local_weights[proj_nr] = calc_method_local_weight(arg);
1945         }
1946 }
1947
1948 /**
1949  * Calculate the benefice for transmitting an local variable address.
1950  * After inlining, the local variable might be transformed into a
1951  * SSA variable by scalar_replacement().
1952  */
1953 static unsigned get_method_local_adress_weight(ir_graph *callee, int pos) {
1954         inline_irg_env *env = get_irg_link(callee);
1955
1956         if (env->local_weights != NULL) {
1957                 if (pos < ARR_LEN(env->local_weights))
1958                         return env->local_weights[pos];
1959                 return 0;
1960         }
1961
1962         analyze_irg_local_weights(env, callee);
1963
1964         if (pos < ARR_LEN(env->local_weights))
1965                 return env->local_weights[pos];
1966         return 0;
1967 }
1968
1969 /**
1970  * Calculate a benefice value for inlining the given call.
1971  *
1972  * @param call       the call node we have to inspect
1973  * @param callee     the called graph
1974  */
1975 static int calc_inline_benefice(call_entry *entry, ir_graph *callee)
1976 {
1977         ir_node   *call = entry->call;
1978         ir_entity *ent  = get_irg_entity(callee);
1979         ir_node   *frame_ptr;
1980         ir_type   *mtp;
1981         int       weight = 0;
1982         int       i, n_params, all_const;
1983         unsigned  cc, v;
1984         irg_inline_property prop;
1985
1986         inline_irg_env *callee_env;
1987
1988         prop = get_irg_inline_property(callee);
1989         if (prop == irg_inline_forbidden) {
1990                 DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden\n",
1991                     call, callee));
1992                 return entry->benefice = INT_MIN;
1993         }
1994
1995         if (get_irg_additional_properties(callee) & (mtp_property_noreturn | mtp_property_weak)) {
1996                 DB((dbg, LEVEL_2, "In %+F Call to %+F: not inlining noreturn or weak\n",
1997                     call, callee));
1998                 return entry->benefice = INT_MIN;
1999         }
2000
2001         /* costs for every passed parameter */
2002         n_params = get_Call_n_params(call);
2003         mtp      = get_entity_type(ent);
2004         cc       = get_method_calling_convention(mtp);
2005         if (cc & cc_reg_param) {
2006                 /* register parameter, smaller costs for register parameters */
2007                 int max_regs = cc & ~cc_bits;
2008
2009                 if (max_regs < n_params)
2010                         weight += max_regs * 2 + (n_params - max_regs) * 5;
2011                 else
2012                         weight += n_params * 2;
2013         } else {
2014                 /* parameters are passed an stack */
2015                 weight += 5 * n_params;
2016         }
2017
2018         /* constant parameters improve the benefice */
2019         frame_ptr = get_irg_frame(current_ir_graph);
2020         all_const = 1;
2021         for (i = 0; i < n_params; ++i) {
2022                 ir_node *param = get_Call_param(call, i);
2023
2024                 if (is_Const(param)) {
2025                         weight += get_method_param_weight(ent, i);
2026                 } else {
2027                         all_const = 0;
2028                         if (is_SymConst(param))
2029                                 weight += get_method_param_weight(ent, i);
2030                         else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
2031                                 /*
2032                                  * An address of a local variable is transmitted. After
2033                                  * inlining, scalar_replacement might be able to remove the
2034                                  * local variable, so honor this.
2035                                  */
2036                                 v = get_method_local_adress_weight(callee, i);
2037                                 weight += v;
2038                                 if (v > 0)
2039                                         entry->local_adr = 1;
2040                         }
2041                 }
2042         }
2043         entry->all_const = all_const;
2044
2045         callee_env = get_irg_link(callee);
2046         if (callee_env->n_callers == 1 &&
2047             callee != current_ir_graph &&
2048                 get_entity_visibility(ent) == visibility_local) {
2049                 weight += 700;
2050         }
2051
2052         /* give a bonus for functions with one block */
2053         if (callee_env->n_blocks == 1)
2054                 weight = weight * 3 / 2;
2055
2056         /* and one for small non-recursive functions: we want them to be inlined in mostly every case */
2057         if (callee_env->n_nodes < 30 && !callee_env->recursive)
2058                 weight += 2000;
2059
2060         /* and finally for leaves: they do not increase the register pressure
2061            because of callee safe registers */
2062         if (callee_env->n_call_nodes == 0)
2063                 weight += 400;
2064
2065         /** it's important to inline inner loops first */
2066         if (entry->loop_depth > 30)
2067                 weight += 30 * 1024;
2068         else
2069                 weight += entry->loop_depth * 1024;
2070
2071         /*
2072          * All arguments constant is probably a good sign, give an extra bonus
2073          */
2074         if (all_const)
2075                 weight += 1024;
2076
2077         return entry->benefice = weight;
2078 }
2079
2080 static ir_graph **irgs;
2081 static int      last_irg;
2082
2083 /**
2084  * Callgraph walker, collect all visited graphs.
2085  */
2086 static void callgraph_walker(ir_graph *irg, void *data) {
2087         (void) data;
2088         irgs[last_irg++] = irg;
2089 }
2090
2091 /**
2092  * Creates an inline order for all graphs.
2093  *
2094  * @return the list of graphs.
2095  */
2096 static ir_graph **create_irg_list(void) {
2097         ir_entity **free_methods;
2098         int       arr_len;
2099         int       n_irgs = get_irp_n_irgs();
2100
2101         cgana(&arr_len, &free_methods);
2102         xfree(free_methods);
2103
2104         compute_callgraph();
2105
2106         last_irg = 0;
2107         irgs     = XMALLOCNZ(ir_graph*, n_irgs);
2108
2109         callgraph_walk(NULL, callgraph_walker, NULL);
2110         assert(n_irgs == last_irg);
2111
2112         return irgs;
2113 }
2114
2115 /**
2116  * Push a call onto the priority list if its benefice is big enough.
2117  *
2118  * @param pqueue   the priority queue of calls
2119  * @param call     the call entry
2120  * @param inlien_threshold
2121  *                 the threshold value
2122  */
2123 static void maybe_push_call(pqueue_t *pqueue, call_entry *call,
2124                             int inline_threshold)
2125 {
2126         ir_graph            *callee  = call->callee;
2127         irg_inline_property prop     = get_irg_inline_property(callee);
2128         int                 benefice = calc_inline_benefice(call, callee);
2129
2130         DB((dbg, LEVEL_2, "In %+F Call %+F to %+F has benefice %d\n",
2131             get_irn_irg(call->call), call->call, callee, benefice));
2132
2133         if (prop < irg_inline_forced && benefice < inline_threshold) {
2134                 return;
2135         }
2136
2137         pqueue_put(pqueue, call, benefice);
2138 }
2139
2140 /**
2141  * Try to inline calls into a graph.
2142  *
2143  * @param irg      the graph into which we inline
2144  * @param maxsize  do NOT inline if the size of irg gets
2145  *                 bigger than this amount
2146  * @param inline_threshold
2147  *                 threshold value for inline decision
2148  * @param copied_graphs
2149  *                 map containing copied of recursive graphs
2150  */
2151 static void inline_into(ir_graph *irg, unsigned maxsize,
2152                         int inline_threshold, pmap *copied_graphs)
2153 {
2154         int            phiproj_computed = 0;
2155         inline_irg_env *env = get_irg_link(irg);
2156         call_entry     *curr_call;
2157         wenv_t         wenv;
2158         pqueue_t       *pqueue;
2159
2160         if (env->n_call_nodes == 0)
2161                 return;
2162
2163         if (env->n_nodes > maxsize) {
2164                 DB((dbg, LEVEL_2, "%+F: too big (%d)\n", irg, env->n_nodes));
2165                 return;
2166         }
2167
2168         current_ir_graph = irg;
2169         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
2170
2171         /* put irgs into the pqueue */
2172         pqueue = new_pqueue();
2173
2174         list_for_each_entry(call_entry, curr_call, &env->calls, list) {
2175                 assert(is_Call(curr_call->call));
2176                 maybe_push_call(pqueue, curr_call, inline_threshold);
2177         }
2178
2179         /* note that the list of possible calls is updated during the process */
2180         while (!pqueue_empty(pqueue)) {
2181                 int                 did_inline;
2182                 call_entry          *curr_call  = pqueue_pop_front(pqueue);
2183                 ir_graph            *callee     = curr_call->callee;
2184                 ir_node             *call_node  = curr_call->call;
2185                 inline_irg_env      *callee_env = get_irg_link(callee);
2186                 irg_inline_property prop        = get_irg_inline_property(callee);
2187                 int                 loop_depth;
2188                 const call_entry    *centry;
2189                 pmap_entry          *e;
2190
2191                 if ((prop < irg_inline_forced) && env->n_nodes + callee_env->n_nodes > maxsize) {
2192                         DB((dbg, LEVEL_2, "%+F: too big (%d) + %+F (%d)\n", irg,
2193                                                 env->n_nodes, callee, callee_env->n_nodes));
2194                         continue;
2195                 }
2196
2197                 e = pmap_find(copied_graphs, callee);
2198                 if (e != NULL) {
2199                         int benefice = curr_call->benefice;
2200                         /*
2201                          * Reduce the weight for recursive function IFF not all arguments are const.
2202                          * inlining recursive functions is rarely good.
2203                          */
2204                         if (!curr_call->all_const)
2205                                 benefice -= 2000;
2206                         if (benefice < inline_threshold)
2207                                 continue;
2208
2209                         /*
2210                          * Remap callee if we have a copy.
2211                          */
2212                         callee     = e->value;
2213                         callee_env = get_irg_link(callee);
2214                 }
2215
2216                 if (current_ir_graph == callee) {
2217                         /*
2218                          * Recursive call: we cannot directly inline because we cannot
2219                          * walk the graph and change it. So we have to make a copy of
2220                          * the graph first.
2221                          */
2222                         int benefice = curr_call->benefice;
2223                         ir_graph *copy;
2224
2225                         /*
2226                          * Reduce the weight for recursive function IFF not all arguments are const.
2227                          * inlining recursive functions is rarely good.
2228                          */
2229                         if (!curr_call->all_const)
2230                                 benefice -= 2000;
2231                         if (benefice < inline_threshold)
2232                                 continue;
2233
2234                         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
2235
2236                         /*
2237                          * No copy yet, create one.
2238                          * Note that recursive methods are never leaves, so it is
2239                          * sufficient to test this condition here.
2240                          */
2241                         copy = create_irg_copy(callee);
2242
2243                         /* create_irg_copy() destroys the Proj links, recompute them */
2244                         phiproj_computed = 0;
2245
2246                         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
2247
2248                         /* allocate a new environment */
2249                         callee_env = alloc_inline_irg_env();
2250                         set_irg_link(copy, callee_env);
2251
2252                         assure_cf_loop(copy);
2253                         wenv.x              = callee_env;
2254                         wenv.ignore_callers = 1;
2255                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
2256
2257                         /*
2258                          * Enter the entity of the original graph. This is needed
2259                          * for inline_method(). However, note that ent->irg still points
2260                          * to callee, NOT to copy.
2261                          */
2262                         set_irg_entity(copy, get_irg_entity(callee));
2263
2264                         pmap_insert(copied_graphs, callee, copy);
2265                         callee = copy;
2266
2267                         /* we have only one caller: the original graph */
2268                         callee_env->n_callers      = 1;
2269                         callee_env->n_callers_orig = 1;
2270                 }
2271                 if (! phiproj_computed) {
2272                         phiproj_computed = 1;
2273                         collect_phiprojs(current_ir_graph);
2274                 }
2275                 did_inline = inline_method(call_node, callee);
2276                 if (!did_inline)
2277                         continue;
2278
2279                 /* call was inlined, Phi/Projs for current graph must be recomputed */
2280                 phiproj_computed = 0;
2281
2282                 /* remove it from the caller list */
2283                 list_del(&curr_call->list);
2284
2285                 /* callee was inline. Append it's call list. */
2286                 env->got_inline = 1;
2287                 if (curr_call->local_adr)
2288                         env->local_vars = 1;
2289                 --env->n_call_nodes;
2290
2291                 /* we just generate a bunch of new calls */
2292                 loop_depth = curr_call->loop_depth;
2293                 list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
2294                         inline_irg_env *penv = get_irg_link(centry->callee);
2295                         ir_node        *new_call;
2296                         call_entry     *new_entry;
2297
2298                         /* after we have inlined callee, all called methods inside
2299                          * callee are now called once more */
2300                         ++penv->n_callers;
2301
2302                         /* Note that the src list points to Call nodes in the inlined graph,
2303                          * but we need Call nodes in our graph. Luckily the inliner leaves
2304                          * this information in the link field. */
2305                         new_call = get_irn_link(centry->call);
2306                         assert(is_Call(new_call));
2307
2308                         new_entry = duplicate_call_entry(centry, new_call, loop_depth);
2309                         list_add_tail(&new_entry->list, &env->calls);
2310                         maybe_push_call(pqueue, new_entry, inline_threshold);
2311                 }
2312
2313                 env->n_call_nodes += callee_env->n_call_nodes;
2314                 env->n_nodes += callee_env->n_nodes;
2315                 --callee_env->n_callers;
2316         }
2317         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
2318         del_pqueue(pqueue);
2319 }
2320
2321 /*
2322  * Heuristic inliner. Calculates a benefice value for every call and inlines
2323  * those calls with a value higher than the threshold.
2324  */
2325 void inline_functions(unsigned maxsize, int inline_threshold) {
2326         inline_irg_env   *env;
2327         int              i, n_irgs;
2328         ir_graph         *rem;
2329         wenv_t           wenv;
2330         pmap             *copied_graphs;
2331         pmap_entry       *pm_entry;
2332         ir_graph         **irgs;
2333
2334         rem = current_ir_graph;
2335         obstack_init(&temp_obst);
2336
2337         irgs = create_irg_list();
2338
2339         /* a map for the copied graphs, used to inline recursive calls */
2340         copied_graphs = pmap_create();
2341
2342         /* extend all irgs by a temporary data structure for inlining. */
2343         n_irgs = get_irp_n_irgs();
2344         for (i = 0; i < n_irgs; ++i)
2345                 set_irg_link(irgs[i], alloc_inline_irg_env());
2346
2347         /* Pre-compute information in temporary data structure. */
2348         wenv.ignore_runtime = 0;
2349         wenv.ignore_callers = 0;
2350         for (i = 0; i < n_irgs; ++i) {
2351                 ir_graph *irg = irgs[i];
2352
2353                 free_callee_info(irg);
2354
2355                 wenv.x = get_irg_link(irg);
2356                 assure_cf_loop(irg);
2357                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
2358         }
2359
2360         /* -- and now inline. -- */
2361         for (i = 0; i < n_irgs; ++i) {
2362                 ir_graph *irg = irgs[i];
2363
2364                 inline_into(irg, maxsize, inline_threshold, copied_graphs);
2365         }
2366
2367         for (i = 0; i < n_irgs; ++i) {
2368                 ir_graph *irg = irgs[i];
2369
2370                 env = get_irg_link(irg);
2371                 if (env->got_inline) {
2372                         /* this irg got calls inlined: optimize it */
2373                         if (get_opt_combo()) {
2374                                 if (env->local_vars) {
2375                                         scalar_replacement_opt(irg);
2376                                 }
2377                                 combo(irg);
2378                         } else {
2379                                 if (env->local_vars) {
2380                                         if (scalar_replacement_opt(irg)) {
2381                                                 optimize_graph_df(irg);
2382                                         }
2383                                 }
2384                                 optimize_cf(irg);
2385                         }
2386                 }
2387                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
2388                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
2389                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
2390                         env->n_callers_orig, env->n_callers,
2391                         get_entity_name(get_irg_entity(irg))));
2392                 }
2393         }
2394
2395         /* kill the copied graphs: we don't need them anymore */
2396         foreach_pmap(copied_graphs, pm_entry) {
2397                 ir_graph *copy = pm_entry->value;
2398
2399                 /* reset the entity, otherwise it will be deleted in the next step ... */
2400                 set_irg_entity(copy, NULL);
2401                 free_ir_graph(copy);
2402         }
2403         pmap_destroy(copied_graphs);
2404
2405         xfree(irgs);
2406
2407         obstack_free(&temp_obst, NULL);
2408         current_ir_graph = rem;
2409 }
2410
2411 struct inline_functions_pass_t {
2412         ir_prog_pass_t pass;
2413         unsigned       maxsize;
2414         int            inline_threshold;
2415 };
2416
2417 /**
2418  * Wrapper to run inline_functions() as a ir_prog pass.
2419  */
2420 static int inline_functions_wrapper(ir_prog *irp, void *context) {
2421         struct inline_functions_pass_t *pass = context;
2422
2423         (void)irp;
2424         inline_functions(pass->maxsize, pass->inline_threshold);
2425         return 0;
2426 }
2427
2428 /* create a ir_prog pass for inline_functions */
2429 ir_prog_pass_t *inline_functions_pass(
2430           const char *name, unsigned maxsize, int inline_threshold) {
2431         struct inline_functions_pass_t *pass =
2432                 XMALLOCZ(struct inline_functions_pass_t);
2433
2434         pass->maxsize          = maxsize;
2435         pass->inline_threshold = inline_threshold;
2436
2437         return def_prog_pass_constructor(
2438                 &pass->pass, name ? name : "inline_functions",
2439                 inline_functions_wrapper);
2440 }
2441
2442 void firm_init_inline(void) {
2443         FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
2444 }