fix for my last commit
[libfirm] / ir / opt / opt_inline.c
1 /*
2  * Copyright (C) 1995-2008 University of Karlsruhe.  All rights reserved.
3  *
4  * This file is part of libFirm.
5  *
6  * This file may be distributed and/or modified under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation and appearing in the file LICENSE.GPL included in the
9  * packaging of this file.
10  *
11  * Licensees holding valid libFirm Professional Edition licenses may use
12  * this file in accordance with the libFirm Commercial License
13  * Agreement provided with the Software.
14  *
15  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
16  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE.
18  */
19
20 /**
21  * @file
22  * @brief    Dead node elimination and Procedure Inlining.
23  * @author   Michael Beck, Goetz Lindenmaier
24  * @version  $Id$
25  */
26 #include "config.h"
27
28 #include <limits.h>
29 #include <assert.h>
30
31 #include "irnode_t.h"
32 #include "irgraph_t.h"
33 #include "irprog_t.h"
34
35 #include "iroptimize.h"
36 #include "ircons_t.h"
37 #include "iropt_t.h"
38 #include "irgopt.h"
39 #include "irgmod.h"
40 #include "irgwalk.h"
41
42 #include "array_t.h"
43 #include "list.h"
44 #include "pset.h"
45 #include "pmap.h"
46 #include "pdeq.h"
47 #include "xmalloc.h"
48 #include "pqueue.h"
49
50 #include "irouts.h"
51 #include "irloop_t.h"
52 #include "irbackedge_t.h"
53 #include "opt_init.h"
54 #include "cgana.h"
55 #include "trouts.h"
56 #include "error.h"
57
58 #include "analyze_irg_args.h"
59 #include "iredges_t.h"
60 #include "irflag_t.h"
61 #include "irhooks.h"
62 #include "irtools.h"
63 #include "iropt_dbg.h"
64 #include "irpass_t.h"
65 #include "irphase_t.h"
66
67 DEBUG_ONLY(static firm_dbg_module_t *dbg;)
68
69 /*------------------------------------------------------------------*/
70 /* Routines for dead node elimination / copying garbage collection  */
71 /* of the obstack.                                                  */
72 /*------------------------------------------------------------------*/
73
74 /**
75  * Remember the new node in the old node by using a field all nodes have.
76  */
77 #define set_new_node(oldn, newn)  set_irn_link(oldn, newn)
78
79 /**
80  * Get this new node, before the old node is forgotten.
81  */
82 #define get_new_node(oldn) get_irn_link(oldn)
83
84 /**
85  * Check if a new node was set.
86  */
87 #define has_new_node(n) (get_new_node(n) != NULL)
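/*
 * Typical use of these helpers during copying: copy_node() records the copy
 * nn of a node n via set_new_node(n, nn); later passes such as copy_preds()
 * fetch the copy again with get_new_node(n), and has_new_node(n) tells
 * whether n has been copied at all.
 */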
88
89 /** a pointer to the new phases */
90 static ir_phase *new_phases[PHASE_LAST];
91
92 /**
93  * We use the block_visited flag to mark that we have computed the
94  * number of useful predecessors for this block.
95  * Further we encode the new arity in this flag in the old blocks.
96  * Remembering the arity is useful, as it saves a lot of pointer
97  * accesses.  This function is called for all Phi and Block nodes
98  * in a Block.
99  */
100 static inline int compute_new_arity(ir_node *b)
101 {
102         int i, res, irn_arity;
103         int irg_v, block_v;
104
105         irg_v = get_irg_block_visited(current_ir_graph);
106         block_v = get_Block_block_visited(b);
107         if (block_v >= irg_v) {
108                 /* we computed the number of preds for this block and saved it in the
109                    block_v flag */
110                 return block_v - irg_v;
111         } else {
112                 /* compute the number of good predecessors */
113                 res = irn_arity = get_irn_arity(b);
114                 for (i = 0; i < irn_arity; i++)
115                         if (is_Bad(get_irn_n(b, i))) res--;
116                 /* save it in the flag. */
117                 set_Block_block_visited(b, irg_v + res);
118                 return res;
119         }
120 }
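/*
 * Example of the encoding above: with get_irg_block_visited() == 10, a block
 * with three predecessors of which one is Bad gets its block_visited flag set
 * to 10 + 2 = 12; a later call recovers the new arity as 12 - 10 = 2 without
 * rescanning the predecessors.
 */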
121
122 /**
123  * Copies the node to the new obstack. The Ins of the new node point to
124  * the predecessors on the old obstack.  For block/phi nodes not all
125  * predecessors might be copied.  n->link points to the new node.
126  * For Phi and Block nodes the function allocates in-arrays with an arity
127  * only for useful predecessors.  The arity is determined by counting
128  * the non-bad predecessors of the block.
129  *
130  * @param n    The node to be copied
131  * @param env  if non-NULL, the node number attribute will be copied to the new node
132  *
133  * Note: Also used for loop unrolling.
134  */
135 static void copy_node(ir_node *n, void *env)
136 {
137         ir_node *nn, *block;
138         int new_arity, i;
139         ir_phase *old_ph;
140         ir_op *op = get_irn_op(n);
141         (void) env;
142
143         if (op == op_Bad) {
144                 /* node copied already */
145                 return;
146         } else if (op == op_Block) {
147                 block = NULL;
148                 new_arity = compute_new_arity(n);
149                 n->attr.block.graph_arr = NULL;
150         } else {
151                 block = get_nodes_block(n);
152                 if (op == op_Phi) {
153                         new_arity = compute_new_arity(block);
154                 } else {
155                         new_arity = get_irn_arity(n);
156                 }
157         }
158         nn = new_ir_node(get_irn_dbg_info(n),
159                 current_ir_graph,
160                 block,
161                 op,
162                 get_irn_mode(n),
163                 new_arity,
164                 get_irn_in(n) + 1);
165         /* Copy the attributes.  These might point to additional data.  If this
166            was allocated on the old obstack the pointers now are dangling.  This
167            frees e.g. the memory of the graph_arr allocated in new_immBlock. */
168         if (op == op_Block) {
169                 /* we cannot allow blocks WITHOUT macroblock input */
170                 set_Block_MacroBlock(nn, get_Block_MacroBlock(n));
171         }
172         copy_node_attr(n, nn);
173
174         /* preserve the phase information for this node */
175         for (i = PHASE_NOT_IRG_MANAGED+1; i < PHASE_LAST; i++) {
176                 old_ph = get_irg_phase(current_ir_graph, i);
177                 if (old_ph != NULL) {
178                         if (phase_get_irn_data(old_ph, n)) {
179                                 phase_set_irn_data(new_phases[i], nn, phase_get_irn_data(old_ph, n));
180                         }
181                 }
182         }
183
184         if (env != NULL) {
185                 /* for easier debugging, we want to copy the node numbers too */
186                 nn->node_nr = n->node_nr;
187         }
188
189         set_new_node(n, nn);
190         hook_dead_node_elim_subst(current_ir_graph, n, nn);
191 }
192
193 /**
194  * Copies new predecessors of old node to new node remembered in link.
195  * Spare the Bad predecessors of Phi and Block nodes.
196  */
197 static void copy_preds(ir_node *n, void *env)
198 {
199         ir_node *nn, *block;
200         int i, j, irn_arity;
201         (void) env;
202
203         nn = get_new_node(n);
204
205         if (is_Block(n)) {
206                 /* copy the macro block header */
207                 ir_node *mbh = get_Block_MacroBlock(n);
208
209                 if (mbh == n) {
210                         /* this block is a macroblock header */
211                         set_Block_MacroBlock(nn, nn);
212                 } else {
213                         /* get the macro block header */
214                         ir_node *nmbh = get_new_node(mbh);
215                         assert(nmbh != NULL);
216                         set_Block_MacroBlock(nn, nmbh);
217                 }
218
219                 /* Don't copy Bad nodes. */
220                 j = 0;
221                 irn_arity = get_irn_arity(n);
222                 for (i = 0; i < irn_arity; i++) {
223                         if (! is_Bad(get_irn_n(n, i))) {
224                                 ir_node *pred = get_irn_n(n, i);
225                                 set_irn_n(nn, j, get_new_node(pred));
226                                 j++;
227                         }
228                 }
229                 /* repair the block visited flag from above misuse. Repair it in both
230                    graphs so that the old one can still be used. */
231                 set_Block_block_visited(nn, 0);
232                 set_Block_block_visited(n, 0);
233                 /* Local optimization could not merge two subsequent blocks if
234                    the in array contained Bads.  Now it's possible.
235                    We don't call optimize_in_place as it requires
236                    that the fields in ir_graph are set properly. */
237                 if (!has_Block_entity(nn) &&
238                     get_opt_control_flow_straightening() &&
239                     get_Block_n_cfgpreds(nn) == 1 &&
240                     is_Jmp(get_Block_cfgpred(nn, 0))) {
241                         ir_node *old = get_nodes_block(get_Block_cfgpred(nn, 0));
242                         if (nn == old) {
243                                 /* Jmp jumps into the block it is in -- deal with the self cycle. */
244                                 assert(is_Bad(get_new_node(get_irg_bad(current_ir_graph))));
245                                 exchange(nn, get_new_node(get_irg_bad(current_ir_graph)));
246                         } else {
247                                 exchange(nn, old);
248                         }
249                 }
250         } else if (is_Phi(n) && get_irn_arity(n) > 0) {
251                 /* Don't copy node if corresponding predecessor in block is Bad.
252                    The Block itself should not be Bad. */
253                 block = get_nodes_block(n);
254                 set_nodes_block(nn, get_new_node(block));
255                 j = 0;
256                 irn_arity = get_irn_arity(n);
257                 for (i = 0; i < irn_arity; i++) {
258                         if (! is_Bad(get_irn_n(block, i))) {
259                                 ir_node *pred = get_irn_n(n, i);
260                                 set_irn_n(nn, j, get_new_node(pred));
261                                 /*if (is_backedge(n, i)) set_backedge(nn, j);*/
262                                 j++;
263                         }
264                 }
265                 /* If the pre walker reached this Phi after the post walker visited the
266                    block, block_visited is > 0. */
267                 set_Block_block_visited(get_nodes_block(n), 0);
268                 /* Compacting the Phi's ins might generate Phis with only one
269                    predecessor. */
270                 if (get_irn_arity(nn) == 1)
271                         exchange(nn, get_irn_n(nn, 0));
272         } else {
273                 irn_arity = get_irn_arity(n);
274                 for (i = -1; i < irn_arity; i++)
275                         set_irn_n(nn, i, get_new_node(get_irn_n(n, i)));
276         }
277         /* Now the new node is complete.  We can add it to the hash table for CSE.
278            @@@ inlining aborts if we identify End. Why? */
279         if (!is_End(nn))
280                 add_identities(current_ir_graph->value_table, nn);
281 }
282
283 /**
284  * Copies the graph recursively, compacts the keep-alives of the end node.
285  *
286  * @param irg           the graph to be copied
287  * @param copy_node_nr  If non-zero, the node number will be copied
288  */
289 static void copy_graph(ir_graph *irg, int copy_node_nr)
290 {
291         ir_node *oe, *ne, *ob, *nb, *om, *nm; /* old end, new end, old bad, new bad, old NoMem, new NoMem */
292         ir_node *ka;      /* keep alive */
293         int i, irn_arity;
294         unsigned long vfl;
295
296         /* Some nodes must be copied by hand, sigh */
297         vfl = get_irg_visited(irg);
298         set_irg_visited(irg, vfl + 1);
299
300         oe = get_irg_end(irg);
301         mark_irn_visited(oe);
302         /* copy the end node by hand, allocate dynamic in array! */
303         ne = new_ir_node(get_irn_dbg_info(oe),
304                 irg,
305                 NULL,
306                 op_End,
307                 mode_X,
308                 -1,
309                 NULL);
310         /* Copy the attributes.  Well, there might be some in the future... */
311         copy_node_attr(oe, ne);
312         set_new_node(oe, ne);
313
314         /* copy the Bad node */
315         ob = get_irg_bad(irg);
316         mark_irn_visited(ob);
317         nb = new_ir_node(get_irn_dbg_info(ob),
318                 irg,
319                 NULL,
320                 op_Bad,
321                 mode_T,
322                 0,
323                 NULL);
324         copy_node_attr(ob, nb);
325         set_new_node(ob, nb);
326
327         /* copy the NoMem node */
328         om = get_irg_no_mem(irg);
329         mark_irn_visited(om);
330         nm = new_ir_node(get_irn_dbg_info(om),
331                 irg,
332                 NULL,
333                 op_NoMem,
334                 mode_M,
335                 0,
336                 NULL);
337         copy_node_attr(om, nm);
338         set_new_node(om, nm);
339
340         /* copy the live nodes */
341         set_irg_visited(irg, vfl);
342         irg_walk(get_nodes_block(oe), copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
343
344         /* Note: from now on, the visited flag of the graph is equal to vfl + 1 */
345
346         /* visit the anchors as well */
347         for (i = get_irg_n_anchors(irg) - 1; i >= 0; --i) {
348                 ir_node *n = get_irg_anchor(irg, i);
349
350                 if (n && (get_irn_visited(n) <= vfl)) {
351                         set_irg_visited(irg, vfl);
352                         irg_walk(n, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
353                 }
354         }
355
356         /* copy_preds for the end node ... */
357         set_nodes_block(ne, get_new_node(get_nodes_block(oe)));
358
359         /*- ... and now the keep alives. -*/
360         /* First pick the unmarked block nodes and walk them.  We must pick these
361            first, as otherwise we would overlook blocks reachable from Phis. */
362         irn_arity = get_End_n_keepalives(oe);
363         for (i = 0; i < irn_arity; i++) {
364                 ka = get_End_keepalive(oe, i);
365                 if (is_Block(ka)) {
366                         if (get_irn_visited(ka) <= vfl) {
367                                 /* We must keep the block alive and copy everything reachable */
368                                 set_irg_visited(irg, vfl);
369                                 irg_walk(ka, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
370                         }
371                         add_End_keepalive(ne, get_new_node(ka));
372                 }
373         }
374
375         /* Now pick other nodes.  Here we will keep all! */
376         irn_arity = get_End_n_keepalives(oe);
377         for (i = 0; i < irn_arity; i++) {
378                 ka = get_End_keepalive(oe, i);
379                 if (!is_Block(ka)) {
380                         if (get_irn_visited(ka) <= vfl) {
381                                 /* We didn't copy the node yet.  */
382                                 set_irg_visited(irg, vfl);
383                                 irg_walk(ka, copy_node, copy_preds, INT_TO_PTR(copy_node_nr));
384                         }
385                         add_End_keepalive(ne, get_new_node(ka));
386                 }
387         }
388
389         /* start block sometimes only reached after keep alives */
390         set_nodes_block(nb, get_new_node(get_nodes_block(ob)));
391         set_nodes_block(nm, get_new_node(get_nodes_block(om)));
392 }
393
394 /**
395  * Copies the graph reachable from current_ir_graph->end to the obstack
396  * in current_ir_graph and fixes the environment.
397  * Then fixes the fields in current_ir_graph containing nodes of the
398  * graph.
399  *
400  * @param copy_node_nr  If non-zero, the node number will be copied
401  */
402 static void copy_graph_env(int copy_node_nr)
403 {
404         ir_graph *irg = current_ir_graph;
405         ir_node *old_end, *new_anchor;
406         ir_phase *old_ph;
407         int i;
408
409         /* init the new_phases array */
410         for (i = PHASE_NOT_IRG_MANAGED+1; i < PHASE_LAST; i++) {
411                 old_ph = get_irg_phase(irg, i);
412                 if (old_ph == NULL) {
413                         new_phases[i] = NULL;
414                 } else {
415                         new_phases[i] = XMALLOC(ir_phase);
416                         phase_init(new_phases[i], "", irg, old_ph->growth_factor,
417                                         old_ph->data_init, old_ph->priv);
418                 }
419         }
420
421
422         /* remove end_except and end_reg nodes */
423         old_end = get_irg_end(irg);
424         set_irg_end_except (irg, old_end);
425         set_irg_end_reg    (irg, old_end);
426
427         /* Not all nodes remembered in irg might be reachable
428            from the end node.  Assure their link is set to NULL, so that
429            we can test whether new nodes have been computed. */
430         for (i = get_irg_n_anchors(irg) - 1; i >= 0; --i) {
431                 ir_node *n = get_irg_anchor(irg, i);
432                 if (n != NULL)
433                         set_new_node(n, NULL);
434         }
435         /* we use the block walk flag for removing Bads from Blocks ins. */
436         inc_irg_block_visited(irg);
437
438         /* copy the graph */
439         copy_graph(irg, copy_node_nr);
440
441         /* fix the anchor */
442         old_end    = get_irg_end(irg);
443         new_anchor = new_Anchor(irg);
444
445         for (i = get_irg_n_anchors(irg) - 1; i >= 0; --i) {
446                 ir_node *n = get_irg_anchor(irg, i);
447                 if (n) {
448                         set_irn_n(new_anchor, i, get_new_node(n));
449                 }
450         }
451
452         /* copy the new phases into the irg */
453         for (i = PHASE_NOT_IRG_MANAGED+1; i < PHASE_LAST; i++) {
454                 old_ph = get_irg_phase(irg, i);
455                 if (old_ph != NULL) {
456                         free_irg_phase(irg, i);
457                         irg->phases[i] = new_phases[i];
458                 }
459         }
460
461         free_End(old_end);
462         irg->anchor = new_anchor;
463
464         /* ensure the new anchor is placed in the endblock */
465         set_nodes_block(new_anchor, get_irg_end_block(irg));
466 }
467
468 /**
469  * Copies all reachable nodes to a new obstack.  Removes bad inputs
470  * from block nodes and the corresponding inputs from Phi nodes.
471  * Merges single exit blocks with single entry blocks and removes
472  * 1-input Phis.
473  * Adds all new nodes to a new hash table for CSE.  Does not
474  * perform CSE, so the hash table might contain common subexpressions.
475  */
476 void dead_node_elimination(ir_graph *irg)
477 {
478         ir_graph *rem;
479 #ifdef INTERPROCEDURAL_VIEW
480         int rem_ipview = get_interprocedural_view();
481 #endif
482         struct obstack *graveyard_obst = NULL;
483         struct obstack *rebirth_obst   = NULL;
484
485         edges_deactivate(irg);
486
487         /* inform statistics that we started a dead-node elimination run */
488         hook_dead_node_elim(irg, 1);
489
490         /* Remember external state of current_ir_graph. */
491         rem = current_ir_graph;
492         current_ir_graph = irg;
493 #ifdef INTERPROCEDURAL_VIEW
494         set_interprocedural_view(0);
495 #endif
496
497         assert(get_irg_phase_state(irg) != phase_building);
498
499         /* Handle graph state */
500         free_callee_info(irg);
501         free_irg_outs(irg);
502         free_trouts();
503
504         /* @@@ so far we lose loops when copying */
505         free_loop_information(irg);
506
507         set_irg_doms_inconsistent(irg);
508
509         /* A quiet place, where the old obstack can rest in peace,
510            until it will be cremated. */
511         graveyard_obst = irg->obst;
512
513         /* A new obstack, where the reachable nodes will be copied to. */
514         rebirth_obst = XMALLOC(struct obstack);
515         irg->obst = rebirth_obst;
516         obstack_init(irg->obst);
517         irg->last_node_idx = 0;
518
519         /* We also need a new value table for CSE */
520         del_identities(irg->value_table);
521         irg->value_table = new_identities();
522
523         /* Copy the graph from the old to the new obstack */
524         copy_graph_env(/*copy_node_nr=*/1);
525
526         /* Free memory from old unoptimized obstack */
527         obstack_free(graveyard_obst, 0);  /* First empty the obstack ... */
528         xfree(graveyard_obst);            /* ... then free it.           */
529
530         /* inform statistics that the run is over */
531         hook_dead_node_elim(irg, 0);
532
533         current_ir_graph = rem;
534 #ifdef INTERPROCEDURAL_VIEW
535         set_interprocedural_view(rem_ipview);
536 #endif
537 }
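/*
 * Typical call site (sketch): after a transformation pass has produced many
 * dead nodes, their memory can be reclaimed with
 *
 *     dead_node_elimination(irg);
 *
 * Any ir_node pointers into irg held by the caller become stale afterwards,
 * unless they are registered with the survive DCE facility below.
 */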
538
539 ir_graph_pass_t *dead_node_elimination_pass(const char *name)
540 {
541         return def_graph_pass(name ? name : "dce", dead_node_elimination);
542 }
543
544 /**
545  * Relink bad predecessors of a block and store the old in array to the
546  * link field. This function is called by relink_bad_predecessors().
547  * The array in the link field starts with the block operand at position 0.
548  * If the block has bad predecessors, create a new in array without bad preds.
549  * Otherwise leave the in array untouched.
550  */
551 static void relink_bad_block_predecessors(ir_node *n, void *env)
552 {
553         ir_node **new_in, *irn;
554         int i, new_irn_n, old_irn_arity, new_irn_arity = 0;
555         (void) env;
556
557         /* if link field of block is NULL, look for bad predecessors otherwise
558            this is already done */
559         if (is_Block(n) && get_irn_link(n) == NULL) {
560                 /* save old predecessors in link field (position 0 is the block operand)*/
561                 set_irn_link(n, get_irn_in(n));
562
563                 /* count predecessors without bad nodes */
564                 old_irn_arity = get_irn_arity(n);
565                 for (i = 0; i < old_irn_arity; i++)
566                         if (!is_Bad(get_irn_n(n, i)))
567                                 ++new_irn_arity;
568
569                 /* arity changing: set new predecessors without bad nodes */
570                 if (new_irn_arity < old_irn_arity) {
571                         /* Get new predecessor array. We do not resize the array, as we must
572                            keep the old one to update Phis. */
573                         new_in = NEW_ARR_D(ir_node *, current_ir_graph->obst, (new_irn_arity+1));
574
575                         /* set new predecessors in array */
576                         new_in[0] = NULL;
577                         new_irn_n = 1;
578                         for (i = 0; i < old_irn_arity; i++) {
579                                 irn = get_irn_n(n, i);
580                                 if (!is_Bad(irn)) {
581                                         new_in[new_irn_n] = irn;
582                                         is_backedge(n, i) ? set_backedge(n, new_irn_n-1) : set_not_backedge(n, new_irn_n-1);
583                                         ++new_irn_n;
584                                 }
585                         }
586                         /* ARR_SETLEN(int, n->attr.block.backedge, new_irn_arity); */
587                         ARR_SHRINKLEN(n->attr.block.backedge, new_irn_arity);
588                         n->in = new_in;
589                 } /* ir node has bad predecessors */
590         } /* Block is not relinked */
591 }
592
593 /**
594  * Relinks Bad predecessors from Blocks and Phis; called by the walker in
595  * remove_bad_predecessors(). If n is a Block, call
596  * relink_bad_block_predecessors(). If n is a Phi node, also call the relinking
597  * function for the Phi's Block. If this block has bad predecessors, relink the
598  * preds of the Phi node.
599  */
600 static void relink_bad_predecessors(ir_node *n, void *env)
601 {
602         ir_node *block, **old_in;
603         int i, old_irn_arity, new_irn_arity;
604
605         /* relink bad predecessors of a block */
606         if (is_Block(n))
607                 relink_bad_block_predecessors(n, env);
608
609         /* If Phi node relink its block and its predecessors */
610         if (is_Phi(n)) {
611                 /* Relink predecessors of phi's block */
612                 block = get_nodes_block(n);
613                 if (get_irn_link(block) == NULL)
614                         relink_bad_block_predecessors(block, env);
615
616                 old_in = (ir_node **)get_irn_link(block); /* Of Phi's Block */
617                 old_irn_arity = ARR_LEN(old_in);
618
619                 /* Relink Phi predecessors if count of predecessors changed */
620                 if (old_irn_arity != ARR_LEN(get_irn_in(block))) {
621                         /* set new predecessors in array
622                            n->in[0] remains the same block */
623                         new_irn_arity = 1;
624                         for (i = 1; i < old_irn_arity; i++)
625                                 if (!is_Bad(old_in[i])) {
626                                         n->in[new_irn_arity] = n->in[i];
627                                         is_backedge(n, i) ? set_backedge(n, new_irn_arity) : set_not_backedge(n, new_irn_arity);
628                                         ++new_irn_arity;
629                                 }
630
631                         ARR_SETLEN(ir_node *, n->in, new_irn_arity);
632                         ARR_SETLEN(int, n->attr.phi.u.backedge, new_irn_arity);
633                 }
634         } /* n is a Phi node */
635 }
636
637 /*
638  * Removes Bad predecessors from Blocks and the corresponding
639  * inputs to Phi nodes as in dead_node_elimination but without
640  * copying the graph.
641  * On walking down set the link field to NULL, on walking up call
642  * relink_bad_predecessors() (this function stores the old in array
643  * to the link field and sets a new in array if arity of predecessors
644  * changes).
645  */
646 void remove_bad_predecessors(ir_graph *irg)
647 {
648         panic("Fix backedge handling first");
649         irg_walk_graph(irg, firm_clear_link, relink_bad_predecessors, NULL);
650 }
651
652
653 /*
654    __                      _  __ __
655   (_     __    o     _    | \/  |_
656   __)|_| | \_/ | \_/(/_   |_/\__|__
657
658   The following stuff implements a facility that automatically patches
659   registered ir_node pointers to the new node when a dead node elimination occurs.
660 */
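/*
 * Usage sketch, where my_node stands for any ir_node pointer that must stay
 * valid across a dead node elimination:
 *
 *     survive_dce_t *sd = new_survive_dce();
 *     survive_dce_register_irn(sd, &my_node);
 *     dead_node_elimination(irg);
 *     free_survive_dce(sd);
 *
 * After dead_node_elimination() my_node points to the copied node.
 */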
661
662 struct _survive_dce_t {
663         struct obstack obst;
664         pmap *places;
665         pmap *new_places;
666         hook_entry_t dead_node_elim;
667         hook_entry_t dead_node_elim_subst;
668 };
669
670 typedef struct _survive_dce_list_t {
671         struct _survive_dce_list_t *next;
672         ir_node **place;
673 } survive_dce_list_t;
674
675 static void dead_node_hook(void *context, ir_graph *irg, int start)
676 {
677         survive_dce_t *sd = context;
678         (void) irg;
679
680         /* Create a new map before the dead node elimination is performed. */
681         if (start) {
682                 sd->new_places = pmap_create_ex(pmap_count(sd->places));
683         } else {
684                 /* Patch back all nodes if dead node elimination is over and something is to be done. */
685                 pmap_destroy(sd->places);
686                 sd->places     = sd->new_places;
687                 sd->new_places = NULL;
688         }
689 }
690
691 /**
692  * Hook called when dead node elimination replaces old by nw.
693  */
694 static void dead_node_subst_hook(void *context, ir_graph *irg, ir_node *old, ir_node *nw)
695 {
696         survive_dce_t *sd = context;
697         survive_dce_list_t *list = pmap_get(sd->places, old);
698         (void) irg;
699
700         /* If the node is to be patched back, write the new address to all registered locations. */
701         if (list) {
702                 survive_dce_list_t *p;
703
704                 for (p = list; p; p = p->next)
705                         *(p->place) = nw;
706
707                 pmap_insert(sd->new_places, nw, list);
708         }
709 }
710
711 /**
712  * Make a new Survive DCE environment.
713  */
714 survive_dce_t *new_survive_dce(void)
715 {
716         survive_dce_t *res = XMALLOC(survive_dce_t);
717         obstack_init(&res->obst);
718         res->places     = pmap_create();
719         res->new_places = NULL;
720
721         res->dead_node_elim.hook._hook_dead_node_elim = dead_node_hook;
722         res->dead_node_elim.context                   = res;
723         res->dead_node_elim.next                      = NULL;
724
725         res->dead_node_elim_subst.hook._hook_dead_node_elim_subst = dead_node_subst_hook;
726         res->dead_node_elim_subst.context = res;
727         res->dead_node_elim_subst.next    = NULL;
728
729         register_hook(hook_dead_node_elim, &res->dead_node_elim);
730         register_hook(hook_dead_node_elim_subst, &res->dead_node_elim_subst);
731         return res;
732 }
733
734 /**
735  * Free a Survive DCE environment.
736  */
737 void free_survive_dce(survive_dce_t *sd)
738 {
739         obstack_free(&sd->obst, NULL);
740         pmap_destroy(sd->places);
741         unregister_hook(hook_dead_node_elim, &sd->dead_node_elim);
742         unregister_hook(hook_dead_node_elim_subst, &sd->dead_node_elim_subst);
743         xfree(sd);
744 }
745
746 /**
747  * Register a node pointer to be patched upon DCE.
748  * When DCE occurs, the node pointer specified by @p place will be
749  * patched to the new address of the node it is pointing to.
750  *
751  * @param sd    The Survive DCE environment.
752  * @param place The address of the node pointer.
753  */
754 void survive_dce_register_irn(survive_dce_t *sd, ir_node **place)
755 {
756         if (*place != NULL) {
757                 ir_node *irn      = *place;
758                 survive_dce_list_t *curr = pmap_get(sd->places, irn);
759                 survive_dce_list_t *nw   = OALLOC(&sd->obst, survive_dce_list_t);
760
761                 nw->next  = curr;
762                 nw->place = place;
763
764                 pmap_insert(sd->places, irn, nw);
765         }
766 }
767
768 /*--------------------------------------------------------------------*/
769 /*  Functionality for inlining                                         */
770 /*--------------------------------------------------------------------*/
771
772 /**
773  * Copy node for inlining.  Updates attributes that change when
774  * inlining but not for dead node elimination.
775  *
776  * Copies the node by calling copy_node() and then updates the entity if
777  * it's a local one.  env must be a pointer of the frame type of the
778  * inlined procedure. The new entities must be in the link field of
779  * the entities.
780  */
781 static void copy_node_inline(ir_node *n, void *env)
782 {
783         ir_node *nn;
784         ir_type *frame_tp = (ir_type *)env;
785
786         copy_node(n, NULL);
787         if (is_Sel(n)) {
788                 nn = get_new_node(n);
789                 assert(is_Sel(nn));
790                 /* use copied entities from the new frame */
791                 if (get_entity_owner(get_Sel_entity(n)) == frame_tp) {
792                         set_Sel_entity(nn, get_entity_link(get_Sel_entity(n)));
793                 }
794         } else if (is_Block(n)) {
795                 nn = get_new_node(n);
796                 nn->attr.block.irg.irg = current_ir_graph;
797         }
798 }
799
800 /**
801  * Copies new predecessors of the old node and moves constants to
802  * the Start Block.
803  */
804 static void copy_preds_inline(ir_node *n, void *env)
805 {
806         ir_node *nn;
807
808         copy_preds(n, env);
809         nn = skip_Id(get_new_node(n));
810         if (is_irn_constlike(nn)) {
811                 /* move Constants into the start block */
812                 set_nodes_block(nn, get_irg_start_block(current_ir_graph));
813
814                 n = identify_remember(current_ir_graph->value_table, nn);
815                 if (nn != n) {
816                         DBG_OPT_CSE(nn, n);
817                         exchange(nn, n);
818                 }
819         }
820 }
821
822 /**
823  * Walker: checks if P_value_arg_base is used.
824  */
825 static void find_addr(ir_node *node, void *env)
826 {
827         int *allow_inline = env;
828         if (is_Sel(node)) {
829                 ir_graph *irg = current_ir_graph;
830                 if (get_Sel_ptr(node) == get_irg_frame(irg)) {
831                         /* access to frame */
832                         ir_entity *ent = get_Sel_entity(node);
833                         if (get_entity_owner(ent) != get_irg_frame_type(irg)) {
834                                 /* access to value_type */
835                                 *allow_inline = 0;
836                         }
837                 }
838         } else if (is_Alloc(node) && get_Alloc_where(node) == stack_alloc) {
839                 /* From GCC:
840                  * Refuse to inline alloca call unless user explicitly forced so as this
841                  * may change program's memory overhead drastically when the function
842                  * using alloca is called in loop.  In GCC present in SPEC2000 inlining
843                  * into schedule_block cause it to require 2GB of ram instead of 256MB.
844                  *
845                  * Sadly this is true for our implementation as well.
846                  * Moreover, we cannot differentiate between alloca() and VLA yet, so this
847                  * disables inlining of functions using VLA (which are completely safe).
848                  *
849                  * 2 Solutions:
850                  * - add a flag to the Alloc node for "real" alloca() calls
851                  * - add a new Stack-Restore node at the end of a function using alloca()
852                  */
853                 *allow_inline = 0;
854         }
855 }
856
857 /**
858  * Check if we can inline a given call.
859  * Currently, we cannot inline two cases:
860  * - call with compound arguments
861  * - graphs that take the address of a parameter
862  *
863  * check these conditions here
864  */
865 static int can_inline(ir_node *call, ir_graph *called_graph)
866 {
867         ir_type *call_type = get_Call_type(call);
868         int params, ress, i, res;
869         assert(is_Method_type(call_type));
870
871         params = get_method_n_params(call_type);
872         ress   = get_method_n_ress(call_type);
873
874         /* check parameters for compound arguments */
875         for (i = 0; i < params; ++i) {
876                 ir_type *p_type = get_method_param_type(call_type, i);
877
878                 if (is_compound_type(p_type))
879                         return 0;
880         }
881
882         /* check results for compound arguments */
883         for (i = 0; i < ress; ++i) {
884                 ir_type *r_type = get_method_res_type(call_type, i);
885
886                 if (is_compound_type(r_type))
887                         return 0;
888         }
889
890         res = 1;
891         irg_walk_graph(called_graph, find_addr, NULL, &res);
892
893         return res;
894 }
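/*
 * Example: a call to a function declared as
 *
 *     struct pair f(struct pair p);
 *
 * is rejected here, because both the parameter and the result are compound
 * types.
 */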
895
896 enum exc_mode {
897         exc_handler,    /**< There is a handler. */
898         exc_no_handler  /**< Exception handling not represented. */
899 };
900
901 /* Inlines a method at the given call site. */
902 int inline_method(ir_node *call, ir_graph *called_graph)
903 {
904         ir_node             *pre_call;
905         ir_node             *post_call, *post_bl;
906         ir_node             *in[pn_Start_max];
907         ir_node             *end, *end_bl, *block;
908         ir_node             **res_pred;
909         ir_node             **cf_pred;
910         ir_node             **args_in;
911         ir_node             *ret, *phi;
912         int                 arity, n_ret, n_exc, n_res, i, n, j, rem_opt, irn_arity, n_params;
913         int                 n_mem_phi;
914         enum exc_mode       exc_handling;
915         ir_type             *called_frame, *curr_frame, *mtp, *ctp;
916         ir_entity           *ent;
917         ir_graph            *rem, *irg;
918         irg_inline_property prop = get_irg_inline_property(called_graph);
919         unsigned long       visited;
920
921         if (prop == irg_inline_forbidden)
922                 return 0;
923
924         ent = get_irg_entity(called_graph);
925
926         mtp = get_entity_type(ent);
927         ctp = get_Call_type(call);
928         n_params = get_method_n_params(mtp);
929         n_res    = get_method_n_ress(mtp);
930         if (n_params > get_method_n_params(ctp)) {
931                 /* this is a bad feature of C: without a prototype, we can
932                  * call a function with fewer parameters than needed. Currently
933                  * we don't support this, although we could use Unknown then. */
934                 return 0;
935         }
936         if (n_res != get_method_n_ress(ctp)) {
937                 return 0;
938         }
939
940         /* Argh, compiling C has some bad consequences:
941          * it is implementation defined what happens when the argument types of the
942          * call differ from the declared parameter types.  We support inlining
943          * if the bit size of the types matches AND the same arithmetic is used. */
944         for (i = n_params - 1; i >= 0; --i) {
945                 ir_type *param_tp = get_method_param_type(mtp, i);
946                 ir_type *arg_tp   = get_method_param_type(ctp, i);
947
948                 if (param_tp != arg_tp) {
949                         ir_mode *pmode = get_type_mode(param_tp);
950                         ir_mode *amode = get_type_mode(arg_tp);
951
952                         if (pmode == NULL || amode == NULL)
953                                 return 0;
954                         if (get_mode_size_bits(pmode) != get_mode_size_bits(amode))
955                                 return 0;
956                         if (get_mode_arithmetic(pmode) != get_mode_arithmetic(amode))
957                                 return 0;
958                         /* otherwise we can simply "reinterpret" the bits */
959                 }
960         }
961         for (i = n_res - 1; i >= 0; --i) {
962                 ir_type *decl_res_tp = get_method_res_type(mtp, i);
963                 ir_type *used_res_tp = get_method_res_type(ctp, i);
964
965                 if (decl_res_tp != used_res_tp) {
966                         ir_mode *decl_mode = get_type_mode(decl_res_tp);
967                         ir_mode *used_mode = get_type_mode(used_res_tp);
968                         if (decl_mode == NULL || used_mode == NULL)
969                                 return 0;
970                         if (get_mode_size_bits(decl_mode) != get_mode_size_bits(used_mode))
971                                 return 0;
972                         if (get_mode_arithmetic(decl_mode) != get_mode_arithmetic(used_mode))
973                                 return 0;
974                         /* otherwise we can "reinterpret" the bits */
975                 }
976         }
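        /* For example, passing an int argument for a long parameter is accepted
         * on targets where both are 32 bit integers, while passing a float for an
         * int is rejected because the arithmetic differs even if the bit size
         * matches. */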
977
978         irg = get_irn_irg(call);
979
980         /*
981          * We cannot inline a recursive call. The graph must be copied before
982          * the call the inline_method() using create_irg_copy().
983          */
984         if (called_graph == irg)
985                 return 0;
986
987         /*
988          * currently, we cannot inline two cases:
989          * - call with compound arguments
990          * - graphs that take the address of a parameter
991          */
992         if (! can_inline(call, called_graph))
993                 return 0;
994
995         rem = current_ir_graph;
996         current_ir_graph = irg;
997
998         DB((dbg, LEVEL_1, "Inlining %+F(%+F) into %+F\n", call, called_graph, irg));
999
1000         /* --  Turn off optimizations: they can cause problems when allocating new nodes. -- */
1001         rem_opt = get_opt_optimize();
1002         set_optimize(0);
1003
1004         /* Handle graph state */
1005         assert(get_irg_phase_state(irg) != phase_building);
1006         assert(get_irg_pinned(irg) == op_pin_state_pinned);
1007         assert(get_irg_pinned(called_graph) == op_pin_state_pinned);
1008         set_irg_outs_inconsistent(irg);
1009         set_irg_extblk_inconsistent(irg);
1010         set_irg_doms_inconsistent(irg);
1011         set_irg_loopinfo_inconsistent(irg);
1012         set_irg_callee_info_state(irg, irg_callee_info_inconsistent);
1013         set_irg_entity_usage_state(irg, ir_entity_usage_not_computed);
1014
1015         /* -- Check preconditions -- */
1016         assert(is_Call(call));
1017
1018         /* here we know we WILL inline, so inform the statistics */
1019         hook_inline(call, called_graph);
1020
1021         /* -- Decide how to handle exception control flow: Is there a handler
1022            for the Call node, or do we branch directly to End on an exception?
1023            exc_handling:
1024            exc_handler     There is a handler.
1025            exc_no_handler  Exception handling not represented in Firm. -- */
1026         {
1027                 ir_node *Xproj = NULL;
1028                 ir_node *proj;
1029                 for (proj = get_irn_link(call); proj; proj = get_irn_link(proj)) {
1030                         long proj_nr = get_Proj_proj(proj);
1031                         if (proj_nr == pn_Call_X_except) Xproj = proj;
1032                 }
1033                 exc_handling = Xproj != NULL ? exc_handler : exc_no_handler;
1034         }
1035
1036         /* create the argument tuple */
1037         NEW_ARR_A(ir_node *, args_in, n_params);
1038
1039         block = get_nodes_block(call);
1040         for (i = n_params - 1; i >= 0; --i) {
1041                 ir_node *arg      = get_Call_param(call, i);
1042                 ir_type *param_tp = get_method_param_type(mtp, i);
1043                 ir_mode *mode     = get_type_mode(param_tp);
1044
1045                 if (mode != get_irn_mode(arg)) {
1046                         arg = new_r_Conv(block, arg, mode);
1047                 }
1048                 args_in[i] = arg;
1049         }
1050
1051         /* -- Pre_call is a Tuple that collects the memory, the frame, the TLS pointer
1052            and the arguments of the procedure and later replaces the Start node of the
1053            called graph. Post_call is the old Call node and collects the results of the
1054            called graph. Both will end up being a tuple.  -- */
1055         post_bl = get_nodes_block(call);
1056         set_irg_current_block(irg, post_bl);
1057         /* XxMxPxPxPxT of Start + parameter of Call */
1058         in[pn_Start_X_initial_exec]   = new_Jmp();
1059         in[pn_Start_M]                = get_Call_mem(call);
1060         in[pn_Start_P_frame_base]     = get_irg_frame(irg);
1061         in[pn_Start_P_tls]            = get_irg_tls(irg);
1062         in[pn_Start_T_args]           = new_Tuple(n_params, args_in);
1063         pre_call = new_Tuple(pn_Start_max, in);
1064         post_call = call;
1065
1066         /* --
1067            The new block gets the ins of the old block, pre_call and all its
1068            predecessors and all Phi nodes. -- */
1069         part_block(pre_call);
1070
1071         /* -- Prepare state for dead node elimination -- */
1072         /* Visited flags in calling irg must be >= flag in called irg.
1073            Else walker and arity computation will not work. */
1074         if (get_irg_visited(irg) <= get_irg_visited(called_graph))
1075                 set_irg_visited(irg, get_irg_visited(called_graph) + 1);
1076         if (get_irg_block_visited(irg) < get_irg_block_visited(called_graph))
1077                 set_irg_block_visited(irg, get_irg_block_visited(called_graph));
1078         visited = get_irg_visited(irg);
1079
1080         /* Set pre_call as new Start node in the link field of the start node of
1081            the called graph and pre_call's block as new block for the start block
1082            of the called graph.
1083            Further mark these nodes so that they are not visited by the
1084            copying. */
1085         set_irn_link(get_irg_start(called_graph), pre_call);
1086         set_irn_visited(get_irg_start(called_graph), visited);
1087         set_irn_link(get_irg_start_block(called_graph), get_nodes_block(pre_call));
1088         set_irn_visited(get_irg_start_block(called_graph), visited);
1089
1090         set_irn_link(get_irg_bad(called_graph), get_irg_bad(current_ir_graph));
1091         set_irn_visited(get_irg_bad(called_graph), visited);
1092
1093         set_irn_link(get_irg_no_mem(called_graph), get_irg_no_mem(current_ir_graph));
1094         set_irn_visited(get_irg_no_mem(called_graph), visited);
1095
1096         /* Initialize for compaction of in arrays */
1097         inc_irg_block_visited(irg);
1098
1099         /* -- Replicate local entities of the called_graph -- */
1100         /* copy the entities. */
1101         irp_reserve_resources(irp, IR_RESOURCE_ENTITY_LINK);
1102         called_frame = get_irg_frame_type(called_graph);
1103         curr_frame   = get_irg_frame_type(irg);
1104         for (i = 0, n = get_class_n_members(called_frame); i < n; ++i) {
1105                 ir_entity *new_ent, *old_ent;
1106                 old_ent = get_class_member(called_frame, i);
1107                 new_ent = copy_entity_own(old_ent, curr_frame);
1108                 set_entity_link(old_ent, new_ent);
1109         }
1110
1111         /* visited is greater than that of the called graph.  With this trick
1112            visited will remain unchanged, so that an outer walker, e.g. one searching
1113            for call nodes to inline, will not visit the inlined nodes. */
1114         set_irg_visited(irg, get_irg_visited(irg)-1);
1115
1116         /* -- Performing dead node elimination inlines the graph -- */
1117         /* Copies the nodes to the obstack of current_ir_graph. Updates links to new
1118            entities. */
1119         irg_walk(get_irg_end(called_graph), copy_node_inline, copy_preds_inline,
1120                  get_irg_frame_type(called_graph));
1121
1122         irp_free_resources(irp, IR_RESOURCE_ENTITY_LINK);
1123
1124         /* Repair called_graph */
1125         set_irg_visited(called_graph, get_irg_visited(irg));
1126         set_irg_block_visited(called_graph, get_irg_block_visited(irg));
1127         set_Block_block_visited(get_irg_start_block(called_graph), 0);
1128
1129         /* -- Merge the end of the inlined procedure with the call site -- */
1130         /* We will turn the old Call node into a Tuple with the following
1131            predecessors:
1132            -1:  Block of Tuple.
1133            0: Phi of all Memories of Return statements.
1134            1: Jmp from new Block that merges the control flow from all exception
1135            predecessors of the old end block.
1136            2: Tuple of all arguments.
1137            3: Phi of Exception memories.
1138            In case the old Call directly branches to End on an exception we don't
1139            need the block merging all exceptions nor the Phi of the exception
1140            memories.
1141         */
1142
1143         /* -- Precompute some values -- */
1144         end_bl = get_new_node(get_irg_end_block(called_graph));
1145         end = get_new_node(get_irg_end(called_graph));
1146         arity = get_Block_n_cfgpreds(end_bl);    /* arity = n_exc + n_ret  */
1147         n_res = get_method_n_ress(get_Call_type(call));
1148
1149         res_pred = XMALLOCN(ir_node*, n_res);
1150         cf_pred  = XMALLOCN(ir_node*, arity);
1151
1152         set_irg_current_block(irg, post_bl); /* just to make sure */
1153
1154         /* -- archive keepalives -- */
1155         irn_arity = get_irn_arity(end);
1156         for (i = 0; i < irn_arity; i++) {
1157                 ir_node *ka = get_End_keepalive(end, i);
1158                 if (! is_Bad(ka))
1159                         add_End_keepalive(get_irg_end(irg), ka);
1160         }
1161
1162         /* The new end node will die.  We need not free as the in array is on the obstack:
1163            copy_node() only generated 'D' arrays. */
1164
1165         /* -- Replace Return nodes by Jump nodes. -- */
1166         n_ret = 0;
1167         for (i = 0; i < arity; i++) {
1168                 ir_node *ret;
1169                 ret = get_Block_cfgpred(end_bl, i);
1170                 if (is_Return(ret)) {
1171                         cf_pred[n_ret] = new_r_Jmp(get_nodes_block(ret));
1172                         n_ret++;
1173                 }
1174         }
1175         set_irn_in(post_bl, n_ret, cf_pred);
1176
1177         /* -- Build a Tuple for all results of the method.
1178            Add Phi node if there was more than one Return.  -- */
1179         turn_into_tuple(post_call, pn_Call_max);
1180         /* First the Memory-Phi */
1181         n_mem_phi = 0;
1182         for (i = 0; i < arity; i++) {
1183                 ret = get_Block_cfgpred(end_bl, i);
1184                 if (is_Return(ret)) {
1185                         cf_pred[n_mem_phi++] = get_Return_mem(ret);
1186                 }
1187                 /* memory output for some exceptions is directly connected to End */
1188                 if (is_Call(ret)) {
1189                         cf_pred[n_mem_phi++] = new_r_Proj(get_nodes_block(ret), ret, mode_M, 3);
1190                 } else if (is_fragile_op(ret)) {
1191                         /* We rely that all cfops have the memory output at the same position. */
1192                         cf_pred[n_mem_phi++] = new_r_Proj(get_nodes_block(ret), ret, mode_M, 0);
1193                 } else if (is_Raise(ret)) {
1194                         cf_pred[n_mem_phi++] = new_r_Proj(get_nodes_block(ret), ret, mode_M, 1);
1195                 }
1196         }
1197         phi = new_Phi(n_mem_phi, cf_pred, mode_M);
1198         set_Tuple_pred(call, pn_Call_M, phi);
1199         /* Conserve Phi-list for further inlinings -- but might be optimized */
1200         if (get_nodes_block(phi) == post_bl) {
1201                 set_irn_link(phi, get_irn_link(post_bl));
1202                 set_irn_link(post_bl, phi);
1203         }
1204         /* Now the real results */
1205         if (n_res > 0) {
1206                 for (j = 0; j < n_res; j++) {
1207                         ir_type *res_type = get_method_res_type(ctp, j);
1208                         ir_mode *res_mode = get_type_mode(res_type);
1209                         n_ret = 0;
1210                         for (i = 0; i < arity; i++) {
1211                                 ret = get_Block_cfgpred(end_bl, i);
1212                                 if (is_Return(ret)) {
1213                                         ir_node *res = get_Return_res(ret, j);
1214                                         if (get_irn_mode(res) != res_mode) {
1215                                                 ir_node *block = get_nodes_block(res);
1216                                                 res = new_r_Conv(block, res, res_mode);
1217                                         }
1218                                         cf_pred[n_ret] = res;
1219                                         n_ret++;
1220                                 }
1221                         }
1222                         if (n_ret > 0)
1223                                 phi = new_Phi(n_ret, cf_pred, get_irn_mode(cf_pred[0]));
1224                         else
1225                                 phi = new_Bad();
1226                         res_pred[j] = phi;
1227                         /* Conserve Phi-list for further inlinings -- but might be optimized */
1228                         if (get_nodes_block(phi) == post_bl) {
1229                                 set_Phi_next(phi, get_Block_phis(post_bl));
1230                                 set_Block_phis(post_bl, phi);
1231                         }
1232                 }
1233                 set_Tuple_pred(call, pn_Call_T_result, new_Tuple(n_res, res_pred));
1234         } else {
1235                 set_Tuple_pred(call, pn_Call_T_result, new_Bad());
1236         }
1237         /* handle the regular call */
1238         set_Tuple_pred(call, pn_Call_X_regular, new_Jmp());
1239
1240         /* For now, we cannot inline calls with value_base */
1241         set_Tuple_pred(call, pn_Call_P_value_res_base, new_Bad());
1242
1243         /* Finally the exception control flow.
1244            We have two possible situations:
1245            First if the Call branches to an exception handler:
1246            We need to add a Phi node to
1247            collect the memory containing the exception objects.  Further we need
1248            to add another block to get a correct representation of this Phi.  To
1249            this block we add a Jmp that resolves into the X output of the Call
1250            when the Call is turned into a tuple.
1251            Second: There is no exception edge. Just add all inlined exception
1252            branches to the End node.
1253          */
1254         if (exc_handling == exc_handler) {
1255                 n_exc = 0;
1256                 for (i = 0; i < arity; i++) {
1257                         ir_node *ret, *irn;
1258                         ret = get_Block_cfgpred(end_bl, i);
1259                         irn = skip_Proj(ret);
1260                         if (is_fragile_op(irn) || is_Raise(irn)) {
1261                                 cf_pred[n_exc] = ret;
1262                                 ++n_exc;
1263                         }
1264                 }
1265                 if (n_exc > 0) {
1266                         if (n_exc == 1) {
1267                                 /* simple fix */
1268                                 set_Tuple_pred(call, pn_Call_X_except, cf_pred[0]);
1269                         } else {
1270                                 ir_node *block = new_Block(n_exc, cf_pred);
1271                                 set_cur_block(block);
1272                                 set_Tuple_pred(call, pn_Call_X_except, new_Jmp());
1273                         }
1274                 } else {
1275                         set_Tuple_pred(call, pn_Call_X_except, new_Bad());
1276                 }
1277         } else {
1278                 ir_node *main_end_bl;
1279                 int main_end_bl_arity;
1280                 ir_node **end_preds;
1281
1282                 /* assert(exc_handling == exc_no_handler || no exceptions) */
1283                 n_exc = 0;
1284                 for (i = 0; i < arity; i++) {
1285                         ir_node *ret = get_Block_cfgpred(end_bl, i);
1286                         ir_node *irn = skip_Proj(ret);
1287
1288                         if (is_fragile_op(irn) || is_Raise(irn)) {
1289                                 cf_pred[n_exc] = ret;
1290                                 n_exc++;
1291                         }
1292                 }
1293                 main_end_bl       = get_irg_end_block(irg);
1294                 main_end_bl_arity = get_irn_arity(main_end_bl);
1295                 end_preds         = XMALLOCN(ir_node*, n_exc + main_end_bl_arity);
1296
1297                 for (i = 0; i < main_end_bl_arity; ++i)
1298                         end_preds[i] = get_irn_n(main_end_bl, i);
1299                 for (i = 0; i < n_exc; ++i)
1300                         end_preds[main_end_bl_arity + i] = cf_pred[i];
1301                 set_irn_in(main_end_bl, n_exc + main_end_bl_arity, end_preds);
1302                 set_Tuple_pred(call, pn_Call_X_except, new_Bad());
1303                 free(end_preds);
1304         }
1305         free(res_pred);
1306         free(cf_pred);
1307
1308         /* --  Turn optimizations back on. -- */
1309         set_optimize(rem_opt);
1310         current_ir_graph = rem;
1311
1312         return 1;
1313 }
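/*
 * Usage sketch: inline_method() refuses to inline a graph into itself, so for
 * potentially recursive calls the callee has to be duplicated first, e.g. with
 * create_irg_copy() as noted above:
 *
 *     ir_graph *copy = create_irg_copy(callee);
 *     (void)inline_method(call, copy);
 */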
1314
1315 /********************************************************************/
1316 /* Apply inlining to small methods.                                 */
1317 /********************************************************************/
1318
1319 static struct obstack  temp_obst;
1320
1321 /** Represents a possible inlinable call in a graph. */
1322 typedef struct _call_entry {
1323         ir_node    *call;       /**< The Call node. */
1324         ir_graph   *callee;     /**< The callee IR-graph. */
1325         list_head  list;        /**< List head for linking the next one. */
1326         int        loop_depth;  /**< The loop depth of this call. */
1327         int        benefice;    /**< The calculated benefice of this call. */
1328         unsigned   local_adr:1; /**< Set if this call receives the address of a local variable. */
1329         unsigned   all_const:1; /**< Set if this call has only constant parameters. */
1330 } call_entry;
1331
1332 /**
1333  * environment for inlining small irgs
1334  */
1335 typedef struct _inline_env_t {
1336         struct obstack obst;  /**< An obstack where call_entries are allocated on. */
1337         list_head      calls; /**< The call entry list. */
1338 } inline_env_t;
1339
1340 /**
1341  * Returns the irg called from a Call node. If the irg is not
1342  * known, NULL is returned.
1343  *
1344  * @param call  the call node
1345  */
1346 static ir_graph *get_call_called_irg(ir_node *call)
1347 {
1348         ir_node *addr;
1349
1350         addr = get_Call_ptr(call);
1351         if (is_Global(addr)) {
1352                 ir_entity *ent = get_Global_entity(addr);
1353                 /* we don't know which function finally gets bound to a weak symbol */
1354                 if (get_entity_linkage(ent) & IR_LINKAGE_WEAK)
1355                         return NULL;
1356
1357                 return get_entity_irg(ent);
1358         }
1359
1360         return NULL;
1361 }
1362
1363 /**
1364  * Walker: Collect all calls to known graphs inside a graph.
1365  */
1366 static void collect_calls(ir_node *call, void *env)
1367 {
1368         (void) env;
1369         if (is_Call(call)) {
1370                 ir_graph *called_irg = get_call_called_irg(call);
1371
1372                 if (called_irg != NULL) {
1373                         /* The Call node calls a locally defined method.  Remember to inline. */
1374                         inline_env_t *ienv  = env;
1375                         call_entry   *entry = OALLOC(&ienv->obst, call_entry);
1376                         entry->call       = call;
1377                         entry->callee     = called_irg;
1378                         entry->loop_depth = 0;
1379                         entry->benefice   = 0;
1380                         entry->local_adr  = 0;
1381                         entry->all_const  = 0;
1382
1383                         list_add_tail(&entry->list, &ienv->calls);
1384                 }
1385         }
1386 }
1387
1388 /**
1389  * Inlines all small methods at call sites where the called address comes
1390  * from a SymConst node that references the entity representing the called
1391  * method.
1392  * The size argument is a rough measure for the code size of the method:
1393  * Methods where the obstack containing the firm graph is smaller than
1394  * size are inlined.
1395  */
1396 void inline_small_irgs(ir_graph *irg, int size)
1397 {
1398         ir_graph *rem = current_ir_graph;
1399         inline_env_t env;
1400         call_entry *entry;
1401
1402         current_ir_graph = irg;
1403         /* Handle graph state */
1404         assert(get_irg_phase_state(irg) != phase_building);
1405         free_callee_info(irg);
1406
1407         /* Find Call nodes to inline.
1408            (We cannot inline during a walk of the graph, as inlining the same
1409            method several times changes the visited flag of the walked graph:
1410            after the first inlining visited of the callee equals visited of
1411            the caller.  With the next inlining both are increased.) */
1412         obstack_init(&env.obst);
1413         INIT_LIST_HEAD(&env.calls);
1414         irg_walk_graph(irg, NULL, collect_calls, &env);
1415
1416         if (! list_empty(&env.calls)) {
1417                 /* There are calls to inline */
1418                 ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1419                 collect_phiprojs(irg);
1420
1421                 list_for_each_entry(call_entry, entry, &env.calls, list) {
1422                         ir_graph            *callee = entry->callee;
1423                         irg_inline_property prop    = get_irg_inline_property(callee);
1424
1425                         if (prop == irg_inline_forbidden) {
1426                                 continue;
1427                         }
1428
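                                 /* Size heuristic: the memory allocated on the callee's obstack (minus the
                                    unused room in the current chunk) is a rough proxy for its code size. */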
1429                         if (prop >= irg_inline_forced ||
1430                             _obstack_memory_used(callee->obst) - (int)obstack_room(callee->obst) < size) {
1431                                 inline_method(entry->call, callee);
1432                         }
1433                 }
1434                 ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1435         }
1436         obstack_free(&env.obst, NULL);
1437         current_ir_graph = rem;
1438 }
1439
1440 struct inline_small_irgs_pass_t {
1441         ir_graph_pass_t pass;
1442         int            size;
1443 };
1444
1445 /**
1446  * Wrapper to run inline_small_irgs() as a pass.
1447  */
1448 static int inline_small_irgs_wrapper(ir_graph *irg, void *context)
1449 {
1450         struct inline_small_irgs_pass_t *pass = context;
1451
1452         inline_small_irgs(irg, pass->size);
1453         return 0;
1454 }
1455
1456 /* create a pass for inline_small_irgs() */
1457 ir_graph_pass_t *inline_small_irgs_pass(const char *name, int size)
1458 {
1459         struct inline_small_irgs_pass_t *pass =
1460                 XMALLOCZ(struct inline_small_irgs_pass_t);
1461
1462         pass->size = size;
1463         return def_graph_pass_constructor(
1464                 &pass->pass, name ? name : "inline_small_irgs", inline_small_irgs_wrapper);
1465 }
1466
1467 /**
1468  * Environment for inlining irgs.
1469  */
1470 typedef struct {
1471         list_head calls;             /**< List of all call nodes in this graph. */
1472         unsigned  *local_weights;    /**< Once allocated, the beneficial weight for transmitting local addresses. */
1473         unsigned  n_nodes;           /**< Number of nodes in graph except Id, Tuple, Proj, Start, End. */
1474         unsigned  n_blocks;          /**< Number of Blocks in graph without Start and End block. */
1475         unsigned  n_nodes_orig;      /**< for statistics */
1476         unsigned  n_call_nodes;      /**< Number of Call nodes in the graph. */
1477         unsigned  n_call_nodes_orig; /**< for statistics */
1478         unsigned  n_callers;         /**< Number of known graphs that call this graph. */
1479         unsigned  n_callers_orig;    /**< for statistics */
1480         unsigned  got_inline:1;      /**< Set, if at least one call inside this graph was inlined. */
1481         unsigned  recursive:1;       /**< Set, if this function is self recursive. */
1482 } inline_irg_env;
1483
1484 /**
1485  * Allocate a new environment for inlining.
1486  */
1487 static inline_irg_env *alloc_inline_irg_env(void)
1488 {
1489         inline_irg_env *env    = OALLOC(&temp_obst, inline_irg_env);
1490         INIT_LIST_HEAD(&env->calls);
1491         env->local_weights     = NULL;
1492         env->n_nodes           = -2; /* do not count Start, End */
1493         env->n_blocks          = -2; /* do not count Start and End block */
1494         env->n_nodes_orig      = -2; /* do not count Start, End */
1495         env->n_call_nodes      = 0;
1496         env->n_call_nodes_orig = 0;
1497         env->n_callers         = 0;
1498         env->n_callers_orig    = 0;
1499         env->got_inline        = 0;
1500         env->recursive         = 0;
1501         return env;
1502 }
1503
1504 typedef struct walker_env {
1505         inline_irg_env *x;     /**< the inline environment */
1506         char ignore_runtime;   /**< the ignore runtime flag */
1507         char ignore_callers;   /**< if set, do not update the callers' data */
1508 } wenv_t;
1509
1510 /**
1511  * post-walker: collect all calls in the inline-environment
1512  * of a graph and sum some statistics.
1513  */
1514 static void collect_calls2(ir_node *call, void *ctx)
1515 {
1516         wenv_t         *env = ctx;
1517         inline_irg_env *x = env->x;
1518         ir_opcode      code = get_irn_opcode(call);
1519         ir_graph       *callee;
1520         call_entry     *entry;
1521
1522         /* count meaningful nodes in irg */
1523         if (code != iro_Proj && code != iro_Tuple && code != iro_Sync) {
1524                 if (code != iro_Block) {
1525                         ++x->n_nodes;
1526                         ++x->n_nodes_orig;
1527                 } else {
1528                         ++x->n_blocks;
1529                 }
1530         }
1531
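             /* everything below only deals with Call nodes */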
1532         if (code != iro_Call) return;
1533
1534         /* check if it's a runtime call */
1535         if (env->ignore_runtime) {
1536                 ir_node *symc = get_Call_ptr(call);
1537
1538                 if (is_Global(symc)) {
1539                         ir_entity *ent = get_Global_entity(symc);
1540
1541                         if (get_entity_additional_properties(ent) & mtp_property_runtime)
1542                                 return;
1543                 }
1544         }
1545
1546         /* collect all call nodes */
1547         ++x->n_call_nodes;
1548         ++x->n_call_nodes_orig;
1549
1550         callee = get_call_called_irg(call);
1551         if (callee != NULL) {
1552                 if (! env->ignore_callers) {
1553                         inline_irg_env *callee_env = get_irg_link(callee);
1554                         /* count all static callers */
1555                         ++callee_env->n_callers;
1556                         ++callee_env->n_callers_orig;
1557                 }
1558                 if (callee == current_ir_graph)
1559                         x->recursive = 1;
1560
1561                 /* add it to the list of possibly inlinable entries */
1562                 entry = OALLOC(&temp_obst, call_entry);
1563                 entry->call       = call;
1564                 entry->callee     = callee;
1565                 entry->loop_depth = get_irn_loop(get_nodes_block(call))->depth;
1566                 entry->benefice   = 0;
1567                 entry->local_adr  = 0;
1568                 entry->all_const  = 0;
1569
1570                 list_add_tail(&entry->list, &x->calls);
1571         }
1572 }
1573
1574 /**
1575  * Returns TRUE if the irg's environment counts zero Call nodes,
1576  * hence this irg is a leave (it calls no other functions).
1577  */
1578 inline static int is_leave(ir_graph *irg)
1579 {
1580         inline_irg_env *env = get_irg_link(irg);
1581         return env->n_call_nodes == 0;
1582 }
1583
1584 /**
1585  * Returns TRUE if the number of nodes in the callee is
1586  * smaller than size in the irg's environment.
1587  */
1588 inline static int is_smaller(ir_graph *callee, unsigned size)
1589 {
1590         inline_irg_env *env = get_irg_link(callee);
1591         return env->n_nodes < size;
1592 }
1593
1594 /**
1595  * Duplicate a call entry.
1596  *
1597  * @param entry     the original entry to duplicate
1598  * @param new_call  the new call node
1599  * @param loop_depth_delta
1600  *                  delta value for the loop depth
1601  */
1602 static call_entry *duplicate_call_entry(const call_entry *entry,
1603                                         ir_node *new_call, int loop_depth_delta)
1604 {
1605         call_entry *nentry = OALLOC(&temp_obst, call_entry);
1606         nentry->call       = new_call;
1607         nentry->callee     = entry->callee;
1608         nentry->benefice   = entry->benefice;
1609         nentry->loop_depth = entry->loop_depth + loop_depth_delta;
1610         nentry->local_adr  = entry->local_adr;
1611         nentry->all_const  = entry->all_const;
1612
1613         return nentry;
1614 }
1615
1616 /**
1617  * Append all call nodes of the source environment to the call list of the destination
1618  * environment.
1619  *
1620  * @param dst         destination environment
1621  * @param src         source environment
1622  * @param loop_depth  the loop depth of the call that is replaced by the src list
1623  */
1624 static void append_call_list(inline_irg_env *dst, inline_irg_env *src, int loop_depth)
1625 {
1626         call_entry *entry, *nentry;
1627
1628         /* Note that the src list points to Call nodes in the inlined graph, but
1629            we need Call nodes in our graph. Luckily the inliner leaves this information
1630            in the link field. */
1631         list_for_each_entry(call_entry, entry, &src->calls, list) {
1632                 nentry = duplicate_call_entry(entry, get_irn_link(entry->call), loop_depth);
1633                 list_add_tail(&nentry->list, &dst->calls);
1634         }
1635         dst->n_call_nodes += src->n_call_nodes;
1636         dst->n_nodes      += src->n_nodes;
1637 }
1638
1639 /*
1640  * Inlines small leave methods at call sites where the called address comes
1641  * from a SymConst node that references the entity representing the called
1642  * method.
1643  * The size argument is a rough measure for the code size of the method:
1644  * Methods where the obstack containing the firm graph is smaller than
1645  * size are inlined.
1646  */
1647 void inline_leave_functions(unsigned maxsize, unsigned leavesize,
1648                             unsigned size, int ignore_runtime)
1649 {
1650         inline_irg_env   *env;
1651         ir_graph         *irg;
1652         int              i, n_irgs;
1653         ir_graph         *rem;
1654         int              did_inline;
1655         wenv_t           wenv;
1656         call_entry       *entry, *next;
1657         const call_entry *centry;
1658         pmap             *copied_graphs;
1659         pmap_entry       *pm_entry;
1660
1661         rem = current_ir_graph;
1662         obstack_init(&temp_obst);
1663
1664         /* a map for the copied graphs, used to inline recursive calls */
1665         copied_graphs = pmap_create();
1666
1667         /* extend all irgs by a temporary data structure for inlining. */
1668         n_irgs = get_irp_n_irgs();
1669         for (i = 0; i < n_irgs; ++i)
1670                 set_irg_link(get_irp_irg(i), alloc_inline_irg_env());
1671
1672         /* Pre-compute information in temporary data structure. */
1673         wenv.ignore_runtime = ignore_runtime;
1674         wenv.ignore_callers = 0;
1675         for (i = 0; i < n_irgs; ++i) {
1676                 ir_graph *irg = get_irp_irg(i);
1677
1678                 assert(get_irg_phase_state(irg) != phase_building);
1679                 free_callee_info(irg);
1680
1681                 assure_cf_loop(irg);
1682                 wenv.x = get_irg_link(irg);
1683                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
1684         }
1685
1686         /* -- and now inline. -- */
1687
1688         /* Inline leaves recursively -- we might construct new leaves. */
1689         do {
1690                 did_inline = 0;
1691
1692                 for (i = 0; i < n_irgs; ++i) {
1693                         ir_node *call;
1694                         int phiproj_computed = 0;
1695
1696                         current_ir_graph = get_irp_irg(i);
1697                         env              = get_irg_link(current_ir_graph);
1698
1699                         ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1700                         list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
1701                                 ir_graph            *callee;
1702                                 irg_inline_property  prop;
1703
1704                                 if (env->n_nodes > maxsize)
1705                                         break;
1706
1707                                 call   = entry->call;
1708                                 callee = entry->callee;
1709
1710                                 prop = get_irg_inline_property(callee);
1711                                 if (prop == irg_inline_forbidden) {
1712                                         continue;
1713                                 }
1714
1715                                 if (is_leave(callee) && (
1716                                     is_smaller(callee, leavesize) || prop >= irg_inline_forced)) {
1717                                         if (!phiproj_computed) {
1718                                                 phiproj_computed = 1;
1719                                                 collect_phiprojs(current_ir_graph);
1720                                         }
1721                                         did_inline = inline_method(call, callee);
1722
1723                                         if (did_inline) {
1724                                                 inline_irg_env *callee_env = get_irg_link(callee);
1725
1726                                                 /* call was inlined, Phi/Projs for current graph must be recomputed */
1727                                                 phiproj_computed = 0;
1728
1729                                                 /* Do some statistics */
1730                                                 env->got_inline = 1;
1731                                                 --env->n_call_nodes;
1732                                                 env->n_nodes += callee_env->n_nodes;
1733                                                 --callee_env->n_callers;
1734
1735                                                 /* remove this call from the list */
1736                                                 list_del(&entry->list);
1737                                                 continue;
1738                                         }
1739                                 }
1740                         }
1741                         ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1742                 }
1743         } while (did_inline);
1744
1745         /* inline other small functions. */
1746         for (i = 0; i < n_irgs; ++i) {
1747                 ir_node *call;
1748                 int phiproj_computed = 0;
1749
1750                 current_ir_graph = get_irp_irg(i);
1751                 env              = get_irg_link(current_ir_graph);
1752
1753                 ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1754
1755                 /* note that the list of possible calls is updated during the process */
1756                 list_for_each_entry_safe(call_entry, entry, next, &env->calls, list) {
1757                         irg_inline_property prop;
1758                         ir_graph            *callee;
1759                         pmap_entry          *e;
1760
1761                         call   = entry->call;
1762                         callee = entry->callee;
1763
1764                         prop = get_irg_inline_property(callee);
1765                         if (prop == irg_inline_forbidden) {
1766                                 continue;
1767                         }
1768
1769                         e = pmap_find(copied_graphs, callee);
1770                         if (e != NULL) {
1771                                 /*
1772                                  * Remap callee if we have a copy.
1773                                  * FIXME: Should we do this only for recursive Calls ?
1774                                  */
1775                                 callee = e->value;
1776                         }
1777
1778                         if (prop >= irg_inline_forced ||
1779                             (is_smaller(callee, size) && env->n_nodes < maxsize) /* small function */) {
1780                                 if (current_ir_graph == callee) {
1781                                         /*
1782                                          * Recursive call: we cannot directly inline because we cannot walk
1783                                          * the graph and change it. So we have to make a copy of the graph
1784                                          * first.
1785                                          */
1786
1787                                         inline_irg_env *callee_env;
1788                                         ir_graph       *copy;
1789
1790                                         ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1791
1792                                         /*
1793                                          * No copy yet, create one.
1794                                          * Note that recursive methods are never leaves, so it is sufficient
1795                                          * to test this condition here.
1796                                          */
1797                                         copy = create_irg_copy(callee);
1798
1799                                         /* create_irg_copy() destroys the Proj links, recompute them */
1800                                         phiproj_computed = 0;
1801
1802                                         ir_reserve_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1803
1804                                         /* allocate new environment */
1805                                         callee_env = alloc_inline_irg_env();
1806                                         set_irg_link(copy, callee_env);
1807
1808                                         assure_cf_loop(copy);
1809                                         wenv.x              = callee_env;
1810                                         wenv.ignore_callers = 1;
1811                                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
1812
1813                                         /*
1814                                          * Enter the entity of the original graph. This is needed
1815                                          * for inline_method(). However, note that ent->irg still points
1816                                          * to callee, NOT to copy.
1817                                          */
1818                                         set_irg_entity(copy, get_irg_entity(callee));
1819
1820                                         pmap_insert(copied_graphs, callee, copy);
1821                                         callee = copy;
1822
1823                                         /* we have only one caller: the original graph */
1824                                         callee_env->n_callers      = 1;
1825                                         callee_env->n_callers_orig = 1;
1826                                 }
1827                                 if (! phiproj_computed) {
1828                                         phiproj_computed = 1;
1829                                         collect_phiprojs(current_ir_graph);
1830                                 }
1831                                 did_inline = inline_method(call, callee);
1832                                 if (did_inline) {
1833                                         inline_irg_env *callee_env = (inline_irg_env *)get_irg_link(callee);
1834
1835                                         /* call was inlined, Phi/Projs for current graph must be recomputed */
1836                                         phiproj_computed = 0;
1837
1838                                         /* callee was inlined. Append its call list. */
1839                                         env->got_inline = 1;
1840                                         --env->n_call_nodes;
1841                                         append_call_list(env, callee_env, entry->loop_depth);
1842                                         --callee_env->n_callers;
1843
1844                                         /* after we have inlined callee, all called methods inside callee
1845                                            are now called once more */
1846                                         list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
1847                                                 inline_irg_env *penv = get_irg_link(centry->callee);
1848                                                 ++penv->n_callers;
1849                                         }
1850
1851                                         /* remove this call from the list */
1852                                         list_del(&entry->list);
1853                                         continue;
1854                                 }
1855                         }
1856                 }
1857                 ir_free_resources(current_ir_graph, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
1858         }
1859
1860         for (i = 0; i < n_irgs; ++i) {
1861                 irg = get_irp_irg(i);
1862                 env = get_irg_link(irg);
1863
1864                 if (env->got_inline) {
1865                         optimize_graph_df(irg);
1866                         optimize_cf(irg);
1867                 }
1868                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
1869                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
1870                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
1871                         env->n_callers_orig, env->n_callers,
1872                         get_entity_name(get_irg_entity(irg))));
1873                 }
1874         }
1875
1876         /* kill the copied graphs: we don't need them anymore */
1877         foreach_pmap(copied_graphs, pm_entry) {
1878                 ir_graph *copy = pm_entry->value;
1879
1880                 /* reset the entity, otherwise it will be deleted in the next step ... */
1881                 set_irg_entity(copy, NULL);
1882                 free_ir_graph(copy);
1883         }
1884         pmap_destroy(copied_graphs);
1885
1886         obstack_free(&temp_obst, NULL);
1887         current_ir_graph = rem;
1888 }
1889
1890 struct inline_leave_functions_pass_t {
1891         ir_prog_pass_t pass;
1892         unsigned       maxsize;
1893         unsigned       leavesize;
1894         unsigned       size;
1895         int            ignore_runtime;
1896 };
1897
1898 /**
1899  * Wrapper to run inline_leave_functions() as a ir_prog pass.
1900  */
1901 static int inline_leave_functions_wrapper(ir_prog *irp, void *context)
1902 {
1903         struct inline_leave_functions_pass_t *pass = context;
1904
1905         (void)irp;
1906         inline_leave_functions(
1907                 pass->maxsize, pass->leavesize,
1908                 pass->size, pass->ignore_runtime);
1909         return 0;
1910 }
1911
1912 /* create a pass for inline_leave_functions() */
1913 ir_prog_pass_t *inline_leave_functions_pass(
1914         const char *name, unsigned maxsize, unsigned leavesize,
1915         unsigned size, int ignore_runtime)
1916 {
1917         struct inline_leave_functions_pass_t *pass =
1918                 XMALLOCZ(struct inline_leave_functions_pass_t);
1919
1920         pass->maxsize        = maxsize;
1921         pass->leavesize      = leavesize;
1922         pass->size           = size;
1923         pass->ignore_runtime = ignore_runtime;
1924
1925         return def_prog_pass_constructor(
1926                 &pass->pass,
1927                 name ? name : "inline_leave_functions",
1928                 inline_leave_functions_wrapper);
1929 }
1930
1931 /**
1932  * Calculate the weight for the uses of an argument that receives the address of a local variable; 0 means an unsupported use was found.
1933  */
1934 static unsigned calc_method_local_weight(ir_node *arg)
1935 {
1936         int      i, j, k;
1937         unsigned v, weight = 0;
1938
1939         for (i = get_irn_n_outs(arg) - 1; i >= 0; --i) {
1940                 ir_node *succ = get_irn_out(arg, i);
1941
1942                 switch (get_irn_opcode(succ)) {
1943                 case iro_Load:
1944                 case iro_Store:
1945                         /* Loads and Stores can be removed */
1946                         weight += 3;
1947                         break;
1948                 case iro_Sel:
1949                         /* check if all args are constant */
1950                         for (j = get_Sel_n_indexs(succ) - 1; j >= 0; --j) {
1951                                 ir_node *idx = get_Sel_index(succ, j);
1952                                 if (! is_Const(idx))
1953                                         return 0;
1954                         }
1955                         /* Check users on this Sel. Note: if a 0 is returned here, there was
1956                            some unsupported node. */
1957                         v = calc_method_local_weight(succ);
1958                         if (v == 0)
1959                                 return 0;
1960                         /* we can kill one Sel with constant indexes, this is cheap */
1961                         weight += v + 1;
1962                         break;
1963                 case iro_Id:
1964                         /* when looking backward we might find Id nodes */
1965                         weight += calc_method_local_weight(succ);
1966                         break;
1967                 case iro_Tuple:
1968                         /* unoptimized tuple */
1969                         for (j = get_Tuple_n_preds(succ) - 1; j >= 0; --j) {
1970                                 ir_node *pred = get_Tuple_pred(succ, j);
1971                                 if (pred == arg) {
1972                                         /* look for Proj(j) */
1973                                         for (k = get_irn_n_outs(succ) - 1; k >= 0; --k) {
1974                                                 ir_node *succ_succ = get_irn_out(succ, k);
1975                                                 if (is_Proj(succ_succ)) {
1976                                                         if (get_Proj_proj(succ_succ) == j) {
1977                                                                 /* found */
1978                                                                 weight += calc_method_local_weight(succ_succ);
1979                                                         }
1980                                                 } else {
1981                                                         /* this should NOT happen */
1982                                                         return 0;
1983                                                 }
1984                                         }
1985                                 }
1986                         }
1987                         break;
1988                 default:
1989                         /* any other node: unsupported yet or bad. */
1990                         return 0;
1991                 }
1992         }
1993         return weight;
1994 }
1995
1996 /**
1997  * Calculate the parameter weights for transmitting the address of a local variable.
1998  */
1999 static void analyze_irg_local_weights(inline_irg_env *env, ir_graph *irg)
2000 {
2001         ir_entity *ent = get_irg_entity(irg);
2002         ir_type  *mtp;
2003         int      nparams, i, proj_nr;
2004         ir_node  *irg_args, *arg;
2005
2006         mtp      = get_entity_type(ent);
2007         nparams  = get_method_n_params(mtp);
2008
2009         /* allocate a new array; a non-NULL pointer also serves as the 'analysed' flag */
2010         env->local_weights = NEW_ARR_D(unsigned, &temp_obst, nparams);
2011
2012         /* If the method has no parameters, we have nothing to do. */
2013         if (nparams <= 0)
2014                 return;
2015
2016         assure_irg_outs(irg);
2017         irg_args = get_irg_args(irg);
2018         for (i = get_irn_n_outs(irg_args) - 1; i >= 0; --i) {
2019                 arg     = get_irn_out(irg_args, i);
2020                 proj_nr = get_Proj_proj(arg);
2021                 env->local_weights[proj_nr] = calc_method_local_weight(arg);
2022         }
2023 }
2024
2025 /**
2026  * Calculate the benefice for transmitting a local variable address.
2027  * After inlining, the local variable might be transformed into a
2028  * SSA variable by scalar_replacement().
2029  */
2030 static unsigned get_method_local_adress_weight(ir_graph *callee, int pos)
2031 {
2032         inline_irg_env *env = get_irg_link(callee);
2033
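             /* a non-NULL weights array means the callee was already analysed */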
2034         if (env->local_weights != NULL) {
2035                 if (pos < ARR_LEN(env->local_weights))
2036                         return env->local_weights[pos];
2037                 return 0;
2038         }
2039
2040         analyze_irg_local_weights(env, callee);
2041
2042         if (pos < ARR_LEN(env->local_weights))
2043                 return env->local_weights[pos];
2044         return 0;
2045 }
2046
2047 /**
2048  * Calculate a benefice value for inlining the given call; the result is also stored in entry->benefice.
2049  *
2050  * @param entry      the call entry whose Call node we have to inspect
2051  * @param callee     the called graph
2052  */
2053 static int calc_inline_benefice(call_entry *entry, ir_graph *callee)
2054 {
2055         ir_node   *call = entry->call;
2056         ir_entity *ent  = get_irg_entity(callee);
2057         ir_node   *frame_ptr;
2058         ir_type   *mtp;
2059         int       weight = 0;
2060         int       i, n_params, all_const;
2061         unsigned  cc, v;
2062         irg_inline_property prop;
2063
2064         inline_irg_env *callee_env;
2065
2066         prop = get_irg_inline_property(callee);
2067         if (prop == irg_inline_forbidden) {
2068                 DB((dbg, LEVEL_2, "In %+F Call to %+F: inlining forbidden\n",
2069                     call, callee));
2070                 return entry->benefice = INT_MIN;
2071         }
2072
2073         if (get_irg_additional_properties(callee) & mtp_property_noreturn) {
2074                 DB((dbg, LEVEL_2, "In %+F Call to %+F: not inlining noreturn or weak\n",
2075                     call, callee));
2076                 return entry->benefice = INT_MIN;
2077         }
2078
2079         /* costs for every passed parameter */
2080         n_params = get_Call_n_params(call);
2081         mtp      = get_entity_type(ent);
2082         cc       = get_method_calling_convention(mtp);
2083         if (cc & cc_reg_param) {
2084                 /* register parameter, smaller costs for register parameters */
2085                 int max_regs = cc & ~cc_bits;
2086
2087                 if (max_regs < n_params)
2088                         weight += max_regs * 2 + (n_params - max_regs) * 5;
2089                 else
2090                         weight += n_params * 2;
2091         } else {
2092                 /* parameters are passed on the stack */
2093                 weight += 5 * n_params;
2094         }
2095
2096         /* constant parameters improve the benefice */
2097         frame_ptr = get_irg_frame(current_ir_graph);
2098         all_const = 1;
2099         for (i = 0; i < n_params; ++i) {
2100                 ir_node *param = get_Call_param(call, i);
2101
2102                 if (is_Const(param)) {
2103                         weight += get_method_param_weight(ent, i);
2104                 } else {
2105                         all_const = 0;
2106                         if (is_SymConst(param))
2107                                 weight += get_method_param_weight(ent, i);
2108                         else if (is_Sel(param) && get_Sel_ptr(param) == frame_ptr) {
2109                                 /*
2110                                  * An address of a local variable is transmitted. After
2111                                  * inlining, scalar_replacement might be able to remove the
2112                                  * local variable, so honor this.
2113                                  */
2114                                 v = get_method_local_adress_weight(callee, i);
2115                                 weight += v;
2116                                 if (v > 0)
2117                                         entry->local_adr = 1;
2118                         }
2119                 }
2120         }
2121         entry->all_const = all_const;
2122
2123         callee_env = get_irg_link(callee);
2124         if (callee_env->n_callers == 1 &&
2125             callee != current_ir_graph &&
2126             !entity_is_externally_visible(ent)) {
2127                 weight += 700;
2128         }
2129
2130         /* give a bonus for functions with one block */
2131         if (callee_env->n_blocks == 1)
2132                 weight = weight * 3 / 2;
2133
2134         /* and one for small non-recursive functions: we want them to be inlined in almost every case */
2135         if (callee_env->n_nodes < 30 && !callee_env->recursive)
2136                 weight += 2000;
2137
2138         /* and finally for leaves: they do not increase the register pressure
2139            because of callee-save registers */
2140         if (callee_env->n_call_nodes == 0)
2141                 weight += 400;
2142
2143         /* it is important to inline calls in inner loops first */
2144         if (entry->loop_depth > 30)
2145                 weight += 30 * 1024;
2146         else
2147                 weight += entry->loop_depth * 1024;
2148
2149         /*
2150          * All arguments constant is probably a good sign, give an extra bonus
2151          */
2152         if (all_const)
2153                 weight += 1024;
2154
2155         return entry->benefice = weight;
2156 }
2157
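     /* scratch state for create_irg_list(), filled by callgraph_walker() */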
2158 static ir_graph **irgs;
2159 static int      last_irg;
2160
2161 /**
2162  * Callgraph walker, collect all visited graphs.
2163  */
2164 static void callgraph_walker(ir_graph *irg, void *data)
2165 {
2166         (void) data;
2167         irgs[last_irg++] = irg;
2168 }
2169
2170 /**
2171  * Creates an inline order for all graphs.
2172  *
2173  * @return the list of graphs.
2174  */
2175 static ir_graph **create_irg_list(void)
2176 {
2177         ir_entity **free_methods;
2178         int       arr_len;
2179         int       n_irgs = get_irp_n_irgs();
2180
2181         cgana(&arr_len, &free_methods);
2182         xfree(free_methods);
2183
2184         compute_callgraph();
2185
2186         last_irg = 0;
2187         irgs     = XMALLOCNZ(ir_graph*, n_irgs);
2188
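             /* collect the graphs in callgraph post-order, so that (ignoring cycles)
                callees appear in the list before their callers */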
2189         callgraph_walk(NULL, callgraph_walker, NULL);
2190         assert(n_irgs == last_irg);
2191
2192         return irgs;
2193 }
2194
2195 /**
2196  * Push a call onto the priority list if its benefice is big enough.
2197  * Push a call onto the priority queue if its benefice is big enough.
2198  *
2199  * @param pqueue   the priority queue of calls
2200  * @param inline_threshold
2201  *                 the threshold value
2202  */
2203 static void maybe_push_call(pqueue_t *pqueue, call_entry *call,
2204                             int inline_threshold)
2205 {
2206         ir_graph            *callee  = call->callee;
2207         irg_inline_property prop     = get_irg_inline_property(callee);
2208         int                 benefice = calc_inline_benefice(call, callee);
2209
2210         DB((dbg, LEVEL_2, "In %+F Call %+F to %+F has benefice %d\n",
2211             get_irn_irg(call->call), call->call, callee, benefice));
2212
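             /* forced-inline candidates are queued regardless of the threshold */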
2213         if (prop < irg_inline_forced && benefice < inline_threshold) {
2214                 return;
2215         }
2216
2217         pqueue_put(pqueue, call, benefice);
2218 }
2219
2220 /**
2221  * Try to inline calls into a graph.
2222  *
2223  * @param irg      the graph into which we inline
2224  * @param maxsize  do NOT inline if the size of irg gets
2225  *                 bigger than this amount
2226  * @param inline_threshold
2227  *                 threshold value for inline decision
2228  * @param copied_graphs
2229  *                 map containing copies of recursive graphs
2230  */
2231 static void inline_into(ir_graph *irg, unsigned maxsize,
2232                         int inline_threshold, pmap *copied_graphs)
2233 {
2234         int            phiproj_computed = 0;
2235         inline_irg_env *env = get_irg_link(irg);
2236         call_entry     *curr_call;
2237         wenv_t         wenv;
2238         pqueue_t       *pqueue;
2239
2240         if (env->n_call_nodes == 0)
2241                 return;
2242
2243         if (env->n_nodes > maxsize) {
2244                 DB((dbg, LEVEL_2, "%+F: too big (%d)\n", irg, env->n_nodes));
2245                 return;
2246         }
2247
2248         current_ir_graph = irg;
2249         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
2250
2251         /* put all calls of this irg into the priority queue */
2252         pqueue = new_pqueue();
2253
2254         list_for_each_entry(call_entry, curr_call, &env->calls, list) {
2255                 assert(is_Call(curr_call->call));
2256                 maybe_push_call(pqueue, curr_call, inline_threshold);
2257         }
2258
2259         /* note that the list of possible calls is updated during the process */
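             /* calls are popped best-benefice first */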
2260         while (!pqueue_empty(pqueue)) {
2261                 int                 did_inline;
2262                 call_entry          *curr_call  = pqueue_pop_front(pqueue);
2263                 ir_graph            *callee     = curr_call->callee;
2264                 ir_node             *call_node  = curr_call->call;
2265                 inline_irg_env      *callee_env = get_irg_link(callee);
2266                 irg_inline_property prop        = get_irg_inline_property(callee);
2267                 int                 loop_depth;
2268                 const call_entry    *centry;
2269                 pmap_entry          *e;
2270
2271                 if ((prop < irg_inline_forced) && env->n_nodes + callee_env->n_nodes > maxsize) {
2272                         DB((dbg, LEVEL_2, "%+F: too big (%d) + %+F (%d)\n", irg,
2273                                                 env->n_nodes, callee, callee_env->n_nodes));
2274                         continue;
2275                 }
2276
2277                 e = pmap_find(copied_graphs, callee);
2278                 if (e != NULL) {
2279                         int benefice = curr_call->benefice;
2280                         /*
2281                          * Reduce the weight for recursive functions IFF not all arguments are const:
2282                          * inlining recursive functions is rarely good.
2283                          */
2284                         if (!curr_call->all_const)
2285                                 benefice -= 2000;
2286                         if (benefice < inline_threshold)
2287                                 continue;
2288
2289                         /*
2290                          * Remap callee if we have a copy.
2291                          */
2292                         callee     = e->value;
2293                         callee_env = get_irg_link(callee);
2294                 }
2295
2296                 if (current_ir_graph == callee) {
2297                         /*
2298                          * Recursive call: we cannot directly inline because we cannot
2299                          * walk the graph and change it. So we have to make a copy of
2300                          * the graph first.
2301                          */
2302                         int benefice = curr_call->benefice;
2303                         ir_graph *copy;
2304
2305                         /*
2306                          * Reduce the weight for recursive functions IFF not all arguments are const:
2307                          * inlining recursive functions is rarely good.
2308                          */
2309                         if (!curr_call->all_const)
2310                                 benefice -= 2000;
2311                         if (benefice < inline_threshold)
2312                                 continue;
2313
2314                         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
2315
2316                         /*
2317                          * No copy yet, create one.
2318                          * Note that recursive methods are never leaves, so it is
2319                          * sufficient to test this condition here.
2320                          */
2321                         copy = create_irg_copy(callee);
2322
2323                         /* create_irg_copy() destroys the Proj links, recompute them */
2324                         phiproj_computed = 0;
2325
2326                         ir_reserve_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
2327
2328                         /* allocate a new environment */
2329                         callee_env = alloc_inline_irg_env();
2330                         set_irg_link(copy, callee_env);
2331
2332                         assure_cf_loop(copy);
2333                         wenv.x              = callee_env;
2334                         wenv.ignore_callers = 1;
2335                         irg_walk_graph(copy, NULL, collect_calls2, &wenv);
2336
2337                         /*
2338                          * Enter the entity of the original graph. This is needed
2339                          * for inline_method(). However, note that ent->irg still points
2340                          * to callee, NOT to copy.
2341                          */
2342                         set_irg_entity(copy, get_irg_entity(callee));
2343
2344                         pmap_insert(copied_graphs, callee, copy);
2345                         callee = copy;
2346
2347                         /* we have only one caller: the original graph */
2348                         callee_env->n_callers      = 1;
2349                         callee_env->n_callers_orig = 1;
2350                 }
2351                 if (! phiproj_computed) {
2352                         phiproj_computed = 1;
2353                         collect_phiprojs(current_ir_graph);
2354                 }
2355                 did_inline = inline_method(call_node, callee);
2356                 if (!did_inline)
2357                         continue;
2358
2359                 /* call was inlined, Phi/Projs for current graph must be recomputed */
2360                 phiproj_computed = 0;
2361
2362                 /* remove it from the caller list */
2363                 list_del(&curr_call->list);
2364
2365                 /* callee was inlined. Append its call list. */
2366                 env->got_inline = 1;
2367                 --env->n_call_nodes;
2368
2369                 /* inlining just created copies of the callee's Call nodes in this graph */
2370                 loop_depth = curr_call->loop_depth;
2371                 list_for_each_entry(call_entry, centry, &callee_env->calls, list) {
2372                         inline_irg_env *penv = get_irg_link(centry->callee);
2373                         ir_node        *new_call;
2374                         call_entry     *new_entry;
2375
2376                         /* after we have inlined callee, all called methods inside
2377                          * callee are now called once more */
2378                         ++penv->n_callers;
2379
2380                         /* Note that the src list points to Call nodes in the inlined graph,
2381                          * but we need Call nodes in our graph. Luckily the inliner leaves
2382                          * this information in the link field. */
2383                         new_call = get_irn_link(centry->call);
2384                         assert(is_Call(new_call));
2385
2386                         new_entry = duplicate_call_entry(centry, new_call, loop_depth);
2387                         list_add_tail(&new_entry->list, &env->calls);
2388                         maybe_push_call(pqueue, new_entry, inline_threshold);
2389                 }
2390
2391                 env->n_call_nodes += callee_env->n_call_nodes;
2392                 env->n_nodes += callee_env->n_nodes;
2393                 --callee_env->n_callers;
2394         }
2395         ir_free_resources(irg, IR_RESOURCE_IRN_LINK|IR_RESOURCE_PHI_LIST);
2396         del_pqueue(pqueue);
2397 }
2398
2399 /*
2400  * Heuristic inliner. Calculates a benefice value for every call and inlines
2401  * those calls with a value higher than the threshold.
2402  */
2403 void inline_functions(unsigned maxsize, int inline_threshold,
2404                       opt_ptr after_inline_opt)
2405 {
2406         inline_irg_env   *env;
2407         int              i, n_irgs;
2408         ir_graph         *rem;
2409         wenv_t           wenv;
2410         pmap             *copied_graphs;
2411         pmap_entry       *pm_entry;
2412         ir_graph         **irgs;
2413
2414         rem = current_ir_graph;
2415         obstack_init(&temp_obst);
2416
2417         irgs = create_irg_list();
2418
2419         /* a map for the copied graphs, used to inline recursive calls */
2420         copied_graphs = pmap_create();
2421
2422         /* extend all irgs by a temporary data structure for inlining. */
2423         n_irgs = get_irp_n_irgs();
2424         for (i = 0; i < n_irgs; ++i)
2425                 set_irg_link(irgs[i], alloc_inline_irg_env());
2426
2427         /* Pre-compute information in temporary data structure. */
2428         wenv.ignore_runtime = 0;
2429         wenv.ignore_callers = 0;
2430         for (i = 0; i < n_irgs; ++i) {
2431                 ir_graph *irg = irgs[i];
2432
2433                 free_callee_info(irg);
2434
2435                 wenv.x = get_irg_link(irg);
2436                 assure_cf_loop(irg);
2437                 irg_walk_graph(irg, NULL, collect_calls2, &wenv);
2438         }
2439
2440         /* -- and now inline. -- */
2441         for (i = 0; i < n_irgs; ++i) {
2442                 ir_graph *irg = irgs[i];
2443
2444                 inline_into(irg, maxsize, inline_threshold, copied_graphs);
2445         }
2446
2447         for (i = 0; i < n_irgs; ++i) {
2448                 ir_graph *irg = irgs[i];
2449
2450                 env = get_irg_link(irg);
2451                 if (env->got_inline && after_inline_opt != NULL) {
2452                         /* this irg got calls inlined: optimize it */
2453                         after_inline_opt(irg);
2454                 }
2455                 if (env->got_inline || (env->n_callers_orig != env->n_callers)) {
2456                         DB((dbg, LEVEL_1, "Nodes:%3d ->%3d, calls:%3d ->%3d, callers:%3d ->%3d, -- %s\n",
2457                         env->n_nodes_orig, env->n_nodes, env->n_call_nodes_orig, env->n_call_nodes,
2458                         env->n_callers_orig, env->n_callers,
2459                         get_entity_name(get_irg_entity(irg))));
2460                 }
2461         }
2462
2463         /* kill the copied graphs: we don't need them anymore */
2464         foreach_pmap(copied_graphs, pm_entry) {
2465                 ir_graph *copy = pm_entry->value;
2466
2467                 /* reset the entity, otherwise it will be deleted in the next step ... */
2468                 set_irg_entity(copy, NULL);
2469                 free_ir_graph(copy);
2470         }
2471         pmap_destroy(copied_graphs);
2472
2473         xfree(irgs);
2474
2475         obstack_free(&temp_obst, NULL);
2476         current_ir_graph = rem;
2477 }
2478
2479 struct inline_functions_pass_t {
2480         ir_prog_pass_t pass;
2481         unsigned       maxsize;
2482         int            inline_threshold;
2483         opt_ptr        after_inline_opt;
2484 };
2485
2486 /**
2487  * Wrapper to run inline_functions() as a ir_prog pass.
2488  */
2489 static int inline_functions_wrapper(ir_prog *irp, void *context)
2490 {
2491         struct inline_functions_pass_t *pass = context;
2492
2493         (void)irp;
2494         inline_functions(pass->maxsize, pass->inline_threshold,
2495                          pass->after_inline_opt);
2496         return 0;
2497 }
2498
2499 /* create a ir_prog pass for inline_functions */
2500 ir_prog_pass_t *inline_functions_pass(
2501           const char *name, unsigned maxsize, int inline_threshold,
2502           opt_ptr after_inline_opt)
2503 {
2504         struct inline_functions_pass_t *pass =
2505                 XMALLOCZ(struct inline_functions_pass_t);
2506
2507         pass->maxsize          = maxsize;
2508         pass->inline_threshold = inline_threshold;
2509         pass->after_inline_opt = after_inline_opt;
2510
2511         return def_prog_pass_constructor(
2512                 &pass->pass, name ? name : "inline_functions",
2513                 inline_functions_wrapper);
2514 }
2515
2516 void firm_init_inline(void)
2517 {
2518         FIRM_DBG_REGISTER(dbg, "firm.opt.inline");
2519 }
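
     /*
      * Rough usage sketch (illustrative only; the sizes and thresholds below are
      * made-up tuning values, not recommendations taken from this file):
      *
      *     firm_init_inline();                          // registers the debug module
      *     inline_small_irgs(irg, 50);                  // cheap, per-graph inliner
      *     inline_functions(750, 0, optimize_graph_df); // benefice-driven inliner
      *
      * The *_pass() constructors above merely wrap these calls so they can be
      * scheduled from a pass manager.
      */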